Git Product home page Git Product logo

Comments (10)

axboe avatar axboe commented on August 21, 2024

How about this:

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ee14a0fcd59f..44a08536a76e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6508,6 +6508,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
 
 	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
 			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS;
+	p->last_opcode = IORING_OP_LAST - 1;
 	trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
 	return ret;
 err:
@@ -6528,8 +6529,10 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 
 	if (copy_from_user(&p, params, sizeof(p)))
 		return -EFAULT;
-	for (i = 0; i < ARRAY_SIZE(p.resv); i++) {
-		if (p.resv[i])
+	if (p.resv1 || p.resv2)
+		return -EINVAL;
+	for (i = 0; i < ARRAY_SIZE(p.resv3); i++) {
+		if (p.resv3[i])
 			return -EINVAL;
 	}
 
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index fea7da182851..4f0b5cd347b6 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -170,7 +170,10 @@ struct io_uring_params {
 	__u32 sq_thread_cpu;
 	__u32 sq_thread_idle;
 	__u32 features;
-	__u32 resv[4];
+	__u8 last_opcode;
+	__u8 resv1;
+	__u16 resv2;
+	__u32 resv3[3];
 	struct io_sqring_offsets sq_off;
 	struct io_cqring_offsets cq_off;
 };

from liburing.

axboe avatar axboe commented on August 21, 2024

Only downside I can see here is that if someone backports a specific command to an older kernel, then you could have a sparse range of opcodes supported. But not sure how better to convey this information.

from liburing.

axboe avatar axboe commented on August 21, 2024

Only other idea I have here is to add a IORING_REGISTER_PROBE type command, which will return a header of supported things, combined with an array of opcodes and whether they are supported or not. It'd return something ala:

#define IO_URING_OP_SUPPORTED (1U << 0)
struct io_uring_op {
    __u8 opcode;
    __u8 flags; /* IO_URING_OP_* flags */
};

struct io_uring_probe {
    __u32 bla;
    __u32 foo;
    __u32 bar;
    __u32 len; /* length of array below */
    struct io_uring_op ops[0];
};

Or something like that. That'd work for any kind of layout, sparse or not. Only downside is that this ends up being used like hardware commands, where you don't know the exact length of the returned data. So you can either issue one with a low data len count, then look at what the max length would be, then issue a new one with the correct length to retrieve all the data. Or just go big initially ("it should fit within this size") and hope that's enough, but you'd probably still need to have the re-probe logic. Or maybe you don't for that case, you probably know what the max opcode you are looking for is anyway and you can just set the length to that. If the available length is longer, you don't care, since you are by definition not going to use those commands.

What do you think? liburing could provide a helper for this of course, so the application wouldn't need to setup an sqe, submit it, etc. The helper would just fill in the io_uring_probe struct for you.

from liburing.

axboe avatar axboe commented on August 21, 2024

Something like this:

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ee14a0fcd59f..c67d7500abae 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6554,6 +6554,33 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries,
 	return io_uring_setup(entries, params);
 }
 
+static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
+{
+	struct io_uring_probe *p;
+	size_t size;
+	int i, ret;
+
+	size = struct_size(p, ops, nr_args);
+	p = kzalloc(size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	p->last_op = IORING_OP_LAST - 1;
+	if (nr_args) {
+		for (i = 0; i < nr_args; i++) {
+			p->ops[i].op = i;
+			p->ops[i].flags = IO_URING_OP_SUPPORTED;
+		}
+		p->ops_len = i;
+	}
+
+	ret = 0;
+	if (copy_to_user(arg, p, size))
+		ret = -EFAULT;
+	kfree(p);
+	return ret;
+}
+
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			       void __user *arg, unsigned nr_args)
 	__releases(ctx->uring_lock)
@@ -6570,7 +6597,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		return -ENXIO;
 
 	if (opcode != IORING_UNREGISTER_FILES &&
-	    opcode != IORING_REGISTER_FILES_UPDATE) {
+	    opcode != IORING_REGISTER_FILES_UPDATE &&
+	    opcode != IORING_REGISTER_PROBE) {
 		percpu_ref_kill(&ctx->refs);
 
 		/*
@@ -6632,6 +6660,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_eventfd_unregister(ctx);
 		break;
+	case IORING_REGISTER_PROBE:
+		ret = -EINVAL;
+		if (!arg)
+			break;
+		ret = io_probe(ctx, arg, nr_args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -6639,7 +6673,8 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 
 
 	if (opcode != IORING_UNREGISTER_FILES &&
-	    opcode != IORING_REGISTER_FILES_UPDATE) {
+	    opcode != IORING_REGISTER_FILES_UPDATE &&
+	    opcode != IORING_REGISTER_PROBE) {
 		/* bring the ctx back to life */
 		percpu_ref_reinit(&ctx->refs);
 out:
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index fea7da182851..1a48d2c4664b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -194,6 +194,7 @@ struct io_uring_params {
 #define IORING_UNREGISTER_EVENTFD	5
 #define IORING_REGISTER_FILES_UPDATE	6
 #define IORING_REGISTER_EVENTFD_ASYNC	7
+#define IORING_REGISTER_PROBE		8
 
 struct io_uring_files_update {
 	__u32 offset;
@@ -201,4 +202,19 @@ struct io_uring_files_update {
 	__aligned_u64 /* __s32 * */ fds;
 };
 
+#define IO_URING_OP_SUPPORTED	(1U << 0)
+
+struct io_uring_probe_op {
+	__u8 op;
+	__u8 flags;	/* IO_URING_OP_* flags */
+};
+
+struct io_uring_probe {
+	__u8 last_op;	/* last opcode supported */
+	__u8 ops_len;	/* length of ops[] array below */
+	__u16 resv;
+	__u32 resv2[4];
+	struct io_uring_probe_op ops[0];
+};
+
 #endif

from liburing.

CarterLi avatar CarterLi commented on August 21, 2024

Although it seems to be a little bit complicated, it does provide more flexibility.

So you can either issue one with a low data len count, then look at what the max length would be, then issue a new one with the correct length to retrieve all the data.

Why not let the user deside what opcodes to detect?

That is: let user fill io_uring_probe_op::op with opcodes that the user wants to use. Then the kernel fills io_uring_probe_op::flags accordingly

from liburing.

axboe avatar axboe commented on August 21, 2024

We could let the user fill it out, but feels like that's just extra complexity for very little gain. We have a max of 256 opcodes, so it's not going to be a massive amount of data. May as well just send the whole thing. The app can just use 256 as the array size and it'll always get the full picture.

from liburing.

axboe avatar axboe commented on August 21, 2024

If you want "is this opcode supported" logic, we could just do that in liburing. It's not like efficiency is super important here.

from liburing.

CarterLi avatar CarterLi commented on August 21, 2024

If you want "is this opcode supported" logic, we could just do that in liburing. It's not like efficiency is super important here.

Ok

Another suggestion: io_uring_probe_op::flags doesn't seem to be very useful. I suggest version. 0 means it's not supported. `1 means it's just introduced. And we can increase its value for user-awared fixes / changes.

For example: IORING_OP_POLL won't return -ECANCELED when it's canceled before Linux 5.5. This small change may still break something. We should let user know its behavior is changed.

from liburing.

axboe avatar axboe commented on August 21, 2024

I'd be worried that would turn into an unwieldy mess, and as things are (and have) matured, I expect that not to happen again. If we add a version number, it seems like an invitation to be able to make changes like that and expect the application to just deal with it because "well we did update the version of it", which I don't think is a valid reason at all.

I can see sub-flags being useful, if particular new functionality is available for an opcode. Maybe a version would be useful there, but the problem with a version number is also that people would need to find out what version 2 does that version 1 does not. A named sub-flag at least gives you some idea of what it could be, if it's named appropriately.

from liburing.

axboe avatar axboe commented on August 21, 2024

That said, if a version does become useful, the first __u8 reserved field could just become that later on. It's zero now, so it'll be backwards compatible if that change is made in the future.

from liburing.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.