From 375505a4a8068bf7cb623e18c3aedb831c17fd0e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 7 Apr 2023 16:21:57 -0600 Subject: mnl: set SO_SNDBUF before SO_SNDBUFFORCE Set SO_SNDBUF before SO_SNDBUFFORCE: Unprivileged user namespace does not have CAP_NET_ADMIN on the host (init_user_ns) namespace. SO_SNDBUF always succeeds in Linux, always try SO_SNDBUFFORCE after it. Moreover, suggest that the user bump socket limits if EMSGSIZE is hit after having seen EPERM previously, when calling SO_SNDBUFFORCE. Provide a hint to the user too: # nft -f test.nft netlink: Error: Could not process rule: Message too long Please, rise /proc/sys/net/core/wmem_max on the host namespace. Hint: 4194304 bytes Dave Pifke says: Prior to this patch, nft inside a systemd-nspawn container was failing to install my ruleset (which includes a large-ish map), with the error netlink: Error: Could not process rule: Message too long strace reveals: setsockopt(3, SOL_SOCKET, SO_SNDBUFFORCE, [524288], 4) = -1 EPERM (Operation not permitted) This is despite the nspawn process supposedly having CAP_NET_ADMIN. 
A web search reveals at least one other user having the same issue: https://old.reddit.com/r/Proxmox/comments/scnoav/lxc_container_debian_11_nftables_geoblocking/ Reported-by: Dave Pifke Signed-off-by: Pablo Neira Ayuso --- src/mnl.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'src/mnl.c') diff --git a/src/mnl.c b/src/mnl.c index 26f943db..ce9e4ee1 100644 --- a/src/mnl.c +++ b/src/mnl.c @@ -245,9 +245,10 @@ void mnl_err_list_free(struct mnl_err *err) xfree(err); } -static void mnl_set_sndbuffer(const struct mnl_socket *nl, - struct nftnl_batch *batch) +static void mnl_set_sndbuffer(struct netlink_ctx *ctx) { + struct mnl_socket *nl = ctx->nft->nf_sock; + struct nftnl_batch *batch = ctx->batch; socklen_t len = sizeof(int); int sndnlbuffsiz = 0; int newbuffsiz; @@ -260,9 +261,15 @@ static void mnl_set_sndbuffer(const struct mnl_socket *nl, return; /* Rise sender buffer length to avoid hitting -EMSGSIZE */ + setsockopt(mnl_socket_get_fd(nl), SOL_SOCKET, SO_SNDBUF, + &newbuffsiz, sizeof(socklen_t)); + + /* unpriviledged containers check for CAP_NET_ADMIN on the init_user_ns. */ if (setsockopt(mnl_socket_get_fd(nl), SOL_SOCKET, SO_SNDBUFFORCE, - &newbuffsiz, sizeof(socklen_t)) < 0) - return; + &newbuffsiz, sizeof(socklen_t)) < 0) { + if (errno == EPERM) + ctx->maybe_emsgsize = newbuffsiz; + } } static unsigned int nlsndbufsiz; @@ -409,7 +416,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list, .nl_ctx = ctx, }; - mnl_set_sndbuffer(ctx->nft->nf_sock, ctx->batch); + mnl_set_sndbuffer(ctx); mnl_nft_batch_to_msg(ctx, &msg, &snl, iov, iov_len); -- cgit v1.2.3