diff options
Diffstat (limited to 'src/mnl.c')
-rw-r--r-- | src/mnl.c | 1315 |
1 files changed, 1091 insertions, 224 deletions
@@ -2,12 +2,14 @@ * Copyright (c) 2013-2017 Pablo Neira Ayuso <pablo@netfilter.org> * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * it under the terms of the GNU General Public License version 2 (or any + * later) as published by the Free Software Foundation. * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ +#include <nft.h> + #include <libmnl/libmnl.h> #include <libnftnl/common.h> #include <libnftnl/ruleset.h> @@ -22,18 +24,32 @@ #include <libnftnl/udata.h> #include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_hook.h> #include <linux/netfilter/nf_tables.h> #include <mnl.h> -#include <string.h> +#include <cmd.h> #include <net/if.h> #include <sys/socket.h> #include <arpa/inet.h> #include <fcntl.h> #include <errno.h> -#include <stdlib.h> #include <utils.h> #include <nftables.h> +#include <linux/netfilter.h> +#include <linux/netfilter_arp.h> + +struct basehook { + struct list_head list; + const char *module_name; + const char *hookfn; + const char *table; + const char *chain; + int family; + int chain_family; + uint32_t num; + int prio; +}; struct mnl_socket *nft_mnl_socket_open(void) { @@ -52,13 +68,6 @@ struct mnl_socket *nft_mnl_socket_open(void) return nf_sock; } -struct mnl_socket *nft_mnl_socket_reopen(struct mnl_socket *nf_sock) -{ - mnl_socket_close(nf_sock); - - return nft_mnl_socket_open(); -} - uint32_t mnl_seqnum_alloc(unsigned int *seqnum) { return (*seqnum)++; @@ -77,20 +86,31 @@ nft_mnl_recv(struct netlink_ctx *ctx, uint32_t portid, int (*cb)(const struct nlmsghdr *nlh, void *data), void *cb_data) { char buf[NFT_NLMSG_MAXSIZE]; + bool eintr = false; int ret; ret = mnl_socket_recvfrom(ctx->nft->nf_sock, buf, sizeof(buf)); while (ret > 0) { ret = mnl_cb_run(buf, ret, ctx->seqnum, portid, cb, cb_data); - if (ret <= 0) - goto out; + if (ret == 0) + break; + if (ret < 0) { + if (errno == EAGAIN) { + ret = 0; + break; + } + if (errno != EINTR) + break; + /* process all pending messages before reporting EINTR */ + eintr = true; + } ret = mnl_socket_recvfrom(ctx->nft->nf_sock, buf, sizeof(buf)); } -out: - if (ret < 0 && errno == EAGAIN) - return 0; - + if (eintr) { + ret = -1; + errno = EINTR; + } return ret; } @@ -156,11 +176,11 @@ static int check_genid(const struct nlmsghdr *nlh) * Batching */ -/* selected batch page is 256 Kbytes long to load ruleset of - * half a million rules without hitting -EMSGSIZE due to large - * iovec. +/* Selected batch page is 2 Mbytes long to support loading a ruleset of 3.5M + * rules matching on source and destination address as well as input and output + * interfaces. This is what legacy iptables supports. */ -#define BATCH_PAGE_SIZE getpagesize() * 32 +#define BATCH_PAGE_SIZE 2 * 1024 * 1024 struct nftnl_batch *mnl_batch_init(void) { @@ -222,12 +242,13 @@ static void mnl_err_list_node_add(struct list_head *err_list, int error, void mnl_err_list_free(struct mnl_err *err) { list_del(&err->head); - xfree(err); + free(err); } -static void mnl_set_sndbuffer(const struct mnl_socket *nl, - struct nftnl_batch *batch) +static void mnl_set_sndbuffer(struct netlink_ctx *ctx) { + struct mnl_socket *nl = ctx->nft->nf_sock; + struct nftnl_batch *batch = ctx->batch; socklen_t len = sizeof(int); int sndnlbuffsiz = 0; int newbuffsiz; @@ -240,9 +261,15 @@ static void mnl_set_sndbuffer(const struct mnl_socket *nl, return; /* Rise sender buffer length to avoid hitting -EMSGSIZE */ + setsockopt(mnl_socket_get_fd(nl), SOL_SOCKET, SO_SNDBUF, + &newbuffsiz, sizeof(socklen_t)); + + /* unpriviledged containers check for CAP_NET_ADMIN on the init_user_ns. */ if (setsockopt(mnl_socket_get_fd(nl), SOL_SOCKET, SO_SNDBUFFORCE, - &newbuffsiz, sizeof(socklen_t)) < 0) - return; + &newbuffsiz, sizeof(socklen_t)) < 0) { + if (errno == EPERM) + ctx->maybe_emsgsize = newbuffsiz; + } } static unsigned int nlsndbufsiz; @@ -273,24 +300,16 @@ static int mnl_set_rcvbuffer(const struct mnl_socket *nl, socklen_t bufsiz) return ret; } -static size_t mnl_nft_batch_to_msg(struct netlink_ctx *ctx, struct msghdr *msg, - const struct sockaddr_nl *snl, - struct iovec *iov, unsigned int iov_len) +static void mnl_nft_batch_to_msg(struct netlink_ctx *ctx, struct msghdr *msg, + const struct sockaddr_nl *snl, + struct iovec *iov, unsigned int iov_len) { - unsigned int i; - size_t len = 0; - msg->msg_name = (struct sockaddr_nl *)snl; msg->msg_namelen = sizeof(*snl); msg->msg_iov = iov; msg->msg_iovlen = iov_len; nftnl_batch_iovec(ctx->batch, iov, iov_len); - - for (i = 0; i < iov_len; i++) - len += msg->msg_iov[i].iov_len; - - return len; } static ssize_t mnl_nft_socket_sendmsg(struct netlink_ctx *ctx, @@ -366,7 +385,10 @@ static int mnl_batch_extack_cb(const struct nlmsghdr *nlh, void *data) return MNL_CB_ERROR; } -#define NFT_MNL_ECHO_RCVBUFF_DEFAULT (MNL_SOCKET_BUFFER_SIZE * 1024) +#define NFT_MNL_ECHO_RCVBUFF_DEFAULT (MNL_SOCKET_BUFFER_SIZE * 1024U) +#define NFT_MNL_ACK_MAXSIZE ((sizeof(struct nlmsghdr) + \ + sizeof(struct nfgenmsg) + (1 << 16)) + \ + MNL_SOCKET_BUFFER_SIZE) int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list, uint32_t num_cmds) @@ -374,7 +396,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list, struct mnl_socket *nl = ctx->nft->nf_sock; int ret, fd = mnl_socket_get_fd(nl), portid = mnl_socket_get_portid(nl); uint32_t iov_len = nftnl_batch_iovec_len(ctx->batch); - char rcv_buf[MNL_SOCKET_BUFFER_SIZE]; + char rcv_buf[NFT_MNL_ACK_MAXSIZE]; const struct sockaddr_nl snl = { .nl_family = AF_NETLINK }; @@ -385,7 +407,6 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list, struct iovec iov[iov_len]; struct msghdr msg = {}; unsigned int rcvbufsiz; - size_t batch_size; fd_set readfds; static mnl_cb_t cb_ctl_array[NLMSG_MIN_TYPE] = { [NLMSG_ERROR] = mnl_batch_extack_cb, @@ -395,16 +416,14 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list, .nl_ctx = ctx, }; - mnl_set_sndbuffer(ctx->nft->nf_sock, ctx->batch); + mnl_set_sndbuffer(ctx); - batch_size = mnl_nft_batch_to_msg(ctx, &msg, &snl, iov, iov_len); + mnl_nft_batch_to_msg(ctx, &msg, &snl, iov, iov_len); + rcvbufsiz = num_cmds * 1024; if (nft_output_echo(&ctx->nft->output)) { - rcvbufsiz = num_cmds * 1024; if (rcvbufsiz < NFT_MNL_ECHO_RCVBUFF_DEFAULT) rcvbufsiz = NFT_MNL_ECHO_RCVBUFF_DEFAULT; - } else { - rcvbufsiz = num_cmds * div_round_up(batch_size, num_cmds) * 4; } mnl_set_rcvbuffer(ctx->nft->nf_sock, rcvbufsiz); @@ -437,13 +456,55 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list, return 0; } +struct mnl_nft_rule_build_ctx { + struct netlink_linearize_ctx *lctx; + struct nlmsghdr *nlh; + struct cmd *cmd; +}; + +static int mnl_nft_expr_build_cb(struct nftnl_expr *nle, void *data) +{ + struct mnl_nft_rule_build_ctx *ctx = data; + struct nlmsghdr *nlh = ctx->nlh; + struct cmd *cmd = ctx->cmd; + struct nft_expr_loc *eloc; + struct nlattr *nest; + + eloc = nft_expr_loc_find(nle, ctx->lctx); + if (eloc) + cmd_add_loc(cmd, nlh->nlmsg_len, eloc->loc); + + nest = mnl_attr_nest_start(nlh, NFTA_LIST_ELEM); + nftnl_expr_build_payload(nlh, nle); + mnl_attr_nest_end(nlh, nest); + + nftnl_rule_del_expr(nle); + nftnl_expr_free(nle); + + return 0; +} + +static void mnl_nft_rule_build_ctx_init(struct mnl_nft_rule_build_ctx *rule_ctx, + struct nlmsghdr *nlh, + struct cmd *cmd, + struct netlink_linearize_ctx *lctx) +{ + memset(rule_ctx, 0, sizeof(*rule_ctx)); + rule_ctx->nlh = nlh; + rule_ctx->cmd = cmd; + rule_ctx->lctx = lctx; +} + int mnl_nft_rule_add(struct netlink_ctx *ctx, struct cmd *cmd, unsigned int flags) { + struct mnl_nft_rule_build_ctx rule_ctx; + struct netlink_linearize_ctx lctx; struct rule *rule = cmd->rule; struct handle *h = &rule->handle; struct nftnl_rule *nlr; struct nlmsghdr *nlh; + struct nlattr *nest; nlr = nftnl_rule_alloc(); if (!nlr) @@ -457,7 +518,8 @@ int mnl_nft_rule_add(struct netlink_ctx *ctx, struct cmd *cmd, if (h->position_id) nftnl_rule_set_u32(nlr, NFTNL_RULE_POSITION_ID, h->position_id); - netlink_linearize_rule(ctx, nlr, rule); + netlink_linearize_init(&lctx, nlr); + netlink_linearize_rule(ctx, rule, &lctx); nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), NFT_MSG_NEWRULE, cmd->handle.family, @@ -472,8 +534,15 @@ int mnl_nft_rule_add(struct netlink_ctx *ctx, struct cmd *cmd, else mnl_attr_put_strz(nlh, NFTA_RULE_CHAIN, h->chain.name); + mnl_nft_rule_build_ctx_init(&rule_ctx, nlh, cmd, &lctx); + + nest = mnl_attr_nest_start(nlh, NFTA_RULE_EXPRESSIONS); + nftnl_expr_foreach(nlr, mnl_nft_expr_build_cb, &rule_ctx); + mnl_attr_nest_end(nlh, nest); + nftnl_rule_nlmsg_build_payload(nlh, nlr); nftnl_rule_free(nlr); + netlink_linearize_fini(&lctx); mnl_nft_batch_continue(ctx->batch); @@ -482,11 +551,14 @@ int mnl_nft_rule_add(struct netlink_ctx *ctx, struct cmd *cmd, int mnl_nft_rule_replace(struct netlink_ctx *ctx, struct cmd *cmd) { + struct mnl_nft_rule_build_ctx rule_ctx; + struct netlink_linearize_ctx lctx; struct rule *rule = cmd->rule; struct handle *h = &rule->handle; unsigned int flags = 0; struct nftnl_rule *nlr; struct nlmsghdr *nlh; + struct nlattr *nest; if (nft_output_echo(&ctx->nft->output)) flags |= NLM_F_ECHO; @@ -497,7 +569,8 @@ int mnl_nft_rule_replace(struct netlink_ctx *ctx, struct cmd *cmd) nftnl_rule_set_u32(nlr, NFTNL_RULE_FAMILY, h->family); - netlink_linearize_rule(ctx, nlr, rule); + netlink_linearize_init(&lctx, nlr); + netlink_linearize_rule(ctx, rule, &lctx); nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), NFT_MSG_NEWRULE, cmd->handle.family, @@ -510,8 +583,15 @@ int mnl_nft_rule_replace(struct netlink_ctx *ctx, struct cmd *cmd) cmd_add_loc(cmd, nlh->nlmsg_len, &h->handle.location); mnl_attr_put_u64(nlh, NFTA_RULE_HANDLE, htobe64(h->handle.id)); + mnl_nft_rule_build_ctx_init(&rule_ctx, nlh, cmd, &lctx); + + nest = mnl_attr_nest_start(nlh, NFTA_RULE_EXPRESSIONS); + nftnl_expr_foreach(nlr, mnl_nft_expr_build_cb, &rule_ctx); + mnl_attr_nest_end(nlh, nest); + nftnl_rule_nlmsg_build_payload(nlh, nlr); nftnl_rule_free(nlr); + netlink_linearize_fini(&lctx); mnl_nft_batch_continue(ctx->batch); @@ -520,6 +600,7 @@ int mnl_nft_rule_replace(struct netlink_ctx *ctx, struct cmd *cmd) int mnl_nft_rule_del(struct netlink_ctx *ctx, struct cmd *cmd) { + enum nf_tables_msg_types msg_type = NFT_MSG_DELRULE; struct handle *h = &cmd->handle; struct nftnl_rule *nlr; struct nlmsghdr *nlh; @@ -530,8 +611,11 @@ int mnl_nft_rule_del(struct netlink_ctx *ctx, struct cmd *cmd) nftnl_rule_set_u32(nlr, NFTNL_RULE_FAMILY, h->family); + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYRULE; + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), - NFT_MSG_DELRULE, + msg_type, nftnl_rule_get_u32(nlr, NFTNL_RULE_FAMILY), 0, ctx->seqnum); @@ -581,20 +665,45 @@ err_free: return MNL_CB_OK; } -struct nftnl_rule_list *mnl_nft_rule_dump(struct netlink_ctx *ctx, - int family) +struct nftnl_rule_list *mnl_nft_rule_dump(struct netlink_ctx *ctx, int family, + const char *table, const char *chain, + uint64_t rule_handle, + bool dump, bool reset) { + uint16_t nl_flags = dump ? NLM_F_DUMP : NLM_F_ACK; char buf[MNL_SOCKET_BUFFER_SIZE]; struct nftnl_rule_list *nlr_list; + struct nftnl_rule *nlr = NULL; struct nlmsghdr *nlh; - int ret; + int msg_type, ret; + + if (reset) + msg_type = NFT_MSG_GETRULE_RESET; + else + msg_type = NFT_MSG_GETRULE; + + if (table) { + nlr = nftnl_rule_alloc(); + if (!nlr) + memory_allocation_error(); + + nftnl_rule_set_str(nlr, NFTNL_RULE_TABLE, table); + if (chain) + nftnl_rule_set_str(nlr, NFTNL_RULE_CHAIN, chain); + if (rule_handle) + nftnl_rule_set_u64(nlr, NFTNL_RULE_HANDLE, rule_handle); + } nlr_list = nftnl_rule_list_alloc(); if (nlr_list == NULL) memory_allocation_error(); - nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETRULE, family, - NLM_F_DUMP, ctx->seqnum); + nlh = nftnl_nlmsg_build_hdr(buf, msg_type, family, + nl_flags, ctx->seqnum); + if (nlr) { + nftnl_rule_nlmsg_build_payload(nlh, nlr); + nftnl_rule_free(nlr); + } ret = nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, rule_cb, nlr_list); if (ret < 0) @@ -609,17 +718,98 @@ err: /* * Chain */ + +struct nft_dev { + const char *ifname; + const struct location *location; +}; + +static void nft_dev_add(struct nft_dev *dev_array, const struct expr *expr, int i) +{ + unsigned int ifname_len; + char ifname[IFNAMSIZ]; + + ifname_len = div_round_up(expr->len, BITS_PER_BYTE); + memset(ifname, 0, sizeof(ifname)); + mpz_export_data(ifname, expr->value, BYTEORDER_HOST_ENDIAN, ifname_len); + dev_array[i].ifname = xstrdup(ifname); + dev_array[i].location = &expr->location; +} + +static struct nft_dev *nft_dev_array(const struct expr *dev_expr, int *num_devs) +{ + struct nft_dev *dev_array; + int i = 0, len = 1; + struct expr *expr; + + switch (dev_expr->etype) { + case EXPR_SET: + case EXPR_LIST: + list_for_each_entry(expr, &dev_expr->expressions, list) + len++; + + dev_array = xmalloc(sizeof(struct nft_dev) * len); + + list_for_each_entry(expr, &dev_expr->expressions, list) { + nft_dev_add(dev_array, expr, i); + i++; + } + break; + case EXPR_VALUE: + len++; + dev_array = xmalloc(sizeof(struct nft_dev) * len); + nft_dev_add(dev_array, dev_expr, i); + i++; + break; + default: + assert(0); + } + + dev_array[i].ifname = NULL; + *num_devs = i; + + return dev_array; +} + +static void nft_dev_array_free(const struct nft_dev *dev_array) +{ + int i = 0; + + while (dev_array[i].ifname != NULL) + free_const(dev_array[i++].ifname); + + free_const(dev_array); +} + +static void mnl_nft_chain_devs_build(struct nlmsghdr *nlh, struct cmd *cmd) +{ + const struct expr *dev_expr = cmd->chain->dev_expr; + const struct nft_dev *dev_array; + struct nlattr *nest_dev; + int i, num_devs = 0; + + dev_array = nft_dev_array(dev_expr, &num_devs); + if (num_devs == 1) { + cmd_add_loc(cmd, nlh->nlmsg_len, dev_array[0].location); + mnl_attr_put_strz(nlh, NFTA_HOOK_DEV, dev_array[0].ifname); + } else { + nest_dev = mnl_attr_nest_start(nlh, NFTA_HOOK_DEVS); + for (i = 0; i < num_devs; i++) { + cmd_add_loc(cmd, nlh->nlmsg_len, dev_array[i].location); + mnl_attr_put_strz(nlh, NFTA_DEVICE_NAME, dev_array[i].ifname); + mnl_attr_nest_end(nlh, nest_dev); + } + } + nft_dev_array_free(dev_array); +} + int mnl_nft_chain_add(struct netlink_ctx *ctx, struct cmd *cmd, unsigned int flags) { - int priority, policy, i = 0; + struct nftnl_udata_buf *udbuf; struct nftnl_chain *nlc; - unsigned int ifname_len; - const char **dev_array; - char ifname[IFNAMSIZ]; struct nlmsghdr *nlh; - struct expr *expr; - int dev_array_len; + int priority, policy; nlc = nftnl_chain_alloc(); if (nlc == NULL) @@ -632,45 +822,15 @@ int mnl_nft_chain_add(struct netlink_ctx *ctx, struct cmd *cmd, nftnl_chain_set_u32(nlc, NFTNL_CHAIN_FLAGS, CHAIN_F_HW_OFFLOAD); } - if (cmd->chain->flags & CHAIN_F_BASECHAIN) { - nftnl_chain_set_u32(nlc, NFTNL_CHAIN_HOOKNUM, - cmd->chain->hook.num); - mpz_export_data(&priority, - cmd->chain->priority.expr->value, - BYTEORDER_HOST_ENDIAN, sizeof(int)); - nftnl_chain_set_s32(nlc, NFTNL_CHAIN_PRIO, priority); - nftnl_chain_set_str(nlc, NFTNL_CHAIN_TYPE, - cmd->chain->type); - } - if (cmd->chain->dev_expr) { - dev_array = xmalloc(sizeof(char *) * 8); - dev_array_len = 8; - list_for_each_entry(expr, &cmd->chain->dev_expr->expressions, list) { - ifname_len = div_round_up(expr->len, BITS_PER_BYTE); - memset(ifname, 0, sizeof(ifname)); - mpz_export_data(ifname, expr->value, - BYTEORDER_HOST_ENDIAN, - ifname_len); - dev_array[i++] = xstrdup(ifname); - if (i == dev_array_len) { - dev_array_len *= 2; - dev_array = xrealloc(dev_array, - dev_array_len * sizeof(char *)); - } - } - - dev_array[i] = NULL; - if (i == 1) - nftnl_chain_set_str(nlc, NFTNL_CHAIN_DEV, dev_array[0]); - else if (i > 1) - nftnl_chain_set_data(nlc, NFTNL_CHAIN_DEVICES, dev_array, - sizeof(char *) * dev_array_len); - - i = 0; - while (dev_array[i] != NULL) - xfree(dev_array[i++]); - - xfree(dev_array); + if (cmd->chain->comment) { + udbuf = nftnl_udata_buf_alloc(NFT_USERDATA_MAXLEN); + if (!udbuf) + memory_allocation_error(); + if (!nftnl_udata_put_strz(udbuf, NFTNL_UDATA_CHAIN_COMMENT, cmd->chain->comment)) + memory_allocation_error(); + nftnl_chain_set_data(nlc, NFTNL_CHAIN_USERDATA, nftnl_udata_buf_data(udbuf), + nftnl_udata_buf_len(udbuf)); + nftnl_udata_buf_free(udbuf); } } netlink_dump_chain(nlc, ctx); @@ -703,7 +863,33 @@ int mnl_nft_chain_add(struct netlink_ctx *ctx, struct cmd *cmd, mnl_attr_put_u32(nlh, NFTA_CHAIN_POLICY, htonl(policy)); } + nftnl_chain_unset(nlc, NFTNL_CHAIN_TYPE); + nftnl_chain_nlmsg_build_payload(nlh, nlc); + + if (cmd->chain && cmd->chain->flags & CHAIN_F_BASECHAIN) { + struct nlattr *nest; + + if (cmd->chain->type.str) { + cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->chain->type.loc); + mnl_attr_put_strz(nlh, NFTA_CHAIN_TYPE, cmd->chain->type.str); + } + + nest = mnl_attr_nest_start(nlh, NFTA_CHAIN_HOOK); + + if (cmd->chain->type.str) { + mnl_attr_put_u32(nlh, NFTA_HOOK_HOOKNUM, htonl(cmd->chain->hook.num)); + mpz_export_data(&priority, cmd->chain->priority.expr->value, + BYTEORDER_HOST_ENDIAN, sizeof(int)); + mnl_attr_put_u32(nlh, NFTA_HOOK_PRIORITY, htonl(priority)); + } + + if (cmd->chain && cmd->chain->dev_expr) + mnl_nft_chain_devs_build(nlh, cmd); + + mnl_attr_nest_end(nlh, nest); + } + nftnl_chain_free(nlc); mnl_nft_batch_continue(ctx->batch); @@ -743,6 +929,7 @@ int mnl_nft_chain_rename(struct netlink_ctx *ctx, const struct cmd *cmd, int mnl_nft_chain_del(struct netlink_ctx *ctx, struct cmd *cmd) { + enum nf_tables_msg_types msg_type = NFT_MSG_DELCHAIN; struct nftnl_chain *nlc; struct nlmsghdr *nlh; @@ -752,8 +939,11 @@ int mnl_nft_chain_del(struct netlink_ctx *ctx, struct cmd *cmd) nftnl_chain_set_u32(nlc, NFTNL_CHAIN_FAMILY, cmd->handle.family); + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYCHAIN; + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), - NFT_MSG_DELCHAIN, + msg_type, cmd->handle.family, 0, ctx->seqnum); @@ -768,6 +958,15 @@ int mnl_nft_chain_del(struct netlink_ctx *ctx, struct cmd *cmd) htobe64(cmd->handle.handle.id)); } + if (cmd->op == CMD_DELETE && + cmd->chain && cmd->chain->dev_expr) { + struct nlattr *nest; + + nest = mnl_attr_nest_start(nlh, NFTA_CHAIN_HOOK); + mnl_nft_chain_devs_build(nlh, cmd); + mnl_attr_nest_end(nlh, nest); + } + nftnl_chain_nlmsg_build_payload(nlh, nlc); nftnl_chain_free(nlc); @@ -800,10 +999,12 @@ err_free: } struct nftnl_chain_list *mnl_nft_chain_dump(struct netlink_ctx *ctx, - int family) + int family, const char *table, + const char *chain) { char buf[MNL_SOCKET_BUFFER_SIZE]; struct nftnl_chain_list *nlc_list; + struct nftnl_chain *nlc = NULL; struct nlmsghdr *nlh; int ret; @@ -811,11 +1012,24 @@ struct nftnl_chain_list *mnl_nft_chain_dump(struct netlink_ctx *ctx, if (nlc_list == NULL) memory_allocation_error(); + if (table && chain) { + nlc = nftnl_chain_alloc(); + if (!nlc) + memory_allocation_error(); + + nftnl_chain_set_str(nlc, NFTNL_CHAIN_TABLE, table); + nftnl_chain_set_str(nlc, NFTNL_CHAIN_NAME, chain); + } + nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETCHAIN, family, - NLM_F_DUMP, ctx->seqnum); + nlc ? NLM_F_ACK : NLM_F_DUMP, ctx->seqnum); + if (nlc) { + nftnl_chain_nlmsg_build_payload(nlh, nlc); + nftnl_chain_free(nlc); + } ret = nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, chain_cb, nlc_list); - if (ret < 0) + if (ret < 0 && errno != ENOENT) goto err; return nlc_list; @@ -830,6 +1044,7 @@ err: int mnl_nft_table_add(struct netlink_ctx *ctx, struct cmd *cmd, unsigned int flags) { + struct nftnl_udata_buf *udbuf; struct nftnl_table *nlt; struct nlmsghdr *nlh; @@ -838,10 +1053,22 @@ int mnl_nft_table_add(struct netlink_ctx *ctx, struct cmd *cmd, memory_allocation_error(); nftnl_table_set_u32(nlt, NFTNL_TABLE_FAMILY, cmd->handle.family); - if (cmd->table) + if (cmd->table) { nftnl_table_set_u32(nlt, NFTNL_TABLE_FLAGS, cmd->table->flags); - else + + if (cmd->table->comment) { + udbuf = nftnl_udata_buf_alloc(NFT_USERDATA_MAXLEN); + if (!udbuf) + memory_allocation_error(); + if (!nftnl_udata_put_strz(udbuf, NFTNL_UDATA_TABLE_COMMENT, cmd->table->comment)) + memory_allocation_error(); + nftnl_table_set_data(nlt, NFTNL_TABLE_USERDATA, nftnl_udata_buf_data(udbuf), + nftnl_udata_buf_len(udbuf)); + nftnl_udata_buf_free(udbuf); + } + } else { nftnl_table_set_u32(nlt, NFTNL_TABLE_FLAGS, 0); + } nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), NFT_MSG_NEWTABLE, @@ -860,6 +1087,7 @@ int mnl_nft_table_add(struct netlink_ctx *ctx, struct cmd *cmd, int mnl_nft_table_del(struct netlink_ctx *ctx, struct cmd *cmd) { + enum nf_tables_msg_types msg_type = NFT_MSG_DELTABLE; struct nftnl_table *nlt; struct nlmsghdr *nlh; @@ -869,17 +1097,18 @@ int mnl_nft_table_del(struct netlink_ctx *ctx, struct cmd *cmd) nftnl_table_set_u32(nlt, NFTNL_TABLE_FAMILY, cmd->handle.family); - nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), - NFT_MSG_DELTABLE, - cmd->handle.family, - 0, ctx->seqnum); + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYTABLE; + + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), msg_type, + cmd->handle.family, 0, ctx->seqnum); if (cmd->handle.table.name) { cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.table.location); mnl_attr_put_strz(nlh, NFTA_TABLE_NAME, cmd->handle.table.name); } else if (cmd->handle.handle.id) { cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.handle.location); - mnl_attr_put_u64(nlh, NFTA_TABLE_NAME, + mnl_attr_put_u64(nlh, NFTA_TABLE_HANDLE, htobe64(cmd->handle.handle.id)); } nftnl_table_nlmsg_build_payload(nlh, nlt); @@ -914,10 +1143,12 @@ err_free: } struct nftnl_table_list *mnl_nft_table_dump(struct netlink_ctx *ctx, - int family) + int family, const char *table) { char buf[MNL_SOCKET_BUFFER_SIZE]; struct nftnl_table_list *nlt_list; + struct nftnl_table *nlt = NULL; + int flags = NLM_F_DUMP; struct nlmsghdr *nlh; int ret; @@ -925,11 +1156,25 @@ struct nftnl_table_list *mnl_nft_table_dump(struct netlink_ctx *ctx, if (nlt_list == NULL) return NULL; + if (table) { + nlt = nftnl_table_alloc(); + if (!nlt) + memory_allocation_error(); + + nftnl_table_set_u32(nlt, NFTNL_TABLE_FAMILY, family); + nftnl_table_set_str(nlt, NFTNL_TABLE_NAME, table); + flags = NLM_F_ACK; + } + nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETTABLE, family, - NLM_F_DUMP, ctx->seqnum); + flags, ctx->seqnum); + if (nlt) { + nftnl_table_nlmsg_build_payload(nlh, nlt); + nftnl_table_free(nlt); + } ret = nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, table_cb, nlt_list); - if (ret < 0) + if (ret < 0 && errno != ENOENT) goto err; return nlt_list; @@ -945,9 +1190,7 @@ static void set_key_expression(struct netlink_ctx *ctx, { struct nftnl_udata *nest1, *nest2; - if (expr->flags & EXPR_F_CONSTANT || - set_flags & NFT_SET_ANONYMOUS || - !expr_ops(expr)->build_udata) + if (!expr_ops(expr)->build_udata) return; nest1 = nftnl_udata_nest_start(udbuf, type); @@ -969,6 +1212,8 @@ int mnl_nft_set_add(struct netlink_ctx *ctx, struct cmd *cmd, struct set *set = cmd->set; struct nftnl_set *nls; struct nlmsghdr *nlh; + struct stmt *stmt; + int num_stmts = 0; nls = nftnl_set_alloc(); if (!nls) @@ -1042,13 +1287,27 @@ int mnl_nft_set_add(struct netlink_ctx *ctx, struct cmd *cmd, sizeof(set->desc.field_len[0])); } + if (set->comment) { + if (!nftnl_udata_put_strz(udbuf, NFTNL_UDATA_SET_COMMENT, set->comment)) + memory_allocation_error(); + } + nftnl_set_set_data(nls, NFTNL_SET_USERDATA, nftnl_udata_buf_data(udbuf), nftnl_udata_buf_len(udbuf)); nftnl_udata_buf_free(udbuf); - if (set->stmt) { - nftnl_set_set_data(nls, NFTNL_SET_EXPR, - netlink_gen_stmt_stateful(set->stmt), 0); + list_for_each_entry(stmt, &set->stmt_list, list) + num_stmts++; + + if (num_stmts == 1) { + list_for_each_entry(stmt, &set->stmt_list, list) { + nftnl_set_set_data(nls, NFTNL_SET_EXPR, + netlink_gen_stmt_stateful(stmt), 0); + break; + } + } else if (num_stmts > 1) { + list_for_each_entry(stmt, &set->stmt_list, list) + nftnl_set_add_expr(nls, netlink_gen_stmt_stateful(stmt)); } netlink_dump_set(nls, ctx); @@ -1076,6 +1335,7 @@ int mnl_nft_set_add(struct netlink_ctx *ctx, struct cmd *cmd, int mnl_nft_set_del(struct netlink_ctx *ctx, struct cmd *cmd) { + enum nf_tables_msg_types msg_type = NFT_MSG_DELSET; const struct handle *h = &cmd->handle; struct nftnl_set *nls; struct nlmsghdr *nlh; @@ -1086,8 +1346,11 @@ int mnl_nft_set_del(struct netlink_ctx *ctx, struct cmd *cmd) nftnl_set_set_u32(nls, NFTNL_SET_FAMILY, h->family); + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYSET; + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), - NFT_MSG_DELSET, + msg_type, h->family, 0, ctx->seqnum); @@ -1134,10 +1397,12 @@ err_free: } struct nftnl_set_list * -mnl_nft_set_dump(struct netlink_ctx *ctx, int family, const char *table) +mnl_nft_set_dump(struct netlink_ctx *ctx, int family, + const char *table, const char *set) { char buf[MNL_SOCKET_BUFFER_SIZE]; struct nftnl_set_list *nls_list; + int flags = NLM_F_DUMP; struct nlmsghdr *nlh; struct nftnl_set *s; int ret; @@ -1146,10 +1411,15 @@ mnl_nft_set_dump(struct netlink_ctx *ctx, int family, const char *table) if (s == NULL) memory_allocation_error(); - nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETSET, family, - NLM_F_DUMP, ctx->seqnum); if (table != NULL) nftnl_set_set_str(s, NFTNL_SET_TABLE, table); + if (set) { + nftnl_set_set_str(s, NFTNL_SET_NAME, set); + flags = NLM_F_ACK; + } + + nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETSET, family, + flags, ctx->seqnum); nftnl_set_nlmsg_build_payload(nlh, s); nftnl_set_free(s); @@ -1158,7 +1428,7 @@ mnl_nft_set_dump(struct netlink_ctx *ctx, int family, const char *table) memory_allocation_error(); ret = nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, set_cb, nls_list); - if (ret < 0) + if (ret < 0 && errno != ENOENT) goto err; return nls_list; @@ -1171,6 +1441,7 @@ int mnl_nft_obj_add(struct netlink_ctx *ctx, struct cmd *cmd, unsigned int flags) { struct obj *obj = cmd->object; + struct nftnl_udata_buf *udbuf; struct nftnl_obj *nlo; struct nlmsghdr *nlh; @@ -1181,6 +1452,17 @@ int mnl_nft_obj_add(struct netlink_ctx *ctx, struct cmd *cmd, nftnl_obj_set_u32(nlo, NFTNL_OBJ_FAMILY, cmd->handle.family); nftnl_obj_set_u32(nlo, NFTNL_OBJ_TYPE, obj->type); + if (obj->comment) { + udbuf = nftnl_udata_buf_alloc(NFT_USERDATA_MAXLEN); + if (!udbuf) + memory_allocation_error(); + if (!nftnl_udata_put_strz(udbuf, NFTNL_UDATA_OBJ_COMMENT, obj->comment)) + memory_allocation_error(); + nftnl_obj_set_data(nlo, NFTNL_OBJ_USERDATA, nftnl_udata_buf_data(udbuf), + nftnl_udata_buf_len(udbuf)); + nftnl_udata_buf_free(udbuf); + } + switch (obj->type) { case NFT_OBJECT_COUNTER: nftnl_obj_set_u64(nlo, NFTNL_OBJ_CTR_PKTS, @@ -1272,6 +1554,7 @@ int mnl_nft_obj_add(struct netlink_ctx *ctx, struct cmd *cmd, int mnl_nft_obj_del(struct netlink_ctx *ctx, struct cmd *cmd, int type) { + enum nf_tables_msg_types msg_type = NFT_MSG_DELOBJ; struct nftnl_obj *nlo; struct nlmsghdr *nlh; @@ -1282,8 +1565,11 @@ int mnl_nft_obj_del(struct netlink_ctx *ctx, struct cmd *cmd, int type) nftnl_obj_set_u32(nlo, NFTNL_OBJ_FAMILY, cmd->handle.family); nftnl_obj_set_u32(nlo, NFTNL_OBJ_TYPE, type); + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYOBJ; + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), - NFT_MSG_DELOBJ, cmd->handle.family, + msg_type, cmd->handle.family, 0, ctx->seqnum); cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.table.location); @@ -1388,39 +1674,114 @@ static int set_elem_cb(const struct nlmsghdr *nlh, void *data) return MNL_CB_OK; } -static int mnl_nft_setelem_batch(struct nftnl_set *nls, +static bool mnl_nft_attr_nest_overflow(struct nlmsghdr *nlh, + const struct nlattr *from, + const struct nlattr *to) +{ + int len = (void *)to + to->nla_len - (void *)from; + + /* The attribute length field is 16 bits long, thus the maximum payload + * that an attribute can convey is UINT16_MAX. In case of overflow, + * discard the last attribute that did not fit into the nest. + */ + if (len > UINT16_MAX) { + nlh->nlmsg_len -= to->nla_len; + return true; + } + return false; +} + +static void netlink_dump_setelem(const struct nftnl_set_elem *nlse, + struct netlink_ctx *ctx) +{ + FILE *fp = ctx->nft->output.output_fp; + char buf[4096]; + + if (!(ctx->nft->debug_mask & NFT_DEBUG_NETLINK) || !fp) + return; + + nftnl_set_elem_snprintf(buf, sizeof(buf), nlse, NFTNL_OUTPUT_DEFAULT, 0); + fprintf(fp, "\t%s", buf); +} + +static void netlink_dump_setelem_done(struct netlink_ctx *ctx) +{ + FILE *fp = ctx->nft->output.output_fp; + + if (!(ctx->nft->debug_mask & NFT_DEBUG_NETLINK) || !fp) + return; + + fprintf(fp, "\n"); +} + +static int mnl_nft_setelem_batch(const struct nftnl_set *nls, struct cmd *cmd, struct nftnl_batch *batch, - enum nf_tables_msg_types cmd, - unsigned int flags, uint32_t seqnum) + enum nf_tables_msg_types msg_type, + unsigned int flags, uint32_t seqnum, + const struct expr *set, + struct netlink_ctx *ctx) { + struct nlattr *nest1, *nest2; + struct nftnl_set_elem *nlse; struct nlmsghdr *nlh; - struct nftnl_set_elems_iter *iter; - int ret; - - iter = nftnl_set_elems_iter_create(nls); - if (iter == NULL) - memory_allocation_error(); + struct expr *expr = NULL; + int i = 0; - if (cmd == NFT_MSG_NEWSETELEM) + if (msg_type == NFT_MSG_NEWSETELEM) flags |= NLM_F_CREATE; - while (nftnl_set_elems_iter_cur(iter)) { - nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(batch), cmd, - nftnl_set_get_u32(nls, NFTNL_SET_FAMILY), - flags, seqnum); - ret = nftnl_set_elems_nlmsg_build_payload_iter(nlh, iter); - mnl_nft_batch_continue(batch); - if (ret <= 0) - break; + if (set) + expr = list_first_entry(&set->expressions, struct expr, list); + +next: + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(batch), msg_type, + nftnl_set_get_u32(nls, NFTNL_SET_FAMILY), + flags, seqnum); + + if (nftnl_set_is_set(nls, NFTNL_SET_TABLE)) { + mnl_attr_put_strz(nlh, NFTA_SET_ELEM_LIST_TABLE, + nftnl_set_get_str(nls, NFTNL_SET_TABLE)); } + if (nftnl_set_is_set(nls, NFTNL_SET_NAME)) { + mnl_attr_put_strz(nlh, NFTA_SET_ELEM_LIST_SET, + nftnl_set_get_str(nls, NFTNL_SET_NAME)); + } + if (nftnl_set_is_set(nls, NFTNL_SET_ID)) { + mnl_attr_put_u32(nlh, NFTA_SET_ELEM_LIST_SET_ID, + htonl(nftnl_set_get_u32(nls, NFTNL_SET_ID))); + } + + if (!set || list_empty(&set->expressions)) + return 0; - nftnl_set_elems_iter_destroy(iter); + assert(expr); + nest1 = mnl_attr_nest_start(nlh, NFTA_SET_ELEM_LIST_ELEMENTS); + list_for_each_entry_from(expr, &set->expressions, list) { + nlse = alloc_nftnl_setelem(set, expr); + + cmd_add_loc(cmd, nlh->nlmsg_len, &expr->location); + nest2 = mnl_attr_nest_start(nlh, ++i); + nftnl_set_elem_nlmsg_build_payload(nlh, nlse); + mnl_attr_nest_end(nlh, nest2); + + netlink_dump_setelem(nlse, ctx); + nftnl_set_elem_free(nlse); + if (mnl_nft_attr_nest_overflow(nlh, nest1, nest2)) { + mnl_attr_nest_end(nlh, nest1); + mnl_nft_batch_continue(batch); + goto next; + } + } + mnl_attr_nest_end(nlh, nest1); + mnl_nft_batch_continue(batch); + netlink_dump_setelem_done(ctx); return 0; } -int mnl_nft_setelem_add(struct netlink_ctx *ctx, const struct set *set, - const struct expr *expr, unsigned int flags) +int mnl_nft_setelem_add(struct netlink_ctx *ctx, struct cmd *cmd, + const struct set *set, const struct expr *expr, + unsigned int flags) { const struct handle *h = &set->handle; struct nftnl_set *nls; @@ -1435,12 +1796,14 @@ int mnl_nft_setelem_add(struct netlink_ctx *ctx, const struct set *set, nftnl_set_set_str(nls, NFTNL_SET_NAME, h->set.name); if (h->set_id) nftnl_set_set_u32(nls, NFTNL_SET_ID, h->set_id); + if (set_is_datamap(set->flags)) + nftnl_set_set_u32(nls, NFTNL_SET_DATA_TYPE, + dtype_map_to_kernel(set->data->dtype)); - alloc_setelem_cache(expr, nls); netlink_dump_set(nls, ctx); - err = mnl_nft_setelem_batch(nls, ctx->batch, NFT_MSG_NEWSETELEM, flags, - ctx->seqnum); + err = mnl_nft_setelem_batch(nls, cmd, ctx->batch, NFT_MSG_NEWSETELEM, + flags, ctx->seqnum, expr, ctx); nftnl_set_free(nls); return err; @@ -1476,9 +1839,10 @@ int mnl_nft_setelem_flush(struct netlink_ctx *ctx, const struct cmd *cmd) return 0; } -int mnl_nft_setelem_del(struct netlink_ctx *ctx, const struct cmd *cmd) +int mnl_nft_setelem_del(struct netlink_ctx *ctx, struct cmd *cmd, + const struct handle *h, const struct expr *init) { - const struct handle *h = &cmd->handle; + enum nf_tables_msg_types msg_type = NFT_MSG_DELSETELEM; struct nftnl_set *nls; int err; @@ -1493,26 +1857,34 @@ int mnl_nft_setelem_del(struct netlink_ctx *ctx, const struct cmd *cmd) else if (h->handle.id) nftnl_set_set_u64(nls, NFTNL_SET_HANDLE, h->handle.id); - if (cmd->expr) - alloc_setelem_cache(cmd->expr, nls); netlink_dump_set(nls, ctx); - err = mnl_nft_setelem_batch(nls, ctx->batch, NFT_MSG_DELSETELEM, 0, - ctx->seqnum); + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYSETELEM; + + err = mnl_nft_setelem_batch(nls, cmd, ctx->batch, msg_type, 0, + ctx->seqnum, init, ctx); nftnl_set_free(nls); return err; } struct nftnl_set *mnl_nft_setelem_get_one(struct netlink_ctx *ctx, - struct nftnl_set *nls_in) + struct nftnl_set *nls_in, + bool reset) { char buf[MNL_SOCKET_BUFFER_SIZE]; struct nftnl_set *nls_out; struct nlmsghdr *nlh; + int msg_type; int err; - nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETSETELEM, + if (reset) + msg_type = NFT_MSG_GETSETELEM_RESET; + else + msg_type = NFT_MSG_GETSETELEM; + + nlh = nftnl_nlmsg_build_hdr(buf, msg_type, nftnl_set_get_u32(nls_in, NFTNL_SET_FAMILY), NLM_F_ACK, ctx->seqnum); nftnl_set_elems_nlmsg_build_payload(nlh, nls_in); @@ -1535,12 +1907,19 @@ struct nftnl_set *mnl_nft_setelem_get_one(struct netlink_ctx *ctx, return nls_out; } -int mnl_nft_setelem_get(struct netlink_ctx *ctx, struct nftnl_set *nls) +int mnl_nft_setelem_get(struct netlink_ctx *ctx, struct nftnl_set *nls, + bool reset) { char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlmsghdr *nlh; + int msg_type; + + if (reset) + msg_type = NFT_MSG_GETSETELEM_RESET; + else + msg_type = NFT_MSG_GETSETELEM; - nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETSETELEM, + nlh = nftnl_nlmsg_build_hdr(buf, msg_type, nftnl_set_get_u32(nls, NFTNL_SET_FAMILY), NLM_F_DUMP, ctx->seqnum); nftnl_set_elems_nlmsg_build_payload(nlh, nls); @@ -1572,11 +1951,13 @@ err_free: } struct nftnl_flowtable_list * -mnl_nft_flowtable_dump(struct netlink_ctx *ctx, int family, const char *table) +mnl_nft_flowtable_dump(struct netlink_ctx *ctx, int family, + const char *table, const char *ft) { struct nftnl_flowtable_list *nln_list; char buf[MNL_SOCKET_BUFFER_SIZE]; struct nftnl_flowtable *n; + int flags = NLM_F_DUMP; struct nlmsghdr *nlh; int ret; @@ -1584,10 +1965,14 @@ mnl_nft_flowtable_dump(struct netlink_ctx *ctx, int family, const char *table) if (n == NULL) memory_allocation_error(); - nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETFLOWTABLE, family, - NLM_F_DUMP, ctx->seqnum); if (table != NULL) nftnl_flowtable_set_str(n, NFTNL_FLOWTABLE_TABLE, table); + if (ft) { + nftnl_flowtable_set_str(n, NFTNL_FLOWTABLE_NAME, ft); + flags = NLM_F_ACK; + } + nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETFLOWTABLE, family, + flags, ctx->seqnum); nftnl_flowtable_nlmsg_build_payload(nlh, n); nftnl_flowtable_free(n); @@ -1596,7 +1981,7 @@ mnl_nft_flowtable_dump(struct netlink_ctx *ctx, int family, const char *table) memory_allocation_error(); ret = nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, flowtable_cb, nln_list); - if (ret < 0) + if (ret < 0 && errno != ENOENT) goto err; return nln_list; @@ -1605,48 +1990,30 @@ err: return NULL; } -static const char **nft_flowtable_dev_array(struct cmd *cmd) +static void mnl_nft_ft_devs_build(struct nlmsghdr *nlh, struct cmd *cmd) { - unsigned int ifname_len; - const char **dev_array; - char ifname[IFNAMSIZ]; - int i = 0, len = 1; - struct expr *expr; - - list_for_each_entry(expr, &cmd->flowtable->dev_expr->expressions, list) - len++; - - dev_array = xmalloc(sizeof(char *) * len); - - list_for_each_entry(expr, &cmd->flowtable->dev_expr->expressions, list) { - ifname_len = div_round_up(expr->len, BITS_PER_BYTE); - memset(ifname, 0, sizeof(ifname)); - mpz_export_data(ifname, expr->value, BYTEORDER_HOST_ENDIAN, - ifname_len); - dev_array[i++] = xstrdup(ifname); + const struct expr *dev_expr = cmd->flowtable->dev_expr; + const struct nft_dev *dev_array; + struct nlattr *nest_dev; + int i, num_devs= 0; + + dev_array = nft_dev_array(dev_expr, &num_devs); + nest_dev = mnl_attr_nest_start(nlh, NFTA_FLOWTABLE_HOOK_DEVS); + for (i = 0; i < num_devs; i++) { + cmd_add_loc(cmd, nlh->nlmsg_len, dev_array[i].location); + mnl_attr_put_strz(nlh, NFTA_DEVICE_NAME, dev_array[i].ifname); } - dev_array[i] = NULL; - - return dev_array; -} - -static void nft_flowtable_dev_array_free(const char **dev_array) -{ - int i = 0; - - while (dev_array[i] != NULL) - xfree(dev_array[i++]); - - free(dev_array); + mnl_attr_nest_end(nlh, nest_dev); + nft_dev_array_free(dev_array); } int mnl_nft_flowtable_add(struct netlink_ctx *ctx, struct cmd *cmd, unsigned int flags) { struct nftnl_flowtable *flo; - const char **dev_array; struct nlmsghdr *nlh; + struct nlattr *nest; int priority; flo = nftnl_flowtable_alloc(); @@ -1656,24 +2023,6 @@ int mnl_nft_flowtable_add(struct netlink_ctx *ctx, struct cmd *cmd, nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_FAMILY, cmd->handle.family); - if (cmd->flowtable->hook.name) { - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_HOOKNUM, - cmd->flowtable->hook.num); - mpz_export_data(&priority, cmd->flowtable->priority.expr->value, - BYTEORDER_HOST_ENDIAN, sizeof(int)); - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_PRIO, priority); - } else { - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_HOOKNUM, 0); - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_PRIO, 0); - } - - if (cmd->flowtable->dev_expr) { - dev_array = nft_flowtable_dev_array(cmd); - nftnl_flowtable_set_data(flo, NFTNL_FLOWTABLE_DEVICES, - dev_array, 0); - nft_flowtable_dev_array_free(dev_array); - } - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_FLAGS, cmd->flowtable->flags); @@ -1689,6 +2038,21 @@ int mnl_nft_flowtable_add(struct netlink_ctx *ctx, struct cmd *cmd, mnl_attr_put_strz(nlh, NFTA_FLOWTABLE_NAME, cmd->handle.flowtable.name); nftnl_flowtable_nlmsg_build_payload(nlh, flo); + + nest = mnl_attr_nest_start(nlh, NFTA_FLOWTABLE_HOOK); + + if (cmd->flowtable && cmd->flowtable->priority.expr) { + mnl_attr_put_u32(nlh, NFTA_FLOWTABLE_HOOK_NUM, htonl(cmd->flowtable->hook.num)); + mpz_export_data(&priority, cmd->flowtable->priority.expr->value, + BYTEORDER_HOST_ENDIAN, sizeof(int)); + mnl_attr_put_u32(nlh, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(priority)); + } + + if (cmd->flowtable->dev_expr) + mnl_nft_ft_devs_build(nlh, cmd); + + mnl_attr_nest_end(nlh, nest); + nftnl_flowtable_free(flo); mnl_nft_batch_continue(ctx->batch); @@ -1698,9 +2062,10 @@ int mnl_nft_flowtable_add(struct netlink_ctx *ctx, struct cmd *cmd, int mnl_nft_flowtable_del(struct netlink_ctx *ctx, struct cmd *cmd) { + enum nf_tables_msg_types msg_type = NFT_MSG_DELFLOWTABLE; struct nftnl_flowtable *flo; - const char **dev_array; struct nlmsghdr *nlh; + struct nlattr *nest; flo = nftnl_flowtable_alloc(); if (!flo) @@ -1709,18 +2074,11 @@ int mnl_nft_flowtable_del(struct netlink_ctx *ctx, struct cmd *cmd) nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_FAMILY, cmd->handle.family); - if (cmd->flowtable && cmd->flowtable->dev_expr) { - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_HOOKNUM, 0); - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_PRIO, 0); - - dev_array = nft_flowtable_dev_array(cmd); - nftnl_flowtable_set_data(flo, NFTNL_FLOWTABLE_DEVICES, - dev_array, 0); - nft_flowtable_dev_array_free(dev_array); - } + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYFLOWTABLE; nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), - NFT_MSG_DELFLOWTABLE, cmd->handle.family, + msg_type, cmd->handle.family, 0, ctx->seqnum); cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.table.location); @@ -1738,6 +2096,14 @@ int mnl_nft_flowtable_del(struct netlink_ctx *ctx, struct cmd *cmd) } nftnl_flowtable_nlmsg_build_payload(nlh, flo); + + if (cmd->op == CMD_DELETE && + cmd->flowtable && cmd->flowtable->dev_expr) { + nest = mnl_attr_nest_start(nlh, NFTA_FLOWTABLE_HOOK); + mnl_nft_ft_devs_build(nlh, cmd); + mnl_attr_nest_end(nlh, nest); + } + nftnl_flowtable_free(flo); mnl_nft_batch_continue(ctx->batch); @@ -1756,7 +2122,7 @@ int mnl_nft_event_listener(struct mnl_socket *nf_sock, unsigned int debug_mask, void *cb_data) { /* Set netlink socket buffer size to 16 Mbytes to reduce chances of - * message loss due to ENOBUFS. + * message loss due to ENOBUFS. */ unsigned int bufsiz = NFTABLES_NLEVENT_BUFSIZ; int fd = mnl_socket_get_fd(nf_sock); @@ -1800,3 +2166,504 @@ int mnl_nft_event_listener(struct mnl_socket *nf_sock, unsigned int debug_mask, } return ret; } + +static struct basehook *basehook_alloc(void) +{ + return xzalloc(sizeof(struct basehook)); +} + +static void basehook_free(struct basehook *b) +{ + list_del(&b->list); + free_const(b->module_name); + free_const(b->hookfn); + free_const(b->chain); + free_const(b->table); + free(b); +} + +static void basehook_list_add_tail(struct basehook *b, struct list_head *head) +{ + struct basehook *hook; + + list_for_each_entry(hook, head, list) { + if (hook->family != b->family) + continue; + if (hook->num != b->num) + continue; + if (hook->prio < b->prio) + continue; + + list_add(&b->list, &hook->list); + return; + } + + list_add_tail(&b->list, head); +} + +static int dump_nf_attr_cb(const struct nlattr *attr, void *data) +{ + int type = mnl_attr_get_type(attr); + const struct nlattr **tb = data; + + if (mnl_attr_type_valid(attr, NFNLA_HOOK_MAX) < 0) + return MNL_CB_OK; + + switch(type) { + case NFNLA_HOOK_HOOKNUM: + case NFNLA_HOOK_PRIORITY: + if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) + return MNL_CB_ERROR; + break; + case NFNLA_HOOK_DEV: + if (mnl_attr_validate(attr, MNL_TYPE_STRING) < 0) + return MNL_CB_ERROR; + break; + case NFNLA_HOOK_MODULE_NAME: + case NFNLA_HOOK_FUNCTION_NAME: + if (mnl_attr_validate(attr, MNL_TYPE_NUL_STRING) < 0) + return MNL_CB_ERROR; + break; + case NFNLA_HOOK_CHAIN_INFO: + if (mnl_attr_validate(attr, MNL_TYPE_NESTED) < 0) + return MNL_CB_ERROR; + break; + default: + return MNL_CB_OK; + } + + tb[type] = attr; + return MNL_CB_OK; +} + +static int dump_nf_chain_info_cb(const struct nlattr *attr, void *data) +{ + int type = mnl_attr_get_type(attr); + const struct nlattr **tb = data; + + if (mnl_attr_type_valid(attr, NFNLA_HOOK_INFO_MAX) < 0) + return MNL_CB_OK; + + switch(type) { + case NFNLA_HOOK_INFO_DESC: + if (mnl_attr_validate(attr, MNL_TYPE_NESTED) < 0) + return MNL_CB_ERROR; + break; + case NFNLA_HOOK_INFO_TYPE: + if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) + return MNL_CB_ERROR; + break; + default: + return MNL_CB_OK; + } + + tb[type] = attr; + return MNL_CB_OK; +} + +static int dump_nf_attr_chain_cb(const struct nlattr *attr, void *data) +{ + int type = mnl_attr_get_type(attr); + const struct nlattr **tb = data; + + if (mnl_attr_type_valid(attr, NFNLA_CHAIN_MAX) < 0) + return MNL_CB_OK; + + switch(type) { + case NFNLA_CHAIN_TABLE: + case NFNLA_CHAIN_NAME: + if (mnl_attr_validate(attr, MNL_TYPE_NUL_STRING) < 0) + return MNL_CB_ERROR; + break; + case NFNLA_CHAIN_FAMILY: + if (mnl_attr_validate(attr, MNL_TYPE_U8) < 0) + return MNL_CB_ERROR; + break; + default: + return MNL_CB_OK; + } + + tb[type] = attr; + return MNL_CB_OK; +} + +static int dump_nf_attr_bpf_cb(const struct nlattr *attr, void *data) +{ + int type = mnl_attr_get_type(attr); + const struct nlattr **tb = data; + + if (mnl_attr_type_valid(attr, NFNLA_HOOK_BPF_MAX) < 0) + return MNL_CB_OK; + + switch(type) { + case NFNLA_HOOK_BPF_ID: + if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) + return MNL_CB_ERROR; + break; + default: + return MNL_CB_OK; + } + + tb[type] = attr; + return MNL_CB_OK; +} + +struct dump_nf_hook_data { + struct list_head *hook_list; + int family; +}; + +static int dump_nf_hooks(const struct nlmsghdr *nlh, void *_data) +{ + const struct nfgenmsg *nfg = mnl_nlmsg_get_payload(nlh); + struct nlattr *tb[NFNLA_HOOK_MAX + 1] = {}; + struct dump_nf_hook_data *data = _data; + struct basehook *hook; + + /* NB: Don't check the nft generation ID, this is not + * an nftables subsystem. + */ + if (mnl_attr_parse(nlh, sizeof(*nfg), dump_nf_attr_cb, tb) < 0) + return -1; + + if (!tb[NFNLA_HOOK_PRIORITY]) + netlink_abi_error(); + + hook = basehook_alloc(); + hook->prio = ntohl(mnl_attr_get_u32(tb[NFNLA_HOOK_PRIORITY])); + + if (tb[NFNLA_HOOK_FUNCTION_NAME]) + hook->hookfn = xstrdup(mnl_attr_get_str(tb[NFNLA_HOOK_FUNCTION_NAME])); + + if (tb[NFNLA_HOOK_MODULE_NAME]) + hook->module_name = xstrdup(mnl_attr_get_str(tb[NFNLA_HOOK_MODULE_NAME])); + + if (tb[NFNLA_HOOK_CHAIN_INFO]) { + struct nlattr *nested[NFNLA_HOOK_INFO_MAX + 1] = {}; + uint32_t type; + + if (mnl_attr_parse_nested(tb[NFNLA_HOOK_CHAIN_INFO], + dump_nf_chain_info_cb, nested) < 0) { + basehook_free(hook); + return -1; + } + + type = ntohl(mnl_attr_get_u32(nested[NFNLA_HOOK_INFO_TYPE])); + if (type == NFNL_HOOK_TYPE_NFTABLES) { + struct nlattr *info[NFNLA_CHAIN_MAX + 1] = {}; + const char *tablename, *chainname; + + if (mnl_attr_parse_nested(nested[NFNLA_HOOK_INFO_DESC], + dump_nf_attr_chain_cb, + info) < 0) { + basehook_free(hook); + return -1; + } + + tablename = mnl_attr_get_str(info[NFNLA_CHAIN_TABLE]); + chainname = mnl_attr_get_str(info[NFNLA_CHAIN_NAME]); + if (tablename && chainname) { + hook->table = xstrdup(tablename); + hook->chain = xstrdup(chainname); + } + hook->chain_family = mnl_attr_get_u8(info[NFNLA_CHAIN_FAMILY]); + } else if (type == NFNL_HOOK_TYPE_BPF) { + struct nlattr *info[NFNLA_HOOK_BPF_MAX + 1] = {}; + + if (mnl_attr_parse_nested(nested[NFNLA_HOOK_INFO_DESC], + dump_nf_attr_bpf_cb, info) < 0) { + basehook_free(hook); + return -1; + } + + if (info[NFNLA_HOOK_BPF_ID]) { + char tmpbuf[16]; + + snprintf(tmpbuf, sizeof(tmpbuf), "id %u", + ntohl(mnl_attr_get_u32(info[NFNLA_HOOK_BPF_ID]))); + + hook->chain = xstrdup(tmpbuf); + } + } + } + if (tb[NFNLA_HOOK_HOOKNUM]) + hook->num = ntohl(mnl_attr_get_u32(tb[NFNLA_HOOK_HOOKNUM])); + + hook->family = nfg->nfgen_family; + + /* Netdev hooks potentially interfer with this family datapath. */ + if (hook->family == NFPROTO_NETDEV) { + switch (data->family) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_INET: + case NFPROTO_BRIDGE: + hook->family = data->family; + hook->num = NF_INET_INGRESS; + break; + case NFPROTO_ARP: + if (hook->chain_family == NFPROTO_NETDEV) { + hook->family = data->family; + hook->num = __NF_ARP_INGRESS; + } + break; + } + } + + basehook_list_add_tail(hook, data->hook_list); + + return MNL_CB_OK; +} + +static struct nlmsghdr *nf_hook_dump_request(char *buf, uint8_t family, uint32_t seq) +{ + struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf); + struct nfgenmsg *nfg; + + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + nlh->nlmsg_type = NFNL_SUBSYS_HOOK << 8; + nlh->nlmsg_seq = seq; + + nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg)); + nfg->nfgen_family = family; + nfg->version = NFNETLINK_V0; + + return nlh; +} + +static int __mnl_nft_dump_nf_hooks(struct netlink_ctx *ctx, uint8_t query_family, + uint8_t family, uint8_t hooknum, + const char *devname, + struct list_head *hook_list) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct dump_nf_hook_data data = { + .hook_list = hook_list, + .family = query_family, + }; + struct nlmsghdr *nlh; + + nlh = nf_hook_dump_request(buf, family, ctx->seqnum); + if (devname) + mnl_attr_put_strz(nlh, NFNLA_HOOK_DEV, devname); + + mnl_attr_put_u32(nlh, NFNLA_HOOK_HOOKNUM, htonl(hooknum)); + + return nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, dump_nf_hooks, &data); +} + +static void print_hooks(struct netlink_ctx *ctx, int family, struct list_head *hook_list) +{ + struct basehook *hook, *tmp, *prev = NULL; + bool same, family_in_use = false; + int prio; + FILE *fp; + + fp = ctx->nft->output.output_fp; + + list_for_each_entry_safe(hook, tmp, hook_list, list) { + if (hook->family == family) { + family_in_use = true; + break; + } + } + + if (!family_in_use) + return; + + fprintf(fp, "family %s {\n", family2str(family)); + + list_for_each_entry_safe(hook, tmp, hook_list, list) { + if (hook->family != family) + continue; + + if (prev) { + if (prev->num == hook->num) { + fprintf(fp, "\n"); + same = true; + } else { + same = false; + fprintf(fp, "\n\t}\n"); + } + } else { + same = false; + } + prev = hook; + + if (!same) { + fprintf(fp, "\thook %s {\n", + hooknum2str(family, hook->num)); + } + + prio = hook->prio; + if (prio < 0) + fprintf(fp, "\t\t%011d", prio); /* outputs a '-' sign */ + else if (prio == 0) + fprintf(fp, "\t\t %010u", prio); + else + fprintf(fp, "\t\t+%010u", prio); + + if (hook->table && hook->chain) + fprintf(fp, " chain %s %s %s", family2str(hook->chain_family), hook->table, hook->chain); + else if (hook->hookfn && hook->chain) + fprintf(fp, " %s %s", hook->hookfn, hook->chain); + else if (hook->hookfn) { + fprintf(fp, " %s", hook->hookfn); + } + if (hook->module_name) + fprintf(fp, " [%s]", hook->module_name); + } + + fprintf(fp, "\n\t}\n"); + fprintf(fp, "}\n"); +} + +#define HOOK_FAMILY_MAX 5 + +static uint8_t hook_family[HOOK_FAMILY_MAX] = { + NFPROTO_IPV4, + NFPROTO_IPV6, + NFPROTO_BRIDGE, + NFPROTO_ARP, +}; + +static int mnl_nft_dump_nf(struct netlink_ctx *ctx, int family, int hook, + const char *devname, struct list_head *hook_list, + int *ret) +{ + int i, err; + + /* show ingress in first place in hook listing. */ + err = __mnl_nft_dump_nf_hooks(ctx, family, NFPROTO_NETDEV, NF_NETDEV_INGRESS, devname, hook_list); + if (err < 0) + *ret = err; + + for (i = 0; i <= NF_INET_POST_ROUTING; i++) { + err = __mnl_nft_dump_nf_hooks(ctx, family, family, i, devname, hook_list); + if (err < 0) + *ret = err; + } + + return err; +} + +static int mnl_nft_dump_nf_arp(struct netlink_ctx *ctx, int family, int hook, + const char *devname, struct list_head *hook_list, + int *ret) +{ + int err; + + /* show ingress in first place in hook listing. */ + err = __mnl_nft_dump_nf_hooks(ctx, family, NFPROTO_NETDEV, NF_NETDEV_INGRESS, devname, hook_list); + if (err < 0) + *ret = err; + + err = __mnl_nft_dump_nf_hooks(ctx, family, family, NF_ARP_IN, devname, hook_list); + if (err < 0) + *ret = err; + err = __mnl_nft_dump_nf_hooks(ctx, family, family, NF_ARP_OUT, devname, hook_list); + if (err < 0) + *ret = err; + + return err; +} + +static int mnl_nft_dump_nf_netdev(struct netlink_ctx *ctx, int family, int hook, + const char *devname, struct list_head *hook_list, + int *ret) +{ + int err; + + err = __mnl_nft_dump_nf_hooks(ctx, family, NFPROTO_NETDEV, NF_NETDEV_INGRESS, devname, hook_list); + if (err < 0) + *ret = err; + + return err; +} + +static int mnl_nft_dump_nf_decnet(struct netlink_ctx *ctx, int family, int hook, + const char *devname, struct list_head *hook_list, + int *ret) +{ + int i, err; + + /* show ingress in first place in hook listing. */ + err = __mnl_nft_dump_nf_hooks(ctx, family, NFPROTO_NETDEV, NF_NETDEV_INGRESS, devname, hook_list); + if (err < 0) + *ret = err; + +#define NF_DN_NUMHOOKS 7 + for (i = 0; i < NF_DN_NUMHOOKS; i++) { + err = __mnl_nft_dump_nf_hooks(ctx, family, family, i, devname, hook_list); + if (err < 0) { + *ret = err; + return err; + } + } + + return err; +} + +static void release_hook_list(struct list_head *hook_list) +{ + struct basehook *hook, *next; + + list_for_each_entry_safe(hook, next, hook_list, list) + basehook_free(hook); +} + +int mnl_nft_dump_nf_hooks(struct netlink_ctx *ctx, int family, int hook, const char *devname) +{ + LIST_HEAD(hook_list); + unsigned int i; + int ret; + + errno = 0; + ret = 0; + + switch (family) { + case NFPROTO_UNSPEC: + mnl_nft_dump_nf(ctx, NFPROTO_IPV4, hook, devname, &hook_list, &ret); + mnl_nft_dump_nf(ctx, NFPROTO_IPV6, hook, devname, &hook_list, &ret); + mnl_nft_dump_nf(ctx, NFPROTO_BRIDGE, hook, devname, &hook_list, &ret); + mnl_nft_dump_nf_decnet(ctx, NFPROTO_DECNET, hook, devname, &hook_list, &ret); + break; + case NFPROTO_INET: + mnl_nft_dump_nf(ctx, NFPROTO_IPV4, hook, devname, &hook_list, &ret); + mnl_nft_dump_nf(ctx, NFPROTO_IPV6, hook, devname, &hook_list, &ret); + break; + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_BRIDGE: + mnl_nft_dump_nf(ctx, family, hook, devname, &hook_list, &ret); + break; + case NFPROTO_ARP: + mnl_nft_dump_nf_arp(ctx, family, hook, devname, &hook_list, &ret); + break; + case NFPROTO_NETDEV: + mnl_nft_dump_nf_netdev(ctx, family, hook, devname, &hook_list, &ret); + break; + case NFPROTO_DECNET: + mnl_nft_dump_nf_decnet(ctx, family, hook, devname, &hook_list, &ret); + break; + } + + switch (family) { + case NFPROTO_UNSPEC: + for (i = 0; i < HOOK_FAMILY_MAX; i++) + print_hooks(ctx, hook_family[i], &hook_list); + break; + case NFPROTO_INET: + print_hooks(ctx, NFPROTO_IPV4, &hook_list); + print_hooks(ctx, NFPROTO_IPV6, &hook_list); + break; + default: + print_hooks(ctx, family, &hook_list); + break; + } + + release_hook_list(&hook_list); + ret = 0; + + return ret; +} |