diff options
Diffstat (limited to 'src/mnl.c')
-rw-r--r-- | src/mnl.c | 1308 |
1 files changed, 1068 insertions, 240 deletions
@@ -2,12 +2,15 @@ * Copyright (c) 2013-2017 Pablo Neira Ayuso <pablo@netfilter.org> * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * it under the terms of the GNU General Public License version 2 (or any + * later) as published by the Free Software Foundation. * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ +#include <nft.h> +#include <iface.h> + #include <libmnl/libmnl.h> #include <libnftnl/common.h> #include <libnftnl/ruleset.h> @@ -22,18 +25,34 @@ #include <libnftnl/udata.h> #include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_hook.h> #include <linux/netfilter/nf_tables.h> #include <mnl.h> -#include <string.h> +#include <cmd.h> +#include <intervals.h> #include <net/if.h> #include <sys/socket.h> #include <arpa/inet.h> #include <fcntl.h> #include <errno.h> -#include <stdlib.h> #include <utils.h> #include <nftables.h> +#include <linux/netfilter.h> +#include <linux/netfilter_arp.h> + +struct basehook { + struct list_head list; + const char *module_name; + const char *hookfn; + const char *table; + const char *chain; + const char *devname; + int family; + int chain_family; + uint32_t num; + int prio; +}; struct mnl_socket *nft_mnl_socket_open(void) { @@ -52,14 +71,7 @@ struct mnl_socket *nft_mnl_socket_open(void) return nf_sock; } -struct mnl_socket *nft_mnl_socket_reopen(struct mnl_socket *nf_sock) -{ - mnl_socket_close(nf_sock); - - return nft_mnl_socket_open(); -} - -uint32_t mnl_seqnum_alloc(unsigned int *seqnum) +uint32_t mnl_seqnum_inc(unsigned int *seqnum) { return (*seqnum)++; } @@ -77,20 +89,31 @@ nft_mnl_recv(struct netlink_ctx *ctx, uint32_t portid, int (*cb)(const struct nlmsghdr *nlh, void *data), void *cb_data) { char buf[NFT_NLMSG_MAXSIZE]; + bool eintr = false; int ret; ret = mnl_socket_recvfrom(ctx->nft->nf_sock, buf, sizeof(buf)); while (ret > 0) { ret = mnl_cb_run(buf, ret, ctx->seqnum, portid, cb, cb_data); - if (ret <= 0) - goto out; + if (ret == 0) + break; + if (ret < 0) { + if (errno == EAGAIN) { + ret = 0; + break; + } + if (errno != EINTR) + break; + /* process all pending messages before reporting EINTR */ + eintr = true; + } ret = mnl_socket_recvfrom(ctx->nft->nf_sock, buf, sizeof(buf)); } -out: - if (ret < 0 && errno == EAGAIN) - return 0; - + if (eintr) { + ret = -1; + errno = EINTR; + } return ret; } @@ -156,11 +179,11 @@ static int check_genid(const struct nlmsghdr *nlh) * Batching */ -/* selected batch page is 256 Kbytes long to load ruleset of - * half a million rules without hitting -EMSGSIZE due to large - * iovec. +/* Selected batch page is 2 Mbytes long to support loading a ruleset of 3.5M + * rules matching on source and destination address as well as input and output + * interfaces. This is what legacy iptables supports. */ -#define BATCH_PAGE_SIZE getpagesize() * 32 +#define BATCH_PAGE_SIZE 2 * 1024 * 1024 struct nftnl_batch *mnl_batch_init(void) { @@ -222,12 +245,13 @@ static void mnl_err_list_node_add(struct list_head *err_list, int error, void mnl_err_list_free(struct mnl_err *err) { list_del(&err->head); - xfree(err); + free(err); } -static void mnl_set_sndbuffer(const struct mnl_socket *nl, - struct nftnl_batch *batch) +static void mnl_set_sndbuffer(struct netlink_ctx *ctx) { + struct mnl_socket *nl = ctx->nft->nf_sock; + struct nftnl_batch *batch = ctx->batch; socklen_t len = sizeof(int); int sndnlbuffsiz = 0; int newbuffsiz; @@ -240,9 +264,15 @@ static void mnl_set_sndbuffer(const struct mnl_socket *nl, return; /* Rise sender buffer length to avoid hitting -EMSGSIZE */ + setsockopt(mnl_socket_get_fd(nl), SOL_SOCKET, SO_SNDBUF, + &newbuffsiz, sizeof(socklen_t)); + + /* unpriviledged containers check for CAP_NET_ADMIN on the init_user_ns. */ if (setsockopt(mnl_socket_get_fd(nl), SOL_SOCKET, SO_SNDBUFFORCE, - &newbuffsiz, sizeof(socklen_t)) < 0) - return; + &newbuffsiz, sizeof(socklen_t)) < 0) { + if (errno == EPERM) + ctx->maybe_emsgsize = newbuffsiz; + } } static unsigned int nlsndbufsiz; @@ -358,7 +388,7 @@ static int mnl_batch_extack_cb(const struct nlmsghdr *nlh, void *data) return MNL_CB_ERROR; } -#define NFT_MNL_ECHO_RCVBUFF_DEFAULT (MNL_SOCKET_BUFFER_SIZE * 1024) +#define NFT_MNL_ECHO_RCVBUFF_DEFAULT (MNL_SOCKET_BUFFER_SIZE * 1024U) #define NFT_MNL_ACK_MAXSIZE ((sizeof(struct nlmsghdr) + \ sizeof(struct nfgenmsg) + (1 << 16)) + \ MNL_SOCKET_BUFFER_SIZE) @@ -389,7 +419,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list, .nl_ctx = ctx, }; - mnl_set_sndbuffer(ctx->nft->nf_sock, ctx->batch); + mnl_set_sndbuffer(ctx); mnl_nft_batch_to_msg(ctx, &msg, &snl, iov, iov_len); @@ -445,7 +475,7 @@ static int mnl_nft_expr_build_cb(struct nftnl_expr *nle, void *data) eloc = nft_expr_loc_find(nle, ctx->lctx); if (eloc) - cmd_add_loc(cmd, nlh->nlmsg_len, eloc->loc); + cmd_add_loc(cmd, nlh, eloc->loc); nest = mnl_attr_nest_start(nlh, NFTA_LIST_ELEM); nftnl_expr_build_payload(nlh, nle); @@ -498,9 +528,9 @@ int mnl_nft_rule_add(struct netlink_ctx *ctx, struct cmd *cmd, cmd->handle.family, NLM_F_CREATE | flags, ctx->seqnum); - cmd_add_loc(cmd, nlh->nlmsg_len, &h->table.location); + cmd_add_loc(cmd, nlh, &h->table.location); mnl_attr_put_strz(nlh, NFTA_RULE_TABLE, h->table.name); - cmd_add_loc(cmd, nlh->nlmsg_len, &h->chain.location); + cmd_add_loc(cmd, nlh, &h->chain.location); if (h->chain_id) mnl_attr_put_u32(nlh, NFTA_RULE_CHAIN_ID, htonl(h->chain_id)); @@ -549,11 +579,11 @@ int mnl_nft_rule_replace(struct netlink_ctx *ctx, struct cmd *cmd) cmd->handle.family, NLM_F_REPLACE | flags, ctx->seqnum); - cmd_add_loc(cmd, nlh->nlmsg_len, &h->table.location); + cmd_add_loc(cmd, nlh, &h->table.location); mnl_attr_put_strz(nlh, NFTA_RULE_TABLE, h->table.name); - cmd_add_loc(cmd, nlh->nlmsg_len, &h->chain.location); + cmd_add_loc(cmd, nlh, &h->chain.location); mnl_attr_put_strz(nlh, NFTA_RULE_CHAIN, h->chain.name); - cmd_add_loc(cmd, nlh->nlmsg_len, &h->handle.location); + cmd_add_loc(cmd, nlh, &h->handle.location); mnl_attr_put_u64(nlh, NFTA_RULE_HANDLE, htobe64(h->handle.id)); mnl_nft_rule_build_ctx_init(&rule_ctx, nlh, cmd, &lctx); @@ -573,6 +603,7 @@ int mnl_nft_rule_replace(struct netlink_ctx *ctx, struct cmd *cmd) int mnl_nft_rule_del(struct netlink_ctx *ctx, struct cmd *cmd) { + enum nf_tables_msg_types msg_type = NFT_MSG_DELRULE; struct handle *h = &cmd->handle; struct nftnl_rule *nlr; struct nlmsghdr *nlh; @@ -583,19 +614,22 @@ int mnl_nft_rule_del(struct netlink_ctx *ctx, struct cmd *cmd) nftnl_rule_set_u32(nlr, NFTNL_RULE_FAMILY, h->family); + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYRULE; + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), - NFT_MSG_DELRULE, + msg_type, nftnl_rule_get_u32(nlr, NFTNL_RULE_FAMILY), 0, ctx->seqnum); - cmd_add_loc(cmd, nlh->nlmsg_len, &h->table.location); + cmd_add_loc(cmd, nlh, &h->table.location); mnl_attr_put_strz(nlh, NFTA_RULE_TABLE, h->table.name); if (h->chain.name) { - cmd_add_loc(cmd, nlh->nlmsg_len, &h->chain.location); + cmd_add_loc(cmd, nlh, &h->chain.location); mnl_attr_put_strz(nlh, NFTA_RULE_CHAIN, h->chain.name); } if (h->handle.id) { - cmd_add_loc(cmd, nlh->nlmsg_len, &h->handle.location); + cmd_add_loc(cmd, nlh, &h->handle.location); mnl_attr_put_u64(nlh, NFTA_RULE_HANDLE, htobe64(h->handle.id)); } @@ -634,20 +668,45 @@ err_free: return MNL_CB_OK; } -struct nftnl_rule_list *mnl_nft_rule_dump(struct netlink_ctx *ctx, - int family) +struct nftnl_rule_list *mnl_nft_rule_dump(struct netlink_ctx *ctx, int family, + const char *table, const char *chain, + uint64_t rule_handle, + bool dump, bool reset) { + uint16_t nl_flags = dump ? NLM_F_DUMP : NLM_F_ACK; char buf[MNL_SOCKET_BUFFER_SIZE]; struct nftnl_rule_list *nlr_list; + struct nftnl_rule *nlr = NULL; struct nlmsghdr *nlh; - int ret; + int msg_type, ret; + + if (reset) + msg_type = NFT_MSG_GETRULE_RESET; + else + msg_type = NFT_MSG_GETRULE; + + if (table) { + nlr = nftnl_rule_alloc(); + if (!nlr) + memory_allocation_error(); + + nftnl_rule_set_str(nlr, NFTNL_RULE_TABLE, table); + if (chain) + nftnl_rule_set_str(nlr, NFTNL_RULE_CHAIN, chain); + if (rule_handle) + nftnl_rule_set_u64(nlr, NFTNL_RULE_HANDLE, rule_handle); + } nlr_list = nftnl_rule_list_alloc(); if (nlr_list == NULL) memory_allocation_error(); - nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETRULE, family, - NLM_F_DUMP, ctx->seqnum); + nlh = nftnl_nlmsg_build_hdr(buf, msg_type, family, + nl_flags, ctx->seqnum); + if (nlr) { + nftnl_rule_nlmsg_build_payload(nlh, nlr); + nftnl_rule_free(nlr); + } ret = nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, rule_cb, nlr_list); if (ret < 0) @@ -662,18 +721,109 @@ err: /* * Chain */ + +struct nft_dev { + const char *ifname; + const struct location *location; +}; + +static void nft_dev_add(struct nft_dev *dev_array, const struct expr *expr, int i) +{ + unsigned int ifname_len; + char ifname[IFNAMSIZ]; + + if (expr->etype != EXPR_VALUE) + BUG("Must be a value, not %s\n", expr_name(expr)); + + ifname_len = div_round_up(expr->len, BITS_PER_BYTE); + memset(ifname, 0, sizeof(ifname)); + + if (ifname_len > sizeof(ifname)) + BUG("Interface length %u exceeds limit\n", ifname_len); + + mpz_export_data(ifname, expr->value, BYTEORDER_HOST_ENDIAN, ifname_len); + + if (strnlen(ifname, IFNAMSIZ) >= IFNAMSIZ) + BUG("Interface length %zu exceeds limit, no NUL byte\n", strnlen(ifname, IFNAMSIZ)); + + dev_array[i].ifname = xstrdup(ifname); + dev_array[i].location = &expr->location; +} + +static struct nft_dev *nft_dev_array(const struct expr *dev_expr, int *num_devs) +{ + struct nft_dev *dev_array; + int i = 0, len = 1; + struct expr *expr; + + switch (dev_expr->etype) { + case EXPR_SET: + case EXPR_LIST: + list_for_each_entry(expr, &dev_expr->expressions, list) + len++; + + dev_array = xmalloc(sizeof(struct nft_dev) * len); + + list_for_each_entry(expr, &dev_expr->expressions, list) { + nft_dev_add(dev_array, expr, i); + i++; + } + break; + case EXPR_VALUE: + len++; + dev_array = xmalloc(sizeof(struct nft_dev) * len); + nft_dev_add(dev_array, dev_expr, i); + i++; + break; + default: + assert(0); + } + + dev_array[i].ifname = NULL; + *num_devs = i; + + return dev_array; +} + +static void nft_dev_array_free(const struct nft_dev *dev_array) +{ + int i = 0; + + while (dev_array[i].ifname != NULL) + free_const(dev_array[i++].ifname); + + free_const(dev_array); +} + +static void mnl_nft_chain_devs_build(struct nlmsghdr *nlh, struct cmd *cmd) +{ + const struct expr *dev_expr = cmd->chain->dev_expr; + const struct nft_dev *dev_array; + struct nlattr *nest_dev; + int i, num_devs = 0; + + dev_array = nft_dev_array(dev_expr, &num_devs); + if (num_devs == 1) { + cmd_add_loc(cmd, nlh, dev_array[0].location); + mnl_attr_put_strz(nlh, NFTA_HOOK_DEV, dev_array[0].ifname); + } else { + nest_dev = mnl_attr_nest_start(nlh, NFTA_HOOK_DEVS); + for (i = 0; i < num_devs; i++) { + cmd_add_loc(cmd, nlh, dev_array[i].location); + mnl_attr_put_strz(nlh, NFTA_DEVICE_NAME, dev_array[i].ifname); + mnl_attr_nest_end(nlh, nest_dev); + } + } + nft_dev_array_free(dev_array); +} + int mnl_nft_chain_add(struct netlink_ctx *ctx, struct cmd *cmd, unsigned int flags) { struct nftnl_udata_buf *udbuf; - int priority, policy, i = 0; struct nftnl_chain *nlc; - unsigned int ifname_len; - const char **dev_array; - char ifname[IFNAMSIZ]; struct nlmsghdr *nlh; - struct expr *expr; - int dev_array_len; + int priority, policy; nlc = nftnl_chain_alloc(); if (nlc == NULL) @@ -686,46 +836,6 @@ int mnl_nft_chain_add(struct netlink_ctx *ctx, struct cmd *cmd, nftnl_chain_set_u32(nlc, NFTNL_CHAIN_FLAGS, CHAIN_F_HW_OFFLOAD); } - if (cmd->chain->flags & CHAIN_F_BASECHAIN) { - nftnl_chain_set_u32(nlc, NFTNL_CHAIN_HOOKNUM, - cmd->chain->hook.num); - mpz_export_data(&priority, - cmd->chain->priority.expr->value, - BYTEORDER_HOST_ENDIAN, sizeof(int)); - nftnl_chain_set_s32(nlc, NFTNL_CHAIN_PRIO, priority); - nftnl_chain_set_str(nlc, NFTNL_CHAIN_TYPE, - cmd->chain->type); - } - if (cmd->chain->dev_expr) { - dev_array = xmalloc(sizeof(char *) * 8); - dev_array_len = 8; - list_for_each_entry(expr, &cmd->chain->dev_expr->expressions, list) { - ifname_len = div_round_up(expr->len, BITS_PER_BYTE); - memset(ifname, 0, sizeof(ifname)); - mpz_export_data(ifname, expr->value, - BYTEORDER_HOST_ENDIAN, - ifname_len); - dev_array[i++] = xstrdup(ifname); - if (i == dev_array_len) { - dev_array_len *= 2; - dev_array = xrealloc(dev_array, - dev_array_len * sizeof(char *)); - } - } - - dev_array[i] = NULL; - if (i == 1) - nftnl_chain_set_str(nlc, NFTNL_CHAIN_DEV, dev_array[0]); - else if (i > 1) - nftnl_chain_set_data(nlc, NFTNL_CHAIN_DEVICES, dev_array, - sizeof(char *) * dev_array_len); - - i = 0; - while (dev_array[i] != NULL) - xfree(dev_array[i++]); - - xfree(dev_array); - } if (cmd->chain->comment) { udbuf = nftnl_udata_buf_alloc(NFT_USERDATA_MAXLEN); if (!udbuf) @@ -737,16 +847,24 @@ int mnl_nft_chain_add(struct netlink_ctx *ctx, struct cmd *cmd, nftnl_udata_buf_free(udbuf); } } + + nftnl_chain_set_str(nlc, NFTNL_CHAIN_TABLE, cmd->handle.table.name); + if (cmd->handle.chain.name) + nftnl_chain_set_str(nlc, NFTNL_CHAIN_NAME, cmd->handle.chain.name); + netlink_dump_chain(nlc, ctx); + nftnl_chain_unset(nlc, NFTNL_CHAIN_TABLE); + nftnl_chain_unset(nlc, NFTNL_CHAIN_NAME); + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), NFT_MSG_NEWCHAIN, cmd->handle.family, NLM_F_CREATE | flags, ctx->seqnum); - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.table.location); + cmd_add_loc(cmd, nlh, &cmd->handle.table.location); mnl_attr_put_strz(nlh, NFTA_CHAIN_TABLE, cmd->handle.table.name); - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.chain.location); + cmd_add_loc(cmd, nlh, &cmd->handle.chain.location); if (!cmd->chain || !(cmd->chain->flags & CHAIN_F_BINDING)) { mnl_attr_put_strz(nlh, NFTA_CHAIN_NAME, cmd->handle.chain.name); @@ -763,11 +881,37 @@ int mnl_nft_chain_add(struct netlink_ctx *ctx, struct cmd *cmd, if (cmd->chain && cmd->chain->policy) { mpz_export_data(&policy, cmd->chain->policy->value, BYTEORDER_HOST_ENDIAN, sizeof(int)); - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->chain->policy->location); + cmd_add_loc(cmd, nlh, &cmd->chain->policy->location); mnl_attr_put_u32(nlh, NFTA_CHAIN_POLICY, htonl(policy)); } + nftnl_chain_unset(nlc, NFTNL_CHAIN_TYPE); + nftnl_chain_nlmsg_build_payload(nlh, nlc); + + if (cmd->chain && cmd->chain->flags & CHAIN_F_BASECHAIN) { + struct nlattr *nest; + + if (cmd->chain->type.str) { + cmd_add_loc(cmd, nlh, &cmd->chain->type.loc); + mnl_attr_put_strz(nlh, NFTA_CHAIN_TYPE, cmd->chain->type.str); + } + + nest = mnl_attr_nest_start(nlh, NFTA_CHAIN_HOOK); + + if (cmd->chain->type.str) { + mnl_attr_put_u32(nlh, NFTA_HOOK_HOOKNUM, htonl(cmd->chain->hook.num)); + mpz_export_data(&priority, cmd->chain->priority.expr->value, + BYTEORDER_HOST_ENDIAN, sizeof(int)); + mnl_attr_put_u32(nlh, NFTA_HOOK_PRIORITY, htonl(priority)); + } + + if (cmd->chain && cmd->chain->dev_expr) + mnl_nft_chain_devs_build(nlh, cmd); + + mnl_attr_nest_end(nlh, nest); + } + nftnl_chain_free(nlc); mnl_nft_batch_continue(ctx->batch); @@ -807,6 +951,7 @@ int mnl_nft_chain_rename(struct netlink_ctx *ctx, const struct cmd *cmd, int mnl_nft_chain_del(struct netlink_ctx *ctx, struct cmd *cmd) { + enum nf_tables_msg_types msg_type = NFT_MSG_DELCHAIN; struct nftnl_chain *nlc; struct nlmsghdr *nlh; @@ -816,22 +961,34 @@ int mnl_nft_chain_del(struct netlink_ctx *ctx, struct cmd *cmd) nftnl_chain_set_u32(nlc, NFTNL_CHAIN_FAMILY, cmd->handle.family); + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYCHAIN; + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), - NFT_MSG_DELCHAIN, + msg_type, cmd->handle.family, 0, ctx->seqnum); - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.table.location); + cmd_add_loc(cmd, nlh, &cmd->handle.table.location); mnl_attr_put_strz(nlh, NFTA_CHAIN_TABLE, cmd->handle.table.name); if (cmd->handle.chain.name) { - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.chain.location); + cmd_add_loc(cmd, nlh, &cmd->handle.chain.location); mnl_attr_put_strz(nlh, NFTA_CHAIN_NAME, cmd->handle.chain.name); } else if (cmd->handle.handle.id) { - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.handle.location); + cmd_add_loc(cmd, nlh, &cmd->handle.handle.location); mnl_attr_put_u64(nlh, NFTA_CHAIN_HANDLE, htobe64(cmd->handle.handle.id)); } + if (cmd->op == CMD_DELETE && + cmd->chain && cmd->chain->dev_expr) { + struct nlattr *nest; + + nest = mnl_attr_nest_start(nlh, NFTA_CHAIN_HOOK); + mnl_nft_chain_devs_build(nlh, cmd); + mnl_attr_nest_end(nlh, nest); + } + nftnl_chain_nlmsg_build_payload(nlh, nlc); nftnl_chain_free(nlc); @@ -864,10 +1021,12 @@ err_free: } struct nftnl_chain_list *mnl_nft_chain_dump(struct netlink_ctx *ctx, - int family) + int family, const char *table, + const char *chain) { char buf[MNL_SOCKET_BUFFER_SIZE]; struct nftnl_chain_list *nlc_list; + struct nftnl_chain *nlc = NULL; struct nlmsghdr *nlh; int ret; @@ -875,11 +1034,24 @@ struct nftnl_chain_list *mnl_nft_chain_dump(struct netlink_ctx *ctx, if (nlc_list == NULL) memory_allocation_error(); + if (table && chain) { + nlc = nftnl_chain_alloc(); + if (!nlc) + memory_allocation_error(); + + nftnl_chain_set_str(nlc, NFTNL_CHAIN_TABLE, table); + nftnl_chain_set_str(nlc, NFTNL_CHAIN_NAME, chain); + } + nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETCHAIN, family, - NLM_F_DUMP, ctx->seqnum); + nlc ? NLM_F_ACK : NLM_F_DUMP, ctx->seqnum); + if (nlc) { + nftnl_chain_nlmsg_build_payload(nlh, nlc); + nftnl_chain_free(nlc); + } ret = nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, chain_cb, nlc_list); - if (ret < 0) + if (ret < 0 && errno != ENOENT) goto err; return nlc_list; @@ -925,7 +1097,7 @@ int mnl_nft_table_add(struct netlink_ctx *ctx, struct cmd *cmd, cmd->handle.family, flags, ctx->seqnum); - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.table.location); + cmd_add_loc(cmd, nlh, &cmd->handle.table.location); mnl_attr_put_strz(nlh, NFTA_TABLE_NAME, cmd->handle.table.name); nftnl_table_nlmsg_build_payload(nlh, nlt); nftnl_table_free(nlt); @@ -937,6 +1109,7 @@ int mnl_nft_table_add(struct netlink_ctx *ctx, struct cmd *cmd, int mnl_nft_table_del(struct netlink_ctx *ctx, struct cmd *cmd) { + enum nf_tables_msg_types msg_type = NFT_MSG_DELTABLE; struct nftnl_table *nlt; struct nlmsghdr *nlh; @@ -946,17 +1119,18 @@ int mnl_nft_table_del(struct netlink_ctx *ctx, struct cmd *cmd) nftnl_table_set_u32(nlt, NFTNL_TABLE_FAMILY, cmd->handle.family); - nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), - NFT_MSG_DELTABLE, - cmd->handle.family, - 0, ctx->seqnum); + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYTABLE; + + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), msg_type, + cmd->handle.family, 0, ctx->seqnum); if (cmd->handle.table.name) { - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.table.location); + cmd_add_loc(cmd, nlh, &cmd->handle.table.location); mnl_attr_put_strz(nlh, NFTA_TABLE_NAME, cmd->handle.table.name); } else if (cmd->handle.handle.id) { - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.handle.location); - mnl_attr_put_u64(nlh, NFTA_TABLE_NAME, + cmd_add_loc(cmd, nlh, &cmd->handle.handle.location); + mnl_attr_put_u64(nlh, NFTA_TABLE_HANDLE, htobe64(cmd->handle.handle.id)); } nftnl_table_nlmsg_build_payload(nlh, nlt); @@ -991,10 +1165,12 @@ err_free: } struct nftnl_table_list *mnl_nft_table_dump(struct netlink_ctx *ctx, - int family) + int family, const char *table) { char buf[MNL_SOCKET_BUFFER_SIZE]; struct nftnl_table_list *nlt_list; + struct nftnl_table *nlt = NULL; + int flags = NLM_F_DUMP; struct nlmsghdr *nlh; int ret; @@ -1002,11 +1178,28 @@ struct nftnl_table_list *mnl_nft_table_dump(struct netlink_ctx *ctx, if (nlt_list == NULL) return NULL; + if (table) { + nlt = nftnl_table_alloc(); + if (!nlt) + memory_allocation_error(); + + if (family != NFPROTO_UNSPEC) + nftnl_table_set_u32(nlt, NFTNL_TABLE_FAMILY, family); + if (table) + nftnl_table_set_str(nlt, NFTNL_TABLE_NAME, table); + + flags = NLM_F_ACK; + } + nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETTABLE, family, - NLM_F_DUMP, ctx->seqnum); + flags, ctx->seqnum); + if (nlt) { + nftnl_table_nlmsg_build_payload(nlh, nlt); + nftnl_table_free(nlt); + } ret = nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, table_cb, nlt_list); - if (ret < 0) + if (ret < 0 && errno != ENOENT) goto err; return nlt_list; @@ -1022,9 +1215,7 @@ static void set_key_expression(struct netlink_ctx *ctx, { struct nftnl_udata *nest1, *nest2; - if (expr->flags & EXPR_F_CONSTANT || - set_flags & NFT_SET_ANONYMOUS || - !expr_ops(expr)->build_udata) + if (!expr_ops(expr)->build_udata) return; nest1 = nftnl_udata_nest_start(udbuf, type); @@ -1086,8 +1277,6 @@ int mnl_nft_set_add(struct netlink_ctx *ctx, struct cmd *cmd, if (set->desc.size != 0) nftnl_set_set_u32(nls, NFTNL_SET_DESC_SIZE, set->desc.size); - } else if (set->init) { - nftnl_set_set_u32(nls, NFTNL_SET_DESC_SIZE, set->init->size); } udbuf = nftnl_udata_buf_alloc(NFT_USERDATA_MAXLEN); @@ -1154,9 +1343,9 @@ int mnl_nft_set_add(struct netlink_ctx *ctx, struct cmd *cmd, h->family, NLM_F_CREATE | flags, ctx->seqnum); - cmd_add_loc(cmd, nlh->nlmsg_len, &h->table.location); + cmd_add_loc(cmd, nlh, &h->table.location); mnl_attr_put_strz(nlh, NFTA_SET_TABLE, h->table.name); - cmd_add_loc(cmd, nlh->nlmsg_len, &h->set.location); + cmd_add_loc(cmd, nlh, &h->set.location); mnl_attr_put_strz(nlh, NFTA_SET_NAME, h->set.name); nftnl_set_nlmsg_build_payload(nlh, nls); @@ -1169,6 +1358,7 @@ int mnl_nft_set_add(struct netlink_ctx *ctx, struct cmd *cmd, int mnl_nft_set_del(struct netlink_ctx *ctx, struct cmd *cmd) { + enum nf_tables_msg_types msg_type = NFT_MSG_DELSET; const struct handle *h = &cmd->handle; struct nftnl_set *nls; struct nlmsghdr *nlh; @@ -1179,18 +1369,21 @@ int mnl_nft_set_del(struct netlink_ctx *ctx, struct cmd *cmd) nftnl_set_set_u32(nls, NFTNL_SET_FAMILY, h->family); + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYSET; + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), - NFT_MSG_DELSET, + msg_type, h->family, 0, ctx->seqnum); - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.table.location); + cmd_add_loc(cmd, nlh, &cmd->handle.table.location); mnl_attr_put_strz(nlh, NFTA_SET_TABLE, cmd->handle.table.name); if (h->set.name) { - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.set.location); + cmd_add_loc(cmd, nlh, &cmd->handle.set.location); mnl_attr_put_strz(nlh, NFTA_SET_NAME, cmd->handle.set.name); } else if (h->handle.id) { - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.handle.location); + cmd_add_loc(cmd, nlh, &cmd->handle.handle.location); mnl_attr_put_u64(nlh, NFTA_SET_HANDLE, htobe64(cmd->handle.handle.id)); } @@ -1203,9 +1396,15 @@ int mnl_nft_set_del(struct netlink_ctx *ctx, struct cmd *cmd) return 0; } +struct set_cb_args { + struct netlink_ctx *ctx; + struct nftnl_set_list *list; +}; + static int set_cb(const struct nlmsghdr *nlh, void *data) { - struct nftnl_set_list *nls_list = data; + struct set_cb_args *args = data; + struct nftnl_set_list *nls_list = args->list; struct nftnl_set *s; if (check_genid(nlh) < 0) @@ -1218,6 +1417,8 @@ static int set_cb(const struct nlmsghdr *nlh, void *data) if (nftnl_set_nlmsg_parse(nlh, s) < 0) goto err_free; + netlink_dump_set(s, args->ctx); + nftnl_set_list_add_tail(s, nls_list); return MNL_CB_OK; @@ -1227,22 +1428,30 @@ err_free: } struct nftnl_set_list * -mnl_nft_set_dump(struct netlink_ctx *ctx, int family, const char *table) +mnl_nft_set_dump(struct netlink_ctx *ctx, int family, + const char *table, const char *set) { char buf[MNL_SOCKET_BUFFER_SIZE]; struct nftnl_set_list *nls_list; + int flags = NLM_F_DUMP; struct nlmsghdr *nlh; struct nftnl_set *s; int ret; + struct set_cb_args args; s = nftnl_set_alloc(); if (s == NULL) memory_allocation_error(); - nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETSET, family, - NLM_F_DUMP, ctx->seqnum); if (table != NULL) nftnl_set_set_str(s, NFTNL_SET_TABLE, table); + if (set) { + nftnl_set_set_str(s, NFTNL_SET_NAME, set); + flags = NLM_F_ACK; + } + + nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETSET, family, + flags, ctx->seqnum); nftnl_set_nlmsg_build_payload(nlh, s); nftnl_set_free(s); @@ -1250,8 +1459,10 @@ mnl_nft_set_dump(struct netlink_ctx *ctx, int family, const char *table) if (nls_list == NULL) memory_allocation_error(); - ret = nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, set_cb, nls_list); - if (ret < 0) + args.list = nls_list; + args.ctx = ctx; + ret = nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, set_cb, &args); + if (ret < 0 && errno != ENOENT) goto err; return nls_list; @@ -1362,9 +1573,9 @@ int mnl_nft_obj_add(struct netlink_ctx *ctx, struct cmd *cmd, NFT_MSG_NEWOBJ, cmd->handle.family, NLM_F_CREATE | flags, ctx->seqnum); - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.table.location); + cmd_add_loc(cmd, nlh, &cmd->handle.table.location); mnl_attr_put_strz(nlh, NFTA_OBJ_TABLE, cmd->handle.table.name); - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.obj.location); + cmd_add_loc(cmd, nlh, &cmd->handle.obj.location); mnl_attr_put_strz(nlh, NFTA_OBJ_NAME, cmd->handle.obj.name); nftnl_obj_nlmsg_build_payload(nlh, nlo); @@ -1377,6 +1588,7 @@ int mnl_nft_obj_add(struct netlink_ctx *ctx, struct cmd *cmd, int mnl_nft_obj_del(struct netlink_ctx *ctx, struct cmd *cmd, int type) { + enum nf_tables_msg_types msg_type = NFT_MSG_DELOBJ; struct nftnl_obj *nlo; struct nlmsghdr *nlh; @@ -1387,18 +1599,21 @@ int mnl_nft_obj_del(struct netlink_ctx *ctx, struct cmd *cmd, int type) nftnl_obj_set_u32(nlo, NFTNL_OBJ_FAMILY, cmd->handle.family); nftnl_obj_set_u32(nlo, NFTNL_OBJ_TYPE, type); + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYOBJ; + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), - NFT_MSG_DELOBJ, cmd->handle.family, + msg_type, cmd->handle.family, 0, ctx->seqnum); - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.table.location); + cmd_add_loc(cmd, nlh, &cmd->handle.table.location); mnl_attr_put_strz(nlh, NFTA_OBJ_TABLE, cmd->handle.table.name); if (cmd->handle.obj.name) { - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.obj.location); + cmd_add_loc(cmd, nlh, &cmd->handle.obj.location); mnl_attr_put_strz(nlh, NFTA_OBJ_NAME, cmd->handle.obj.name); } else if (cmd->handle.handle.id) { - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.handle.location); + cmd_add_loc(cmd, nlh, &cmd->handle.handle.location); mnl_attr_put_u64(nlh, NFTA_OBJ_HANDLE, htobe64(cmd->handle.handle.id)); } @@ -1493,39 +1708,169 @@ static int set_elem_cb(const struct nlmsghdr *nlh, void *data) return MNL_CB_OK; } -static int mnl_nft_setelem_batch(struct nftnl_set *nls, - struct nftnl_batch *batch, - enum nf_tables_msg_types cmd, - unsigned int flags, uint32_t seqnum) +static bool mnl_nft_attr_nest_overflow(struct nlmsghdr *nlh, + const struct nlattr *from, + const struct nlattr *to) { - struct nlmsghdr *nlh; - struct nftnl_set_elems_iter *iter; - int ret; + int len = (void *)to + to->nla_len - (void *)from; + + /* The attribute length field is 16 bits long, thus the maximum payload + * that an attribute can convey is UINT16_MAX. In case of overflow, + * discard the last attribute that did not fit into the nest. + */ + if (len > UINT16_MAX) { + nlh->nlmsg_len -= to->nla_len; + return true; + } + return false; +} + +static void netlink_dump_setelem(const struct nftnl_set_elem *nlse, + struct netlink_ctx *ctx) +{ + FILE *fp = ctx->nft->output.output_fp; + char buf[4096]; + + if (!(ctx->nft->debug_mask & NFT_DEBUG_NETLINK) || !fp) + return; + + nftnl_set_elem_snprintf(buf, sizeof(buf), nlse, NFTNL_OUTPUT_DEFAULT, 0); + fprintf(fp, "\t%s", buf); +} + +static void netlink_dump_setelem_done(struct netlink_ctx *ctx) +{ + FILE *fp = ctx->nft->output.output_fp; + + if (!(ctx->nft->debug_mask & NFT_DEBUG_NETLINK) || !fp) + return; + + fprintf(fp, "\n"); +} + +static struct nftnl_set_elem * +alloc_nftnl_setelem_interval(const struct set *set, const struct expr *init, + struct expr *elem, struct expr *next_elem, + struct nftnl_set_elem **nlse_high) +{ + struct nftnl_set_elem *nlse[2] = {}; + LIST_HEAD(interval_list); + struct expr *expr, *next; + int i = 0; - iter = nftnl_set_elems_iter_create(nls); - if (iter == NULL) + if (setelem_to_interval(set, elem, next_elem, &interval_list) < 0) memory_allocation_error(); - if (cmd == NFT_MSG_NEWSETELEM) + if (list_empty(&interval_list)) { + *nlse_high = NULL; + nlse[i++] = alloc_nftnl_setelem(init, elem); + return nlse[0]; + } + + list_for_each_entry_safe(expr, next, &interval_list, list) { + nlse[i++] = alloc_nftnl_setelem(init, expr); + list_del(&expr->list); + expr_free(expr); + } + *nlse_high = nlse[1]; + + return nlse[0]; +} + +static int mnl_nft_setelem_batch(const struct nftnl_set *nls, struct cmd *cmd, + struct nftnl_batch *batch, + enum nf_tables_msg_types msg_type, + unsigned int flags, uint32_t *seqnum, + const struct set *set, const struct expr *init, + struct netlink_ctx *ctx) +{ + struct nftnl_set_elem *nlse, *nlse_high = NULL; + struct expr *expr = NULL, *next; + struct nlattr *nest1, *nest2; + struct nlmsghdr *nlh; + int i = 0; + + if (msg_type == NFT_MSG_NEWSETELEM) flags |= NLM_F_CREATE; - while (nftnl_set_elems_iter_cur(iter)) { - nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(batch), cmd, - nftnl_set_get_u32(nls, NFTNL_SET_FAMILY), - flags, seqnum); - ret = nftnl_set_elems_nlmsg_build_payload_iter(nlh, iter); - mnl_nft_batch_continue(batch); - if (ret <= 0) - break; + if (init) + expr = list_first_entry(&init->expressions, struct expr, list); + +next: + nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(batch), msg_type, + nftnl_set_get_u32(nls, NFTNL_SET_FAMILY), + flags, *seqnum); + + if (nftnl_set_is_set(nls, NFTNL_SET_TABLE)) { + mnl_attr_put_strz(nlh, NFTA_SET_ELEM_LIST_TABLE, + nftnl_set_get_str(nls, NFTNL_SET_TABLE)); + } + if (nftnl_set_is_set(nls, NFTNL_SET_NAME)) { + mnl_attr_put_strz(nlh, NFTA_SET_ELEM_LIST_SET, + nftnl_set_get_str(nls, NFTNL_SET_NAME)); } + if (nftnl_set_is_set(nls, NFTNL_SET_ID)) { + mnl_attr_put_u32(nlh, NFTA_SET_ELEM_LIST_SET_ID, + htonl(nftnl_set_get_u32(nls, NFTNL_SET_ID))); + } + + if (!init || list_empty(&init->expressions)) + return 0; + + assert(expr); + nest1 = mnl_attr_nest_start(nlh, NFTA_SET_ELEM_LIST_ELEMENTS); + list_for_each_entry_from(expr, &init->expressions, list) { + + if (set_is_non_concat_range(set)) { + if (set_is_anonymous(set->flags) && + !list_is_last(&expr->list, &init->expressions)) + next = list_next_entry(expr, list); + else + next = NULL; + + if (!nlse_high) { + nlse = alloc_nftnl_setelem_interval(set, init, expr, next, &nlse_high); + } else { + nlse = nlse_high; + nlse_high = NULL; + } + } else { + nlse = alloc_nftnl_setelem(init, expr); + } + + cmd_add_loc(cmd, nlh, &expr->location); + + /* remain with this element, range high still needs to be added. */ + if (nlse_high) + expr = list_prev_entry(expr, list); + + nest2 = mnl_attr_nest_start(nlh, ++i); + nftnl_set_elem_nlmsg_build_payload(nlh, nlse); + mnl_attr_nest_end(nlh, nest2); - nftnl_set_elems_iter_destroy(iter); + netlink_dump_setelem(nlse, ctx); + nftnl_set_elem_free(nlse); + if (mnl_nft_attr_nest_overflow(nlh, nest1, nest2)) { + if (nlse_high) { + nftnl_set_elem_free(nlse_high); + nlse_high = NULL; + } + mnl_attr_nest_end(nlh, nest1); + mnl_nft_batch_continue(batch); + mnl_seqnum_inc(seqnum); + goto next; + } + } + mnl_attr_nest_end(nlh, nest1); + mnl_nft_batch_continue(batch); + netlink_dump_setelem_done(ctx); return 0; } -int mnl_nft_setelem_add(struct netlink_ctx *ctx, const struct set *set, - const struct expr *expr, unsigned int flags) +int mnl_nft_setelem_add(struct netlink_ctx *ctx, struct cmd *cmd, + const struct set *set, const struct expr *expr, + unsigned int flags) { const struct handle *h = &set->handle; struct nftnl_set *nls; @@ -1540,12 +1885,14 @@ int mnl_nft_setelem_add(struct netlink_ctx *ctx, const struct set *set, nftnl_set_set_str(nls, NFTNL_SET_NAME, h->set.name); if (h->set_id) nftnl_set_set_u32(nls, NFTNL_SET_ID, h->set_id); + if (set_is_datamap(set->flags)) + nftnl_set_set_u32(nls, NFTNL_SET_DATA_TYPE, + dtype_map_to_kernel(set->data->dtype)); - alloc_setelem_cache(expr, nls); netlink_dump_set(nls, ctx); - err = mnl_nft_setelem_batch(nls, ctx->batch, NFT_MSG_NEWSETELEM, flags, - ctx->seqnum); + err = mnl_nft_setelem_batch(nls, cmd, ctx->batch, NFT_MSG_NEWSETELEM, + flags, &ctx->seqnum, set, expr, ctx); nftnl_set_free(nls); return err; @@ -1581,9 +1928,11 @@ int mnl_nft_setelem_flush(struct netlink_ctx *ctx, const struct cmd *cmd) return 0; } -int mnl_nft_setelem_del(struct netlink_ctx *ctx, const struct cmd *cmd) +int mnl_nft_setelem_del(struct netlink_ctx *ctx, struct cmd *cmd, + const struct handle *h, const struct set *set, + const struct expr *init) { - const struct handle *h = &cmd->handle; + enum nf_tables_msg_types msg_type = NFT_MSG_DELSETELEM; struct nftnl_set *nls; int err; @@ -1598,26 +1947,34 @@ int mnl_nft_setelem_del(struct netlink_ctx *ctx, const struct cmd *cmd) else if (h->handle.id) nftnl_set_set_u64(nls, NFTNL_SET_HANDLE, h->handle.id); - if (cmd->expr) - alloc_setelem_cache(cmd->expr, nls); netlink_dump_set(nls, ctx); - err = mnl_nft_setelem_batch(nls, ctx->batch, NFT_MSG_DELSETELEM, 0, - ctx->seqnum); + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYSETELEM; + + err = mnl_nft_setelem_batch(nls, cmd, ctx->batch, msg_type, 0, + &ctx->seqnum, set, init, ctx); nftnl_set_free(nls); return err; } struct nftnl_set *mnl_nft_setelem_get_one(struct netlink_ctx *ctx, - struct nftnl_set *nls_in) + struct nftnl_set *nls_in, + bool reset) { char buf[MNL_SOCKET_BUFFER_SIZE]; struct nftnl_set *nls_out; struct nlmsghdr *nlh; + int msg_type; int err; - nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETSETELEM, + if (reset) + msg_type = NFT_MSG_GETSETELEM_RESET; + else + msg_type = NFT_MSG_GETSETELEM; + + nlh = nftnl_nlmsg_build_hdr(buf, msg_type, nftnl_set_get_u32(nls_in, NFTNL_SET_FAMILY), NLM_F_ACK, ctx->seqnum); nftnl_set_elems_nlmsg_build_payload(nlh, nls_in); @@ -1640,12 +1997,19 @@ struct nftnl_set *mnl_nft_setelem_get_one(struct netlink_ctx *ctx, return nls_out; } -int mnl_nft_setelem_get(struct netlink_ctx *ctx, struct nftnl_set *nls) +int mnl_nft_setelem_get(struct netlink_ctx *ctx, struct nftnl_set *nls, + bool reset) { char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlmsghdr *nlh; + int msg_type; + + if (reset) + msg_type = NFT_MSG_GETSETELEM_RESET; + else + msg_type = NFT_MSG_GETSETELEM; - nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETSETELEM, + nlh = nftnl_nlmsg_build_hdr(buf, msg_type, nftnl_set_get_u32(nls, NFTNL_SET_FAMILY), NLM_F_DUMP, ctx->seqnum); nftnl_set_elems_nlmsg_build_payload(nlh, nls); @@ -1677,11 +2041,13 @@ err_free: } struct nftnl_flowtable_list * -mnl_nft_flowtable_dump(struct netlink_ctx *ctx, int family, const char *table) +mnl_nft_flowtable_dump(struct netlink_ctx *ctx, int family, + const char *table, const char *ft) { struct nftnl_flowtable_list *nln_list; char buf[MNL_SOCKET_BUFFER_SIZE]; struct nftnl_flowtable *n; + int flags = NLM_F_DUMP; struct nlmsghdr *nlh; int ret; @@ -1689,10 +2055,14 @@ mnl_nft_flowtable_dump(struct netlink_ctx *ctx, int family, const char *table) if (n == NULL) memory_allocation_error(); - nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETFLOWTABLE, family, - NLM_F_DUMP, ctx->seqnum); if (table != NULL) nftnl_flowtable_set_str(n, NFTNL_FLOWTABLE_TABLE, table); + if (ft) { + nftnl_flowtable_set_str(n, NFTNL_FLOWTABLE_NAME, ft); + flags = NLM_F_ACK; + } + nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETFLOWTABLE, family, + flags, ctx->seqnum); nftnl_flowtable_nlmsg_build_payload(nlh, n); nftnl_flowtable_free(n); @@ -1701,7 +2071,7 @@ mnl_nft_flowtable_dump(struct netlink_ctx *ctx, int family, const char *table) memory_allocation_error(); ret = nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, flowtable_cb, nln_list); - if (ret < 0) + if (ret < 0 && errno != ENOENT) goto err; return nln_list; @@ -1710,48 +2080,30 @@ err: return NULL; } -static const char **nft_flowtable_dev_array(struct cmd *cmd) +static void mnl_nft_ft_devs_build(struct nlmsghdr *nlh, struct cmd *cmd) { - unsigned int ifname_len; - const char **dev_array; - char ifname[IFNAMSIZ]; - int i = 0, len = 1; - struct expr *expr; - - list_for_each_entry(expr, &cmd->flowtable->dev_expr->expressions, list) - len++; - - dev_array = xmalloc(sizeof(char *) * len); - - list_for_each_entry(expr, &cmd->flowtable->dev_expr->expressions, list) { - ifname_len = div_round_up(expr->len, BITS_PER_BYTE); - memset(ifname, 0, sizeof(ifname)); - mpz_export_data(ifname, expr->value, BYTEORDER_HOST_ENDIAN, - ifname_len); - dev_array[i++] = xstrdup(ifname); + const struct expr *dev_expr = cmd->flowtable->dev_expr; + const struct nft_dev *dev_array; + struct nlattr *nest_dev; + int i, num_devs= 0; + + dev_array = nft_dev_array(dev_expr, &num_devs); + nest_dev = mnl_attr_nest_start(nlh, NFTA_FLOWTABLE_HOOK_DEVS); + for (i = 0; i < num_devs; i++) { + cmd_add_loc(cmd, nlh, dev_array[i].location); + mnl_attr_put_strz(nlh, NFTA_DEVICE_NAME, dev_array[i].ifname); } - dev_array[i] = NULL; - - return dev_array; -} - -static void nft_flowtable_dev_array_free(const char **dev_array) -{ - int i = 0; - - while (dev_array[i] != NULL) - xfree(dev_array[i++]); - - free(dev_array); + mnl_attr_nest_end(nlh, nest_dev); + nft_dev_array_free(dev_array); } int mnl_nft_flowtable_add(struct netlink_ctx *ctx, struct cmd *cmd, unsigned int flags) { struct nftnl_flowtable *flo; - const char **dev_array; struct nlmsghdr *nlh; + struct nlattr *nest; int priority; flo = nftnl_flowtable_alloc(); @@ -1761,24 +2113,6 @@ int mnl_nft_flowtable_add(struct netlink_ctx *ctx, struct cmd *cmd, nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_FAMILY, cmd->handle.family); - if (cmd->flowtable->hook.name) { - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_HOOKNUM, - cmd->flowtable->hook.num); - mpz_export_data(&priority, cmd->flowtable->priority.expr->value, - BYTEORDER_HOST_ENDIAN, sizeof(int)); - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_PRIO, priority); - } else { - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_HOOKNUM, 0); - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_PRIO, 0); - } - - if (cmd->flowtable->dev_expr) { - dev_array = nft_flowtable_dev_array(cmd); - nftnl_flowtable_set_data(flo, NFTNL_FLOWTABLE_DEVICES, - dev_array, 0); - nft_flowtable_dev_array_free(dev_array); - } - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_FLAGS, cmd->flowtable->flags); @@ -1788,12 +2122,27 @@ int mnl_nft_flowtable_add(struct netlink_ctx *ctx, struct cmd *cmd, NFT_MSG_NEWFLOWTABLE, cmd->handle.family, NLM_F_CREATE | flags, ctx->seqnum); - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.table.location); + cmd_add_loc(cmd, nlh, &cmd->handle.table.location); mnl_attr_put_strz(nlh, NFTA_FLOWTABLE_TABLE, cmd->handle.table.name); - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.flowtable.location); + cmd_add_loc(cmd, nlh, &cmd->handle.flowtable.location); mnl_attr_put_strz(nlh, NFTA_FLOWTABLE_NAME, cmd->handle.flowtable.name); nftnl_flowtable_nlmsg_build_payload(nlh, flo); + + nest = mnl_attr_nest_start(nlh, NFTA_FLOWTABLE_HOOK); + + if (cmd->flowtable && cmd->flowtable->priority.expr) { + mnl_attr_put_u32(nlh, NFTA_FLOWTABLE_HOOK_NUM, htonl(cmd->flowtable->hook.num)); + mpz_export_data(&priority, cmd->flowtable->priority.expr->value, + BYTEORDER_HOST_ENDIAN, sizeof(int)); + mnl_attr_put_u32(nlh, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(priority)); + } + + if (cmd->flowtable->dev_expr) + mnl_nft_ft_devs_build(nlh, cmd); + + mnl_attr_nest_end(nlh, nest); + nftnl_flowtable_free(flo); mnl_nft_batch_continue(ctx->batch); @@ -1803,9 +2152,10 @@ int mnl_nft_flowtable_add(struct netlink_ctx *ctx, struct cmd *cmd, int mnl_nft_flowtable_del(struct netlink_ctx *ctx, struct cmd *cmd) { + enum nf_tables_msg_types msg_type = NFT_MSG_DELFLOWTABLE; struct nftnl_flowtable *flo; - const char **dev_array; struct nlmsghdr *nlh; + struct nlattr *nest; flo = nftnl_flowtable_alloc(); if (!flo) @@ -1814,35 +2164,35 @@ int mnl_nft_flowtable_del(struct netlink_ctx *ctx, struct cmd *cmd) nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_FAMILY, cmd->handle.family); - if (cmd->flowtable && cmd->flowtable->dev_expr) { - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_HOOKNUM, 0); - nftnl_flowtable_set_u32(flo, NFTNL_FLOWTABLE_PRIO, 0); - - dev_array = nft_flowtable_dev_array(cmd); - nftnl_flowtable_set_data(flo, NFTNL_FLOWTABLE_DEVICES, - dev_array, 0); - nft_flowtable_dev_array_free(dev_array); - } + if (cmd->op == CMD_DESTROY) + msg_type = NFT_MSG_DESTROYFLOWTABLE; nlh = nftnl_nlmsg_build_hdr(nftnl_batch_buffer(ctx->batch), - NFT_MSG_DELFLOWTABLE, cmd->handle.family, + msg_type, cmd->handle.family, 0, ctx->seqnum); - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.table.location); + cmd_add_loc(cmd, nlh, &cmd->handle.table.location); mnl_attr_put_strz(nlh, NFTA_FLOWTABLE_TABLE, cmd->handle.table.name); if (cmd->handle.flowtable.name) { - cmd_add_loc(cmd, nlh->nlmsg_len, - &cmd->handle.flowtable.location); + cmd_add_loc(cmd, nlh, &cmd->handle.flowtable.location); mnl_attr_put_strz(nlh, NFTA_FLOWTABLE_NAME, cmd->handle.flowtable.name); } else if (cmd->handle.handle.id) { - cmd_add_loc(cmd, nlh->nlmsg_len, &cmd->handle.handle.location); + cmd_add_loc(cmd, nlh, &cmd->handle.handle.location); mnl_attr_put_u64(nlh, NFTA_FLOWTABLE_HANDLE, htobe64(cmd->handle.handle.id)); } nftnl_flowtable_nlmsg_build_payload(nlh, flo); + + if (cmd->op == CMD_DELETE && + cmd->flowtable && cmd->flowtable->dev_expr) { + nest = mnl_attr_nest_start(nlh, NFTA_FLOWTABLE_HOOK); + mnl_nft_ft_devs_build(nlh, cmd); + mnl_attr_nest_end(nlh, nest); + } + nftnl_flowtable_free(flo); mnl_nft_batch_continue(ctx->batch); @@ -1861,7 +2211,7 @@ int mnl_nft_event_listener(struct mnl_socket *nf_sock, unsigned int debug_mask, void *cb_data) { /* Set netlink socket buffer size to 16 Mbytes to reduce chances of - * message loss due to ENOBUFS. + * message loss due to ENOBUFS. */ unsigned int bufsiz = NFTABLES_NLEVENT_BUFSIZ; int fd = mnl_socket_get_fd(nf_sock); @@ -1905,3 +2255,481 @@ int mnl_nft_event_listener(struct mnl_socket *nf_sock, unsigned int debug_mask, } return ret; } + +static struct basehook *basehook_alloc(void) +{ + return xzalloc(sizeof(struct basehook)); +} + +static void basehook_free(struct basehook *b) +{ + list_del(&b->list); + free_const(b->module_name); + free_const(b->hookfn); + free_const(b->chain); + free_const(b->table); + free_const(b->devname); + free(b); +} + +static bool basehook_eq(const struct basehook *prev, const struct basehook *hook) +{ + if (prev->num != hook->num) + return false; + + if (prev->devname != NULL && hook->devname != NULL) + return strcmp(prev->devname, hook->devname) == 0; + + if (prev->devname == NULL && hook->devname == NULL) + return true; + + return false; +} + +static void basehook_list_add_tail(struct basehook *b, struct list_head *head) +{ + struct basehook *hook; + + list_for_each_entry(hook, head, list) { + if (hook->family != b->family) + continue; + if (!basehook_eq(hook, b)) + continue; + if (hook->prio < b->prio) + continue; + + list_add(&b->list, &hook->list); + return; + } + + list_add_tail(&b->list, head); +} + +static int dump_nf_attr_cb(const struct nlattr *attr, void *data) +{ + int type = mnl_attr_get_type(attr); + const struct nlattr **tb = data; + + if (mnl_attr_type_valid(attr, NFNLA_HOOK_MAX) < 0) + return MNL_CB_OK; + + switch(type) { + case NFNLA_HOOK_HOOKNUM: + case NFNLA_HOOK_PRIORITY: + if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) + return MNL_CB_ERROR; + break; + case NFNLA_HOOK_DEV: + if (mnl_attr_validate(attr, MNL_TYPE_STRING) < 0) + return MNL_CB_ERROR; + break; + case NFNLA_HOOK_MODULE_NAME: + case NFNLA_HOOK_FUNCTION_NAME: + if (mnl_attr_validate(attr, MNL_TYPE_NUL_STRING) < 0) + return MNL_CB_ERROR; + break; + case NFNLA_HOOK_CHAIN_INFO: + if (mnl_attr_validate(attr, MNL_TYPE_NESTED) < 0) + return MNL_CB_ERROR; + break; + default: + return MNL_CB_OK; + } + + tb[type] = attr; + return MNL_CB_OK; +} + +static int dump_nf_chain_info_cb(const struct nlattr *attr, void *data) +{ + int type = mnl_attr_get_type(attr); + const struct nlattr **tb = data; + + if (mnl_attr_type_valid(attr, NFNLA_HOOK_INFO_MAX) < 0) + return MNL_CB_OK; + + switch(type) { + case NFNLA_HOOK_INFO_DESC: + if (mnl_attr_validate(attr, MNL_TYPE_NESTED) < 0) + return MNL_CB_ERROR; + break; + case NFNLA_HOOK_INFO_TYPE: + if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) + return MNL_CB_ERROR; + break; + default: + return MNL_CB_OK; + } + + tb[type] = attr; + return MNL_CB_OK; +} + +static int dump_nf_attr_chain_cb(const struct nlattr *attr, void *data) +{ + int type = mnl_attr_get_type(attr); + const struct nlattr **tb = data; + + if (mnl_attr_type_valid(attr, NFNLA_CHAIN_MAX) < 0) + return MNL_CB_OK; + + switch(type) { + case NFNLA_CHAIN_TABLE: + case NFNLA_CHAIN_NAME: + if (mnl_attr_validate(attr, MNL_TYPE_NUL_STRING) < 0) + return MNL_CB_ERROR; + break; + case NFNLA_CHAIN_FAMILY: + if (mnl_attr_validate(attr, MNL_TYPE_U8) < 0) + return MNL_CB_ERROR; + break; + default: + return MNL_CB_OK; + } + + tb[type] = attr; + return MNL_CB_OK; +} + +static int dump_nf_attr_bpf_cb(const struct nlattr *attr, void *data) +{ + int type = mnl_attr_get_type(attr); + const struct nlattr **tb = data; + + if (mnl_attr_type_valid(attr, NFNLA_HOOK_BPF_MAX) < 0) + return MNL_CB_OK; + + switch(type) { + case NFNLA_HOOK_BPF_ID: + if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) + return MNL_CB_ERROR; + break; + default: + return MNL_CB_OK; + } + + tb[type] = attr; + return MNL_CB_OK; +} + +struct dump_nf_hook_data { + struct list_head *hook_list; + const char *devname; + int family; +}; + +static int dump_nf_hooks(const struct nlmsghdr *nlh, void *_data) +{ + const struct nfgenmsg *nfg = mnl_nlmsg_get_payload(nlh); + struct nlattr *tb[NFNLA_HOOK_MAX + 1] = {}; + struct dump_nf_hook_data *data = _data; + struct basehook *hook; + + /* NB: Don't check the nft generation ID, this is not + * an nftables subsystem. + */ + if (mnl_attr_parse(nlh, sizeof(*nfg), dump_nf_attr_cb, tb) < 0) + return -1; + + if (!tb[NFNLA_HOOK_PRIORITY]) + netlink_abi_error(); + + hook = basehook_alloc(); + hook->prio = ntohl(mnl_attr_get_u32(tb[NFNLA_HOOK_PRIORITY])); + hook->devname = data->devname ? xstrdup(data->devname) : NULL; + + if (tb[NFNLA_HOOK_FUNCTION_NAME]) + hook->hookfn = xstrdup(mnl_attr_get_str(tb[NFNLA_HOOK_FUNCTION_NAME])); + + if (tb[NFNLA_HOOK_MODULE_NAME]) + hook->module_name = xstrdup(mnl_attr_get_str(tb[NFNLA_HOOK_MODULE_NAME])); + + if (tb[NFNLA_HOOK_CHAIN_INFO]) { + struct nlattr *nested[NFNLA_HOOK_INFO_MAX + 1] = {}; + uint32_t type; + + if (mnl_attr_parse_nested(tb[NFNLA_HOOK_CHAIN_INFO], + dump_nf_chain_info_cb, nested) < 0) { + basehook_free(hook); + return -1; + } + + type = ntohl(mnl_attr_get_u32(nested[NFNLA_HOOK_INFO_TYPE])); + if (type == NFNL_HOOK_TYPE_NFTABLES) { + struct nlattr *info[NFNLA_CHAIN_MAX + 1] = {}; + const char *tablename, *chainname; + + if (mnl_attr_parse_nested(nested[NFNLA_HOOK_INFO_DESC], + dump_nf_attr_chain_cb, + info) < 0) { + basehook_free(hook); + return -1; + } + + tablename = mnl_attr_get_str(info[NFNLA_CHAIN_TABLE]); + chainname = mnl_attr_get_str(info[NFNLA_CHAIN_NAME]); + if (tablename && chainname) { + hook->table = xstrdup(tablename); + hook->chain = xstrdup(chainname); + } + hook->chain_family = mnl_attr_get_u8(info[NFNLA_CHAIN_FAMILY]); + } else if (type == NFNL_HOOK_TYPE_BPF) { + struct nlattr *info[NFNLA_HOOK_BPF_MAX + 1] = {}; + + if (mnl_attr_parse_nested(nested[NFNLA_HOOK_INFO_DESC], + dump_nf_attr_bpf_cb, info) < 0) { + basehook_free(hook); + return -1; + } + + if (info[NFNLA_HOOK_BPF_ID]) { + char tmpbuf[16]; + + snprintf(tmpbuf, sizeof(tmpbuf), "id %u", + ntohl(mnl_attr_get_u32(info[NFNLA_HOOK_BPF_ID]))); + + hook->chain = xstrdup(tmpbuf); + } + } + } + if (tb[NFNLA_HOOK_HOOKNUM]) + hook->num = ntohl(mnl_attr_get_u32(tb[NFNLA_HOOK_HOOKNUM])); + + hook->family = nfg->nfgen_family; + + basehook_list_add_tail(hook, data->hook_list); + + return MNL_CB_OK; +} + +static struct nlmsghdr *nf_hook_dump_request(char *buf, uint8_t family, uint32_t seq) +{ + struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf); + struct nfgenmsg *nfg; + + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + nlh->nlmsg_type = NFNL_SUBSYS_HOOK << 8; + nlh->nlmsg_seq = seq; + + nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg)); + nfg->nfgen_family = family; + nfg->version = NFNETLINK_V0; + + return nlh; +} + +static int __mnl_nft_dump_nf_hooks(struct netlink_ctx *ctx, uint8_t query_family, + uint8_t family, uint8_t hooknum, + const char *devname, + struct list_head *hook_list) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct dump_nf_hook_data data = { + .hook_list = hook_list, + .devname = devname, + .family = query_family, + }; + struct nlmsghdr *nlh; + + nlh = nf_hook_dump_request(buf, family, ctx->seqnum); + if (devname) + mnl_attr_put_strz(nlh, NFNLA_HOOK_DEV, devname); + + mnl_attr_put_u32(nlh, NFNLA_HOOK_HOOKNUM, htonl(hooknum)); + + return nft_mnl_talk(ctx, nlh, nlh->nlmsg_len, dump_nf_hooks, &data); +} + +static void print_hooks(struct netlink_ctx *ctx, int family, struct list_head *hook_list) +{ + struct basehook *hook, *tmp, *prev = NULL; + bool same, family_in_use = false; + int prio; + FILE *fp; + + fp = ctx->nft->output.output_fp; + + list_for_each_entry_safe(hook, tmp, hook_list, list) { + if (hook->family == family) { + family_in_use = true; + break; + } + } + + if (!family_in_use) + return; + + fprintf(fp, "family %s {\n", family2str(family)); + + list_for_each_entry_safe(hook, tmp, hook_list, list) { + if (hook->family != family) + continue; + + if (prev) { + if (basehook_eq(prev, hook)) { + fprintf(fp, "\n"); + same = true; + } else { + same = false; + fprintf(fp, "\n\t}\n"); + } + } else { + same = false; + } + prev = hook; + + if (!same) { + if (hook->devname) + fprintf(fp, "\thook %s device %s {\n", + hooknum2str(family, hook->num), hook->devname); + else + fprintf(fp, "\thook %s {\n", + hooknum2str(family, hook->num)); + } + + prio = hook->prio; + if (prio < 0) + fprintf(fp, "\t\t%011d", prio); /* outputs a '-' sign */ + else if (prio == 0) + fprintf(fp, "\t\t %010u", prio); + else + fprintf(fp, "\t\t+%010u", prio); + + if (hook->table && hook->chain) + fprintf(fp, " chain %s %s %s", family2str(hook->chain_family), hook->table, hook->chain); + else if (hook->hookfn && hook->chain) + fprintf(fp, " %s %s", hook->hookfn, hook->chain); + else if (hook->hookfn) { + fprintf(fp, " %s", hook->hookfn); + } + if (hook->module_name) + fprintf(fp, " [%s]", hook->module_name); + } + + fprintf(fp, "\n\t}\n"); + fprintf(fp, "}\n"); +} + +static int mnl_nft_dump_nf(struct netlink_ctx *ctx, int family, + const char *devname, struct list_head *hook_list) +{ + int i, err; + + for (i = 0; i <= NF_INET_POST_ROUTING; i++) { + int tmp; + + tmp = __mnl_nft_dump_nf_hooks(ctx, family, family, i, devname, hook_list); + if (tmp == 0) + err = 0; + } + + return err; +} + +static int mnl_nft_dump_nf_arp(struct netlink_ctx *ctx, int family, + const char *devname, struct list_head *hook_list) +{ + int err1, err2; + + err1 = __mnl_nft_dump_nf_hooks(ctx, family, family, NF_ARP_IN, devname, hook_list); + err2 = __mnl_nft_dump_nf_hooks(ctx, family, family, NF_ARP_OUT, devname, hook_list); + + return err1 ? err2 : err1; +} + +static int mnl_nft_dump_nf_netdev(struct netlink_ctx *ctx, int family, + const char *devname, struct list_head *hook_list) +{ + int err1, err2; + + err1 = __mnl_nft_dump_nf_hooks(ctx, family, NFPROTO_NETDEV, NF_NETDEV_INGRESS, devname, hook_list); + err2 = __mnl_nft_dump_nf_hooks(ctx, family, NFPROTO_NETDEV, NF_NETDEV_EGRESS, devname, hook_list); + + return err1 ? err2 : err1; +} + +static void release_hook_list(struct list_head *hook_list) +{ + struct basehook *hook, *next; + + list_for_each_entry_safe(hook, next, hook_list, list) + basehook_free(hook); +} + +static void warn_if_device(struct nft_ctx *nft, const char *devname) +{ + if (devname) + nft_print(&nft->output, "# device keyword (%s) unexpected for this family\n", devname); +} + +int mnl_nft_dump_nf_hooks(struct netlink_ctx *ctx, int family, const char *devname) +{ + LIST_HEAD(hook_list); + int ret = -1, tmp; + + errno = 0; + + switch (family) { + case NFPROTO_UNSPEC: + ret = mnl_nft_dump_nf_hooks(ctx, NFPROTO_ARP, NULL); + tmp = mnl_nft_dump_nf_hooks(ctx, NFPROTO_INET, NULL); + if (tmp == 0) + ret = 0; + tmp = mnl_nft_dump_nf_hooks(ctx, NFPROTO_BRIDGE, NULL); + if (tmp == 0) + ret = 0; + + tmp = mnl_nft_dump_nf_hooks(ctx, NFPROTO_NETDEV, devname); + if (tmp == 0) + ret = 0; + + return ret; + case NFPROTO_INET: + ret = 0; + if (devname) + ret = __mnl_nft_dump_nf_hooks(ctx, family, NFPROTO_NETDEV, + NF_NETDEV_INGRESS, devname, &hook_list); + tmp = mnl_nft_dump_nf_hooks(ctx, NFPROTO_IPV4, NULL); + if (tmp == 0) + ret = 0; + tmp = mnl_nft_dump_nf_hooks(ctx, NFPROTO_IPV6, NULL); + if (tmp == 0) + ret = 0; + + break; + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_BRIDGE: + warn_if_device(ctx->nft, devname); + ret = mnl_nft_dump_nf(ctx, family, devname, &hook_list); + break; + case NFPROTO_ARP: + warn_if_device(ctx->nft, devname); + ret = mnl_nft_dump_nf_arp(ctx, family, devname, &hook_list); + break; + case NFPROTO_NETDEV: + if (devname) { + ret = mnl_nft_dump_nf_netdev(ctx, family, devname, &hook_list); + } else { + const struct iface *iface; + + iface = iface_cache_get_next_entry(NULL); + ret = 0; + + while (iface) { + tmp = mnl_nft_dump_nf_netdev(ctx, family, iface->name, &hook_list); + if (tmp == 0) + ret = 0; + + iface = iface_cache_get_next_entry(iface); + } + } + + break; + } + + print_hooks(ctx, family, &hook_list); + release_hook_list(&hook_list); + + return ret; +} |