diff --git a/MAINTAINERS b/MAINTAINERS index 2a86fb06a134..62b0307a7ca4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8587,9 +8587,8 @@ F: Documentation/networking/s2io.txt F: Documentation/networking/vxge.txt F: drivers/net/ethernet/neterion/ -NETFILTER ({IP,IP6,ARP,EB,NF}TABLES) +NETFILTER M: Pablo Neira Ayuso -M: Patrick McHardy M: Jozsef Kadlecsik L: netfilter-devel@vger.kernel.org L: coreteam@netfilter.org diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 1d82dd5e9a08..1b49209dd5c7 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -28,6 +28,7 @@ struct nfnetlink_subsystem { const struct nfnl_callback *cb; /* callback for individual types */ int (*commit)(struct net *net, struct sk_buff *skb); int (*abort)(struct net *net, struct sk_buff *skb); + bool (*valid_genid)(struct net *net, u32 genid); }; int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7dfdb517f0be..ac84686aaafb 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -203,6 +203,7 @@ struct nft_set_elem { struct nft_set; struct nft_set_iter { u8 genmask; + bool flush; unsigned int count; unsigned int skip; int err; @@ -243,11 +244,13 @@ enum nft_set_class { * characteristics * * @size: required memory - * @class: lookup performance class + * @lookup: lookup performance class + * @space: memory class */ struct nft_set_estimate { unsigned int size; - enum nft_set_class class; + enum nft_set_class lookup; + enum nft_set_class space; }; struct nft_set_ext; @@ -260,7 +263,7 @@ struct nft_expr; * @insert: insert new element into set * @activate: activate new element in the next generation * @deactivate: lookup for element and deactivate it in the next generation - * @deactivate_one: deactivate element in the next generation + * @flush: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts * @privsize: function to return size of set private data @@ -295,10 +298,11 @@ struct nft_set_ops { void * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); - bool (*deactivate_one)(const struct net *net, - const struct nft_set *set, - void *priv); - void (*remove)(const struct nft_set *set, + bool (*flush)(const struct net *net, + const struct nft_set *set, + void *priv); + void (*remove)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, @@ -1198,10 +1202,13 @@ struct nft_trans { struct nft_trans_rule { struct nft_rule *rule; + u32 rule_id; }; #define nft_trans_rule(trans) \ (((struct nft_trans_rule *)trans->data)->rule) +#define nft_trans_rule_id(trans) \ + (((struct nft_trans_rule *)trans->data)->rule_id) struct nft_trans_set { struct nft_set *set; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 7b730cab99bd..05215d30fe5c 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -207,6 +207,7 @@ enum nft_chain_attributes { * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN) + * @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32) */ enum nft_rule_attributes { NFTA_RULE_UNSPEC, @@ -218,6 +219,7 @@ enum nft_rule_attributes { NFTA_RULE_POSITION, NFTA_RULE_USERDATA, NFTA_RULE_PAD, + NFTA_RULE_ID, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) @@ -704,13 +706,32 @@ enum nft_payload_attributes { }; #define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) +enum nft_exthdr_flags { + NFT_EXTHDR_F_PRESENT = (1 << 0), +}; + /** - * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression netlink attributes + * enum nft_exthdr_op - nf_tables match options + * + * @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers + * @NFT_EXTHDR_OP_TCP: match against tcp options + */ +enum nft_exthdr_op { + NFT_EXTHDR_OP_IPV6, + NFT_EXTHDR_OP_TCPOPT, + __NFT_EXTHDR_OP_MAX +}; +#define NFT_EXTHDR_OP_MAX (__NFT_EXTHDR_OP_MAX - 1) + +/** + * enum nft_exthdr_attributes - nf_tables extension header expression netlink attributes * * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers) * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8) * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32) * @NFTA_EXTHDR_LEN: extension header length (NLA_U32) + * @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32) + * @NFTA_EXTHDR_OP: option match type (NLA_U8) */ enum nft_exthdr_attributes { NFTA_EXTHDR_UNSPEC, @@ -718,6 +739,8 @@ enum nft_exthdr_attributes { NFTA_EXTHDR_TYPE, NFTA_EXTHDR_OFFSET, NFTA_EXTHDR_LEN, + NFTA_EXTHDR_FLAGS, + NFTA_EXTHDR_OP, __NFTA_EXTHDR_MAX }; #define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1) @@ -864,6 +887,7 @@ enum nft_rt_attributes { * @NFT_CT_PKTS: conntrack packets * @NFT_CT_BYTES: conntrack bytes * @NFT_CT_AVGPKT: conntrack average bytes per packet + * @NFT_CT_ZONE: conntrack zone */ enum nft_ct_keys { NFT_CT_STATE, @@ -883,6 +907,7 @@ enum nft_ct_keys { NFT_CT_PKTS, NFT_CT_BYTES, NFT_CT_AVGPKT, + NFT_CT_ZONE, }; /** diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 4bb8cb7730e7..a09906a30d77 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -65,4 +65,16 @@ struct nfgenmsg { #define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE #define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1 +/** + * enum nfnl_batch_attributes - nfnetlink batch netlink attributes + * + * @NFNL_BATCH_GENID: generation ID for this changeset (NLA_U32) + */ +enum nfnl_batch_attributes { + NFNL_BATCH_UNSPEC, + NFNL_BATCH_GENID, + __NFNL_BATCH_MAX +}; +#define NFNL_BATCH_MAX (__NFNL_BATCH_MAX - 1) + #endif /* _UAPI_NFNETLINK_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index dfbe9deeb8c4..9b28864cc36a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -467,10 +467,10 @@ config NF_TABLES_NETDEV This option enables support for the "netdev" table. config NFT_EXTHDR - tristate "Netfilter nf_tables IPv6 exthdr module" + tristate "Netfilter nf_tables exthdr module" help This option adds the "exthdr" expression that you can use to match - IPv6 extension headers. + IPv6 extension headers and tcp options. config NFT_META tristate "Netfilter nf_tables meta module" @@ -509,6 +509,12 @@ config NFT_SET_HASH This option adds the "hash" set type that is used to build one-way mappings between matchings and actions. +config NFT_SET_BITMAP + tristate "Netfilter nf_tables bitmap set module" + help + This option adds the "bitmap" set type that is used to build sets + whose keys are smaller or equal to 16 bits. + config NFT_COUNTER tristate "Netfilter nf_tables counter module" help diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6b3034f12661..c9b78e7b342f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -93,6 +93,7 @@ obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o +obj-$(CONFIG_NFT_SET_BITMAP) += nft_set_bitmap.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index f8dbacf66795..e19a69787d99 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -353,7 +353,7 @@ void nf_ct_expect_put(struct nf_conntrack_expect *exp) } EXPORT_SYMBOL_GPL(nf_ct_expect_put); -static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) +static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); struct nf_conntrack_helper *helper; @@ -380,7 +380,6 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) add_timer(&exp->timeout); NF_CT_STAT_INC(net, expect_create); - return 0; } /* Race with expectations being used means we could have none to find; OK. */ @@ -464,9 +463,8 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, if (ret <= 0) goto out; - ret = nf_ct_expect_insert(expect); - if (ret < 0) - goto out; + nf_ct_expect_insert(expect); + spin_unlock_bh(&nf_conntrack_expect_lock); nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); return ret; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index c3fc14e021ec..24174c520239 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -809,13 +809,11 @@ static int refresh_signalling_expectation(struct nf_conn *ct, exp->tuple.dst.protonum != proto || exp->tuple.dst.u.udp.port != port) continue; - if (!del_timer(&exp->timeout)) - continue; - exp->flags &= ~NF_CT_EXPECT_INACTIVE; - exp->timeout.expires = jiffies + expires * HZ; - add_timer(&exp->timeout); - found = 1; - break; + if (mod_timer_pending(&exp->timeout, jiffies + expires * HZ)) { + exp->flags &= ~NF_CT_EXPECT_INACTIVE; + found = 1; + break; + } } spin_unlock_bh(&nf_conntrack_expect_lock); return found; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 57eeae63f597..ff7304ae58ac 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -240,6 +240,10 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, if (trans == NULL) return NULL; + if (msg_type == NFT_MSG_NEWRULE && ctx->nla[NFTA_RULE_ID] != NULL) { + nft_trans_rule_id(trans) = + ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); + } nft_trans_rule(trans) = rule; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -2293,6 +2297,22 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return err; } +static struct nft_rule *nft_rule_lookup_byid(const struct net *net, + const struct nlattr *nla) +{ + u32 id = ntohl(nla_get_be32(nla)); + struct nft_trans *trans; + + list_for_each_entry(trans, &net->nft.commit_list, list) { + struct nft_rule *rule = nft_trans_rule(trans); + + if (trans->msg_type == NFT_MSG_NEWRULE && + id == nft_trans_rule_id(trans)) + return rule; + } + return ERR_PTR(-ENOENT); +} + static int nf_tables_delrule(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2330,6 +2350,12 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, if (IS_ERR(rule)) return PTR_ERR(rule); + err = nft_delrule(&ctx, rule); + } else if (nla[NFTA_RULE_ID]) { + rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]); + if (IS_ERR(rule)) + return PTR_ERR(rule); + err = nft_delrule(&ctx, rule); } else { err = nft_delrule_by_chain(&ctx); @@ -2398,12 +2424,14 @@ nft_select_set_ops(const struct nlattr * const nla[], features = 0; if (nla[NFTA_SET_FLAGS] != NULL) { features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); - features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT; + features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT | + NFT_SET_OBJECT; } - bops = NULL; - best.size = ~0; - best.class = ~0; + bops = NULL; + best.size = ~0; + best.lookup = ~0; + best.space = ~0; list_for_each_entry(ops, &nf_tables_set_ops, list) { if ((ops->features & features) != features) @@ -2413,16 +2441,27 @@ nft_select_set_ops(const struct nlattr * const nla[], switch (policy) { case NFT_SET_POL_PERFORMANCE: - if (est.class < best.class) - break; - if (est.class == best.class && est.size < best.size) + if (est.lookup < best.lookup) break; + if (est.lookup == best.lookup) { + if (!desc->size) { + if (est.space < best.space) + break; + } else if (est.size < best.size) { + break; + } + } continue; case NFT_SET_POL_MEMORY: - if (est.size < best.size) - break; - if (est.size == best.size && est.class < best.class) + if (!desc->size) { + if (est.space < best.space) + break; + if (est.space == best.space && + est.lookup < best.lookup) + break; + } else if (est.size < best.size) { break; + } continue; default: break; @@ -3121,6 +3160,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, iter.count = 0; iter.err = 0; iter.fn = nf_tables_bind_check_setelem; + iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) @@ -3374,6 +3414,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; + args.iter.flush = false; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -3752,7 +3793,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return 0; err6: - set->ops->remove(set, &elem); + set->ops->remove(ctx->net, set, &elem); err5: kfree(trans); err4: @@ -3898,7 +3939,7 @@ static int nft_flush_set(const struct nft_ctx *ctx, if (!trans) return -ENOMEM; - if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) { + if (!set->ops->flush(ctx->net, set, elem->priv)) { err = -ENOENT; goto err1; } @@ -3936,15 +3977,14 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, return -EBUSY; if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { - struct nft_set_dump_args args = { - .iter = { - .genmask = genmask, - .fn = nft_flush_set, - }, + struct nft_set_iter iter = { + .genmask = genmask, + .fn = nft_flush_set, + .flush = true, }; - set->ops->walk(&ctx, set, &args.iter); + set->ops->walk(&ctx, set, &iter); - return args.iter.err; + return iter.err; } nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { @@ -4804,7 +4844,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM, 0); - te->set->ops->remove(te->set, &te->elem); + te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); te->set->ndeact--; break; @@ -4925,7 +4965,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWSETELEM: te = (struct nft_trans_elem *)trans->data; - te->set->ops->remove(te->set, &te->elem); + te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); break; case NFT_MSG_DELSETELEM: @@ -4959,6 +4999,11 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) return 0; } +static bool nf_tables_valid_genid(struct net *net, u32 genid) +{ + return net->nft.base_seq == genid; +} + static const struct nfnetlink_subsystem nf_tables_subsys = { .name = "nf_tables", .subsys_id = NFNL_SUBSYS_NFTABLES, @@ -4966,6 +5011,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .cb = nf_tables_cb, .commit = nf_tables_commit, .abort = nf_tables_abort, + .valid_genid = nf_tables_valid_genid, }; int nft_chain_validate_dependency(const struct nft_chain *chain, @@ -5091,6 +5137,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, iter.count = 0; iter.err = 0; iter.fn = nf_tables_loop_check_setelem; + iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index a09fa9fd8f3d..a2148d0bc50e 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -3,7 +3,7 @@ * * (C) 2001 by Jay Schulist , * (C) 2002-2005 by Harald Welte - * (C) 2005,2007 by Pablo Neira Ayuso + * (C) 2005-2017 by Pablo Neira Ayuso * * Initial netfilter messages via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) @@ -100,9 +100,9 @@ int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n) } EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister); -static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) +static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u16 type) { - u_int8_t subsys_id = NFNL_SUBSYS_ID(type); + u8 subsys_id = NFNL_SUBSYS_ID(type); if (subsys_id >= NFNL_SUBSYS_COUNT) return NULL; @@ -111,9 +111,9 @@ static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t t } static inline const struct nfnl_callback * -nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss) +nfnetlink_find_client(u16 type, const struct nfnetlink_subsystem *ss) { - u_int8_t cb_id = NFNL_MSG_TYPE(type); + u8 cb_id = NFNL_MSG_TYPE(type); if (cb_id >= ss->cb_count) return NULL; @@ -185,7 +185,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); - u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; @@ -273,7 +273,7 @@ enum { }; static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, - u_int16_t subsys_id) + u16 subsys_id, u32 genid) { struct sk_buff *oskb = skb; struct net *net = sock_net(skb->sk); @@ -315,6 +315,12 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, return kfree_skb(skb); } + if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) { + nfnl_unlock(subsys_id); + netlink_ack(oskb, nlh, -ERESTART); + return kfree_skb(skb); + } + while (skb->len >= nlmsg_total_size(0)) { int msglen, type; @@ -365,7 +371,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, { int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); - u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; @@ -436,11 +442,51 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, kfree_skb(skb); } +static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = { + [NFNL_BATCH_GENID] = { .type = NLA_U32 }, +}; + +static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); + struct nlattr *attr = (void *)nlh + min_len; + struct nlattr *cda[NFNL_BATCH_MAX + 1]; + int attrlen = nlh->nlmsg_len - min_len; + struct nfgenmsg *nfgenmsg; + int msglen, err; + u32 gen_id = 0; + u16 res_id; + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) + return; + + err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy); + if (err < 0) { + netlink_ack(skb, nlh, err); + return; + } + if (cda[NFNL_BATCH_GENID]) + gen_id = ntohl(nla_get_be32(cda[NFNL_BATCH_GENID])); + + nfgenmsg = nlmsg_data(nlh); + skb_pull(skb, msglen); + /* Work around old nft using host byte order */ + if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES) + res_id = NFNL_SUBSYS_NFTABLES; + else + res_id = ntohs(nfgenmsg->res_id); + + nfnetlink_rcv_batch(skb, nlh, res_id, gen_id); +} + static void nfnetlink_rcv(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); - u_int16_t res_id; - int msglen; if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) @@ -451,28 +497,10 @@ static void nfnetlink_rcv(struct sk_buff *skb) return; } - if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) { - struct nfgenmsg *nfgenmsg; - - msglen = NLMSG_ALIGN(nlh->nlmsg_len); - if (msglen > skb->len) - msglen = skb->len; - - if (nlh->nlmsg_len < NLMSG_HDRLEN || - skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) - return; - - nfgenmsg = nlmsg_data(nlh); - skb_pull(skb, msglen); - /* Work around old nft using host byte order */ - if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES) - res_id = NFNL_SUBSYS_NFTABLES; - else - res_id = ntohs(nfgenmsg->res_id); - nfnetlink_rcv_batch(skb, nlh, res_id); - } else { + if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) + nfnetlink_rcv_skb_batch(skb, nlh); + else netlink_rcv_skb(skb, &nfnetlink_rcv_msg); - } } #ifdef CONFIG_MODULES diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 66a2377510e1..c6b8022c0e47 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -32,6 +32,11 @@ struct nft_ct { }; }; +#ifdef CONFIG_NF_CONNTRACK_ZONES +static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template); +static unsigned int nft_ct_pcpu_template_refcnt __read_mostly; +#endif + static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c, enum nft_ct_keys k, enum ip_conntrack_dir d) @@ -151,6 +156,18 @@ static void nft_ct_get_eval(const struct nft_expr *expr, case NFT_CT_PROTOCOL: *dest = nf_ct_protonum(ct); return; +#ifdef CONFIG_NF_CONNTRACK_ZONES + case NFT_CT_ZONE: { + const struct nf_conntrack_zone *zone = nf_ct_zone(ct); + + if (priv->dir < IP_CT_DIR_MAX) + *dest = nf_ct_zone_id(zone, priv->dir); + else + *dest = zone->id; + + return; + } +#endif default: break; } @@ -179,6 +196,53 @@ static void nft_ct_get_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } +#ifdef CONFIG_NF_CONNTRACK_ZONES +static void nft_ct_set_zone_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR }; + const struct nft_ct *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + enum ip_conntrack_info ctinfo; + u16 value = regs->data[priv->sreg]; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) /* already tracked */ + return; + + zone.id = value; + + switch (priv->dir) { + case IP_CT_DIR_ORIGINAL: + zone.dir = NF_CT_ZONE_DIR_ORIG; + break; + case IP_CT_DIR_REPLY: + zone.dir = NF_CT_ZONE_DIR_REPL; + break; + default: + break; + } + + ct = this_cpu_read(nft_ct_pcpu_template); + + if (likely(atomic_read(&ct->ct_general.use) == 1)) { + nf_ct_zone_add(ct, &zone); + } else { + /* previous skb got queued to userspace */ + ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC); + if (!ct) { + regs->verdict.code = NF_DROP; + return; + } + } + + atomic_inc(&ct->ct_general.use); + nf_ct_set(skb, ct, IP_CT_NEW); +} +#endif + static void nft_ct_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -257,6 +321,45 @@ static void nft_ct_netns_put(struct net *net, uint8_t family) nf_ct_netns_put(net, family); } +#ifdef CONFIG_NF_CONNTRACK_ZONES +static void nft_ct_tmpl_put_pcpu(void) +{ + struct nf_conn *ct; + int cpu; + + for_each_possible_cpu(cpu) { + ct = per_cpu(nft_ct_pcpu_template, cpu); + if (!ct) + break; + nf_ct_put(ct); + per_cpu(nft_ct_pcpu_template, cpu) = NULL; + } +} + +static bool nft_ct_tmpl_alloc_pcpu(void) +{ + struct nf_conntrack_zone zone = { .id = 0 }; + struct nf_conn *tmp; + int cpu; + + if (nft_ct_pcpu_template_refcnt) + return true; + + for_each_possible_cpu(cpu) { + tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL); + if (!tmp) { + nft_ct_tmpl_put_pcpu(); + return false; + } + + atomic_set(&tmp->ct_general.use, 1); + per_cpu(nft_ct_pcpu_template, cpu) = tmp; + } + + return true; +} +#endif + static int nft_ct_get_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -266,6 +369,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, int err; priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); + priv->dir = IP_CT_DIR_MAX; switch (priv->key) { case NFT_CT_DIRECTION: if (tb[NFTA_CT_DIRECTION] != NULL) @@ -333,11 +437,13 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, case NFT_CT_BYTES: case NFT_CT_PKTS: case NFT_CT_AVGPKT: - /* no direction? return sum of original + reply */ - if (tb[NFTA_CT_DIRECTION] == NULL) - priv->dir = IP_CT_DIR_MAX; len = sizeof(u64); break; +#ifdef CONFIG_NF_CONNTRACK_ZONES + case NFT_CT_ZONE: + len = sizeof(u16); + break; +#endif default: return -EOPNOTSUPP; } @@ -371,15 +477,33 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, return 0; } +static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) +{ + switch (priv->key) { +#ifdef CONFIG_NF_CONNTRACK_LABELS + case NFT_CT_LABELS: + nf_connlabels_put(ctx->net); + break; +#endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + case NFT_CT_ZONE: + if (--nft_ct_pcpu_template_refcnt == 0) + nft_ct_tmpl_put_pcpu(); +#endif + default: + break; + } +} + static int nft_ct_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_ct *priv = nft_expr_priv(expr); - bool label_got = false; unsigned int len; int err; + priv->dir = IP_CT_DIR_MAX; priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); switch (priv->key) { #ifdef CONFIG_NF_CONNTRACK_MARK @@ -397,13 +521,30 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1); if (err) return err; - label_got = true; + break; +#endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + case NFT_CT_ZONE: + if (!nft_ct_tmpl_alloc_pcpu()) + return -ENOMEM; + nft_ct_pcpu_template_refcnt++; break; #endif default: return -EOPNOTSUPP; } + if (tb[NFTA_CT_DIRECTION]) { + priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); + switch (priv->dir) { + case IP_CT_DIR_ORIGINAL: + case IP_CT_DIR_REPLY: + break; + default: + return -EINVAL; + } + } + priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]); err = nft_validate_register_load(priv->sreg, len); if (err < 0) @@ -416,8 +557,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, return 0; err1: - if (label_got) - nf_connlabels_put(ctx->net); + __nft_ct_set_destroy(ctx, priv); return err; } @@ -432,16 +572,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx, { struct nft_ct *priv = nft_expr_priv(expr); - switch (priv->key) { -#ifdef CONFIG_NF_CONNTRACK_LABELS - case NFT_CT_LABELS: - nf_connlabels_put(ctx->net); - break; -#endif - default: - break; - } - + __nft_ct_set_destroy(ctx, priv); nft_ct_netns_put(ctx->net, ctx->afi->family); } @@ -465,6 +596,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) case NFT_CT_BYTES: case NFT_CT_PKTS: case NFT_CT_AVGPKT: + case NFT_CT_ZONE: if (priv->dir < IP_CT_DIR_MAX && nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) goto nla_put_failure; @@ -487,6 +619,17 @@ static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) goto nla_put_failure; + + switch (priv->key) { + case NFT_CT_ZONE: + if (priv->dir < IP_CT_DIR_MAX && + nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) + goto nla_put_failure; + break; + default: + break; + } + return 0; nla_put_failure: @@ -512,6 +655,17 @@ static const struct nft_expr_ops nft_ct_set_ops = { .dump = nft_ct_set_dump, }; +#ifdef CONFIG_NF_CONNTRACK_ZONES +static const struct nft_expr_ops nft_ct_set_zone_ops = { + .type = &nft_ct_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), + .eval = nft_ct_set_zone_eval, + .init = nft_ct_set_init, + .destroy = nft_ct_set_destroy, + .dump = nft_ct_set_dump, +}; +#endif + static const struct nft_expr_ops * nft_ct_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -525,8 +679,13 @@ nft_ct_select_ops(const struct nft_ctx *ctx, if (tb[NFTA_CT_DREG]) return &nft_ct_get_ops; - if (tb[NFTA_CT_SREG]) + if (tb[NFTA_CT_SREG]) { +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE)) + return &nft_ct_set_zone_ops; +#endif return &nft_ct_set_ops; + } return ERR_PTR(-EINVAL); } diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 47beb3abcc9d..c308920b194c 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -15,19 +15,29 @@ #include #include #include -// FIXME: -#include +#include struct nft_exthdr { u8 type; u8 offset; u8 len; + u8 op; enum nft_registers dreg:8; + u8 flags; }; -static void nft_exthdr_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static unsigned int optlen(const u8 *opt, unsigned int offset) +{ + /* Beware zero-length options: make finite progress */ + if (opt[offset] <= TCPOPT_NOP || opt[offset + 1] == 0) + return 1; + else + return opt[offset + 1]; +} + +static void nft_exthdr_ipv6_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { struct nft_exthdr *priv = nft_expr_priv(expr); u32 *dest = ®s->data[priv->dreg]; @@ -35,8 +45,12 @@ static void nft_exthdr_eval(const struct nft_expr *expr, int err; err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); - if (err < 0) + if (priv->flags & NFT_EXTHDR_F_PRESENT) { + *dest = (err >= 0); + return; + } else if (err < 0) { goto err; + } offset += priv->offset; dest[priv->len / NFT_REG32_SIZE] = 0; @@ -47,11 +61,59 @@ static void nft_exthdr_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } +static void nft_exthdr_tcp_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE]; + struct nft_exthdr *priv = nft_expr_priv(expr); + unsigned int i, optl, tcphdr_len, offset; + u32 *dest = ®s->data[priv->dreg]; + struct tcphdr *tcph; + u8 *opt; + + if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP) + goto err; + + tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff); + if (!tcph) + goto err; + + tcphdr_len = __tcp_hdrlen(tcph); + if (tcphdr_len < sizeof(*tcph)) + goto err; + + tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff); + if (!tcph) + goto err; + + opt = (u8 *)tcph; + for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { + optl = optlen(opt, i); + + if (priv->type != opt[i]) + continue; + + if (i + optl > tcphdr_len || priv->len + priv->offset > optl) + goto err; + + offset = i + priv->offset; + dest[priv->len / NFT_REG32_SIZE] = 0; + memcpy(dest, opt + offset, priv->len); + + return; + } + +err: + regs->verdict.code = NFT_BREAK; +} + static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, + [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, }; static int nft_exthdr_init(const struct nft_ctx *ctx, @@ -59,13 +121,13 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); - u32 offset, len; + u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6; int err; - if (tb[NFTA_EXTHDR_DREG] == NULL || - tb[NFTA_EXTHDR_TYPE] == NULL || - tb[NFTA_EXTHDR_OFFSET] == NULL || - tb[NFTA_EXTHDR_LEN] == NULL) + if (!tb[NFTA_EXTHDR_DREG] || + !tb[NFTA_EXTHDR_TYPE] || + !tb[NFTA_EXTHDR_OFFSET] || + !tb[NFTA_EXTHDR_LEN]) return -EINVAL; err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset); @@ -76,10 +138,27 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (tb[NFTA_EXTHDR_FLAGS]) { + err = nft_parse_u32_check(tb[NFTA_EXTHDR_FLAGS], U8_MAX, &flags); + if (err < 0) + return err; + + if (flags & ~NFT_EXTHDR_F_PRESENT) + return -EINVAL; + } + + if (tb[NFTA_EXTHDR_OP]) { + err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op); + if (err < 0) + return err; + } + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]); + priv->flags = flags; + priv->op = op; return nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, priv->len); @@ -97,6 +176,10 @@ static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_OP, htonl(priv->op))) + goto nla_put_failure; return 0; nla_put_failure: @@ -104,17 +187,45 @@ static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) } static struct nft_expr_type nft_exthdr_type; -static const struct nft_expr_ops nft_exthdr_ops = { +static const struct nft_expr_ops nft_exthdr_ipv6_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), - .eval = nft_exthdr_eval, + .eval = nft_exthdr_ipv6_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, }; +static const struct nft_expr_ops nft_exthdr_tcp_ops = { + .type = &nft_exthdr_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), + .eval = nft_exthdr_tcp_eval, + .init = nft_exthdr_init, + .dump = nft_exthdr_dump, +}; + +static const struct nft_expr_ops * +nft_exthdr_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + u32 op; + + if (!tb[NFTA_EXTHDR_OP]) + return &nft_exthdr_ipv6_ops; + + op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP])); + switch (op) { + case NFT_EXTHDR_OP_TCPOPT: + return &nft_exthdr_tcp_ops; + case NFT_EXTHDR_OP_IPV6: + return &nft_exthdr_ipv6_ops; + } + + return ERR_PTR(-EOPNOTSUPP); +} + static struct nft_expr_type nft_exthdr_type __read_mostly = { .name = "exthdr", - .ops = &nft_exthdr_ops, + .select_ops = &nft_exthdr_select_ops, .policy = nft_exthdr_policy, .maxattr = NFTA_EXTHDR_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c new file mode 100644 index 000000000000..97f9649bcc7e --- /dev/null +++ b/net/netfilter/nft_set_bitmap.c @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2017 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* This bitmap uses two bits to represent one element. These two bits determine + * the element state in the current and the future generation. + * + * An element can be in three states. The generation cursor is represented using + * the ^ character, note that this cursor shifts on every succesful transaction. + * If no transaction is going on, we observe all elements are in the following + * state: + * + * 11 = this element is active in the current generation. In case of no updates, + * ^ it stays active in the next generation. + * 00 = this element is inactive in the current generation. In case of no + * ^ updates, it stays inactive in the next generation. + * + * On transaction handling, we observe these two temporary states: + * + * 01 = this element is inactive in the current generation and it becomes active + * ^ in the next one. This happens when the element is inserted but commit + * path has not yet been executed yet, so activation is still pending. On + * transaction abortion, the element is removed. + * 10 = this element is active in the current generation and it becomes inactive + * ^ in the next one. This happens when the element is deactivated but commit + * path has not yet been executed yet, so removal is still pending. On + * transation abortion, the next generation bit is reset to go back to + * restore its previous state. + */ +struct nft_bitmap { + u16 bitmap_size; + u8 bitmap[]; +}; + +static inline void nft_bitmap_location(u32 key, u32 *idx, u32 *off) +{ + u32 k = (key << 1); + + *idx = k / BITS_PER_BYTE; + *off = k % BITS_PER_BYTE; +} + +/* Fetch the two bits that represent the element and check if it is active based + * on the generation mask. + */ +static inline bool +nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask) +{ + return (bitmap[idx] & (0x3 << off)) & (genmask << off); +} + +static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + const struct nft_bitmap *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + u32 idx, off; + + nft_bitmap_location(*key, &idx, &off); + + return nft_bitmap_active(priv->bitmap, idx, off, genmask); +} + +static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, + struct nft_set_ext **_ext) +{ + struct nft_bitmap *priv = nft_set_priv(set); + struct nft_set_ext *ext = elem->priv; + u8 genmask = nft_genmask_next(net); + u32 idx, off; + + nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + if (nft_bitmap_active(priv->bitmap, idx, off, genmask)) + return -EEXIST; + + /* Enter 01 state. */ + priv->bitmap[idx] |= (genmask << off); + + return 0; +} + +static void nft_bitmap_remove(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_bitmap *priv = nft_set_priv(set); + struct nft_set_ext *ext = elem->priv; + u8 genmask = nft_genmask_next(net); + u32 idx, off; + + nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + /* Enter 00 state. */ + priv->bitmap[idx] &= ~(genmask << off); +} + +static void nft_bitmap_activate(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_bitmap *priv = nft_set_priv(set); + struct nft_set_ext *ext = elem->priv; + u8 genmask = nft_genmask_next(net); + u32 idx, off; + + nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + /* Enter 11 state. */ + priv->bitmap[idx] |= (genmask << off); +} + +static bool nft_bitmap_flush(const struct net *net, + const struct nft_set *set, void *ext) +{ + struct nft_bitmap *priv = nft_set_priv(set); + u8 genmask = nft_genmask_next(net); + u32 idx, off; + + nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + /* Enter 10 state, similar to deactivation. */ + priv->bitmap[idx] &= ~(genmask << off); + + return true; +} + +static struct nft_set_ext *nft_bitmap_ext_alloc(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_set_ext_tmpl tmpl; + struct nft_set_ext *ext; + + nft_set_ext_prepare(&tmpl); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + + ext = kzalloc(tmpl.len, GFP_KERNEL); + if (!ext) + return NULL; + + nft_set_ext_init(ext, &tmpl); + memcpy(nft_set_ext_key(ext), elem->key.val.data, set->klen); + + return ext; +} + +static void *nft_bitmap_deactivate(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_bitmap *priv = nft_set_priv(set); + u8 genmask = nft_genmask_next(net); + struct nft_set_ext *ext; + u32 idx, off, key = 0; + + memcpy(&key, elem->key.val.data, set->klen); + nft_bitmap_location(key, &idx, &off); + + if (!nft_bitmap_active(priv->bitmap, idx, off, genmask)) + return NULL; + + /* We have no real set extension since this is a bitmap, allocate this + * dummy object that is released from the commit/abort path. + */ + ext = nft_bitmap_ext_alloc(set, elem); + if (!ext) + return NULL; + + /* Enter 10 state. */ + priv->bitmap[idx] &= ~(genmask << off); + + return ext; +} + +static void nft_bitmap_walk(const struct nft_ctx *ctx, + struct nft_set *set, + struct nft_set_iter *iter) +{ + const struct nft_bitmap *priv = nft_set_priv(set); + struct nft_set_ext_tmpl tmpl; + struct nft_set_elem elem; + struct nft_set_ext *ext; + int idx, off; + u16 key; + + nft_set_ext_prepare(&tmpl); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + + for (idx = 0; idx < priv->bitmap_size; idx++) { + for (off = 0; off < BITS_PER_BYTE; off += 2) { + if (iter->count < iter->skip) + goto cont; + + if (!nft_bitmap_active(priv->bitmap, idx, off, + iter->genmask)) + goto cont; + + ext = kzalloc(tmpl.len, GFP_KERNEL); + if (!ext) { + iter->err = -ENOMEM; + return; + } + nft_set_ext_init(ext, &tmpl); + key = ((idx * BITS_PER_BYTE) + off) >> 1; + memcpy(nft_set_ext_key(ext), &key, set->klen); + + elem.priv = ext; + iter->err = iter->fn(ctx, set, iter, &elem); + + /* On set flush, this dummy extension object is released + * from the commit/abort path. + */ + if (!iter->flush) + kfree(ext); + + if (iter->err < 0) + return; +cont: + iter->count++; + } + } +} + +/* The bitmap size is pow(2, key length in bits) / bits per byte. This is + * multiplied by two since each element takes two bits. For 8 bit keys, the + * bitmap consumes 66 bytes. For 16 bit keys, 16388 bytes. + */ +static inline u32 nft_bitmap_size(u32 klen) +{ + return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1; +} + +static inline u32 nft_bitmap_total_size(u32 klen) +{ + return sizeof(struct nft_bitmap) + nft_bitmap_size(klen); +} + +static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[]) +{ + u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); + + return nft_bitmap_total_size(klen); +} + +static int nft_bitmap_init(const struct nft_set *set, + const struct nft_set_desc *desc, + const struct nlattr * const nla[]) +{ + struct nft_bitmap *priv = nft_set_priv(set); + + priv->bitmap_size = nft_bitmap_total_size(set->klen); + + return 0; +} + +static void nft_bitmap_destroy(const struct nft_set *set) +{ +} + +static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + /* Make sure bitmaps we don't get bitmaps larger than 16 Kbytes. */ + if (desc->klen > 2) + return false; + + est->size = nft_bitmap_total_size(desc->klen); + est->lookup = NFT_SET_CLASS_O_1; + est->space = NFT_SET_CLASS_O_1; + + return true; +} + +static struct nft_set_ops nft_bitmap_ops __read_mostly = { + .privsize = nft_bitmap_privsize, + .estimate = nft_bitmap_estimate, + .init = nft_bitmap_init, + .destroy = nft_bitmap_destroy, + .insert = nft_bitmap_insert, + .remove = nft_bitmap_remove, + .deactivate = nft_bitmap_deactivate, + .flush = nft_bitmap_flush, + .activate = nft_bitmap_activate, + .lookup = nft_bitmap_lookup, + .walk = nft_bitmap_walk, + .owner = THIS_MODULE, +}; + +static int __init nft_bitmap_module_init(void) +{ + return nft_register_set(&nft_bitmap_ops); +} + +static void __exit nft_bitmap_module_exit(void) +{ + nft_unregister_set(&nft_bitmap_ops); +} + +module_init(nft_bitmap_module_init); +module_exit(nft_bitmap_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index e36069fb76ae..5f652720fc78 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -167,8 +167,8 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set, nft_set_elem_clear_busy(&he->ext); } -static bool nft_hash_deactivate_one(const struct net *net, - const struct nft_set *set, void *priv) +static bool nft_hash_flush(const struct net *net, + const struct nft_set *set, void *priv) { struct nft_hash_elem *he = priv; @@ -195,7 +195,7 @@ static void *nft_hash_deactivate(const struct net *net, rcu_read_lock(); he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); if (he != NULL && - !nft_hash_deactivate_one(net, set, he)) + !nft_hash_flush(net, set, he)) he = NULL; rcu_read_unlock(); @@ -203,7 +203,8 @@ static void *nft_hash_deactivate(const struct net *net, return he; } -static void nft_hash_remove(const struct nft_set *set, +static void nft_hash_remove(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); @@ -383,7 +384,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, est->size = esize + 2 * sizeof(struct nft_hash_elem *); } - est->class = NFT_SET_CLASS_O_1; + est->lookup = NFT_SET_CLASS_O_1; + est->space = NFT_SET_CLASS_O_N; return true; } @@ -397,12 +399,12 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .insert = nft_hash_insert, .activate = nft_hash_activate, .deactivate = nft_hash_deactivate, - .deactivate_one = nft_hash_deactivate_one, + .flush = nft_hash_flush, .remove = nft_hash_remove, .lookup = nft_hash_lookup, .update = nft_hash_update, .walk = nft_hash_walk, - .features = NFT_SET_MAP | NFT_SET_TIMEOUT, + .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .owner = THIS_MODULE, }; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index f06f55ee516d..71e8fb886a73 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -151,7 +151,8 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, return err; } -static void nft_rbtree_remove(const struct nft_set *set, +static void nft_rbtree_remove(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree *priv = nft_set_priv(set); @@ -171,8 +172,8 @@ static void nft_rbtree_activate(const struct net *net, nft_set_elem_change_active(net, set, &rbe->ext); } -static bool nft_rbtree_deactivate_one(const struct net *net, - const struct nft_set *set, void *priv) +static bool nft_rbtree_flush(const struct net *net, + const struct nft_set *set, void *priv) { struct nft_rbtree_elem *rbe = priv; @@ -213,7 +214,7 @@ static void *nft_rbtree_deactivate(const struct net *net, parent = parent->rb_right; continue; } - nft_rbtree_deactivate_one(net, set, rbe); + nft_rbtree_flush(net, set, rbe); return rbe; } } @@ -290,7 +291,8 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, else est->size = nsize; - est->class = NFT_SET_CLASS_O_LOG_N; + est->lookup = NFT_SET_CLASS_O_LOG_N; + est->space = NFT_SET_CLASS_O_N; return true; } @@ -304,11 +306,11 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .insert = nft_rbtree_insert, .remove = nft_rbtree_remove, .deactivate = nft_rbtree_deactivate, - .deactivate_one = nft_rbtree_deactivate_one, + .flush = nft_rbtree_flush, .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, - .features = NFT_SET_INTERVAL | NFT_SET_MAP, + .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, .owner = THIS_MODULE, };