summaryrefslogtreecommitdiffstats
path: root/src/payload.c
diff options
context:
space:
mode:
authorFlorian Westphal <fw@strlen.de>2019-01-09 00:15:09 +0100
committerFlorian Westphal <fw@strlen.de>2019-01-11 15:06:00 +0100
commitce2651222911f09ca838fbdd29b5b2e0ff5f262f (patch)
tree6298b45b289b47edf36a1b71addcedaea7dce7c9 /src/payload.c
parentb338244abc7f018d79a95657fff88eadee7e9f6b (diff)
payload: refine payload expr merging
nf_tables can handle payload exprs for sizes <= sizeof(u32) via a direct operation from the eval loop, rather than a call to the payload expression. Two loads for four byte quantities are thus faster than a single load for an 8 byte load. ip saddr 1.2.3.4 ip daddr 2.3.4.5 is faster with this applied, even though it involves two payload and two compare expressions, just because all can be handled from the main loop without any calls to expression ops. Keep merging for linklayer and when at least one of the expressions already exceeded the 4 byte "limit" anyway. Signed-off-by: Florian Westphal <fw@strlen.de> Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'src/payload.c')
-rw-r--r--src/payload.c28
1 files changed, 27 insertions, 1 deletions
diff --git a/src/payload.c b/src/payload.c
index fab97b11..f638b0c0 100644
--- a/src/payload.c
+++ b/src/payload.c
@@ -721,7 +721,33 @@ bool payload_can_merge(const struct expr *e1, const struct expr *e2)
if (total < e1->len || total > (NFT_REG_SIZE * BITS_PER_BYTE))
return false;
- return true;
+ /* could return true after this, the expressions are mergeable.
+ *
+ * However, there are some caveats.
+ *
+ * Loading anything <= sizeof(u32) with base >= network header
+ * is fast, because its handled directly from eval loop in the
+ * kernel.
+ *
+ * We thus restrict merging a bit more.
+ */
+
+ /* can still be handled by fastpath after merge */
+ if (total <= NFT_REG32_SIZE * BITS_PER_BYTE)
+ return true;
+
+ /* Linklayer base is not handled in fastpath, merge */
+ if (e1->payload.base == PROTO_BASE_LL_HDR)
+ return true;
+
+ /* Also merge if at least one expression is already
+ * above REG32 size, in this case merging is faster.
+ */
+ if (e1->len > (NFT_REG32_SIZE * BITS_PER_BYTE) ||
+ e2->len > (NFT_REG32_SIZE * BITS_PER_BYTE))
+ return true;
+
+ return false;
}
/**