author    Stephen Rothwell <sfr@canb.auug.org.au>  2022-06-28 11:09:17 +1000
committer Stephen Rothwell <sfr@canb.auug.org.au>  2022-06-28 11:09:17 +1000
commit    8a264bbdb657dfed382360ecb4742b754be86af9 (patch)
tree      e11c374be232ebcc4b40db707667cc3b7fa9bf9b
parent    c95333b2ff8c3325df74d1eb36c3c797c9096dc0 (diff)
parent    fd75733da2f376c0c8c6513c3cb2ac227082ec5c (diff)
download  linux-next-8a264bbdb657dfed382360ecb4742b754be86af9.tar.gz
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
Diffstat:
-rw-r--r--  Documentation/bpf/instruction-set.rst | 2
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 9
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 6
-rw-r--r--  include/linux/bpf.h | 3
-rw-r--r--  include/linux/bpf_verifier.h | 12
-rw-r--r--  include/linux/filter.h | 1
-rw-r--r--  include/linux/net.h | 4
-rw-r--r--  include/net/tcp.h | 1
-rw-r--r--  include/net/udp.h | 3
-rw-r--r--  kernel/bpf/bpf_iter.c | 9
-rw-r--r--  kernel/bpf/bpf_struct_ops.c | 7
-rw-r--r--  kernel/bpf/btf.c | 84
-rw-r--r--  kernel/bpf/core.c | 6
-rw-r--r--  kernel/bpf/verifier.c | 185
-rw-r--r--  kernel/trace/trace_uprobe.c | 2
-rw-r--r--  lib/test_bpf.c | 4
-rw-r--r--  net/core/skmsg.c | 48
-rw-r--r--  net/ipv4/af_inet.c | 3
-rw-r--r--  net/ipv4/bpf_tcp_ca.c | 39
-rw-r--r--  net/ipv4/tcp.c | 44
-rw-r--r--  net/ipv4/udp.c | 11
-rw-r--r--  net/ipv6/af_inet6.c | 3
-rw-r--r--  net/unix/af_unix.c | 23
-rw-r--r--  samples/bpf/xdp1_kern.c | 11
-rw-r--r--  samples/bpf/xdp2_kern.c | 11
-rw-r--r--  samples/bpf/xdp_tx_iptunnel_kern.c | 2
-rw-r--r--  tools/lib/bpf/libbpf.c | 72
-rw-r--r--  tools/lib/bpf/relo_core.c | 80
-rw-r--r--  tools/lib/bpf/relo_core.h | 2
-rw-r--r--  tools/perf/util/bpf-loader.c | 204
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 4
-rw-r--r--  tools/testing/selftests/bpf/bench.c | 55
-rw-r--r--  tools/testing/selftests/bpf/bench.h | 4
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_local_storage.c | 287
-rwxr-xr-x  tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh | 24
-rw-r--r--  tools/testing/selftests/bpf/benchs/run_common.sh | 17
-rw-r--r--  tools/testing/selftests/bpf/config | 6
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_loop.c | 62
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 61
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf.c | 1
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sock_fields.c | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_loop.c | 114
-rw-r--r--  tools/testing/selftests/bpf/progs/local_storage_bench.c | 104
-rw-r--r--  tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c | 35
-rw-r--r--  tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c | 21
-rw-r--r--  tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c | 60
-rw-r--r--  tools/testing/selftests/bpf/test_btf.h | 2
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c | 367
-rw-r--r--  tools/testing/selftests/bpf/verifier/bpf_loop_inline.c | 263
49 files changed, 2063 insertions(+), 316 deletions(-)
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index 9e27fbdb2206f..1b0e6711dec97 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -351,7 +351,7 @@ These instructions have seven implicit operands:
* Register R0 is an implicit output which contains the data fetched from
the packet.
* Registers R1-R5 are scratch registers that are clobbered after a call to
- ``BPF_ABS | BPF_LD`` or ``BPF_IND`` | BPF_LD instructions.
+ ``BPF_ABS | BPF_LD`` or ``BPF_IND | BPF_LD`` instructions.
These instructions have an implicit program exit condition as well. When an
eBPF program is trying to access the data beyond the packet boundary, the
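For reference, a hedged sketch of how such a legacy packet-access instruction is built with the kernel's insn macros; the offset and surrounding instructions are illustrative only:

/* Illustrative only: load one byte from packet offset 12 via the
 * legacy path.  R6 must already hold the ctx; R0 receives the byte
 * and R1-R5 are clobbered by the implicit call.
 */
struct bpf_insn insns[] = {
	BPF_LD_ABS(BPF_B, 12),		/* R0 = *(u8 *)(packet + 12) */
	BPF_MOV64_IMM(BPF_REG_0, 0),	/* return 0 */
	BPF_EXIT_INSN(),
};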
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 42f2e9a8616c3..f08a4447d363f 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -246,6 +246,7 @@ static bool is_lsi_offset(int offset, int scale)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
const struct bpf_prog *prog = ctx->prog;
+ const bool is_main_prog = prog->aux->func_idx == 0;
const u8 r6 = bpf2a64[BPF_REG_6];
const u8 r7 = bpf2a64[BPF_REG_7];
const u8 r8 = bpf2a64[BPF_REG_8];
@@ -299,7 +300,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
/* Set up BPF prog stack base register */
emit(A64_MOV(1, fp, A64_SP), ctx);
- if (!ebpf_from_cbpf) {
+ if (!ebpf_from_cbpf && is_main_prog) {
/* Initialize tail_call_cnt */
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
@@ -1530,3 +1531,9 @@ void bpf_jit_free_exec(void *addr)
{
return vfree(addr);
}
+
+/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+ return true;
+}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index c98b8c0ed3b83..95b36ab2883f5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2492,3 +2492,9 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
return ERR_PTR(-EINVAL);
return dst;
}
+
+/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+ return true;
+}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0edd7d2c00646..d05e1495a06e0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1286,6 +1286,9 @@ struct bpf_array {
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 33
+/* Maximum number of loops for bpf_loop */
+#define BPF_MAX_LOOPS BIT(23)
+
#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \
BPF_F_RDONLY_PROG | \
BPF_F_WRONLY | \
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 3930c963fa670..81b19669efba8 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -344,6 +344,14 @@ struct bpf_verifier_state_list {
int miss_cnt, hit_cnt;
};
+struct bpf_loop_inline_state {
+ int initialized:1; /* set to true upon first entry */
+ int fit_for_inline:1; /* true if callback function is the same
+ * at each call and flags are always zero
+ */
+ u32 callback_subprogno; /* valid when fit_for_inline is true */
+};
+
/* Possible states for alu_state member. */
#define BPF_ALU_SANITIZE_SRC (1U << 0)
#define BPF_ALU_SANITIZE_DST (1U << 1)
@@ -373,6 +381,10 @@ struct bpf_insn_aux_data {
u32 mem_size; /* mem_size for non-struct typed var */
};
} btf_var;
+ /* if instruction is a call to bpf_loop this field tracks
+ * the state of the relevant registers to make decision about inlining
+ */
+ struct bpf_loop_inline_state loop_inline_state;
};
u64 map_key_state; /* constant (32 bit) key tracking for maps */
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index d0cbb31b1b4d8..4c1a8b2475450 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -914,6 +914,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog);
bool bpf_jit_needs_zext(void);
+bool bpf_jit_supports_subprog_tailcalls(void);
bool bpf_jit_supports_kfunc_call(void);
bool bpf_helper_changes_pkt_data(void *func);
diff --git a/include/linux/net.h b/include/linux/net.h
index 12093f4db50c4..a03485e8cbb2d 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -152,6 +152,8 @@ struct module;
struct sk_buff;
typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
unsigned int, size_t);
+typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *);
+
struct proto_ops {
int family;
@@ -214,6 +216,8 @@ struct proto_ops {
*/
int (*read_sock)(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
+ /* This is different from read_sock(): it reads an entire skb at a time. */
+ int (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor);
int (*sendpage_locked)(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
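To contrast with the four-argument sk_read_actor_t above, a hedged sketch of the new callback shape — the actor name and body here are hypothetical:

/* Hypothetical skb_read_actor_t: read_skb() hands over one whole
 * queued skb per call.  Return the bytes consumed or <= 0 to stop
 * the read loop.  The transport releases the skb after the call,
 * so an actor that must keep the data takes its own reference
 * first (as net/core/skmsg.c does via skb_get()).
 */
static int my_recv_actor(struct sock *sk, struct sk_buff *skb)
{
	return skb->len;	/* consume the whole skb */
}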
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c21a9b516f1ed..8e48dc56837b5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -672,6 +672,7 @@ void tcp_get_info(struct sock *, struct tcp_info *);
/* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
+int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
void tcp_initialize_rcv_mss(struct sock *sk);
diff --git a/include/net/udp.h b/include/net/udp.h
index b60eea2e3fae2..987f7fc7c0aa8 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -306,8 +306,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
struct sk_buff *skb);
struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport);
-int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor);
+int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
/* UDP uses skb->dev_scratch to cache as much information as possible and avoid
* possibly multiple cache miss on dequeue()
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index d5d96ceca1058..7e8fd49406f64 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -723,9 +723,6 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = {
.arg4_type = ARG_ANYTHING,
};
-/* maximum number of loops */
-#define MAX_LOOPS BIT(23)
-
BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
u64, flags)
{
@@ -733,9 +730,13 @@ BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
u64 ret;
u32 i;
+ /* Note: these safety checks are also enforced when bpf_loop
+ * is inlined; keep the two implementations in sync when
+ * modifying either. See verifier.c:inline_bpf_loop.
+ */
if (flags)
return -EINVAL;
- if (nr_loops > MAX_LOOPS)
+ if (nr_loops > BPF_MAX_LOOPS)
return -E2BIG;
for (i = 0; i < nr_loops; i++) {
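From the program side, the contract enforced above (flags must be zero, nr_loops at most BPF_MAX_LOOPS) looks as follows — a minimal sketch, not one of the selftests:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

static int cb(__u32 index, void *ctx)
{
	return 0;	/* 0 = run next iteration, non-zero = stop early */
}

SEC("tc")
int use_bpf_loop(struct __sk_buff *skb)
{
	/* nr_loops above BPF_MAX_LOOPS (1 << 23 = 8388608) yields -E2BIG;
	 * any non-zero flags value yields -EINVAL
	 */
	long n = bpf_loop(1000, cb, NULL, 0);

	return n < 0;
}

char _license[] SEC("license") = "GPL";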
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index d9a3c9207240f..7e0068c3399ca 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -503,10 +503,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto unlock;
}
- /* Error during st_ops->reg(). It is very unlikely since
- * the above init_member() should have caught it earlier
- * before reg(). The only possibility is if there was a race
- * in registering the struct_ops (under the same name) to
+ /* Error during st_ops->reg(). Can happen if this struct_ops needs to be
+ * verified as a whole, after all init_member() calls. Can also happen if
+ * there was a race in registering the struct_ops (under the same name) to
* a sub-system through different struct_ops's maps.
*/
set_memory_nx((long)st_map->image, 1);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index d003d4d8242ac..13ace59ac1a7a 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -7421,87 +7421,6 @@ EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs);
#define MAX_TYPES_ARE_COMPAT_DEPTH 2
-static
-int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
- const struct btf *targ_btf, __u32 targ_id,
- int level)
-{
- const struct btf_type *local_type, *targ_type;
- int depth = 32; /* max recursion depth */
-
- /* caller made sure that names match (ignoring flavor suffix) */
- local_type = btf_type_by_id(local_btf, local_id);
- targ_type = btf_type_by_id(targ_btf, targ_id);
- if (btf_kind(local_type) != btf_kind(targ_type))
- return 0;
-
-recur:
- depth--;
- if (depth < 0)
- return -EINVAL;
-
- local_type = btf_type_skip_modifiers(local_btf, local_id, &local_id);
- targ_type = btf_type_skip_modifiers(targ_btf, targ_id, &targ_id);
- if (!local_type || !targ_type)
- return -EINVAL;
-
- if (btf_kind(local_type) != btf_kind(targ_type))
- return 0;
-
- switch (btf_kind(local_type)) {
- case BTF_KIND_UNKN:
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION:
- case BTF_KIND_ENUM:
- case BTF_KIND_FWD:
- case BTF_KIND_ENUM64:
- return 1;
- case BTF_KIND_INT:
- /* just reject deprecated bitfield-like integers; all other
- * integers are by default compatible between each other
- */
- return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
- case BTF_KIND_PTR:
- local_id = local_type->type;
- targ_id = targ_type->type;
- goto recur;
- case BTF_KIND_ARRAY:
- local_id = btf_array(local_type)->type;
- targ_id = btf_array(targ_type)->type;
- goto recur;
- case BTF_KIND_FUNC_PROTO: {
- struct btf_param *local_p = btf_params(local_type);
- struct btf_param *targ_p = btf_params(targ_type);
- __u16 local_vlen = btf_vlen(local_type);
- __u16 targ_vlen = btf_vlen(targ_type);
- int i, err;
-
- if (local_vlen != targ_vlen)
- return 0;
-
- for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
- if (level <= 0)
- return -EINVAL;
-
- btf_type_skip_modifiers(local_btf, local_p->type, &local_id);
- btf_type_skip_modifiers(targ_btf, targ_p->type, &targ_id);
- err = __bpf_core_types_are_compat(local_btf, local_id,
- targ_btf, targ_id,
- level - 1);
- if (err <= 0)
- return err;
- }
-
- /* tail recurse for return type check */
- btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
- btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
- goto recur;
- }
- default:
- return 0;
- }
-}
-
/* Check local and target types for compatibility. This check is used for
 * type-based CO-RE relocations and follows slightly different rules than
* field-based relocations. This function assumes that root types were already
@@ -7524,8 +7443,7 @@ recur:
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
const struct btf *targ_btf, __u32 targ_id)
{
- return __bpf_core_types_are_compat(local_btf, local_id,
- targ_btf, targ_id,
+ return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
MAX_TYPES_ARE_COMPAT_DEPTH);
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b5ffebcce6cca..f023cb399e3f3 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2729,6 +2729,12 @@ bool __weak bpf_jit_needs_zext(void)
return false;
}
+/* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */
+bool __weak bpf_jit_supports_subprog_tailcalls(void)
+{
+ return false;
+}
+
bool __weak bpf_jit_supports_kfunc_call(void)
{
return false;
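The pairing with the arch hunks above is the kernel's usual weak-default idiom: common code supplies a __weak stub that reports the feature as absent, and any JIT that implements it provides a strong definition that wins at link time. In isolation (names hypothetical):

/* common code: weak default, feature disabled unless overridden */
bool __weak arch_has_feature(void)
{
	return false;
}

/* arch code, in a separate object file: the strong definition
 * overrides the weak one when this architecture has the feature
 */
bool arch_has_feature(void)
{
	return true;
}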
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2859901ffbe37..4938477912cda 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6154,7 +6154,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
{
- return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
+ return env->prog->jit_requested &&
+ bpf_jit_supports_subprog_tailcalls();
}
static int check_map_func_compatibility(struct bpf_verifier_env *env,
@@ -7124,6 +7125,41 @@ static int check_get_func_ip(struct bpf_verifier_env *env)
return -ENOTSUPP;
}
+static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
+{
+ return &env->insn_aux_data[env->insn_idx];
+}
+
+static bool loop_flag_is_zero(struct bpf_verifier_env *env)
+{
+ struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_reg_state *reg = &regs[BPF_REG_4];
+ bool reg_is_null = register_is_null(reg);
+
+ if (reg_is_null)
+ mark_chain_precision(env, BPF_REG_4);
+
+ return reg_is_null;
+}
+
+static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
+{
+ struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
+
+ if (!state->initialized) {
+ state->initialized = 1;
+ state->fit_for_inline = loop_flag_is_zero(env);
+ state->callback_subprogno = subprogno;
+ return;
+ }
+
+ if (!state->fit_for_inline)
+ return;
+
+ state->fit_for_inline = (loop_flag_is_zero(env) &&
+ state->callback_subprogno == subprogno);
+}
+
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
@@ -7276,6 +7312,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
err = check_bpf_snprintf_call(env, regs);
break;
case BPF_FUNC_loop:
+ update_loop_inline_state(env, meta.subprogno);
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_loop_callback_state);
break;
@@ -7682,11 +7719,6 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
return true;
}
-static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
-{
- return &env->insn_aux_data[env->insn_idx];
-}
-
enum {
REASON_BOUNDS = -1,
REASON_TYPE = -2,
@@ -9064,7 +9096,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (opcode == BPF_END || opcode == BPF_NEG) {
if (opcode == BPF_NEG) {
- if (BPF_SRC(insn->code) != 0 ||
+ if (BPF_SRC(insn->code) != BPF_K ||
insn->src_reg != BPF_REG_0 ||
insn->off != 0 || insn->imm != 0) {
verbose(env, "BPF_NEG uses reserved fields\n");
@@ -14315,6 +14347,142 @@ patch_call_imm:
return 0;
}
+static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
+ int position,
+ s32 stack_base,
+ u32 callback_subprogno,
+ u32 *cnt)
+{
+ s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
+ s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
+ s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
+ int reg_loop_max = BPF_REG_6;
+ int reg_loop_cnt = BPF_REG_7;
+ int reg_loop_ctx = BPF_REG_8;
+
+ struct bpf_prog *new_prog;
+ u32 callback_start;
+ u32 call_insn_offset;
+ s32 callback_offset;
+
+ /* This represents an inlined version of bpf_iter.c:bpf_loop;
+ * keep it in sync with that helper when modifying either.
+ */
+ struct bpf_insn insn_buf[] = {
+ /* Return error and jump to the end of the patch if
+ * expected number of iterations is too big.
+ */
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
+ BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 16),
+ /* spill R6, R7, R8 to use these as loop vars */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
+ /* initialize loop vars */
+ BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
+ BPF_MOV32_IMM(reg_loop_cnt, 0),
+ BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
+ /* loop header,
+ * if reg_loop_cnt >= reg_loop_max skip the loop body
+ */
+ BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
+ /* callback call,
+ * correct callback offset would be set after patching
+ */
+ BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
+ BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
+ BPF_CALL_REL(0),
+ /* increment loop counter */
+ BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
+ /* jump to loop header if callback returned 0 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
+ /* return value of bpf_loop,
+ * set R0 to the number of iterations
+ */
+ BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
+ /* restore original values of R6, R7, R8 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
+ };
+
+ *cnt = ARRAY_SIZE(insn_buf);
+ new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
+ if (!new_prog)
+ return new_prog;
+
+ /* callback start is known only after patching */
+ callback_start = env->subprog_info[callback_subprogno].start;
+ /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
+ call_insn_offset = position + 12;
+ callback_offset = callback_start - call_insn_offset - 1;
+ new_prog->insnsi[call_insn_offset].imm = callback_offset;
+
+ return new_prog;
+}
+
+static bool is_bpf_loop_call(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL) &&
+ insn->src_reg == 0 &&
+ insn->imm == BPF_FUNC_loop;
+}
+
+/* For all sub-programs in the program (including main) check
+ * insn_aux_data to see if there are bpf_loop calls that require
+ * inlining. If such calls are found the calls are replaced with a
+ * sequence of instructions produced by `inline_bpf_loop` function and
+ * subprog stack_depth is increased by the size of 3 registers.
+ * This stack space is used to spill the values of R6, R7 and R8. These
+ * registers are used to store the loop bound, counter and context
+ * variables.
+ */
+static int optimize_bpf_loop(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *subprogs = env->subprog_info;
+ int i, cur_subprog = 0, cnt, delta = 0;
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ u16 stack_depth = subprogs[cur_subprog].stack_depth;
+ u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+ u16 stack_depth_extra = 0;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ struct bpf_loop_inline_state *inline_state =
+ &env->insn_aux_data[i + delta].loop_inline_state;
+
+ if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
+ struct bpf_prog *new_prog;
+
+ stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
+ new_prog = inline_bpf_loop(env,
+ i + delta,
+ -(stack_depth + stack_depth_extra),
+ inline_state->callback_subprogno,
+ &cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ }
+
+ if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+ subprogs[cur_subprog].stack_depth += stack_depth_extra;
+ cur_subprog++;
+ stack_depth = subprogs[cur_subprog].stack_depth;
+ stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+ stack_depth_extra = 0;
+ }
+ }
+
+ env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
+
+ return 0;
+}
+
static void free_states(struct bpf_verifier_env *env)
{
struct bpf_verifier_state_list *sl, *sln;
@@ -15052,6 +15220,9 @@ skip_full_check:
ret = check_max_stack_depth(env);
/* instruction rewrites happen after this point */
+ if (ret == 0)
+ ret = optimize_bpf_loop(env);
+
if (is_priv) {
if (ret == 0)
opt_hard_wire_dead_code_branches(env);
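As a cross-check on the insn_buf sequence in inline_bpf_loop(), a hedged C rendering of what the patched instructions compute — register assignments follow the comments in the patch; the function itself is illustrative, not kernel code:

/* C equivalent of the inlined patch: R1 = nr_loops, R2 = callback,
 * R3 = ctx; R4 (flags) is known to be zero, or inlining is skipped.
 */
static long inlined_bpf_loop(u64 nr_loops,
			     long (*cb)(u64 i, void *ctx), void *ctx)
{
	u64 i = 0;			/* reg_loop_cnt (R7) */

	if (nr_loops > BPF_MAX_LOOPS)	/* guard kept from the helper */
		return -E2BIG;
	while (i < nr_loops) {		/* bound lives in reg_loop_max (R6) */
		long ret = cb(i, ctx);	/* the BPF_CALL_REL, patched later */

		i++;
		if (ret)		/* non-zero callback return stops */
			break;
	}
	return i;			/* R0: number of iterations performed */
}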
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 1f5351cae25f2..88ba5b4bd0c59 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1343,6 +1343,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
int size, esize;
int rctx;
+#ifdef CONFIG_BPF_EVENTS
if (bpf_prog_array_valid(call)) {
u32 ret;
@@ -1350,6 +1351,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
if (!ret)
return;
}
+#endif /* CONFIG_BPF_EVENTS */
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 2a7836e115b4e..5820704165a64 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -14733,9 +14733,9 @@ static struct skb_segment_test skb_segment_tests[] __initconst = {
.build_skb = build_test_skb_linear_no_head_frag,
.features = NETIF_F_SG | NETIF_F_FRAGLIST |
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_GSO |
- NETIF_F_LLTX_BIT | NETIF_F_GRO |
+ NETIF_F_LLTX | NETIF_F_GRO |
NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
- NETIF_F_HW_VLAN_STAG_TX_BIT
+ NETIF_F_HW_VLAN_STAG_TX
}
};
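The fix above is easy to misread: NETIF_F_*_BIT constants are bit indices, while the plain NETIF_F_* names are the corresponding masks, so OR-ing _BIT values into a feature mask silently sets unrelated low bits. The distinction in a self-contained sketch:

/* generic sketch of the index-vs-mask distinction */
enum { FEAT_A_BIT, FEAT_B_BIT };	/* indices 0 and 1 */
#define FEAT_A	(1ULL << FEAT_A_BIT)	/* mask 0x1 */
#define FEAT_B	(1ULL << FEAT_B_BIT)	/* mask 0x2 */

u64 good = FEAT_A | FEAT_B;		/* 0x3: both features set */
u64 bad = FEAT_A_BIT | FEAT_B_BIT;	/* 0x1: an index, not a mask */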
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index fc69154bbc88d..266d3b7b7d0ba 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -497,23 +497,27 @@ bool sk_msg_is_readable(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_msg_is_readable);
-static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
- struct sk_buff *skb)
+static struct sk_msg *alloc_sk_msg(void)
{
struct sk_msg *msg;
- if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+ msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
+ if (unlikely(!msg))
return NULL;
+ sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS);
+ return msg;
+}
- if (!sk_rmem_schedule(sk, skb, skb->truesize))
+static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
return NULL;
- msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
- if (unlikely(!msg))
+ if (!sk_rmem_schedule(sk, skb, skb->truesize))
return NULL;
- sk_msg_init(msg);
- return msg;
+ return alloc_sk_msg();
}
static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
@@ -590,13 +594,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len)
{
- struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+ struct sk_msg *msg = alloc_sk_msg();
struct sock *sk = psock->sk;
int err;
if (unlikely(!msg))
return -EAGAIN;
- sk_msg_init(msg);
skb_set_owner_r(skb, sk);
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
if (err < 0)
@@ -1165,21 +1168,14 @@ static void sk_psock_done_strp(struct sk_psock *psock)
}
#endif /* CONFIG_BPF_STREAM_PARSER */
-static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
- unsigned int offset, size_t orig_len)
+static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
{
- struct sock *sk = (struct sock *)desc->arg.data;
struct sk_psock *psock;
struct bpf_prog *prog;
int ret = __SK_DROP;
- int len = orig_len;
+ int len = skb->len;
- /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
- skb = skb_clone(skb, GFP_ATOMIC);
- if (!skb) {
- desc->error = -ENOMEM;
- return 0;
- }
+ skb_get(skb);
rcu_read_lock();
psock = sk_psock(sk);
@@ -1192,12 +1188,10 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
if (!prog)
prog = READ_ONCE(psock->progs.skb_verdict);
if (likely(prog)) {
- skb->sk = sk;
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
- skb->sk = NULL;
}
if (sk_psock_verdict_apply(psock, skb, ret) < 0)
len = 0;
@@ -1209,16 +1203,10 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
- read_descriptor_t desc;
- if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+ if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
return;
-
- desc.arg.data = sk;
- desc.error = 0;
- desc.count = 1;
-
- sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
+ sock->ops->read_skb(sk, sk_psock_verdict_recv);
}
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
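The skb_clone() to skb_get() switch follows from the new contract: with read_sock(), tcp_read_sock() still owned the skb and would sk_eat_skb() it afterwards, forcing the verdict path to clone; with read_skb(), the skb arrives already unlinked from the receive queue, so a plain reference suffices when the data must survive sk_psock_verdict_apply(). Side by side, purely as illustration:

/* read_sock() era: the caller still owned and would free the skb,
 * so keeping the data required a full clone
 */
skb = skb_clone(skb, GFP_ATOMIC);

/* read_skb() era: the skb is handed over unlinked; bumping the
 * refcount is sufficient, and much cheaper
 */
skb_get(skb);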
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index da81f56fdd1c5..7abd652a558f0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1040,6 +1040,7 @@ const struct proto_ops inet_stream_ops = {
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
+ .read_skb = tcp_read_skb,
.sendmsg_locked = tcp_sendmsg_locked,
.sendpage_locked = tcp_sendpage_locked,
.peek_len = tcp_peek_len,
@@ -1067,7 +1068,7 @@ const struct proto_ops inet_dgram_ops = {
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
- .read_sock = udp_read_sock,
+ .read_skb = udp_read_skb,
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index f79ab942f03b6..7a181631b9952 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -14,18 +14,6 @@
/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
extern struct bpf_struct_ops bpf_tcp_congestion_ops;
-static u32 optional_ops[] = {
- offsetof(struct tcp_congestion_ops, init),
- offsetof(struct tcp_congestion_ops, release),
- offsetof(struct tcp_congestion_ops, set_state),
- offsetof(struct tcp_congestion_ops, cwnd_event),
- offsetof(struct tcp_congestion_ops, in_ack_event),
- offsetof(struct tcp_congestion_ops, pkts_acked),
- offsetof(struct tcp_congestion_ops, min_tso_segs),
- offsetof(struct tcp_congestion_ops, sndbuf_expand),
- offsetof(struct tcp_congestion_ops, cong_control),
-};
-
static u32 unsupported_ops[] = {
offsetof(struct tcp_congestion_ops, get_info),
};
@@ -51,18 +39,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
return 0;
}
-static bool is_optional(u32 member_offset)
-{
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(optional_ops); i++) {
- if (member_offset == optional_ops[i])
- return true;
- }
-
- return false;
-}
-
static bool is_unsupported(u32 member_offset)
{
unsigned int i;
@@ -111,6 +87,12 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
}
switch (off) {
+ case offsetof(struct sock, sk_pacing_rate):
+ end = offsetofend(struct sock, sk_pacing_rate);
+ break;
+ case offsetof(struct sock, sk_pacing_status):
+ end = offsetofend(struct sock, sk_pacing_status);
+ break;
case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv):
end = offsetofend(struct inet_connection_sock, icsk_ca_priv);
break;
@@ -240,7 +222,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
{
const struct tcp_congestion_ops *utcp_ca;
struct tcp_congestion_ops *tcp_ca;
- int prog_fd;
u32 moff;
utcp_ca = (const struct tcp_congestion_ops *)udata;
@@ -262,14 +243,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
return 1;
}
- if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL))
- return 0;
-
- /* Ensure bpf_prog is provided for compulsory func ptr */
- prog_fd = (int)(*(unsigned long *)(udata + moff));
- if (!prog_fd && !is_optional(moff) && !is_unsupported(moff))
- return -EINVAL;
-
return 0;
}
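With sk_pacing_rate and sk_pacing_status now writable through btf_struct_access, a BPF congestion controller can drive its own pacing. A hedged sketch of what a struct_ops program might do — the section name and rate value are assumptions, not the selftest:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("struct_ops/ca_init")
void BPF_PROG(ca_init, struct sock *sk)
{
	/* both fields were read-only to struct_ops programs before
	 * this change
	 */
	sk->sk_pacing_status = SK_PACING_NEEDED;
	sk->sk_pacing_rate = 12500000;	/* ~100 Mbit/s, arbitrary */
}

char _license[] SEC("license") = "GPL";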
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f7309452bdcec..9d2fd3ced21b2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1734,6 +1734,50 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
}
EXPORT_SYMBOL(tcp_read_sock);
+int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 seq = tp->copied_seq;
+ struct sk_buff *skb;
+ int copied = 0;
+ u32 offset;
+
+ if (sk->sk_state == TCP_LISTEN)
+ return -ENOTCONN;
+
+ while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+ int used;
+
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ used = recv_actor(sk, skb);
+ if (used <= 0) {
+ if (!copied)
+ copied = used;
+ break;
+ }
+ seq += used;
+ copied += used;
+
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
+ consume_skb(skb);
+ ++seq;
+ break;
+ }
+ consume_skb(skb);
+ break;
+ }
+ WRITE_ONCE(tp->copied_seq, seq);
+
+ tcp_rcv_space_adjust(sk);
+
+ /* Clean up data we have read: this will send ACK frames as needed. */
+ if (copied > 0)
+ tcp_cleanup_rbuf(sk, copied);
+
+ return copied;
+}
+EXPORT_SYMBOL(tcp_read_skb);
+
int tcp_peek_len(struct socket *sock)
{
return tcp_inq(sock->sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6172b4750a888..2516078aa03e7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1797,8 +1797,7 @@ busy_check:
}
EXPORT_SYMBOL(__skb_recv_udp);
-int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor)
+int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
int copied = 0;
@@ -1820,7 +1819,8 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
continue;
}
- used = recv_actor(desc, skb, 0, skb->len);
+ WARN_ON(!skb_set_owner_sk_safe(skb, sk));
+ used = recv_actor(sk, skb);
if (used <= 0) {
if (!copied)
copied = used;
@@ -1831,13 +1831,12 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
}
kfree_skb(skb);
- if (!desc->count)
- break;
+ break;
}
return copied;
}
-EXPORT_SYMBOL(udp_read_sock);
+EXPORT_SYMBOL(udp_read_skb);
/*
* This should be easy, if there is something there we
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 658823e91ecab..0ee0770e79aa5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -702,6 +702,7 @@ const struct proto_ops inet6_stream_ops = {
.sendpage_locked = tcp_sendpage_locked,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
+ .read_skb = tcp_read_skb,
.peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
@@ -727,7 +728,7 @@ const struct proto_ops inet6_dgram_ops = {
.getsockopt = sock_common_getsockopt, /* ok */
.sendmsg = inet6_sendmsg, /* retpoline's sake */
.recvmsg = inet6_recvmsg, /* retpoline's sake */
- .read_sock = udp_read_sock,
+ .read_skb = udp_read_skb,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.set_peek_off = sk_set_peek_off,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 49f6626330c3d..1b811482fd570 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -736,10 +736,8 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
-static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor);
-static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor);
+static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
+static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -793,7 +791,7 @@ static const struct proto_ops unix_stream_ops = {
.shutdown = unix_shutdown,
.sendmsg = unix_stream_sendmsg,
.recvmsg = unix_stream_recvmsg,
- .read_sock = unix_stream_read_sock,
+ .read_skb = unix_stream_read_skb,
.mmap = sock_no_mmap,
.sendpage = unix_stream_sendpage,
.splice_read = unix_stream_splice_read,
@@ -818,7 +816,7 @@ static const struct proto_ops unix_dgram_ops = {
.listen = sock_no_listen,
.shutdown = unix_shutdown,
.sendmsg = unix_dgram_sendmsg,
- .read_sock = unix_read_sock,
+ .read_skb = unix_read_skb,
.recvmsg = unix_dgram_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
@@ -2478,8 +2476,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si
return __unix_dgram_recvmsg(sk, msg, size, flags);
}
-static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor)
+static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
int copied = 0;
@@ -2494,7 +2491,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
if (!skb)
return err;
- used = recv_actor(desc, skb, 0, skb->len);
+ used = recv_actor(sk, skb);
if (used <= 0) {
if (!copied)
copied = used;
@@ -2505,8 +2502,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
}
kfree_skb(skb);
- if (!desc->count)
- break;
+ break;
}
return copied;
@@ -2641,13 +2637,12 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
}
#endif
-static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor)
+static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
if (unlikely(sk->sk_state != TCP_ESTABLISHED))
return -ENOTCONN;
- return unix_read_sock(sk, desc, recv_actor);
+ return unix_read_skb(sk, recv_actor);
}
static int unix_stream_read_generic(struct unix_stream_read_state *state,
diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c
index f0c5d95084dec..0a5c704badd00 100644
--- a/samples/bpf/xdp1_kern.c
+++ b/samples/bpf/xdp1_kern.c
@@ -39,11 +39,13 @@ static int parse_ipv6(void *data, u64 nh_off, void *data_end)
return ip6h->nexthdr;
}
-SEC("xdp1")
+#define XDPBUFSIZE 64
+SEC("xdp.frags")
int xdp_prog1(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
+ __u8 pkt[XDPBUFSIZE] = {};
+ void *data_end = &pkt[XDPBUFSIZE-1];
+ void *data = pkt;
struct ethhdr *eth = data;
int rc = XDP_DROP;
long *value;
@@ -51,6 +53,9 @@ int xdp_prog1(struct xdp_md *ctx)
u64 nh_off;
u32 ipproto;
+ if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt)))
+ return rc;
+
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return rc;
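The sample's new shape generalizes: an xdp.frags program cannot assume the headers sit in the linear ctx->data area, so it copies the leading bytes into a stack buffer with bpf_xdp_load_bytes() and parses there. The pattern in isolation — the buffer size is an assumption:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define HDRBUF 64

SEC("xdp.frags")
int xdp_parse_hdrs(struct xdp_md *ctx)
{
	__u8 buf[HDRBUF] = {};

	/* copies across fragments; fails if the packet is shorter */
	if (bpf_xdp_load_bytes(ctx, 0, buf, sizeof(buf)))
		return XDP_DROP;

	/* parse Ethernet/IP headers from buf[] instead of ctx->data */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";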
diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
index d8a64ab077b00..3332ba6bb95fb 100644
--- a/samples/bpf/xdp2_kern.c
+++ b/samples/bpf/xdp2_kern.c
@@ -55,11 +55,13 @@ static int parse_ipv6(void *data, u64 nh_off, void *data_end)
return ip6h->nexthdr;
}
-SEC("xdp1")
+#define XDPBUFSIZE 64
+SEC("xdp.frags")
int xdp_prog1(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
+ __u8 pkt[XDPBUFSIZE] = {};
+ void *data_end = &pkt[XDPBUFSIZE-1];
+ void *data = pkt;
struct ethhdr *eth = data;
int rc = XDP_DROP;
long *value;
@@ -67,6 +69,9 @@ int xdp_prog1(struct xdp_md *ctx)
u64 nh_off;
u32 ipproto;
+ if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt)))
+ return rc;
+
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return rc;
diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c
index 575d57e4b8d63..0e2bca3a3fff7 100644
--- a/samples/bpf/xdp_tx_iptunnel_kern.c
+++ b/samples/bpf/xdp_tx_iptunnel_kern.c
@@ -212,7 +212,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
return XDP_TX;
}
-SEC("xdp_tx_iptunnel")
+SEC("xdp.frags")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 49e359cd34df8..335467ece75f5 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -5732,77 +5732,7 @@ err_out:
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
const struct btf *targ_btf, __u32 targ_id)
{
- const struct btf_type *local_type, *targ_type;
- int depth = 32; /* max recursion depth */
-
- /* caller made sure that names match (ignoring flavor suffix) */
- local_type = btf__type_by_id(local_btf, local_id);
- targ_type = btf__type_by_id(targ_btf, targ_id);
- if (!btf_kind_core_compat(local_type, targ_type))
- return 0;
-
-recur:
- depth--;
- if (depth < 0)
- return -EINVAL;
-
- local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
- targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
- if (!local_type || !targ_type)
- return -EINVAL;
-
- if (!btf_kind_core_compat(local_type, targ_type))
- return 0;
-
- switch (btf_kind(local_type)) {
- case BTF_KIND_UNKN:
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION:
- case BTF_KIND_ENUM:
- case BTF_KIND_ENUM64:
- case BTF_KIND_FWD:
- return 1;
- case BTF_KIND_INT:
- /* just reject deprecated bitfield-like integers; all other
- * integers are by default compatible between each other
- */
- return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
- case BTF_KIND_PTR:
- local_id = local_type->type;
- targ_id = targ_type->type;
- goto recur;
- case BTF_KIND_ARRAY:
- local_id = btf_array(local_type)->type;
- targ_id = btf_array(targ_type)->type;
- goto recur;
- case BTF_KIND_FUNC_PROTO: {
- struct btf_param *local_p = btf_params(local_type);
- struct btf_param *targ_p = btf_params(targ_type);
- __u16 local_vlen = btf_vlen(local_type);
- __u16 targ_vlen = btf_vlen(targ_type);
- int i, err;
-
- if (local_vlen != targ_vlen)
- return 0;
-
- for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
- skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
- skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
- err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
- if (err <= 0)
- return err;
- }
-
- /* tail recurse for return type check */
- skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
- skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
- goto recur;
- }
- default:
- pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
- btf_kind_str(local_type), local_id, targ_id);
- return 0;
- }
+ return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
}
static size_t bpf_core_hash_fn(const void *key, void *ctx)
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index 6ad3c3891a9ae..e070123332cd4 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -141,6 +141,86 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
}
}
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id, int level)
+{
+ const struct btf_type *local_type, *targ_type;
+ int depth = 32; /* max recursion depth */
+
+ /* caller made sure that names match (ignoring flavor suffix) */
+ local_type = btf_type_by_id(local_btf, local_id);
+ targ_type = btf_type_by_id(targ_btf, targ_id);
+ if (!btf_kind_core_compat(local_type, targ_type))
+ return 0;
+
+recur:
+ depth--;
+ if (depth < 0)
+ return -EINVAL;
+
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (!btf_kind_core_compat(local_type, targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ case BTF_KIND_ENUM64:
+ return 1;
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible between each other
+ */
+ return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+ case BTF_KIND_PTR:
+ local_id = local_type->type;
+ targ_id = targ_type->type;
+ goto recur;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *local_p = btf_params(local_type);
+ struct btf_param *targ_p = btf_params(targ_type);
+ __u16 local_vlen = btf_vlen(local_type);
+ __u16 targ_vlen = btf_vlen(targ_type);
+ int i, err;
+
+ if (local_vlen != targ_vlen)
+ return 0;
+
+ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+ if (level <= 0)
+ return -EINVAL;
+
+ skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
+ skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
+ err = __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
+ level - 1);
+ if (err <= 0)
+ return err;
+ }
+
+ /* tail recurse for return type check */
+ skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
+ skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
+ goto recur;
+ }
+ default:
+ pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+ btf_kind_str(local_type), local_id, targ_id);
+ return 0;
+ }
+}
+
/*
* Turn bpf_core_relo into a low- and high-level spec representation,
* validating correctness along the way, as well as calculating resulting
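Note the two separate budgets in the function above: the local depth (32) bounds pointer/array indirection followed through goto recur, while the level argument bounds FUNC_PROTO nesting — each nested function prototype costs one level, and the kernel-side caller passes MAX_TYPES_ARE_COMPAT_DEPTH (2). A hypothetical type sketch exercising both:

/* a PTR hop (depth budget) into a FUNC_PROTO (level budget); a
 * callback parameter that itself takes a callback needs level >= 2
 * to be checked rather than rejected with -EINVAL
 */
typedef int (*leaf_cb_t)(void *ctx);			/* costs level 2 */
typedef int (*cb_t)(int idx, leaf_cb_t cb, void *ctx);	/* costs level 1 */

struct ops {
	cb_t handler;	/* PTR -> FUNC_PROTO, compared member-wise */
};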
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
index 7df0da082f2c9..3fd3842d42303 100644
--- a/tools/lib/bpf/relo_core.h
+++ b/tools/lib/bpf/relo_core.h
@@ -68,6 +68,8 @@ struct bpf_core_relo_res {
__u32 new_type_id;
};
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id, int level);
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
const struct btf *targ_btf, __u32 targ_id);
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f8ad581ea2479..6bd7c288e8205 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -9,6 +9,7 @@
#include <linux/bpf.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
+#include <linux/filter.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/string.h>
@@ -49,6 +50,7 @@ struct bpf_prog_priv {
struct bpf_insn *insns_buf;
int nr_types;
int *type_mapping;
+ int *prologue_fds;
};
struct bpf_perf_object {
@@ -56,6 +58,11 @@ struct bpf_perf_object {
struct bpf_object *obj;
};
+struct bpf_preproc_result {
+ struct bpf_insn *new_insn_ptr;
+ int new_insn_cnt;
+};
+
static LIST_HEAD(bpf_objects_list);
static struct hashmap *bpf_program_hash;
static struct hashmap *bpf_map_hash;
@@ -86,6 +93,7 @@ bpf_perf_object__next(struct bpf_perf_object *prev)
(perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp))
static bool libbpf_initialized;
+static int libbpf_sec_handler;
static int bpf_perf_object__add(struct bpf_object *obj)
{
@@ -99,12 +107,76 @@ static int bpf_perf_object__add(struct bpf_object *obj)
return perf_obj ? 0 : -ENOMEM;
}
+static void *program_priv(const struct bpf_program *prog)
+{
+ void *priv;
+
+ if (IS_ERR_OR_NULL(bpf_program_hash))
+ return NULL;
+ if (!hashmap__find(bpf_program_hash, prog, &priv))
+ return NULL;
+ return priv;
+}
+
+static struct bpf_insn prologue_init_insn[] = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+};
+
+static int libbpf_prog_prepare_load_fn(struct bpf_program *prog,
+ struct bpf_prog_load_opts *opts __maybe_unused,
+ long cookie __maybe_unused)
+{
+ size_t init_size_cnt = ARRAY_SIZE(prologue_init_insn);
+ size_t orig_insn_cnt, insn_cnt, init_size, orig_size;
+ struct bpf_prog_priv *priv = program_priv(prog);
+ const struct bpf_insn *orig_insn;
+ struct bpf_insn *insn;
+
+ if (IS_ERR_OR_NULL(priv)) {
+ pr_debug("bpf: failed to get private field\n");
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ if (!priv->need_prologue)
+ return 0;
+
+ /* prepend initialization code to program instructions */
+ orig_insn = bpf_program__insns(prog);
+ orig_insn_cnt = bpf_program__insn_cnt(prog);
+ init_size = init_size_cnt * sizeof(*insn);
+ orig_size = orig_insn_cnt * sizeof(*insn);
+
+ insn_cnt = orig_insn_cnt + init_size_cnt;
+ insn = malloc(insn_cnt * sizeof(*insn));
+ if (!insn)
+ return -ENOMEM;
+
+ memcpy(insn, prologue_init_insn, init_size);
+ memcpy((char *) insn + init_size, orig_insn, orig_size);
+ bpf_program__set_insns(prog, insn, insn_cnt);
+ return 0;
+}
+
static int libbpf_init(void)
{
+ LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts,
+ .prog_prepare_load_fn = libbpf_prog_prepare_load_fn,
+ );
+
if (libbpf_initialized)
return 0;
libbpf_set_print(libbpf_perf_print);
+ libbpf_sec_handler = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE,
+ 0, &handler_opts);
+ if (libbpf_sec_handler < 0) {
+ pr_debug("bpf: failed to register libbpf section handler: %d\n",
+ libbpf_sec_handler);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
libbpf_initialized = true;
return 0;
}
@@ -188,14 +260,31 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
return obj;
}
+static void close_prologue_programs(struct bpf_prog_priv *priv)
+{
+ struct perf_probe_event *pev;
+ int i, fd;
+
+ if (!priv->need_prologue)
+ return;
+ pev = &priv->pev;
+ for (i = 0; i < pev->ntevs; i++) {
+ fd = priv->prologue_fds[i];
+ if (fd != -1)
+ close(fd);
+ }
+}
+
static void
clear_prog_priv(const struct bpf_program *prog __maybe_unused,
void *_priv)
{
struct bpf_prog_priv *priv = _priv;
+ close_prologue_programs(priv);
cleanup_perf_probe_events(&priv->pev, 1);
zfree(&priv->insns_buf);
+ zfree(&priv->prologue_fds);
zfree(&priv->type_mapping);
zfree(&priv->sys_name);
zfree(&priv->evt_name);
@@ -243,17 +332,6 @@ static bool ptr_equal(const void *key1, const void *key2,
return key1 == key2;
}
-static void *program_priv(const struct bpf_program *prog)
-{
- void *priv;
-
- if (IS_ERR_OR_NULL(bpf_program_hash))
- return NULL;
- if (!hashmap__find(bpf_program_hash, prog, &priv))
- return NULL;
- return priv;
-}
-
static int program_set_priv(struct bpf_program *prog, void *priv)
{
void *old_priv;
@@ -558,8 +636,8 @@ static int bpf__prepare_probe(void)
static int
preproc_gen_prologue(struct bpf_program *prog, int n,
- struct bpf_insn *orig_insns, int orig_insns_cnt,
- struct bpf_prog_prep_result *res)
+ const struct bpf_insn *orig_insns, int orig_insns_cnt,
+ struct bpf_preproc_result *res)
{
struct bpf_prog_priv *priv = program_priv(prog);
struct probe_trace_event *tev;
@@ -607,7 +685,6 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
res->new_insn_ptr = buf;
res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
- res->pfd = NULL;
return 0;
errout:
@@ -715,7 +792,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
struct bpf_prog_priv *priv = program_priv(prog);
struct perf_probe_event *pev;
bool need_prologue = false;
- int err, i;
+ int i;
if (IS_ERR_OR_NULL(priv)) {
pr_debug("Internal error when hook preprocessor\n");
@@ -753,6 +830,13 @@ static int hook_load_preprocessor(struct bpf_program *prog)
return -ENOMEM;
}
+ priv->prologue_fds = malloc(sizeof(int) * pev->ntevs);
+ if (!priv->prologue_fds) {
+ pr_debug("Not enough memory: alloc prologue fds failed\n");
+ return -ENOMEM;
+ }
+ memset(priv->prologue_fds, -1, sizeof(int) * pev->ntevs);
+
priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
if (!priv->type_mapping) {
pr_debug("Not enough memory: alloc type_mapping failed\n");
@@ -761,13 +845,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
memset(priv->type_mapping, -1,
sizeof(int) * pev->ntevs);
- err = map_prologue(pev, priv->type_mapping, &priv->nr_types);
- if (err)
- return err;
-
- err = bpf_program__set_prep(prog, priv->nr_types,
- preproc_gen_prologue);
- return err;
+ return map_prologue(pev, priv->type_mapping, &priv->nr_types);
}
int bpf__probe(struct bpf_object *obj)
@@ -874,6 +952,77 @@ int bpf__unprobe(struct bpf_object *obj)
return ret;
}
+static int bpf_object__load_prologue(struct bpf_object *obj)
+{
+ int init_cnt = ARRAY_SIZE(prologue_init_insn);
+ const struct bpf_insn *orig_insns;
+ struct bpf_preproc_result res;
+ struct perf_probe_event *pev;
+ struct bpf_program *prog;
+ int orig_insns_cnt;
+
+ bpf_object__for_each_program(prog, obj) {
+ struct bpf_prog_priv *priv = program_priv(prog);
+ int err, i, fd;
+
+ if (IS_ERR_OR_NULL(priv)) {
+ pr_debug("bpf: failed to get private field\n");
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ if (!priv->need_prologue)
+ continue;
+
+ /*
+ * For each program that needs a prologue we do the following:
+ *
+ * - take its current instructions and use them
+ * to generate the new code with prologue
+ * - load the new instructions with bpf_prog_load
+ * and keep the fd in prologue_fds
+ * - the new fd will be used in bpf__foreach_event
+ * to connect this program with a perf evsel
+ */
+ orig_insns = bpf_program__insns(prog);
+ orig_insns_cnt = bpf_program__insn_cnt(prog);
+
+ pev = &priv->pev;
+ for (i = 0; i < pev->ntevs; i++) {
+ /*
+ * Skip the artificial prologue_init_insn instructions
+ * (init_cnt) so that the prologue can be generated in
+ * their place.
+ */
+ err = preproc_gen_prologue(prog, i,
+ orig_insns + init_cnt,
+ orig_insns_cnt - init_cnt,
+ &res);
+ if (err)
+ return err;
+
+ fd = bpf_prog_load(bpf_program__get_type(prog),
+ bpf_program__name(prog), "GPL",
+ res.new_insn_ptr,
+ res.new_insn_cnt, NULL);
+ if (fd < 0) {
+ char bf[128];
+
+ libbpf_strerror(-errno, bf, sizeof(bf));
+ pr_debug("bpf: load objects with prologue failed: err=%d: (%s)\n",
+ -errno, bf);
+ return -errno;
+ }
+ priv->prologue_fds[i] = fd;
+ }
+ /*
+ * We no longer need the original program;
+ * we can unload it.
+ */
+ bpf_program__unload(prog);
+ }
+ return 0;
+}
+
int bpf__load(struct bpf_object *obj)
{
int err;
@@ -885,7 +1034,7 @@ int bpf__load(struct bpf_object *obj)
pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf);
return err;
}
- return 0;
+ return bpf_object__load_prologue(obj);
}
int bpf__foreach_event(struct bpf_object *obj,
@@ -920,13 +1069,10 @@ int bpf__foreach_event(struct bpf_object *obj,
for (i = 0; i < pev->ntevs; i++) {
tev = &pev->tevs[i];
- if (priv->need_prologue) {
- int type = priv->type_mapping[i];
-
- fd = bpf_program__nth_fd(prog, type);
- } else {
+ if (priv->need_prologue)
+ fd = priv->prologue_fds[i];
+ else
fd = bpf_program__fd(prog);
- }
if (fd < 0) {
pr_debug("bpf: failed to get file descriptor\n");
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index cb8e552e14185..4fbd88a8ed9e0 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -571,6 +571,7 @@ $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
$(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
$(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
+$(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -583,7 +584,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_bloom_filter_map.o \
$(OUTPUT)/bench_bpf_loop.o \
$(OUTPUT)/bench_strncmp.o \
- $(OUTPUT)/bench_bpf_hashmap_full_update.o
+ $(OUTPUT)/bench_bpf_hashmap_full_update.o \
+ $(OUTPUT)/bench_local_storage.o
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index d8aa62be996bd..1e7b5d4b1f11e 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -150,6 +150,53 @@ void ops_report_final(struct bench_res res[], int res_cnt)
printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt);
}
+void local_storage_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double important_hits_per_sec, hits_per_sec;
+ double delta_sec = delta_ns / 1000000000.0;
+
+ hits_per_sec = res->hits / 1000000.0 / delta_sec;
+ important_hits_per_sec = res->important_hits / 1000000.0 / delta_sec;
+
+ printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+
+ printf("hits %8.3lfM/s ", hits_per_sec);
+ printf("important_hits %8.3lfM/s\n", important_hits_per_sec);
+}
+
+void local_storage_report_final(struct bench_res res[], int res_cnt)
+{
+ double important_hits_mean = 0.0, important_hits_stddev = 0.0;
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+ important_hits_mean += res[i].important_hits / 1000000.0 / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+ (hits_mean - res[i].hits / 1000000.0) /
+ (res_cnt - 1.0);
+ important_hits_stddev +=
+ (important_hits_mean - res[i].important_hits / 1000000.0) *
+ (important_hits_mean - res[i].important_hits / 1000000.0) /
+ (res_cnt - 1.0);
+ }
+
+ hits_stddev = sqrt(hits_stddev);
+ important_hits_stddev = sqrt(important_hits_stddev);
+ }
+ printf("Summary: hits throughput %8.3lf \u00B1 %5.3lf M ops/s, ",
+ hits_mean, hits_stddev);
+ printf("hits latency %8.3lf ns/op, ", 1000.0 / hits_mean);
+ printf("important_hits throughput %8.3lf \u00B1 %5.3lf M ops/s\n",
+ important_hits_mean, important_hits_stddev);
+}
+
const char *argp_program_version = "benchmark";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] =
@@ -188,12 +235,14 @@ static const struct argp_option opts[] = {
extern struct argp bench_ringbufs_argp;
extern struct argp bench_bloom_map_argp;
extern struct argp bench_bpf_loop_argp;
+extern struct argp bench_local_storage_argp;
extern struct argp bench_strncmp_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
{ &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 },
{ &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 },
+ { &bench_local_storage_argp, 0, "local_storage benchmark", 0 },
{ &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
{},
};
@@ -397,6 +446,9 @@ extern const struct bench bench_bpf_loop;
extern const struct bench bench_strncmp_no_helper;
extern const struct bench bench_strncmp_helper;
extern const struct bench bench_bpf_hashmap_full_update;
+extern const struct bench bench_local_storage_cache_seq_get;
+extern const struct bench bench_local_storage_cache_interleaved_get;
+extern const struct bench bench_local_storage_cache_hashmap_control;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -432,6 +484,9 @@ static const struct bench *benchs[] = {
&bench_strncmp_no_helper,
&bench_strncmp_helper,
&bench_bpf_hashmap_full_update,
+ &bench_local_storage_cache_seq_get,
+ &bench_local_storage_cache_interleaved_get,
+ &bench_local_storage_cache_hashmap_control,
};
static void setup_benchmark()
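For the record, local_storage_report_final() above folds the division by res_cnt into the mean accumulation and then applies the Bessel-corrected sample standard deviation; with x_i the per-iteration hit count in millions and n = res_cnt:

\bar{x} = \frac{1}{n}\sum_{i=1}^{n} x_i, \qquad
s = \sqrt{\frac{1}{n-1}\sum_{i=1}^{n}\left(\bar{x} - x_i\right)^2}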
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index fb3e213df3dc9..4b15286753ba0 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -34,6 +34,7 @@ struct bench_res {
long hits;
long drops;
long false_hits;
+ long important_hits;
};
struct bench {
@@ -61,6 +62,9 @@ void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns);
void false_hits_report_final(struct bench_res res[], int res_cnt);
void ops_report_progress(int iter, struct bench_res *res, long delta_ns);
void ops_report_final(struct bench_res res[], int res_cnt);
+void local_storage_report_progress(int iter, struct bench_res *res,
+ long delta_ns);
+void local_storage_report_final(struct bench_res res[], int res_cnt);
static inline __u64 get_time_ns(void)
{
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
new file mode 100644
index 0000000000000..5a378c84e81f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+#include <linux/btf.h>
+
+#include "local_storage_bench.skel.h"
+#include "bench.h"
+
+#include <test_btf.h>
+
+static struct {
+ __u32 nr_maps;
+ __u32 hashmap_nr_keys_used;
+} args = {
+ .nr_maps = 1000,
+ .hashmap_nr_keys_used = 1000,
+};
+
+enum {
+ ARG_NR_MAPS = 6000,
+ ARG_HASHMAP_NR_KEYS_USED = 6001,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0,
+ "Set number of local_storage maps"},
+ { "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS",
+	  0, "When doing hashmap test, set the number of hashmap keys the test uses"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_MAPS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid nr_maps\n");
+ argp_usage(state);
+ }
+ args.nr_maps = ret;
+ break;
+ case ARG_HASHMAP_NR_KEYS_USED:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid hashmap_nr_keys_used\n");
+ argp_usage(state);
+ }
+ args.hashmap_nr_keys_used = ret;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_local_storage_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* Keep in sync w/ array of maps in bpf */
+#define MAX_NR_MAPS 1000
+/* keep in sync w/ same define in bpf */
+#define HASHMAP_SZ 4194304
+
+static void validate(void)
+{
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+ exit(1);
+ }
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+
+ if (args.nr_maps > MAX_NR_MAPS) {
+		fprintf(stderr, "nr_maps must be <= %u\n", MAX_NR_MAPS);
+ exit(1);
+ }
+
+ if (args.hashmap_nr_keys_used > HASHMAP_SZ) {
+ fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ);
+ exit(1);
+ }
+}
+
+static struct {
+ struct local_storage_bench *skel;
+ void *bpf_obj;
+ struct bpf_map *array_of_maps;
+} ctx;
+
+static void prepopulate_hashmap(int fd)
+{
+ int i, key, val;
+
+	/* local_storage get() calls will have the BPF_LOCAL_STORAGE_GET_F_CREATE
+	 * flag set, so prepopulate the hashmap for a fair comparison
+ */
+ for (i = 0; i < HASHMAP_SZ; i++) {
+ key = val = i;
+ if (bpf_map_update_elem(fd, &key, &val, 0)) {
+ fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key);
+ exit(1);
+ }
+ }
+}
+
+static void __setup(struct bpf_program *prog, bool hashmap)
+{
+ struct bpf_map *inner_map;
+ int i, fd, mim_fd, err;
+
+ LIBBPF_OPTS(bpf_map_create_opts, create_opts);
+
+ if (!hashmap)
+ create_opts.map_flags = BPF_F_NO_PREALLOC;
+
+ ctx.skel->rodata->num_maps = args.nr_maps;
+ ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used;
+ inner_map = bpf_map__inner_map(ctx.array_of_maps);
+ create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map);
+ create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map);
+
+ err = local_storage_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "Error loading skeleton\n");
+ goto err_out;
+ }
+
+ create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj);
+
+ mim_fd = bpf_map__fd(ctx.array_of_maps);
+ if (mim_fd < 0) {
+ fprintf(stderr, "Error getting map_in_map fd\n");
+ goto err_out;
+ }
+
+ for (i = 0; i < args.nr_maps; i++) {
+ if (hashmap)
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
+ sizeof(int), HASHMAP_SZ, &create_opts);
+ else
+ fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int),
+ sizeof(int), 0, &create_opts);
+ if (fd < 0) {
+ fprintf(stderr, "Error creating map %d: %d\n", i, fd);
+ goto err_out;
+ }
+
+ if (hashmap)
+ prepopulate_hashmap(fd);
+
+ err = bpf_map_update_elem(mim_fd, &i, &fd, 0);
+ if (err) {
+ fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i);
+ goto err_out;
+ }
+ }
+
+ if (!bpf_program__attach(prog)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ return;
+err_out:
+ exit(1);
+}
+
+static void hashmap_setup(void)
+{
+ struct local_storage_bench *skel;
+
+ setup_libbpf();
+
+ skel = local_storage_bench__open();
+ ctx.skel = skel;
+ ctx.array_of_maps = skel->maps.array_of_hash_maps;
+ skel->rodata->use_hashmap = 1;
+ skel->rodata->interleave = 0;
+
+ __setup(skel->progs.get_local, true);
+}
+
+static void local_storage_cache_get_setup(void)
+{
+ struct local_storage_bench *skel;
+
+ setup_libbpf();
+
+ skel = local_storage_bench__open();
+ ctx.skel = skel;
+ ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
+ skel->rodata->use_hashmap = 0;
+ skel->rodata->interleave = 0;
+
+ __setup(skel->progs.get_local, false);
+}
+
+static void local_storage_cache_get_interleaved_setup(void)
+{
+ struct local_storage_bench *skel;
+
+ setup_libbpf();
+
+ skel = local_storage_bench__open();
+ ctx.skel = skel;
+ ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
+ skel->rodata->use_hashmap = 0;
+ skel->rodata->interleave = 1;
+
+ __setup(skel->progs.get_local, false);
+}
+
+static void measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+ res->important_hits = atomic_swap(&ctx.skel->bss->important_hits, 0);
+}
+
+static inline void trigger_bpf_program(void)
+{
+ syscall(__NR_getpgid);
+}
+
+static void *consumer(void *input)
+{
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ while (true)
+ trigger_bpf_program();
+
+ return NULL;
+}
+
+/* The cache sequential and interleaved get benchmarks test local_storage
+ * get performance; specifically, they demonstrate the performance cliff
+ * of the current list-plus-cache local_storage model.
+ *
+ * cache sequential get: call bpf_task_storage_get on n maps in order
+ * cache interleaved get: like "sequential get", but interleave 4 calls to
+ * the 'important' map (idx 0 in array_of_maps) for every 10 calls. The
+ * goal is to mimic an environment where many progs access their
+ * local_storage maps, with 'our' prog needing to access its map more
+ * often than the others.
+ */
+const struct bench bench_local_storage_cache_seq_get = {
+ .name = "local-storage-cache-seq-get",
+ .validate = validate,
+ .setup = local_storage_cache_get_setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = local_storage_report_progress,
+ .report_final = local_storage_report_final,
+};
+
+const struct bench bench_local_storage_cache_interleaved_get = {
+ .name = "local-storage-cache-int-get",
+ .validate = validate,
+ .setup = local_storage_cache_get_interleaved_setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = local_storage_report_progress,
+ .report_final = local_storage_report_final,
+};
+
+const struct bench bench_local_storage_cache_hashmap_control = {
+ .name = "local-storage-cache-hashmap-control",
+ .validate = validate,
+ .setup = hashmap_setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = local_storage_report_progress,
+ .report_final = local_storage_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh
new file mode 100755
index 0000000000000..2eb2b513a1739
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+header "Hashmap Control"
+for i in 10 1000 10000 100000 4194304; do
+subtitle "num keys: $i"
+ summarize_local_storage "hashmap (control) sequential get: "\
+ "$(./bench --nr_maps 1 --hashmap_nr_keys_used=$i local-storage-cache-hashmap-control)"
+ printf "\n"
+done
+
+header "Local Storage"
+for i in 1 10 16 17 24 32 100 1000; do
+subtitle "num_maps: $i"
+ summarize_local_storage "local_storage cache sequential get: "\
+ "$(./bench --nr_maps $i local-storage-cache-seq-get)"
+ summarize_local_storage "local_storage cache interleaved get: "\
+ "$(./bench --nr_maps $i local-storage-cache-int-get)"
+ printf "\n"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh
index 6c5e6023a69f8..d9f40af820067 100644
--- a/tools/testing/selftests/bpf/benchs/run_common.sh
+++ b/tools/testing/selftests/bpf/benchs/run_common.sh
@@ -41,6 +41,16 @@ function ops()
echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
}
+function local_storage()
+{
+ echo -n "hits throughput: "
+ echo -n "$*" | sed -E "s/.* hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+ echo -n -e ", hits latency: "
+ echo -n "$*" | sed -E "s/.* hits latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
+ echo -n ", important_hits throughput: "
+ echo "$*" | sed -E "s/.*important_hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+}
+
function total()
{
echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
@@ -67,6 +77,13 @@ function summarize_ops()
printf "%-20s %s\n" "$bench" "$(ops $summary)"
}
+function summarize_local_storage()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s\n" "$bench" "$(local_storage $summary)"
+}
+
function summarize_total()
{
bench="$1"
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 3b3edc0fc8a6a..c05904d631ec8 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -57,3 +57,9 @@ CONFIG_FPROBE=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_MPTCP=y
+CONFIG_NETFILTER_SYNPROXY=y
+CONFIG_NETFILTER_XT_TARGET_CT=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_SYNPROXY=y
+CONFIG_IP_NF_RAW=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
index 380d7a2072e39..4cd8a25afe688 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
@@ -120,6 +120,64 @@ static void check_nested_calls(struct bpf_loop *skel)
bpf_link__destroy(link);
}
+static void check_non_constant_callback(struct bpf_loop *skel)
+{
+ struct bpf_link *link =
+ bpf_program__attach(skel->progs.prog_non_constant_callback);
+
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ skel->bss->callback_selector = 0x0F;
+ usleep(1);
+ ASSERT_EQ(skel->bss->g_output, 0x0F, "g_output #1");
+
+ skel->bss->callback_selector = 0xF0;
+ usleep(1);
+ ASSERT_EQ(skel->bss->g_output, 0xF0, "g_output #2");
+
+ bpf_link__destroy(link);
+}
+
+static void check_stack(struct bpf_loop *skel)
+{
+ struct bpf_link *link = bpf_program__attach(skel->progs.stack_check);
+ const int max_key = 12;
+ int key;
+ int map_fd;
+
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.map1);
+
+ if (!ASSERT_GE(map_fd, 0, "bpf_map__fd"))
+ goto out;
+
+ for (key = 1; key <= max_key; ++key) {
+ int val = key;
+ int err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST);
+
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto out;
+ }
+
+ usleep(1);
+
+ for (key = 1; key <= max_key; ++key) {
+ int val;
+ int err = bpf_map_lookup_elem(map_fd, &key, &val);
+
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ goto out;
+ if (!ASSERT_EQ(val, key + 1, "bad value in the map"))
+ goto out;
+ }
+
+out:
+ bpf_link__destroy(link);
+}
+
void test_bpf_loop(void)
{
struct bpf_loop *skel;
@@ -140,6 +198,10 @@ void test_bpf_loop(void)
check_invalid_flags(skel);
if (test__start_subtest("check_nested_calls"))
check_nested_calls(skel);
+ if (test__start_subtest("check_non_constant_callback"))
+ check_non_constant_callback(skel);
+ if (test__start_subtest("check_stack"))
+ check_stack(skel);
bpf_loop__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index e9a9a31b2ffec..2959a52ced060 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -9,6 +9,9 @@
#include "bpf_cubic.skel.h"
#include "bpf_tcp_nogpl.skel.h"
#include "bpf_dctcp_release.skel.h"
+#include "tcp_ca_write_sk_pacing.skel.h"
+#include "tcp_ca_incompl_cong_ops.skel.h"
+#include "tcp_ca_unsupp_cong_op.skel.h"
#ifndef ENOTSUPP
#define ENOTSUPP 524
@@ -322,6 +325,58 @@ static void test_rel_setsockopt(void)
bpf_dctcp_release__destroy(rel_skel);
}
+static void test_write_sk_pacing(void)
+{
+ struct tcp_ca_write_sk_pacing *skel;
+ struct bpf_link *link;
+
+ skel = tcp_ca_write_sk_pacing__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing);
+ ASSERT_OK_PTR(link, "attach_struct_ops");
+
+ bpf_link__destroy(link);
+ tcp_ca_write_sk_pacing__destroy(skel);
+}
+
+static void test_incompl_cong_ops(void)
+{
+ struct tcp_ca_incompl_cong_ops *skel;
+ struct bpf_link *link;
+
+ skel = tcp_ca_incompl_cong_ops__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+	/* The absence of cong_avoid() and cong_control() is only reported at
+	 * this point:
+	 */
+ link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops);
+ ASSERT_ERR_PTR(link, "attach_struct_ops");
+
+ bpf_link__destroy(link);
+ tcp_ca_incompl_cong_ops__destroy(skel);
+}
+
+static void test_unsupp_cong_op(void)
+{
+ libbpf_print_fn_t old_print_fn;
+ struct tcp_ca_unsupp_cong_op *skel;
+
+ err_str = "attach to unsupported member get_info";
+ found = false;
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+ skel = tcp_ca_unsupp_cong_op__open_and_load();
+ ASSERT_NULL(skel, "open_and_load");
+ ASSERT_EQ(found, true, "expected_err_msg");
+
+ tcp_ca_unsupp_cong_op__destroy(skel);
+ libbpf_set_print(old_print_fn);
+}
+
void test_bpf_tcp_ca(void)
{
if (test__start_subtest("dctcp"))
@@ -334,4 +389,10 @@ void test_bpf_tcp_ca(void)
test_dctcp_fallback();
if (test__start_subtest("rel_setsockopt"))
test_rel_setsockopt();
+ if (test__start_subtest("write_sk_pacing"))
+ test_write_sk_pacing();
+ if (test__start_subtest("incompl_cong_ops"))
+ test_incompl_cong_ops();
+ if (test__start_subtest("unsupp_cong_op"))
+ test_unsupp_cong_op();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index edb387163baa2..1fd792a92a1cd 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -34,7 +34,6 @@ static bool always_log;
#undef CHECK
#define CHECK(condition, format...) _CHECK(condition, "check", duration, format)
-#define BTF_END_RAW 0xdeadbeef
#define NAME_TBD 0xdeadb33f
#define NAME_NTH(N) (0xfffe0000 | N)
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index 9d211b5c22c41..7d23166c77af5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -394,7 +394,6 @@ void serial_test_sock_fields(void)
test();
done:
- test_sock_fields__detach(skel);
test_sock_fields__destroy(skel);
if (child_cg_fd >= 0)
close(child_cg_fd);
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c
index e08565282759c..de1fc82d27105 100644
--- a/tools/testing/selftests/bpf/progs/bpf_loop.c
+++ b/tools/testing/selftests/bpf/progs/bpf_loop.c
@@ -11,11 +11,19 @@ struct callback_ctx {
int output;
};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 32);
+ __type(key, int);
+ __type(value, int);
+} map1 SEC(".maps");
+
/* These should be set by the user program */
u32 nested_callback_nr_loops;
u32 stop_index = -1;
u32 nr_loops;
int pid;
+int callback_selector;
/* Making these global variables so that the userspace program
* can verify the output through the skeleton
@@ -111,3 +119,109 @@ int prog_nested_calls(void *ctx)
return 0;
}
+
+static int callback_set_f0(int i, void *ctx)
+{
+ g_output = 0xF0;
+ return 0;
+}
+
+static int callback_set_0f(int i, void *ctx)
+{
+ g_output = 0x0F;
+ return 0;
+}
+
+/*
+ * A non-constant callback is a corner case for the bpf_loop inlining logic
+ */
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int prog_non_constant_callback(void *ctx)
+{
+ struct callback_ctx data = {};
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ int (*callback)(int i, void *ctx);
+
+ g_output = 0;
+
+ if (callback_selector == 0x0F)
+ callback = callback_set_0f;
+ else
+ callback = callback_set_f0;
+
+ bpf_loop(1, callback, NULL, 0);
+
+ return 0;
+}
+
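The "don't inline" verifier tests further below exercise the conditions under which this matters; as a rough sketch of the inlining precondition (hypothetical names, simplified pseudo-logic rather than the verifier's actual code):

/* The verifier can only inline a bpf_loop call when the callback is a
 * single statically-known subprog and the flags argument is zero;
 * struct loop_call_site and both of its fields are hypothetical.
 */
struct loop_call_site {
	bool callback_is_constant;	/* R2 resolves to one known subprog */
	bool flags_is_zero;		/* R4 == 0 at the call site */
};

static bool can_inline_bpf_loop(const struct loop_call_site *cs)
{
	return cs->callback_is_constant && cs->flags_is_zero;
}

Here prog_non_constant_callback picks the callback at run time, so R2 has two possible targets and the call must remain a real helper call.
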
+static int stack_check_inner_callback(void *ctx)
+{
+ return 0;
+}
+
+static int map1_lookup_elem(int key)
+{
+ int *val = bpf_map_lookup_elem(&map1, &key);
+
+ return val ? *val : -1;
+}
+
+static void map1_update_elem(int key, int val)
+{
+ bpf_map_update_elem(&map1, &key, &val, BPF_ANY);
+}
+
+static int stack_check_outer_callback(void *ctx)
+{
+ int a = map1_lookup_elem(1);
+ int b = map1_lookup_elem(2);
+ int c = map1_lookup_elem(3);
+ int d = map1_lookup_elem(4);
+ int e = map1_lookup_elem(5);
+ int f = map1_lookup_elem(6);
+
+ bpf_loop(1, stack_check_inner_callback, NULL, 0);
+
+ map1_update_elem(1, a + 1);
+ map1_update_elem(2, b + 1);
+ map1_update_elem(3, c + 1);
+ map1_update_elem(4, d + 1);
+ map1_update_elem(5, e + 1);
+ map1_update_elem(6, f + 1);
+
+ return 0;
+}
+
+/* Some of the local variables in stack_check and
+ * stack_check_outer_callback are allocated on the stack by the
+ * compiler. This test verifies that the stack content for these
+ * variables is preserved between calls to bpf_loop (it could be
+ * clobbered if loop inlining allocated stack slots incorrectly).
+ */
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int stack_check(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ int a = map1_lookup_elem(7);
+ int b = map1_lookup_elem(8);
+ int c = map1_lookup_elem(9);
+ int d = map1_lookup_elem(10);
+ int e = map1_lookup_elem(11);
+ int f = map1_lookup_elem(12);
+
+ bpf_loop(1, stack_check_outer_callback, NULL, 0);
+
+ map1_update_elem(7, a + 1);
+ map1_update_elem(8, b + 1);
+ map1_update_elem(9, c + 1);
+ map1_update_elem(10, d + 1);
+ map1_update_elem(11, e + 1);
+ map1_update_elem(12, f + 1);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/local_storage_bench.c b/tools/testing/selftests/bpf/progs/local_storage_bench.c
new file mode 100644
index 0000000000000..2c3234c5b73a3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage_bench.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define HASHMAP_SZ 4194304
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+ });
+} array_of_local_storage_maps SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, HASHMAP_SZ);
+ __type(key, int);
+ __type(value, int);
+ });
+} array_of_hash_maps SEC(".maps");
+
+long important_hits;
+long hits;
+
+/* set from user-space */
+const volatile unsigned int use_hashmap;
+const volatile unsigned int hashmap_num_keys;
+const volatile unsigned int num_maps;
+const volatile unsigned int interleave;
+
+struct loop_ctx {
+ struct task_struct *task;
+ long loop_hits;
+ long loop_important_hits;
+};
+
+static int do_lookup(unsigned int elem, struct loop_ctx *lctx)
+{
+ void *map, *inner_map;
+ int idx = 0;
+
+ if (use_hashmap)
+ map = &array_of_hash_maps;
+ else
+ map = &array_of_local_storage_maps;
+
+ inner_map = bpf_map_lookup_elem(map, &elem);
+ if (!inner_map)
+ return -1;
+
+ if (use_hashmap) {
+ idx = bpf_get_prandom_u32() % hashmap_num_keys;
+ bpf_map_lookup_elem(inner_map, &idx);
+ } else {
+ bpf_task_storage_get(inner_map, lctx->task, &idx,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ }
+
+ lctx->loop_hits++;
+ if (!elem)
+ lctx->loop_important_hits++;
+ return 0;
+}
+
+static long loop(u32 index, void *ctx)
+{
+ struct loop_ctx *lctx = (struct loop_ctx *)ctx;
+ unsigned int map_idx = index % num_maps;
+
+ do_lookup(map_idx, lctx);
+ if (interleave && map_idx % 3 == 0)
+ do_lookup(0, lctx);
+ return 0;
+}
+
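As a sanity check on the counters above (assumed nr_maps = 10): every index hits its own map, index 0 counts as important, and indices 0, 3, 6 and 9 trigger the extra interleaved get of map 0. A sketch of the expected per-pass totals:

/* Not part of the patch: expected counts for one pass over 10 maps
 * with interleave enabled.
 */
static void expected_counts_per_10(long *hits, long *important)
{
	unsigned int i;

	*hits = 0;
	*important = 0;
	for (i = 0; i < 10; i++) {
		(*hits)++;		/* do_lookup(i, lctx) */
		if (i == 0)
			(*important)++;
		if (i % 3 == 0) {	/* interleaved do_lookup(0, lctx) */
			(*hits)++;
			(*important)++;
		}
	}
	/* yields *hits == 14, *important == 5 */
}
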
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int get_local(void *ctx)
+{
+ struct loop_ctx lctx;
+
+ lctx.task = bpf_get_current_task_btf();
+ lctx.loop_hits = 0;
+ lctx.loop_important_hits = 0;
+ bpf_loop(10000, &loop, &lctx, 0);
+ __sync_add_and_fetch(&hits, lctx.loop_hits);
+ __sync_add_and_fetch(&important_hits, lctx.loop_important_hits);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
new file mode 100644
index 0000000000000..7bb872fb22dd7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+ return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/incompl_cong_ops_ssthresh")
+__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/incompl_cong_ops_undo_cwnd")
+__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops incompl_cong_ops = {
+ /* Intentionally leaving out any of the required cong_avoid() and
+ * cong_control() here.
+ */
+ .ssthresh = (void *)incompl_cong_ops_ssthresh,
+ .undo_cwnd = (void *)incompl_cong_ops_undo_cwnd,
+ .name = "bpf_incompl_ops",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
new file mode 100644
index 0000000000000..c06f4a41c21ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/unsupp_cong_op_get_info")
+size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info)
+{
+ return 0;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops unsupp_cong_op = {
+ .get_info = (void *)unsupp_cong_op_get_info,
+ .name = "bpf_unsupp_op",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
new file mode 100644
index 0000000000000..43447704cf0e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define USEC_PER_SEC 1000000UL
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+ return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/write_sk_pacing_init")
+void BPF_PROG(write_sk_pacing_init, struct sock *sk)
+{
+#ifdef ENABLE_ATOMICS_TESTS
+ __sync_bool_compare_and_swap(&sk->sk_pacing_status, SK_PACING_NONE,
+ SK_PACING_NEEDED);
+#else
+ sk->sk_pacing_status = SK_PACING_NEEDED;
+#endif
+}
+
+SEC("struct_ops/write_sk_pacing_cong_control")
+void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
+ const struct rate_sample *rs)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ unsigned long rate =
+ ((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) /
+ (tp->srtt_us ?: 1U << 3);
+ sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate);
+}
+
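To make the fixed-point arithmetic above concrete: tp->srtt_us stores the smoothed RTT in units of usec/8, so the numerator is scaled by 8 (<< 3) to match, and the ?: fallback substitutes 1 usec (1U << 3) before the first RTT sample arrives. A worked example with assumed values:

/* Assumed: cwnd = 10 segments of 1448 bytes, smoothed RTT = 10 ms,
 * i.e. srtt_us = 10000 << 3.
 */
unsigned long cwnd = 10, mss = 1448;
unsigned long srtt_us = 10000 << 3;
unsigned long rate = ((cwnd * mss * USEC_PER_SEC) << 3) / srtt_us;
/* rate == 1448000 bytes/s: one full cwnd (14480 bytes) every 10 ms */
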
+SEC("struct_ops/write_sk_pacing_ssthresh")
+__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/write_sk_pacing_undo_cwnd")
+__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops write_sk_pacing = {
+ .init = (void *)write_sk_pacing_init,
+ .cong_control = (void *)write_sk_pacing_cong_control,
+ .ssthresh = (void *)write_sk_pacing_ssthresh,
+ .undo_cwnd = (void *)write_sk_pacing_undo_cwnd,
+ .name = "bpf_w_sk_pacing",
+};
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index 38782bd47fdca..fb4f4714eeb41 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -4,6 +4,8 @@
#ifndef _TEST_BTF_H
#define _TEST_BTF_H
+#define BTF_END_RAW 0xdeadbeef
+
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 372579c9f45e7..f9d553fbf68a3 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -51,12 +51,24 @@
#endif
#define MAX_INSNS BPF_MAXINSNS
+#define MAX_EXPECTED_INSNS 32
+#define MAX_UNEXPECTED_INSNS 32
#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
#define MAX_NR_MAPS 23
#define MAX_TEST_RUNS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
+#define MAX_FUNC_INFOS 8
+#define MAX_BTF_STRINGS 256
+#define MAX_BTF_TYPES 256
+
+#define INSN_OFF_MASK ((__s16)0xFFFF)
+#define INSN_IMM_MASK ((__s32)0xFFFFFFFF)
+#define SKIP_INSNS() BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef)
+
+#define DEFAULT_LIBBPF_LOG_LEVEL 4
+#define VERBOSE_LIBBPF_LOG_LEVEL 1
#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
@@ -79,6 +91,23 @@ struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
struct bpf_insn *fill_insns;
+	/* If specified, the test engine looks for this sequence of
+	 * instructions in the BPF program after loading. This allows
+	 * testing rewrites applied by the verifier. Use the values
+	 * INSN_OFF_MASK and INSN_IMM_MASK to mask the `off` and `imm`
+	 * fields if their content does not matter. The test case fails
+	 * if the specified instructions are not found.
+	 *
+	 * The sequence can be split into sub-sequences by adding a
+	 * SKIP_INSNS instruction at the end of each sub-sequence. In
+	 * that case the sub-sequences are searched for one after another.
+	 */
+	struct bpf_insn expected_insns[MAX_EXPECTED_INSNS];
+	/* If specified, the test engine applies the same pattern-matching
+	 * logic as for `expected_insns`. If the specified pattern is
+	 * matched, the test case is marked as failed.
+	 */
+ struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS];
int fixup_map_hash_8b[MAX_FIXUPS];
int fixup_map_hash_48b[MAX_FIXUPS];
int fixup_map_hash_16b[MAX_FIXUPS];
@@ -135,6 +164,14 @@ struct bpf_test {
};
enum bpf_attach_type expected_attach_type;
const char *kfunc;
+ struct bpf_func_info func_info[MAX_FUNC_INFOS];
+ int func_info_cnt;
+ char btf_strings[MAX_BTF_STRINGS];
+	/* A set of BTF types to load when specified;
+	 * use the macro definitions from test_btf.h,
+	 * and terminate the list with BTF_END_RAW
+	 */
+ __u32 btf_types[MAX_BTF_TYPES];
};
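
To illustrate how these fields combine (a hypothetical fragment, not one of the tests added below): masked fields match any value, and SKIP_INSNS() lets unrelated instructions appear between sub-sequences.

/* Hypothetical test fragment: require a helper call whose off/imm may
 * vary, then, after any number of instructions, an exit; fail if a
 * pseudo call is still present anywhere in the xlated program.
 */
.expected_insns = {
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, INSN_OFF_MASK, INSN_IMM_MASK),
	SKIP_INSNS(),
	BPF_EXIT_INSN(),
},
.unexpected_insns = {
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL,
		     INSN_OFF_MASK, INSN_IMM_MASK),
},
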
/* Note we want this to be 64 bit aligned so that the end of our array is
@@ -388,6 +425,45 @@ static void bpf_fill_torturous_jumps(struct bpf_test *self)
}
}
+static void bpf_fill_big_prog_with_loop_1(struct bpf_test *self)
+{
+ struct bpf_insn *insn = self->fill_insns;
+	/* This test was added to catch a specific use-after-free
+	 * error, which happened upon BPF program reallocation.
+	 * Reallocation is handled by core.c:bpf_prog_realloc, which
+	 * reuses old memory if the page boundary is not crossed. The
+	 * value of `len` is chosen to cross this boundary on bpf_loop
+	 * patching.
+	 */
+ const int len = getpagesize() - 25;
+ int callback_load_idx;
+ int callback_idx;
+ int i = 0;
+
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1);
+ callback_load_idx = i;
+ insn[i++] = BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW,
+ BPF_REG_2, BPF_PSEUDO_FUNC, 0,
+ 777 /* filled below */);
+ insn[i++] = BPF_RAW_INSN(0, 0, 0, 0, 0);
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0);
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0);
+ insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop);
+
+ while (i < len - 3)
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+ insn[i++] = BPF_EXIT_INSN();
+
+ callback_idx = i;
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+ insn[i++] = BPF_EXIT_INSN();
+
+ insn[callback_load_idx].imm = callback_idx - callback_load_idx - 1;
+ self->func_info[1].insn_off = callback_idx;
+ self->prog_len = i;
+ assert(i == len);
+}
+
/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
#define BPF_SK_LOOKUP(func) \
/* struct bpf_sock_tuple tuple = {} */ \
@@ -664,34 +740,66 @@ static __u32 btf_raw_types[] = {
BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */
};
-static int load_btf(void)
+static char bpf_vlog[UINT_MAX >> 8];
+
+static int load_btf_spec(__u32 *types, int types_len,
+ const char *strings, int strings_len)
{
struct btf_header hdr = {
.magic = BTF_MAGIC,
.version = BTF_VERSION,
.hdr_len = sizeof(struct btf_header),
- .type_len = sizeof(btf_raw_types),
- .str_off = sizeof(btf_raw_types),
- .str_len = sizeof(btf_str_sec),
+ .type_len = types_len,
+ .str_off = types_len,
+ .str_len = strings_len,
};
void *ptr, *raw_btf;
int btf_fd;
+ LIBBPF_OPTS(bpf_btf_load_opts, opts,
+ .log_buf = bpf_vlog,
+ .log_size = sizeof(bpf_vlog),
+ .log_level = (verbose
+ ? VERBOSE_LIBBPF_LOG_LEVEL
+ : DEFAULT_LIBBPF_LOG_LEVEL),
+ );
- ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) +
- sizeof(btf_str_sec));
+ raw_btf = malloc(sizeof(hdr) + types_len + strings_len);
+ ptr = raw_btf;
memcpy(ptr, &hdr, sizeof(hdr));
ptr += sizeof(hdr);
- memcpy(ptr, btf_raw_types, hdr.type_len);
+ memcpy(ptr, types, hdr.type_len);
ptr += hdr.type_len;
- memcpy(ptr, btf_str_sec, hdr.str_len);
+ memcpy(ptr, strings, hdr.str_len);
ptr += hdr.str_len;
- btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, NULL);
- free(raw_btf);
+ btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, &opts);
if (btf_fd < 0)
- return -1;
- return btf_fd;
+ printf("Failed to load BTF spec: '%s'\n", strerror(errno));
+
+ free(raw_btf);
+
+ return btf_fd < 0 ? -1 : btf_fd;
+}
+
+static int load_btf(void)
+{
+ return load_btf_spec(btf_raw_types, sizeof(btf_raw_types),
+ btf_str_sec, sizeof(btf_str_sec));
+}
+
+static int load_btf_for_test(struct bpf_test *test)
+{
+ int types_num = 0;
+
+ while (types_num < MAX_BTF_TYPES &&
+ test->btf_types[types_num] != BTF_END_RAW)
+ ++types_num;
+
+ int types_len = types_num * sizeof(test->btf_types[0]);
+
+ return load_btf_spec(test->btf_types, types_len,
+ test->btf_strings, sizeof(test->btf_strings));
}
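
A small usage sketch of the new plumbing (hypothetical test values): btf_types is scanned only up to BTF_END_RAW, so a short prefix of the fixed-size array is all a test needs to provide.

/* Hypothetical: load a BTF spec containing a single 32-bit signed int */
struct bpf_test t = {
	.btf_strings = "\0int\0",
	.btf_types = {
		/* 1: int */ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
		BTF_END_RAW,
	},
};
int btf_fd = load_btf_for_test(&t);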
static int create_map_spin_lock(void)
@@ -770,8 +878,6 @@ static int create_map_kptr(void)
return fd;
}
-static char bpf_vlog[UINT_MAX >> 8];
-
static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
struct bpf_insn *prog, int *map_fds)
{
@@ -1126,10 +1232,218 @@ static bool cmp_str_seq(const char *log, const char *exp)
return true;
}
+static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ __u32 xlated_prog_len;
+ __u32 buf_element_size = sizeof(struct bpf_insn);
+
+ if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("bpf_obj_get_info_by_fd failed");
+ return -1;
+ }
+
+ xlated_prog_len = info.xlated_prog_len;
+ if (xlated_prog_len % buf_element_size) {
+ printf("Program length %d is not multiple of %d\n",
+ xlated_prog_len, buf_element_size);
+ return -1;
+ }
+
+ *cnt = xlated_prog_len / buf_element_size;
+ *buf = calloc(*cnt, buf_element_size);
+	if (!*buf) {
+ perror("can't allocate xlated program buffer");
+ return -ENOMEM;
+ }
+
+ bzero(&info, sizeof(info));
+ info.xlated_prog_len = xlated_prog_len;
+ info.xlated_prog_insns = (__u64)*buf;
+ if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("second bpf_obj_get_info_by_fd failed");
+ goto out_free_buf;
+ }
+
+ return 0;
+
+out_free_buf:
+ free(*buf);
+ return -1;
+}
+
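A usage sketch for the helper above (prog_fd is assumed to be a loaded program; print_insn is defined further below; error handling elided):

struct bpf_insn *insns = NULL;
int cnt = 0;

if (!get_xlated_program(prog_fd, &insns, &cnt)) {
	print_insn(insns, cnt);	/* dump the verifier-rewritten program */
	free(insns);
}
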
+static bool is_null_insn(struct bpf_insn *insn)
+{
+ struct bpf_insn null_insn = {};
+
+ return memcmp(insn, &null_insn, sizeof(null_insn)) == 0;
+}
+
+static bool is_skip_insn(struct bpf_insn *insn)
+{
+ struct bpf_insn skip_insn = SKIP_INSNS();
+
+ return memcmp(insn, &skip_insn, sizeof(skip_insn)) == 0;
+}
+
+static int null_terminated_insn_len(struct bpf_insn *seq, int max_len)
+{
+ int i;
+
+ for (i = 0; i < max_len; ++i) {
+ if (is_null_insn(&seq[i]))
+ return i;
+ }
+ return max_len;
+}
+
+static bool compare_masked_insn(struct bpf_insn *orig, struct bpf_insn *masked)
+{
+ struct bpf_insn orig_masked;
+
+ memcpy(&orig_masked, orig, sizeof(orig_masked));
+ if (masked->imm == INSN_IMM_MASK)
+ orig_masked.imm = INSN_IMM_MASK;
+ if (masked->off == INSN_OFF_MASK)
+ orig_masked.off = INSN_OFF_MASK;
+
+ return memcmp(&orig_masked, masked, sizeof(orig_masked)) == 0;
+}
+
+static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq,
+ int seq_len, int subseq_len)
+{
+ int i, j;
+
+ if (subseq_len > seq_len)
+ return -1;
+
+ for (i = 0; i < seq_len - subseq_len + 1; ++i) {
+ bool found = true;
+
+ for (j = 0; j < subseq_len; ++j) {
+ if (!compare_masked_insn(&seq[i + j], &subseq[j])) {
+ found = false;
+ break;
+ }
+ }
+ if (found)
+ return i;
+ }
+
+ return -1;
+}
+
+static int find_skip_insn_marker(struct bpf_insn *seq, int len)
+{
+ int i;
+
+ for (i = 0; i < len; ++i)
+ if (is_skip_insn(&seq[i]))
+ return i;
+
+ return -1;
+}
+
+/* Return true if all sub-sequences in `subseqs` can be found in
+ * `seq` one after another. Sub-sequences are separated by a single
+ * SKIP_INSNS marker instruction.
+ */
+static bool find_all_insn_subseqs(struct bpf_insn *seq, struct bpf_insn *subseqs,
+ int seq_len, int max_subseqs_len)
+{
+ int subseqs_len = null_terminated_insn_len(subseqs, max_subseqs_len);
+
+ while (subseqs_len > 0) {
+ int skip_idx = find_skip_insn_marker(subseqs, subseqs_len);
+ int cur_subseq_len = skip_idx < 0 ? subseqs_len : skip_idx;
+ int subseq_idx = find_insn_subseq(seq, subseqs,
+ seq_len, cur_subseq_len);
+
+ if (subseq_idx < 0)
+ return false;
+ seq += subseq_idx + cur_subseq_len;
+ seq_len -= subseq_idx + cur_subseq_len;
+ subseqs += cur_subseq_len + 1;
+ subseqs_len -= cur_subseq_len + 1;
+ }
+
+ return true;
+}
+
+static void print_insn(struct bpf_insn *buf, int cnt)
+{
+ int i;
+
+ printf(" addr op d s off imm\n");
+ for (i = 0; i < cnt; ++i) {
+ struct bpf_insn *insn = &buf[i];
+
+ if (is_null_insn(insn))
+ break;
+
+ if (is_skip_insn(insn))
+ printf(" ...\n");
+ else
+ printf(" %04x: %02x %1x %x %04hx %08x\n",
+ i, insn->code, insn->dst_reg,
+ insn->src_reg, insn->off, insn->imm);
+ }
+}
+
+static bool check_xlated_program(struct bpf_test *test, int fd_prog)
+{
+ struct bpf_insn *buf;
+ int cnt;
+ bool result = true;
+ bool check_expected = !is_null_insn(test->expected_insns);
+ bool check_unexpected = !is_null_insn(test->unexpected_insns);
+
+ if (!check_expected && !check_unexpected)
+ goto out;
+
+ if (get_xlated_program(fd_prog, &buf, &cnt)) {
+ printf("FAIL: can't get xlated program\n");
+ result = false;
+ goto out;
+ }
+
+ if (check_expected &&
+ !find_all_insn_subseqs(buf, test->expected_insns,
+ cnt, MAX_EXPECTED_INSNS)) {
+ printf("FAIL: can't find expected subsequence of instructions\n");
+ result = false;
+ if (verbose) {
+ printf("Program:\n");
+ print_insn(buf, cnt);
+ printf("Expected subsequence:\n");
+ print_insn(test->expected_insns, MAX_EXPECTED_INSNS);
+ }
+ }
+
+ if (check_unexpected &&
+ find_all_insn_subseqs(buf, test->unexpected_insns,
+ cnt, MAX_UNEXPECTED_INSNS)) {
+ printf("FAIL: found unexpected subsequence of instructions\n");
+ result = false;
+ if (verbose) {
+ printf("Program:\n");
+ print_insn(buf, cnt);
+			printf("Unexpected subsequence:\n");
+ print_insn(test->unexpected_insns, MAX_UNEXPECTED_INSNS);
+ }
+ }
+
+ free(buf);
+ out:
+ return result;
+}
+
static void do_test_single(struct bpf_test *test, bool unpriv,
int *passes, int *errors)
{
- int fd_prog, expected_ret, alignment_prevented_execution;
+ int fd_prog, btf_fd, expected_ret, alignment_prevented_execution;
int prog_len, prog_type = test->prog_type;
struct bpf_insn *prog = test->insns;
LIBBPF_OPTS(bpf_prog_load_opts, opts);
@@ -1141,8 +1455,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
__u32 pflags;
int i, err;
+ fd_prog = -1;
for (i = 0; i < MAX_NR_MAPS; i++)
map_fds[i] = -1;
+ btf_fd = -1;
if (!prog_type)
prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
@@ -1175,11 +1491,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
opts.expected_attach_type = test->expected_attach_type;
if (verbose)
- opts.log_level = 1;
+ opts.log_level = VERBOSE_LIBBPF_LOG_LEVEL;
else if (expected_ret == VERBOSE_ACCEPT)
opts.log_level = 2;
else
- opts.log_level = 4;
+ opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
opts.prog_flags = pflags;
if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) {
@@ -1197,6 +1513,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
opts.attach_btf_id = attach_btf_id;
}
+ if (test->btf_types[0] != 0) {
+ btf_fd = load_btf_for_test(test);
+ if (btf_fd < 0)
+ goto fail_log;
+ opts.prog_btf_fd = btf_fd;
+ }
+
+ if (test->func_info_cnt != 0) {
+ opts.func_info = test->func_info;
+ opts.func_info_cnt = test->func_info_cnt;
+ opts.func_info_rec_size = sizeof(test->func_info[0]);
+ }
+
opts.log_buf = bpf_vlog;
opts.log_size = sizeof(bpf_vlog);
fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts);
@@ -1262,6 +1591,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (verbose)
printf(", verifier log:\n%s", bpf_vlog);
+ if (!check_xlated_program(test, fd_prog))
+ goto fail_log;
+
run_errs = 0;
run_successes = 0;
if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
@@ -1305,6 +1637,7 @@ close_fds:
if (test->fill_insns)
free(test->fill_insns);
close(fd_prog);
+ close(btf_fd);
for (i = 0; i < MAX_NR_MAPS; i++)
close(map_fds[i]);
sched_yield();
diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
new file mode 100644
index 0000000000000..2d0023659d884
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
@@ -0,0 +1,263 @@
+#define BTF_TYPES \
+ .btf_strings = "\0int\0i\0ctx\0callback\0main\0", \
+ .btf_types = { \
+ /* 1: int */ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), \
+ /* 2: int* */ BTF_PTR_ENC(1), \
+ /* 3: void* */ BTF_PTR_ENC(0), \
+ /* 4: int __(void*) */ BTF_FUNC_PROTO_ENC(1, 1), \
+ BTF_FUNC_PROTO_ARG_ENC(7, 3), \
+ /* 5: int __(int, int*) */ BTF_FUNC_PROTO_ENC(1, 2), \
+ BTF_FUNC_PROTO_ARG_ENC(5, 1), \
+ BTF_FUNC_PROTO_ARG_ENC(7, 2), \
+ /* 6: main */ BTF_FUNC_ENC(20, 4), \
+ /* 7: callback */ BTF_FUNC_ENC(11, 5), \
+ BTF_END_RAW \
+ }
+
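For cross-checking the name offsets used above, the string section lays out as:

/* btf_strings = "\0int\0i\0ctx\0callback\0main\0"
 * offset  1: "int"    5: "i"    7: "ctx"    11: "callback"    20: "main"
 */
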
+#define MAIN_TYPE 6
+#define CALLBACK_TYPE 7
+
+/* can't use BPF_CALL_REL: jit_subprogs adjusts the IMM & OFF
+ * fields for pseudo calls
+ */
+#define PSEUDO_CALL_INSN() \
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, \
+ INSN_OFF_MASK, INSN_IMM_MASK)
+
+/* can't use the BPF_FUNC_loop constant:
+ * do_misc_fixups adjusts the IMM field
+ */
+#define HELPER_CALL_INSN() \
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, INSN_OFF_MASK, INSN_IMM_MASK)
+
+{
+ "inline simple bpf_loop call",
+ .insns = {
+ /* main */
+ /* force verifier state branching to verify logic on first and
+ * subsequent bpf_loop insn processing steps
+ */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 2),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { PSEUDO_CALL_INSN() },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } },
+ .func_info_cnt = 2,
+ BTF_TYPES
+},
+{
+ "don't inline bpf_loop call, flags non-zero",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 9),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 7),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -10),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { HELPER_CALL_INSN() },
+ .unexpected_insns = { PSEUDO_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
+ .func_info_cnt = 2,
+ BTF_TYPES
+},
+{
+ "don't inline bpf_loop call, callback non-constant",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 4), /* pick a random callback */
+
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 10),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* callback #2 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { HELPER_CALL_INSN() },
+ .unexpected_insns = { PSEUDO_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = {
+ { 0, MAIN_TYPE },
+ { 14, CALLBACK_TYPE },
+ { 16, CALLBACK_TYPE }
+ },
+ .func_info_cnt = 3,
+ BTF_TYPES
+},
+{
+ "bpf_loop_inline and a dead func",
+ .insns = {
+ /* main */
+
+	/* A reference to callback #1 to make the verifier count it as a func.
+	 * This reference is overwritten below and callback #1 is dead.
+	 */
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 9),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* callback #2 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { PSEUDO_CALL_INSN() },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = {
+ { 0, MAIN_TYPE },
+ { 10, CALLBACK_TYPE },
+ { 12, CALLBACK_TYPE }
+ },
+ .func_info_cnt = 3,
+ BTF_TYPES
+},
+{
+ "bpf_loop_inline stack locations for loop vars",
+ .insns = {
+ /* main */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77),
+ /* bpf_loop call #1 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 22),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ /* bpf_loop call #2 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 16),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ /* call func and exit */
+ BPF_CALL_REL(2),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* func */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77),
+ SKIP_INSNS(),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24),
+ SKIP_INSNS(),
+ /* offsets are the same as in the first call */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24),
+ SKIP_INSNS(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55),
+ SKIP_INSNS(),
+		/* offsets differ from main because of the different offset
+		 * in the BPF_ST_MEM instruction
+		 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -40),
+ },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ .func_info = {
+ { 0, MAIN_TYPE },
+ { 16, MAIN_TYPE },
+ { 25, CALLBACK_TYPE },
+ },
+ .func_info_cnt = 3,
+ BTF_TYPES
+},
+{
+ "inline bpf_loop call in a big program",
+ .insns = {},
+ .fill_helper = bpf_fill_big_prog_with_loop_1,
+ .expected_insns = { PSEUDO_CALL_INSN() },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .result = ACCEPT,
+ .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
+ .func_info_cnt = 2,
+ BTF_TYPES
+},
+
+#undef HELPER_CALL_INSN
+#undef PSEUDO_CALL_INSN
+#undef CALLBACK_TYPE
+#undef MAIN_TYPE
+#undef BTF_TYPES