| field | value | |
|---|---|---|
| author | Stephen Rothwell <sfr@canb.auug.org.au> | 2022-06-28 11:09:17 +1000 |
| committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2022-06-28 11:09:17 +1000 |
| commit | 8a264bbdb657dfed382360ecb4742b754be86af9 (patch) | |
| tree | e11c374be232ebcc4b40db707667cc3b7fa9bf9b | |
| parent | c95333b2ff8c3325df74d1eb36c3c797c9096dc0 (diff) | |
| parent | fd75733da2f376c0c8c6513c3cb2ac227082ec5c (diff) | |
| download | linux-next-8a264bbdb657dfed382360ecb4742b754be86af9.tar.gz | |
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
49 files changed, 2063 insertions, 316 deletions
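Two JIT backends in this pull (x86 and arm64) now advertise bpf_jit_supports_subprog_tailcalls(), and the verifier's allow_tail_call_in_subprogs() keys off that capability instead of a hardcoded CONFIG_X86_64 check. What this unlocks is a tail call issued from inside a bpf2bpf subprogram, roughly as in the sketch below; the map, section, and function names are illustrative, not taken from this merge.

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Prog array the tail call jumps through; slot 0 would be populated
 * from user space with another program's fd.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

/* __noinline keeps this a genuine bpf2bpf subprogram */
static __noinline int subprog(struct xdp_md *ctx)
{
	bpf_tail_call(ctx, &jmp_table, 0);	/* tail call from a subprog */
	return XDP_PASS;			/* fallthrough if slot 0 is empty */
}

SEC("xdp")
int entry_prog(struct xdp_md *ctx)
{
	return subprog(ctx);
}

char LICENSE[] SEC("license") = "GPL";
```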
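The main verifier change is bpf_loop() inlining: update_loop_inline_state() marks a call as fit_for_inline when the flags argument is provably zero and the callback subprog is the same at every visit, and optimize_bpf_loop() then replaces the helper call with the instruction sequence built by inline_bpf_loop(), spilling R6-R8 so they can serve as loop bound, counter, and context. A minimal sketch of a caller in the inlinable shape, assuming a libbpf-style build; program and function names are illustrative.

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* The callback stays a single static function and is passed with a
 * constant-zero flags argument, the two conditions the inlining
 * pass checks for.
 */
static long sum_cb(__u64 index, void *ctx)
{
	*(long *)ctx += index;
	return 0;	/* 0 continues the loop, 1 breaks out early */
}

SEC("tp/syscalls/sys_enter_getpgid")
int sum_first_n(void *ctx)
{
	long acc = 0;

	/* nr_loops is capped at BPF_MAX_LOOPS (1 << 23); a non-zero
	 * flags argument returns -EINVAL and also defeats the inlining.
	 */
	bpf_loop(100, sum_cb, &acc, 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
```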
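On the networking side, sockmap's verdict path switches from the read_sock() proto_op to a new read_skb() op: the actor now receives the socket and one whole skb instead of a read_descriptor_t plus offset and length, which lets sk_psock_verdict_recv() take a plain skb_get() reference rather than cloning every skb. A hedged sketch of the new contract, mirroring the shape of sk_psock_verdict_data_ready() in the net/core/skmsg.c hunk below; the demo_* names are illustrative, not part of the patch.

```c
#include <linux/net.h>
#include <linux/skbuff.h>
#include <net/sock.h>

/* skb_read_actor_t: one complete skb per call, no offset/length
 * bookkeeping; returning <= 0 stops the receive-queue walk.
 */
static int demo_recv_actor(struct sock *sk, struct sk_buff *skb)
{
	/* consume or redirect the skb here; report how much was used */
	return skb->len;
}

static void demo_data_ready(struct sock *sk)
{
	struct socket *sock = sk->sk_socket;

	if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
		return;
	sock->ops->read_skb(sk, demo_recv_actor);
}
```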
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index 9e27fbdb2206f..1b0e6711dec97 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -351,7 +351,7 @@ These instructions have seven implicit operands: * Register R0 is an implicit output which contains the data fetched from the packet. * Registers R1-R5 are scratch registers that are clobbered after a call to - ``BPF_ABS | BPF_LD`` or ``BPF_IND`` | BPF_LD instructions. + ``BPF_ABS | BPF_LD`` or ``BPF_IND | BPF_LD`` instructions. These instructions have an implicit program exit condition as well. When an eBPF program is trying to access the data beyond the packet boundary, the diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 42f2e9a8616c3..f08a4447d363f 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -246,6 +246,7 @@ static bool is_lsi_offset(int offset, int scale) static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { const struct bpf_prog *prog = ctx->prog; + const bool is_main_prog = prog->aux->func_idx == 0; const u8 r6 = bpf2a64[BPF_REG_6]; const u8 r7 = bpf2a64[BPF_REG_7]; const u8 r8 = bpf2a64[BPF_REG_8]; @@ -299,7 +300,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) /* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); - if (!ebpf_from_cbpf) { + if (!ebpf_from_cbpf && is_main_prog) { /* Initialize tail_call_cnt */ emit(A64_MOVZ(1, tcc, 0, 0), ctx); @@ -1530,3 +1531,9 @@ void bpf_jit_free_exec(void *addr) { return vfree(addr); } + +/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ +bool bpf_jit_supports_subprog_tailcalls(void) +{ + return true; +} diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index c98b8c0ed3b83..95b36ab2883f5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2492,3 +2492,9 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len) return ERR_PTR(-EINVAL); return dst; } + +/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ +bool bpf_jit_supports_subprog_tailcalls(void) +{ + return true; +} diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0edd7d2c00646..d05e1495a06e0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1286,6 +1286,9 @@ struct bpf_array { #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 33 +/* Maximum number of loops for bpf_loop */ +#define BPF_MAX_LOOPS BIT(23) + #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ BPF_F_RDONLY_PROG | \ BPF_F_WRONLY | \ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3930c963fa670..81b19669efba8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -344,6 +344,14 @@ struct bpf_verifier_state_list { int miss_cnt, hit_cnt; }; +struct bpf_loop_inline_state { + int initialized:1; /* set to true upon first entry */ + int fit_for_inline:1; /* true if callback function is the same + * at each call and flags are always zero + */ + u32 callback_subprogno; /* valid when fit_for_inline is true */ +}; + /* Possible states for alu_state member. 
*/ #define BPF_ALU_SANITIZE_SRC (1U << 0) #define BPF_ALU_SANITIZE_DST (1U << 1) @@ -373,6 +381,10 @@ struct bpf_insn_aux_data { u32 mem_size; /* mem_size for non-struct typed var */ }; } btf_var; + /* if instruction is a call to bpf_loop this field tracks + * the state of the relevant registers to make decision about inlining + */ + struct bpf_loop_inline_state loop_inline_state; }; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ diff --git a/include/linux/filter.h b/include/linux/filter.h index d0cbb31b1b4d8..4c1a8b2475450 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -914,6 +914,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); +bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_helper_changes_pkt_data(void *func); diff --git a/include/linux/net.h b/include/linux/net.h index 12093f4db50c4..a03485e8cbb2d 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -152,6 +152,8 @@ struct module; struct sk_buff; typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, unsigned int, size_t); +typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *); + struct proto_ops { int family; @@ -214,6 +216,8 @@ struct proto_ops { */ int (*read_sock)(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); + /* This is different from read_sock(), it reads an entire skb at a time. */ + int (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor); int (*sendpage_locked)(struct sock *sk, struct page *page, int offset, size_t size, int flags); int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, diff --git a/include/net/tcp.h b/include/net/tcp.h index c21a9b516f1ed..8e48dc56837b5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -672,6 +672,7 @@ void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); +int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); void tcp_initialize_rcv_mss(struct sock *sk); diff --git a/include/net/udp.h b/include/net/udp.h index b60eea2e3fae2..987f7fc7c0aa8 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -306,8 +306,7 @@ struct sock *__udp6_lib_lookup(struct net *net, struct sk_buff *skb); struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); -int udp_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); +int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); /* UDP uses skb->dev_scratch to cache as much information as possible and avoid * possibly multiple cache miss on dequeue() diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index d5d96ceca1058..7e8fd49406f64 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -723,9 +723,6 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = { .arg4_type = ARG_ANYTHING, }; -/* maximum number of loops */ -#define MAX_LOOPS BIT(23) - BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx, u64, flags) { @@ -733,9 +730,13 @@ BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx, u64 ret; u32 i; + /* Note: these safety checks are also verified when bpf_loop + * is inlined, be careful to modify 
this code in sync. See + * function verifier.c:inline_bpf_loop. + */ if (flags) return -EINVAL; - if (nr_loops > MAX_LOOPS) + if (nr_loops > BPF_MAX_LOOPS) return -E2BIG; for (i = 0; i < nr_loops; i++) { diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index d9a3c9207240f..7e0068c3399ca 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -503,10 +503,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto unlock; } - /* Error during st_ops->reg(). It is very unlikely since - * the above init_member() should have caught it earlier - * before reg(). The only possibility is if there was a race - * in registering the struct_ops (under the same name) to + /* Error during st_ops->reg(). Can happen if this struct_ops needs to be + * verified as a whole, after all init_member() calls. Can also happen if + * there was a race in registering the struct_ops (under the same name) to * a sub-system through different struct_ops's maps. */ set_memory_nx((long)st_map->image, 1); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index d003d4d8242ac..13ace59ac1a7a 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7421,87 +7421,6 @@ EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs); #define MAX_TYPES_ARE_COMPAT_DEPTH 2 -static -int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, - const struct btf *targ_btf, __u32 targ_id, - int level) -{ - const struct btf_type *local_type, *targ_type; - int depth = 32; /* max recursion depth */ - - /* caller made sure that names match (ignoring flavor suffix) */ - local_type = btf_type_by_id(local_btf, local_id); - targ_type = btf_type_by_id(targ_btf, targ_id); - if (btf_kind(local_type) != btf_kind(targ_type)) - return 0; - -recur: - depth--; - if (depth < 0) - return -EINVAL; - - local_type = btf_type_skip_modifiers(local_btf, local_id, &local_id); - targ_type = btf_type_skip_modifiers(targ_btf, targ_id, &targ_id); - if (!local_type || !targ_type) - return -EINVAL; - - if (btf_kind(local_type) != btf_kind(targ_type)) - return 0; - - switch (btf_kind(local_type)) { - case BTF_KIND_UNKN: - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - case BTF_KIND_ENUM: - case BTF_KIND_FWD: - case BTF_KIND_ENUM64: - return 1; - case BTF_KIND_INT: - /* just reject deprecated bitfield-like integers; all other - * integers are by default compatible between each other - */ - return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; - case BTF_KIND_PTR: - local_id = local_type->type; - targ_id = targ_type->type; - goto recur; - case BTF_KIND_ARRAY: - local_id = btf_array(local_type)->type; - targ_id = btf_array(targ_type)->type; - goto recur; - case BTF_KIND_FUNC_PROTO: { - struct btf_param *local_p = btf_params(local_type); - struct btf_param *targ_p = btf_params(targ_type); - __u16 local_vlen = btf_vlen(local_type); - __u16 targ_vlen = btf_vlen(targ_type); - int i, err; - - if (local_vlen != targ_vlen) - return 0; - - for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { - if (level <= 0) - return -EINVAL; - - btf_type_skip_modifiers(local_btf, local_p->type, &local_id); - btf_type_skip_modifiers(targ_btf, targ_p->type, &targ_id); - err = __bpf_core_types_are_compat(local_btf, local_id, - targ_btf, targ_id, - level - 1); - if (err <= 0) - return err; - } - - /* tail recurse for return type check */ - btf_type_skip_modifiers(local_btf, local_type->type, &local_id); - btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id); - goto recur; - } - default: - return 0; - } 
-} - /* Check local and target types for compatibility. This check is used for * type-based CO-RE relocations and follow slightly different rules than * field-based relocations. This function assumes that root types were already @@ -7524,8 +7443,7 @@ recur: int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id) { - return __bpf_core_types_are_compat(local_btf, local_id, - targ_btf, targ_id, + return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, MAX_TYPES_ARE_COMPAT_DEPTH); } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b5ffebcce6cca..f023cb399e3f3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2729,6 +2729,12 @@ bool __weak bpf_jit_needs_zext(void) return false; } +/* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */ +bool __weak bpf_jit_supports_subprog_tailcalls(void) +{ + return false; +} + bool __weak bpf_jit_supports_kfunc_call(void) { return false; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2859901ffbe37..4938477912cda 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6154,7 +6154,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env) { - return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64); + return env->prog->jit_requested && + bpf_jit_supports_subprog_tailcalls(); } static int check_map_func_compatibility(struct bpf_verifier_env *env, @@ -7124,6 +7125,41 @@ static int check_get_func_ip(struct bpf_verifier_env *env) return -ENOTSUPP; } +static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) +{ + return &env->insn_aux_data[env->insn_idx]; +} + +static bool loop_flag_is_zero(struct bpf_verifier_env *env) +{ + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = ®s[BPF_REG_4]; + bool reg_is_null = register_is_null(reg); + + if (reg_is_null) + mark_chain_precision(env, BPF_REG_4); + + return reg_is_null; +} + +static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno) +{ + struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state; + + if (!state->initialized) { + state->initialized = 1; + state->fit_for_inline = loop_flag_is_zero(env); + state->callback_subprogno = subprogno; + return; + } + + if (!state->fit_for_inline) + return; + + state->fit_for_inline = (loop_flag_is_zero(env) && + state->callback_subprogno == subprogno); +} + static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { @@ -7276,6 +7312,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn err = check_bpf_snprintf_call(env, regs); break; case BPF_FUNC_loop: + update_loop_inline_state(env, meta.subprogno); err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_loop_callback_state); break; @@ -7682,11 +7719,6 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env, return true; } -static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) -{ - return &env->insn_aux_data[env->insn_idx]; -} - enum { REASON_BOUNDS = -1, REASON_TYPE = -2, @@ -9064,7 +9096,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (opcode == BPF_END || opcode == BPF_NEG) { if (opcode == BPF_NEG) { - if (BPF_SRC(insn->code) != 0 || + if (BPF_SRC(insn->code) != BPF_K || insn->src_reg != BPF_REG_0 || insn->off != 0 || insn->imm != 0) { verbose(env, "BPF_NEG uses reserved 
fields\n"); @@ -14315,6 +14347,142 @@ patch_call_imm: return 0; } +static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env, + int position, + s32 stack_base, + u32 callback_subprogno, + u32 *cnt) +{ + s32 r6_offset = stack_base + 0 * BPF_REG_SIZE; + s32 r7_offset = stack_base + 1 * BPF_REG_SIZE; + s32 r8_offset = stack_base + 2 * BPF_REG_SIZE; + int reg_loop_max = BPF_REG_6; + int reg_loop_cnt = BPF_REG_7; + int reg_loop_ctx = BPF_REG_8; + + struct bpf_prog *new_prog; + u32 callback_start; + u32 call_insn_offset; + s32 callback_offset; + + /* This represents an inlined version of bpf_iter.c:bpf_loop, + * be careful to modify this code in sync. + */ + struct bpf_insn insn_buf[] = { + /* Return error and jump to the end of the patch if + * expected number of iterations is too big. + */ + BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2), + BPF_MOV32_IMM(BPF_REG_0, -E2BIG), + BPF_JMP_IMM(BPF_JA, 0, 0, 16), + /* spill R6, R7, R8 to use these as loop vars */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset), + /* initialize loop vars */ + BPF_MOV64_REG(reg_loop_max, BPF_REG_1), + BPF_MOV32_IMM(reg_loop_cnt, 0), + BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3), + /* loop header, + * if reg_loop_cnt >= reg_loop_max skip the loop body + */ + BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5), + /* callback call, + * correct callback offset would be set after patching + */ + BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt), + BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx), + BPF_CALL_REL(0), + /* increment loop counter */ + BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1), + /* jump to loop header if callback returned 0 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6), + /* return value of bpf_loop, + * set R0 to the number of iterations + */ + BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt), + /* restore original values of R6, R7, R8 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset), + BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset), + }; + + *cnt = ARRAY_SIZE(insn_buf); + new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt); + if (!new_prog) + return new_prog; + + /* callback start is known only after patching */ + callback_start = env->subprog_info[callback_subprogno].start; + /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */ + call_insn_offset = position + 12; + callback_offset = callback_start - call_insn_offset - 1; + new_prog->insnsi[call_insn_offset].imm = callback_offset; + + return new_prog; +} + +static bool is_bpf_loop_call(struct bpf_insn *insn) +{ + return insn->code == (BPF_JMP | BPF_CALL) && + insn->src_reg == 0 && + insn->imm == BPF_FUNC_loop; +} + +/* For all sub-programs in the program (including main) check + * insn_aux_data to see if there are bpf_loop calls that require + * inlining. If such calls are found the calls are replaced with a + * sequence of instructions produced by `inline_bpf_loop` function and + * subprog stack_depth is increased by the size of 3 registers. + * This stack space is used to spill values of the R6, R7, R8. These + * registers are used to store the loop bound, counter and context + * variables. 
+ */ +static int optimize_bpf_loop(struct bpf_verifier_env *env) +{ + struct bpf_subprog_info *subprogs = env->subprog_info; + int i, cur_subprog = 0, cnt, delta = 0; + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + u16 stack_depth = subprogs[cur_subprog].stack_depth; + u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth; + u16 stack_depth_extra = 0; + + for (i = 0; i < insn_cnt; i++, insn++) { + struct bpf_loop_inline_state *inline_state = + &env->insn_aux_data[i + delta].loop_inline_state; + + if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) { + struct bpf_prog *new_prog; + + stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup; + new_prog = inline_bpf_loop(env, + i + delta, + -(stack_depth + stack_depth_extra), + inline_state->callback_subprogno, + &cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = new_prog; + insn = new_prog->insnsi + i + delta; + } + + if (subprogs[cur_subprog + 1].start == i + delta + 1) { + subprogs[cur_subprog].stack_depth += stack_depth_extra; + cur_subprog++; + stack_depth = subprogs[cur_subprog].stack_depth; + stack_depth_roundup = round_up(stack_depth, 8) - stack_depth; + stack_depth_extra = 0; + } + } + + env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; + + return 0; +} + static void free_states(struct bpf_verifier_env *env) { struct bpf_verifier_state_list *sl, *sln; @@ -15052,6 +15220,9 @@ skip_full_check: ret = check_max_stack_depth(env); /* instruction rewrites happen after this point */ + if (ret == 0) + ret = optimize_bpf_loop(env); + if (is_priv) { if (ret == 0) opt_hard_wire_dead_code_branches(env); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 1f5351cae25f2..88ba5b4bd0c59 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1343,6 +1343,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, int size, esize; int rctx; +#ifdef CONFIG_BPF_EVENTS if (bpf_prog_array_valid(call)) { u32 ret; @@ -1350,6 +1351,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, if (!ret) return; } +#endif /* CONFIG_BPF_EVENTS */ esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 2a7836e115b4e..5820704165a64 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -14733,9 +14733,9 @@ static struct skb_segment_test skb_segment_tests[] __initconst = { .build_skb = build_test_skb_linear_no_head_frag, .features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_GSO | - NETIF_F_LLTX_BIT | NETIF_F_GRO | + NETIF_F_LLTX | NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | - NETIF_F_HW_VLAN_STAG_TX_BIT + NETIF_F_HW_VLAN_STAG_TX } }; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index fc69154bbc88d..266d3b7b7d0ba 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -497,23 +497,27 @@ bool sk_msg_is_readable(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_msg_is_readable); -static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, - struct sk_buff *skb) +static struct sk_msg *alloc_sk_msg(void) { struct sk_msg *msg; - if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) + msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL); + if (unlikely(!msg)) return NULL; + sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS); + return msg; +} - if (!sk_rmem_schedule(sk, skb, skb->truesize)) +static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, + struct sk_buff *skb) +{ + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) return NULL; - msg = 
kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL); - if (unlikely(!msg)) + if (!sk_rmem_schedule(sk, skb, skb->truesize)) return NULL; - sk_msg_init(msg); - return msg; + return alloc_sk_msg(); } static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, @@ -590,13 +594,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len) { - struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); + struct sk_msg *msg = alloc_sk_msg(); struct sock *sk = psock->sk; int err; if (unlikely(!msg)) return -EAGAIN; - sk_msg_init(msg); skb_set_owner_r(skb, sk); err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); if (err < 0) @@ -1165,21 +1168,14 @@ static void sk_psock_done_strp(struct sk_psock *psock) } #endif /* CONFIG_BPF_STREAM_PARSER */ -static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, - unsigned int offset, size_t orig_len) +static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb) { - struct sock *sk = (struct sock *)desc->arg.data; struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; - int len = orig_len; + int len = skb->len; - /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */ - skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) { - desc->error = -ENOMEM; - return 0; - } + skb_get(skb); rcu_read_lock(); psock = sk_psock(sk); @@ -1192,12 +1188,10 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, if (!prog) prog = READ_ONCE(psock->progs.skb_verdict); if (likely(prog)) { - skb->sk = sk; skb_dst_drop(skb); skb_bpf_redirect_clear(skb); ret = bpf_prog_run_pin_on_cpu(prog, skb); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); - skb->sk = NULL; } if (sk_psock_verdict_apply(psock, skb, ret) < 0) len = 0; @@ -1209,16 +1203,10 @@ out: static void sk_psock_verdict_data_ready(struct sock *sk) { struct socket *sock = sk->sk_socket; - read_descriptor_t desc; - if (unlikely(!sock || !sock->ops || !sock->ops->read_sock)) + if (unlikely(!sock || !sock->ops || !sock->ops->read_skb)) return; - - desc.arg.data = sk; - desc.error = 0; - desc.count = 1; - - sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv); + sock->ops->read_skb(sk, sk_psock_verdict_recv); } void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index da81f56fdd1c5..7abd652a558f0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1040,6 +1040,7 @@ const struct proto_ops inet_stream_ops = { .sendpage = inet_sendpage, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, + .read_skb = tcp_read_skb, .sendmsg_locked = tcp_sendmsg_locked, .sendpage_locked = tcp_sendpage_locked, .peek_len = tcp_peek_len, @@ -1067,7 +1068,7 @@ const struct proto_ops inet_dgram_ops = { .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, - .read_sock = udp_read_sock, + .read_skb = udp_read_skb, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index f79ab942f03b6..7a181631b9952 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -14,18 +14,6 @@ /* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. 
*/ extern struct bpf_struct_ops bpf_tcp_congestion_ops; -static u32 optional_ops[] = { - offsetof(struct tcp_congestion_ops, init), - offsetof(struct tcp_congestion_ops, release), - offsetof(struct tcp_congestion_ops, set_state), - offsetof(struct tcp_congestion_ops, cwnd_event), - offsetof(struct tcp_congestion_ops, in_ack_event), - offsetof(struct tcp_congestion_ops, pkts_acked), - offsetof(struct tcp_congestion_ops, min_tso_segs), - offsetof(struct tcp_congestion_ops, sndbuf_expand), - offsetof(struct tcp_congestion_ops, cong_control), -}; - static u32 unsupported_ops[] = { offsetof(struct tcp_congestion_ops, get_info), }; @@ -51,18 +39,6 @@ static int bpf_tcp_ca_init(struct btf *btf) return 0; } -static bool is_optional(u32 member_offset) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(optional_ops); i++) { - if (member_offset == optional_ops[i]) - return true; - } - - return false; -} - static bool is_unsupported(u32 member_offset) { unsigned int i; @@ -111,6 +87,12 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, } switch (off) { + case offsetof(struct sock, sk_pacing_rate): + end = offsetofend(struct sock, sk_pacing_rate); + break; + case offsetof(struct sock, sk_pacing_status): + end = offsetofend(struct sock, sk_pacing_status); + break; case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv): end = offsetofend(struct inet_connection_sock, icsk_ca_priv); break; @@ -240,7 +222,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, { const struct tcp_congestion_ops *utcp_ca; struct tcp_congestion_ops *tcp_ca; - int prog_fd; u32 moff; utcp_ca = (const struct tcp_congestion_ops *)udata; @@ -262,14 +243,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, return 1; } - if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL)) - return 0; - - /* Ensure bpf_prog is provided for compulsory func ptr */ - prog_fd = (int)(*(unsigned long *)(udata + moff)); - if (!prog_fd && !is_optional(moff) && !is_unsupported(moff)) - return -EINVAL; - return 0; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f7309452bdcec..9d2fd3ced21b2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1734,6 +1734,50 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, } EXPORT_SYMBOL(tcp_read_sock); +int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 seq = tp->copied_seq; + struct sk_buff *skb; + int copied = 0; + u32 offset; + + if (sk->sk_state == TCP_LISTEN) + return -ENOTCONN; + + while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { + int used; + + __skb_unlink(skb, &sk->sk_receive_queue); + used = recv_actor(sk, skb); + if (used <= 0) { + if (!copied) + copied = used; + break; + } + seq += used; + copied += used; + + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { + consume_skb(skb); + ++seq; + break; + } + consume_skb(skb); + break; + } + WRITE_ONCE(tp->copied_seq, seq); + + tcp_rcv_space_adjust(sk); + + /* Clean up data we have read: This will do ACK frames. 
*/ + if (copied > 0) + tcp_cleanup_rbuf(sk, copied); + + return copied; +} +EXPORT_SYMBOL(tcp_read_skb); + int tcp_peek_len(struct socket *sock) { return tcp_inq(sock->sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6172b4750a888..2516078aa03e7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1797,8 +1797,7 @@ busy_check: } EXPORT_SYMBOL(__skb_recv_udp); -int udp_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { int copied = 0; @@ -1820,7 +1819,8 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc, continue; } - used = recv_actor(desc, skb, 0, skb->len); + WARN_ON(!skb_set_owner_sk_safe(skb, sk)); + used = recv_actor(sk, skb); if (used <= 0) { if (!copied) copied = used; @@ -1831,13 +1831,12 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc, } kfree_skb(skb); - if (!desc->count) - break; + break; } return copied; } -EXPORT_SYMBOL(udp_read_sock); +EXPORT_SYMBOL(udp_read_skb); /* * This should be easy, if there is something there we diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 658823e91ecab..0ee0770e79aa5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -702,6 +702,7 @@ const struct proto_ops inet6_stream_ops = { .sendpage_locked = tcp_sendpage_locked, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, + .read_skb = tcp_read_skb, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, @@ -727,7 +728,7 @@ const struct proto_ops inet6_dgram_ops = { .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet6_sendmsg, /* retpoline's sake */ .recvmsg = inet6_recvmsg, /* retpoline's sake */ - .read_sock = udp_read_sock, + .read_skb = udp_read_skb, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, .set_peek_off = sk_set_peek_off, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 49f6626330c3d..1b811482fd570 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -736,10 +736,8 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, unsigned int flags); static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); -static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); -static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); +static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor); +static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor); static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); @@ -793,7 +791,7 @@ static const struct proto_ops unix_stream_ops = { .shutdown = unix_shutdown, .sendmsg = unix_stream_sendmsg, .recvmsg = unix_stream_recvmsg, - .read_sock = unix_stream_read_sock, + .read_skb = unix_stream_read_skb, .mmap = sock_no_mmap, .sendpage = unix_stream_sendpage, .splice_read = unix_stream_splice_read, @@ -818,7 +816,7 @@ static const struct proto_ops unix_dgram_ops = { .listen = sock_no_listen, .shutdown = unix_shutdown, .sendmsg = unix_dgram_sendmsg, - .read_sock = unix_read_sock, + .read_skb = unix_read_skb, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, @@ -2478,8 +2476,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si return __unix_dgram_recvmsg(sk, msg, size, flags); 
} -static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { int copied = 0; @@ -2494,7 +2491,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, if (!skb) return err; - used = recv_actor(desc, skb, 0, skb->len); + used = recv_actor(sk, skb); if (used <= 0) { if (!copied) copied = used; @@ -2505,8 +2502,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, } kfree_skb(skb); - if (!desc->count) - break; + break; } return copied; @@ -2641,13 +2637,12 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, } #endif -static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { if (unlikely(sk->sk_state != TCP_ESTABLISHED)) return -ENOTCONN; - return unix_read_sock(sk, desc, recv_actor); + return unix_read_skb(sk, recv_actor); } static int unix_stream_read_generic(struct unix_stream_read_state *state, diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c index f0c5d95084dec..0a5c704badd00 100644 --- a/samples/bpf/xdp1_kern.c +++ b/samples/bpf/xdp1_kern.c @@ -39,11 +39,13 @@ static int parse_ipv6(void *data, u64 nh_off, void *data_end) return ip6h->nexthdr; } -SEC("xdp1") +#define XDPBUFSIZE 64 +SEC("xdp.frags") int xdp_prog1(struct xdp_md *ctx) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; + __u8 pkt[XDPBUFSIZE] = {}; + void *data_end = &pkt[XDPBUFSIZE-1]; + void *data = pkt; struct ethhdr *eth = data; int rc = XDP_DROP; long *value; @@ -51,6 +53,9 @@ int xdp_prog1(struct xdp_md *ctx) u64 nh_off; u32 ipproto; + if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt))) + return rc; + nh_off = sizeof(*eth); if (data + nh_off > data_end) return rc; diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c index d8a64ab077b00..3332ba6bb95fb 100644 --- a/samples/bpf/xdp2_kern.c +++ b/samples/bpf/xdp2_kern.c @@ -55,11 +55,13 @@ static int parse_ipv6(void *data, u64 nh_off, void *data_end) return ip6h->nexthdr; } -SEC("xdp1") +#define XDPBUFSIZE 64 +SEC("xdp.frags") int xdp_prog1(struct xdp_md *ctx) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; + __u8 pkt[XDPBUFSIZE] = {}; + void *data_end = &pkt[XDPBUFSIZE-1]; + void *data = pkt; struct ethhdr *eth = data; int rc = XDP_DROP; long *value; @@ -67,6 +69,9 @@ int xdp_prog1(struct xdp_md *ctx) u64 nh_off; u32 ipproto; + if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt))) + return rc; + nh_off = sizeof(*eth); if (data + nh_off > data_end) return rc; diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c index 575d57e4b8d63..0e2bca3a3fff7 100644 --- a/samples/bpf/xdp_tx_iptunnel_kern.c +++ b/samples/bpf/xdp_tx_iptunnel_kern.c @@ -212,7 +212,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp) return XDP_TX; } -SEC("xdp_tx_iptunnel") +SEC("xdp.frags") int _xdp_tx_iptunnel(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 49e359cd34df8..335467ece75f5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5732,77 +5732,7 @@ err_out: int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id) { - const struct btf_type *local_type, *targ_type; - int depth = 32; /* max recursion depth 
*/ - - /* caller made sure that names match (ignoring flavor suffix) */ - local_type = btf__type_by_id(local_btf, local_id); - targ_type = btf__type_by_id(targ_btf, targ_id); - if (!btf_kind_core_compat(local_type, targ_type)) - return 0; - -recur: - depth--; - if (depth < 0) - return -EINVAL; - - local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); - targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); - if (!local_type || !targ_type) - return -EINVAL; - - if (!btf_kind_core_compat(local_type, targ_type)) - return 0; - - switch (btf_kind(local_type)) { - case BTF_KIND_UNKN: - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - case BTF_KIND_ENUM: - case BTF_KIND_ENUM64: - case BTF_KIND_FWD: - return 1; - case BTF_KIND_INT: - /* just reject deprecated bitfield-like integers; all other - * integers are by default compatible between each other - */ - return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; - case BTF_KIND_PTR: - local_id = local_type->type; - targ_id = targ_type->type; - goto recur; - case BTF_KIND_ARRAY: - local_id = btf_array(local_type)->type; - targ_id = btf_array(targ_type)->type; - goto recur; - case BTF_KIND_FUNC_PROTO: { - struct btf_param *local_p = btf_params(local_type); - struct btf_param *targ_p = btf_params(targ_type); - __u16 local_vlen = btf_vlen(local_type); - __u16 targ_vlen = btf_vlen(targ_type); - int i, err; - - if (local_vlen != targ_vlen) - return 0; - - for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { - skip_mods_and_typedefs(local_btf, local_p->type, &local_id); - skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id); - err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id); - if (err <= 0) - return err; - } - - /* tail recurse for return type check */ - skip_mods_and_typedefs(local_btf, local_type->type, &local_id); - skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id); - goto recur; - } - default: - pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", - btf_kind_str(local_type), local_id, targ_id); - return 0; - } + return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32); } static size_t bpf_core_hash_fn(const void *key, void *ctx) diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 6ad3c3891a9ae..e070123332cd4 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -141,6 +141,86 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) } } +int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id, int level) +{ + const struct btf_type *local_type, *targ_type; + int depth = 32; /* max recursion depth */ + + /* caller made sure that names match (ignoring flavor suffix) */ + local_type = btf_type_by_id(local_btf, local_id); + targ_type = btf_type_by_id(targ_btf, targ_id); + if (!btf_kind_core_compat(local_type, targ_type)) + return 0; + +recur: + depth--; + if (depth < 0) + return -EINVAL; + + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (!btf_kind_core_compat(local_type, targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_UNKN: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + case BTF_KIND_ENUM64: + return 1; + case BTF_KIND_INT: + /* just reject deprecated bitfield-like integers; all other + * integers are by 
default compatible between each other + */ + return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; + case BTF_KIND_PTR: + local_id = local_type->type; + targ_id = targ_type->type; + goto recur; + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + case BTF_KIND_FUNC_PROTO: { + struct btf_param *local_p = btf_params(local_type); + struct btf_param *targ_p = btf_params(targ_type); + __u16 local_vlen = btf_vlen(local_type); + __u16 targ_vlen = btf_vlen(targ_type); + int i, err; + + if (local_vlen != targ_vlen) + return 0; + + for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { + if (level <= 0) + return -EINVAL; + + skip_mods_and_typedefs(local_btf, local_p->type, &local_id); + skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id); + err = __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, + level - 1); + if (err <= 0) + return err; + } + + /* tail recurse for return type check */ + skip_mods_and_typedefs(local_btf, local_type->type, &local_id); + skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id); + goto recur; + } + default: + pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", + btf_kind_str(local_type), local_id, targ_id); + return 0; + } +} + /* * Turn bpf_core_relo into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index 7df0da082f2c9..3fd3842d42303 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -68,6 +68,8 @@ struct bpf_core_relo_res { __u32 new_type_id; }; +int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id, int level); int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id); diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index f8ad581ea2479..6bd7c288e8205 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -9,6 +9,7 @@ #include <linux/bpf.h> #include <bpf/libbpf.h> #include <bpf/bpf.h> +#include <linux/filter.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/string.h> @@ -49,6 +50,7 @@ struct bpf_prog_priv { struct bpf_insn *insns_buf; int nr_types; int *type_mapping; + int *prologue_fds; }; struct bpf_perf_object { @@ -56,6 +58,11 @@ struct bpf_perf_object { struct bpf_object *obj; }; +struct bpf_preproc_result { + struct bpf_insn *new_insn_ptr; + int new_insn_cnt; +}; + static LIST_HEAD(bpf_objects_list); static struct hashmap *bpf_program_hash; static struct hashmap *bpf_map_hash; @@ -86,6 +93,7 @@ bpf_perf_object__next(struct bpf_perf_object *prev) (perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp)) static bool libbpf_initialized; +static int libbpf_sec_handler; static int bpf_perf_object__add(struct bpf_object *obj) { @@ -99,12 +107,76 @@ static int bpf_perf_object__add(struct bpf_object *obj) return perf_obj ? 
0 : -ENOMEM; } +static void *program_priv(const struct bpf_program *prog) +{ + void *priv; + + if (IS_ERR_OR_NULL(bpf_program_hash)) + return NULL; + if (!hashmap__find(bpf_program_hash, prog, &priv)) + return NULL; + return priv; +} + +static struct bpf_insn prologue_init_insn[] = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), +}; + +static int libbpf_prog_prepare_load_fn(struct bpf_program *prog, + struct bpf_prog_load_opts *opts __maybe_unused, + long cookie __maybe_unused) +{ + size_t init_size_cnt = ARRAY_SIZE(prologue_init_insn); + size_t orig_insn_cnt, insn_cnt, init_size, orig_size; + struct bpf_prog_priv *priv = program_priv(prog); + const struct bpf_insn *orig_insn; + struct bpf_insn *insn; + + if (IS_ERR_OR_NULL(priv)) { + pr_debug("bpf: failed to get private field\n"); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + if (!priv->need_prologue) + return 0; + + /* prepend initialization code to program instructions */ + orig_insn = bpf_program__insns(prog); + orig_insn_cnt = bpf_program__insn_cnt(prog); + init_size = init_size_cnt * sizeof(*insn); + orig_size = orig_insn_cnt * sizeof(*insn); + + insn_cnt = orig_insn_cnt + init_size_cnt; + insn = malloc(insn_cnt * sizeof(*insn)); + if (!insn) + return -ENOMEM; + + memcpy(insn, prologue_init_insn, init_size); + memcpy((char *) insn + init_size, orig_insn, orig_size); + bpf_program__set_insns(prog, insn, insn_cnt); + return 0; +} + static int libbpf_init(void) { + LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts, + .prog_prepare_load_fn = libbpf_prog_prepare_load_fn, + ); + if (libbpf_initialized) return 0; libbpf_set_print(libbpf_perf_print); + libbpf_sec_handler = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE, + 0, &handler_opts); + if (libbpf_sec_handler < 0) { + pr_debug("bpf: failed to register libbpf section handler: %d\n", + libbpf_sec_handler); + return -BPF_LOADER_ERRNO__INTERNAL; + } libbpf_initialized = true; return 0; } @@ -188,14 +260,31 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) return obj; } +static void close_prologue_programs(struct bpf_prog_priv *priv) +{ + struct perf_probe_event *pev; + int i, fd; + + if (!priv->need_prologue) + return; + pev = &priv->pev; + for (i = 0; i < pev->ntevs; i++) { + fd = priv->prologue_fds[i]; + if (fd != -1) + close(fd); + } +} + static void clear_prog_priv(const struct bpf_program *prog __maybe_unused, void *_priv) { struct bpf_prog_priv *priv = _priv; + close_prologue_programs(priv); cleanup_perf_probe_events(&priv->pev, 1); zfree(&priv->insns_buf); + zfree(&priv->prologue_fds); zfree(&priv->type_mapping); zfree(&priv->sys_name); zfree(&priv->evt_name); @@ -243,17 +332,6 @@ static bool ptr_equal(const void *key1, const void *key2, return key1 == key2; } -static void *program_priv(const struct bpf_program *prog) -{ - void *priv; - - if (IS_ERR_OR_NULL(bpf_program_hash)) - return NULL; - if (!hashmap__find(bpf_program_hash, prog, &priv)) - return NULL; - return priv; -} - static int program_set_priv(struct bpf_program *prog, void *priv) { void *old_priv; @@ -558,8 +636,8 @@ static int bpf__prepare_probe(void) static int preproc_gen_prologue(struct bpf_program *prog, int n, - struct bpf_insn *orig_insns, int orig_insns_cnt, - struct bpf_prog_prep_result *res) + const struct bpf_insn *orig_insns, int orig_insns_cnt, + struct bpf_preproc_result *res) { struct bpf_prog_priv *priv = program_priv(prog); struct probe_trace_event *tev; @@ -607,7 +685,6 @@ 
preproc_gen_prologue(struct bpf_program *prog, int n, res->new_insn_ptr = buf; res->new_insn_cnt = prologue_cnt + orig_insns_cnt; - res->pfd = NULL; return 0; errout: @@ -715,7 +792,7 @@ static int hook_load_preprocessor(struct bpf_program *prog) struct bpf_prog_priv *priv = program_priv(prog); struct perf_probe_event *pev; bool need_prologue = false; - int err, i; + int i; if (IS_ERR_OR_NULL(priv)) { pr_debug("Internal error when hook preprocessor\n"); @@ -753,6 +830,13 @@ static int hook_load_preprocessor(struct bpf_program *prog) return -ENOMEM; } + priv->prologue_fds = malloc(sizeof(int) * pev->ntevs); + if (!priv->prologue_fds) { + pr_debug("Not enough memory: alloc prologue fds failed\n"); + return -ENOMEM; + } + memset(priv->prologue_fds, -1, sizeof(int) * pev->ntevs); + priv->type_mapping = malloc(sizeof(int) * pev->ntevs); if (!priv->type_mapping) { pr_debug("Not enough memory: alloc type_mapping failed\n"); @@ -761,13 +845,7 @@ static int hook_load_preprocessor(struct bpf_program *prog) memset(priv->type_mapping, -1, sizeof(int) * pev->ntevs); - err = map_prologue(pev, priv->type_mapping, &priv->nr_types); - if (err) - return err; - - err = bpf_program__set_prep(prog, priv->nr_types, - preproc_gen_prologue); - return err; + return map_prologue(pev, priv->type_mapping, &priv->nr_types); } int bpf__probe(struct bpf_object *obj) @@ -874,6 +952,77 @@ int bpf__unprobe(struct bpf_object *obj) return ret; } +static int bpf_object__load_prologue(struct bpf_object *obj) +{ + int init_cnt = ARRAY_SIZE(prologue_init_insn); + const struct bpf_insn *orig_insns; + struct bpf_preproc_result res; + struct perf_probe_event *pev; + struct bpf_program *prog; + int orig_insns_cnt; + + bpf_object__for_each_program(prog, obj) { + struct bpf_prog_priv *priv = program_priv(prog); + int err, i, fd; + + if (IS_ERR_OR_NULL(priv)) { + pr_debug("bpf: failed to get private field\n"); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + if (!priv->need_prologue) + continue; + + /* + * For each program that needs prologue we do following: + * + * - take its current instructions and use them + * to generate the new code with prologue + * - load new instructions with bpf_prog_load + * and keep the fd in prologue_fds + * - new fd will be used in bpf__foreach_event + * to connect this program with perf evsel + */ + orig_insns = bpf_program__insns(prog); + orig_insns_cnt = bpf_program__insn_cnt(prog); + + pev = &priv->pev; + for (i = 0; i < pev->ntevs; i++) { + /* + * Skipping artificall prologue_init_insn instructions + * (init_cnt), so the prologue can be generated instead + * of them. + */ + err = preproc_gen_prologue(prog, i, + orig_insns + init_cnt, + orig_insns_cnt - init_cnt, + &res); + if (err) + return err; + + fd = bpf_prog_load(bpf_program__get_type(prog), + bpf_program__name(prog), "GPL", + res.new_insn_ptr, + res.new_insn_cnt, NULL); + if (fd < 0) { + char bf[128]; + + libbpf_strerror(-errno, bf, sizeof(bf)); + pr_debug("bpf: load objects with prologue failed: err=%d: (%s)\n", + -errno, bf); + return -errno; + } + priv->prologue_fds[i] = fd; + } + /* + * We no longer need the original program, + * we can unload it. 
+ */ + bpf_program__unload(prog); + } + return 0; +} + int bpf__load(struct bpf_object *obj) { int err; @@ -885,7 +1034,7 @@ int bpf__load(struct bpf_object *obj) pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf); return err; } - return 0; + return bpf_object__load_prologue(obj); } int bpf__foreach_event(struct bpf_object *obj, @@ -920,13 +1069,10 @@ int bpf__foreach_event(struct bpf_object *obj, for (i = 0; i < pev->ntevs; i++) { tev = &pev->tevs[i]; - if (priv->need_prologue) { - int type = priv->type_mapping[i]; - - fd = bpf_program__nth_fd(prog, type); - } else { + if (priv->need_prologue) + fd = priv->prologue_fds[i]; + else fd = bpf_program__fd(prog); - } if (fd < 0) { pr_debug("bpf: failed to get file descriptor\n"); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index cb8e552e14185..4fbd88a8ed9e0 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -571,6 +571,7 @@ $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h +$(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -583,7 +584,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_bloom_filter_map.o \ $(OUTPUT)/bench_bpf_loop.o \ $(OUTPUT)/bench_strncmp.o \ - $(OUTPUT)/bench_bpf_hashmap_full_update.o + $(OUTPUT)/bench_bpf_hashmap_full_update.o \ + $(OUTPUT)/bench_local_storage.o $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index d8aa62be996bd..1e7b5d4b1f11e 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -150,6 +150,53 @@ void ops_report_final(struct bench_res res[], int res_cnt) printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt); } +void local_storage_report_progress(int iter, struct bench_res *res, + long delta_ns) +{ + double important_hits_per_sec, hits_per_sec; + double delta_sec = delta_ns / 1000000000.0; + + hits_per_sec = res->hits / 1000000.0 / delta_sec; + important_hits_per_sec = res->important_hits / 1000000.0 / delta_sec; + + printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); + + printf("hits %8.3lfM/s ", hits_per_sec); + printf("important_hits %8.3lfM/s\n", important_hits_per_sec); +} + +void local_storage_report_final(struct bench_res res[], int res_cnt) +{ + double important_hits_mean = 0.0, important_hits_stddev = 0.0; + double hits_mean = 0.0, hits_stddev = 0.0; + int i; + + for (i = 0; i < res_cnt; i++) { + hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); + important_hits_mean += res[i].important_hits / 1000000.0 / (0.0 + res_cnt); + } + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) { + hits_stddev += (hits_mean - res[i].hits / 1000000.0) * + (hits_mean - res[i].hits / 1000000.0) / + (res_cnt - 1.0); + important_hits_stddev += + (important_hits_mean - res[i].important_hits / 1000000.0) * + (important_hits_mean - res[i].important_hits / 1000000.0) / + (res_cnt - 1.0); + } + + hits_stddev = sqrt(hits_stddev); + important_hits_stddev = sqrt(important_hits_stddev); + } + printf("Summary: hits throughput %8.3lf \u00B1 
%5.3lf M ops/s, ", + hits_mean, hits_stddev); + printf("hits latency %8.3lf ns/op, ", 1000.0 / hits_mean); + printf("important_hits throughput %8.3lf \u00B1 %5.3lf M ops/s\n", + important_hits_mean, important_hits_stddev); +} + const char *argp_program_version = "benchmark"; const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; const char argp_program_doc[] = @@ -188,12 +235,14 @@ static const struct argp_option opts[] = { extern struct argp bench_ringbufs_argp; extern struct argp bench_bloom_map_argp; extern struct argp bench_bpf_loop_argp; +extern struct argp bench_local_storage_argp; extern struct argp bench_strncmp_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 }, + { &bench_local_storage_argp, 0, "local_storage benchmark", 0 }, { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 }, {}, }; @@ -397,6 +446,9 @@ extern const struct bench bench_bpf_loop; extern const struct bench bench_strncmp_no_helper; extern const struct bench bench_strncmp_helper; extern const struct bench bench_bpf_hashmap_full_update; +extern const struct bench bench_local_storage_cache_seq_get; +extern const struct bench bench_local_storage_cache_interleaved_get; +extern const struct bench bench_local_storage_cache_hashmap_control; static const struct bench *benchs[] = { &bench_count_global, @@ -432,6 +484,9 @@ static const struct bench *benchs[] = { &bench_strncmp_no_helper, &bench_strncmp_helper, &bench_bpf_hashmap_full_update, + &bench_local_storage_cache_seq_get, + &bench_local_storage_cache_interleaved_get, + &bench_local_storage_cache_hashmap_control, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index fb3e213df3dc9..4b15286753ba0 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -34,6 +34,7 @@ struct bench_res { long hits; long drops; long false_hits; + long important_hits; }; struct bench { @@ -61,6 +62,9 @@ void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); void false_hits_report_final(struct bench_res res[], int res_cnt); void ops_report_progress(int iter, struct bench_res *res, long delta_ns); void ops_report_final(struct bench_res res[], int res_cnt); +void local_storage_report_progress(int iter, struct bench_res *res, + long delta_ns); +void local_storage_report_final(struct bench_res res[], int res_cnt); static inline __u64 get_time_ns(void) { diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c new file mode 100644 index 0000000000000..5a378c84e81f6 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include <argp.h> +#include <linux/btf.h> + +#include "local_storage_bench.skel.h" +#include "bench.h" + +#include <test_btf.h> + +static struct { + __u32 nr_maps; + __u32 hashmap_nr_keys_used; +} args = { + .nr_maps = 1000, + .hashmap_nr_keys_used = 1000, +}; + +enum { + ARG_NR_MAPS = 6000, + ARG_HASHMAP_NR_KEYS_USED = 6001, +}; + +static const struct argp_option opts[] = { + { "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0, + "Set number of local_storage maps"}, + { "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS", + 0, "When doing hashmap test, set number of hashmap keys test uses"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_MAPS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_maps"); + argp_usage(state); + } + args.nr_maps = ret; + break; + case ARG_HASHMAP_NR_KEYS_USED: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid hashmap_nr_keys_used"); + argp_usage(state); + } + args.hashmap_nr_keys_used = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_local_storage_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* Keep in sync w/ array of maps in bpf */ +#define MAX_NR_MAPS 1000 +/* keep in sync w/ same define in bpf */ +#define HASHMAP_SZ 4194304 + +static void validate(void) +{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } + + if (args.nr_maps > MAX_NR_MAPS) { + fprintf(stderr, "nr_maps must be <= 1000\n"); + exit(1); + } + + if (args.hashmap_nr_keys_used > HASHMAP_SZ) { + fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ); + exit(1); + } +} + +static struct { + struct local_storage_bench *skel; + void *bpf_obj; + struct bpf_map *array_of_maps; +} ctx; + +static void prepopulate_hashmap(int fd) +{ + int i, key, val; + + /* local_storage gets will have BPF_LOCAL_STORAGE_GET_F_CREATE flag set, so + * populate the hashmap for a similar comparison + */ + for (i = 0; i < HASHMAP_SZ; i++) { + key = val = i; + if (bpf_map_update_elem(fd, &key, &val, 0)) { + fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key); + exit(1); + } + } +} + +static void __setup(struct bpf_program *prog, bool hashmap) +{ + struct bpf_map *inner_map; + int i, fd, mim_fd, err; + + LIBBPF_OPTS(bpf_map_create_opts, create_opts); + + if (!hashmap) + create_opts.map_flags = BPF_F_NO_PREALLOC; + + ctx.skel->rodata->num_maps = args.nr_maps; + ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used; + inner_map = bpf_map__inner_map(ctx.array_of_maps); + create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map); + create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map); + + err = local_storage_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "Error loading skeleton\n"); + goto err_out; + } + + create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj); + + mim_fd = bpf_map__fd(ctx.array_of_maps); + if (mim_fd < 0) { + fprintf(stderr, "Error getting map_in_map fd\n"); + goto err_out; + } + + for (i = 0; i < args.nr_maps; i++) { + if (hashmap) + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), + sizeof(int), HASHMAP_SZ, &create_opts); + else + fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int), + sizeof(int), 0, &create_opts); + if 
(fd < 0) { + fprintf(stderr, "Error creating map %d: %d\n", i, fd); + goto err_out; + } + + if (hashmap) + prepopulate_hashmap(fd); + + err = bpf_map_update_elem(mim_fd, &i, &fd, 0); + if (err) { + fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i); + goto err_out; + } + } + + if (!bpf_program__attach(prog)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + return; +err_out: + exit(1); +} + +static void hashmap_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_hash_maps; + skel->rodata->use_hashmap = 1; + skel->rodata->interleave = 0; + + __setup(skel->progs.get_local, true); +} + +static void local_storage_cache_get_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_local_storage_maps; + skel->rodata->use_hashmap = 0; + skel->rodata->interleave = 0; + + __setup(skel->progs.get_local, false); +} + +static void local_storage_cache_get_interleaved_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_local_storage_maps; + skel->rodata->use_hashmap = 0; + skel->rodata->interleave = 1; + + __setup(skel->progs.get_local, false); +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); + res->important_hits = atomic_swap(&ctx.skel->bss->important_hits, 0); +} + +static inline void trigger_bpf_program(void) +{ + syscall(__NR_getpgid); +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void *producer(void *input) +{ + while (true) + trigger_bpf_program(); + + return NULL; +} + +/* The cache sequential and interleaved get benchmarks test local_storage get + * performance; specifically, they demonstrate the performance cliff of the + * current list-plus-cache local_storage model. + * + * cache sequential get: call bpf_task_storage_get on n maps in order + * cache interleaved get: like "sequential get", but interleave 4 calls to the + * 'important' map (idx 0 in array_of_maps) for every 10 calls.
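(This interleaving corresponds to the map_idx % 3 == 0 branch in + * the loop() callback of progs/local_storage_bench.c.)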
Goal + * is to mimic environment where many progs are accessing their local_storage + * maps, with 'our' prog needing to access its map more often than others + */ +const struct bench bench_local_storage_cache_seq_get = { + .name = "local-storage-cache-seq-get", + .validate = validate, + .setup = local_storage_cache_get_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; + +const struct bench bench_local_storage_cache_interleaved_get = { + .name = "local-storage-cache-int-get", + .validate = validate, + .setup = local_storage_cache_get_interleaved_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; + +const struct bench bench_local_storage_cache_hashmap_control = { + .name = "local-storage-cache-hashmap-control", + .validate = validate, + .setup = hashmap_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh new file mode 100755 index 0000000000000..2eb2b513a1739 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +header "Hashmap Control" +for i in 10 1000 10000 100000 4194304; do +subtitle "num keys: $i" + summarize_local_storage "hashmap (control) sequential get: "\ + "$(./bench --nr_maps 1 --hashmap_nr_keys_used=$i local-storage-cache-hashmap-control)" + printf "\n" +done + +header "Local Storage" +for i in 1 10 16 17 24 32 100 1000; do +subtitle "num_maps: $i" + summarize_local_storage "local_storage cache sequential get: "\ + "$(./bench --nr_maps $i local-storage-cache-seq-get)" + summarize_local_storage "local_storage cache interleaved get: "\ + "$(./bench --nr_maps $i local-storage-cache-int-get)" + printf "\n" +done diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh index 6c5e6023a69f8..d9f40af820067 100644 --- a/tools/testing/selftests/bpf/benchs/run_common.sh +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -41,6 +41,16 @@ function ops() echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" } +function local_storage() +{ + echo -n "hits throughput: " + echo -n "$*" | sed -E "s/.* hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" + echo -n -e ", hits latency: " + echo -n "$*" | sed -E "s/.* hits latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" + echo -n ", important_hits throughput: " + echo "$*" | sed -E "s/.*important_hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" +} + function total() { echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" @@ -67,6 +77,13 @@ function summarize_ops() printf "%-20s %s\n" "$bench" "$(ops $summary)" } +function summarize_local_storage() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(local_storage $summary)" +} + function summarize_total() { bench="$1" diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 
3b3edc0fc8a6a..c05904d631ec8 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -57,3 +57,9 @@ CONFIG_FPROBE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_MPTCP=y +CONFIG_NETFILTER_SYNPROXY=y +CONFIG_NETFILTER_XT_TARGET_CT=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_SYNPROXY=y +CONFIG_IP_NF_RAW=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c index 380d7a2072e39..4cd8a25afe688 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c @@ -120,6 +120,64 @@ static void check_nested_calls(struct bpf_loop *skel) bpf_link__destroy(link); } +static void check_non_constant_callback(struct bpf_loop *skel) +{ + struct bpf_link *link = + bpf_program__attach(skel->progs.prog_non_constant_callback); + + if (!ASSERT_OK_PTR(link, "link")) + return; + + skel->bss->callback_selector = 0x0F; + usleep(1); + ASSERT_EQ(skel->bss->g_output, 0x0F, "g_output #1"); + + skel->bss->callback_selector = 0xF0; + usleep(1); + ASSERT_EQ(skel->bss->g_output, 0xF0, "g_output #2"); + + bpf_link__destroy(link); +} + +static void check_stack(struct bpf_loop *skel) +{ + struct bpf_link *link = bpf_program__attach(skel->progs.stack_check); + const int max_key = 12; + int key; + int map_fd; + + if (!ASSERT_OK_PTR(link, "link")) + return; + + map_fd = bpf_map__fd(skel->maps.map1); + + if (!ASSERT_GE(map_fd, 0, "bpf_map__fd")) + goto out; + + for (key = 1; key <= max_key; ++key) { + int val = key; + int err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST); + + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto out; + } + + usleep(1); + + for (key = 1; key <= max_key; ++key) { + int val; + int err = bpf_map_lookup_elem(map_fd, &key, &val); + + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto out; + if (!ASSERT_EQ(val, key + 1, "bad value in the map")) + goto out; + } + +out: + bpf_link__destroy(link); +} + void test_bpf_loop(void) { struct bpf_loop *skel; @@ -140,6 +198,10 @@ void test_bpf_loop(void) check_invalid_flags(skel); if (test__start_subtest("check_nested_calls")) check_nested_calls(skel); + if (test__start_subtest("check_non_constant_callback")) + check_non_constant_callback(skel); + if (test__start_subtest("check_stack")) + check_stack(skel); bpf_loop__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index e9a9a31b2ffec..2959a52ced060 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -9,6 +9,9 @@ #include "bpf_cubic.skel.h" #include "bpf_tcp_nogpl.skel.h" #include "bpf_dctcp_release.skel.h" +#include "tcp_ca_write_sk_pacing.skel.h" +#include "tcp_ca_incompl_cong_ops.skel.h" +#include "tcp_ca_unsupp_cong_op.skel.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -322,6 +325,58 @@ static void test_rel_setsockopt(void) bpf_dctcp_release__destroy(rel_skel); } +static void test_write_sk_pacing(void) +{ + struct tcp_ca_write_sk_pacing *skel; + struct bpf_link *link; + + skel = tcp_ca_write_sk_pacing__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing); + ASSERT_OK_PTR(link, "attach_struct_ops"); + + bpf_link__destroy(link); + tcp_ca_write_sk_pacing__destroy(skel); +} + +static void test_incompl_cong_ops(void) +{ + struct tcp_ca_incompl_cong_ops *skel; + struct bpf_link 
*link; + + skel = tcp_ca_incompl_cong_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + /* The missing cong_avoid() and cong_control() callbacks are only + * reported at this point: + */ + link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops); + ASSERT_ERR_PTR(link, "attach_struct_ops"); + + bpf_link__destroy(link); + tcp_ca_incompl_cong_ops__destroy(skel); +} + +static void test_unsupp_cong_op(void) +{ + libbpf_print_fn_t old_print_fn; + struct tcp_ca_unsupp_cong_op *skel; + + err_str = "attach to unsupported member get_info"; + found = false; + old_print_fn = libbpf_set_print(libbpf_debug_print); + + skel = tcp_ca_unsupp_cong_op__open_and_load(); + ASSERT_NULL(skel, "open_and_load"); + ASSERT_EQ(found, true, "expected_err_msg"); + + tcp_ca_unsupp_cong_op__destroy(skel); + libbpf_set_print(old_print_fn); +} + void test_bpf_tcp_ca(void) { if (test__start_subtest("dctcp")) @@ -334,4 +389,10 @@ void test_bpf_tcp_ca(void) test_dctcp_fallback(); if (test__start_subtest("rel_setsockopt")) test_rel_setsockopt(); + if (test__start_subtest("write_sk_pacing")) + test_write_sk_pacing(); + if (test__start_subtest("incompl_cong_ops")) + test_incompl_cong_ops(); + if (test__start_subtest("unsupp_cong_op")) + test_unsupp_cong_op(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index edb387163baa2..1fd792a92a1cd 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -34,7 +34,6 @@ static bool always_log; #undef CHECK #define CHECK(condition, format...) _CHECK(condition, "check", duration, format) -#define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f #define NAME_NTH(N) (0xfffe0000 | N) diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 9d211b5c22c41..7d23166c77af5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -394,7 +394,6 @@ void serial_test_sock_fields(void) test(); done: - test_sock_fields__detach(skel); test_sock_fields__destroy(skel); if (child_cg_fd >= 0) close(child_cg_fd); diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c index e08565282759c..de1fc82d27105 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop.c @@ -11,11 +11,19 @@ struct callback_ctx { int output; }; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 32); + __type(key, int); + __type(value, int); +} map1 SEC(".maps"); + /* These should be set by the user program */ u32 nested_callback_nr_loops; u32 stop_index = -1; u32 nr_loops; int pid; +int callback_selector; /* Making these global variables so that the userspace program * can verify the output through the skeleton @@ -111,3 +119,109 @@ int prog_nested_calls(void *ctx) return 0; } + +static int callback_set_f0(int i, void *ctx) +{ + g_output = 0xF0; + return 0; +} + +static int callback_set_0f(int i, void *ctx) +{ + g_output = 0x0F; + return 0; +} + +/* + * A non-constant callback is a corner case for the bpf_loop inlining logic + */ +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int prog_non_constant_callback(void *ctx) +{ + struct callback_ctx data = {}; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + int (*callback)(int i, void *ctx); + + g_output = 0; + + if (callback_selector == 0x0F) + callback = callback_set_0f; + else + callback =
callback_set_f0; + + bpf_loop(1, callback, NULL, 0); + + return 0; +} + +static int stack_check_inner_callback(void *ctx) +{ + return 0; +} + +static int map1_lookup_elem(int key) +{ + int *val = bpf_map_lookup_elem(&map1, &key); + + return val ? *val : -1; +} + +static void map1_update_elem(int key, int val) +{ + bpf_map_update_elem(&map1, &key, &val, BPF_ANY); +} + +static int stack_check_outer_callback(void *ctx) +{ + int a = map1_lookup_elem(1); + int b = map1_lookup_elem(2); + int c = map1_lookup_elem(3); + int d = map1_lookup_elem(4); + int e = map1_lookup_elem(5); + int f = map1_lookup_elem(6); + + bpf_loop(1, stack_check_inner_callback, NULL, 0); + + map1_update_elem(1, a + 1); + map1_update_elem(2, b + 1); + map1_update_elem(3, c + 1); + map1_update_elem(4, d + 1); + map1_update_elem(5, e + 1); + map1_update_elem(6, f + 1); + + return 0; +} + +/* Some of the local variables in stack_check and + * stack_check_outer_callback are allocated on the stack by the + * compiler. This test verifies that the stack content for these + * variables is preserved between calls to bpf_loop (it could be + * corrupted if loop inlining allocated stack slots incorrectly). + */ +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int stack_check(void *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + int a = map1_lookup_elem(7); + int b = map1_lookup_elem(8); + int c = map1_lookup_elem(9); + int d = map1_lookup_elem(10); + int e = map1_lookup_elem(11); + int f = map1_lookup_elem(12); + + bpf_loop(1, stack_check_outer_callback, NULL, 0); + + map1_update_elem(7, a + 1); + map1_update_elem(8, b + 1); + map1_update_elem(9, c + 1); + map1_update_elem(10, d + 1); + map1_update_elem(11, e + 1); + map1_update_elem(12, f + 1); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/local_storage_bench.c b/tools/testing/selftests/bpf/progs/local_storage_bench.c new file mode 100644 index 0000000000000..2c3234c5b73a3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_storage_bench.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
*/ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#define HASHMAP_SZ 4194304 + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1000); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); + }); +} array_of_local_storage_maps SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1000); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, HASHMAP_SZ); + __type(key, int); + __type(value, int); + }); +} array_of_hash_maps SEC(".maps"); + +long important_hits; +long hits; + +/* set from user-space */ +const volatile unsigned int use_hashmap; +const volatile unsigned int hashmap_num_keys; +const volatile unsigned int num_maps; +const volatile unsigned int interleave; + +struct loop_ctx { + struct task_struct *task; + long loop_hits; + long loop_important_hits; +}; + +static int do_lookup(unsigned int elem, struct loop_ctx *lctx) +{ + void *map, *inner_map; + int idx = 0; + + if (use_hashmap) + map = &array_of_hash_maps; + else + map = &array_of_local_storage_maps; + + inner_map = bpf_map_lookup_elem(map, &elem); + if (!inner_map) + return -1; + + if (use_hashmap) { + idx = bpf_get_prandom_u32() % hashmap_num_keys; + bpf_map_lookup_elem(inner_map, &idx); + } else { + bpf_task_storage_get(inner_map, lctx->task, &idx, + BPF_LOCAL_STORAGE_GET_F_CREATE); + } + + lctx->loop_hits++; + if (!elem) + lctx->loop_important_hits++; + return 0; +} + +static long loop(u32 index, void *ctx) +{ + struct loop_ctx *lctx = (struct loop_ctx *)ctx; + unsigned int map_idx = index % num_maps; + + do_lookup(map_idx, lctx); + if (interleave && map_idx % 3 == 0) + do_lookup(0, lctx); + return 0; +} + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int get_local(void *ctx) +{ + struct loop_ctx lctx; + + lctx.task = bpf_get_current_task_btf(); + lctx.loop_hits = 0; + lctx.loop_important_hits = 0; + bpf_loop(10000, &loop, &lctx, 0); + __sync_add_and_fetch(&hits, lctx.loop_hits); + __sync_add_and_fetch(&important_hits, lctx.loop_important_hits); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c new file mode 100644 index 0000000000000..7bb872fb22dd7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +static inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +SEC("struct_ops/incompl_cong_ops_ssthresh") +__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk) +{ + return tcp_sk(sk)->snd_ssthresh; +} + +SEC("struct_ops/incompl_cong_ops_undo_cwnd") +__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +SEC(".struct_ops") +struct tcp_congestion_ops incompl_cong_ops = { + /* Intentionally leaving out any of the required cong_avoid() and + * cong_control() here. 
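Registration via + * tcp_register_congestion_control() requires at least one of them, so + * attaching this struct_ops is expected to fail.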
+ */ + .ssthresh = (void *)incompl_cong_ops_ssthresh, + .undo_cwnd = (void *)incompl_cong_ops_undo_cwnd, + .name = "bpf_incompl_ops", +}; diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c new file mode 100644 index 0000000000000..c06f4a41c21ab --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +SEC("struct_ops/unsupp_cong_op_get_info") +size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) +{ + return 0; +} + +SEC(".struct_ops") +struct tcp_congestion_ops unsupp_cong_op = { + .get_info = (void *)unsupp_cong_op_get_info, + .name = "bpf_unsupp_op", +}; diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c new file mode 100644 index 0000000000000..43447704cf0e6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#define USEC_PER_SEC 1000000UL + +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +SEC("struct_ops/write_sk_pacing_init") +void BPF_PROG(write_sk_pacing_init, struct sock *sk) +{ +#ifdef ENABLE_ATOMICS_TESTS + __sync_bool_compare_and_swap(&sk->sk_pacing_status, SK_PACING_NONE, + SK_PACING_NEEDED); +#else + sk->sk_pacing_status = SK_PACING_NEEDED; +#endif +} + +SEC("struct_ops/write_sk_pacing_cong_control") +void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk, + const struct rate_sample *rs) +{ + const struct tcp_sock *tp = tcp_sk(sk); + unsigned long rate = + ((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) / + (tp->srtt_us ?: 1U << 3); + sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate); +} + +SEC("struct_ops/write_sk_pacing_ssthresh") +__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk) +{ + return tcp_sk(sk)->snd_ssthresh; +} + +SEC("struct_ops/write_sk_pacing_undo_cwnd") +__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +SEC(".struct_ops") +struct tcp_congestion_ops write_sk_pacing = { + .init = (void *)write_sk_pacing_init, + .cong_control = (void *)write_sk_pacing_cong_control, + .ssthresh = (void *)write_sk_pacing_ssthresh, + .undo_cwnd = (void *)write_sk_pacing_undo_cwnd, + .name = "bpf_w_sk_pacing", +}; diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index 38782bd47fdca..fb4f4714eeb41 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -4,6 +4,8 @@ #ifndef _TEST_BTF_H #define _TEST_BTF_H +#define BTF_END_RAW 0xdeadbeef + #define BTF_INFO_ENC(kind, kind_flag, vlen) \ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 372579c9f45e7..f9d553fbf68a3 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -51,12 +51,24 @@ #endif #define MAX_INSNS BPF_MAXINSNS +#define MAX_EXPECTED_INSNS 32 +#define MAX_UNEXPECTED_INSNS 32 
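+/* Upper bounds for the expected_insns/unexpected_insns pattern arrays + * in struct bpf_test below. + */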
#define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 #define MAX_NR_MAPS 23 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 +#define MAX_FUNC_INFOS 8 +#define MAX_BTF_STRINGS 256 +#define MAX_BTF_TYPES 256 + +#define INSN_OFF_MASK ((__s16)0xFFFF) +#define INSN_IMM_MASK ((__s32)0xFFFFFFFF) +#define SKIP_INSNS() BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef) + +#define DEFAULT_LIBBPF_LOG_LEVEL 4 +#define VERBOSE_LIBBPF_LOG_LEVEL 1 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) @@ -79,6 +91,23 @@ struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; struct bpf_insn *fill_insns; + /* If specified, the test engine looks for this sequence of + * instructions in the BPF program after loading. This allows + * testing rewrites applied by the verifier. Use the values + * INSN_OFF_MASK and INSN_IMM_MASK to mask the `off` and `imm` + * fields when their content does not matter. The test case + * fails if the specified instructions are not found. + * + * The sequence can be split into sub-sequences by adding a + * SKIP_INSNS() instruction at the end of each sub-sequence. In + * that case, the sub-sequences are searched for one after another. + */ + struct bpf_insn expected_insns[MAX_EXPECTED_INSNS]; + /* If specified, the test engine applies the same pattern-matching + * logic as for `expected_insns`. If the specified pattern is + * matched, the test case is marked as failed. + */ + struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS]; int fixup_map_hash_8b[MAX_FIXUPS]; int fixup_map_hash_48b[MAX_FIXUPS]; int fixup_map_hash_16b[MAX_FIXUPS]; @@ -135,6 +164,14 @@ struct bpf_test { }; enum bpf_attach_type expected_attach_type; const char *kfunc; + struct bpf_func_info func_info[MAX_FUNC_INFOS]; + int func_info_cnt; + char btf_strings[MAX_BTF_STRINGS]; + /* A set of BTF types to load when specified; + * use the macro definitions from test_btf.h, + * and end the list with BTF_END_RAW + */ + __u32 btf_types[MAX_BTF_TYPES]; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -388,6 +425,45 @@ static void bpf_fill_torturous_jumps(struct bpf_test *self) } } +static void bpf_fill_big_prog_with_loop_1(struct bpf_test *self) +{ + struct bpf_insn *insn = self->fill_insns; + /* This test was added to catch a specific use-after-free + * error, which happened upon BPF program reallocation. + * Reallocation is handled by core.c:bpf_prog_realloc, which + * reuses old memory if the page boundary is not crossed. The + * value of `len` is chosen to cross this boundary on bpf_loop + * patching.
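Inlining + * bpf_loop patches additional instructions into the program, pushing + * its size past that boundary and forcing a reallocation.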
+ */ + const int len = getpagesize() - 25; + int callback_load_idx; + int callback_idx; + int i = 0; + + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1); + callback_load_idx = i; + insn[i++] = BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, + BPF_REG_2, BPF_PSEUDO_FUNC, 0, + 777 /* filled below */); + insn[i++] = BPF_RAW_INSN(0, 0, 0, 0, 0); + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0); + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0); + insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop); + + while (i < len - 3) + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0); + insn[i++] = BPF_EXIT_INSN(); + + callback_idx = i; + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0); + insn[i++] = BPF_EXIT_INSN(); + + insn[callback_load_idx].imm = callback_idx - callback_load_idx - 1; + self->func_info[1].insn_off = callback_idx; + self->prog_len = i; + assert(i == len); +} + /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */ #define BPF_SK_LOOKUP(func) \ /* struct bpf_sock_tuple tuple = {} */ \ @@ -664,34 +740,66 @@ static __u32 btf_raw_types[] = { BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */ }; -static int load_btf(void) +static char bpf_vlog[UINT_MAX >> 8]; + +static int load_btf_spec(__u32 *types, int types_len, + const char *strings, int strings_len) { struct btf_header hdr = { .magic = BTF_MAGIC, .version = BTF_VERSION, .hdr_len = sizeof(struct btf_header), - .type_len = sizeof(btf_raw_types), - .str_off = sizeof(btf_raw_types), - .str_len = sizeof(btf_str_sec), + .type_len = types_len, + .str_off = types_len, + .str_len = strings_len, }; void *ptr, *raw_btf; int btf_fd; + LIBBPF_OPTS(bpf_btf_load_opts, opts, + .log_buf = bpf_vlog, + .log_size = sizeof(bpf_vlog), + .log_level = (verbose + ? VERBOSE_LIBBPF_LOG_LEVEL + : DEFAULT_LIBBPF_LOG_LEVEL), + ); - ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) + - sizeof(btf_str_sec)); + raw_btf = malloc(sizeof(hdr) + types_len + strings_len); + ptr = raw_btf; memcpy(ptr, &hdr, sizeof(hdr)); ptr += sizeof(hdr); - memcpy(ptr, btf_raw_types, hdr.type_len); + memcpy(ptr, types, hdr.type_len); ptr += hdr.type_len; - memcpy(ptr, btf_str_sec, hdr.str_len); + memcpy(ptr, strings, hdr.str_len); ptr += hdr.str_len; - btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, NULL); - free(raw_btf); + btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, &opts); if (btf_fd < 0) - return -1; - return btf_fd; + printf("Failed to load BTF spec: '%s'\n", strerror(errno)); + + free(raw_btf); + + return btf_fd < 0 ? 
-1 : btf_fd; +} + +static int load_btf(void) +{ + return load_btf_spec(btf_raw_types, sizeof(btf_raw_types), + btf_str_sec, sizeof(btf_str_sec)); +} + +static int load_btf_for_test(struct bpf_test *test) +{ + int types_num = 0; + int types_len; + + while (types_num < MAX_BTF_TYPES && + test->btf_types[types_num] != BTF_END_RAW) + ++types_num; + + types_len = types_num * sizeof(test->btf_types[0]); + + return load_btf_spec(test->btf_types, types_len, + test->btf_strings, sizeof(test->btf_strings)); } static int create_map_spin_lock(void) @@ -770,8 +878,6 @@ static int create_map_kptr(void) return fd; } -static char bpf_vlog[UINT_MAX >> 8]; - static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, struct bpf_insn *prog, int *map_fds) { @@ -1126,10 +1232,218 @@ static bool cmp_str_seq(const char *log, const char *exp) return true; } +static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 xlated_prog_len; + __u32 buf_element_size = sizeof(struct bpf_insn); + + if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("bpf_obj_get_info_by_fd failed"); + return -1; + } + + xlated_prog_len = info.xlated_prog_len; + if (xlated_prog_len % buf_element_size) { + printf("Program length %d is not multiple of %d\n", + xlated_prog_len, buf_element_size); + return -1; + } + + *cnt = xlated_prog_len / buf_element_size; + *buf = calloc(*cnt, buf_element_size); + if (!*buf) { + perror("can't allocate xlated program buffer"); + return -ENOMEM; + } + + bzero(&info, sizeof(info)); + info.xlated_prog_len = xlated_prog_len; + info.xlated_prog_insns = (__u64)*buf; + if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("second bpf_obj_get_info_by_fd failed"); + goto out_free_buf; + } + + return 0; + +out_free_buf: + free(*buf); + return -1; +} + +static bool is_null_insn(struct bpf_insn *insn) +{ + struct bpf_insn null_insn = {}; + + return memcmp(insn, &null_insn, sizeof(null_insn)) == 0; +} + +static bool is_skip_insn(struct bpf_insn *insn) +{ + struct bpf_insn skip_insn = SKIP_INSNS(); + + return memcmp(insn, &skip_insn, sizeof(skip_insn)) == 0; +} + +static int null_terminated_insn_len(struct bpf_insn *seq, int max_len) +{ + int i; + + for (i = 0; i < max_len; ++i) { + if (is_null_insn(&seq[i])) + return i; + } + return max_len; +} + +static bool compare_masked_insn(struct bpf_insn *orig, struct bpf_insn *masked) +{ + struct bpf_insn orig_masked; + + memcpy(&orig_masked, orig, sizeof(orig_masked)); + if (masked->imm == INSN_IMM_MASK) + orig_masked.imm = INSN_IMM_MASK; + if (masked->off == INSN_OFF_MASK) + orig_masked.off = INSN_OFF_MASK; + + return memcmp(&orig_masked, masked, sizeof(orig_masked)) == 0; +} + +static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq, + int seq_len, int subseq_len) +{ + int i, j; + + if (subseq_len > seq_len) + return -1; + + for (i = 0; i < seq_len - subseq_len + 1; ++i) { + bool found = true; + + for (j = 0; j < subseq_len; ++j) { + if (!compare_masked_insn(&seq[i + j], &subseq[j])) { + found = false; + break; + } + } + if (found) + return i; + } + + return -1; +} + +static int find_skip_insn_marker(struct bpf_insn *seq, int len) +{ + int i; + + for (i = 0; i < len; ++i) + if (is_skip_insn(&seq[i])) + return i; + + return -1; +} + +/* Return true if all sub-sequences in `subseqs` could be found in + * `seq` one after another. Sub-sequences are separated by a single + * SKIP_INSNS() marker instruction; the overall list is terminated + * by a nil (all-zero) instruction.
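For + * example, { insn_a, insn_b, SKIP_INSNS(), insn_c } matches if insn_a + * and insn_b occur adjacently somewhere in `seq` and insn_c occurs at + * some later position.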
+ */ +static bool find_all_insn_subseqs(struct bpf_insn *seq, struct bpf_insn *subseqs, + int seq_len, int max_subseqs_len) +{ + int subseqs_len = null_terminated_insn_len(subseqs, max_subseqs_len); + + while (subseqs_len > 0) { + int skip_idx = find_skip_insn_marker(subseqs, subseqs_len); + int cur_subseq_len = skip_idx < 0 ? subseqs_len : skip_idx; + int subseq_idx = find_insn_subseq(seq, subseqs, + seq_len, cur_subseq_len); + + if (subseq_idx < 0) + return false; + seq += subseq_idx + cur_subseq_len; + seq_len -= subseq_idx + cur_subseq_len; + subseqs += cur_subseq_len + 1; + subseqs_len -= cur_subseq_len + 1; + } + + return true; +} + +static void print_insn(struct bpf_insn *buf, int cnt) +{ + int i; + + printf(" addr op d s off imm\n"); + for (i = 0; i < cnt; ++i) { + struct bpf_insn *insn = &buf[i]; + + if (is_null_insn(insn)) + break; + + if (is_skip_insn(insn)) + printf(" ...\n"); + else + printf(" %04x: %02x %1x %x %04hx %08x\n", + i, insn->code, insn->dst_reg, + insn->src_reg, insn->off, insn->imm); + } +} + +static bool check_xlated_program(struct bpf_test *test, int fd_prog) +{ + struct bpf_insn *buf; + int cnt; + bool result = true; + bool check_expected = !is_null_insn(test->expected_insns); + bool check_unexpected = !is_null_insn(test->unexpected_insns); + + if (!check_expected && !check_unexpected) + goto out; + + if (get_xlated_program(fd_prog, &buf, &cnt)) { + printf("FAIL: can't get xlated program\n"); + result = false; + goto out; + } + + if (check_expected && + !find_all_insn_subseqs(buf, test->expected_insns, + cnt, MAX_EXPECTED_INSNS)) { + printf("FAIL: can't find expected subsequence of instructions\n"); + result = false; + if (verbose) { + printf("Program:\n"); + print_insn(buf, cnt); + printf("Expected subsequence:\n"); + print_insn(test->expected_insns, MAX_EXPECTED_INSNS); + } + } + + if (check_unexpected && + find_all_insn_subseqs(buf, test->unexpected_insns, + cnt, MAX_UNEXPECTED_INSNS)) { + printf("FAIL: found unexpected subsequence of instructions\n"); + result = false; + if (verbose) { + printf("Program:\n"); + print_insn(buf, cnt); + printf("Unexpected subsequence:\n"); + print_insn(test->unexpected_insns, MAX_UNEXPECTED_INSNS); + } + } + + free(buf); + out: + return result; +} + static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { - int fd_prog, expected_ret, alignment_prevented_execution; + int fd_prog, btf_fd, expected_ret, alignment_prevented_execution; int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; LIBBPF_OPTS(bpf_prog_load_opts, opts); @@ -1141,8 +1455,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv, __u32 pflags; int i, err; + fd_prog = -1; for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; + btf_fd = -1; if (!prog_type) prog_type = BPF_PROG_TYPE_SOCKET_FILTER; @@ -1175,11 +1491,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv, opts.expected_attach_type = test->expected_attach_type; if (verbose) - opts.log_level = 1; + opts.log_level = VERBOSE_LIBBPF_LOG_LEVEL; else if (expected_ret == VERBOSE_ACCEPT) opts.log_level = 2; else - opts.log_level = 4; + opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL; opts.prog_flags = pflags; if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) { @@ -1197,6 +1513,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, opts.attach_btf_id = attach_btf_id; } + if (test->btf_types[0] != 0) { + btf_fd = load_btf_for_test(test); + if (btf_fd < 0) + goto fail_log; + opts.prog_btf_fd = btf_fd; + } + + if
(test->func_info_cnt != 0) { + opts.func_info = test->func_info; + opts.func_info_cnt = test->func_info_cnt; + opts.func_info_rec_size = sizeof(test->func_info[0]); + } + opts.log_buf = bpf_vlog; opts.log_size = sizeof(bpf_vlog); fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts); @@ -1262,6 +1591,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (verbose) printf(", verifier log:\n%s", bpf_vlog); + if (!check_xlated_program(test, fd_prog)) + goto fail_log; + run_errs = 0; run_successes = 0; if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) { @@ -1305,6 +1637,7 @@ close_fds: if (test->fill_insns) free(test->fill_insns); close(fd_prog); + close(btf_fd); for (i = 0; i < MAX_NR_MAPS; i++) close(map_fds[i]); sched_yield(); diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c new file mode 100644 index 0000000000000..2d0023659d884 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c @@ -0,0 +1,263 @@ +#define BTF_TYPES \ + .btf_strings = "\0int\0i\0ctx\0callback\0main\0", \ + .btf_types = { \ + /* 1: int */ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), \ + /* 2: int* */ BTF_PTR_ENC(1), \ + /* 3: void* */ BTF_PTR_ENC(0), \ + /* 4: int __(void*) */ BTF_FUNC_PROTO_ENC(1, 1), \ + BTF_FUNC_PROTO_ARG_ENC(7, 3), \ + /* 5: int __(int, int*) */ BTF_FUNC_PROTO_ENC(1, 2), \ + BTF_FUNC_PROTO_ARG_ENC(5, 1), \ + BTF_FUNC_PROTO_ARG_ENC(7, 2), \ + /* 6: main */ BTF_FUNC_ENC(20, 4), \ + /* 7: callback */ BTF_FUNC_ENC(11, 5), \ + BTF_END_RAW \ + } + +#define MAIN_TYPE 6 +#define CALLBACK_TYPE 7 + +/* can't use BPF_CALL_REL, jit_subprogs adjusts IMM & OFF + * fields for pseudo calls + */ +#define PSEUDO_CALL_INSN() \ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, \ + INSN_OFF_MASK, INSN_IMM_MASK) + +/* can't use BPF_FUNC_loop constant, + * do_misc_fixups adjusts the IMM field + */ +#define HELPER_CALL_INSN() \ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, INSN_OFF_MASK, INSN_IMM_MASK) + +{ + "inline simple bpf_loop call", + .insns = { + /* main */ + /* force verifier state branching to verify logic on first and + * subsequent bpf_loop insn processing steps + */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 2), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2), + + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { PSEUDO_CALL_INSN() }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } }, + .func_info_cnt = 2, + BTF_TYPES +}, +{ + "don't inline bpf_loop call, flags non-zero", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 9), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 0), + BPF_ALU64_IMM(BPF_MOV,
BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 7), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -10), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { HELPER_CALL_INSN() }, + .unexpected_insns = { PSEUDO_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, + .func_info_cnt = 2, + BTF_TYPES +}, +{ + "don't inline bpf_loop call, callback non-constant", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 4), /* pick a random callback */ + + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 10), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8), + BPF_RAW_INSN(0, 0, 0, 0, 0), + + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* callback #2 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { HELPER_CALL_INSN() }, + .unexpected_insns = { PSEUDO_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { + { 0, MAIN_TYPE }, + { 14, CALLBACK_TYPE }, + { 16, CALLBACK_TYPE } + }, + .func_info_cnt = 3, + BTF_TYPES +}, +{ + "bpf_loop_inline and a dead func", + .insns = { + /* main */ + + /* A reference to callback #1 to make verifier count it as a func. + * This reference is overwritten below and callback #1 is dead. 
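The + * verifier is still expected to inline the bpf_loop call even though + * the func_info table carries an entry for the unreachable callback.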
+ */ + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 9), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* callback #2 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { PSEUDO_CALL_INSN() }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { + { 0, MAIN_TYPE }, + { 10, CALLBACK_TYPE }, + { 12, CALLBACK_TYPE } + }, + .func_info_cnt = 3, + BTF_TYPES +}, +{ + "bpf_loop_inline stack locations for loop vars", + .insns = { + /* main */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77), + /* bpf_loop call #1 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 22), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + /* bpf_loop call #2 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 16), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + /* call func and exit */ + BPF_CALL_REL(2), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* func */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77), + SKIP_INSNS(), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24), + SKIP_INSNS(), + /* offsets are the same as in the first call */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24), + SKIP_INSNS(), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55), + SKIP_INSNS(), + /* offsets differ from main because of different offset + * in BPF_ST_MEM instruction + */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -40), + }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .func_info = { + { 0, MAIN_TYPE }, + { 16, MAIN_TYPE }, + { 25, CALLBACK_TYPE }, + }, + .func_info_cnt = 3, + BTF_TYPES +}, +{ + "inline bpf_loop call in a big program", + .insns = {}, + .fill_helper = bpf_fill_big_prog_with_loop_1, + .expected_insns = { PSEUDO_CALL_INSN() }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .result = ACCEPT, + .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, + 
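+ /* insn_off 16 for the callback is a placeholder; it is rewritten by + * bpf_fill_big_prog_with_loop_1() to the generated callback's actual + * offset. + */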
.func_info_cnt = 2, + BTF_TYPES +}, + +#undef HELPER_CALL_INSN +#undef PSEUDO_CALL_INSN +#undef CALLBACK_TYPE +#undef MAIN_TYPE +#undef BTF_TYPES
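
For reference, the expected_insns/unexpected_insns checks above rely on the two-call bpf_obj_get_info_by_fd() pattern implemented by get_xlated_program(): the first call reports xlated_prog_len, the second fills a caller-supplied buffer. Below is a minimal, self-contained sketch of that pattern, not part of the patch itself; it assumes libbpf is available, prog_fd refers to an already-loaded program, and dump_xlated() is a name chosen here for illustration. (Reading the xlated image generally requires sufficient privilege; the kernel may report a zero length otherwise.)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

/* Hypothetical helper: dump the verifier-rewritten (xlated) image of an
 * already-loaded BPF program identified by prog_fd.
 */
static int dump_xlated(int prog_fd)
{
	struct bpf_prog_info info = {};
	__u32 info_len = sizeof(info);
	struct bpf_insn *insns;
	__u32 cnt, i;

	/* First call: the kernel fills in sizes only. */
	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len))
		return -1;
	if (info.xlated_prog_len == 0)
		return -1; /* not available (e.g. insufficient privilege) */

	cnt = info.xlated_prog_len / sizeof(struct bpf_insn);
	insns = calloc(cnt, sizeof(*insns));
	if (!insns)
		return -1;

	/* Second call: point the kernel at our buffer to copy insns into. */
	memset(&info, 0, sizeof(info));
	info.xlated_prog_len = cnt * sizeof(*insns);
	info.xlated_prog_insns = (__u64)(unsigned long)insns;
	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
		free(insns);
		return -1;
	}

	for (i = 0; i < cnt; i++)
		printf("%04x: code=0x%02x dst=%d src=%d off=%d imm=%d\n",
		       i, insns[i].code, insns[i].dst_reg, insns[i].src_reg,
		       insns[i].off, insns[i].imm);

	free(insns);
	return 0;
}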
