1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * Linux Socket Filter - Kernel level socket filtering
4   *
5   * Based on the design of the Berkeley Packet Filter. The new
6   * internal format has been designed by PLUMgrid:
7   *
8   *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
9   *
10   * Authors:
11   *
12   *	Jay Schulist <jschlst@samba.org>
13   *	Alexei Starovoitov <ast@plumgrid.com>
14   *	Daniel Borkmann <dborkman@redhat.com>
15   *
16   * Andi Kleen - Fix a few bad bugs and races.
17   * Kris Katterjohn - Added many additional checks in bpf_check_classic()
18   */
19  
20  #include <uapi/linux/btf.h>
21  #include <linux/filter.h>
22  #include <linux/skbuff.h>
23  #include <linux/vmalloc.h>
24  #include <linux/random.h>
25  #include <linux/moduleloader.h>
26  #include <linux/bpf.h>
27  #include <linux/btf.h>
28  #include <linux/objtool.h>
29  #include <linux/rbtree_latch.h>
30  #include <linux/kallsyms.h>
31  #include <linux/rcupdate.h>
32  #include <linux/perf_event.h>
33  #include <linux/extable.h>
34  #include <linux/log2.h>
35  #include <linux/bpf_verifier.h>
36  #include <linux/nodemask.h>
37  #include <linux/nospec.h>
38  #include <linux/bpf_mem_alloc.h>
39  #include <linux/memcontrol.h>
40  
41  #include <asm/barrier.h>
42  #include <asm/unaligned.h>
43  
44  /* Registers */
45  #define BPF_R0	regs[BPF_REG_0]
46  #define BPF_R1	regs[BPF_REG_1]
47  #define BPF_R2	regs[BPF_REG_2]
48  #define BPF_R3	regs[BPF_REG_3]
49  #define BPF_R4	regs[BPF_REG_4]
50  #define BPF_R5	regs[BPF_REG_5]
51  #define BPF_R6	regs[BPF_REG_6]
52  #define BPF_R7	regs[BPF_REG_7]
53  #define BPF_R8	regs[BPF_REG_8]
54  #define BPF_R9	regs[BPF_REG_9]
55  #define BPF_R10	regs[BPF_REG_10]
56  
57  /* Named registers */
58  #define DST	regs[insn->dst_reg]
59  #define SRC	regs[insn->src_reg]
60  #define FP	regs[BPF_REG_FP]
61  #define AX	regs[BPF_REG_AX]
62  #define ARG1	regs[BPF_REG_ARG1]
63  #define CTX	regs[BPF_REG_CTX]
64  #define IMM	insn->imm
65  
66  struct bpf_mem_alloc bpf_global_ma;
67  bool bpf_global_ma_set;
68  
69  /* No hurry in this branch
70   *
71   * Exported for the bpf jit load helper.
72   */
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
74  {
75  	u8 *ptr = NULL;
76  
77  	if (k >= SKF_NET_OFF) {
78  		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
79  	} else if (k >= SKF_LL_OFF) {
80  		if (unlikely(!skb_mac_header_was_set(skb)))
81  			return NULL;
82  		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
83  	}
84  	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
85  		return ptr;
86  
87  	return NULL;
88  }
89  
struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
91  {
92  	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
93  	struct bpf_prog_aux *aux;
94  	struct bpf_prog *fp;
95  
96  	size = round_up(size, PAGE_SIZE);
97  	fp = __vmalloc(size, gfp_flags);
98  	if (fp == NULL)
99  		return NULL;
100  
101  	aux = kzalloc(sizeof(*aux), bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
102  	if (aux == NULL) {
103  		vfree(fp);
104  		return NULL;
105  	}
106  	fp->active = alloc_percpu_gfp(int, bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
107  	if (!fp->active) {
108  		vfree(fp);
109  		kfree(aux);
110  		return NULL;
111  	}
112  
113  	fp->pages = size / PAGE_SIZE;
114  	fp->aux = aux;
115  	fp->aux->prog = fp;
116  	fp->jit_requested = ebpf_jit_enabled();
117  	fp->blinding_requested = bpf_jit_blinding_enabled(fp);
118  #ifdef CONFIG_CGROUP_BPF
119  	aux->cgroup_atype = CGROUP_BPF_ATTACH_TYPE_INVALID;
120  #endif
121  
122  	INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
123  	mutex_init(&fp->aux->used_maps_mutex);
124  	mutex_init(&fp->aux->dst_mutex);
125  
126  	return fp;
127  }
128  
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
130  {
131  	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
132  	struct bpf_prog *prog;
133  	int cpu;
134  
135  	prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
136  	if (!prog)
137  		return NULL;
138  
139  	prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
140  	if (!prog->stats) {
141  		free_percpu(prog->active);
142  		kfree(prog->aux);
143  		vfree(prog);
144  		return NULL;
145  	}
146  
147  	for_each_possible_cpu(cpu) {
148  		struct bpf_prog_stats *pstats;
149  
150  		pstats = per_cpu_ptr(prog->stats, cpu);
151  		u64_stats_init(&pstats->syncp);
152  	}
153  	return prog;
154  }
155  EXPORT_SYMBOL_GPL(bpf_prog_alloc);
156  
int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
158  {
159  	if (!prog->aux->nr_linfo || !prog->jit_requested)
160  		return 0;
161  
162  	prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo,
163  					  sizeof(*prog->aux->jited_linfo),
164  					  bpf_memcg_flags(GFP_KERNEL | __GFP_NOWARN));
165  	if (!prog->aux->jited_linfo)
166  		return -ENOMEM;
167  
168  	return 0;
169  }
170  
void bpf_prog_jit_attempt_done(struct bpf_prog *prog)
172  {
173  	if (prog->aux->jited_linfo &&
174  	    (!prog->jited || !prog->aux->jited_linfo[0])) {
175  		kvfree(prog->aux->jited_linfo);
176  		prog->aux->jited_linfo = NULL;
177  	}
178  
179  	kfree(prog->aux->kfunc_tab);
180  	prog->aux->kfunc_tab = NULL;
181  }
182  
/* The JIT engine is responsible for providing an array
 * for the insn_off to jited_off mapping (insn_to_jit_off).
185   *
186   * The idx to this array is the insn_off.  Hence, the insn_off
187   * here is relative to the prog itself instead of the main prog.
188   * This array has one entry for each xlated bpf insn.
189   *
190   * jited_off is the byte off to the end of the jited insn.
191   *
192   * Hence, with
193   * insn_start:
194   *      The first bpf insn off of the prog.  The insn off
195   *      here is relative to the main prog.
196   *      e.g. if prog is a subprog, insn_start > 0
197   * linfo_idx:
198   *      The prog's idx to prog->aux->linfo and jited_linfo
199   *
200   * jited_linfo[linfo_idx] = prog->bpf_func
201   *
202   * For i > linfo_idx,
203   *
204   * jited_linfo[i] = prog->bpf_func +
205   *	insn_to_jit_off[linfo[i].insn_off - insn_start - 1]
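 *
 * Illustrative example (made-up numbers): with insn_start = 10 and
 * linfo[i].insn_off = 12, jited_linfo[i] = prog->bpf_func +
 * insn_to_jit_off[12 - 10 - 1], i.e. the address just past the jited
 * code of the subprog's second xlated insn, which is where the jited
 * code for insn_off 12 begins.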
206   */
void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
208  			       const u32 *insn_to_jit_off)
209  {
210  	u32 linfo_idx, insn_start, insn_end, nr_linfo, i;
211  	const struct bpf_line_info *linfo;
212  	void **jited_linfo;
213  
214  	if (!prog->aux->jited_linfo)
215  		/* Userspace did not provide linfo */
216  		return;
217  
218  	linfo_idx = prog->aux->linfo_idx;
219  	linfo = &prog->aux->linfo[linfo_idx];
220  	insn_start = linfo[0].insn_off;
221  	insn_end = insn_start + prog->len;
222  
223  	jited_linfo = &prog->aux->jited_linfo[linfo_idx];
224  	jited_linfo[0] = prog->bpf_func;
225  
226  	nr_linfo = prog->aux->nr_linfo - linfo_idx;
227  
228  	for (i = 1; i < nr_linfo && linfo[i].insn_off < insn_end; i++)
229  		/* The verifier ensures that linfo[i].insn_off is
230  		 * strictly increasing
231  		 */
232  		jited_linfo[i] = prog->bpf_func +
233  			insn_to_jit_off[linfo[i].insn_off - insn_start - 1];
234  }
235  
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
237  				  gfp_t gfp_extra_flags)
238  {
239  	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
240  	struct bpf_prog *fp;
241  	u32 pages;
242  
243  	size = round_up(size, PAGE_SIZE);
244  	pages = size / PAGE_SIZE;
245  	if (pages <= fp_old->pages)
246  		return fp_old;
247  
248  	fp = __vmalloc(size, gfp_flags);
249  	if (fp) {
250  		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
251  		fp->pages = pages;
252  		fp->aux->prog = fp;
253  
254  		/* We keep fp->aux from fp_old around in the new
255  		 * reallocated structure.
256  		 */
257  		fp_old->aux = NULL;
258  		fp_old->stats = NULL;
259  		fp_old->active = NULL;
260  		__bpf_prog_free(fp_old);
261  	}
262  
263  	return fp;
264  }
265  
void __bpf_prog_free(struct bpf_prog *fp)
267  {
268  	if (fp->aux) {
269  		mutex_destroy(&fp->aux->used_maps_mutex);
270  		mutex_destroy(&fp->aux->dst_mutex);
271  		kfree(fp->aux->poke_tab);
272  		kfree(fp->aux);
273  	}
274  	free_percpu(fp->stats);
275  	free_percpu(fp->active);
276  	vfree(fp);
277  }
278  
int bpf_prog_calc_tag(struct bpf_prog *fp)
280  {
281  	const u32 bits_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
282  	u32 raw_size = bpf_prog_tag_scratch_size(fp);
283  	u32 digest[SHA1_DIGEST_WORDS];
284  	u32 ws[SHA1_WORKSPACE_WORDS];
285  	u32 i, bsize, psize, blocks;
286  	struct bpf_insn *dst;
287  	bool was_ld_map;
288  	u8 *raw, *todo;
289  	__be32 *result;
290  	__be64 *bits;
291  
292  	raw = vmalloc(raw_size);
293  	if (!raw)
294  		return -ENOMEM;
295  
296  	sha1_init(digest);
297  	memset(ws, 0, sizeof(ws));
298  
	/* We need to take out the map fd for the digest calculation
	 * since map fds are unstable from the user space side.
301  	 */
302  	dst = (void *)raw;
303  	for (i = 0, was_ld_map = false; i < fp->len; i++) {
304  		dst[i] = fp->insnsi[i];
305  		if (!was_ld_map &&
306  		    dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
307  		    (dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
308  		     dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
309  			was_ld_map = true;
310  			dst[i].imm = 0;
311  		} else if (was_ld_map &&
312  			   dst[i].code == 0 &&
313  			   dst[i].dst_reg == 0 &&
314  			   dst[i].src_reg == 0 &&
315  			   dst[i].off == 0) {
316  			was_ld_map = false;
317  			dst[i].imm = 0;
318  		} else {
319  			was_ld_map = false;
320  		}
321  	}
322  
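	/* What follows is standard SHA-1 message padding: append a 0x80
	 * byte, zero-fill, and store the message length in bits as a
	 * big-endian 64-bit value at the end of the final block (using an
	 * extra block if there is not enough room left for it).
	 */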
323  	psize = bpf_prog_insn_size(fp);
324  	memset(&raw[psize], 0, raw_size - psize);
325  	raw[psize++] = 0x80;
326  
327  	bsize  = round_up(psize, SHA1_BLOCK_SIZE);
328  	blocks = bsize / SHA1_BLOCK_SIZE;
329  	todo   = raw;
330  	if (bsize - psize >= sizeof(__be64)) {
331  		bits = (__be64 *)(todo + bsize - sizeof(__be64));
332  	} else {
333  		bits = (__be64 *)(todo + bsize + bits_offset);
334  		blocks++;
335  	}
336  	*bits = cpu_to_be64((psize - 1) << 3);
337  
338  	while (blocks--) {
339  		sha1_transform(digest, todo, ws);
340  		todo += SHA1_BLOCK_SIZE;
341  	}
342  
343  	result = (__force __be32 *)digest;
344  	for (i = 0; i < SHA1_DIGEST_WORDS; i++)
345  		result[i] = cpu_to_be32(digest[i]);
346  	memcpy(fp->tag, result, sizeof(fp->tag));
347  
348  	vfree(raw);
349  	return 0;
350  }
351  
static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
353  				s32 end_new, s32 curr, const bool probe_pass)
354  {
355  	const s64 imm_min = S32_MIN, imm_max = S32_MAX;
356  	s32 delta = end_new - end_old;
357  	s64 imm = insn->imm;
358  
359  	if (curr < pos && curr + imm + 1 >= end_old)
360  		imm += delta;
361  	else if (curr >= end_new && curr + imm + 1 < end_new)
362  		imm -= delta;
363  	if (imm < imm_min || imm > imm_max)
364  		return -ERANGE;
365  	if (!probe_pass)
366  		insn->imm = imm;
367  	return 0;
368  }
369  
static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
371  				s32 end_new, s32 curr, const bool probe_pass)
372  {
373  	const s32 off_min = S16_MIN, off_max = S16_MAX;
374  	s32 delta = end_new - end_old;
375  	s32 off = insn->off;
376  
377  	if (curr < pos && curr + off + 1 >= end_old)
378  		off += delta;
379  	else if (curr >= end_new && curr + off + 1 < end_new)
380  		off -= delta;
381  	if (off < off_min || off > off_max)
382  		return -ERANGE;
383  	if (!probe_pass)
384  		insn->off = off;
385  	return 0;
386  }
387  
static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
389  			    s32 end_new, const bool probe_pass)
390  {
391  	u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0);
392  	struct bpf_insn *insn = prog->insnsi;
393  	int ret = 0;
394  
395  	for (i = 0; i < insn_cnt; i++, insn++) {
396  		u8 code;
397  
398  		/* In the probing pass we still operate on the original,
399  		 * unpatched image in order to check overflows before we
400  		 * do any other adjustments. Therefore skip the patchlet.
401  		 */
402  		if (probe_pass && i == pos) {
403  			i = end_new;
404  			insn = prog->insnsi + end_old;
405  		}
406  		if (bpf_pseudo_func(insn)) {
407  			ret = bpf_adj_delta_to_imm(insn, pos, end_old,
408  						   end_new, i, probe_pass);
409  			if (ret)
410  				return ret;
411  			continue;
412  		}
413  		code = insn->code;
414  		if ((BPF_CLASS(code) != BPF_JMP &&
415  		     BPF_CLASS(code) != BPF_JMP32) ||
416  		    BPF_OP(code) == BPF_EXIT)
417  			continue;
418  		/* Adjust offset of jmps if we cross patch boundaries. */
419  		if (BPF_OP(code) == BPF_CALL) {
420  			if (insn->src_reg != BPF_PSEUDO_CALL)
421  				continue;
422  			ret = bpf_adj_delta_to_imm(insn, pos, end_old,
423  						   end_new, i, probe_pass);
424  		} else {
425  			ret = bpf_adj_delta_to_off(insn, pos, end_old,
426  						   end_new, i, probe_pass);
427  		}
428  		if (ret)
429  			break;
430  	}
431  
432  	return ret;
433  }
434  
static void bpf_adj_linfo(struct bpf_prog *prog, u32 off, u32 delta)
436  {
437  	struct bpf_line_info *linfo;
438  	u32 i, nr_linfo;
439  
440  	nr_linfo = prog->aux->nr_linfo;
441  	if (!nr_linfo || !delta)
442  		return;
443  
444  	linfo = prog->aux->linfo;
445  
446  	for (i = 0; i < nr_linfo; i++)
447  		if (off < linfo[i].insn_off)
448  			break;
449  
450  	/* Push all off < linfo[i].insn_off by delta */
451  	for (; i < nr_linfo; i++)
452  		linfo[i].insn_off += delta;
453  }
454  
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
456  				       const struct bpf_insn *patch, u32 len)
457  {
458  	u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
459  	const u32 cnt_max = S16_MAX;
460  	struct bpf_prog *prog_adj;
461  	int err;
462  
463  	/* Since our patchlet doesn't expand the image, we're done. */
464  	if (insn_delta == 0) {
465  		memcpy(prog->insnsi + off, patch, sizeof(*patch));
466  		return prog;
467  	}
468  
469  	insn_adj_cnt = prog->len + insn_delta;
470  
471  	/* Reject anything that would potentially let the insn->off
472  	 * target overflow when we have excessive program expansions.
	 * We need to probe here before we do any reallocation,
	 * since afterwards we may no longer be able to fail.
475  	 */
476  	if (insn_adj_cnt > cnt_max &&
477  	    (err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
478  		return ERR_PTR(err);
479  
480  	/* Several new instructions need to be inserted. Make room
481  	 * for them. Likely, there's no need for a new allocation as
	 * the last page could have large enough tailroom.
483  	 */
484  	prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
485  				    GFP_USER);
486  	if (!prog_adj)
487  		return ERR_PTR(-ENOMEM);
488  
489  	prog_adj->len = insn_adj_cnt;
490  
491  	/* Patching happens in 3 steps:
492  	 *
493  	 * 1) Move over tail of insnsi from next instruction onwards,
494  	 *    so we can patch the single target insn with one or more
495  	 *    new ones (patching is always from 1 to n insns, n > 0).
496  	 * 2) Inject new instructions at the target location.
497  	 * 3) Adjust branch offsets if necessary.
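	 *
	 * E.g. replacing the single insn at off = 5 with a 3-insn patchlet
	 * (len = 3, insn_delta = 2) moves everything from off 6 onwards up
	 * by two slots and grows prog->len by two.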
498  	 */
499  	insn_rest = insn_adj_cnt - off - len;
500  
501  	memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1,
502  		sizeof(*patch) * insn_rest);
503  	memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);
504  
	/* We are guaranteed to not fail at this point, otherwise the ship
	 * has sailed and we cannot revert to the original state anymore. An
	 * overflow cannot happen at this point.
508  	 */
509  	BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false));
510  
511  	bpf_adj_linfo(prog_adj, off, insn_delta);
512  
513  	return prog_adj;
514  }
515  
int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
517  {
518  	/* Branch offsets can't overflow when program is shrinking, no need
519  	 * to call bpf_adj_branches(..., true) here
520  	 */
521  	memmove(prog->insnsi + off, prog->insnsi + off + cnt,
522  		sizeof(struct bpf_insn) * (prog->len - off - cnt));
523  	prog->len -= cnt;
524  
525  	return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
526  }
527  
static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
529  {
530  	int i;
531  
532  	for (i = 0; i < fp->aux->func_cnt; i++)
533  		bpf_prog_kallsyms_del(fp->aux->func[i]);
534  }
535  
void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
537  {
538  	bpf_prog_kallsyms_del_subprogs(fp);
539  	bpf_prog_kallsyms_del(fp);
540  }
541  
542  #ifdef CONFIG_BPF_JIT
543  /* All BPF JIT sysctl knobs here. */
544  int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
545  int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
546  int bpf_jit_harden   __read_mostly;
547  long bpf_jit_limit   __read_mostly;
548  long bpf_jit_limit_max __read_mostly;
549  
550  static void
bpf_prog_ksym_set_addr(struct bpf_prog *prog)
552  {
553  	WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
554  
555  	prog->aux->ksym.start = (unsigned long) prog->bpf_func;
556  	prog->aux->ksym.end   = prog->aux->ksym.start + prog->jited_len;
557  }
558  
559  static void
bpf_prog_ksym_set_name(struct bpf_prog *prog)
561  {
562  	char *sym = prog->aux->ksym.name;
563  	const char *end = sym + KSYM_NAME_LEN;
564  	const struct btf_type *type;
565  	const char *func_name;
566  
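
	/* The resulting symbol has the form bpf_prog_<tag>[_<name>], e.g.
	 * "bpf_prog_5c41a2b3d4e5f607_my_prog" (tag value here is made up).
	 */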
567  	BUILD_BUG_ON(sizeof("bpf_prog_") +
568  		     sizeof(prog->tag) * 2 +
569  		     /* name has been null terminated.
		      * We would need +1 for the '_' preceding
571  		      * the name.  However, the null character
572  		      * is double counted between the name and the
573  		      * sizeof("bpf_prog_") above, so we omit
574  		      * the +1 here.
575  		      */
576  		     sizeof(prog->aux->name) > KSYM_NAME_LEN);
577  
578  	sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
579  	sym  = bin2hex(sym, prog->tag, sizeof(prog->tag));
580  
581  	/* prog->aux->name will be ignored if full btf name is available */
582  	if (prog->aux->func_info_cnt) {
583  		type = btf_type_by_id(prog->aux->btf,
584  				      prog->aux->func_info[prog->aux->func_idx].type_id);
585  		func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
586  		snprintf(sym, (size_t)(end - sym), "_%s", func_name);
587  		return;
588  	}
589  
590  	if (prog->aux->name[0])
591  		snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
592  	else
593  		*sym = 0;
594  }
595  
static unsigned long bpf_get_ksym_start(struct latch_tree_node *n)
597  {
598  	return container_of(n, struct bpf_ksym, tnode)->start;
599  }
600  
static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
602  					  struct latch_tree_node *b)
603  {
604  	return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
605  }
606  
static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
608  {
609  	unsigned long val = (unsigned long)key;
610  	const struct bpf_ksym *ksym;
611  
612  	ksym = container_of(n, struct bpf_ksym, tnode);
613  
614  	if (val < ksym->start)
615  		return -1;
616  	if (val >= ksym->end)
617  		return  1;
618  
619  	return 0;
620  }
621  
622  static const struct latch_tree_ops bpf_tree_ops = {
623  	.less	= bpf_tree_less,
624  	.comp	= bpf_tree_comp,
625  };
626  
627  static DEFINE_SPINLOCK(bpf_lock);
628  static LIST_HEAD(bpf_kallsyms);
629  static struct latch_tree_root bpf_tree __cacheline_aligned;
630  
void bpf_ksym_add(struct bpf_ksym *ksym)
632  {
633  	spin_lock_bh(&bpf_lock);
634  	WARN_ON_ONCE(!list_empty(&ksym->lnode));
635  	list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
636  	latch_tree_insert(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
637  	spin_unlock_bh(&bpf_lock);
638  }
639  
static void __bpf_ksym_del(struct bpf_ksym *ksym)
641  {
642  	if (list_empty(&ksym->lnode))
643  		return;
644  
645  	latch_tree_erase(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
646  	list_del_rcu(&ksym->lnode);
647  }
648  
void bpf_ksym_del(struct bpf_ksym *ksym)
650  {
651  	spin_lock_bh(&bpf_lock);
652  	__bpf_ksym_del(ksym);
653  	spin_unlock_bh(&bpf_lock);
654  }
655  
static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
657  {
658  	return fp->jited && !bpf_prog_was_classic(fp);
659  }
660  
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
662  {
663  	if (!bpf_prog_kallsyms_candidate(fp) ||
664  	    !bpf_capable())
665  		return;
666  
667  	bpf_prog_ksym_set_addr(fp);
668  	bpf_prog_ksym_set_name(fp);
669  	fp->aux->ksym.prog = true;
670  
671  	bpf_ksym_add(&fp->aux->ksym);
672  }
673  
void bpf_prog_kallsyms_del(struct bpf_prog *fp)
675  {
676  	if (!bpf_prog_kallsyms_candidate(fp))
677  		return;
678  
679  	bpf_ksym_del(&fp->aux->ksym);
680  }
681  
static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
683  {
684  	struct latch_tree_node *n;
685  
686  	n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
687  	return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
688  }
689  
const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
691  				 unsigned long *off, char *sym)
692  {
693  	struct bpf_ksym *ksym;
694  	char *ret = NULL;
695  
696  	rcu_read_lock();
697  	ksym = bpf_ksym_find(addr);
698  	if (ksym) {
699  		unsigned long symbol_start = ksym->start;
700  		unsigned long symbol_end = ksym->end;
701  
702  		strncpy(sym, ksym->name, KSYM_NAME_LEN);
703  
704  		ret = sym;
705  		if (size)
706  			*size = symbol_end - symbol_start;
707  		if (off)
708  			*off  = addr - symbol_start;
709  	}
710  	rcu_read_unlock();
711  
712  	return ret;
713  }
714  
bool is_bpf_text_address(unsigned long addr)
716  {
717  	bool ret;
718  
719  	rcu_read_lock();
720  	ret = bpf_ksym_find(addr) != NULL;
721  	rcu_read_unlock();
722  
723  	return ret;
724  }
725  
static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
727  {
728  	struct bpf_ksym *ksym = bpf_ksym_find(addr);
729  
730  	return ksym && ksym->prog ?
731  	       container_of(ksym, struct bpf_prog_aux, ksym)->prog :
732  	       NULL;
733  }
734  
const struct exception_table_entry *search_bpf_extables(unsigned long addr)
736  {
737  	const struct exception_table_entry *e = NULL;
738  	struct bpf_prog *prog;
739  
740  	rcu_read_lock();
741  	prog = bpf_prog_ksym_find(addr);
742  	if (!prog)
743  		goto out;
744  	if (!prog->aux->num_exentries)
745  		goto out;
746  
747  	e = search_extable(prog->aux->extable, prog->aux->num_exentries, addr);
748  out:
749  	rcu_read_unlock();
750  	return e;
751  }
752  
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
754  		    char *sym)
755  {
756  	struct bpf_ksym *ksym;
757  	unsigned int it = 0;
758  	int ret = -ERANGE;
759  
760  	if (!bpf_jit_kallsyms_enabled())
761  		return ret;
762  
763  	rcu_read_lock();
764  	list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) {
765  		if (it++ != symnum)
766  			continue;
767  
768  		strncpy(sym, ksym->name, KSYM_NAME_LEN);
769  
770  		*value = ksym->start;
771  		*type  = BPF_SYM_ELF_TYPE;
772  
773  		ret = 0;
774  		break;
775  	}
776  	rcu_read_unlock();
777  
778  	return ret;
779  }
780  
int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
782  				struct bpf_jit_poke_descriptor *poke)
783  {
784  	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
785  	static const u32 poke_tab_max = 1024;
786  	u32 slot = prog->aux->size_poke_tab;
787  	u32 size = slot + 1;
788  
789  	if (size > poke_tab_max)
790  		return -ENOSPC;
791  	if (poke->tailcall_target || poke->tailcall_target_stable ||
792  	    poke->tailcall_bypass || poke->adj_off || poke->bypass_addr)
793  		return -EINVAL;
794  
795  	switch (poke->reason) {
796  	case BPF_POKE_REASON_TAIL_CALL:
797  		if (!poke->tail_call.map)
798  			return -EINVAL;
799  		break;
800  	default:
801  		return -EINVAL;
802  	}
803  
804  	tab = krealloc(tab, size * sizeof(*poke), GFP_KERNEL);
805  	if (!tab)
806  		return -ENOMEM;
807  
808  	memcpy(&tab[slot], poke, sizeof(*poke));
809  	prog->aux->size_poke_tab = size;
810  	prog->aux->poke_tab = tab;
811  
812  	return slot;
813  }
814  
815  /*
816   * BPF program pack allocator.
817   *
 * Most BPF programs are pretty small. Allocating a whole page for each
 * program is sometimes a waste. Many small BPF programs also add pressure
 * to the instruction TLB. To solve this issue, we introduce a BPF program pack
821   * allocator. The prog_pack allocator uses HPAGE_PMD_SIZE page (2MB on x86)
822   * to host BPF programs.
823   */
824  #define BPF_PROG_CHUNK_SHIFT	6
825  #define BPF_PROG_CHUNK_SIZE	(1 << BPF_PROG_CHUNK_SHIFT)
826  #define BPF_PROG_CHUNK_MASK	(~(BPF_PROG_CHUNK_SIZE - 1))
827  
828  struct bpf_prog_pack {
829  	struct list_head list;
830  	void *ptr;
831  	unsigned long bitmap[];
832  };
833  
void bpf_jit_fill_hole_with_zero(void *area, unsigned int size)
835  {
836  	memset(area, 0, size);
837  }
838  
839  #define BPF_PROG_SIZE_TO_NBITS(size)	(round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE)
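/* E.g. a 200 byte image needs BPF_PROG_SIZE_TO_NBITS(200) = 4 chunks,
 * i.e. 256 bytes of a pack (numbers are illustrative only).
 */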
840  
841  static DEFINE_MUTEX(pack_mutex);
842  static LIST_HEAD(pack_list);
843  
844  /* PMD_SIZE is not available in some special config, e.g. ARCH=arm with
845   * CONFIG_MMU=n. Use PAGE_SIZE in these cases.
846   */
847  #ifdef PMD_SIZE
848  #define BPF_PROG_PACK_SIZE (PMD_SIZE * num_possible_nodes())
849  #else
850  #define BPF_PROG_PACK_SIZE PAGE_SIZE
851  #endif
852  
853  #define BPF_PROG_CHUNK_COUNT (BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE)
854  
static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns)
856  {
857  	struct bpf_prog_pack *pack;
858  
859  	pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)),
860  		       GFP_KERNEL);
861  	if (!pack)
862  		return NULL;
863  	pack->ptr = module_alloc(BPF_PROG_PACK_SIZE);
864  	if (!pack->ptr) {
865  		kfree(pack);
866  		return NULL;
867  	}
868  	bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE);
869  	bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE);
870  	list_add_tail(&pack->list, &pack_list);
871  
872  	set_vm_flush_reset_perms(pack->ptr);
873  	set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
874  	return pack;
875  }
876  
void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
878  {
879  	unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size);
880  	struct bpf_prog_pack *pack;
881  	unsigned long pos;
882  	void *ptr = NULL;
883  
884  	mutex_lock(&pack_mutex);
885  	if (size > BPF_PROG_PACK_SIZE) {
886  		size = round_up(size, PAGE_SIZE);
887  		ptr = module_alloc(size);
888  		if (ptr) {
889  			bpf_fill_ill_insns(ptr, size);
890  			set_vm_flush_reset_perms(ptr);
891  			set_memory_rox((unsigned long)ptr, size / PAGE_SIZE);
892  		}
893  		goto out;
894  	}
895  	list_for_each_entry(pack, &pack_list, list) {
896  		pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
897  						 nbits, 0);
898  		if (pos < BPF_PROG_CHUNK_COUNT)
899  			goto found_free_area;
900  	}
901  
902  	pack = alloc_new_pack(bpf_fill_ill_insns);
903  	if (!pack)
904  		goto out;
905  
906  	pos = 0;
907  
908  found_free_area:
909  	bitmap_set(pack->bitmap, pos, nbits);
910  	ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT);
911  
912  out:
913  	mutex_unlock(&pack_mutex);
914  	return ptr;
915  }
916  
void bpf_prog_pack_free(struct bpf_binary_header *hdr)
918  {
919  	struct bpf_prog_pack *pack = NULL, *tmp;
920  	unsigned int nbits;
921  	unsigned long pos;
922  
923  	mutex_lock(&pack_mutex);
924  	if (hdr->size > BPF_PROG_PACK_SIZE) {
925  		module_memfree(hdr);
926  		goto out;
927  	}
928  
929  	list_for_each_entry(tmp, &pack_list, list) {
930  		if ((void *)hdr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > (void *)hdr) {
931  			pack = tmp;
932  			break;
933  		}
934  	}
935  
936  	if (WARN_ONCE(!pack, "bpf_prog_pack bug\n"))
937  		goto out;
938  
939  	nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
940  	pos = ((unsigned long)hdr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT;
941  
942  	WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size),
943  		  "bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n");
944  
945  	bitmap_clear(pack->bitmap, pos, nbits);
946  	if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
947  				       BPF_PROG_CHUNK_COUNT, 0) == 0) {
948  		list_del(&pack->list);
949  		module_memfree(pack->ptr);
950  		kfree(pack);
951  	}
952  out:
953  	mutex_unlock(&pack_mutex);
954  }
955  
956  static atomic_long_t bpf_jit_current;
957  
958  /* Can be overridden by an arch's JIT compiler if it has a custom,
959   * dedicated BPF backend memory area, or if neither of the two
960   * below apply.
961   */
u64 __weak bpf_jit_alloc_exec_limit(void)
963  {
964  #if defined(MODULES_VADDR)
965  	return MODULES_END - MODULES_VADDR;
966  #else
967  	return VMALLOC_END - VMALLOC_START;
968  #endif
969  }
970  
static int __init bpf_jit_charge_init(void)
972  {
973  	/* Only used as heuristic here to derive limit. */
974  	bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
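	/* Default the limit to 1/4 of that space, rounded up to a page and
	 * capped at LONG_MAX.
	 */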
975  	bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2,
976  					    PAGE_SIZE), LONG_MAX);
977  	return 0;
978  }
979  pure_initcall(bpf_jit_charge_init);
980  
int bpf_jit_charge_modmem(u32 size)
982  {
983  	if (atomic_long_add_return(size, &bpf_jit_current) > READ_ONCE(bpf_jit_limit)) {
984  		if (!bpf_capable()) {
985  			atomic_long_sub(size, &bpf_jit_current);
986  			return -EPERM;
987  		}
988  	}
989  
990  	return 0;
991  }
992  
void bpf_jit_uncharge_modmem(u32 size)
994  {
995  	atomic_long_sub(size, &bpf_jit_current);
996  }
997  
void *__weak bpf_jit_alloc_exec(unsigned long size)
999  {
1000  	return module_alloc(size);
1001  }
1002  
void __weak bpf_jit_free_exec(void *addr)
1004  {
1005  	module_memfree(addr);
1006  }
1007  
1008  struct bpf_binary_header *
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
1010  		     unsigned int alignment,
1011  		     bpf_jit_fill_hole_t bpf_fill_ill_insns)
1012  {
1013  	struct bpf_binary_header *hdr;
1014  	u32 size, hole, start;
1015  
1016  	WARN_ON_ONCE(!is_power_of_2(alignment) ||
1017  		     alignment > BPF_IMAGE_ALIGNMENT);
1018  
	/* Most BPF filters are really small, but if some of them
1020  	 * fill a page, allow at least 128 extra bytes to insert a
1021  	 * random section of illegal instructions.
1022  	 */
1023  	size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
1024  
1025  	if (bpf_jit_charge_modmem(size))
1026  		return NULL;
1027  	hdr = bpf_jit_alloc_exec(size);
1028  	if (!hdr) {
1029  		bpf_jit_uncharge_modmem(size);
1030  		return NULL;
1031  	}
1032  
1033  	/* Fill space with illegal/arch-dep instructions. */
1034  	bpf_fill_ill_insns(hdr, size);
1035  
1036  	hdr->size = size;
1037  	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
1038  		     PAGE_SIZE - sizeof(*hdr));
1039  	start = get_random_u32_below(hole) & ~(alignment - 1);
1040  
1041  	/* Leave a random number of instructions before BPF code. */
1042  	*image_ptr = &hdr->image[start];
1043  
1044  	return hdr;
1045  }
1046  
void bpf_jit_binary_free(struct bpf_binary_header *hdr)
1048  {
1049  	u32 size = hdr->size;
1050  
1051  	bpf_jit_free_exec(hdr);
1052  	bpf_jit_uncharge_modmem(size);
1053  }
1054  
1055  /* Allocate jit binary from bpf_prog_pack allocator.
1056   * Since the allocated memory is RO+X, the JIT engine cannot write directly
 * to the memory. To solve this problem, a RW buffer is also allocated
 * at the same time. The JIT engine should calculate offsets based on the
 * RO memory address, but write the JITed program to the RW buffer. Once the
1060   * JIT engine finishes, it calls bpf_jit_binary_pack_finalize, which copies
1061   * the JITed program to the RO memory.
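 *
 * Rough usage sketch from an arch JIT's point of view (simplified; real
 * JITs differ in details and error handling):
 *
 *	hdr = bpf_jit_binary_pack_alloc(proglen, &image, align,
 *					&rw_hdr, &rw_image, fill_insns);
 *	... emit instructions into rw_image, using image for offsets ...
 *	if (!bpf_jit_binary_pack_finalize(prog, hdr, rw_hdr))
 *		prog->bpf_func = (void *)image;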
1062   */
1063  struct bpf_binary_header *
bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
1065  			  unsigned int alignment,
1066  			  struct bpf_binary_header **rw_header,
1067  			  u8 **rw_image,
1068  			  bpf_jit_fill_hole_t bpf_fill_ill_insns)
1069  {
1070  	struct bpf_binary_header *ro_header;
1071  	u32 size, hole, start;
1072  
1073  	WARN_ON_ONCE(!is_power_of_2(alignment) ||
1074  		     alignment > BPF_IMAGE_ALIGNMENT);
1075  
1076  	/* add 16 bytes for a random section of illegal instructions */
1077  	size = round_up(proglen + sizeof(*ro_header) + 16, BPF_PROG_CHUNK_SIZE);
1078  
1079  	if (bpf_jit_charge_modmem(size))
1080  		return NULL;
1081  	ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns);
1082  	if (!ro_header) {
1083  		bpf_jit_uncharge_modmem(size);
1084  		return NULL;
1085  	}
1086  
1087  	*rw_header = kvmalloc(size, GFP_KERNEL);
1088  	if (!*rw_header) {
1089  		bpf_arch_text_copy(&ro_header->size, &size, sizeof(size));
1090  		bpf_prog_pack_free(ro_header);
1091  		bpf_jit_uncharge_modmem(size);
1092  		return NULL;
1093  	}
1094  
1095  	/* Fill space with illegal/arch-dep instructions. */
1096  	bpf_fill_ill_insns(*rw_header, size);
1097  	(*rw_header)->size = size;
1098  
1099  	hole = min_t(unsigned int, size - (proglen + sizeof(*ro_header)),
1100  		     BPF_PROG_CHUNK_SIZE - sizeof(*ro_header));
1101  	start = get_random_u32_below(hole) & ~(alignment - 1);
1102  
1103  	*image_ptr = &ro_header->image[start];
1104  	*rw_image = &(*rw_header)->image[start];
1105  
1106  	return ro_header;
1107  }
1108  
1109  /* Copy JITed text from rw_header to its final location, the ro_header. */
int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
1111  				 struct bpf_binary_header *ro_header,
1112  				 struct bpf_binary_header *rw_header)
1113  {
1114  	void *ptr;
1115  
1116  	ptr = bpf_arch_text_copy(ro_header, rw_header, rw_header->size);
1117  
1118  	kvfree(rw_header);
1119  
1120  	if (IS_ERR(ptr)) {
1121  		bpf_prog_pack_free(ro_header);
1122  		return PTR_ERR(ptr);
1123  	}
1124  	return 0;
1125  }
1126  
1127  /* bpf_jit_binary_pack_free is called in two different scenarios:
 *   1) when the program is freed after a successful JIT;
1129   *   2) when the JIT engine fails (before bpf_jit_binary_pack_finalize).
1130   * For case 2), we need to free both the RO memory and the RW buffer.
1131   *
1132   * bpf_jit_binary_pack_free requires proper ro_header->size. However,
1133   * bpf_jit_binary_pack_alloc does not set it. Therefore, ro_header->size
1134   * must be set with either bpf_jit_binary_pack_finalize (normal path) or
1135   * bpf_arch_text_copy (when jit fails).
1136   */
void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
1138  			      struct bpf_binary_header *rw_header)
1139  {
1140  	u32 size = ro_header->size;
1141  
1142  	bpf_prog_pack_free(ro_header);
1143  	kvfree(rw_header);
1144  	bpf_jit_uncharge_modmem(size);
1145  }
1146  
1147  struct bpf_binary_header *
bpf_jit_binary_pack_hdr(const struct bpf_prog *fp)
1149  {
1150  	unsigned long real_start = (unsigned long)fp->bpf_func;
1151  	unsigned long addr;
1152  
1153  	addr = real_start & BPF_PROG_CHUNK_MASK;
1154  	return (void *)addr;
1155  }
1156  
1157  static inline struct bpf_binary_header *
bpf_jit_binary_hdr(const struct bpf_prog *fp)
1159  {
1160  	unsigned long real_start = (unsigned long)fp->bpf_func;
1161  	unsigned long addr;
1162  
1163  	addr = real_start & PAGE_MASK;
1164  	return (void *)addr;
1165  }
1166  
1167  /* This symbol is only overridden by archs that have different
1168   * requirements than the usual eBPF JITs, f.e. when they only
1169   * implement cBPF JIT, do not set images read-only, etc.
1170   */
void __weak bpf_jit_free(struct bpf_prog *fp)
1172  {
1173  	if (fp->jited) {
1174  		struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
1175  
1176  		bpf_jit_binary_free(hdr);
1177  		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
1178  	}
1179  
1180  	bpf_prog_unlock_free(fp);
1181  }
1182  
int bpf_jit_get_func_addr(const struct bpf_prog *prog,
1184  			  const struct bpf_insn *insn, bool extra_pass,
1185  			  u64 *func_addr, bool *func_addr_fixed)
1186  {
1187  	s16 off = insn->off;
1188  	s32 imm = insn->imm;
1189  	u8 *addr;
1190  
1191  	*func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
1192  	if (!*func_addr_fixed) {
1193  		/* Place-holder address till the last pass has collected
1194  		 * all addresses for JITed subprograms in which case we
1195  		 * can pick them up from prog->aux.
1196  		 */
1197  		if (!extra_pass)
1198  			addr = NULL;
1199  		else if (prog->aux->func &&
1200  			 off >= 0 && off < prog->aux->func_cnt)
1201  			addr = (u8 *)prog->aux->func[off]->bpf_func;
1202  		else
1203  			return -EINVAL;
1204  	} else {
1205  		/* Address of a BPF helper call. Since part of the core
1206  		 * kernel, it's always at a fixed location. __bpf_call_base
1207  		 * and the helper with imm relative to it are both in core
1208  		 * kernel.
1209  		 */
1210  		addr = (u8 *)__bpf_call_base + imm;
1211  	}
1212  
1213  	*func_addr = (unsigned long)addr;
1214  	return 0;
1215  }
1216  
static int bpf_jit_blind_insn(const struct bpf_insn *from,
1218  			      const struct bpf_insn *aux,
1219  			      struct bpf_insn *to_buff,
1220  			      bool emit_zext)
1221  {
1222  	struct bpf_insn *to = to_buff;
1223  	u32 imm_rnd = get_random_u32();
1224  	s16 off;
1225  
1226  	BUILD_BUG_ON(BPF_REG_AX  + 1 != MAX_BPF_JIT_REG);
1227  	BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);
1228  
1229  	/* Constraints on AX register:
1230  	 *
1231  	 * AX register is inaccessible from user space. It is mapped in
1232  	 * all JITs, and used here for constant blinding rewrites. It is
1233  	 * typically "stateless" meaning its contents are only valid within
1234  	 * the executed instruction, but not across several instructions.
1235  	 * There are a few exceptions however which are further detailed
1236  	 * below.
1237  	 *
1238  	 * Constant blinding is only used by JITs, not in the interpreter.
1239  	 * The interpreter uses AX in some occasions as a local temporary
1240  	 * register e.g. in DIV or MOD instructions.
1241  	 *
1242  	 * In restricted circumstances, the verifier can also use the AX
1243  	 * register for rewrites as long as they do not interfere with
1244  	 * the above cases!
1245  	 */
1246  	if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX)
1247  		goto out;
1248  
1249  	if (from->imm == 0 &&
1250  	    (from->code == (BPF_ALU   | BPF_MOV | BPF_K) ||
1251  	     from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
1252  		*to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg);
1253  		goto out;
1254  	}
1255  
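	/* Illustrative example of the rewrites below: with some random
	 * imm_rnd, "r1 += 42" (BPF_ALU64 | BPF_ADD | BPF_K) is emitted as
	 *
	 *	AX  = imm_rnd ^ 42
	 *	AX ^= imm_rnd
	 *	r1 += AX
	 *
	 * so the user-supplied constant never appears verbatim in the image.
	 */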
1256  	switch (from->code) {
1257  	case BPF_ALU | BPF_ADD | BPF_K:
1258  	case BPF_ALU | BPF_SUB | BPF_K:
1259  	case BPF_ALU | BPF_AND | BPF_K:
1260  	case BPF_ALU | BPF_OR  | BPF_K:
1261  	case BPF_ALU | BPF_XOR | BPF_K:
1262  	case BPF_ALU | BPF_MUL | BPF_K:
1263  	case BPF_ALU | BPF_MOV | BPF_K:
1264  	case BPF_ALU | BPF_DIV | BPF_K:
1265  	case BPF_ALU | BPF_MOD | BPF_K:
1266  		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1267  		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1268  		*to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX);
1269  		break;
1270  
1271  	case BPF_ALU64 | BPF_ADD | BPF_K:
1272  	case BPF_ALU64 | BPF_SUB | BPF_K:
1273  	case BPF_ALU64 | BPF_AND | BPF_K:
1274  	case BPF_ALU64 | BPF_OR  | BPF_K:
1275  	case BPF_ALU64 | BPF_XOR | BPF_K:
1276  	case BPF_ALU64 | BPF_MUL | BPF_K:
1277  	case BPF_ALU64 | BPF_MOV | BPF_K:
1278  	case BPF_ALU64 | BPF_DIV | BPF_K:
1279  	case BPF_ALU64 | BPF_MOD | BPF_K:
1280  		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1281  		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1282  		*to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX);
1283  		break;
1284  
1285  	case BPF_JMP | BPF_JEQ  | BPF_K:
1286  	case BPF_JMP | BPF_JNE  | BPF_K:
1287  	case BPF_JMP | BPF_JGT  | BPF_K:
1288  	case BPF_JMP | BPF_JLT  | BPF_K:
1289  	case BPF_JMP | BPF_JGE  | BPF_K:
1290  	case BPF_JMP | BPF_JLE  | BPF_K:
1291  	case BPF_JMP | BPF_JSGT | BPF_K:
1292  	case BPF_JMP | BPF_JSLT | BPF_K:
1293  	case BPF_JMP | BPF_JSGE | BPF_K:
1294  	case BPF_JMP | BPF_JSLE | BPF_K:
1295  	case BPF_JMP | BPF_JSET | BPF_K:
		/* Account for the extra offset in case of a backjump. */
1297  		off = from->off;
1298  		if (off < 0)
1299  			off -= 2;
1300  		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1301  		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1302  		*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
1303  		break;
1304  
1305  	case BPF_JMP32 | BPF_JEQ  | BPF_K:
1306  	case BPF_JMP32 | BPF_JNE  | BPF_K:
1307  	case BPF_JMP32 | BPF_JGT  | BPF_K:
1308  	case BPF_JMP32 | BPF_JLT  | BPF_K:
1309  	case BPF_JMP32 | BPF_JGE  | BPF_K:
1310  	case BPF_JMP32 | BPF_JLE  | BPF_K:
1311  	case BPF_JMP32 | BPF_JSGT | BPF_K:
1312  	case BPF_JMP32 | BPF_JSLT | BPF_K:
1313  	case BPF_JMP32 | BPF_JSGE | BPF_K:
1314  	case BPF_JMP32 | BPF_JSLE | BPF_K:
1315  	case BPF_JMP32 | BPF_JSET | BPF_K:
		/* Account for the extra offset in case of a backjump. */
1317  		off = from->off;
1318  		if (off < 0)
1319  			off -= 2;
1320  		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1321  		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1322  		*to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
1323  				      off);
1324  		break;
1325  
1326  	case BPF_LD | BPF_IMM | BPF_DW:
1327  		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
1328  		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1329  		*to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
1330  		*to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX);
1331  		break;
1332  	case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
1333  		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
1334  		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1335  		if (emit_zext)
1336  			*to++ = BPF_ZEXT_REG(BPF_REG_AX);
1337  		*to++ = BPF_ALU64_REG(BPF_OR,  aux[0].dst_reg, BPF_REG_AX);
1338  		break;
1339  
1340  	case BPF_ST | BPF_MEM | BPF_DW:
1341  	case BPF_ST | BPF_MEM | BPF_W:
1342  	case BPF_ST | BPF_MEM | BPF_H:
1343  	case BPF_ST | BPF_MEM | BPF_B:
1344  		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1345  		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1346  		*to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off);
1347  		break;
1348  	}
1349  out:
1350  	return to - to_buff;
1351  }
1352  
static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
1354  					      gfp_t gfp_extra_flags)
1355  {
1356  	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
1357  	struct bpf_prog *fp;
1358  
1359  	fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags);
1360  	if (fp != NULL) {
1361  		/* aux->prog still points to the fp_other one, so
1362  		 * when promoting the clone to the real program,
1363  		 * this still needs to be adapted.
1364  		 */
1365  		memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE);
1366  	}
1367  
1368  	return fp;
1369  }
1370  
static void bpf_prog_clone_free(struct bpf_prog *fp)
1372  {
1373  	/* aux was stolen by the other clone, so we cannot free
1374  	 * it from this path! It will be freed eventually by the
1375  	 * other program on release.
1376  	 *
1377  	 * At this point, we don't need a deferred release since
1378  	 * clone is guaranteed to not be locked.
1379  	 */
1380  	fp->aux = NULL;
1381  	fp->stats = NULL;
1382  	fp->active = NULL;
1383  	__bpf_prog_free(fp);
1384  }
1385  
void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
1387  {
1388  	/* We have to repoint aux->prog to self, as we don't
1389  	 * know whether fp here is the clone or the original.
1390  	 */
1391  	fp->aux->prog = fp;
1392  	bpf_prog_clone_free(fp_other);
1393  }
1394  
struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
1396  {
1397  	struct bpf_insn insn_buff[16], aux[2];
1398  	struct bpf_prog *clone, *tmp;
1399  	int insn_delta, insn_cnt;
1400  	struct bpf_insn *insn;
1401  	int i, rewritten;
1402  
1403  	if (!prog->blinding_requested || prog->blinded)
1404  		return prog;
1405  
1406  	clone = bpf_prog_clone_create(prog, GFP_USER);
1407  	if (!clone)
1408  		return ERR_PTR(-ENOMEM);
1409  
1410  	insn_cnt = clone->len;
1411  	insn = clone->insnsi;
1412  
1413  	for (i = 0; i < insn_cnt; i++, insn++) {
1414  		if (bpf_pseudo_func(insn)) {
1415  			/* ld_imm64 with an address of bpf subprog is not
1416  			 * a user controlled constant. Don't randomize it,
1417  			 * since it will conflict with jit_subprogs() logic.
1418  			 */
1419  			insn++;
1420  			i++;
1421  			continue;
1422  		}
1423  
1424  		/* We temporarily need to hold the original ld64 insn
1425  		 * so that we can still access the first part in the
1426  		 * second blinding run.
1427  		 */
1428  		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
1429  		    insn[1].code == 0)
1430  			memcpy(aux, insn, sizeof(aux));
1431  
1432  		rewritten = bpf_jit_blind_insn(insn, aux, insn_buff,
1433  						clone->aux->verifier_zext);
1434  		if (!rewritten)
1435  			continue;
1436  
1437  		tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
1438  		if (IS_ERR(tmp)) {
1439  			/* Patching may have repointed aux->prog during
1440  			 * realloc from the original one, so we need to
1441  			 * fix it up here on error.
1442  			 */
1443  			bpf_jit_prog_release_other(prog, clone);
1444  			return tmp;
1445  		}
1446  
1447  		clone = tmp;
1448  		insn_delta = rewritten - 1;
1449  
1450  		/* Walk new program and skip insns we just inserted. */
1451  		insn = clone->insnsi + i + insn_delta;
1452  		insn_cnt += insn_delta;
1453  		i        += insn_delta;
1454  	}
1455  
1456  	clone->blinded = 1;
1457  	return clone;
1458  }
1459  #endif /* CONFIG_BPF_JIT */
1460  
1461  /* Base function for offset calculation. Needs to go into .text section,
1462   * therefore keeping it non-static as well; will also be used by JITs
1463   * anyway later on, so do not let the compiler omit it. This also needs
1464   * to go into kallsyms for correlation from e.g. bpftool, so naming
1465   * must not change.
1466   */
noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1468  {
1469  	return 0;
1470  }
1471  EXPORT_SYMBOL_GPL(__bpf_call_base);
1472  
1473  /* All UAPI available opcodes. */
1474  #define BPF_INSN_MAP(INSN_2, INSN_3)		\
1475  	/* 32 bit ALU operations. */		\
1476  	/*   Register based. */			\
1477  	INSN_3(ALU, ADD,  X),			\
1478  	INSN_3(ALU, SUB,  X),			\
1479  	INSN_3(ALU, AND,  X),			\
1480  	INSN_3(ALU, OR,   X),			\
1481  	INSN_3(ALU, LSH,  X),			\
1482  	INSN_3(ALU, RSH,  X),			\
1483  	INSN_3(ALU, XOR,  X),			\
1484  	INSN_3(ALU, MUL,  X),			\
1485  	INSN_3(ALU, MOV,  X),			\
1486  	INSN_3(ALU, ARSH, X),			\
1487  	INSN_3(ALU, DIV,  X),			\
1488  	INSN_3(ALU, MOD,  X),			\
1489  	INSN_2(ALU, NEG),			\
1490  	INSN_3(ALU, END, TO_BE),		\
1491  	INSN_3(ALU, END, TO_LE),		\
1492  	/*   Immediate based. */		\
1493  	INSN_3(ALU, ADD,  K),			\
1494  	INSN_3(ALU, SUB,  K),			\
1495  	INSN_3(ALU, AND,  K),			\
1496  	INSN_3(ALU, OR,   K),			\
1497  	INSN_3(ALU, LSH,  K),			\
1498  	INSN_3(ALU, RSH,  K),			\
1499  	INSN_3(ALU, XOR,  K),			\
1500  	INSN_3(ALU, MUL,  K),			\
1501  	INSN_3(ALU, MOV,  K),			\
1502  	INSN_3(ALU, ARSH, K),			\
1503  	INSN_3(ALU, DIV,  K),			\
1504  	INSN_3(ALU, MOD,  K),			\
1505  	/* 64 bit ALU operations. */		\
1506  	/*   Register based. */			\
1507  	INSN_3(ALU64, ADD,  X),			\
1508  	INSN_3(ALU64, SUB,  X),			\
1509  	INSN_3(ALU64, AND,  X),			\
1510  	INSN_3(ALU64, OR,   X),			\
1511  	INSN_3(ALU64, LSH,  X),			\
1512  	INSN_3(ALU64, RSH,  X),			\
1513  	INSN_3(ALU64, XOR,  X),			\
1514  	INSN_3(ALU64, MUL,  X),			\
1515  	INSN_3(ALU64, MOV,  X),			\
1516  	INSN_3(ALU64, ARSH, X),			\
1517  	INSN_3(ALU64, DIV,  X),			\
1518  	INSN_3(ALU64, MOD,  X),			\
1519  	INSN_2(ALU64, NEG),			\
1520  	/*   Immediate based. */		\
1521  	INSN_3(ALU64, ADD,  K),			\
1522  	INSN_3(ALU64, SUB,  K),			\
1523  	INSN_3(ALU64, AND,  K),			\
1524  	INSN_3(ALU64, OR,   K),			\
1525  	INSN_3(ALU64, LSH,  K),			\
1526  	INSN_3(ALU64, RSH,  K),			\
1527  	INSN_3(ALU64, XOR,  K),			\
1528  	INSN_3(ALU64, MUL,  K),			\
1529  	INSN_3(ALU64, MOV,  K),			\
1530  	INSN_3(ALU64, ARSH, K),			\
1531  	INSN_3(ALU64, DIV,  K),			\
1532  	INSN_3(ALU64, MOD,  K),			\
1533  	/* Call instruction. */			\
1534  	INSN_2(JMP, CALL),			\
1535  	/* Exit instruction. */			\
1536  	INSN_2(JMP, EXIT),			\
1537  	/* 32-bit Jump instructions. */		\
1538  	/*   Register based. */			\
1539  	INSN_3(JMP32, JEQ,  X),			\
1540  	INSN_3(JMP32, JNE,  X),			\
1541  	INSN_3(JMP32, JGT,  X),			\
1542  	INSN_3(JMP32, JLT,  X),			\
1543  	INSN_3(JMP32, JGE,  X),			\
1544  	INSN_3(JMP32, JLE,  X),			\
1545  	INSN_3(JMP32, JSGT, X),			\
1546  	INSN_3(JMP32, JSLT, X),			\
1547  	INSN_3(JMP32, JSGE, X),			\
1548  	INSN_3(JMP32, JSLE, X),			\
1549  	INSN_3(JMP32, JSET, X),			\
1550  	/*   Immediate based. */		\
1551  	INSN_3(JMP32, JEQ,  K),			\
1552  	INSN_3(JMP32, JNE,  K),			\
1553  	INSN_3(JMP32, JGT,  K),			\
1554  	INSN_3(JMP32, JLT,  K),			\
1555  	INSN_3(JMP32, JGE,  K),			\
1556  	INSN_3(JMP32, JLE,  K),			\
1557  	INSN_3(JMP32, JSGT, K),			\
1558  	INSN_3(JMP32, JSLT, K),			\
1559  	INSN_3(JMP32, JSGE, K),			\
1560  	INSN_3(JMP32, JSLE, K),			\
1561  	INSN_3(JMP32, JSET, K),			\
1562  	/* Jump instructions. */		\
1563  	/*   Register based. */			\
1564  	INSN_3(JMP, JEQ,  X),			\
1565  	INSN_3(JMP, JNE,  X),			\
1566  	INSN_3(JMP, JGT,  X),			\
1567  	INSN_3(JMP, JLT,  X),			\
1568  	INSN_3(JMP, JGE,  X),			\
1569  	INSN_3(JMP, JLE,  X),			\
1570  	INSN_3(JMP, JSGT, X),			\
1571  	INSN_3(JMP, JSLT, X),			\
1572  	INSN_3(JMP, JSGE, X),			\
1573  	INSN_3(JMP, JSLE, X),			\
1574  	INSN_3(JMP, JSET, X),			\
1575  	/*   Immediate based. */		\
1576  	INSN_3(JMP, JEQ,  K),			\
1577  	INSN_3(JMP, JNE,  K),			\
1578  	INSN_3(JMP, JGT,  K),			\
1579  	INSN_3(JMP, JLT,  K),			\
1580  	INSN_3(JMP, JGE,  K),			\
1581  	INSN_3(JMP, JLE,  K),			\
1582  	INSN_3(JMP, JSGT, K),			\
1583  	INSN_3(JMP, JSLT, K),			\
1584  	INSN_3(JMP, JSGE, K),			\
1585  	INSN_3(JMP, JSLE, K),			\
1586  	INSN_3(JMP, JSET, K),			\
1587  	INSN_2(JMP, JA),			\
1588  	/* Store instructions. */		\
1589  	/*   Register based. */			\
1590  	INSN_3(STX, MEM,  B),			\
1591  	INSN_3(STX, MEM,  H),			\
1592  	INSN_3(STX, MEM,  W),			\
1593  	INSN_3(STX, MEM,  DW),			\
1594  	INSN_3(STX, ATOMIC, W),			\
1595  	INSN_3(STX, ATOMIC, DW),		\
1596  	/*   Immediate based. */		\
1597  	INSN_3(ST, MEM, B),			\
1598  	INSN_3(ST, MEM, H),			\
1599  	INSN_3(ST, MEM, W),			\
1600  	INSN_3(ST, MEM, DW),			\
1601  	/* Load instructions. */		\
1602  	/*   Register based. */			\
1603  	INSN_3(LDX, MEM, B),			\
1604  	INSN_3(LDX, MEM, H),			\
1605  	INSN_3(LDX, MEM, W),			\
1606  	INSN_3(LDX, MEM, DW),			\
1607  	/*   Immediate based. */		\
1608  	INSN_3(LD, IMM, DW)
1609  
bool bpf_opcode_in_insntable(u8 code)
1611  {
1612  #define BPF_INSN_2_TBL(x, y)    [BPF_##x | BPF_##y] = true
1613  #define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true
1614  	static const bool public_insntable[256] = {
1615  		[0 ... 255] = false,
1616  		/* Now overwrite non-defaults ... */
1617  		BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
1618  		/* UAPI exposed, but rewritten opcodes. cBPF carry-over. */
1619  		[BPF_LD | BPF_ABS | BPF_B] = true,
1620  		[BPF_LD | BPF_ABS | BPF_H] = true,
1621  		[BPF_LD | BPF_ABS | BPF_W] = true,
1622  		[BPF_LD | BPF_IND | BPF_B] = true,
1623  		[BPF_LD | BPF_IND | BPF_H] = true,
1624  		[BPF_LD | BPF_IND | BPF_W] = true,
1625  	};
1626  #undef BPF_INSN_3_TBL
1627  #undef BPF_INSN_2_TBL
1628  	return public_insntable[code];
1629  }
1630  
1631  #ifndef CONFIG_BPF_JIT_ALWAYS_ON
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
1633  {
1634  	memset(dst, 0, size);
1635  	return -EFAULT;
1636  }
1637  
1638  /**
1639   *	___bpf_prog_run - run eBPF program on a given context
1640   *	@regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
1641   *	@insn: is the array of eBPF instructions
1642   *
1643   * Decode and execute eBPF instructions.
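 *
 * Dispatch uses a computed goto jump table: each opcode selects a label
 * (e.g. BPF_ALU64 | BPF_ADD | BPF_X lands on ALU64_ADD_X below) and each
 * handler ends in CONT, which advances @insn and jumps back to
 * select_insn.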
1644   *
1645   * Return: whatever value is in %BPF_R0 at program exit
1646   */
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
1648  {
1649  #define BPF_INSN_2_LBL(x, y)    [BPF_##x | BPF_##y] = &&x##_##y
1650  #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
1651  	static const void * const jumptable[256] __annotate_jump_table = {
1652  		[0 ... 255] = &&default_label,
1653  		/* Now overwrite non-defaults ... */
1654  		BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
1655  		/* Non-UAPI available opcodes. */
1656  		[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
1657  		[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
1658  		[BPF_ST  | BPF_NOSPEC] = &&ST_NOSPEC,
1659  		[BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
1660  		[BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
1661  		[BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
1662  		[BPF_LDX | BPF_PROBE_MEM | BPF_DW] = &&LDX_PROBE_MEM_DW,
1663  	};
1664  #undef BPF_INSN_3_LBL
1665  #undef BPF_INSN_2_LBL
1666  	u32 tail_call_cnt = 0;
1667  
1668  #define CONT	 ({ insn++; goto select_insn; })
1669  #define CONT_JMP ({ insn++; goto select_insn; })
1670  
1671  select_insn:
1672  	goto *jumptable[insn->code];
1673  
1674  	/* Explicitly mask the register-based shift amounts with 63 or 31
1675  	 * to avoid undefined behavior. Normally this won't affect the
1676  	 * generated code: on native 64-bit archs such as x86-64 or arm64,
1677  	 * for example, the compiler optimizes the AND away for the
1678  	 * interpreter. In case of JITs, each of the JIT backends compiles
1679  	 * the BPF shift operations to machine instructions which produce
1680  	 * implementation-defined results in such a case; the resulting
1681  	 * contents of the register may be arbitrary, but program behaviour
1682  	 * as a whole remains defined. In other words, in case of JIT backends,
1683  	 * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation.
1684  	 */
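	/* Worked example (illustrative only): for a 64-bit LSH with SRC == 70,
	 * the handler below computes DST << (70 & 63), i.e. DST << 6, so a
	 * register value of 1 becomes 64 instead of triggering an
	 * out-of-range (undefined) shift.
	 */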
1685  	/* ALU (shifts) */
1686  #define SHT(OPCODE, OP)					\
1687  	ALU64_##OPCODE##_X:				\
1688  		DST = DST OP (SRC & 63);		\
1689  		CONT;					\
1690  	ALU_##OPCODE##_X:				\
1691  		DST = (u32) DST OP ((u32) SRC & 31);	\
1692  		CONT;					\
1693  	ALU64_##OPCODE##_K:				\
1694  		DST = DST OP IMM;			\
1695  		CONT;					\
1696  	ALU_##OPCODE##_K:				\
1697  		DST = (u32) DST OP (u32) IMM;		\
1698  		CONT;
1699  	/* ALU (rest) */
1700  #define ALU(OPCODE, OP)					\
1701  	ALU64_##OPCODE##_X:				\
1702  		DST = DST OP SRC;			\
1703  		CONT;					\
1704  	ALU_##OPCODE##_X:				\
1705  		DST = (u32) DST OP (u32) SRC;		\
1706  		CONT;					\
1707  	ALU64_##OPCODE##_K:				\
1708  		DST = DST OP IMM;			\
1709  		CONT;					\
1710  	ALU_##OPCODE##_K:				\
1711  		DST = (u32) DST OP (u32) IMM;		\
1712  		CONT;
1713  	ALU(ADD,  +)
1714  	ALU(SUB,  -)
1715  	ALU(AND,  &)
1716  	ALU(OR,   |)
1717  	ALU(XOR,  ^)
1718  	ALU(MUL,  *)
1719  	SHT(LSH, <<)
1720  	SHT(RSH, >>)
1721  #undef SHT
1722  #undef ALU
1723  	ALU_NEG:
1724  		DST = (u32) -DST;
1725  		CONT;
1726  	ALU64_NEG:
1727  		DST = -DST;
1728  		CONT;
1729  	ALU_MOV_X:
1730  		DST = (u32) SRC;
1731  		CONT;
1732  	ALU_MOV_K:
1733  		DST = (u32) IMM;
1734  		CONT;
1735  	ALU64_MOV_X:
1736  		DST = SRC;
1737  		CONT;
1738  	ALU64_MOV_K:
1739  		DST = IMM;
1740  		CONT;
1741  	LD_IMM_DW:
1742  		DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
1743  		insn++;
1744  		CONT;
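	/* Illustrative example (not kernel code): BPF_LD | BPF_IMM | BPF_DW
	 * spans two instructions. With insn[0].imm == 0x11223344 and
	 * insn[1].imm == 0x55667788, the handler above loads
	 * DST = 0x5566778811223344 and skips over the second half.
	 */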
1745  	ALU_ARSH_X:
1746  		DST = (u64) (u32) (((s32) DST) >> (SRC & 31));
1747  		CONT;
1748  	ALU_ARSH_K:
1749  		DST = (u64) (u32) (((s32) DST) >> IMM);
1750  		CONT;
1751  	ALU64_ARSH_X:
1752  		(*(s64 *) &DST) >>= (SRC & 63);
1753  		CONT;
1754  	ALU64_ARSH_K:
1755  		(*(s64 *) &DST) >>= IMM;
1756  		CONT;
1757  	ALU64_MOD_X:
1758  		div64_u64_rem(DST, SRC, &AX);
1759  		DST = AX;
1760  		CONT;
1761  	ALU_MOD_X:
1762  		AX = (u32) DST;
1763  		DST = do_div(AX, (u32) SRC);
1764  		CONT;
1765  	ALU64_MOD_K:
1766  		div64_u64_rem(DST, IMM, &AX);
1767  		DST = AX;
1768  		CONT;
1769  	ALU_MOD_K:
1770  		AX = (u32) DST;
1771  		DST = do_div(AX, (u32) IMM);
1772  		CONT;
1773  	ALU64_DIV_X:
1774  		DST = div64_u64(DST, SRC);
1775  		CONT;
1776  	ALU_DIV_X:
1777  		AX = (u32) DST;
1778  		do_div(AX, (u32) SRC);
1779  		DST = (u32) AX;
1780  		CONT;
1781  	ALU64_DIV_K:
1782  		DST = div64_u64(DST, IMM);
1783  		CONT;
1784  	ALU_DIV_K:
1785  		AX = (u32) DST;
1786  		do_div(AX, (u32) IMM);
1787  		DST = (u32) AX;
1788  		CONT;
1789  	ALU_END_TO_BE:
1790  		switch (IMM) {
1791  		case 16:
1792  			DST = (__force u16) cpu_to_be16(DST);
1793  			break;
1794  		case 32:
1795  			DST = (__force u32) cpu_to_be32(DST);
1796  			break;
1797  		case 64:
1798  			DST = (__force u64) cpu_to_be64(DST);
1799  			break;
1800  		}
1801  		CONT;
1802  	ALU_END_TO_LE:
1803  		switch (IMM) {
1804  		case 16:
1805  			DST = (__force u16) cpu_to_le16(DST);
1806  			break;
1807  		case 32:
1808  			DST = (__force u32) cpu_to_le32(DST);
1809  			break;
1810  		case 64:
1811  			DST = (__force u64) cpu_to_le64(DST);
1812  			break;
1813  		}
1814  		CONT;
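	/* Worked example (illustrative only, little-endian host): BPF_END with
	 * IMM == 16 operates on the low 16 bits of DST, so TO_BE turns
	 * DST == 0x1122334455667788 into 0x8877, while TO_LE leaves the low
	 * half-word as 0x7788; the upper bits are cleared in both cases.
	 */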
1815  
1816  	/* CALL */
1817  	JMP_CALL:
1818  		/* Function call scratches BPF_R1-BPF_R5 registers,
1819  		 * preserves BPF_R6-BPF_R9, and stores return value
1820  		 * into BPF_R0.
1821  		 */
1822  		BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
1823  						       BPF_R4, BPF_R5);
1824  		CONT;
1825  
1826  	JMP_CALL_ARGS:
1827  		BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2,
1828  							    BPF_R3, BPF_R4,
1829  							    BPF_R5,
1830  							    insn + insn->off + 1);
1831  		CONT;
1832  
1833  	JMP_TAIL_CALL: {
1834  		struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
1835  		struct bpf_array *array = container_of(map, struct bpf_array, map);
1836  		struct bpf_prog *prog;
1837  		u32 index = BPF_R3;
1838  
1839  		if (unlikely(index >= array->map.max_entries))
1840  			goto out;
1841  
1842  		if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT))
1843  			goto out;
1844  
1845  		tail_call_cnt++;
1846  
1847  		prog = READ_ONCE(array->ptrs[index]);
1848  		if (!prog)
1849  			goto out;
1850  
1851  		/* ARG1 at this point is guaranteed to point to CTX from
1852  		 * the verifier side due to the fact that the tail call is
1853  		 * handled like a helper, that is, bpf_tail_call_proto,
1854  		 * where arg1_type is ARG_PTR_TO_CTX.
1855  		 */
1856  		insn = prog->insnsi;
1857  		goto select_insn;
1858  out:
1859  		CONT;
1860  	}
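	/* Program-side sketch of what this label services (illustrative,
	 * not part of this file): the BPF program calls the bpf_tail_call()
	 * helper with a BPF_MAP_TYPE_PROG_ARRAY map and only falls through
	 * when the tail call cannot be taken:
	 *
	 *	bpf_tail_call(ctx, &prog_array, index);
	 *	(reached only if index is out of range, the slot is empty,
	 *	 or MAX_TAIL_CALL_CNT has been exceeded)
	 */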
1861  	JMP_JA:
1862  		insn += insn->off;
1863  		CONT;
1864  	JMP_EXIT:
1865  		return BPF_R0;
1866  	/* JMP */
1867  #define COND_JMP(SIGN, OPCODE, CMP_OP)				\
1868  	JMP_##OPCODE##_X:					\
1869  		if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) {	\
1870  			insn += insn->off;			\
1871  			CONT_JMP;				\
1872  		}						\
1873  		CONT;						\
1874  	JMP32_##OPCODE##_X:					\
1875  		if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) {	\
1876  			insn += insn->off;			\
1877  			CONT_JMP;				\
1878  		}						\
1879  		CONT;						\
1880  	JMP_##OPCODE##_K:					\
1881  		if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) {	\
1882  			insn += insn->off;			\
1883  			CONT_JMP;				\
1884  		}						\
1885  		CONT;						\
1886  	JMP32_##OPCODE##_K:					\
1887  		if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) {	\
1888  			insn += insn->off;			\
1889  			CONT_JMP;				\
1890  		}						\
1891  		CONT;
1892  	COND_JMP(u, JEQ, ==)
1893  	COND_JMP(u, JNE, !=)
1894  	COND_JMP(u, JGT, >)
1895  	COND_JMP(u, JLT, <)
1896  	COND_JMP(u, JGE, >=)
1897  	COND_JMP(u, JLE, <=)
1898  	COND_JMP(u, JSET, &)
1899  	COND_JMP(s, JSGT, >)
1900  	COND_JMP(s, JSLT, <)
1901  	COND_JMP(s, JSGE, >=)
1902  	COND_JMP(s, JSLE, <=)
1903  #undef COND_JMP
1904  	/* ST, STX and LDX */
1905  	ST_NOSPEC:
1906  		/* Speculation barrier for mitigating Speculative Store Bypass.
1907  		 * In case of arm64, we rely on the firmware mitigation as
1908  		 * controlled via the ssbd kernel parameter. Whenever the
1909  		 * mitigation is enabled, it works for all of the kernel code
1910  		 * with no need to provide any additional instructions here.
1911  		 * In case of x86, we use 'lfence' insn for mitigation. We
1912  		 * reuse preexisting logic from Spectre v1 mitigation that
1913  		 * happens to produce the required code on x86 for v4 as well.
1914  		 */
1915  		barrier_nospec();
1916  		CONT;
1917  #define LDST(SIZEOP, SIZE)						\
1918  	STX_MEM_##SIZEOP:						\
1919  		*(SIZE *)(unsigned long) (DST + insn->off) = SRC;	\
1920  		CONT;							\
1921  	ST_MEM_##SIZEOP:						\
1922  		*(SIZE *)(unsigned long) (DST + insn->off) = IMM;	\
1923  		CONT;							\
1924  	LDX_MEM_##SIZEOP:						\
1925  		DST = *(SIZE *)(unsigned long) (SRC + insn->off);	\
1926  		CONT;							\
1927  	LDX_PROBE_MEM_##SIZEOP:						\
1928  		bpf_probe_read_kernel(&DST, sizeof(SIZE),		\
1929  				      (const void *)(long) (SRC + insn->off));	\
1930  		DST = *((SIZE *)&DST);					\
1931  		CONT;
1932  
1933  	LDST(B,   u8)
1934  	LDST(H,  u16)
1935  	LDST(W,  u32)
1936  	LDST(DW, u64)
1937  #undef LDST
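
	/* Illustrative example (not kernel code): an LDX_MEM_W instruction
	 * generated above reads a u32 from SRC + insn->off; since DST is a
	 * 64-bit register slot, the loaded value is implicitly zero-extended.
	 */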
1938  
1939  #define ATOMIC_ALU_OP(BOP, KOP)						\
1940  		case BOP:						\
1941  			if (BPF_SIZE(insn->code) == BPF_W)		\
1942  				atomic_##KOP((u32) SRC, (atomic_t *)(unsigned long) \
1943  					     (DST + insn->off));	\
1944  			else						\
1945  				atomic64_##KOP((u64) SRC, (atomic64_t *)(unsigned long) \
1946  					       (DST + insn->off));	\
1947  			break;						\
1948  		case BOP | BPF_FETCH:					\
1949  			if (BPF_SIZE(insn->code) == BPF_W)		\
1950  				SRC = (u32) atomic_fetch_##KOP(		\
1951  					(u32) SRC,			\
1952  					(atomic_t *)(unsigned long) (DST + insn->off)); \
1953  			else						\
1954  				SRC = (u64) atomic64_fetch_##KOP(	\
1955  					(u64) SRC,			\
1956  					(atomic64_t *)(unsigned long) (DST + insn->off)); \
1957  			break;
1958  
1959  	STX_ATOMIC_DW:
1960  	STX_ATOMIC_W:
1961  		switch (IMM) {
1962  		ATOMIC_ALU_OP(BPF_ADD, add)
1963  		ATOMIC_ALU_OP(BPF_AND, and)
1964  		ATOMIC_ALU_OP(BPF_OR, or)
1965  		ATOMIC_ALU_OP(BPF_XOR, xor)
1966  #undef ATOMIC_ALU_OP
1967  
1968  		case BPF_XCHG:
1969  			if (BPF_SIZE(insn->code) == BPF_W)
1970  				SRC = (u32) atomic_xchg(
1971  					(atomic_t *)(unsigned long) (DST + insn->off),
1972  					(u32) SRC);
1973  			else
1974  				SRC = (u64) atomic64_xchg(
1975  					(atomic64_t *)(unsigned long) (DST + insn->off),
1976  					(u64) SRC);
1977  			break;
1978  		case BPF_CMPXCHG:
1979  			if (BPF_SIZE(insn->code) == BPF_W)
1980  				BPF_R0 = (u32) atomic_cmpxchg(
1981  					(atomic_t *)(unsigned long) (DST + insn->off),
1982  					(u32) BPF_R0, (u32) SRC);
1983  			else
1984  				BPF_R0 = (u64) atomic64_cmpxchg(
1985  					(atomic64_t *)(unsigned long) (DST + insn->off),
1986  					(u64) BPF_R0, (u64) SRC);
1987  			break;
1988  
1989  		default:
1990  			goto default_label;
1991  		}
1992  		CONT;
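
	/* Illustrative example (not kernel code): a BPF_STX | BPF_ATOMIC | BPF_DW
	 * instruction with insn->imm == (BPF_ADD | BPF_FETCH) hits the
	 * BPF_ADD | BPF_FETCH case above: it atomically adds SRC to the u64 at
	 * DST + insn->off and writes the prior value back into the SRC register.
	 */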
1993  
1994  	default_label:
1995  		/* If we ever reach this, we have a bug somewhere. Die hard here
1996  		 * instead of just returning 0; we could be somewhere in a subprog,
1997  		 * so execution could otherwise continue, which we do /not/ want.
1998  		 *
1999  		 * Note, the verifier only allows opcodes listed in bpf_opcode_in_insntable().
2000  		 */
2001  		pr_warn("BPF interpreter: unknown opcode %02x (imm: 0x%x)\n",
2002  			insn->code, insn->imm);
2003  		BUG_ON(1);
2004  		return 0;
2005  }
2006  
2007  #define PROG_NAME(stack_size) __bpf_prog_run##stack_size
2008  #define DEFINE_BPF_PROG_RUN(stack_size) \
2009  static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
2010  { \
2011  	u64 stack[stack_size / sizeof(u64)]; \
2012  	u64 regs[MAX_BPF_EXT_REG] = {}; \
2013  \
2014  	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
2015  	ARG1 = (u64) (unsigned long) ctx; \
2016  	return ___bpf_prog_run(regs, insn); \
2017  }
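
/* Illustrative expansion (not part of the sources): DEFINE_BPF_PROG_RUN(192)
 * generates an interpreter entry with a 192-byte (24 * sizeof(u64)) stack:
 *
 *	static unsigned int __bpf_prog_run192(const void *ctx,
 *					      const struct bpf_insn *insn)
 *	{
 *		u64 stack[192 / sizeof(u64)];
 *		u64 regs[MAX_BPF_EXT_REG] = {};
 *
 *		FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
 *		ARG1 = (u64) (unsigned long) ctx;
 *		return ___bpf_prog_run(regs, insn);
 *	}
 */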
2018  
2019  #define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size
2020  #define DEFINE_BPF_PROG_RUN_ARGS(stack_size) \
2021  static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
2022  				      const struct bpf_insn *insn) \
2023  { \
2024  	u64 stack[stack_size / sizeof(u64)]; \
2025  	u64 regs[MAX_BPF_EXT_REG]; \
2026  \
2027  	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
2028  	BPF_R1 = r1; \
2029  	BPF_R2 = r2; \
2030  	BPF_R3 = r3; \
2031  	BPF_R4 = r4; \
2032  	BPF_R5 = r5; \
2033  	return ___bpf_prog_run(regs, insn); \
2034  }
2035  
2036  #define EVAL1(FN, X) FN(X)
2037  #define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
2038  #define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
2039  #define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
2040  #define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
2041  #define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
2042  
2043  EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192);
2044  EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384);
2045  EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);
2046  
2047  EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 32, 64, 96, 128, 160, 192);
2048  EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 224, 256, 288, 320, 352, 384);
2049  EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512);
2050  
2051  #define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
2052  
2053  static unsigned int (*interpreters[])(const void *ctx,
2054  				      const struct bpf_insn *insn) = {
2055  EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
2056  EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
2057  EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
2058  };
2059  #undef PROG_NAME_LIST
2060  #define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size),
2061  static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
2062  				  const struct bpf_insn *insn) = {
2063  EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
2064  EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
2065  EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
2066  };
2067  #undef PROG_NAME_LIST
2068  
2069  void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
2070  {
2071  	stack_depth = max_t(u32, stack_depth, 1);
2072  	insn->off = (s16) insn->imm;
2073  	insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] -
2074  		__bpf_call_base_args;
2075  	insn->code = BPF_JMP | BPF_CALL_ARGS;
2076  }
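
/* Illustrative note (not kernel code): for a callee with a stack_depth of 64,
 * interpreters_args[round_up(64, 32) / 32 - 1] is __bpf_prog_run_args64, so
 * the patched BPF_CALL_ARGS instruction ends up carrying that function's
 * offset from __bpf_call_base_args in insn->imm; the JMP_CALL_ARGS label in
 * ___bpf_prog_run() adds the offset back before making the call.
 */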
2077  
2078  #else
2079  static unsigned int __bpf_prog_ret0_warn(const void *ctx,
2080  					 const struct bpf_insn *insn)
2081  {
2082  	/* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
2083  	 * is not working properly, so warn about it!
2084  	 */
2085  	WARN_ON_ONCE(1);
2086  	return 0;
2087  }
2088  #endif
2089  
2090  bool bpf_prog_map_compatible(struct bpf_map *map,
2091  			     const struct bpf_prog *fp)
2092  {
2093  	enum bpf_prog_type prog_type = resolve_prog_type(fp);
2094  	bool ret;
2095  
2096  	if (fp->kprobe_override)
2097  		return false;
2098  
2099  	/* XDP programs inserted into maps are not guaranteed to run on
2100  	 * a particular netdev (and can run outside driver context entirely
2101  	 * in the case of devmap and cpumap). Until device checks
2102  	 * are implemented, prohibit adding dev-bound programs to program maps.
2103  	 */
2104  	if (bpf_prog_is_dev_bound(fp->aux))
2105  		return false;
2106  
2107  	spin_lock(&map->owner.lock);
2108  	if (!map->owner.type) {
2109  		/* There's no owner yet where we could check for
2110  		 * compatibility.
2111  		 */
2112  		map->owner.type  = prog_type;
2113  		map->owner.jited = fp->jited;
2114  		map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
2115  		ret = true;
2116  	} else {
2117  		ret = map->owner.type  == prog_type &&
2118  		      map->owner.jited == fp->jited &&
2119  		      map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
2120  	}
2121  	spin_unlock(&map->owner.lock);
2122  
2123  	return ret;
2124  }
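
/* Illustrative example (not kernel code): the first program inserted into a
 * prog_array fixes the map's owner properties. If a JITed XDP program claims
 * the map first, a later attempt to insert an interpreted program, or one of
 * a different type (e.g. a tracing program), makes this helper return false
 * and the map update is rejected.
 */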
2125  
2126  static int bpf_check_tail_call(const struct bpf_prog *fp)
2127  {
2128  	struct bpf_prog_aux *aux = fp->aux;
2129  	int i, ret = 0;
2130  
2131  	mutex_lock(&aux->used_maps_mutex);
2132  	for (i = 0; i < aux->used_map_cnt; i++) {
2133  		struct bpf_map *map = aux->used_maps[i];
2134  
2135  		if (!map_type_contains_progs(map))
2136  			continue;
2137  
2138  		if (!bpf_prog_map_compatible(map, fp)) {
2139  			ret = -EINVAL;
2140  			goto out;
2141  		}
2142  	}
2143  
2144  out:
2145  	mutex_unlock(&aux->used_maps_mutex);
2146  	return ret;
2147  }
2148  
2149  static void bpf_prog_select_func(struct bpf_prog *fp)
2150  {
2151  #ifndef CONFIG_BPF_JIT_ALWAYS_ON
2152  	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
2153  
2154  	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
2155  #else
2156  	fp->bpf_func = __bpf_prog_ret0_warn;
2157  #endif
2158  }
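
/* Worked example (illustrative only): for the interpreter case, a program
 * with a verifier-computed stack_depth of 100 bytes rounds up to 128, so the
 * index above is 128 / 32 - 1 == 3 and fp->bpf_func points at
 * __bpf_prog_run128, the variant with a 128-byte stack.
 */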
2159  
2160  /**
2161   *	bpf_prog_select_runtime - select exec runtime for BPF program
2162   *	@fp: bpf_prog populated with BPF program
2163   *	@err: pointer to error variable
2164   *
2165   * Try to JIT eBPF program, if JIT is not available, use interpreter.
2166   * The BPF program will be executed via bpf_prog_run() function.
2167   *
2168   * Return: the &fp argument along with &err set to 0 for success or
2169   * a negative errno code on failure
2170   */
2171  struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
2172  {
2173  	/* In case of BPF to BPF calls, the verifier did all the prep
2174  	 * work with regard to JITing, etc.
2175  	 */
2176  	bool jit_needed = false;
2177  
2178  	if (fp->bpf_func)
2179  		goto finalize;
2180  
2181  	if (IS_ENABLED(CONFIG_BPF_JIT_ALWAYS_ON) ||
2182  	    bpf_prog_has_kfunc_call(fp))
2183  		jit_needed = true;
2184  
2185  	bpf_prog_select_func(fp);
2186  
2187  	/* eBPF JITs can rewrite the program in case constant
2188  	 * blinding is active. However, in case of error during
2189  	 * blinding, bpf_int_jit_compile() must always return a
2190  	 * valid program, which in this case would simply not
2191  	 * be JITed, but fall back to the interpreter.
2192  	 */
2193  	if (!bpf_prog_is_offloaded(fp->aux)) {
2194  		*err = bpf_prog_alloc_jited_linfo(fp);
2195  		if (*err)
2196  			return fp;
2197  
2198  		fp = bpf_int_jit_compile(fp);
2199  		bpf_prog_jit_attempt_done(fp);
2200  		if (!fp->jited && jit_needed) {
2201  			*err = -ENOTSUPP;
2202  			return fp;
2203  		}
2204  	} else {
2205  		*err = bpf_prog_offload_compile(fp);
2206  		if (*err)
2207  			return fp;
2208  	}
2209  
2210  finalize:
2211  	bpf_prog_lock_ro(fp);
2212  
2213  	/* The tail call compatibility check can only be done at
2214  	 * this late stage, as we need to determine whether we deal
2215  	 * with JITed or non-JITed program concatenations, and not
2216  	 * all eBPF JITs might immediately support all features.
2217  	 */
2218  	*err = bpf_check_tail_call(fp);
2219  
2220  	return fp;
2221  }
2222  EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
2223  
2224  static unsigned int __bpf_prog_ret1(const void *ctx,
2225  				    const struct bpf_insn *insn)
2226  {
2227  	return 1;
2228  }
2229  
2230  static struct bpf_prog_dummy {
2231  	struct bpf_prog prog;
2232  } dummy_bpf_prog = {
2233  	.prog = {
2234  		.bpf_func = __bpf_prog_ret1,
2235  	},
2236  };
2237  
2238  struct bpf_empty_prog_array bpf_empty_prog_array = {
2239  	.null_prog = NULL,
2240  };
2241  EXPORT_SYMBOL(bpf_empty_prog_array);
2242  
2243  struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
2244  {
2245  	if (prog_cnt)
2246  		return kzalloc(sizeof(struct bpf_prog_array) +
2247  			       sizeof(struct bpf_prog_array_item) *
2248  			       (prog_cnt + 1),
2249  			       flags);
2250  
2251  	return &bpf_empty_prog_array.hdr;
2252  }
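
/* Illustrative usage (not kernel code): callers size the array for the progs
 * they intend to store plus the terminating NULL entry, e.g. for two progs:
 *
 *	struct bpf_prog_array *arr = bpf_prog_array_alloc(2 + 1, GFP_KERNEL);
 *
 * bpf_prog_array_alloc(0, GFP_KERNEL) instead returns the shared, statically
 * allocated empty array, which bpf_prog_array_free() knows not to free.
 */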
2253  
2254  void bpf_prog_array_free(struct bpf_prog_array *progs)
2255  {
2256  	if (!progs || progs == &bpf_empty_prog_array.hdr)
2257  		return;
2258  	kfree_rcu(progs, rcu);
2259  }
2260  
2261  static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
2262  {
2263  	struct bpf_prog_array *progs;
2264  
2265  	/* If the RCU Tasks Trace grace period implies an RCU grace period, there
2266  	 * is no need to call kfree_rcu(); just call kfree() directly.
2267  	 */
2268  	progs = container_of(rcu, struct bpf_prog_array, rcu);
2269  	if (rcu_trace_implies_rcu_gp())
2270  		kfree(progs);
2271  	else
2272  		kfree_rcu(progs, rcu);
2273  }
2274  
2275  void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)
2276  {
2277  	if (!progs || progs == &bpf_empty_prog_array.hdr)
2278  		return;
2279  	call_rcu_tasks_trace(&progs->rcu, __bpf_prog_array_free_sleepable_cb);
2280  }
2281  
2282  int bpf_prog_array_length(struct bpf_prog_array *array)
2283  {
2284  	struct bpf_prog_array_item *item;
2285  	u32 cnt = 0;
2286  
2287  	for (item = array->items; item->prog; item++)
2288  		if (item->prog != &dummy_bpf_prog.prog)
2289  			cnt++;
2290  	return cnt;
2291  }
2292  
2293  bool bpf_prog_array_is_empty(struct bpf_prog_array *array)
2294  {
2295  	struct bpf_prog_array_item *item;
2296  
2297  	for (item = array->items; item->prog; item++)
2298  		if (item->prog != &dummy_bpf_prog.prog)
2299  			return false;
2300  	return true;
2301  }
2302  
2303  static bool bpf_prog_array_copy_core(struct bpf_prog_array *array,
2304  				     u32 *prog_ids,
2305  				     u32 request_cnt)
2306  {
2307  	struct bpf_prog_array_item *item;
2308  	int i = 0;
2309  
2310  	for (item = array->items; item->prog; item++) {
2311  		if (item->prog == &dummy_bpf_prog.prog)
2312  			continue;
2313  		prog_ids[i] = item->prog->aux->id;
2314  		if (++i == request_cnt) {
2315  			item++;
2316  			break;
2317  		}
2318  	}
2319  
2320  	return !!(item->prog);
2321  }
2322  
2323  int bpf_prog_array_copy_to_user(struct bpf_prog_array *array,
2324  				__u32 __user *prog_ids, u32 cnt)
2325  {
2326  	unsigned long err = 0;
2327  	bool nospc;
2328  	u32 *ids;
2329  
2330  	/* users of this function are doing:
2331  	 * cnt = bpf_prog_array_length();
2332  	 * if (cnt > 0)
2333  	 *     bpf_prog_array_copy_to_user(..., cnt);
2334  	 * so below kcalloc doesn't need extra cnt > 0 check.
2335  	 */
2336  	ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
2337  	if (!ids)
2338  		return -ENOMEM;
2339  	nospc = bpf_prog_array_copy_core(array, ids, cnt);
2340  	err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
2341  	kfree(ids);
2342  	if (err)
2343  		return -EFAULT;
2344  	if (nospc)
2345  		return -ENOSPC;
2346  	return 0;
2347  }
2348  
2349  void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
2350  				struct bpf_prog *old_prog)
2351  {
2352  	struct bpf_prog_array_item *item;
2353  
2354  	for (item = array->items; item->prog; item++)
2355  		if (item->prog == old_prog) {
2356  			WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
2357  			break;
2358  		}
2359  }
2360  
2361  /**
2362   * bpf_prog_array_delete_safe_at() - Replaces the program at the given
2363   *                                   index into the program array with
2364   *                                   a dummy no-op program.
2365   * @array: a bpf_prog_array
2366   * @index: the index of the program to replace
2367   *
2368   * Skips over dummy programs, by not counting them, when calculating
2369   * the position of the program to replace.
2370   *
2371   * Return:
2372   * * 0		- Success
2373   * * -EINVAL	- Invalid index value. Must be a non-negative integer.
2374   * * -ENOENT	- Index out of range
2375   */
2376  int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index)
2377  {
2378  	return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog);
2379  }
2380  
2381  /**
2382   * bpf_prog_array_update_at() - Updates the program at the given index
2383   *                              into the program array.
2384   * @array: a bpf_prog_array
2385   * @index: the index of the program to update
2386   * @prog: the program to insert into the array
2387   *
2388   * Skips over dummy programs, by not counting them, when calculating
2389   * the position of the program to update.
2390   *
2391   * Return:
2392   * * 0		- Success
2393   * * -EINVAL	- Invalid index value. Must be a non-negative integer.
2394   * * -ENOENT	- Index out of range
2395   */
2396  int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
2397  			     struct bpf_prog *prog)
2398  {
2399  	struct bpf_prog_array_item *item;
2400  
2401  	if (unlikely(index < 0))
2402  		return -EINVAL;
2403  
2404  	for (item = array->items; item->prog; item++) {
2405  		if (item->prog == &dummy_bpf_prog.prog)
2406  			continue;
2407  		if (!index) {
2408  			WRITE_ONCE(item->prog, prog);
2409  			return 0;
2410  		}
2411  		index--;
2412  	}
2413  	return -ENOENT;
2414  }
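
/* Worked example (illustrative only): for an array whose items are
 * { progA, dummy, progB, NULL }, index 0 refers to progA and index 1 to
 * progB, since dummy entries are skipped while counting; index 2 yields
 * -ENOENT.
 */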
2415  
2416  int bpf_prog_array_copy(struct bpf_prog_array *old_array,
2417  			struct bpf_prog *exclude_prog,
2418  			struct bpf_prog *include_prog,
2419  			u64 bpf_cookie,
2420  			struct bpf_prog_array **new_array)
2421  {
2422  	int new_prog_cnt, carry_prog_cnt = 0;
2423  	struct bpf_prog_array_item *existing, *new;
2424  	struct bpf_prog_array *array;
2425  	bool found_exclude = false;
2426  
2427  	/* Figure out how many existing progs we need to carry over to
2428  	 * the new array.
2429  	 */
2430  	if (old_array) {
2431  		existing = old_array->items;
2432  		for (; existing->prog; existing++) {
2433  			if (existing->prog == exclude_prog) {
2434  				found_exclude = true;
2435  				continue;
2436  			}
2437  			if (existing->prog != &dummy_bpf_prog.prog)
2438  				carry_prog_cnt++;
2439  			if (existing->prog == include_prog)
2440  				return -EEXIST;
2441  		}
2442  	}
2443  
2444  	if (exclude_prog && !found_exclude)
2445  		return -ENOENT;
2446  
2447  	/* How many progs (not NULL) will be in the new array? */
2448  	new_prog_cnt = carry_prog_cnt;
2449  	if (include_prog)
2450  		new_prog_cnt += 1;
2451  
2452  	/* Do we have any prog (not NULL) in the new array? */
2453  	if (!new_prog_cnt) {
2454  		*new_array = NULL;
2455  		return 0;
2456  	}
2457  
2458  	/* +1 as the end of prog_array is marked with NULL */
2459  	array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
2460  	if (!array)
2461  		return -ENOMEM;
2462  	new = array->items;
2463  
2464  	/* Fill in the new prog array */
2465  	if (carry_prog_cnt) {
2466  		existing = old_array->items;
2467  		for (; existing->prog; existing++) {
2468  			if (existing->prog == exclude_prog ||
2469  			    existing->prog == &dummy_bpf_prog.prog)
2470  				continue;
2471  
2472  			new->prog = existing->prog;
2473  			new->bpf_cookie = existing->bpf_cookie;
2474  			new++;
2475  		}
2476  	}
2477  	if (include_prog) {
2478  		new->prog = include_prog;
2479  		new->bpf_cookie = bpf_cookie;
2480  		new++;
2481  	}
2482  	new->prog = NULL;
2483  	*new_array = array;
2484  	return 0;
2485  }
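
/* Worked example (illustrative only): with old_array == { A, B, NULL },
 * exclude_prog == B and include_prog == C, *new_array becomes { A, C, NULL }
 * with C carrying the supplied bpf_cookie. Excluding a program that is not
 * present returns -ENOENT; including one that is already present returns
 * -EEXIST.
 */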
2486  
2487  int bpf_prog_array_copy_info(struct bpf_prog_array *array,
2488  			     u32 *prog_ids, u32 request_cnt,
2489  			     u32 *prog_cnt)
2490  {
2491  	u32 cnt = 0;
2492  
2493  	if (array)
2494  		cnt = bpf_prog_array_length(array);
2495  
2496  	*prog_cnt = cnt;
2497  
2498  	/* return early if user requested only program count or nothing to copy */
2499  	if (!request_cnt || !cnt)
2500  		return 0;
2501  
2502  	/* this function is called under trace/bpf_trace.c: bpf_event_mutex */
2503  	return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? -ENOSPC
2504  								     : 0;
2505  }
2506  
2507  void __bpf_free_used_maps(struct bpf_prog_aux *aux,
2508  			  struct bpf_map **used_maps, u32 len)
2509  {
2510  	struct bpf_map *map;
2511  	u32 i;
2512  
2513  	for (i = 0; i < len; i++) {
2514  		map = used_maps[i];
2515  		if (map->ops->map_poke_untrack)
2516  			map->ops->map_poke_untrack(map, aux);
2517  		bpf_map_put(map);
2518  	}
2519  }
2520  
2521  static void bpf_free_used_maps(struct bpf_prog_aux *aux)
2522  {
2523  	__bpf_free_used_maps(aux, aux->used_maps, aux->used_map_cnt);
2524  	kfree(aux->used_maps);
2525  }
2526  
2527  void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
2528  			  struct btf_mod_pair *used_btfs, u32 len)
2529  {
2530  #ifdef CONFIG_BPF_SYSCALL
2531  	struct btf_mod_pair *btf_mod;
2532  	u32 i;
2533  
2534  	for (i = 0; i < len; i++) {
2535  		btf_mod = &used_btfs[i];
2536  		if (btf_mod->module)
2537  			module_put(btf_mod->module);
2538  		btf_put(btf_mod->btf);
2539  	}
2540  #endif
2541  }
2542  
2543  static void bpf_free_used_btfs(struct bpf_prog_aux *aux)
2544  {
2545  	__bpf_free_used_btfs(aux, aux->used_btfs, aux->used_btf_cnt);
2546  	kfree(aux->used_btfs);
2547  }
2548  
2549  static void bpf_prog_free_deferred(struct work_struct *work)
2550  {
2551  	struct bpf_prog_aux *aux;
2552  	int i;
2553  
2554  	aux = container_of(work, struct bpf_prog_aux, work);
2555  #ifdef CONFIG_BPF_SYSCALL
2556  	bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab);
2557  #endif
2558  #ifdef CONFIG_CGROUP_BPF
2559  	if (aux->cgroup_atype != CGROUP_BPF_ATTACH_TYPE_INVALID)
2560  		bpf_cgroup_atype_put(aux->cgroup_atype);
2561  #endif
2562  	bpf_free_used_maps(aux);
2563  	bpf_free_used_btfs(aux);
2564  	if (bpf_prog_is_dev_bound(aux))
2565  		bpf_prog_dev_bound_destroy(aux->prog);
2566  #ifdef CONFIG_PERF_EVENTS
2567  	if (aux->prog->has_callchain_buf)
2568  		put_callchain_buffers();
2569  #endif
2570  	if (aux->dst_trampoline)
2571  		bpf_trampoline_put(aux->dst_trampoline);
2572  	for (i = 0; i < aux->func_cnt; i++) {
2573  		/* We can just unlink the subprog poke descriptor table as
2574  		 * it was originally linked to the main program and is also
2575  		 * released along with it.
2576  		 */
2577  		aux->func[i]->aux->poke_tab = NULL;
2578  		bpf_jit_free(aux->func[i]);
2579  	}
2580  	if (aux->func_cnt) {
2581  		kfree(aux->func);
2582  		bpf_prog_unlock_free(aux->prog);
2583  	} else {
2584  		bpf_jit_free(aux->prog);
2585  	}
2586  }
2587  
2588  void bpf_prog_free(struct bpf_prog *fp)
2589  {
2590  	struct bpf_prog_aux *aux = fp->aux;
2591  
2592  	if (aux->dst_prog)
2593  		bpf_prog_put(aux->dst_prog);
2594  	INIT_WORK(&aux->work, bpf_prog_free_deferred);
2595  	schedule_work(&aux->work);
2596  }
2597  EXPORT_SYMBOL_GPL(bpf_prog_free);
2598  
2599  /* RNG for unprivileged user space with separated state from prandom_u32(). */
2600  static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state);
2601  
2602  void bpf_user_rnd_init_once(void)
2603  {
2604  	prandom_init_once(&bpf_user_rnd_state);
2605  }
2606  
2607  BPF_CALL_0(bpf_user_rnd_u32)
2608  {
2609  	/* Should someone ever have the rather unwise idea to use some
2610  	 * of the registers passed into this function, then note that
2611  	 * this function is called from native eBPF and classic-to-eBPF
2612  	 * transformations. Register assignments from both sides are
2613  	 * different, e.g. classic always sets fn(ctx, A, X) here.
2614  	 */
2615  	struct rnd_state *state;
2616  	u32 res;
2617  
2618  	state = &get_cpu_var(bpf_user_rnd_state);
2619  	res = prandom_u32_state(state);
2620  	put_cpu_var(bpf_user_rnd_state);
2621  
2622  	return res;
2623  }
2624  
2625  BPF_CALL_0(bpf_get_raw_cpu_id)
2626  {
2627  	return raw_smp_processor_id();
2628  }
2629  
2630  /* Weak definitions of helper functions in case we don't have bpf syscall. */
2631  const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
2632  const struct bpf_func_proto bpf_map_update_elem_proto __weak;
2633  const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
2634  const struct bpf_func_proto bpf_map_push_elem_proto __weak;
2635  const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
2636  const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
2637  const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto __weak;
2638  const struct bpf_func_proto bpf_spin_lock_proto __weak;
2639  const struct bpf_func_proto bpf_spin_unlock_proto __weak;
2640  const struct bpf_func_proto bpf_jiffies64_proto __weak;
2641  
2642  const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
2643  const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
2644  const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
2645  const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
2646  const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
2647  const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
2648  const struct bpf_func_proto bpf_ktime_get_tai_ns_proto __weak;
2649  
2650  const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
2651  const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
2652  const struct bpf_func_proto bpf_get_current_comm_proto __weak;
2653  const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
2654  const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak;
2655  const struct bpf_func_proto bpf_get_local_storage_proto __weak;
2656  const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
2657  const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
2658  const struct bpf_func_proto bpf_seq_printf_btf_proto __weak;
2659  const struct bpf_func_proto bpf_set_retval_proto __weak;
2660  const struct bpf_func_proto bpf_get_retval_proto __weak;
2661  
2662  const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
2663  {
2664  	return NULL;
2665  }
2666  
2667  const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void)
2668  {
2669  	return NULL;
2670  }
2671  
2672  u64 __weak
2673  bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
2674  		 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
2675  {
2676  	return -ENOTSUPP;
2677  }
2678  EXPORT_SYMBOL_GPL(bpf_event_output);
2679  
2680  /* Always built-in helper functions. */
2681  const struct bpf_func_proto bpf_tail_call_proto = {
2682  	.func		= NULL,
2683  	.gpl_only	= false,
2684  	.ret_type	= RET_VOID,
2685  	.arg1_type	= ARG_PTR_TO_CTX,
2686  	.arg2_type	= ARG_CONST_MAP_PTR,
2687  	.arg3_type	= ARG_ANYTHING,
2688  };
2689  
2690  /* Stub for JITs that only support cBPF. eBPF programs are interpreted.
2691   * It is encouraged to implement bpf_int_jit_compile() instead, so that
2692   * eBPF and implicitly also cBPF can get JITed!
2693   */
2694  struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
2695  {
2696  	return prog;
2697  }
2698  
2699  /* Stub for JITs that support eBPF. All cBPF code gets transformed into
2700   * eBPF by the kernel and is later compiled by bpf_int_jit_compile().
2701   */
2702  void __weak bpf_jit_compile(struct bpf_prog *prog)
2703  {
2704  }
2705  
2706  bool __weak bpf_helper_changes_pkt_data(void *func)
2707  {
2708  	return false;
2709  }
2710  
2711  /* Return TRUE if the JIT backend wants the verifier to enable sub-register
2712   * usage analysis code and wants explicit zero extension inserted by the verifier.
2713   * Otherwise, return FALSE.
2714   *
2715   * The verifier inserts an explicit zero extension after BPF_CMPXCHGs even if
2716   * you don't override this. JITs that don't want these extra insns can detect
2717   * them using insn_is_zext.
2718   */
2719  bool __weak bpf_jit_needs_zext(void)
2720  {
2721  	return false;
2722  }
2723  
2724  /* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */
2725  bool __weak bpf_jit_supports_subprog_tailcalls(void)
2726  {
2727  	return false;
2728  }
2729  
2730  bool __weak bpf_jit_supports_kfunc_call(void)
2731  {
2732  	return false;
2733  }
2734  
2735  /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
2736   * skb_copy_bits(), so provide a weak definition of it for NET-less config.
2737   */
2738  int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
2739  			 int len)
2740  {
2741  	return -EFAULT;
2742  }
2743  
2744  int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
2745  			      void *addr1, void *addr2)
2746  {
2747  	return -ENOTSUPP;
2748  }
2749  
2750  void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len)
2751  {
2752  	return ERR_PTR(-ENOTSUPP);
2753  }
2754  
2755  int __weak bpf_arch_text_invalidate(void *dst, size_t len)
2756  {
2757  	return -ENOTSUPP;
2758  }
2759  
2760  #ifdef CONFIG_BPF_SYSCALL
2761  static int __init bpf_global_ma_init(void)
2762  {
2763  	int ret;
2764  
2765  	ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
2766  	bpf_global_ma_set = !ret;
2767  	return ret;
2768  }
2769  late_initcall(bpf_global_ma_init);
2770  #endif
2771  
2772  DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
2773  EXPORT_SYMBOL(bpf_stats_enabled_key);
2774  
2775  /* All definitions of tracepoints related to BPF. */
2776  #define CREATE_TRACE_POINTS
2777  #include <linux/bpf_trace.h>
2778  
2779  EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
2780  EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx);
2781