1
0

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next

Pull networking updates from David Miller:
 "Here we go, another merge window full of networking and #ebpf changes:

   1) Snoop DHCPACKS in batman-adv to learn MAC/IP pairs in the DHCP
      range without dealing with floods of ARP traffic, from Linus
      Lüssing.

   2) Throttle buffered multicast packet transmission in mt76, from
      Felix Fietkau.

   3) Support adaptive interrupt moderation in ice, from Brett Creeley.

   4) A lot of struct_size conversions, from Gustavo A. R. Silva.

   5) Add peek/push/pop commands to bpftool, as well as bash completion,
      from Stanislav Fomichev.

   6) Optimize sk_msg_clone(), from Vakul Garg.

   7) Add SO_BINDTOIFINDEX, from David Herrmann.

   8) Be more conservative with local resends due to local congestion,
      from Yuchung Cheng.

   9) Allow vetoing of unsupported VXLAN FDBs, from Petr Machata.

  10) Add health buffer support to devlink, from Eran Ben Elisha.

  11) Add TXQ scheduling API to mac80211, from Toke Høiland-Jørgensen.

  12) Add statistics to basic packet scheduler filter, from Cong Wang.

  13) Add GRE tunnel support for mlxsw Spectrum-2, from Nir Dotan.

  14) Lots of new IP tunneling forwarding tests, also from Nir Dotan.

  15) Add 3ad stats to bonding, from Nikolay Aleksandrov.

  16) Lots of probing improvements for bpftool, from Quentin Monnet.

  17) Various nfp driver #ebpf JIT improvements from Jakub Kicinski.

  18) Allow #ebpf programs to access gso_segs from skb shared info, from
      Eric Dumazet.

  19) Add sock_diag support for AF_XDP sockets, from Björn Töpel.

  20) Support 22260 iwlwifi devices, from Luca Coelho.

  21) Use rbtree for ipv6 defragmentation, from Peter Oskolkov.

  22) Add JMP32 instruction class support to #ebpf, from Jiong Wang.

  23) Add spinlock support to #ebpf, from Alexei Starovoitov.

  24) Support 256-bit keys and TLS 1.3 in ktls, from Dave Watson.

  25) Add device information API to devlink, from Jakub Kicinski.

  26) Add new timestamping socket options which are y2038 safe, from
      Deepa Dinamani.

  27) Add RX checksum offloading for various sh_eth chips, from Sergei
      Shtylyov.

  28) Flow offload infrastructure, from Pablo Neira Ayuso.

  29) Numerous cleanups, improvements, and bug fixes to the PHY layer
      and many drivers from Heiner Kallweit.

  30) Lots of changes to try and make packet scheduler classifiers run
      lockless as much as possible, from Vlad Buslov.

  31) Support BCM957504 chip in bnxt_en driver, from Erik Burrows.

  32) Add concurrency tests to tc-tests infrastructure, from Vlad
      Buslov.

  33) Add hwmon support to aquantia, from Heiner Kallweit.

  34) Allow 64-bit values for SO_MAX_PACING_RATE, from Eric Dumazet.

  And I would be remiss if I didn't thank the various major networking
  subsystem maintainers for integrating much of this work before I even
  saw it. Alexei Starovoitov, Daniel Borkmann, Pablo Neira Ayuso,
  Johannes Berg, Kalle Valo, and many others. Thank you!"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2207 commits)
  net/sched: avoid unused-label warning
  net: ignore sysctl_devconf_inherit_init_net without SYSCTL
  phy: mdio-mux: fix Kconfig dependencies
  net: phy: use phy_modify_mmd_changed in genphy_c45_an_config_aneg
  net: dsa: mv88e6xxx: add call to mv88e6xxx_ports_cmode_init to probe for new DSA framework
  selftest/net: Remove duplicate header
  sky2: Disable MSI on Dell Inspiron 1545 and Gateway P-79
  net/mlx5e: Update tx reporter status in case channels were successfully opened
  devlink: Add support for direct reporter health state update
  devlink: Update reporter state to error even if recover aborted
  sctp: call iov_iter_revert() after sending ABORT
  team: Free BPF filter when unregistering netdev
  ip6mr: Do not call __IP6_INC_STATS() from preemptible context
  isdn: mISDN: Fix potential NULL pointer dereference of kzalloc
  net: dsa: mv88e6xxx: support in-band signalling on SGMII ports with external PHYs
  cxgb4/chtls: Prefix adapter flags with CXGB4
  net-sysfs: Switch to bitmap_zalloc()
  mellanox: Switch to bitmap_zalloc()
  bpf: add test cases for non-pointer sanitiation logic
  mlxsw: i2c: Extend initialization by querying resources data
  ...
This commit is contained in:
Linus Torvalds
2019-03-05 08:26:13 -08:00
2150 changed files with 112827 additions and 58173 deletions

View File

@@ -1,7 +1,6 @@
cpustat
fds_example
lathist
load_sock_ops
lwt_len_hist
map_perf_test
offwaketime

View File

@@ -40,7 +40,6 @@ hostprogs-y += lwt_len_hist
hostprogs-y += xdp_tx_iptunnel
hostprogs-y += test_map_in_map
hostprogs-y += per_socket_stats_example
hostprogs-y += load_sock_ops
hostprogs-y += xdp_redirect
hostprogs-y += xdp_redirect_map
hostprogs-y += xdp_redirect_cpu
@@ -53,6 +52,7 @@ hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
hostprogs-y += xdp_sample_pkts
hostprogs-y += hbm
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -60,9 +60,9 @@ LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
fds_example-objs := bpf_load.o fds_example.o
sockex1-objs := bpf_load.o sockex1_user.o
sockex2-objs := bpf_load.o sockex2_user.o
fds_example-objs := fds_example.o
sockex1-objs := sockex1_user.o
sockex2-objs := sockex2_user.o
sockex3-objs := bpf_load.o sockex3_user.o
tracex1-objs := bpf_load.o tracex1_user.o
tracex2-objs := bpf_load.o tracex2_user.o
@@ -71,7 +71,6 @@ tracex4-objs := bpf_load.o tracex4_user.o
tracex5-objs := bpf_load.o tracex5_user.o
tracex6-objs := bpf_load.o tracex6_user.o
tracex7-objs := bpf_load.o tracex7_user.o
load_sock_ops-objs := bpf_load.o load_sock_ops.o
test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o
trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS)
lathist-objs := bpf_load.o lathist_user.o
@@ -87,18 +86,18 @@ test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o
xdp1-objs := xdp1_user.o
# reuse xdp1 source intentionally
xdp2-objs := xdp1_user.o
xdp_router_ipv4-objs := bpf_load.o xdp_router_ipv4_user.o
xdp_router_ipv4-objs := xdp_router_ipv4_user.o
test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \
test_current_task_under_cgroup_user.o
trace_event-objs := bpf_load.o trace_event_user.o $(TRACE_HELPERS)
sampleip-objs := bpf_load.o sampleip_user.o $(TRACE_HELPERS)
tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o
lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o
xdp_tx_iptunnel-objs := bpf_load.o xdp_tx_iptunnel_user.o
xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o
test_map_in_map-objs := bpf_load.o test_map_in_map_user.o
per_socket_stats_example-objs := cookie_uid_helper_example.o
xdp_redirect-objs := bpf_load.o xdp_redirect_user.o
xdp_redirect_map-objs := bpf_load.o xdp_redirect_map_user.o
xdp_redirect-objs := xdp_redirect_user.o
xdp_redirect_map-objs := xdp_redirect_map_user.o
xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o
xdp_monitor-objs := bpf_load.o xdp_monitor_user.o
xdp_rxq_info-objs := xdp_rxq_info_user.o
@@ -109,6 +108,7 @@ xdpsock-objs := xdpsock_user.o
xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS)
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -163,10 +163,10 @@ always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o
always += cpustat_kern.o
always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o
always += xdp_fwd_kern.o
always += task_fd_query_kern.o
always += xdp_sample_pkts_kern.o
always += hbm_out_kern.o
KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -266,6 +266,8 @@ $(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF)
$(src)/*.c: verify_target_bpf $(LIBBPF)
$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
$(obj)/hbm.o: $(src)/hbm.h
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is

View File

@@ -164,6 +164,16 @@ struct bpf_insn;
.off = OFF, \
.imm = 0 })
/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison.
 *
 * Expands to a struct bpf_insn compound literal in the BPF_JMP32 class:
 *   OP  - jump operation, filtered through BPF_OP()
 *   DST - stored in .dst_reg
 *   SRC - stored in .src_reg (the opcode carries BPF_X)
 *   OFF - stored in .off
 * .imm is not used by this form and is always 0.
 */
#define BPF_JMP32_REG(OP, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = 0 })
/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
@@ -174,6 +184,16 @@ struct bpf_insn;
.off = OFF, \
.imm = IMM })
/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison.
 *
 * Expands to a struct bpf_insn compound literal in the BPF_JMP32 class:
 *   OP  - jump operation, filtered through BPF_OP()
 *   DST - stored in .dst_reg
 *   IMM - stored in .imm (the opcode carries BPF_K)
 *   OFF - stored in .off
 * .src_reg is not used by this form and is always 0.
 */
#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \
((struct bpf_insn) { \
.code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = OFF, \
.imm = IMM })
/* Raw code statement block */
#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \

436
samples/bpf/do_hbm_test.sh Executable file
View File

@@ -0,0 +1,436 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 Facebook
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.
# Print the command-line help for this script and exit.
# The synopsis and option list mirror the cases handled by processArgs.
Usage() {
  echo "Script for testing HBM (Host Bandwidth Manager) framework."
  echo "It creates a cgroup to use for testing and loads a BPF program to limit"
  echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
  echo "loads. The output is the goodput in Mbps (unless -D was used)."
  echo ""
  echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>] [-D]"
  echo " [-d=<delay>|--delay=<delay>] [--debug] [-E]"
  echo " [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]"
  echo " [-l] [-N] [-p=<port>|--port=<port>] [-P]"
  echo " [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]"
  echo " [-s=<server>|--server=<server>] [-S|--stats]"
  echo " [-t=<time>|--time=<time>] [-w] [cubic|dctcp]"
  echo " Where:"
  echo " out egress (default)"
  echo " -b or --bpf BPF program filename to load and attach."
  echo " Default is hbm_out_kern.o for egress."
  echo " -c or --cc TCP congestion control (cubic or dctcp)"
  echo " --debug print BPF trace buffer"
  echo " -d or --delay add a delay in ms using netem"
  echo " -D In addition to the goodput in Mbps, it also outputs"
  echo " other detailed information. This information is"
  echo " test dependent (i.e. iperf3 or netperf)."
  echo " -E enable ECN (not required for dctcp)"
  echo " -f or --flows number of concurrent flows (default=1)"
  echo " -i or --id cgroup id (an integer, default is 1)"
  echo " -N use netperf instead of iperf3"
  echo " -l do not limit flows using loopback"
  echo " -h Help"
  echo " -p or --port iperf3 port (default is 5201)"
  echo " -P use an iperf3 instance for each flow"
  echo " -q use the specified qdisc"
  echo " -r or --rate rate in Mbps (default is 1Gbps)"
  echo " -R Use TCP_RR for netperf. 1st flow has req"
  echo " size of 10KB, rest of 1MB. Reply in all"
  echo " cases is 1 byte."
  echo " More detailed output for each flow can be found"
  echo " in the files netperf.<cg>.<flow>, where <cg> is the"
  echo " cgroup id as specified with the -i flag, and <flow>"
  echo " is the flow id starting at 1 and increasing by 1 for"
  echo " each flow (as specified by -f)."
  echo " -s or --server hostname of netperf server. Used to create netperf"
  echo " test traffic between two hosts (default is within host)"
  echo " netserver must be running on the host."
  echo " -S or --stats whether to update hbm stats (default is no)."
  echo " -t or --time duration of iperf3 in seconds (default=5)"
  echo " -w Work conserving flag. cgroup can increase its"
  echo " bandwidth beyond the rate limit specified"
  echo " while there is available bandwidth. Current"
  echo " implementation assumes there is only one NIC"
  echo " (eth0), but can be extended to support multiple"
  echo " NICs."
  echo " cubic or dctcp specify which TCP CC to use"
  echo " "
  exit
}
#set -x

# Default values for every option. processArgs overrides them from the
# command line; the rest of the script only reads them.
debug_flag=0
# Raw command line, iterated word by word in processArgs.
args="$@"
name="$0"
# netem delay in ms; 0 means no netem qdisc is installed.
netem=0
# "x" is the sentinel for "leave the TCP congestion control unchanged".
cc=x
# Direction flag handed to ./hbm, and the suffix used for hbm.<id>.<dir_name>.
dir="-o"
dir_name="out"
# Test duration in seconds (passed to hbm, iperf3 and netperf).
dur=5
flows=1
# cgroup id; the cgroup used is .../cgroup-test-work-dir/hbm$id.
id=1
# BPF object file passed to ./hbm; empty lets hbm pick its own default.
prog=""
port=5201
# Rate limit in Mbps, passed to ./hbm with -r.
rate=1000
multi_iperf=0
flow_cnt=1
use_netperf=0
rr=0
ecn=0
details=0
# Remote netperf server; empty means traffic stays on this host.
server=""
qdisc=""
# Extra single-letter flags accumulated for ./hbm (-d, -l, -s, -w).
flags=""
do_stats=0
# Start ./hbm in the background using the options gathered so far.
# hbm.out is recreated on every call: it begins with the command line that
# was run, followed by a separator line, followed by everything hbm writes
# to stdout/stderr. The PID of the background process is printed on stdout
# so the caller can capture it with $(start_hbm).
# NOTE(review): $dbg is not assigned anywhere in the visible script, so it
# expands to nothing here.
function start_hbm () {
  rm -f hbm.out
  {
    echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog"
    echo " "
  } > hbm.out
  ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1 &
  echo $!
}
# Parse the words saved in $args and update the option variables.
# Options take the form -x=<value> / --long=<value>; bare words select the
# direction ("out") or the congestion control ("cubic", "dctcp").
# -h/--help prints the usage text; any unrecognised word is reported and
# then the usage text is printed. Usage exits the script in both cases.
processArgs () {
  for i in $args ; do
    case $i in
    # Support for upcoming ingress rate limiting
    #in)         # support for upcoming ingress rate limiting
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${i#*=}"
      ;;
    -c=*|--cc=*)
      cc="${i#*=}"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${i#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    # Support for upcoming fq Early Departure Time egress rate limiting
    #--edt)
    # prog="hbm_out_edt_kern.o"
    # qdisc="fq"
    # ;;
    -f=*|--flows=*)
      flows="${i#*=}"
      ;;
    # Documented help flag: show usage without the "Unknown arg" complaint.
    -h|--help)
      Usage
      ;;
    -i=*|--id=*)
      id="${i#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${i#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${i#*=}"
      ;;
    -r=*|--rate=*)
      rate="${i#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${i#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${i#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg:$i"
      Usage
      ;;
    esac
  done
}
# ---- Setup: parse options, start hbm, join the cgroup, tune the host ----
processArgs
# With --debug, discard the trace log left by a previous run.
if [ $debug_flag -eq 1 ] ; then
rm -f hbm_out.log
fi
# Launch ./hbm in the background and remember its PID for teardown.
hbm_pid=$(start_hbm)
# Short pause before touching the cgroup (presumably to let hbm create it).
usleep 100000
host=`hostname`
cg_base_dir=/sys/fs/cgroup
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"
# Move this shell into the hbm cgroup so the traffic tools started below
# are children of a process in that cgroup.
echo $$ >> $cg_dir/cgroup.procs
ulimit -l unlimited
# Remove result files from earlier runs.
rm -f ss.out
rm -f hbm.[0-9]*.$dir_name
if [ $ecn -ne 0 ] ; then
sysctl -w -q -n net.ipv4.tcp_ecn=1
fi
# iperf3 runs use the system-wide congestion control: save the current one
# (restored during teardown) and switch only if a cc was requested.
if [ $use_netperf -eq 0 ] ; then
cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
if [ "$cc" != "x" ] ; then
sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
fi
fi
# Install the root qdisc on lo: netem delay takes precedence over -q.
if [ "$netem" -ne "0" ] ; then
if [ "$qdisc" != "" ] ; then
echo "WARNING: Ignoring -q options because -d option used"
fi
tc qdisc del dev lo root > /dev/null 2>&1
tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
tc qdisc del dev lo root > /dev/null 2>&1
tc qdisc add dev lo root $qdisc > /dev/null 2>&1
fi
n=0
# Ping count: one ping every 0.2s for the duration of the test.
m=$[$dur * 5]
# Ping target: loopback, or the remote netperf server when one is given.
hn="::1"
if [ $use_netperf -ne 0 ] ; then
if [ "$server" != "" ] ; then
hn=$server
fi
fi
# Background ping whose "avg" line is reported as PING AVG DELAY with -D.
( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &
# ---- Traffic generation: netperf, single iperf3, or one iperf3 per flow ----
# Each branch ends by printing either the aggregate goodput alone or, with
# -D, a detailed report (hbm stats file, per-flow data, ping delay).
if [ $use_netperf -ne 0 ] ; then
# Remember whether a netserver was already running so teardown only kills
# one that this script started.
begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
awk '{ print $1 }'`
if [ "$begNetserverPid" == "" ] ; then
if [ "$server" == "" ] ; then
( ./netserver > /dev/null 2>&1) &
usleep 100000
fi
fi
flow_cnt=1
if [ "$server" == "" ] ; then
np_server=$host
else
np_server=$server
fi
# netperf sets the congestion control per test with -K local,remote.
if [ "$cc" == "x" ] ; then
np_cc=""
else
np_cc="-K $cc,$cc"
fi
replySize=1
# Start one background netperf per flow; output goes to netperf.<id>.<flow>.
while [ $flow_cnt -le $flows ] ; do
if [ $rr -ne 0 ] ; then
# TCP_RR mode: first flow uses 10K requests, the others 1M; for the
# ingress direction request and reply sizes are swapped.
reqSize=1M
if [ $flow_cnt -eq 1 ] ; then
reqSize=10K
fi
if [ "$dir" == "-i" ] ; then
replySize=$reqSize
reqSize=1
fi
( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r $reqSize,$replySize $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
else
if [ "$dir" == "-i" ] ; then
( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
else
( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
fi
fi
flow_cnt=$[flow_cnt+1]
done
# sleep for duration of test (plus some buffer)
n=$[dur+2]
sleep $n
# force graceful termination of netperf
pids=`pgrep netperf`
for p in $pids ; do
kill -SIGALRM $p
done
flow_cnt=1
rate=0
if [ $details -ne 0 ] ; then
echo ""
echo "Details for HBM in cgroup $id"
if [ $do_stats -eq 1 ] ; then
if [ -e hbm.$id.$dir_name ] ; then
cat hbm.$id.$dir_name
fi
fi
fi
# Sum the integer part of each flow's throughput into $rate.
while [ $flow_cnt -le $flows ] ; do
if [ "$dir" == "-i" ] ; then
r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
else
r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
fi
echo "rate for flow $flow_cnt: $r"
rate=$[rate+r]
if [ $details -ne 0 ] ; then
echo "-----"
echo "Details for cgroup $id, flow $flow_cnt"
cat netperf.$id.$flow_cnt
fi
flow_cnt=$[flow_cnt+1]
done
if [ $details -ne 0 ] ; then
echo ""
delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
echo "PING AVG DELAY:$delay"
echo "AGGREGATE_GOODPUT:$rate"
else
echo $rate
fi
elif [ $multi_iperf -eq 0 ] ; then
# Single iperf3 client driving all flows (-P $flows) against a one-shot
# local server.
(iperf3 -s -p $port -1 > /dev/null 2>&1) &
usleep 100000
iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
# Integer Mbits value of every "receiver" line; the last one is used as the
# result (presumably the summary line when several flows are run).
rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
rate=`echo $rates | grep -o "[0-9]*$"`
if [ $details -ne 0 ] ; then
echo ""
echo "Details for HBM in cgroup $id"
if [ $do_stats -eq 1 ] ; then
if [ -e hbm.$id.$dir_name ] ; then
cat hbm.$id.$dir_name
fi
fi
delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
echo "PING AVG DELAY:$delay"
echo "AGGREGATE_GOODPUT:$rate"
else
echo $rate
fi
else
# -P: a separate iperf3 server/client pair per flow, on consecutive ports.
# Each client writes its receiver rate (integer Mbits) to iperf3.<id>.<flow>.
flow_cnt=1
while [ $flow_cnt -le $flows ] ; do
(iperf3 -s -p $port -1 > /dev/null 2>&1) &
( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
port=$[port+1]
flow_cnt=$[flow_cnt+1]
done
# Wait for the background clients to finish (duration plus one second).
n=$[dur+1]
sleep $n
flow_cnt=1
rate=0
if [ $details -ne 0 ] ; then
echo ""
echo "Details for HBM in cgroup $id"
if [ $do_stats -eq 1 ] ; then
if [ -e hbm.$id.$dir_name ] ; then
cat hbm.$id.$dir_name
fi
fi
fi
# Sum the per-flow rates into $rate.
while [ $flow_cnt -le $flows ] ; do
r=`cat iperf3.$id.$flow_cnt`
# echo "rate for flow $flow_cnt: $r"
if [ $details -ne 0 ] ; then
echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
fi
rate=$[rate+r]
flow_cnt=$[flow_cnt+1]
done
if [ $details -ne 0 ] ; then
delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
echo "PING AVG DELAY:$delay"
echo "AGGREGATE_GOODPUT:$rate"
else
echo $rate
fi
fi
# ---- Teardown: undo host tuning, stop hbm, detach leftover BPF programs ----
# Restore the congestion control saved during setup (iperf3 runs only).
if [ $use_netperf -eq 0 ] ; then
sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
fi
# NOTE(review): tcp_ecn is forced to 0 here rather than restored to the
# value it had before the test - confirm that is intended.
if [ $ecn -ne 0 ] ; then
sysctl -w -q -n net.ipv4.tcp_ecn=0
fi
# NOTE(review): only the netem qdisc is removed; a qdisc installed with -q
# is left on lo - confirm that is intended.
if [ "$netem" -ne "0" ] ; then
tc qdisc del dev lo root > /dev/null 2>&1
fi
sleep 2
# Kill hbm only if the sole running "hbm " process is the one we started.
hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
if [ "$hbmPid" == "$hbm_pid" ] ; then
kill $hbm_pid
fi
sleep 1
# Detach any BPF programs that may have lingered
# The words of every "bpftool cgroup tree" line mentioning hbm are scanned
# with a small state machine: v=0 after a cgroup path was seen (next word is
# taken as the program id), v=1 after the id (next word is taken as the
# attach type, then the program is detached). v=2 is the initial state in
# which words are ignored until a cgroup path appears.
ttx=`bpftool cgroup tree | grep hbm`
v=2
for x in $ttx ; do
if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
cg=$x ; v=0
else
if [ $v -eq 0 ] ; then
id=$x ; v=1
else
if [ $v -eq 1 ] ; then
type=$x ; bpftool cgroup detach $cg $type id $id
v=0
fi
fi
fi
done
# Stop netserver only if this script started it (none was running before
# and no remote server was requested).
if [ $use_netperf -ne 0 ] ; then
if [ "$server" == "" ] ; then
if [ "$begNetserverPid" == "" ] ; then
netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
if [ "$netserverPid" != "" ] ; then
kill $netserverPid
fi
fi
fi
fi
exit

View File

@@ -14,8 +14,8 @@
#include <bpf/bpf.h>
#include "bpf/libbpf.h"
#include "bpf_insn.h"
#include "bpf_load.h"
#include "sock_example.h"
#define BPF_F_PIN (1 << 0)
@@ -57,10 +57,14 @@ static int bpf_prog_create(const char *object)
BPF_EXIT_INSN(),
};
size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn);
char bpf_log_buf[BPF_LOG_BUF_SIZE];
struct bpf_object *obj;
int prog_fd;
if (object) {
assert(!load_bpf_file((char *)object));
return prog_fd[0];
assert(!bpf_prog_load(object, BPF_PROG_TYPE_UNSPEC,
&obj, &prog_fd));
return prog_fd;
} else {
return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER,
insns, insns_cnt, "GPL", 0,

441
samples/bpf/hbm.c Normal file
View File

@@ -0,0 +1,441 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* Example program for Host Bandwidth Management
*
* This program loads a cgroup skb BPF program to enforce cgroup output
* (egress) or input (ingress) bandwidth limits.
*
* USAGE: hbm [-d] [-l] [-n <id>] [-r <rate>] [-s] [-t <secs>] [-w] [-h] [prog]
* Where:
* -d Print BPF trace debug buffer
* -l Also limit flows doing loopback
* -n <#> To create cgroup \"/hbm#\" and attach prog
* Default is /hbm1
* -r <rate> Rate limit in Mbps
* -s Get HBM stats (marked, dropped, etc.)
* -t <time> Exit after specified seconds (default is 1)
* -w Work conserving flag. cgroup can increase its bandwidth
* beyond the rate limit specified while there is available
* bandwidth. Current implementation assumes there is only one
* NIC (eth0), but can be extended to support multiple NICs.
* Currently only supported for egress.
* -h Print this info
* prog BPF program file name. Name defaults to hbm_out_kern.o
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/unistd.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include "bpf_load.h"
#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
#include "hbm.h"
#include "bpf_util.h"
#include "bpf/bpf.h"
#include "bpf/libbpf.h"
/* Run-time configuration. The option flags are set by main() from the
 * command line and read by run_bpf_prog().
 */
/* true selects egress; nothing in this file sets it to false. */
bool outFlag = true;
int minRate = 1000; /* cgroup rate limit in Mbps */
int rate = 1000; /* can grow if rate conserving is enabled */
/* Run time in seconds (-t). */
int dur = 1;
bool stats_flag;
bool loopback_flag;
bool debugFlag;
bool work_conserving_flag;
static void Usage(void);
static void read_trace_pipe2(void);
static void do_error(char *msg, bool errno_flag);
/* Directory holding the trace_pipe file read by read_trace_pipe2(). */
#define DEBUGFS "/sys/kernel/debug/tracing/"
/* Loaded BPF object and program fd, filled in by prog_load(). */
struct bpf_object *obj;
int bpfprog_fd;
/* NOTE(review): cgroup_storage_fd is not referenced in the visible code. */
int cgroup_storage_fd;
/*
 * Copy the kernel trace pipe to stdout and to hbm_out.log.
 *
 * Returns immediately if trace_pipe cannot be opened. Otherwise it loops
 * forever: every chunk read is NUL-terminated, printed with puts() and,
 * when the log file could be created, appended to it and flushed. Failure
 * to create the log file is reported but does not stop the copying.
 */
static void read_trace_pipe2(void)
{
	static char chunk[4097];
	char *log_name = "hbm_out.log";
	FILE *log_fp;
	int pipe_fd;

	pipe_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
	if (pipe_fd < 0) {
		printf("Error opening trace_pipe\n");
		return;
	}

	/* Future support of ingress would log to "hbm_in.log" when !outFlag. */
	log_fp = fopen(log_name, "w");
	if (!log_fp)
		printf("Error creating %s\n", log_name);

	for (;;) {
		ssize_t got = read(pipe_fd, chunk, sizeof(chunk) - 1);

		/* Nothing read (or an error): just try again. */
		if (got <= 0)
			continue;

		chunk[got] = 0;
		puts(chunk);
		if (!log_fp)
			continue;
		fprintf(log_fp, "%s\n", chunk);
		fflush(log_fp);
	}
}
/*
 * Report a fatal error on stdout and terminate with exit status 1.
 *
 * @msg:        text printed after the "ERROR: " prefix
 * @errno_flag: when true, the current errno value is appended
 */
static void do_error(char *msg, bool errno_flag)
{
	if (!errno_flag)
		printf("ERROR: %s\n", msg);
	else
		printf("ERROR: %s, errno: %d\n", msg, errno);

	exit(1);
}
/*
 * Load the cgroup skb BPF object file @prog and locate its "queue_stats" map.
 *
 * On success the globals obj and bpfprog_fd describe the loaded program and
 * the file descriptor of the "queue_stats" map is returned.
 * On any failure a message is printed and -1 is returned, which is the only
 * value the caller treats as an error.
 */
static int prog_load(char *prog)
{
	struct bpf_prog_load_attr prog_load_attr = {
		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
		.file = prog,
		.expected_attach_type = BPF_CGROUP_INET_EGRESS,
	};
	struct bpf_map *map;
	int map_fd = -1;
	int ret = 0;

	/* access() takes R_OK/W_OK/X_OK/F_OK, not open() flags. */
	if (access(prog, R_OK) < 0) {
		printf("Error accessing file %s: %s\n", prog, strerror(errno));
		/* Must be -1: the caller would use any other value as a map fd. */
		return -1;
	}

	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &bpfprog_fd))
		ret = 1;

	if (!ret) {
		map = bpf_object__find_map_by_name(obj, "queue_stats");
		map_fd = bpf_map__fd(map);
		if (map_fd < 0) {
			/* map_fd holds a negative errno value here. */
			printf("Map not found: %s\n", strerror(-map_fd));
			ret = 1;
		}
	}

	if (ret) {
		printf("ERROR: load_bpf_file failed for: %s\n", prog);
		/* NOTE(review): bpf_log_buf comes from bpf_load.h; whether
		 * bpf_prog_load_xattr() fills it is not visible here.
		 */
		printf(" Output from verifier:\n%s\n------\n", bpf_log_buf);
		return -1;
	}

	return map_fd;
}
/*
 * Read the cumulative transmitted-bytes counter of eth0 from sysfs.
 * Exits through do_error() when the file cannot be opened or parsed.
 */
static unsigned long long read_eth0_tx_bytes(void)
{
	unsigned long long tx_bytes;
	FILE *fin;

	fin = fopen("/sys/class/net/eth0/statistics/tx_bytes", "r");
	if (fin == NULL)
		do_error("fopen of eth0 tx_bytes fails", true);
	if (fscanf(fin, "%llu", &tx_bytes) != 1)
		do_error("fscanf fails", false);
	fclose(fin);
	return tx_bytes;
}

/*
 * Load @prog, attach it to cgroup "/hbm<cg_id>" and run for dur seconds.
 *
 * Steps: load the program, set up the cgroup environment, create and join
 * the cgroup, write rate/stats/loopback settings into element 0 of the
 * queue_stats map, attach the program, then either sleep for dur seconds
 * or, with the work-conserving flag, poll eth0 and cgroup byte counters
 * every DELTA_RATE_CHECK us and raise or lower the cgroup rate. Finally,
 * when stats were requested, write them to hbm.<cg_id>.out and, with the
 * debug flag, stream the trace pipe (which does not return).
 *
 * Returns 0 on success and 1 on failure. On the success path the cgroup
 * fd and cgroup environment are intentionally left as they are.
 */
static int run_bpf_prog(char *prog, int cg_id)
{
	int map_fd;
	int rc = 0;
	int key = 0;
	int cg1 = 0;
	int type = BPF_CGROUP_INET_EGRESS;
	char cg_dir[100];
	struct hbm_queue_stats qstats = {0};

	snprintf(cg_dir, sizeof(cg_dir), "/hbm%d", cg_id);
	map_fd = prog_load(prog);
	if (map_fd == -1)
		return 1;

	if (setup_cgroup_environment()) {
		printf("ERROR: setting cgroup environment\n");
		goto err;
	}
	cg1 = create_and_get_cgroup(cg_dir);
	/* NOTE(review): assumes create_and_get_cgroup() returns 0 on failure;
	 * confirm against cgroup_helpers.
	 */
	if (!cg1) {
		printf("ERROR: create_and_get_cgroup\n");
		goto err;
	}
	if (join_cgroup(cg_dir)) {
		printf("ERROR: join_cgroup\n");
		goto err;
	}

	/* Hand the configuration to the BPF program through the map. */
	qstats.rate = rate;
	qstats.stats = stats_flag ? 1 : 0;
	qstats.loopback = loopback_flag ? 1 : 0;
	if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) {
		printf("ERROR: Could not update map element\n");
		goto err;
	}

	if (!outFlag)
		type = BPF_CGROUP_INET_INGRESS;
	if (bpf_prog_attach(bpfprog_fd, cg1, type, 0)) {
		printf("ERROR: bpf_prog_attach fails!\n");
		log_err("Attaching prog");
		goto err;
	}

	if (work_conserving_flag) {
		struct timeval t0, t_last, t_new;
		unsigned long long last_eth_tx_bytes, new_eth_tx_bytes;
		signed long long last_cg_tx_bytes, new_cg_tx_bytes;
		signed long long delta_time, delta_bytes, delta_rate;
		int delta_ms;
#define DELTA_RATE_CHECK 10000		/* in us */
#define RATE_THRESHOLD 9500000000	/* 9.5 Gbps */

		bpf_map_lookup_elem(map_fd, &key, &qstats);
		if (gettimeofday(&t0, NULL) < 0)
			do_error("gettimeofday failed", true);
		t_last = t0;
		last_eth_tx_bytes = read_eth0_tx_bytes();
		last_cg_tx_bytes = qstats.bytes_total;
		while (true) {
			usleep(DELTA_RATE_CHECK);
			if (gettimeofday(&t_new, NULL) < 0)
				do_error("gettimeofday failed", true);
			/* Stop once the requested duration has elapsed. */
			delta_ms = (t_new.tv_sec - t0.tv_sec) * 1000 +
				(t_new.tv_usec - t0.tv_usec)/1000;
			if (delta_ms > dur * 1000)
				break;
			/* Microseconds since the previous sample. */
			delta_time = (t_new.tv_sec - t_last.tv_sec) * 1000000 +
				(t_new.tv_usec - t_last.tv_usec);
			if (delta_time == 0)
				continue;
			t_last = t_new;
			new_eth_tx_bytes = read_eth0_tx_bytes();
			printf(" new_eth_tx_bytes:%llu\n",
			       new_eth_tx_bytes);
			bpf_map_lookup_elem(map_fd, &key, &qstats);
			new_cg_tx_bytes = qstats.bytes_total;
			delta_bytes = new_eth_tx_bytes - last_eth_tx_bytes;
			last_eth_tx_bytes = new_eth_tx_bytes;
			/* bytes per us -> bits per second */
			delta_rate = (delta_bytes * 8000000) / delta_time;
			printf("%5d - eth_rate:%.1fGbps cg_rate:%.3fGbps",
			       delta_ms, delta_rate/1000000000.0,
			       rate/1000.0);
			if (delta_rate < RATE_THRESHOLD) {
				/* can increase cgroup rate limit, but first
				 * check if we are using the current limit.
				 * Currently increasing by 6.25%, unknown
				 * if that is the optimal rate.
				 */
				int rate_diff100;

				delta_bytes = new_cg_tx_bytes -
					last_cg_tx_bytes;
				last_cg_tx_bytes = new_cg_tx_bytes;
				delta_rate = (delta_bytes * 8000000) /
					delta_time;
				printf(" rate:%.3fGbps",
				       delta_rate/1000000000.0);
				/* Unused share of the current limit, in percent. */
				rate_diff100 = (((long long)rate)*1000000 -
						delta_rate) * 100 /
					(((long long) rate) * 1000000);
				printf(" rdiff:%d", rate_diff100);
				if (rate_diff100 <= 3) {
					rate += (rate >> 4);
					if (rate > RATE_THRESHOLD / 1000000)
						rate = RATE_THRESHOLD / 1000000;
					qstats.rate = rate;
					printf(" INC\n");
				} else {
					printf("\n");
				}
			} else {
				/* Need to decrease cgroup rate limit.
				 * Currently decreasing by 12.5%, unknown
				 * if that is optimal
				 */
				printf(" DEC\n");
				rate -= (rate >> 3);
				if (rate < minRate)
					rate = minRate;
				qstats.rate = rate;
			}
			if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY))
				do_error("update map element fails", false);
		}
	} else {
		sleep(dur);
	}

	// Get stats!
	if (stats_flag && bpf_map_lookup_elem(map_fd, &key, &qstats)) {
		char fname[100];
		FILE *fout;

		if (!outFlag)
			snprintf(fname, sizeof(fname), "hbm.%d.in", cg_id);
		else
			snprintf(fname, sizeof(fname), "hbm.%d.out", cg_id);
		fout = fopen(fname, "w");
		if (fout == NULL) {
			printf("ERROR: Could not create %s, errno: %d\n",
			       fname, errno);
		} else {
			fprintf(fout, "id:%d\n", cg_id);
			fprintf(fout, "ERROR: Could not lookup queue_stats\n");
			fclose(fout);
		}
	} else if (stats_flag && qstats.lastPacketTime >
		   qstats.firstPacketTime) {
		long long delta_us = (qstats.lastPacketTime -
				      qstats.firstPacketTime)/1000;
		/* Timestamps less than 1 us apart give delta_us == 0;
		 * report a rate of 0 instead of dividing by zero.
		 */
		unsigned int rate_mbps = delta_us > 0 ?
			((qstats.bytes_total -
			  qstats.bytes_dropped) * 8 /
			 delta_us) : 0;
		double percent_pkts, percent_bytes;
		char fname[100];
		FILE *fout;

		// Future support of ingress: use "hbm.%d.in" when !outFlag
		snprintf(fname, sizeof(fname), "hbm.%d.out", cg_id);
		fout = fopen(fname, "w");
		if (fout == NULL) {
			printf("ERROR: Could not create %s, errno: %d\n",
			       fname, errno);
		} else {
			fprintf(fout, "id:%d\n", cg_id);
			fprintf(fout, "rate_mbps:%u\n", rate_mbps);
			fprintf(fout, "duration:%.1f secs\n",
				(qstats.lastPacketTime -
				 qstats.firstPacketTime) /
				1000000000.0);
			fprintf(fout, "packets:%d\n", (int)qstats.pkts_total);
			fprintf(fout, "bytes_MB:%d\n",
				(int)(qstats.bytes_total / 1000000));
			fprintf(fout, "pkts_dropped:%d\n",
				(int)qstats.pkts_dropped);
			fprintf(fout, "bytes_dropped_MB:%d\n",
				(int)(qstats.bytes_dropped /
				      1000000));
			// Marked Pkts and Bytes
			/* "+ 1" keeps the divisor non-zero. */
			percent_pkts = (qstats.pkts_marked * 100.0) /
				(qstats.pkts_total + 1);
			percent_bytes = (qstats.bytes_marked * 100.0) /
				(qstats.bytes_total + 1);
			fprintf(fout, "pkts_marked_percent:%6.2f\n",
				percent_pkts);
			fprintf(fout, "bytes_marked_percent:%6.2f\n",
				percent_bytes);
			// Dropped Pkts and Bytes
			percent_pkts = (qstats.pkts_dropped * 100.0) /
				(qstats.pkts_total + 1);
			percent_bytes = (qstats.bytes_dropped * 100.0) /
				(qstats.bytes_total + 1);
			fprintf(fout, "pkts_dropped_percent:%6.2f\n",
				percent_pkts);
			fprintf(fout, "bytes_dropped_percent:%6.2f\n",
				percent_bytes);
			fclose(fout);
		}
	}

	if (debugFlag)
		read_trace_pipe2();
	return rc;
err:
	rc = 1;

	if (cg1)
		close(cg1);
	cleanup_cgroup_environment();

	return rc;
}
/*
 * Print the command-line help on stdout.
 * The text describes the options parsed in main(); the -t default matches
 * the initial value of the global dur.
 */
static void Usage(void)
{
	printf("This program loads a cgroup skb BPF program to enforce\n"
	       "cgroup output (egress) bandwidth limits.\n\n"
	       "USAGE: hbm [-o] [-d] [-l] [-n <id>] [-r <rate>] [-s]\n"
	       " [-t <secs>] [-w] [-h] [prog]\n"
	       " Where:\n"
	       " -o indicates egress direction (default)\n"
	       " -d print BPF trace debug buffer\n"
	       " -l also limit flows using loopback\n"
	       " -n <#> to create cgroup \"/hbm#\" and attach prog\n"
	       " Default is /hbm1\n"
	       " -r <rate> Rate in Mbps\n"
	       " -s Update HBM stats\n"
	       " -t <time> Exit after specified seconds (default is 1)\n"
	       " -w Work conserving flag. cgroup can increase\n"
	       " bandwidth beyond the rate limit specified\n"
	       " while there is available bandwidth. Current\n"
	       " implementation assumes there is only eth0\n"
	       " but can be extended to support multiple NICs\n"
	       " -h print this info\n"
	       " prog BPF program file name. Name defaults to\n"
	       " hbm_out_kern.o\n");
}
/*
 * Entry point: parse the command line into the global option variables,
 * pick the BPF object file (first non-option argument, otherwise
 * "hbm_out_kern.o") and hand over to run_bpf_prog().
 *
 * -h, an unknown option, an option missing its argument, or any option
 * letter without its own case (such as 'i') prints the usage text and
 * returns 0. Otherwise the return value of run_bpf_prog() is returned.
 */
int main(int argc, char **argv)
{
	char *optstring = "iodln:r:st:wh";
	char *prog = "hbm_out_kern.o";
	int cg_id = 1;
	int opt;

	for (;;) {
		opt = getopt(argc, argv, optstring);
		if (opt == -1)
			break;

		switch (opt) {
		case 'o':
			/* egress is already the default direction */
			break;
		case 'd':
			debugFlag = true;
			break;
		case 'l':
			loopback_flag = true;
			break;
		case 'n':
			cg_id = atoi(optarg);
			break;
		case 'r':
			/* The requested rate is scaled by 1.024 before use. */
			minRate = atoi(optarg) * 1.024;
			rate = minRate;
			break;
		case 's':
			stats_flag = true;
			break;
		case 't':
			dur = atoi(optarg);
			break;
		case 'w':
			work_conserving_flag = true;
			break;
		case '?':
			if (optopt == 'n' || optopt == 'r' || optopt == 't')
				fprintf(stderr,
					"Option -%c requires an argument.\n\n",
					optopt);
			/* fallthrough */
		case 'h':
			/* fallthrough */
		default:
			Usage();
			return 0;
		}
	}

	/* First non-option argument overrides the default object file. */
	if (optind < argc)
		prog = argv[optind];
	printf("HBM prog: %s\n", prog != NULL ? prog : "NULL");

	return run_bpf_prog(prog, cg_id);
}

31
samples/bpf/hbm.h Normal file
View File

@@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright (c) 2019 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* Include file for Host Bandwidth Management (HBM) programs
*/
/* Virtual queue state; this is the value type of the queue_state
 * cgroup-storage map declared in hbm_kern.h.
 * NOTE(review): the layout is shared with the BPF programs - do not reorder.
 */
struct hbm_vqueue {
struct bpf_spin_lock lock;
/* 4 byte hole */
unsigned long long lasttime; /* In ns */
int credit; /* In bytes */
unsigned int rate; /* In bytes per NS << 20 */
};
/* Configuration and counters exchanged through the single-entry
 * queue_stats array map: hbm.c writes rate/stats/loopback and reads the
 * counters and timestamps back to produce its report.
 * NOTE(review): "unsigned long" members presumably must have the same width
 * in the BPF program and in user space - confirm for 32-bit hosts.
 */
struct hbm_queue_stats {
unsigned long rate; /* in Mbps*/
unsigned long stats:1, /* get HBM stats (marked, dropped,..) */
loopback:1; /* also limit flows using loopback */
/* Packet/byte counters; hbm.c reports marked and dropped as a percentage
 * of the totals.
 */
unsigned long long pkts_marked;
unsigned long long bytes_marked;
unsigned long long pkts_dropped;
unsigned long long bytes_dropped;
unsigned long long pkts_total;
unsigned long long bytes_total;
/* Timestamps; hbm.c divides their difference by 1e9 to print seconds. */
unsigned long long firstPacketTime;
unsigned long long lastPacketTime;
};

137
samples/bpf/hbm_kern.h Normal file
View File

@@ -0,0 +1,137 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright (c) 2019 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* Include file for sample Host Bandwidth Manager (HBM) BPF programs
*/
#define KBUILD_MODNAME "foo"
#include <stddef.h>
#include <stdbool.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/filter.h>
#include <uapi/linux/pkt_cls.h>
#include <net/ipv6.h>
#include <net/inet_ecn.h>
#include "bpf_endian.h"
#include "bpf_helpers.h"
#include "hbm.h"
/* Verdicts used as return values by the HBM program (see _hbm_out_cg) */
#define DROP_PKT 0
#define ALLOW_PKT 1
/* NOTE(review): not referenced by the helpers in this header - confirm use */
#define TCP_ECN_OK 1
#define HBM_DEBUG 0 // Set to 1 to enable debugging
/* bpf_printk() copies the format into a local array and forwards it to
 * bpf_trace_printk() when HBM_DEBUG is non-zero. Otherwise it expands to
 * nothing, so its arguments are not evaluated in non-debug builds.
 */
#if HBM_DEBUG
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
#else
#define bpf_printk(fmt, ...)
#endif
/* Number of full-size packets worth of credit a new queue starts with */
#define INITIAL_CREDIT_PACKETS 100
/* Packet size, in bytes, that the credit thresholds below are scaled by */
#define MAX_BYTES_PER_PACKET 1500
/* The *_THRESH values below are credit deficits in bytes: the program
 * compares the current credit against their negated values.
 */
#define MARK_THRESH (40 * MAX_BYTES_PER_PACKET)
#define DROP_THRESH (80 * 5 * MAX_BYTES_PER_PACKET)
#define LARGE_PKT_DROP_THRESH (DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
#define MARK_REGION_SIZE (LARGE_PKT_DROP_THRESH - MARK_THRESH)
/* Packets whose length exceeds this many bytes are treated as large */
#define LARGE_PKT_THRESH 120
/* Upper bound applied to the credit after it is refilled */
#define MAX_CREDIT (100 * MAX_BYTES_PER_PACKET)
/* Credit assigned by hbm_init_vqueue() */
#define INIT_CREDIT (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)
// rate in bytes per ns << 20
/* Credit, in bytes, earned over delta ns at the given fixed-point rate */
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
/* Cgroup-storage map holding one struct hbm_vqueue; the BPF program reaches
 * it through bpf_get_local_storage().
 */
struct bpf_map_def SEC("maps") queue_state = {
.type = BPF_MAP_TYPE_CGROUP_STORAGE,
.key_size = sizeof(struct bpf_cgroup_storage_key),
.value_size = sizeof(struct hbm_vqueue),
};
BPF_ANNOTATE_KV_PAIR(queue_state, struct bpf_cgroup_storage_key,
struct hbm_vqueue);
/* Single-entry array map holding one struct hbm_queue_stats; the BPF program
 * looks it up with index 0.
 */
struct bpf_map_def SEC("maps") queue_stats = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(u32),
.value_size = sizeof(struct hbm_queue_stats),
.max_entries = 1,
};
BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats);
/* Per-packet classification filled in by hbm_get_pkt_info() */
struct hbm_pkt_info {
bool is_ip; /* true when the header's version field is 4 or 6 */
bool is_tcp; /* true when the protocol / next-header field equals 6 */
short ecn; /* header ECN bits masked with INET_ECN_MASK; 0 when not IP */
};
static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb,
struct hbm_pkt_info *pkti)
{
struct iphdr iph;
struct ipv6hdr *ip6h;
bpf_skb_load_bytes(skb, 0, &iph, 12);
if (iph.version == 6) {
ip6h = (struct ipv6hdr *)&iph;
pkti->is_ip = true;
pkti->is_tcp = (ip6h->nexthdr == 6);
pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK;
} else if (iph.version == 4) {
pkti->is_ip = true;
pkti->is_tcp = (iph.protocol == 6);
pkti->ecn = iph.tos & INET_ECN_MASK;
} else {
pkti->is_ip = false;
pkti->is_tcp = false;
pkti->ecn = 0;
}
}
/* Initialize the virtual queue @qdp: stamp it with the current time, give it
 * INIT_CREDIT bytes of credit and store @rate scaled by 128.
 *
 * NOTE(review): the factor 128 presumably converts Mbps into the
 * "bytes per ns << 20" unit of hbm_vqueue.rate - confirm against hbm.c.
 */
static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
{
	int scaled_rate = rate * 128;

	bpf_printk("Initializing queue_state, rate:%d\n", scaled_rate);
	qdp->lasttime = bpf_ktime_get_ns();
	qdp->credit = INIT_CREDIT;
	qdp->rate = scaled_rate;
}
/* Account one packet of @len bytes in @qsp.
 *
 * @qsp:             statistics entry, may be NULL (then nothing is done)
 * @len:             packet length in bytes
 * @curtime:         timestamp recorded as first/last packet time
 * @congestion_flag: packet was flagged as congested
 * @drop_flag:       packet is being dropped
 *
 * bytes_total is always updated; every other counter is only touched when
 * qsp->stats is set.
 */
static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
					     int len,
					     unsigned long long curtime,
					     bool congestion_flag,
					     bool drop_flag)
{
	if (qsp == NULL)
		return;

	/* Needed for work conserving, hence not conditional on qsp->stats */
	__sync_add_and_fetch(&(qsp->bytes_total), len);

	/* Everything below is optional statistics */
	if (!qsp->stats)
		return;

	if (qsp->firstPacketTime == 0)
		qsp->firstPacketTime = curtime;
	qsp->lastPacketTime = curtime;
	__sync_add_and_fetch(&(qsp->pkts_total), 1);

	/* A dropped packet is also counted as marked */
	if (drop_flag || congestion_flag) {
		__sync_add_and_fetch(&(qsp->pkts_marked), 1);
		__sync_add_and_fetch(&(qsp->bytes_marked), len);
	}

	if (drop_flag) {
		__sync_add_and_fetch(&(qsp->pkts_dropped), 1);
		__sync_add_and_fetch(&(qsp->bytes_dropped), len);
	}
}

157
samples/bpf/hbm_out_kern.c Normal file
View File

@@ -0,0 +1,157 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* Sample Host Bandwidth Manager (HBM) BPF program.
*
* A cgroup skb BPF egress program to limit cgroup output bandwidth.
* It uses a modified virtual token bucket queue to limit average
* egress bandwidth. The implementation uses credits instead of tokens.
* Negative credits imply that queueing would have happened (this is
* a virtual queue, so no queueing is done by it. However, queueing may
* occur at the actual qdisc (which is not used for rate limiting)).
*
* This implementation uses 3 thresholds, one to start marking packets and
* the other two to drop packets:
* CREDIT
* - <--------------------------|------------------------> +
* | | | 0
* | Large pkt |
* | drop thresh |
* Small pkt drop Mark threshold
* thresh
*
* The effect of marking depends on the type of packet:
* a) If the packet is ECN enabled and it is a TCP packet, then the packet
* is ECN marked.
* b) If the packet is a TCP packet, then we probabilistically call tcp_cwr
* to reduce the congestion window. The current implementation uses a linear
* distribution (0% probability at marking threshold, 100% probability
* at drop threshold).
* c) If the packet is not a TCP packet, then it is dropped.
*
* If the credit is below the drop threshold, the packet is dropped. If it
* is a TCP packet, then it also calls tcp_cwr since packets dropped by
* a cgroup skb BPF program do not automatically trigger a call to
* tcp_cwr in the current kernel code.
*
* This BPF program actually uses 2 drop thresholds, one threshold
* for larger packets (> 120 bytes) and another for smaller packets. This
* protects smaller packets such as SYNs, ACKs, etc.
*
* The default bandwidth limit is set at 1Gbps but this can be changed by
* a user program through a shared BPF map. In addition, by default this BPF
* program does not limit connections using loopback. This behavior can be
* overwritten by the user program. There is also an option to calculate
* some statistics, such as percent of packets marked or dropped, which
* the user program can access.
*
* A later patch provides such a program (hbm.c)
*/
#include "hbm_kern.h"
/* cgroup skb egress program implementing a virtual token-bucket limiter.
 *
 * Each packet charges skb->len bytes against the cgroup's credit. Credit is
 * refilled, under the queue's spin lock, in proportion to the time elapsed
 * since the previous refill and is capped at MAX_CREDIT. The resulting
 * credit decides whether the packet is passed, ECN marked or dropped.
 *
 * Returns ALLOW_PKT to let the packet through, DROP_PKT to drop it.
 */
SEC("cgroup_skb/egress")
int _hbm_out_cg(struct __sk_buff *skb)
{
	struct hbm_queue_stats *qsp = NULL;
	unsigned int queue_index = 0;
	unsigned long long curtime;
	signed long long delta = 0;
	bool congestion_flag = false;
	bool drop_flag = false;
	struct hbm_pkt_info pkti;
	struct hbm_vqueue *qdp;
	int len = skb->len;
	int rv = ALLOW_PKT;
	int credit;

	qsp = bpf_map_lookup_elem(&queue_stats, &queue_index);
	/* Unless loopback limiting was requested, ifindex 1 is not limited */
	if (qsp != NULL && !qsp->loopback && (skb->ifindex == 1))
		return ALLOW_PKT;

	hbm_get_pkt_info(skb, &pkti);

	// We may want to account for the length of headers in len
	// calculation, like ETH header + overhead, specially if it
	// is a gso packet. But I am not doing it right now.

	qdp = bpf_get_local_storage(&queue_state, 0);
	if (!qdp)
		return ALLOW_PKT;
	/* NOTE(review): first-use initialization runs before the lock is
	 * taken - confirm that racing first packets are acceptable.
	 */
	else if (qdp->lasttime == 0)
		hbm_init_vqueue(qdp, 1024);

	curtime = bpf_ktime_get_ns();

	// Begin critical section
	bpf_spin_lock(&qdp->lock);
	credit = qdp->credit;
	delta = curtime - qdp->lasttime;
	/* delta < 0 implies that another process with a curtime greater
	 * than ours beat us to the critical section and already added
	 * the new credit, so we should not add it ourselves
	 */
	if (delta > 0) {
		qdp->lasttime = curtime;
		credit += CREDIT_PER_NS(delta, qdp->rate);
		if (credit > MAX_CREDIT)
			credit = MAX_CREDIT;
	}
	credit -= len;
	qdp->credit = credit;
	bpf_spin_unlock(&qdp->lock);
	// End critical section

	// Check if we should update rate
	if (qsp != NULL && (qsp->rate * 128) != qdp->rate) {
		qdp->rate = qsp->rate * 128;
		bpf_printk("Updating rate: %d (1sec:%llu bits)\n",
			   (int)qdp->rate,
			   CREDIT_PER_NS(1000000000, qdp->rate) * 8);
	}

	// Set flags (drop, congestion)
	// Dropping => we are congested, so ignore congestion flag
	if (credit < -DROP_THRESH ||
	    (len > LARGE_PKT_THRESH &&
	     credit < -LARGE_PKT_DROP_THRESH)) {
		// Very congested, set drop flag
		drop_flag = true;
	} else if (credit < 0) {
		/* Congested: packets without ECN bits are flagged as soon as
		 * the credit is negative, packets with ECN bits only once
		 * the credit falls below the mark threshold.
		 */
		congestion_flag = !pkti.ecn || credit < -MARK_THRESH;
	}

	if (congestion_flag) {
		/* When the CE mark cannot be set, fall back to dropping,
		 * but only for large packets.
		 */
		if (!bpf_skb_ecn_set_ce(skb)) {
			if (len > LARGE_PKT_THRESH) {
				// Problem if too many small packets?
				drop_flag = true;
			}
		}
	}

	if (drop_flag)
		rv = DROP_PKT;

	hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag);

	/* A dropped packet was never sent, so give its bytes back */
	if (rv == DROP_PKT)
		__sync_add_and_fetch(&(qdp->credit), len);

	return rv;
}
char _license[] SEC("license") = "GPL";

View File

@@ -1,97 +0,0 @@
/* Copyright (c) 2017 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include "bpf_load.h"
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/unistd.h>
/* Print the command-line help to stdout and terminate with exit status 1.
 *
 * @pname: program name shown in the synopsis lines
 */
static void usage(char *pname)
{
	/* Load/attach form */
	printf("USAGE:\n %s [-l] <cg-path> <prog filename>\n", pname);
	fputs("\tLoad and attach a sock_ops program to the specified "
	      "cgroup\n", stdout);
	fputs("\tIf \"-l\" is used, the program will continue to run\n",
	      stdout);
	fputs("\tprinting the BPF log buffer\n", stdout);
	fputs("\tIf the specified filename does not end in \".o\", it\n",
	      stdout);
	fputs("\tappends \"_kern.o\" to the name\n", stdout);
	fputs("\n", stdout);

	/* Detach form */
	printf(" %s -r <cg-path>\n", pname);
	fputs("\tDetaches the currently attached sock_ops program\n", stdout);
	fputs("\tfrom the specified cgroup\n", stdout);
	fputs("\n", stdout);

	exit(1);
}
/* Load a sock_ops BPF program and attach it to a cgroup, or detach the
 * program currently attached.
 *
 *   <cmd> [-l] <cg-path> <prog filename>   load and attach
 *   <cmd> -r <cg-path>                     detach
 *
 * Exit status:
 *   0  success
 *   1  bad usage (via usage())
 *   2  detach failed, or the cgroup could not be opened for detaching
 *   3  program name too long
 *   4  loading the BPF object failed
 *   5  attach failed, or the cgroup could not be opened for attaching
 */
int main(int argc, char **argv)
{
	int logFlag = 0;
	int error = 0;
	size_t prog_len;
	char *cg_path;
	char fn[500];
	char *prog;
	int cg_fd;

	if (argc < 3)
		usage(argv[0]);

	if (!strcmp(argv[1], "-r")) {
		cg_path = argv[2];
		cg_fd = open(cg_path, O_RDONLY | O_DIRECTORY);
		if (cg_fd < 0) {
			printf("ERROR: open(%s): %s\n",
			       cg_path, strerror(errno));
			return 2;
		}
		error = bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
		if (error) {
			/* Report before close() can overwrite errno */
			printf("ERROR: bpf_prog_detach: %d (%s)\n",
			       error, strerror(errno));
			close(cg_fd);
			return 2;
		}
		close(cg_fd);
		return 0;
	} else if (!strcmp(argv[1], "-h")) {
		usage(argv[0]);
	} else if (!strcmp(argv[1], "-l")) {
		logFlag = 1;
		if (argc < 4)
			usage(argv[0]);
	}

	prog = argv[argc - 1];
	cg_path = argv[argc - 2];

	prog_len = strlen(prog);
	if (prog_len > 480) {
		fprintf(stderr, "ERROR: program name too long (> 480 chars)\n");
		return 3;
	}

	cg_fd = open(cg_path, O_RDONLY | O_DIRECTORY);
	if (cg_fd < 0) {
		printf("ERROR: open(%s): %s\n", cg_path, strerror(errno));
		return 5;
	}

	/* Only look at the last two characters when there are two of them;
	 * names shorter than that cannot end in ".o".
	 */
	if (prog_len >= 2 && !strcmp(prog + prog_len - 2, ".o"))
		snprintf(fn, sizeof(fn), "%s", prog);
	else
		snprintf(fn, sizeof(fn), "%s_kern.o", prog);

	if (logFlag)
		printf("loading bpf file:%s\n", fn);
	if (load_bpf_file(fn)) {
		printf("ERROR: load_bpf_file failed for: %s\n", fn);
		printf("%s", bpf_log_buf);
		close(cg_fd);
		return 4;
	}
	if (logFlag)
		printf("TCP BPF Loaded %s\n", fn);

	error = bpf_prog_attach(prog_fd[0], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
	if (error) {
		/* Report before close() can overwrite errno */
		printf("ERROR: bpf_prog_attach: %d (%s)\n",
		       error, strerror(errno));
		close(cg_fd);
		return 5;
	}

	/* The cgroup fd is not used again after the attach call */
	close(cg_fd);
	if (logFlag)
		read_trace_pipe();
	return error;
}

View File

@@ -99,7 +99,7 @@ int main(void)
{
FILE *f;
f = popen("ping -c5 localhost", "r");
f = popen("ping -4 -c5 localhost", "r");
(void)f;
return test_sock();

View File

@@ -3,30 +3,33 @@
#include <assert.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include "bpf_load.h"
#include "bpf/libbpf.h"
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
int main(int ac, char **argv)
{
struct bpf_object *obj;
int map_fd, prog_fd;
char filename[256];
FILE *f;
int i, sock;
FILE *f;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER,
&obj, &prog_fd))
return 1;
}
map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
sock = open_raw_sock("lo");
assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
sizeof(prog_fd[0])) == 0);
assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
sizeof(prog_fd)) == 0);
f = popen("ping -c5 localhost", "r");
f = popen("ping -4 -c5 localhost", "r");
(void) f;
for (i = 0; i < 5; i++) {
@@ -34,13 +37,13 @@ int main(int ac, char **argv)
int key;
key = IPPROTO_TCP;
assert(bpf_map_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0);
assert(bpf_map_lookup_elem(map_fd, &key, &tcp_cnt) == 0);
key = IPPROTO_UDP;
assert(bpf_map_lookup_elem(map_fd[0], &key, &udp_cnt) == 0);
assert(bpf_map_lookup_elem(map_fd, &key, &udp_cnt) == 0);
key = IPPROTO_ICMP;
assert(bpf_map_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0);
assert(bpf_map_lookup_elem(map_fd, &key, &icmp_cnt) == 0);
printf("TCP %lld UDP %lld ICMP %lld bytes\n",
tcp_cnt, udp_cnt, icmp_cnt);

View File

@@ -3,7 +3,7 @@
#include <assert.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include "bpf_load.h"
#include "bpf/libbpf.h"
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
@@ -17,32 +17,35 @@ struct pair {
int main(int ac, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_object *obj;
int map_fd, prog_fd;
char filename[256];
FILE *f;
int i, sock;
FILE *f;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
setrlimit(RLIMIT_MEMLOCK, &r);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER,
&obj, &prog_fd))
return 1;
}
map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map");
sock = open_raw_sock("lo");
assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
sizeof(prog_fd[0])) == 0);
assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
sizeof(prog_fd)) == 0);
f = popen("ping -c5 localhost", "r");
f = popen("ping -4 -c5 localhost", "r");
(void) f;
for (i = 0; i < 5; i++) {
int key = 0, next_key;
struct pair value;
while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
bpf_map_lookup_elem(map_fd[0], &next_key, &value);
while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
bpf_map_lookup_elem(map_fd, &next_key, &value);
printf("ip %s bytes %lld packets %lld\n",
inet_ntoa((struct in_addr){htonl(next_key)}),
value.bytes, value.packets);

View File

@@ -58,7 +58,7 @@ int main(int argc, char **argv)
sizeof(__u32)) == 0);
if (argc > 1)
f = popen("ping -c5 localhost", "r");
f = popen("ping -4 -c5 localhost", "r");
else
f = popen("netperf -l 4 localhost", "r");
(void) f;

View File

@@ -4,7 +4,7 @@
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
SEC("kprobe/blk_start_request")
SEC("kprobe/blk_mq_start_request")
int bpf_prog1(struct pt_regs *ctx)
{
return 0;

View File

@@ -311,7 +311,7 @@ int main(int argc, char **argv)
}
/* test two functions in the corresponding *_kern.c file */
CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request",
CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_mq_start_request",
BPF_FD_TYPE_KPROBE));
CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
BPF_FD_TYPE_KRETPROBE));

View File

@@ -7,7 +7,7 @@
* BPF program to set base_rtt to 80us when host is running TCP-NV and
* both hosts are in the same datacenter (as determined by IPv6 prefix).
*
* Use load_sock_ops to load this BPF program.
* Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>

View File

@@ -8,14 +8,16 @@ a cgroupv2 and attach a bash shell to the group.
bash
echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
Anything that runs under this shell belongs to the foo cgroupv2 To load
Anything that runs under this shell belongs to the foo cgroupv2. To load
(attach) one of the tcp_*_kern.o programs:
./load_sock_ops -l /tmp/cgroupv2/foo tcp_basertt_kern.o
bpftool prog load tcp_basertt_kern.o /sys/fs/bpf/tcp_prog
bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
bpftool prog tracelog
If the "-l" flag is used, the load_sock_ops program will continue to run
printing the BPF log buffer. The tcp_*_kern.o programs use special print
functions to print logging information (if enabled by the ifdef).
"bpftool prog tracelog" will continue to run printing the BPF log buffer.
The tcp_*_kern.o programs use special print functions to print logging
information (if enabled by the ifdef).
If using netperf/netserver to create traffic, you need to run them under the
cgroupv2 to which the BPF programs are attached (i.e. under bash shell
@@ -23,4 +25,4 @@ attached to the cgroupv2).
To remove (unattach) a socket_ops BPF program from a cgroupv2:
./load_sock_ops -r /tmp/cgroupv2/foo
bpftool cgroup detach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog

View File

@@ -9,7 +9,7 @@
* doing appropriate checks that indicate the hosts are far enough
* away (i.e. large RTT).
*
* Use load_sock_ops to load this BPF program.
* Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>

View File

@@ -9,7 +9,7 @@
* the same datacenter. For this example, we assume they are within the same
* datacenter when the first 5.5 bytes of their IPv6 addresses are the same.
*
* Use load_sock_ops to load this BPF program.
* Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>

View File

@@ -7,7 +7,7 @@
* BPF program to set congestion control to dctcp when both hosts are
* in the same datacenter (as determined by IPv6 prefix).
*
* Use load_sock_ops to load this BPF program.
* Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>

View File

@@ -9,7 +9,7 @@
* would usually be done after doing appropriate checks that indicate
* the hosts are far enough away (i.e. large RTT).
*
* Use load_sock_ops to load this BPF program.
* Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>

View File

@@ -8,7 +8,7 @@
* and the first 5.5 bytes of the IPv6 addresses are not the same (in this
* example that means both hosts are not the same datacenter).
*
* Use load_sock_ops to load this BPF program.
* Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>

View File

@@ -8,7 +8,7 @@
* and the first 5.5 bytes of the IPv6 addresses are the same (in this example
* that means both hosts are in the same datacenter).
*
* Use load_sock_ops to load this BPF program.
* Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>

View File

@@ -4,7 +4,7 @@
*
* BPF program to automatically reflect TOS option from received syn packet
*
* Use load_sock_ops to load this BPF program.
* Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
*/
#include <uapi/linux/bpf.h>

View File

@@ -131,7 +131,7 @@ int main(int ac, char **argv)
signal(SIGTERM, int_exit);
/* start 'ping' in the background to have some kfree_skb events */
f = popen("ping -c5 localhost", "r");
f = popen("ping -4 -c5 localhost", "r");
(void) f;
/* start 'dd' in the background to have plenty of 'write' syscalls */

View File

@@ -20,7 +20,7 @@ struct bpf_map_def SEC("maps") my_map = {
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
*/
SEC("kprobe/blk_start_request")
SEC("kprobe/blk_mq_start_request")
int bpf_prog1(struct pt_regs *ctx)
{
long rq = PT_REGS_PARM1(ctx);

View File

@@ -22,11 +22,23 @@
#include "bpf/libbpf.h"
static int ifindex;
static __u32 xdp_flags;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static __u32 prog_id;
static void int_exit(int sig)
{
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
__u32 curr_prog_id = 0;
if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given interface\n");
else
printf("program on interface changed, not removing\n");
exit(0);
}
@@ -63,7 +75,8 @@ static void usage(const char *prog)
"usage: %s [OPTS] IFACE\n\n"
"OPTS:\n"
" -S use skb-mode\n"
" -N enforce native mode\n",
" -N enforce native mode\n"
" -F force loading prog\n",
prog);
}
@@ -73,11 +86,14 @@ int main(int argc, char **argv)
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
const char *optstr = "SN";
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
const char *optstr = "FSN";
int prog_fd, map_fd, opt;
struct bpf_object *obj;
struct bpf_map *map;
char filename[256];
int err;
while ((opt = getopt(argc, argv, optstr)) != -1) {
switch (opt) {
@@ -87,6 +103,9 @@ int main(int argc, char **argv)
case 'N':
xdp_flags |= XDP_FLAGS_DRV_MODE;
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
default:
usage(basename(argv[0]));
return 1;
@@ -135,6 +154,13 @@ int main(int argc, char **argv)
return 1;
}
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
return err;
}
prog_id = info.id;
poll_stats(map_fd, 2);
return 0;

View File

@@ -24,12 +24,25 @@
#define STATS_INTERVAL_S 2U
static int ifindex = -1;
static __u32 xdp_flags;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static __u32 prog_id;
static void int_exit(int sig)
{
if (ifindex > -1)
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
__u32 curr_prog_id = 0;
if (ifindex > -1) {
if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given iface\n");
else
printf("program on interface changed, not removing\n");
}
exit(0);
}
@@ -60,6 +73,7 @@ static void usage(const char *cmd)
printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n");
printf(" -S use skb-mode\n");
printf(" -N enforce native mode\n");
printf(" -F force loading prog\n");
printf(" -h Display this help\n");
}
@@ -70,12 +84,15 @@ int main(int argc, char **argv)
.prog_type = BPF_PROG_TYPE_XDP,
};
unsigned char opt_flags[256] = {};
const char *optstr = "i:T:SNFh";
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
unsigned int kill_after_s = 0;
const char *optstr = "i:T:SNh";
int i, prog_fd, map_fd, opt;
struct bpf_object *obj;
struct bpf_map *map;
char filename[256];
int err;
for (i = 0; i < strlen(optstr); i++)
if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
@@ -96,6 +113,9 @@ int main(int argc, char **argv)
case 'N':
xdp_flags |= XDP_FLAGS_DRV_MODE;
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
default:
usage(argv[0]);
return 1;
@@ -142,9 +162,15 @@ int main(int argc, char **argv)
return 1;
}
poll_stats(map_fd, kill_after_s);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
return 1;
}
prog_id = info.id;
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
poll_stats(map_fd, kill_after_s);
int_exit(0);
return 0;
}

View File

@@ -24,20 +24,26 @@ static const char *__doc__ =
/* How many xdp_progs are defined in _kern.c */
#define MAX_PROG 6
/* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead
* use bpf/libbpf.h), but cannot as (currently) needed for XDP
* attaching to a device via bpf_set_link_xdp_fd()
*/
#include <bpf/bpf.h>
#include "bpf_load.h"
#include "bpf/libbpf.h"
#include "bpf_util.h"
static int ifindex = -1;
static char ifname_buf[IF_NAMESIZE];
static char *ifname;
static __u32 prog_id;
static __u32 xdp_flags;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static int cpu_map_fd;
static int rx_cnt_map_fd;
static int redirect_err_cnt_map_fd;
static int cpumap_enqueue_cnt_map_fd;
static int cpumap_kthread_cnt_map_fd;
static int cpus_available_map_fd;
static int cpus_count_map_fd;
static int cpus_iterator_map_fd;
static int exception_cnt_map_fd;
/* Exit return codes */
#define EXIT_OK 0
@@ -51,27 +57,50 @@ static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
{"dev", required_argument, NULL, 'd' },
{"skb-mode", no_argument, NULL, 'S' },
{"debug", no_argument, NULL, 'D' },
{"sec", required_argument, NULL, 's' },
{"prognum", required_argument, NULL, 'p' },
{"progname", required_argument, NULL, 'p' },
{"qsize", required_argument, NULL, 'q' },
{"cpu", required_argument, NULL, 'c' },
{"stress-mode", no_argument, NULL, 'x' },
{"no-separators", no_argument, NULL, 'z' },
{"force", no_argument, NULL, 'F' },
{0, 0, NULL, 0 }
};
static void int_exit(int sig)
{
fprintf(stderr,
"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
ifindex, ifname);
if (ifindex > -1)
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
__u32 curr_prog_id = 0;
if (ifindex > -1) {
if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
exit(EXIT_FAIL);
}
if (prog_id == curr_prog_id) {
fprintf(stderr,
"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
ifindex, ifname);
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
} else if (!curr_prog_id) {
printf("couldn't find a prog id on a given iface\n");
} else {
printf("program on interface changed, not removing\n");
}
}
exit(EXIT_OK);
}
static void usage(char *argv[])
/* Print the title of every XDP program contained in @obj, one per line. */
static void print_avail_progs(struct bpf_object *obj)
{
	struct bpf_program *prog;

	bpf_object__for_each_program(prog, obj) {
		/* Programs of any other type are not listed */
		if (!bpf_program__is_xdp(prog))
			continue;
		printf(" %s\n", bpf_program__title(prog, false));
	}
}
static void usage(char *argv[], struct bpf_object *obj)
{
int i;
@@ -89,6 +118,8 @@ static void usage(char *argv[])
long_options[i].val);
printf("\n");
}
printf("\n Programs to be used for --progname:\n");
print_avail_progs(obj);
printf("\n");
}
@@ -263,7 +294,7 @@ static __u64 calc_errs_pps(struct datarec *r,
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
int prog_num)
char *prog_name)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
double pps = 0, drop = 0, err = 0;
@@ -273,7 +304,7 @@ static void stats_print(struct stats_record *stats_rec,
int i;
/* Header */
printf("Running XDP/eBPF prog_num:%d\n", prog_num);
printf("Running XDP/eBPF prog_name:%s\n", prog_name);
printf("%-15s %-7s %-14s %-11s %-9s\n",
"XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info");
@@ -424,20 +455,20 @@ static void stats_collect(struct stats_record *rec)
{
int fd, i;
fd = map_fd[1]; /* map: rx_cnt */
fd = rx_cnt_map_fd;
map_collect_percpu(fd, 0, &rec->rx_cnt);
fd = map_fd[2]; /* map: redirect_err_cnt */
fd = redirect_err_cnt_map_fd;
map_collect_percpu(fd, 1, &rec->redir_err);
fd = map_fd[3]; /* map: cpumap_enqueue_cnt */
fd = cpumap_enqueue_cnt_map_fd;
for (i = 0; i < MAX_CPUS; i++)
map_collect_percpu(fd, i, &rec->enq[i]);
fd = map_fd[4]; /* map: cpumap_kthread_cnt */
fd = cpumap_kthread_cnt_map_fd;
map_collect_percpu(fd, 0, &rec->kthread);
fd = map_fd[8]; /* map: exception_cnt */
fd = exception_cnt_map_fd;
map_collect_percpu(fd, 0, &rec->exception);
}
@@ -462,7 +493,7 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size,
/* Add a CPU entry to cpumap, as this allocates a cpu entry in
* the kernel for the cpu.
*/
ret = bpf_map_update_elem(map_fd[0], &cpu, &queue_size, 0);
ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0);
if (ret) {
fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
exit(EXIT_FAIL_BPF);
@@ -471,23 +502,22 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size,
/* Inform bpf_prog's that a new CPU is available to select
* from via some control maps.
*/
/* map_fd[5] = cpus_available */
ret = bpf_map_update_elem(map_fd[5], &avail_idx, &cpu, 0);
ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0);
if (ret) {
fprintf(stderr, "Add to avail CPUs failed\n");
exit(EXIT_FAIL_BPF);
}
/* When not replacing/updating existing entry, bump the count */
/* map_fd[6] = cpus_count */
ret = bpf_map_lookup_elem(map_fd[6], &key, &curr_cpus_count);
ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count);
if (ret) {
fprintf(stderr, "Failed reading curr cpus_count\n");
exit(EXIT_FAIL_BPF);
}
if (new) {
curr_cpus_count++;
ret = bpf_map_update_elem(map_fd[6], &key, &curr_cpus_count, 0);
ret = bpf_map_update_elem(cpus_count_map_fd, &key,
&curr_cpus_count, 0);
if (ret) {
fprintf(stderr, "Failed write curr cpus_count\n");
exit(EXIT_FAIL_BPF);
@@ -510,8 +540,8 @@ static void mark_cpus_unavailable(void)
int ret, i;
for (i = 0; i < MAX_CPUS; i++) {
/* map_fd[5] = cpus_available */
ret = bpf_map_update_elem(map_fd[5], &i, &invalid_cpu, 0);
ret = bpf_map_update_elem(cpus_available_map_fd, &i,
&invalid_cpu, 0);
if (ret) {
fprintf(stderr, "Failed marking CPU unavailable\n");
exit(EXIT_FAIL_BPF);
@@ -531,7 +561,7 @@ static void stress_cpumap(void)
create_cpu_entry(1, 16000, 0, false);
}
static void stats_poll(int interval, bool use_separators, int prog_num,
static void stats_poll(int interval, bool use_separators, char *prog_name,
bool stress_mode)
{
struct stats_record *record, *prev;
@@ -547,7 +577,7 @@ static void stats_poll(int interval, bool use_separators, int prog_num,
while (1) {
swap(&prev, &record);
stats_collect(record);
stats_print(record, prev, prog_num);
stats_print(record, prev, prog_name);
sleep(interval);
if (stress_mode)
stress_cpumap();
@@ -557,20 +587,55 @@ static void stats_poll(int interval, bool use_separators, int prog_num,
free_stats_record(prev);
}
/* Resolve the file descriptors of all maps used by this program from @obj
 * and store them in the corresponding file-scope *_map_fd variables.
 *
 * Every map is looked up, in a fixed order, even if an earlier lookup
 * failed. Returns 0 when all lookups succeeded and -ENOENT when at least
 * one of them returned a negative descriptor.
 */
static int init_map_fds(struct bpf_object *obj)
{
	const struct {
		int *fd;
		const char *name;
	} wanted[] = {
		{ &cpu_map_fd,                "cpu_map" },
		{ &rx_cnt_map_fd,             "rx_cnt" },
		{ &redirect_err_cnt_map_fd,   "redirect_err_cnt" },
		{ &cpumap_enqueue_cnt_map_fd, "cpumap_enqueue_cnt" },
		{ &cpumap_kthread_cnt_map_fd, "cpumap_kthread_cnt" },
		{ &cpus_available_map_fd,     "cpus_available" },
		{ &cpus_count_map_fd,         "cpus_count" },
		{ &cpus_iterator_map_fd,      "cpus_iterator" },
		{ &exception_cnt_map_fd,      "exception_cnt" },
	};
	int missing = 0;
	unsigned int idx;

	for (idx = 0; idx < sizeof(wanted) / sizeof(wanted[0]); idx++) {
		*wanted[idx].fd =
			bpf_object__find_map_fd_by_name(obj, wanted[idx].name);
		if (*wanted[idx].fd < 0)
			missing = 1;
	}

	return missing ? -ENOENT : 0;
}
int main(int argc, char **argv)
{
struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_UNSPEC,
};
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
bool use_separators = true;
bool stress_mode = false;
struct bpf_program *prog;
struct bpf_object *obj;
char filename[256];
bool debug = false;
int added_cpus = 0;
int longindex = 0;
int interval = 2;
int prog_num = 5;
int add_cpu = -1;
int opt, err;
int prog_fd;
__u32 qsize;
int opt;
/* Notice: choosing the queue size is very important with the
* ixgbe driver, because its driver page recycling trick is
@@ -581,26 +646,29 @@ int main(int argc, char **argv)
qsize = 128+64;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
return 1;
}
if (load_bpf_file(filename)) {
fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf);
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return EXIT_FAIL;
if (prog_fd < 0) {
fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
strerror(errno));
return EXIT_FAIL;
}
if (!prog_fd[0]) {
fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
if (init_map_fds(obj) < 0) {
fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
return EXIT_FAIL;
}
mark_cpus_unavailable();
/* Parse commands line args */
while ((opt = getopt_long(argc, argv, "hSd:",
while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF",
long_options, &longindex)) != -1) {
switch (opt) {
case 'd':
@@ -624,9 +692,6 @@ int main(int argc, char **argv)
case 'S':
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'D':
debug = true;
break;
case 'x':
stress_mode = true;
break;
@@ -635,13 +700,7 @@ int main(int argc, char **argv)
break;
case 'p':
/* Selecting eBPF prog to load */
prog_num = atoi(optarg);
if (prog_num < 0 || prog_num >= MAX_PROG) {
fprintf(stderr,
"--prognum too large err(%d):%s\n",
errno, strerror(errno));
goto error;
}
prog_name = optarg;
break;
case 'c':
/* Add multiple CPUs */
@@ -658,24 +717,27 @@ int main(int argc, char **argv)
case 'q':
qsize = atoi(optarg);
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
case 'h':
error:
default:
usage(argv);
usage(argv, obj);
return EXIT_FAIL_OPTION;
}
}
/* Required option */
if (ifindex == -1) {
fprintf(stderr, "ERR: required option --dev missing\n");
usage(argv);
usage(argv, obj);
return EXIT_FAIL_OPTION;
}
/* Required option */
if (add_cpu == -1) {
fprintf(stderr, "ERR: required option --cpu missing\n");
fprintf(stderr, " Specify multiple --cpu option to add more\n");
usage(argv);
usage(argv, obj);
return EXIT_FAIL_OPTION;
}
@@ -683,16 +745,30 @@ int main(int argc, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
if (bpf_set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) {
prog = bpf_object__find_program_by_title(obj, prog_name);
if (!prog) {
fprintf(stderr, "bpf_object__find_program_by_title failed\n");
return EXIT_FAIL;
}
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
fprintf(stderr, "bpf_program__fd failed\n");
return EXIT_FAIL;
}
if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
return EXIT_FAIL_XDP;
}
if (debug) {
printf("Debug-mode reading trace pipe (fix #define DEBUG)\n");
read_trace_pipe();
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
return err;
}
prog_id = info.id;
stats_poll(interval, use_separators, prog_num, stress_mode);
stats_poll(interval, use_separators, prog_name, stress_mode);
return EXIT_OK;
}

View File

@@ -22,21 +22,48 @@
#include <libgen.h>
#include <sys/resource.h>
#include "bpf_load.h"
#include "bpf_util.h"
#include <bpf/bpf.h>
#include "bpf/libbpf.h"
static int ifindex_in;
static int ifindex_out;
static bool ifindex_out_xdp_dummy_attached = true;
static __u32 prog_id;
static __u32 dummy_prog_id;
static __u32 xdp_flags;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static int rxcnt_map_fd;
static void int_exit(int sig)
{
bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
if (ifindex_out_xdp_dummy_attached)
bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
__u32 curr_prog_id = 0;
if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
else if (!curr_prog_id)
printf("couldn't find a prog id on iface IN\n");
else
printf("program on iface IN changed, not removing\n");
if (ifindex_out_xdp_dummy_attached) {
curr_prog_id = 0;
if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id,
xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
exit(1);
}
if (dummy_prog_id == curr_prog_id)
bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
else if (!curr_prog_id)
printf("couldn't find a prog id on iface OUT\n");
else
printf("program on iface OUT changed, not removing\n");
}
exit(0);
}
@@ -53,7 +80,7 @@ static void poll_stats(int interval, int ifindex)
int i;
sleep(interval);
assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
for (i = 0; i < nr_cpus; i++)
sum += (values[i] - prev[i]);
if (sum)
@@ -69,16 +96,26 @@ static void usage(const char *prog)
"usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
"OPTS:\n"
" -S use skb-mode\n"
" -N enforce native mode\n",
" -N enforce native mode\n"
" -F force loading prog\n",
prog);
}
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
const char *optstr = "SN";
char filename[256];
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
struct bpf_program *prog, *dummy_prog;
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
int prog_fd, dummy_prog_fd;
const char *optstr = "FSN";
struct bpf_object *obj;
int ret, opt, key = 0;
char filename[256];
int tx_port_map_fd;
while ((opt = getopt(argc, argv, optstr)) != -1) {
switch (opt) {
@@ -88,6 +125,9 @@ int main(int argc, char **argv)
case 'N':
xdp_flags |= XDP_FLAGS_DRV_MODE;
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
default:
usage(basename(argv[0]));
return 1;
@@ -109,37 +149,65 @@ int main(int argc, char **argv)
printf("input: %d output: %d\n", ifindex_in, ifindex_out);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
prog = bpf_program__next(NULL, obj);
dummy_prog = bpf_program__next(prog, obj);
if (!prog || !dummy_prog) {
printf("finding a prog in obj file failed\n");
return 1;
}
/* bpf_prog_load_xattr gives us the pointer to first prog's fd,
* so we're missing only the fd for dummy prog
*/
dummy_prog_fd = bpf_program__fd(dummy_prog);
if (prog_fd < 0 || dummy_prog_fd < 0) {
printf("bpf_prog_load_xattr: %s\n", strerror(errno));
return 1;
}
if (!prog_fd[0]) {
printf("load_bpf_file: %s\n", strerror(errno));
tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) {
printf("bpf_object__find_map_fd_by_name failed\n");
return 1;
}
if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) {
printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
return 1;
}
ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (ret) {
printf("can't get prog info - %s\n", strerror(errno));
return ret;
}
prog_id = info.id;
/* Loading dummy XDP prog on out-device */
if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1],
if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd,
(xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
ifindex_out_xdp_dummy_attached = false;
}
memset(&info, 0, sizeof(info));
ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len);
if (ret) {
printf("can't get prog info - %s\n", strerror(errno));
return ret;
}
dummy_prog_id = info.id;
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
printf("map[0] (vports) = %i, map[1] (map) = %i, map[2] (count) = %i\n",
map_fd[0], map_fd[1], map_fd[2]);
/* populate virtual to physical port map */
ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0);
ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0);
if (ret) {
perror("bpf_update_elem");
goto out;

View File

@@ -22,21 +22,48 @@
#include <libgen.h>
#include <sys/resource.h>
#include "bpf_load.h"
#include "bpf_util.h"
#include <bpf/bpf.h>
#include "bpf/libbpf.h"
static int ifindex_in;
static int ifindex_out;
static bool ifindex_out_xdp_dummy_attached = true;
static __u32 prog_id;
static __u32 dummy_prog_id;
static __u32 xdp_flags;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static int rxcnt_map_fd;
static void int_exit(int sig)
{
bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
if (ifindex_out_xdp_dummy_attached)
bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
__u32 curr_prog_id = 0;
if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
else if (!curr_prog_id)
printf("couldn't find a prog id on iface IN\n");
else
printf("program on iface IN changed, not removing\n");
if (ifindex_out_xdp_dummy_attached) {
curr_prog_id = 0;
if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id,
xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
exit(1);
}
if (dummy_prog_id == curr_prog_id)
bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
else if (!curr_prog_id)
printf("couldn't find a prog id on iface OUT\n");
else
printf("program on iface OUT changed, not removing\n");
}
exit(0);
}
@@ -53,7 +80,7 @@ static void poll_stats(int interval, int ifindex)
int i;
sleep(interval);
assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
for (i = 0; i < nr_cpus; i++)
sum += (values[i] - prev[i]);
if (sum)
@@ -69,7 +96,8 @@ static void usage(const char *prog)
"usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
"OPTS:\n"
" -S use skb-mode\n"
" -N enforce native mode\n",
" -N enforce native mode\n"
" -F force loading prog\n",
prog);
}
@@ -77,9 +105,18 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
const char *optstr = "SN";
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
struct bpf_program *prog, *dummy_prog;
int prog_fd, tx_port_map_fd, opt;
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
const char *optstr = "FSN";
struct bpf_object *obj;
char filename[256];
int ret, opt, key = 0;
int dummy_prog_fd;
int ret, key = 0;
while ((opt = getopt(argc, argv, optstr)) != -1) {
switch (opt) {
@@ -89,6 +126,9 @@ int main(int argc, char **argv)
case 'N':
xdp_flags |= XDP_FLAGS_DRV_MODE;
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
default:
usage(basename(argv[0]));
return 1;
@@ -110,34 +150,65 @@ int main(int argc, char **argv)
printf("input: %d output: %d\n", ifindex_in, ifindex_out);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
prog = bpf_program__next(NULL, obj);
dummy_prog = bpf_program__next(prog, obj);
if (!prog || !dummy_prog) {
printf("finding a prog in obj file failed\n");
return 1;
}
/* bpf_prog_load_xattr gives us the pointer to first prog's fd,
* so we're missing only the fd for dummy prog
*/
dummy_prog_fd = bpf_program__fd(dummy_prog);
if (prog_fd < 0 || dummy_prog_fd < 0) {
printf("bpf_prog_load_xattr: %s\n", strerror(errno));
return 1;
}
if (!prog_fd[0]) {
printf("load_bpf_file: %s\n", strerror(errno));
tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) {
printf("bpf_object__find_map_fd_by_name failed\n");
return 1;
}
if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) {
printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
return 1;
}
ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (ret) {
printf("can't get prog info - %s\n", strerror(errno));
return ret;
}
prog_id = info.id;
/* Loading dummy XDP prog on out-device */
if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1],
if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd,
(xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
ifindex_out_xdp_dummy_attached = false;
}
memset(&info, 0, sizeof(info));
ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (ret) {
printf("can't get prog info - %s\n", strerror(errno));
return ret;
}
dummy_prog_id = info.id;
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
/* bpf redirect port */
ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0);
ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0);
if (ret) {
perror("bpf_update_elem");
goto out;

View File

@@ -15,7 +15,6 @@
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
#include "bpf_load.h"
#include <bpf/bpf.h>
#include <arpa/inet.h>
#include <fcntl.h>
@@ -25,32 +24,52 @@
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include "bpf_util.h"
#include "bpf/libbpf.h"
#include <sys/resource.h>
#include <libgen.h>
int sock, sock_arp, flags = 0;
int sock, sock_arp, flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static int total_ifindex;
int *ifindex_list;
static int *ifindex_list;
static __u32 *prog_id_list;
char buf[8192];
static int lpm_map_fd;
static int rxcnt_map_fd;
static int arp_table_map_fd;
static int exact_match_map_fd;
static int tx_port_map_fd;
static int get_route_table(int rtm_family);
static void int_exit(int sig)
{
__u32 prog_id = 0;
int i = 0;
for (i = 0; i < total_ifindex; i++)
bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
for (i = 0; i < total_ifindex; i++) {
if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) {
printf("bpf_get_link_xdp_id on iface %d failed\n",
ifindex_list[i]);
exit(1);
}
if (prog_id_list[i] == prog_id)
bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
else if (!prog_id)
printf("couldn't find a prog id on iface %d\n",
ifindex_list[i]);
else
printf("program on iface %d changed, not removing\n",
ifindex_list[i]);
prog_id = 0;
}
exit(0);
}
static void close_and_exit(int sig)
{
int i = 0;
close(sock);
close(sock_arp);
for (i = 0; i < total_ifindex; i++)
bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
exit(0);
int_exit(0);
}
/* Get the mac address of the interface given interface name */
@@ -179,14 +198,10 @@ static void read_route(struct nlmsghdr *nh, int nll)
route.iface_name = alloca(sizeof(char *) * IFNAMSIZ);
route.iface_name = if_indextoname(route.iface, route.iface_name);
route.mac = getmac(route.iface_name);
if (route.mac == -1) {
int i = 0;
for (i = 0; i < total_ifindex; i++)
bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
exit(0);
}
assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0);
if (route.mac == -1)
int_exit(0);
assert(bpf_map_update_elem(tx_port_map_fd,
&route.iface, &route.iface, 0) == 0);
if (rtm_family == AF_INET) {
struct trie_value {
__u8 prefix[4];
@@ -207,11 +222,16 @@ static void read_route(struct nlmsghdr *nh, int nll)
direct_entry.arp.dst = 0;
if (route.dst_len == 32) {
if (nh->nlmsg_type == RTM_DELROUTE) {
assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0);
assert(bpf_map_delete_elem(exact_match_map_fd,
&route.dst) == 0);
} else {
if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0)
if (bpf_map_lookup_elem(arp_table_map_fd,
&route.dst,
&direct_entry.arp.mac) == 0)
direct_entry.arp.dst = route.dst;
assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0);
assert(bpf_map_update_elem(exact_match_map_fd,
&route.dst,
&direct_entry, 0) == 0);
}
}
for (i = 0; i < 4; i++)
@@ -225,7 +245,7 @@ static void read_route(struct nlmsghdr *nh, int nll)
route.gw, route.dst_len,
route.metric,
route.iface_name);
if (bpf_map_lookup_elem(map_fd[0], prefix_key,
if (bpf_map_lookup_elem(lpm_map_fd, prefix_key,
prefix_value) < 0) {
for (i = 0; i < 4; i++)
prefix_value->prefix[i] = prefix_key->data[i];
@@ -234,7 +254,7 @@ static void read_route(struct nlmsghdr *nh, int nll)
prefix_value->gw = route.gw;
prefix_value->metric = route.metric;
assert(bpf_map_update_elem(map_fd[0],
assert(bpf_map_update_elem(lpm_map_fd,
prefix_key,
prefix_value, 0
) == 0);
@@ -247,7 +267,7 @@ static void read_route(struct nlmsghdr *nh, int nll)
prefix_key->data[2],
prefix_key->data[3],
prefix_key->prefixlen);
assert(bpf_map_delete_elem(map_fd[0],
assert(bpf_map_delete_elem(lpm_map_fd,
prefix_key
) == 0);
/* Rereading the route table to check if
@@ -275,8 +295,7 @@ static void read_route(struct nlmsghdr *nh, int nll)
prefix_value->ifindex = route.iface;
prefix_value->gw = route.gw;
prefix_value->metric = route.metric;
assert(bpf_map_update_elem(
map_fd[0],
assert(bpf_map_update_elem(lpm_map_fd,
prefix_key,
prefix_value,
0) == 0);
@@ -401,7 +420,8 @@ static void read_arp(struct nlmsghdr *nh, int nll)
arp_entry.mac = atol(mac);
printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac);
if (ndm_family == AF_INET) {
if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst,
if (bpf_map_lookup_elem(exact_match_map_fd,
&arp_entry.dst,
&direct_entry) == 0) {
if (nh->nlmsg_type == RTM_DELNEIGH) {
direct_entry.arp.dst = 0;
@@ -410,16 +430,17 @@ static void read_arp(struct nlmsghdr *nh, int nll)
direct_entry.arp.dst = arp_entry.dst;
direct_entry.arp.mac = arp_entry.mac;
}
assert(bpf_map_update_elem(map_fd[3],
assert(bpf_map_update_elem(exact_match_map_fd,
&arp_entry.dst,
&direct_entry, 0
) == 0);
memset(&direct_entry, 0, sizeof(direct_entry));
}
if (nh->nlmsg_type == RTM_DELNEIGH) {
assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0);
assert(bpf_map_delete_elem(arp_table_map_fd,
&arp_entry.dst) == 0);
} else if (nh->nlmsg_type == RTM_NEWNEIGH) {
assert(bpf_map_update_elem(map_fd[2],
assert(bpf_map_update_elem(arp_table_map_fd,
&arp_entry.dst,
&arp_entry.mac, 0
) == 0);
@@ -553,7 +574,8 @@ static int monitor_route(void)
for (key = 0; key < nr_keys; key++) {
__u64 sum = 0;
assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
assert(bpf_map_lookup_elem(rxcnt_map_fd,
&key, values) == 0);
for (i = 0; i < nr_cpus; i++)
sum += (values[i] - prev[key][i]);
if (sum)
@@ -594,36 +616,87 @@ cleanup:
return ret;
}
/*
 * Print the command-line synopsis and the supported options to stderr.
 * @prog: program name shown in the synopsis line.
 */
static void usage(const char *prog)
{
	static const char help_fmt[] =
		"%s: %s [OPTS] interface name list\n\n"
		"OPTS:\n"
		" -S use skb-mode\n"
		" -F force loading prog\n";

	/* First %s is this function's name, second is the program name. */
	fprintf(stderr, help_fmt, __func__, prog);
}
int main(int ac, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
const char *optstr = "SF";
struct bpf_object *obj;
char filename[256];
char **ifname_list;
int i = 1;
int prog_fd, opt;
int err, i = 1;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (ac < 2) {
printf("usage: %s [-S] Interface name list\n", argv[0]);
prog_load_attr.file = filename;
total_ifindex = ac - 1;
ifname_list = (argv + 1);
while ((opt = getopt(ac, argv, optstr)) != -1) {
switch (opt) {
case 'S':
flags |= XDP_FLAGS_SKB_MODE;
total_ifindex--;
ifname_list++;
break;
case 'F':
flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
total_ifindex--;
ifname_list++;
break;
default:
usage(basename(argv[0]));
return 1;
}
}
if (optind == ac) {
usage(basename(argv[0]));
return 1;
}
if (!strcmp(argv[1], "-S")) {
flags = XDP_FLAGS_SKB_MODE;
total_ifindex = ac - 2;
ifname_list = (argv + 2);
} else {
flags = 0;
total_ifindex = ac - 1;
ifname_list = (argv + 1);
}
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
return 1;
}
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
printf("\n**************loading bpf file*********************\n\n\n");
if (!prog_fd[0]) {
printf("load_bpf_file: %s\n", strerror(errno));
if (!prog_fd) {
printf("bpf_prog_load_xattr: %s\n", strerror(errno));
return 1;
}
ifindex_list = (int *)malloc(total_ifindex * sizeof(int *));
lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map");
rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
arp_table_map_fd = bpf_object__find_map_fd_by_name(obj, "arp_table");
exact_match_map_fd = bpf_object__find_map_fd_by_name(obj,
"exact_match");
tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
if (lpm_map_fd < 0 || rxcnt_map_fd < 0 || arp_table_map_fd < 0 ||
exact_match_map_fd < 0 || tx_port_map_fd < 0) {
printf("bpf_object__find_map_fd_by_name failed\n");
return 1;
}
ifindex_list = (int *)calloc(total_ifindex, sizeof(int *));
for (i = 0; i < total_ifindex; i++) {
ifindex_list[i] = if_nametoindex(ifname_list[i]);
if (!ifindex_list[i]) {
@@ -632,8 +705,9 @@ int main(int ac, char **argv)
return 1;
}
}
prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *));
for (i = 0; i < total_ifindex; i++) {
if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) {
if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) {
printf("link set xdp fd failed\n");
int recovery_index = i;
@@ -642,6 +716,13 @@ int main(int ac, char **argv)
return 1;
}
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
return err;
}
prog_id_list[i] = info.id;
memset(&info, 0, sizeof(info));
printf("Attached to %d\n", ifindex_list[i]);
}
signal(SIGINT, int_exit);

View File

@@ -29,8 +29,9 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n"
static int ifindex = -1;
static char ifname_buf[IF_NAMESIZE];
static char *ifname;
static __u32 prog_id;
static __u32 xdp_flags;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static struct bpf_map *stats_global_map;
static struct bpf_map *rx_queue_index_map;
@@ -52,16 +53,30 @@ static const struct option long_options[] = {
{"action", required_argument, NULL, 'a' },
{"readmem", no_argument, NULL, 'r' },
{"swapmac", no_argument, NULL, 'm' },
{"force", no_argument, NULL, 'F' },
{0, 0, NULL, 0 }
};
static void int_exit(int sig)
{
fprintf(stderr,
"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
ifindex, ifname);
if (ifindex > -1)
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
__u32 curr_prog_id = 0;
if (ifindex > -1) {
if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
exit(EXIT_FAIL);
}
if (prog_id == curr_prog_id) {
fprintf(stderr,
"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
ifindex, ifname);
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
} else if (!curr_prog_id) {
printf("couldn't find a prog id on a given iface\n");
} else {
printf("program on interface changed, not removing\n");
}
}
exit(EXIT_OK);
}
@@ -446,6 +461,8 @@ int main(int argc, char **argv)
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
int prog_fd, map_fd, opt, err;
bool use_separators = true;
struct config cfg = { 0 };
@@ -487,7 +504,7 @@ int main(int argc, char **argv)
}
/* Parse commands line args */
while ((opt = getopt_long(argc, argv, "hSd:",
while ((opt = getopt_long(argc, argv, "FhSrmzd:s:a:",
long_options, &longindex)) != -1) {
switch (opt) {
case 'd':
@@ -524,6 +541,9 @@ int main(int argc, char **argv)
case 'm':
cfg_options |= SWAP_MAC;
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
case 'h':
error:
default:
@@ -576,6 +596,13 @@ int main(int argc, char **argv)
return EXIT_FAIL_XDP;
}
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
return err;
}
prog_id = info.id;
stats_poll(interval, action, cfg_options);
return EXIT_OK;
}

View File

@@ -12,6 +12,9 @@
#include <signal.h>
#include <libbpf.h>
#include <bpf/bpf.h>
#include <sys/resource.h>
#include <libgen.h>
#include <linux/if_link.h>
#include "perf-sys.h"
#include "trace_helpers.h"
@@ -20,25 +23,50 @@
static int pmu_fds[MAX_CPUS], if_idx;
static struct perf_event_mmap_page *headers[MAX_CPUS];
static char *if_name;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static __u32 prog_id;
static int do_attach(int idx, int fd, const char *name)
{
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
int err;
err = bpf_set_link_xdp_fd(idx, fd, 0);
if (err < 0)
err = bpf_set_link_xdp_fd(idx, fd, xdp_flags);
if (err < 0) {
printf("ERROR: failed to attach program to %s\n", name);
return err;
}
err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
return err;
}
prog_id = info.id;
return err;
}
static int do_detach(int idx, const char *name)
{
int err;
__u32 curr_prog_id = 0;
int err = 0;
err = bpf_set_link_xdp_fd(idx, -1, 0);
if (err < 0)
printf("ERROR: failed to detach program from %s\n", name);
err = bpf_get_link_xdp_id(idx, &curr_prog_id, 0);
if (err) {
printf("bpf_get_link_xdp_id failed\n");
return err;
}
if (prog_id == curr_prog_id) {
err = bpf_set_link_xdp_fd(idx, -1, 0);
if (err < 0)
printf("ERROR: failed to detach prog from %s\n", name);
} else if (!curr_prog_id) {
printf("couldn't find a prog id on a %s\n", name);
} else {
printf("program on interface changed, not removing\n");
}
return err;
}
@@ -97,20 +125,47 @@ static void sig_handler(int signo)
exit(0);
}
/*
 * Print the command-line synopsis and the supported options to stderr.
 * @prog: program name shown in the synopsis line.
 */
static void usage(const char *prog)
{
	static const char help_fmt[] =
		"%s: %s [OPTS] <ifname|ifindex>\n\n"
		"OPTS:\n"
		" -F force loading prog\n";

	/* First %s is this function's name, second is the program name. */
	fprintf(stderr, help_fmt, __func__, prog);
}
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
const char *optstr = "F";
int prog_fd, map_fd, opt;
struct bpf_object *obj;
struct bpf_map *map;
int prog_fd, map_fd;
char filename[256];
int ret, err, i;
int numcpus;
if (argc < 2) {
printf("Usage: %s <ifname>\n", argv[0]);
while ((opt = getopt(argc, argv, optstr)) != -1) {
switch (opt) {
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
default:
usage(basename(argv[0]));
return 1;
}
}
if (optind == argc) {
usage(basename(argv[0]));
return 1;
}
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
return 1;
}
@@ -136,16 +191,16 @@ int main(int argc, char **argv)
}
map_fd = bpf_map__fd(map);
if_idx = if_nametoindex(argv[1]);
if_idx = if_nametoindex(argv[optind]);
if (!if_idx)
if_idx = strtoul(argv[1], NULL, 0);
if_idx = strtoul(argv[optind], NULL, 0);
if (!if_idx) {
fprintf(stderr, "Invalid ifname\n");
return 1;
}
if_name = argv[1];
err = do_attach(if_idx, prog_fd, argv[1]);
if_name = argv[optind];
err = do_attach(if_idx, prog_fd, if_name);
if (err)
return err;

View File

@@ -17,7 +17,7 @@
#include <netinet/ether.h>
#include <unistd.h>
#include <time.h>
#include "bpf_load.h"
#include "bpf/libbpf.h"
#include <bpf/bpf.h>
#include "bpf_util.h"
#include "xdp_tx_iptunnel_common.h"
@@ -25,12 +25,26 @@
#define STATS_INTERVAL_S 2U
static int ifindex = -1;
static __u32 xdp_flags = 0;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static int rxcnt_map_fd;
static __u32 prog_id;
static void int_exit(int sig)
{
if (ifindex > -1)
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
__u32 curr_prog_id = 0;
if (ifindex > -1) {
if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
printf("bpf_get_link_xdp_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given iface\n");
else
printf("program on interface changed, not removing\n");
}
exit(0);
}
@@ -53,7 +67,8 @@ static void poll_stats(unsigned int kill_after_s)
for (proto = 0; proto < nr_protos; proto++) {
__u64 sum = 0;
assert(bpf_map_lookup_elem(map_fd[0], &proto, values) == 0);
assert(bpf_map_lookup_elem(rxcnt_map_fd, &proto,
values) == 0);
for (i = 0; i < nr_cpus; i++)
sum += (values[i] - prev[proto][i]);
@@ -81,6 +96,7 @@ static void usage(const char *cmd)
printf(" -P <IP-Protocol> Default is TCP\n");
printf(" -S use skb-mode\n");
printf(" -N enforce native mode\n");
printf(" -F Force loading the XDP prog\n");
printf(" -h Display this help\n");
}
@@ -138,16 +154,22 @@ static int parse_ports(const char *port_str, int *min_port, int *max_port)
int main(int argc, char **argv)
{
unsigned char opt_flags[256] = {};
unsigned int kill_after_s = 0;
const char *optstr = "i:a:p:s:d:m:T:P:SNh";
int min_port = 0, max_port = 0;
struct iptnl_info tnl = {};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int min_port = 0, max_port = 0, vip2tnl_map_fd;
const char *optstr = "i:a:p:s:d:m:T:P:FSNh";
unsigned char opt_flags[256] = {};
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
unsigned int kill_after_s = 0;
struct iptnl_info tnl = {};
struct bpf_object *obj;
struct vip vip = {};
char filename[256];
int opt;
int i;
int opt, prog_fd;
int i, err;
tnl.family = AF_UNSPEC;
vip.protocol = IPPROTO_TCP;
@@ -211,6 +233,9 @@ int main(int argc, char **argv)
case 'N':
xdp_flags |= XDP_FLAGS_DRV_MODE;
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
default:
usage(argv[0]);
return 1;
@@ -232,14 +257,20 @@ int main(int argc, char **argv)
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
if (!prog_fd) {
printf("load_bpf_file: %s\n", strerror(errno));
return 1;
}
if (!prog_fd[0]) {
printf("load_bpf_file: %s\n", strerror(errno));
rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
vip2tnl_map_fd = bpf_object__find_map_fd_by_name(obj, "vip2tnl");
if (vip2tnl_map_fd < 0 || rxcnt_map_fd < 0) {
printf("bpf_object__find_map_fd_by_name failed\n");
return 1;
}
@@ -248,17 +279,25 @@ int main(int argc, char **argv)
while (min_port <= max_port) {
vip.dport = htons(min_port++);
if (bpf_map_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) {
if (bpf_map_update_elem(vip2tnl_map_fd, &vip, &tnl,
BPF_NOEXIST)) {
perror("bpf_map_update_elem(&vip2tnl)");
return 1;
}
}
if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
return err;
}
prog_id = info.id;
poll_stats(kill_after_s);
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);

View File

@@ -1,11 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef XDPSOCK_H_
#define XDPSOCK_H_
/*
 * Number of sockets. Must be a power of 2: the XDP program that includes
 * this header wraps its round-robin index with "& (MAX_SOCKS - 1)", and
 * also uses this value as the xsks_map size.
 */
#define MAX_SOCKS 4
/*
 * Round-robin receive. Tested with "#if RR_LB" in the XDP program:
 * non-zero cycles the redirect index over the sockets, zero always
 * redirects to index 0.
 */
#define RR_LB 0
#endif /* XDPSOCK_H_ */

View File

@@ -1,56 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
#define KBUILD_MODNAME "foo"
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include "xdpsock.h"
/*
 * Single-entry array map (int key -> int value). xdp_sock_prog reads entry 0
 * and compares it with ctx->rx_queue_index to decide whether to handle a
 * packet. Presumably filled in from user space; the writer is not shown here.
 */
struct bpf_map_def SEC("maps") qidconf_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 1,
};
/*
 * BPF_MAP_TYPE_XSKMAP with MAX_SOCKS slots (int key -> int value). This is
 * the map xdp_sock_prog passes to bpf_redirect_map() together with the
 * chosen slot index.
 */
struct bpf_map_def SEC("maps") xsks_map = {
.type = BPF_MAP_TYPE_XSKMAP,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = MAX_SOCKS,
};
/*
 * Single-entry BPF_MAP_TYPE_PERCPU_ARRAY holding an unsigned int counter.
 * Only referenced by xdp_sock_prog when RR_LB is non-zero, where it stores
 * the round-robin slot index.
 */
struct bpf_map_def SEC("maps") rr_map = {
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(unsigned int),
.max_entries = 1,
};
/*
 * XDP entry point: redirect packets received on the configured RX queue to
 * a slot of xsks_map.
 *
 * @ctx: XDP context; only ctx->rx_queue_index is read.
 *
 * Returns XDP_ABORTED when a required map lookup fails, XDP_PASS for packets
 * received on any queue other than the one stored in qidconf_map[0], and
 * otherwise whatever bpf_redirect_map() returns for the selected slot.
 */
SEC("xdp_sock")
int xdp_sock_prog(struct xdp_md *ctx)
{
	int *qidconf, key = 0, idx;
#if RR_LB
	/* Declared only when used, so RR_LB == 0 leaves no unused local. */
	unsigned int *rr;
#endif

	qidconf = bpf_map_lookup_elem(&qidconf_map, &key);
	if (!qidconf)
		return XDP_ABORTED;

	/* Packets from other queues are left to the regular stack. */
	if (*qidconf != ctx->rx_queue_index)
		return XDP_PASS;

#if RR_LB /* NB! RR_LB is configured in xdpsock.h */
	rr = bpf_map_lookup_elem(&rr_map, &key);
	if (!rr)
		return XDP_ABORTED;

	/* MAX_SOCKS is a power of 2, so the mask wraps the counter. */
	*rr = (*rr + 1) & (MAX_SOCKS - 1);
	idx = *rr;
#else
	idx = 0;
#endif

	return bpf_redirect_map(&xsks_map, idx, 0);
}
/* License declaration for this object, marked with SEC("license"). */
char _license[] SEC("license") = "GPL";

File diff suppressed because it is too large Load Diff