[DO_NOT_APPLY 0/2] xdp-forwarder
Hi all, Since this worsens the networking situation unless combined with the userspace Router (which is to-be-done), I am formally sending this out for comments without expecting it to be applied for now. The net-vm's purpose is running the Linux drivers for any physical interfaces on the spectrum system. A net-vm (there could be multiple, one per IOMMU-group) will load the included XDP programs on the passed-through physical interfaces as well as the downstream virtio interface going into the router (recognized by its special MAC address) using mdev events. The net-vm needs to multiplex between the physical interfaces, as there might be several interfaces in the same IOMMU-group. For this, the XDP program loaded on the physical interfaces applies a VLAN tag corresponding to the interface id, and redirects the packets to the router interface (identified by the router_iface bpf map). In the other direction the XDP program loaded on the router interface removes one layer of VLAN tagging, and redirects the packets to the interface read from the VLAN tag. I have verified that when running a wpa_supplicant / iwd in the interface, a WiFi interface can correctly function as a physical interface with the XDP forwarder, assuming that the router sends packets with the correct source MAC address. Yureka Lilian (2): integrate xdp-forwarder into net-vm temporary changes for testing lib/nixpkgs.default.nix | 4 +- vm/sys/net/Makefile | 8 +- vm/sys/net/default.nix | 38 +++++-- vm/sys/net/etc/fstab | 1 + vm/sys/net/etc/mdev/iface | 23 +--- vm/sys/net/etc/nftables.conf | 8 -- vm/sys/net/etc/s6-rc/connman/dependencies | 4 - vm/sys/net/etc/s6-rc/connman/type | 1 - vm/sys/net/etc/s6-rc/connman/type.license | 2 - vm/sys/net/etc/s6-rc/nftables/type | 1 - vm/sys/net/etc/s6-rc/nftables/type.license | 2 - vm/sys/net/etc/s6-rc/nftables/up | 6 - vm/sys/net/xdp-forwarder/README.md | 9 ++ vm/sys/net/xdp-forwarder/default.nix | 35 ++++++ .../xdp-forwarder/include/parsing_helpers.h | 38 +++++++ .../xdp-forwarder/include/rewrite_helpers.h | 103 ++++++++++++++++++ vm/sys/net/xdp-forwarder/load_physical | 4 + vm/sys/net/xdp-forwarder/load_router | 6 + vm/sys/net/xdp-forwarder/prog_physical.c | 28 +++++ vm/sys/net/xdp-forwarder/prog_router.c | 34 ++++++ vm/sys/net/xdp-forwarder/set_router_iface.c | 31 ++++++ 21 files changed, 325 insertions(+), 61 deletions(-) delete mode 100644 vm/sys/net/etc/nftables.conf delete mode 100644 vm/sys/net/etc/s6-rc/connman/dependencies delete mode 100644 vm/sys/net/etc/s6-rc/connman/type delete mode 100644 vm/sys/net/etc/s6-rc/connman/type.license delete mode 100644 vm/sys/net/etc/s6-rc/nftables/type delete mode 100644 vm/sys/net/etc/s6-rc/nftables/type.license delete mode 100644 vm/sys/net/etc/s6-rc/nftables/up create mode 100644 vm/sys/net/xdp-forwarder/README.md create mode 100644 vm/sys/net/xdp-forwarder/default.nix create mode 100644 vm/sys/net/xdp-forwarder/include/parsing_helpers.h create mode 100644 vm/sys/net/xdp-forwarder/include/rewrite_helpers.h create mode 100755 vm/sys/net/xdp-forwarder/load_physical create mode 100755 vm/sys/net/xdp-forwarder/load_router create mode 100644 vm/sys/net/xdp-forwarder/prog_physical.c create mode 100644 vm/sys/net/xdp-forwarder/prog_router.c create mode 100644 vm/sys/net/xdp-forwarder/set_router_iface.c -- 2.50.1
Signed-off-by: Yureka Lilian <yureka@cyberchaos.dev> --- vm/sys/net/Makefile | 8 +- vm/sys/net/default.nix | 23 ++-- vm/sys/net/etc/fstab | 1 + vm/sys/net/etc/mdev/iface | 23 +--- vm/sys/net/etc/nftables.conf | 8 -- vm/sys/net/etc/s6-rc/connman/dependencies | 4 - vm/sys/net/etc/s6-rc/connman/type | 1 - vm/sys/net/etc/s6-rc/connman/type.license | 2 - vm/sys/net/etc/s6-rc/nftables/type | 1 - vm/sys/net/etc/s6-rc/nftables/type.license | 2 - vm/sys/net/etc/s6-rc/nftables/up | 6 - vm/sys/net/xdp-forwarder/README.md | 9 ++ vm/sys/net/xdp-forwarder/default.nix | 35 ++++++ .../xdp-forwarder/include/parsing_helpers.h | 38 +++++++ .../xdp-forwarder/include/rewrite_helpers.h | 103 ++++++++++++++++++ vm/sys/net/xdp-forwarder/load_physical | 4 + vm/sys/net/xdp-forwarder/load_router | 6 + vm/sys/net/xdp-forwarder/prog_physical.c | 28 +++++ vm/sys/net/xdp-forwarder/prog_router.c | 34 ++++++ vm/sys/net/xdp-forwarder/set_router_iface.c | 31 ++++++ 20 files changed, 308 insertions(+), 59 deletions(-) delete mode 100644 vm/sys/net/etc/nftables.conf delete mode 100644 vm/sys/net/etc/s6-rc/connman/dependencies delete mode 100644 vm/sys/net/etc/s6-rc/connman/type delete mode 100644 vm/sys/net/etc/s6-rc/connman/type.license delete mode 100644 vm/sys/net/etc/s6-rc/nftables/type delete mode 100644 vm/sys/net/etc/s6-rc/nftables/type.license delete mode 100644 vm/sys/net/etc/s6-rc/nftables/up create mode 100644 vm/sys/net/xdp-forwarder/README.md create mode 100644 vm/sys/net/xdp-forwarder/default.nix create mode 100644 vm/sys/net/xdp-forwarder/include/parsing_helpers.h create mode 100644 vm/sys/net/xdp-forwarder/include/rewrite_helpers.h create mode 100755 vm/sys/net/xdp-forwarder/load_physical create mode 100755 vm/sys/net/xdp-forwarder/load_router create mode 100644 vm/sys/net/xdp-forwarder/prog_physical.c create mode 100644 vm/sys/net/xdp-forwarder/prog_router.c create mode 100644 vm/sys/net/xdp-forwarder/set_router_iface.c diff --git a/vm/sys/net/Makefile b/vm/sys/net/Makefile index e681940..9576a92 100644 --- a/vm/sys/net/Makefile +++ b/vm/sys/net/Makefile @@ -34,12 +34,11 @@ VM_FILES = \ etc/init \ etc/mdev.conf \ etc/mdev/iface \ - etc/nftables.conf \ etc/passwd \ etc/s6-linux-init/run-image/service/getty-hvc0/run \ etc/s6-linux-init/scripts/rc.init \ etc/sysctl.conf -VM_DIRS = dev etc/s6-linux-init/env run proc sys var/lib/connman +VM_DIRS = dev etc/s6-linux-init/env run proc sys # These are separate because they need to be included, but putting # them as make dependencies would confuse make. @@ -59,9 +58,6 @@ build/rootfs.erofs: ../../../scripts/make-erofs.sh $(PACKAGES_FILE) $(VM_FILES) ) | ../../../scripts/make-erofs.sh $@ VM_S6_RC_FILES = \ - etc/s6-rc/connman/dependencies \ - etc/s6-rc/connman/run \ - etc/s6-rc/connman/type \ etc/s6-rc/dbus/notification-fd \ etc/s6-rc/dbus/run \ etc/s6-rc/dbus/type \ @@ -71,8 +67,6 @@ VM_S6_RC_FILES = \ etc/s6-rc/mdevd/notification-fd \ etc/s6-rc/mdevd/run \ etc/s6-rc/mdevd/type \ - etc/s6-rc/nftables/type \ - etc/s6-rc/nftables/up \ etc/s6-rc/ok-all/contents \ etc/s6-rc/ok-all/type \ etc/s6-rc/sysctl/type \ diff --git a/vm/sys/net/default.nix b/vm/sys/net/default.nix index b5873eb..335c938 100644 --- a/vm/sys/net/default.nix +++ b/vm/sys/net/default.nix @@ -1,23 +1,26 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: 2021-2023 Alyssa Ross <hi@alyssa.is> +# SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> -import ../../../lib/call-package.nix ({ lseek, src, terminfo, pkgsStatic }: -pkgsStatic.callPackage ( +import ../../../lib/call-package.nix ({ lseek, src, terminfo, pkgsMusl }: +pkgsMusl.callPackage ( { lib, stdenvNoCC, nixos, runCommand, writeClosure , erofs-utils, jq, s6-rc, util-linux, xorg -, busybox, connmanMinimal, dbus, execline, kmod, linux_latest, mdevd, nftables -, s6, s6-linux-init +, busybox, dbus, execline, kmod, linux_latest, mdevd +, s6, s6-linux-init, xdp-tools }: let inherit (lib) concatMapStringsSep; inherit (nixosAllHardware.config.hardware) firmware; - connman = connmanMinimal; - packages = [ - connman dbus execline kmod mdevd s6 s6-linux-init s6-rc + dbus execline kmod mdevd s6 s6-linux-init s6-rc xdp-tools + + (pkgsMusl.callPackage ./xdp-forwarder { + linux = kernel; + }) (busybox.override { extraConfig = '' @@ -30,13 +33,13 @@ let CONFIG_RMMOD n ''; }) - - (nftables.override { withCli = false; }) ]; # Packages that should be fully linked into /usr, # (not just their bin/* files). - usrPackages = [ connman dbus firmware kernel terminfo ]; + usrPackages = [ + dbus firmware kernel terminfo + ]; packagesSysroot = runCommand "packages-sysroot" { inherit packages; diff --git a/vm/sys/net/etc/fstab b/vm/sys/net/etc/fstab index 6a82ecc..06f26ff 100644 --- a/vm/sys/net/etc/fstab +++ b/vm/sys/net/etc/fstab @@ -4,3 +4,4 @@ proc /proc proc defaults 0 0 devpts /dev/pts devpts defaults,gid=4,mode=620 0 0 tmpfs /dev/shm tmpfs defaults 0 0 sysfs /sys sysfs defaults 0 0 +bpffs /sys/fs/bpf bpf defaults 0 0 diff --git a/vm/sys/net/etc/mdev/iface b/vm/sys/net/etc/mdev/iface index 2306575..9724690 100755 --- a/vm/sys/net/etc/mdev/iface +++ b/vm/sys/net/etc/mdev/iface @@ -1,6 +1,7 @@ #!/bin/execlineb -P # SPDX-License-Identifier: EUPL-1.2+ # SPDX-FileCopyrightText: 2020-2021 Alyssa Ross <hi@alyssa.is> +# SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> importas -Si INTERFACE @@ -8,29 +9,15 @@ ifte { # This interface is connected to another VM. - - # The other VM's IP is encoded in the NIC-specific portion of the - # interface's MAC address. - backtick -E CLIENT_IP { - awk -F: "{printf \"100.64.%d.%d\\n\", \"0x\" $5, \"0x\" $6}" - /sys/class/net/${INTERFACE}/address - } - - if { ip address add 169.254.0.1/32 dev $INTERFACE } - if { ip link set $INTERFACE up } - ip route add $CLIENT_IP dev $INTERFACE + if { load_router $INTERFACE } + ip link set $INTERFACE up } { if { test $INTERFACE != lo } # This is a physical connection to a network device. - background { s6-rc -bu change connman } - if { s6-rc -bu change nftables } - if { - forx -pE module { nft_counter nft_masq } - modprobe $module - } - nft add rule ip nat postrouting oifname $INTERFACE counter masquerade + if { load_physical $INTERFACE } + ip link set $INTERFACE up } grep -iq ^02:01: /sys/class/net/${INTERFACE}/address diff --git a/vm/sys/net/etc/nftables.conf b/vm/sys/net/etc/nftables.conf deleted file mode 100644 index 296d92c..0000000 --- a/vm/sys/net/etc/nftables.conf +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: EUPL-1.2+ -# SPDX-FileCopyrightText: 2021 Alyssa Ross <hi@alyssa.is> - -table nat { - chain postrouting { - type nat hook postrouting priority 100; - } -} diff --git a/vm/sys/net/etc/s6-rc/connman/dependencies b/vm/sys/net/etc/s6-rc/connman/dependencies deleted file mode 100644 index 23bda19..0000000 --- a/vm/sys/net/etc/s6-rc/connman/dependencies +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: CC0-1.0 -# SPDX-FileCopyrightText: 2020 Alyssa Ross <hi@alyssa.is> -# -dbus diff --git a/vm/sys/net/etc/s6-rc/connman/type b/vm/sys/net/etc/s6-rc/connman/type deleted file mode 100644 index 5883cff..0000000 --- a/vm/sys/net/etc/s6-rc/connman/type +++ /dev/null @@ -1 +0,0 @@ -longrun diff --git a/vm/sys/net/etc/s6-rc/connman/type.license b/vm/sys/net/etc/s6-rc/connman/type.license deleted file mode 100644 index 2b3b032..0000000 --- a/vm/sys/net/etc/s6-rc/connman/type.license +++ /dev/null @@ -1,2 +0,0 @@ -SPDX-License-Identifier: CC0-1.0 -SPDX-FileCopyrightText: 2020 Alyssa Ross <hi@alyssa.is> diff --git a/vm/sys/net/etc/s6-rc/nftables/type b/vm/sys/net/etc/s6-rc/nftables/type deleted file mode 100644 index bdd22a1..0000000 --- a/vm/sys/net/etc/s6-rc/nftables/type +++ /dev/null @@ -1 +0,0 @@ -oneshot diff --git a/vm/sys/net/etc/s6-rc/nftables/type.license b/vm/sys/net/etc/s6-rc/nftables/type.license deleted file mode 100644 index c49c11b..0000000 --- a/vm/sys/net/etc/s6-rc/nftables/type.license +++ /dev/null @@ -1,2 +0,0 @@ -SPDX-License-Identifier: CC0-1.0 -SPDX-FileCopyrightText: 2021 Alyssa Ross <hi@alyssa.is> diff --git a/vm/sys/net/etc/s6-rc/nftables/up b/vm/sys/net/etc/s6-rc/nftables/up deleted file mode 100644 index 7d5f141..0000000 --- a/vm/sys/net/etc/s6-rc/nftables/up +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: EUPL-1.2+ -# SPDX-FileCopyrightText: 2021 Alyssa Ross <hi@alyssa.is> - -if { modprobe nft_chain_nat } - -nft -f /etc/nftables.conf diff --git a/vm/sys/net/xdp-forwarder/README.md b/vm/sys/net/xdp-forwarder/README.md new file mode 100644 index 0000000..2aa6585 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/README.md @@ -0,0 +1,9 @@ +### xdp-forwarder + +The xdp forwarder consists of a redirect_kern.c (which is the actual XDP program, basically just a one-liner) and redirect_user.c (which loads the XDP program and sets up the maps with the interfaces passed on the command line). + +It behaves like an ethernet hub between two interfaces. + +Temporary repository, to be inlined into the [Spectrum](https://spectrum-os.org/) monorepo. + +This work was funded by [NGI Zero](https://www.ngi.eu/ngi-projects/ngi-zero/), an initiative by the Digital Single Market of the European Commission. diff --git a/vm/sys/net/xdp-forwarder/default.nix b/vm/sys/net/xdp-forwarder/default.nix new file mode 100644 index 0000000..75b1d66 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/default.nix @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> + +{ lib, runCommand, stdenv, llvmPackages, libbpf, linux, bpftools }: + +stdenv.mkDerivation { + pname = "xdp-forwarder"; + version = "0"; + + src = lib.fileset.toSource { + root = ./.; + fileset = lib.fileset.fileFilter + ({ hasExt, ... }: !(hasExt "nix") && !(hasExt "md")) ./.; + }; + + buildInputs = [ libbpf ]; + nativeBuildInputs = [ llvmPackages.clang-unwrapped bpftools ]; + + buildPhase = '' + bpftool btf dump file ${linux.dev}/vmlinux format c > include/vmlinux.h + for prog in physical router; do + clang $NIX_CFLAGS_COMPILE -O2 -g -Wall -target bpf -I include -c prog_$prog.c -o prog_$prog.o + substituteInPlace load_$prog --replace-fail prog_$prog.o $out/lib/prog_$prog.o + done + $CC -lbpf -I include -o set_router_iface set_router_iface.c + ''; + + installPhase = '' + for prog in physical router; do + install -Dm644 prog_$prog.o $out/lib/prog_$prog.o + install -Dm755 load_$prog $out/bin/load_$prog + done + install -Dm755 set_router_iface $out/bin/set_router_iface + ''; +} diff --git a/vm/sys/net/xdp-forwarder/include/parsing_helpers.h b/vm/sys/net/xdp-forwarder/include/parsing_helpers.h new file mode 100644 index 0000000..2446e56 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/include/parsing_helpers.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-clause) */ +/* SPDX-FileCopyrightText: 2021 The xdp-tutorial Authors */ +/* SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> */ +/* Based on https://github.com/xdp-project/xdp-tutorial/blob/main/common/parsing_helpers... */ + +#ifndef __PARSING_HELPERS_H +#define __PARSING_HELPERS_H + +#include <bpf/bpf_endian.h> + +//#include <linux/if_ether.h> +#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */ +#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ + +static __always_inline int proto_is_vlan(__u16 h_proto) +{ + return !!(h_proto == bpf_htons(ETH_P_8021Q) || + h_proto == bpf_htons(ETH_P_8021AD)); +} + +static __always_inline int parse_ethhdr(struct xdp_md *ctx, + struct ethhdr **ethhdr) +{ + void *data_end = (void *)(long)ctx->data_end; + struct ethhdr *eth = (void *) (long) ctx->data; + + /* Byte-count bounds check; check if current pointer + size of header + * is after data_end. + */ + if ((void *) (eth + 1) > data_end) + return -1; + + *ethhdr = eth; + + return eth->h_proto; /* network-byte-order */ +} + +#endif diff --git a/vm/sys/net/xdp-forwarder/include/rewrite_helpers.h b/vm/sys/net/xdp-forwarder/include/rewrite_helpers.h new file mode 100644 index 0000000..7fa6e2c --- /dev/null +++ b/vm/sys/net/xdp-forwarder/include/rewrite_helpers.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-clause) */ +/* SPDX-FileCopyrightText: 2019 The xdp-tutorial Authors */ +/* SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> */ +/* Based on: https://github.com/xdp-project/xdp-tutorial/blob/main/common/rewrite_helpers... */ + +#ifndef __REWRITE_HELPERS_H +#define __REWRITE_HELPERS_H + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +/* Pops the outermost VLAN tag off the packet. Returns the popped VLAN ID on + * success or negative errno on failure. + */ +static __always_inline int vlan_tag_pop(struct xdp_md *ctx, struct ethhdr *eth) +{ + void *data_end = (void *)(long)ctx->data_end; + struct ethhdr eth_cpy; + struct vlan_hdr *vlh; + __be16 h_proto; + int vlid; + + if (!proto_is_vlan(eth->h_proto)) + return -1; + + /* Careful with the parenthesis here */ + vlh = (void *)(eth + 1); + + /* Still need to do bounds checking */ + if ((void *) (vlh + 1) > data_end) + return -1; + + /* Save vlan ID for returning, h_proto for updating Ethernet header */ + vlid = bpf_ntohs(vlh->h_vlan_TCI); + h_proto = vlh->h_vlan_encapsulated_proto; + + /* Make a copy of the outer Ethernet header before we cut it off */ + __builtin_memcpy(ð_cpy, eth, sizeof(eth_cpy)); + + /* Actually adjust the head pointer */ + if (bpf_xdp_adjust_head(ctx, (int)sizeof(*vlh))) + return -1; + + /* Need to re-evaluate data *and* data_end and do new bounds checking + * after adjusting head + */ + eth = (void *)(long)ctx->data; + data_end = (void *)(long)ctx->data_end; + if ((void *) (eth + 1) > data_end) + return -1; + + /* Copy back the old Ethernet header and update the proto type */ + __builtin_memcpy(eth, ð_cpy, sizeof(*eth)); + eth->h_proto = h_proto; + + return vlid; +} + +/* Pushes a new VLAN tag after the Ethernet header. Returns 0 on success, + * -1 on failure. + */ +static __always_inline int vlan_tag_push(struct xdp_md *ctx, + struct ethhdr *eth, int vlid) +{ + void *data_end = (void *)(long)ctx->data_end; + struct ethhdr eth_cpy; + struct vlan_hdr *vlh; + + /* First copy the original Ethernet header */ + __builtin_memcpy(ð_cpy, eth, sizeof(eth_cpy)); + + /* Then add space in front of the packet */ + if (bpf_xdp_adjust_head(ctx, 0 - (int)sizeof(*vlh))) + return -1; + + /* Need to re-evaluate data_end and data after head adjustment, and + * bounds check, even though we know there is enough space (as we + * increased it). + */ + data_end = (void *)(long)ctx->data_end; + eth = (void *)(long)ctx->data; + + if ((void *) (eth + 1) > data_end) + return -1; + + /* Copy back Ethernet header in the right place, populate VLAN tag with + * ID and proto, and set outer Ethernet header to VLAN type. + */ + __builtin_memcpy(eth, ð_cpy, sizeof(*eth)); + + vlh = (void *)(eth + 1); + + if ((void *) (vlh + 1) > data_end) + return -1; + + vlh->h_vlan_TCI = bpf_htons(vlid); + vlh->h_vlan_encapsulated_proto = eth->h_proto; + + eth->h_proto = bpf_htons(ETH_P_8021Q); + return 0; +} + +#endif diff --git a/vm/sys/net/xdp-forwarder/load_physical b/vm/sys/net/xdp-forwarder/load_physical new file mode 100755 index 0000000..13473e4 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/load_physical @@ -0,0 +1,4 @@ +#!/bin/execlineb -S1 +# SPDX-License-Identifier: EUPL-1.2+ +# SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> +xdp-loader load ${1} prog_physical.o -m skb -p /sys/fs/bpf diff --git a/vm/sys/net/xdp-forwarder/load_router b/vm/sys/net/xdp-forwarder/load_router new file mode 100755 index 0000000..d16c26d --- /dev/null +++ b/vm/sys/net/xdp-forwarder/load_router @@ -0,0 +1,6 @@ +#!/bin/execlineb -S1 +# SPDX-License-Identifier: EUPL-1.2+ +# SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> +if { xdp-loader load ${1} prog_router.o -m skb -p /sys/fs/bpf } +if { ip link set ${1} promisc on } +set_router_iface ${1} diff --git a/vm/sys/net/xdp-forwarder/prog_physical.c b/vm/sys/net/xdp-forwarder/prog_physical.c new file mode 100644 index 0000000..d547bdd --- /dev/null +++ b/vm/sys/net/xdp-forwarder/prog_physical.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: EUPL-1.2+ +// SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "parsing_helpers.h" +#include "rewrite_helpers.h" + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} router_iface SEC(".maps"); + +SEC("xdp") +int physical(struct xdp_md *ctx) { + struct ethhdr *eth; + if (parse_ethhdr(ctx, ð) < 0) + return XDP_DROP; + + if (ctx->ingress_ifindex < 1 || ctx->ingress_ifindex > 4096) + return XDP_DROP; + + vlan_tag_push(ctx, eth, ctx->ingress_ifindex); + return bpf_redirect_map(&router_iface, 0, 0); +} diff --git a/vm/sys/net/xdp-forwarder/prog_router.c b/vm/sys/net/xdp-forwarder/prog_router.c new file mode 100644 index 0000000..18e97cc --- /dev/null +++ b/vm/sys/net/xdp-forwarder/prog_router.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: EUPL-1.2+ +// SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "parsing_helpers.h" +#include "rewrite_helpers.h" + +// The map is actually not used by this program, but just included +// to keep the reference-counted pin alive before any physical interfaces +// are added. +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} router_iface SEC(".maps"); + + +SEC("xdp") +int router(struct xdp_md *ctx) { + struct ethhdr *eth; + int r; + if (r = parse_ethhdr(ctx, ð) < 0) + return XDP_DROP; + + int vlid = vlan_tag_pop(ctx, eth); + if (vlid < 0) { + return XDP_DROP; + } + + return bpf_redirect(vlid, 0); +} diff --git a/vm/sys/net/xdp-forwarder/set_router_iface.c b/vm/sys/net/xdp-forwarder/set_router_iface.c new file mode 100644 index 0000000..c828ee3 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/set_router_iface.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: EUPL-1.2+ +// SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> + +#include <stdio.h> +#include <net/if.h> +#include <bpf/bpf.h> + +int main(int argc, char **argv) { + if (argc < 2) { + fprintf(stderr, "missing interface name\n"); + return 1; + } + + int router_idx = if_nametoindex(argv[1]); + if (router_idx <= 0) { + perror("error getting router interface"); + return 1; + } + + int map_fd = bpf_obj_get("/sys/fs/bpf/router_iface"); + if (map_fd < 0) { + perror("failed to open bpf map"); + return 1; + } + + int id = 0; + if (bpf_map_update_elem(map_fd, &id, &router_idx, 0) < 0) { + perror("failed to update bpf map"); + return 1; + } +} -- 2.50.1
I don't know much about BPF (or networking really, for that matter), so I'll focus my comments on the integration into Spectrum. Hopefully somebody else is able to help with reviewing the actual functionality. Yureka Lilian <yureka@cyberchaos.dev> writes:
Signed-off-by: Yureka Lilian <yureka@cyberchaos.dev> --- vm/sys/net/Makefile | 8 +- vm/sys/net/default.nix | 23 ++-- vm/sys/net/etc/fstab | 1 + vm/sys/net/etc/mdev/iface | 23 +--- vm/sys/net/etc/nftables.conf | 8 -- vm/sys/net/etc/s6-rc/connman/dependencies | 4 - vm/sys/net/etc/s6-rc/connman/type | 1 - vm/sys/net/etc/s6-rc/connman/type.license | 2 - vm/sys/net/etc/s6-rc/nftables/type | 1 - vm/sys/net/etc/s6-rc/nftables/type.license | 2 - vm/sys/net/etc/s6-rc/nftables/up | 6 - vm/sys/net/xdp-forwarder/README.md | 9 ++ vm/sys/net/xdp-forwarder/default.nix | 35 ++++++ .../xdp-forwarder/include/parsing_helpers.h | 38 +++++++ .../xdp-forwarder/include/rewrite_helpers.h | 103 ++++++++++++++++++ vm/sys/net/xdp-forwarder/load_physical | 4 + vm/sys/net/xdp-forwarder/load_router | 6 + vm/sys/net/xdp-forwarder/prog_physical.c | 28 +++++ vm/sys/net/xdp-forwarder/prog_router.c | 34 ++++++ vm/sys/net/xdp-forwarder/set_router_iface.c | 31 ++++++
Could this be integrated into tools/? I'm hoping we can work towards having a common build system for all our compiled code. Currently it only has a host/guest distinction, but we probably should change that to host/driver/app.
20 files changed, 308 insertions(+), 59 deletions(-) delete mode 100644 vm/sys/net/etc/nftables.conf delete mode 100644 vm/sys/net/etc/s6-rc/connman/dependencies delete mode 100644 vm/sys/net/etc/s6-rc/connman/type delete mode 100644 vm/sys/net/etc/s6-rc/connman/type.license delete mode 100644 vm/sys/net/etc/s6-rc/nftables/type delete mode 100644 vm/sys/net/etc/s6-rc/nftables/type.license delete mode 100644 vm/sys/net/etc/s6-rc/nftables/up create mode 100644 vm/sys/net/xdp-forwarder/README.md create mode 100644 vm/sys/net/xdp-forwarder/default.nix create mode 100644 vm/sys/net/xdp-forwarder/include/parsing_helpers.h create mode 100644 vm/sys/net/xdp-forwarder/include/rewrite_helpers.h create mode 100755 vm/sys/net/xdp-forwarder/load_physical create mode 100755 vm/sys/net/xdp-forwarder/load_router create mode 100644 vm/sys/net/xdp-forwarder/prog_physical.c create mode 100644 vm/sys/net/xdp-forwarder/prog_router.c create mode 100644 vm/sys/net/xdp-forwarder/set_router_iface.c
diff --git a/vm/sys/net/Makefile b/vm/sys/net/Makefile index e681940..9576a92 100644 --- a/vm/sys/net/Makefile +++ b/vm/sys/net/Makefile @@ -34,12 +34,11 @@ VM_FILES = \ etc/init \ etc/mdev.conf \ etc/mdev/iface \ - etc/nftables.conf \ etc/passwd \ etc/s6-linux-init/run-image/service/getty-hvc0/run \ etc/s6-linux-init/scripts/rc.init \ etc/sysctl.conf -VM_DIRS = dev etc/s6-linux-init/env run proc sys var/lib/connman +VM_DIRS = dev etc/s6-linux-init/env run proc sys
# These are separate because they need to be included, but putting # them as make dependencies would confuse make. @@ -59,9 +58,6 @@ build/rootfs.erofs: ../../../scripts/make-erofs.sh $(PACKAGES_FILE) $(VM_FILES) ) | ../../../scripts/make-erofs.sh $@
VM_S6_RC_FILES = \ - etc/s6-rc/connman/dependencies \ - etc/s6-rc/connman/run \ - etc/s6-rc/connman/type \ etc/s6-rc/dbus/notification-fd \ etc/s6-rc/dbus/run \ etc/s6-rc/dbus/type \ @@ -71,8 +67,6 @@ VM_S6_RC_FILES = \ etc/s6-rc/mdevd/notification-fd \ etc/s6-rc/mdevd/run \ etc/s6-rc/mdevd/type \ - etc/s6-rc/nftables/type \ - etc/s6-rc/nftables/up \ etc/s6-rc/ok-all/contents \ etc/s6-rc/ok-all/type \ etc/s6-rc/sysctl/type \ diff --git a/vm/sys/net/default.nix b/vm/sys/net/default.nix index b5873eb..335c938 100644 --- a/vm/sys/net/default.nix +++ b/vm/sys/net/default.nix @@ -1,23 +1,26 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: 2021-2023 Alyssa Ross <hi@alyssa.is> +# SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev>
-import ../../../lib/call-package.nix ({ lseek, src, terminfo, pkgsStatic }: -pkgsStatic.callPackage ( +import ../../../lib/call-package.nix ({ lseek, src, terminfo, pkgsMusl }: +pkgsMusl.callPackage (
{ lib, stdenvNoCC, nixos, runCommand, writeClosure , erofs-utils, jq, s6-rc, util-linux, xorg -, busybox, connmanMinimal, dbus, execline, kmod, linux_latest, mdevd, nftables -, s6, s6-linux-init +, busybox, dbus, execline, kmod, linux_latest, mdevd +, s6, s6-linux-init, xdp-tools }:
let inherit (lib) concatMapStringsSep; inherit (nixosAllHardware.config.hardware) firmware;
- connman = connmanMinimal; - packages = [ - connman dbus execline kmod mdevd s6 s6-linux-init s6-rc + dbus execline kmod mdevd s6 s6-linux-init s6-rc xdp-tools + + (pkgsMusl.callPackage ./xdp-forwarder { + linux = kernel; + })
(busybox.override { extraConfig = '' @@ -30,13 +33,13 @@ let CONFIG_RMMOD n ''; }) - - (nftables.override { withCli = false; }) ];
# Packages that should be fully linked into /usr, # (not just their bin/* files). - usrPackages = [ connman dbus firmware kernel terminfo ]; + usrPackages = [ + dbus firmware kernel terminfo + ];
packagesSysroot = runCommand "packages-sysroot" { inherit packages; diff --git a/vm/sys/net/etc/fstab b/vm/sys/net/etc/fstab index 6a82ecc..06f26ff 100644 --- a/vm/sys/net/etc/fstab +++ b/vm/sys/net/etc/fstab @@ -4,3 +4,4 @@ proc /proc proc defaults 0 0 devpts /dev/pts devpts defaults,gid=4,mode=620 0 0 tmpfs /dev/shm tmpfs defaults 0 0 sysfs /sys sysfs defaults 0 0 +bpffs /sys/fs/bpf bpf defaults 0 0 diff --git a/vm/sys/net/etc/mdev/iface b/vm/sys/net/etc/mdev/iface index 2306575..9724690 100755 --- a/vm/sys/net/etc/mdev/iface +++ b/vm/sys/net/etc/mdev/iface @@ -1,6 +1,7 @@ #!/bin/execlineb -P # SPDX-License-Identifier: EUPL-1.2+ # SPDX-FileCopyrightText: 2020-2021 Alyssa Ross <hi@alyssa.is> +# SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev>
importas -Si INTERFACE
@@ -8,29 +9,15 @@ ifte
{ # This interface is connected to another VM.
This comment should probably be reworded a bit.
- - # The other VM's IP is encoded in the NIC-specific portion of the - # interface's MAC address. - backtick -E CLIENT_IP { - awk -F: "{printf \"100.64.%d.%d\\n\", \"0x\" $5, \"0x\" $6}" - /sys/class/net/${INTERFACE}/address - } - - if { ip address add 169.254.0.1/32 dev $INTERFACE } - if { ip link set $INTERFACE up } - ip route add $CLIENT_IP dev $INTERFACE + if { load_router $INTERFACE } + ip link set $INTERFACE up }
{ if { test $INTERFACE != lo } # This is a physical connection to a network device. - background { s6-rc -bu change connman } - if { s6-rc -bu change nftables } - if { - forx -pE module { nft_counter nft_masq } - modprobe $module - } - nft add rule ip nat postrouting oifname $INTERFACE counter masquerade + if { load_physical $INTERFACE } + ip link set $INTERFACE up }
grep -iq ^02:01: /sys/class/net/${INTERFACE}/address diff --git a/vm/sys/net/xdp-forwarder/README.md b/vm/sys/net/xdp-forwarder/README.md new file mode 100644 index 0000000..2aa6585 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/README.md @@ -0,0 +1,9 @@ +### xdp-forwarder + +The xdp forwarder consists of a redirect_kern.c (which is the actual XDP program, basically just a one-liner) and redirect_user.c (which loads the XDP program and sets up the maps with the interfaces passed on the command line). + +It behaves like an ethernet hub between two interfaces. + +Temporary repository, to be inlined into the [Spectrum](https://spectrum-os.org/) monorepo. + +This work was funded by [NGI Zero](https://www.ngi.eu/ngi-projects/ngi-zero/), an initiative by the Digital Single Market of the European Commission.
This should be integrated into the Documentation/ we already have. It's a bit of a mess in ther at the moment, though. I expect we'll be getting some help with that early next year, so would be fine to be without documentation until then.
diff --git a/vm/sys/net/xdp-forwarder/default.nix b/vm/sys/net/xdp-forwarder/default.nix new file mode 100644 index 0000000..75b1d66 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/default.nix @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> + +{ lib, runCommand, stdenv, llvmPackages, libbpf, linux, bpftools }: + +stdenv.mkDerivation { + pname = "xdp-forwarder"; + version = "0"; + + src = lib.fileset.toSource { + root = ./.; + fileset = lib.fileset.fileFilter + ({ hasExt, ... }: !(hasExt "nix") && !(hasExt "md")) ./.; + }; + + buildInputs = [ libbpf ]; + nativeBuildInputs = [ llvmPackages.clang-unwrapped bpftools ]; + + buildPhase = '' + bpftool btf dump file ${linux.dev}/vmlinux format c > include/vmlinux.h
I guess we're still missing a vmlinux.h package in Nixpkgs? That would be much cleaner.
+ for prog in physical router; do + clang $NIX_CFLAGS_COMPILE -O2 -g -Wall -target bpf -I include -c prog_$prog.c -o prog_$prog.o + substituteInPlace load_$prog --replace-fail prog_$prog.o $out/lib/prog_$prog.o + done + $CC -lbpf -I include -o set_router_iface set_router_iface.c + ''; + + installPhase = '' + for prog in physical router; do + install -Dm644 prog_$prog.o $out/lib/prog_$prog.o + install -Dm755 load_$prog $out/bin/load_$prog + done + install -Dm755 set_router_iface $out/bin/set_router_iface + ''; +} diff --git a/vm/sys/net/xdp-forwarder/include/parsing_helpers.h b/vm/sys/net/xdp-forwarder/include/parsing_helpers.h new file mode 100644 index 0000000..2446e56 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/include/parsing_helpers.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-clause) */ +/* SPDX-FileCopyrightText: 2021 The xdp-tutorial Authors */ +/* SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> */ +/* Based on https://github.com/xdp-project/xdp-tutorial/blob/main/common/parsing_helpers... */ + +#ifndef __PARSING_HELPERS_H +#define __PARSING_HELPERS_H + +#include <bpf/bpf_endian.h> + +//#include <linux/if_ether.h> +#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */ +#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */
Looks like there's still something to do here?
+ +static __always_inline int proto_is_vlan(__u16 h_proto) +{ + return !!(h_proto == bpf_htons(ETH_P_8021Q) || + h_proto == bpf_htons(ETH_P_8021AD)); +} + +static __always_inline int parse_ethhdr(struct xdp_md *ctx, + struct ethhdr **ethhdr) +{ + void *data_end = (void *)(long)ctx->data_end; + struct ethhdr *eth = (void *) (long) ctx->data;
uintptr_t might be more appropriate than long? There's some inconsistent formatting here too. I've pushed an uncrustify config file that resolves the inconsistencies I noticed though, so should be possible to avoid going forward without having to think about it. :)
+ + /* Byte-count bounds check; check if current pointer + size of header + * is after data_end. + */ + if ((void *) (eth + 1) > data_end) + return -1;
This is checking that there's more data after the header, right? Is that something it's important for us to check?
+ + *ethhdr = eth; + + return eth->h_proto; /* network-byte-order */
Why return this when we're already outputting the whole struct ethhdr?
+} + +#endif diff --git a/vm/sys/net/xdp-forwarder/include/rewrite_helpers.h b/vm/sys/net/xdp-forwarder/include/rewrite_helpers.h new file mode 100644 index 0000000..7fa6e2c --- /dev/null +++ b/vm/sys/net/xdp-forwarder/include/rewrite_helpers.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-clause) */ +/* SPDX-FileCopyrightText: 2019 The xdp-tutorial Authors */ +/* SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> */ +/* Based on: https://github.com/xdp-project/xdp-tutorial/blob/main/common/rewrite_helpers... */ + +#ifndef __REWRITE_HELPERS_H +#define __REWRITE_HELPERS_H + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +/* Pops the outermost VLAN tag off the packet. Returns the popped VLAN ID on + * success or negative errno on failure. + */ +static __always_inline int vlan_tag_pop(struct xdp_md *ctx, struct ethhdr *eth) +{ + void *data_end = (void *)(long)ctx->data_end; + struct ethhdr eth_cpy; + struct vlan_hdr *vlh; + __be16 h_proto; + int vlid; + + if (!proto_is_vlan(eth->h_proto)) + return -1; + + /* Careful with the parenthesis here */ + vlh = (void *)(eth + 1); + + /* Still need to do bounds checking */ + if ((void *) (vlh + 1) > data_end) + return -1; + + /* Save vlan ID for returning, h_proto for updating Ethernet header */ + vlid = bpf_ntohs(vlh->h_vlan_TCI); + h_proto = vlh->h_vlan_encapsulated_proto; + + /* Make a copy of the outer Ethernet header before we cut it off */ + __builtin_memcpy(ð_cpy, eth, sizeof(eth_cpy)); + + /* Actually adjust the head pointer */ + if (bpf_xdp_adjust_head(ctx, (int)sizeof(*vlh))) + return -1; + + /* Need to re-evaluate data *and* data_end and do new bounds checking + * after adjusting head + */ + eth = (void *)(long)ctx->data; + data_end = (void *)(long)ctx->data_end; + if ((void *) (eth + 1) > data_end) + return -1; + + /* Copy back the old Ethernet header and update the proto type */ + __builtin_memcpy(eth, ð_cpy, sizeof(*eth)); + eth->h_proto = h_proto; + + return vlid; +} + +/* Pushes a new VLAN tag after the Ethernet header. Returns 0 on success, + * -1 on failure. + */ +static __always_inline int vlan_tag_push(struct xdp_md *ctx, + struct ethhdr *eth, int vlid) +{ + void *data_end = (void *)(long)ctx->data_end; + struct ethhdr eth_cpy; + struct vlan_hdr *vlh; + + /* First copy the original Ethernet header */ + __builtin_memcpy(ð_cpy, eth, sizeof(eth_cpy)); + + /* Then add space in front of the packet */ + if (bpf_xdp_adjust_head(ctx, 0 - (int)sizeof(*vlh))) + return -1; + + /* Need to re-evaluate data_end and data after head adjustment, and + * bounds check, even though we know there is enough space (as we + * increased it). + */ + data_end = (void *)(long)ctx->data_end; + eth = (void *)(long)ctx->data; + + if ((void *) (eth + 1) > data_end)
These bound checks still look really weird to me. Anyway, the last doesn't do anything here, right?
+ return -1; + + /* Copy back Ethernet header in the right place, populate VLAN tag with + * ID and proto, and set outer Ethernet header to VLAN type. + */ + __builtin_memcpy(eth, ð_cpy, sizeof(*eth)); + + vlh = (void *)(eth + 1); + + if ((void *) (vlh + 1) > data_end) + return -1; + + vlh->h_vlan_TCI = bpf_htons(vlid); + vlh->h_vlan_encapsulated_proto = eth->h_proto; + + eth->h_proto = bpf_htons(ETH_P_8021Q); + return 0; +} + +#endif diff --git a/vm/sys/net/xdp-forwarder/load_physical b/vm/sys/net/xdp-forwarder/load_physical new file mode 100755 index 0000000..13473e4 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/load_physical @@ -0,0 +1,4 @@ +#!/bin/execlineb -S1 +# SPDX-License-Identifier: EUPL-1.2+ +# SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> +xdp-loader load ${1} prog_physical.o -m skb -p /sys/fs/bpf diff --git a/vm/sys/net/xdp-forwarder/load_router b/vm/sys/net/xdp-forwarder/load_router new file mode 100755 index 0000000..d16c26d --- /dev/null +++ b/vm/sys/net/xdp-forwarder/load_router @@ -0,0 +1,6 @@ +#!/bin/execlineb -S1 +# SPDX-License-Identifier: EUPL-1.2+ +# SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> +if { xdp-loader load ${1} prog_router.o -m skb -p /sys/fs/bpf } +if { ip link set ${1} promisc on } +set_router_iface ${1}
These scripts are only used in one place, right? I think they're simple enough it would be clearer to just inline them.
diff --git a/vm/sys/net/xdp-forwarder/set_router_iface.c b/vm/sys/net/xdp-forwarder/set_router_iface.c new file mode 100644 index 0000000..c828ee3 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/set_router_iface.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: EUPL-1.2+ +// SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> + +#include <stdio.h> +#include <net/if.h> +#include <bpf/bpf.h> + +int main(int argc, char **argv) { + if (argc < 2) { + fprintf(stderr, "missing interface name\n"); + return 1; + } + + int router_idx = if_nametoindex(argv[1]); + if (router_idx <= 0) { + perror("error getting router interface"); + return 1; + } + + int map_fd = bpf_obj_get("/sys/fs/bpf/router_iface"); + if (map_fd < 0) { + perror("failed to open bpf map"); + return 1; + } + + int id = 0; + if (bpf_map_update_elem(map_fd, &id, &router_idx, 0) < 0) { + perror("failed to update bpf map"); + return 1; + } +}
No existing CLI for trivial map updates like this?
Alyssa Ross <hi@alyssa.is> writes:
diff --git a/vm/sys/net/xdp-forwarder/default.nix b/vm/sys/net/xdp-forwarder/default.nix new file mode 100644 index 0000000..75b1d66 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/default.nix @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> + +{ lib, runCommand, stdenv, llvmPackages, libbpf, linux, bpftools }: + +stdenv.mkDerivation { + pname = "xdp-forwarder"; + version = "0"; + + src = lib.fileset.toSource { + root = ./.; + fileset = lib.fileset.fileFilter + ({ hasExt, ... }: !(hasExt "nix") && !(hasExt "md")) ./.; + }; + + buildInputs = [ libbpf ]; + nativeBuildInputs = [ llvmPackages.clang-unwrapped bpftools ]; + + buildPhase = '' + bpftool btf dump file ${linux.dev}/vmlinux format c > include/vmlinux.h
I guess we're still missing a vmlinux.h package in Nixpkgs? That would be much cleaner.
Actually, given discussion in #systemd:nixos.org yesterday, it sounds like best practice is to declare just the structs and struct members we use: https://nakryiko.com/posts/bpf-core-reference-guide/#defining-own-co-re-relo... (Since on Spectrum we /can/ predict the kernel we'll be running on, it's not a huge deal, but I'd still like to follow the principle of having packages be portable.)
On 8/31/25 19:10, Alyssa Ross wrote:
Alyssa Ross <hi@alyssa.is> writes:
diff --git a/vm/sys/net/xdp-forwarder/default.nix b/vm/sys/net/xdp-forwarder/default.nix new file mode 100644 index 0000000..75b1d66 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/default.nix @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2025 Yureka Lilian <yureka@cyberchaos.dev> + +{ lib, runCommand, stdenv, llvmPackages, libbpf, linux, bpftools }: + +stdenv.mkDerivation { + pname = "xdp-forwarder"; + version = "0"; + + src = lib.fileset.toSource { + root = ./.; + fileset = lib.fileset.fileFilter + ({ hasExt, ... }: !(hasExt "nix") && !(hasExt "md")) ./.; + }; + + buildInputs = [ libbpf ]; + nativeBuildInputs = [ llvmPackages.clang-unwrapped bpftools ]; + + buildPhase = '' + bpftool btf dump file ${linux.dev}/vmlinux format c > include/vmlinux.h I guess we're still missing a vmlinux.h package in Nixpkgs? That would be much cleaner. Actually, given discussion in #systemd:nixos.org yesterday, it sounds like best practice is to declare just the structs and struct members we use:
https://nakryiko.com/posts/bpf-core-reference-guide/#defining-own-co-re-relo...
(Since on Spectrum we /can/ predict the kernel we'll be running on, it's not a huge deal, but I'd still like to follow the principle of having packages be portable.)
I don't really understand how avoiding vmlinux.h would make our programb more portable. At the same time, I am very reliant on bpf/bpf_helpers.h from libbpf, and this requires a definition of __u64 and __u32, and I don't know where I would get those from if not vmlinux.h (and defining them myself feels wrong).
On 8/30/25 12:59, Alyssa Ross wrote:
I don't know much about BPF (or networking really, for that matter), so I'll focus my comments on the integration into Spectrum. Hopefully somebody else is able to help with reviewing the actual functionality.
Yureka Lilian<yureka@cyberchaos.dev> writes:
Signed-off-by: Yureka Lilian<yureka@cyberchaos.dev> --- vm/sys/net/Makefile | 8 +- vm/sys/net/default.nix | 23 ++-- vm/sys/net/etc/fstab | 1 + vm/sys/net/etc/mdev/iface | 23 +--- vm/sys/net/etc/nftables.conf | 8 -- vm/sys/net/etc/s6-rc/connman/dependencies | 4 - vm/sys/net/etc/s6-rc/connman/type | 1 - vm/sys/net/etc/s6-rc/connman/type.license | 2 - vm/sys/net/etc/s6-rc/nftables/type | 1 - vm/sys/net/etc/s6-rc/nftables/type.license | 2 - vm/sys/net/etc/s6-rc/nftables/up | 6 - vm/sys/net/xdp-forwarder/README.md | 9 ++ vm/sys/net/xdp-forwarder/default.nix | 35 ++++++ .../xdp-forwarder/include/parsing_helpers.h | 38 +++++++ .../xdp-forwarder/include/rewrite_helpers.h | 103 ++++++++++++++++++ vm/sys/net/xdp-forwarder/load_physical | 4 + vm/sys/net/xdp-forwarder/load_router | 6 + vm/sys/net/xdp-forwarder/prog_physical.c | 28 +++++ vm/sys/net/xdp-forwarder/prog_router.c | 34 ++++++ vm/sys/net/xdp-forwarder/set_router_iface.c | 31 ++++++ Could this be integrated into tools/? I'm hoping we can work towards having a common build system for all our compiled code. Currently it only has a host/guest distinction, but we probably should change that to host/driver/app. Is moved to tools/ in the next iteration. 20 files changed, 308 insertions(+), 59 deletions(-) delete mode 100644 vm/sys/net/etc/nftables.conf delete mode 100644 vm/sys/net/etc/s6-rc/connman/dependencies delete mode 100644 vm/sys/net/etc/s6-rc/connman/type delete mode 100644 vm/sys/net/etc/s6-rc/connman/type.license delete mode 100644 vm/sys/net/etc/s6-rc/nftables/type delete mode 100644 vm/sys/net/etc/s6-rc/nftables/type.license delete mode 100644 vm/sys/net/etc/s6-rc/nftables/up create mode 100644 vm/sys/net/xdp-forwarder/README.md create mode 100644 vm/sys/net/xdp-forwarder/default.nix create mode 100644 vm/sys/net/xdp-forwarder/include/parsing_helpers.h create mode 100644 vm/sys/net/xdp-forwarder/include/rewrite_helpers.h create mode 100755 vm/sys/net/xdp-forwarder/load_physical create mode 100755 vm/sys/net/xdp-forwarder/load_router create mode 100644 vm/sys/net/xdp-forwarder/prog_physical.c create mode 100644 vm/sys/net/xdp-forwarder/prog_router.c create mode 100644 vm/sys/net/xdp-forwarder/set_router_iface.c
diff --git a/vm/sys/net/Makefile b/vm/sys/net/Makefile index e681940..9576a92 100644 --- a/vm/sys/net/Makefile +++ b/vm/sys/net/Makefile @@ -34,12 +34,11 @@ VM_FILES = \ etc/init \ etc/mdev.conf \ etc/mdev/iface \ - etc/nftables.conf \ etc/passwd \ etc/s6-linux-init/run-image/service/getty-hvc0/run \ etc/s6-linux-init/scripts/rc.init \ etc/sysctl.conf -VM_DIRS = dev etc/s6-linux-init/env run proc sys var/lib/connman +VM_DIRS = dev etc/s6-linux-init/env run proc sys
# These are separate because they need to be included, but putting # them as make dependencies would confuse make. @@ -59,9 +58,6 @@ build/rootfs.erofs: ../../../scripts/make-erofs.sh $(PACKAGES_FILE) $(VM_FILES) ) | ../../../scripts/make-erofs.sh $@
VM_S6_RC_FILES = \ - etc/s6-rc/connman/dependencies \ - etc/s6-rc/connman/run \ - etc/s6-rc/connman/type \ etc/s6-rc/dbus/notification-fd \ etc/s6-rc/dbus/run \ etc/s6-rc/dbus/type \ @@ -71,8 +67,6 @@ VM_S6_RC_FILES = \ etc/s6-rc/mdevd/notification-fd \ etc/s6-rc/mdevd/run \ etc/s6-rc/mdevd/type \ - etc/s6-rc/nftables/type \ - etc/s6-rc/nftables/up \ etc/s6-rc/ok-all/contents \ etc/s6-rc/ok-all/type \ etc/s6-rc/sysctl/type \ diff --git a/vm/sys/net/default.nix b/vm/sys/net/default.nix index b5873eb..335c938 100644 --- a/vm/sys/net/default.nix +++ b/vm/sys/net/default.nix @@ -1,23 +1,26 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: 2021-2023 Alyssa Ross<hi@alyssa.is> +# SPDX-FileCopyrightText: 2025 Yureka Lilian<yureka@cyberchaos.dev>
-import ../../../lib/call-package.nix ({ lseek, src, terminfo, pkgsStatic }: -pkgsStatic.callPackage ( +import ../../../lib/call-package.nix ({ lseek, src, terminfo, pkgsMusl }: +pkgsMusl.callPackage (
{ lib, stdenvNoCC, nixos, runCommand, writeClosure , erofs-utils, jq, s6-rc, util-linux, xorg -, busybox, connmanMinimal, dbus, execline, kmod, linux_latest, mdevd, nftables -, s6, s6-linux-init +, busybox, dbus, execline, kmod, linux_latest, mdevd +, s6, s6-linux-init, xdp-tools }:
let inherit (lib) concatMapStringsSep; inherit (nixosAllHardware.config.hardware) firmware;
- connman = connmanMinimal; - packages = [ - connman dbus execline kmod mdevd s6 s6-linux-init s6-rc + dbus execline kmod mdevd s6 s6-linux-init s6-rc xdp-tools + + (pkgsMusl.callPackage ./xdp-forwarder { + linux = kernel; + })
(busybox.override { extraConfig = '' @@ -30,13 +33,13 @@ let CONFIG_RMMOD n ''; }) - - (nftables.override { withCli = false; }) ];
# Packages that should be fully linked into /usr, # (not just their bin/* files). - usrPackages = [ connman dbus firmware kernel terminfo ]; + usrPackages = [ + dbus firmware kernel terminfo + ];
packagesSysroot = runCommand "packages-sysroot" { inherit packages; diff --git a/vm/sys/net/etc/fstab b/vm/sys/net/etc/fstab index 6a82ecc..06f26ff 100644 --- a/vm/sys/net/etc/fstab +++ b/vm/sys/net/etc/fstab @@ -4,3 +4,4 @@ proc /proc proc defaults 0 0 devpts /dev/pts devpts defaults,gid=4,mode=620 0 0 tmpfs /dev/shm tmpfs defaults 0 0 sysfs /sys sysfs defaults 0 0 +bpffs /sys/fs/bpf bpf defaults 0 0 diff --git a/vm/sys/net/etc/mdev/iface b/vm/sys/net/etc/mdev/iface index 2306575..9724690 100755 --- a/vm/sys/net/etc/mdev/iface +++ b/vm/sys/net/etc/mdev/iface @@ -1,6 +1,7 @@ #!/bin/execlineb -P # SPDX-License-Identifier: EUPL-1.2+ # SPDX-FileCopyrightText: 2020-2021 Alyssa Ross<hi@alyssa.is> +# SPDX-FileCopyrightText: 2025 Yureka Lilian<yureka@cyberchaos.dev>
importas -Si INTERFACE
@@ -8,29 +9,15 @@ ifte
{ # This interface is connected to another VM. This comment should probably be reworded a bit. Will be in the next iteration - - # The other VM's IP is encoded in the NIC-specific portion of the - # interface's MAC address. - backtick -E CLIENT_IP { - awk -F: "{printf \"100.64.%d.%d\\n\", \"0x\" $5, \"0x\" $6}" - /sys/class/net/${INTERFACE}/address - } - - if { ip address add 169.254.0.1/32 dev $INTERFACE } - if { ip link set $INTERFACE up } - ip route add $CLIENT_IP dev $INTERFACE + if { load_router $INTERFACE } + ip link set $INTERFACE up }
{ if { test $INTERFACE != lo } # This is a physical connection to a network device. - background { s6-rc -bu change connman } - if { s6-rc -bu change nftables } - if { - forx -pE module { nft_counter nft_masq } - modprobe $module - } - nft add rule ip nat postrouting oifname $INTERFACE counter masquerade + if { load_physical $INTERFACE } + ip link set $INTERFACE up }
grep -iq ^02:01: /sys/class/net/${INTERFACE}/address diff --git a/vm/sys/net/xdp-forwarder/README.md b/vm/sys/net/xdp-forwarder/README.md new file mode 100644 index 0000000..2aa6585 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/README.md @@ -0,0 +1,9 @@ +### xdp-forwarder + +The xdp forwarder consists of a redirect_kern.c (which is the actual XDP program, basically just a one-liner) and redirect_user.c (which loads the XDP program and sets up the maps with the interfaces passed on the command line). + +It behaves like an ethernet hub between two interfaces. + +Temporary repository, to be inlined into the [Spectrum](https://spectrum-os.org/) monorepo. + +This work was funded by [NGI Zero](https://www.ngi.eu/ngi-projects/ngi-zero/), an initiative by the Digital Single Market of the European Commission. This should be integrated into the Documentation/ we already have. It's a bit of a mess in ther at the moment, though. I expect we'll be getting some help with that early next year, so would be fine to be without documentation until then. I added a more up-to-date text to the architecture page diff --git a/vm/sys/net/xdp-forwarder/default.nix b/vm/sys/net/xdp-forwarder/default.nix new file mode 100644 index 0000000..75b1d66 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/default.nix @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2025 Yureka Lilian<yureka@cyberchaos.dev> + +{ lib, runCommand, stdenv, llvmPackages, libbpf, linux, bpftools }: + +stdenv.mkDerivation { + pname = "xdp-forwarder"; + version = "0"; + + src = lib.fileset.toSource { + root = ./.; + fileset = lib.fileset.fileFilter + ({ hasExt, ... }: !(hasExt "nix") && !(hasExt "md")) ./.; + }; + + buildInputs = [ libbpf ]; + nativeBuildInputs = [ llvmPackages.clang-unwrapped bpftools ]; + + buildPhase = '' + bpftool btf dump file ${linux.dev}/vmlinux format c > include/vmlinux.h I guess we're still missing a vmlinux.h package in Nixpkgs? That would be much cleaner. Fixed by not relying on vmlinux.h
+ for prog in physical router; do + clang $NIX_CFLAGS_COMPILE -O2 -g -Wall -target bpf -I include -c prog_$prog.c -o prog_$prog.o + substituteInPlace load_$prog --replace-fail prog_$prog.o $out/lib/prog_$prog.o + done + $CC -lbpf -I include -o set_router_iface set_router_iface.c + ''; + + installPhase = '' + for prog in physical router; do + install -Dm644 prog_$prog.o $out/lib/prog_$prog.o + install -Dm755 load_$prog $out/bin/load_$prog + done + install -Dm755 set_router_iface $out/bin/set_router_iface + ''; +} diff --git a/vm/sys/net/xdp-forwarder/include/parsing_helpers.h b/vm/sys/net/xdp-forwarder/include/parsing_helpers.h new file mode 100644 index 0000000..2446e56 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/include/parsing_helpers.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-clause) */ +/* SPDX-FileCopyrightText: 2021 The xdp-tutorial Authors */ +/* SPDX-FileCopyrightText: 2025 Yureka Lilian<yureka@cyberchaos.dev> */ +/* Based onhttps://github.com/xdp-project/xdp-tutorial/blob/main/common/parsing_helpers... */ + +#ifndef __PARSING_HELPERS_H +#define __PARSING_HELPERS_H + +#include <bpf/bpf_endian.h> + +//#include <linux/if_ether.h> +#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */ +#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ Looks like there's still something to do here? Fixed by using linux/if_ether.h + +static __always_inline int proto_is_vlan(__u16 h_proto) +{ + return !!(h_proto == bpf_htons(ETH_P_8021Q) || + h_proto == bpf_htons(ETH_P_8021AD)); +} + +static __always_inline int parse_ethhdr(struct xdp_md *ctx, + struct ethhdr **ethhdr) +{ + void *data_end = (void *)(long)ctx->data_end; + struct ethhdr *eth = (void *) (long) ctx->data; uintptr_t might be more appropriate than long?
There's some inconsistent formatting here too. I've pushed an uncrustify config file that resolves the inconsistencies I noticed though, so should be possible to avoid going forward without having to think about it. :) This file has been replaced by the original (untouched, besides formatting) header from xdp-tutorial. This version doesn't do the cast.
+ + /* Byte-count bounds check; check if current pointer + size of header + * is after data_end. + */ + if ((void *) (eth + 1) > data_end) + return -1; This is checking that there's more data after the header, right? Is that something it's important for us to check?
The intent is to check that the entire eth hdr, which we casted a pointer to, is within the data (length) of the packet before we de-reference the pointer. So essentially, skipping packets which do not have a full ethernet header, instead of reading from addresses which we are not supposed to read from. When loading the XDP program, it is tested against an empty or very small packet, and if it tries to access memory outside of the packet bounds, it will refuse to load. So the BPF/XDP system ensures that these kinds of packets are handled properly.
+ + *ethhdr = eth; + + return eth->h_proto; /* network-byte-order */ Why return this when we're already outputting the whole struct ethhdr? No point really, but it's the type signatures is from the xdp-tutorial code, and I want to stick to that as much as possible. +} + +#endif diff --git a/vm/sys/net/xdp-forwarder/include/rewrite_helpers.h b/vm/sys/net/xdp-forwarder/include/rewrite_helpers.h new file mode 100644 index 0000000..7fa6e2c --- /dev/null +++ b/vm/sys/net/xdp-forwarder/include/rewrite_helpers.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-clause) */ +/* SPDX-FileCopyrightText: 2019 The xdp-tutorial Authors */ +/* SPDX-FileCopyrightText: 2025 Yureka Lilian<yureka@cyberchaos.dev> */ +/* Based on:https://github.com/xdp-project/xdp-tutorial/blob/main/common/rewrite_helpers... */ + +#ifndef __REWRITE_HELPERS_H +#define __REWRITE_HELPERS_H + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +/* Pops the outermost VLAN tag off the packet. Returns the popped VLAN ID on + * success or negative errno on failure. + */ +static __always_inline int vlan_tag_pop(struct xdp_md *ctx, struct ethhdr *eth) +{ + void *data_end = (void *)(long)ctx->data_end; + struct ethhdr eth_cpy; + struct vlan_hdr *vlh; + __be16 h_proto; + int vlid; + + if (!proto_is_vlan(eth->h_proto)) + return -1; + + /* Careful with the parenthesis here */ + vlh = (void *)(eth + 1); + + /* Still need to do bounds checking */ + if ((void *) (vlh + 1) > data_end) + return -1; + + /* Save vlan ID for returning, h_proto for updating Ethernet header */ + vlid = bpf_ntohs(vlh->h_vlan_TCI); + h_proto = vlh->h_vlan_encapsulated_proto; + + /* Make a copy of the outer Ethernet header before we cut it off */ + __builtin_memcpy(ð_cpy, eth, sizeof(eth_cpy)); + + /* Actually adjust the head pointer */ + if (bpf_xdp_adjust_head(ctx, (int)sizeof(*vlh))) + return -1; + + /* Need to re-evaluate data *and* data_end and do new bounds checking + * after adjusting head + */ + eth = (void *)(long)ctx->data; + data_end = (void *)(long)ctx->data_end; + if ((void *) (eth + 1) > data_end) + return -1; + + /* Copy back the old Ethernet header and update the proto type */ + __builtin_memcpy(eth, ð_cpy, sizeof(*eth)); + eth->h_proto = h_proto; + + return vlid; +} + +/* Pushes a new VLAN tag after the Ethernet header. Returns 0 on success, + * -1 on failure. + */ +static __always_inline int vlan_tag_push(struct xdp_md *ctx, + struct ethhdr *eth, int vlid) +{ + void *data_end = (void *)(long)ctx->data_end; + struct ethhdr eth_cpy; + struct vlan_hdr *vlh; + + /* First copy the original Ethernet header */ + __builtin_memcpy(ð_cpy, eth, sizeof(eth_cpy)); + + /* Then add space in front of the packet */ + if (bpf_xdp_adjust_head(ctx, 0 - (int)sizeof(*vlh))) + return -1; + + /* Need to re-evaluate data_end and data after head adjustment, and + * bounds check, even though we know there is enough space (as we + * increased it). + */ + data_end = (void *)(long)ctx->data_end; + eth = (void *)(long)ctx->data; + + if ((void *) (eth + 1) > data_end) These bound checks still look really weird to me. Anyway, the last doesn't do anything here, right?
Yes, it's likely optimized away, because we already checked that the vlan hdr (which comes after the eth hdr) is valid. However, we should always check before de-referencing a pointer that we don't read from invalid memory.
+ return -1; + + /* Copy back Ethernet header in the right place, populate VLAN tag with + * ID and proto, and set outer Ethernet header to VLAN type. + */ + __builtin_memcpy(eth, ð_cpy, sizeof(*eth)); + + vlh = (void *)(eth + 1); + + if ((void *) (vlh + 1) > data_end) + return -1; + + vlh->h_vlan_TCI = bpf_htons(vlid); + vlh->h_vlan_encapsulated_proto = eth->h_proto; + + eth->h_proto = bpf_htons(ETH_P_8021Q); + return 0; +} + +#endif diff --git a/vm/sys/net/xdp-forwarder/load_physical b/vm/sys/net/xdp-forwarder/load_physical new file mode 100755 index 0000000..13473e4 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/load_physical @@ -0,0 +1,4 @@ +#!/bin/execlineb -S1 +# SPDX-License-Identifier: EUPL-1.2+ +# SPDX-FileCopyrightText: 2025 Yureka Lilian<yureka@cyberchaos.dev> +xdp-loader load ${1} prog_physical.o -m skb -p /sys/fs/bpf diff --git a/vm/sys/net/xdp-forwarder/load_router b/vm/sys/net/xdp-forwarder/load_router new file mode 100755 index 0000000..d16c26d --- /dev/null +++ b/vm/sys/net/xdp-forwarder/load_router @@ -0,0 +1,6 @@ +#!/bin/execlineb -S1 +# SPDX-License-Identifier: EUPL-1.2+ +# SPDX-FileCopyrightText: 2025 Yureka Lilian<yureka@cyberchaos.dev> +if { xdp-loader load ${1} prog_router.o -m skb -p /sys/fs/bpf } +if { ip link set ${1} promisc on } +set_router_iface ${1} These scripts are only used in one place, right? I think they're simple enough it would be clearer to just inline them. Done in the next iteration diff --git a/vm/sys/net/xdp-forwarder/set_router_iface.c b/vm/sys/net/xdp-forwarder/set_router_iface.c new file mode 100644 index 0000000..c828ee3 --- /dev/null +++ b/vm/sys/net/xdp-forwarder/set_router_iface.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: EUPL-1.2+ +// SPDX-FileCopyrightText: 2025 Yureka Lilian<yureka@cyberchaos.dev> + +#include <stdio.h> +#include <net/if.h> +#include <bpf/bpf.h> + +int main(int argc, char **argv) { + if (argc < 2) { + fprintf(stderr, "missing interface name\n"); + return 1; + } + + int router_idx = if_nametoindex(argv[1]); + if (router_idx <= 0) { + perror("error getting router interface"); + return 1; + } + + int map_fd = bpf_obj_get("/sys/fs/bpf/router_iface"); + if (map_fd < 0) { + perror("failed to open bpf map"); + return 1; + } + + int id = 0; + if (bpf_map_update_elem(map_fd, &id, &router_idx, 0) < 0) { + perror("failed to update bpf map"); + return 1; + } +} No existing CLI for trivial map updates like this? There is a bpftool map update command, but the way we'd need to pass the interface id as value is very inconvenient to produce in a shell script, since it takes individual bytes.
Yureka <yuka@yuka.dev> writes:
+ + /* Byte-count bounds check; check if current pointer + size of header + * is after data_end. + */ + if ((void *) (eth + 1) > data_end) + return -1; This is checking that there's more data after the header, right? Is that something it's important for us to check?
The intent is to check that the entire eth hdr, which we casted a pointer to, is within the data (length) of the packet before we de-reference the pointer. So essentially, skipping packets which do not have a full ethernet header, instead of reading from addresses which we are not supposed to read from.
When loading the XDP program, it is tested against an empty or very small packet, and if it tries to access memory outside of the packet bounds, it will refuse to load. So the BPF/XDP system ensures that these kinds of packets are handled properly.
Doesn't using > instead of >= check that the entire eth hdr **plus one byte** is within the packet, though? i.e. wouldn't this check fail if the data consisted entirely of an ethernet header? Is that the right thing to do? (Sorry if my maths is just wrong.)
Pointer arithmetics always work in pointer lengths, so + 1 is adding sizeof(struct ethhdr) bytes. eth is the beginning of the eth header. eth + 1 is the first byte after the eth header, or where the next eth header would begin in an array. On 9/1/25 15:59, Alyssa Ross wrote:
Yureka <yuka@yuka.dev> writes:
+ + /* Byte-count bounds check; check if current pointer + size of header + * is after data_end. + */ + if ((void *) (eth + 1) > data_end) + return -1; This is checking that there's more data after the header, right? Is that something it's important for us to check? The intent is to check that the entire eth hdr, which we casted a pointer to, is within the data (length) of the packet before we de-reference the pointer. So essentially, skipping packets which do not have a full ethernet header, instead of reading from addresses which we are not supposed to read from.
When loading the XDP program, it is tested against an empty or very small packet, and if it tries to access memory outside of the packet bounds, it will refuse to load. So the BPF/XDP system ensures that these kinds of packets are handled properly. Doesn't using > instead of >= check that the entire eth hdr **plus one byte** is within the packet, though? i.e. wouldn't this check fail if the data consisted entirely of an ethernet header? Is that the right thing to do? (Sorry if my maths is just wrong.)
Yureka <yuka@yuka.dev> writes:
Pointer arithmetics always work in pointer lengths, so + 1 is adding sizeof(struct ethhdr) bytes.
eth is the beginning of the eth header.
eth + 1 is the first byte after the eth header, or where the next eth header would begin in an array.
I've figured out the source of my confusion. I mistakenly assumed that data_end would be a pointer to the last byte of data. It's actually a pointer to the byte after the last byte of data.
On 9/1/25 15:59, Alyssa Ross wrote:
Yureka <yuka@yuka.dev> writes:
+ + /* Byte-count bounds check; check if current pointer + size of header + * is after data_end. + */ + if ((void *) (eth + 1) > data_end) + return -1; This is checking that there's more data after the header, right? Is that something it's important for us to check? The intent is to check that the entire eth hdr, which we casted a pointer to, is within the data (length) of the packet before we de-reference the pointer. So essentially, skipping packets which do not have a full ethernet header, instead of reading from addresses which we are not supposed to read from.
When loading the XDP program, it is tested against an empty or very small packet, and if it tries to access memory outside of the packet bounds, it will refuse to load. So the BPF/XDP system ensures that these kinds of packets are handled properly. Doesn't using > instead of >= check that the entire eth hdr **plus one byte** is within the packet, though? i.e. wouldn't this check fail if the data consisted entirely of an ethernet header? Is that the right thing to do? (Sorry if my maths is just wrong.)
Signed-off-by: Yureka Lilian <yureka@cyberchaos.dev> --- lib/nixpkgs.default.nix | 4 ++-- vm/sys/net/default.nix | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/lib/nixpkgs.default.nix b/lib/nixpkgs.default.nix index 50ffa84..5ac9986 100644 --- a/lib/nixpkgs.default.nix +++ b/lib/nixpkgs.default.nix @@ -4,6 +4,6 @@ # Generated by scripts/update-nixpkgs.sh. import (builtins.fetchTarball { - url = "https://github.com/NixOS/nixpkgs/archive/6e1ffe6ba3c38d429bd15079a0935f3a86a..."; - sha256 = "0mh2g0x6f8sfk9n4wl6mmwd3qwwfcdm0wjmcfqk28vb2hi6j7xxm"; + url = "https://github.com/NixOS/nixpkgs/archive/0d2db936fc6a3b4b6723957ed2e51ce315b..."; + sha256 = "1ydp8lwpk7zira5ms429k2ig3qcm7bd4jlwvzmhhvzjsm4qm36xn"; }) diff --git a/vm/sys/net/default.nix b/vm/sys/net/default.nix index 335c938..5620d52 100644 --- a/vm/sys/net/default.nix +++ b/vm/sys/net/default.nix @@ -8,7 +8,8 @@ pkgsMusl.callPackage ( { lib, stdenvNoCC, nixos, runCommand, writeClosure , erofs-utils, jq, s6-rc, util-linux, xorg , busybox, dbus, execline, kmod, linux_latest, mdevd -, s6, s6-linux-init, xdp-tools +, s6, s6-linux-init, xdp-tools, libbpf, fetchFromGitHub +, iproute2 }: let @@ -16,7 +17,20 @@ let inherit (nixosAllHardware.config.hardware) firmware; packages = [ - dbus execline kmod mdevd s6 s6-linux-init s6-rc xdp-tools + dbus execline kmod mdevd s6 s6-linux-init s6-rc iproute2 + + (xdp-tools.override { + libbpf = libbpf.overrideAttrs (_old: rec { + version = "1.6.2"; + src = fetchFromGitHub { + owner = "libbpf"; + repo = "libbpf"; + rev = "v${version}"; + hash = "sha256-igjjwirg3O5mC3DzGCAO9OgrH2drnE/gV6NH7ZLNnFE="; + }; + postInstall = " "; + }); + }) (pkgsMusl.callPackage ./xdp-forwarder { linux = kernel; @@ -24,6 +38,7 @@ let (busybox.override { extraConfig = '' + CONFIG_IP n CONFIG_DEPMOD n CONFIG_INIT n CONFIG_INSMOD n -- 2.50.1
participants (3)
-
Alyssa Ross -
Yureka -
Yureka Lilian