Skip to content

Segfault possibly because of invalid or nil bandwidth configuration #1221

@adelton

Description

@adelton

In k3s-io/k3s#13344 I described a failure, including a SIGSEGV, that occurs when using K3s and its CNI plugins with CRI-O rather than containerd.

@brandond helped to unpack the stack trace into the following:

panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x5f89cc]

goroutine 1 gp=0xc000002380 m=0 mp=0x8da300 [running, locked to thread]:
panic({0x6763e0?, 0x8cd930?})
	/usr/local/go/src/runtime/panic.go:811 +0x168 fp=0xc0000d3950 sp=0xc0000d38a0 pc=0x46e668
runtime.panicmem(...)
	/usr/local/go/src/runtime/panic.go:262
runtime.sigpanic()
	/usr/local/go/src/runtime/signal_unix.go:925 +0x359 fp=0xc0000d39b0 sp=0xc0000d3950 pc=0x4705d9
github.com/containernetworking/plugins/plugins/meta/bandwidth.cmdCheck(0xc000096980)
	/tmp/tmp.SfBAFPHij0/src/github.com/containernetworking/plugins/plugins/meta/bandwidth/main.go:302 +0x2ac fp=0xc0000d3b18 sp=0xc0000d39b0 pc=0x5f89cc
github.com/containernetworking/plugins/vendor/github.com/containernetworking/cni/pkg/skel.(*dispatcher).checkVersionAndCall(0xc0000a9db0, 0xc000096980, {0x72b288, 0xc000079d70}, 0x6de140)
	/tmp/tmp.SfBAFPHij0/src/github.com/containernetworking/plugins/vendor/github.com/containernetworking/cni/pkg/skel/skel.go:204 +0x116 fp=0xc0000d3bc0 sp=0xc0000d3b18 pc=0x576856
github.com/containernetworking/plugins/vendor/github.com/containernetworking/cni/pkg/skel.(*dispatcher).pluginMain(0xc0000a9db0, {0x6de138, 0x6de148, 0x6de140, 0x0, 0x0}, {0x72b288, 0xc000079d70}, {0xc0000924e0, 0x20})
	/tmp/tmp.SfBAFPHij0/src/github.com/containernetworking/plugins/vendor/github.com/containernetworking/cni/pkg/skel/skel.go:273 +0xbc5 fp=0xc0000d3d18 sp=0xc0000d3bc0 pc=0x5777a5
github.com/containernetworking/plugins/vendor/github.com/containernetworking/cni/pkg/skel.PluginMainFuncsWithError(...)
	/tmp/tmp.SfBAFPHij0/src/github.com/containernetworking/plugins/vendor/github.com/containernetworking/cni/pkg/skel/skel.go:394
github.com/containernetworking/plugins/vendor/github.com/containernetworking/cni/pkg/skel.PluginMainFuncs({0x6de138, 0x6de148, 0x6de140, 0x0, 0x0}, {0x72b288?, 0xc000079d70?}, {0xc0000924e0?, 0x7ffdf5072c9a?})
	/tmp/tmp.SfBAFPHij0/src/github.com/containernetworking/plugins/vendor/github.com/containernetworking/cni/pkg/skel/skel.go:411 +0x13f fp=0xc0000d3e20 sp=0xc0000d3d18 pc=0x577cbf
github.com/containernetworking/plugins/plugins/meta/bandwidth.Main()
	/tmp/tmp.SfBAFPHij0/src/github.com/containernetworking/plugins/plugins/meta/bandwidth/main.go:244 +0x11f fp=0xc0000d3ed0 sp=0xc0000d3e20 pc=0x5f85bf
github.com/containernetworking/plugins/vendor/github.com/docker/docker/pkg/reexec.Init(...)
	/tmp/tmp.SfBAFPHij0/src/github.com/containernetworking/plugins/vendor/github.com/docker/docker/pkg/reexec/reexec.go:33
main.main()
	/tmp/tmp.SfBAFPHij0/src/github.com/containernetworking/plugins/main_linux.go:34 +0x15c fp=0xc0000d3f50 sp=0xc0000d3ed0 pc=0x62b61c
runtime.main()
	/usr/local/go/src/runtime/proc.go:283 +0x28b fp=0xc0000d3fe0 sp=0xc0000d3f50 pc=0x43c78b
runtime.goexit({})
	/usr/local/go/src/runtime/asm_amd64.s:1700 +0x1 fp=0xc0000d3fe8 sp=0xc0000d3fe0 pc=0x475d01

goroutine 2 gp=0xc0000028c0 m=nil [force gc (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
	/usr/local/go/src/runtime/proc.go:435 +0xce fp=0xc00003cfa8 sp=0xc00003cf88 pc=0x46eb4e
runtime.goparkunlock(...)
	/usr/local/go/src/runtime/proc.go:441
runtime.forcegchelper()
	/usr/local/go/src/runtime/proc.go:348 +0xb3 fp=0xc00003cfe0 sp=0xc00003cfa8 pc=0x43cad3
runtime.goexit({})
	/usr/local/go/src/runtime/asm_amd64.s:1700 +0x1 fp=0xc00003cfe8 sp=0xc00003cfe0 pc=0x475d01
created by runtime.init.7 in goroutine 1
	/usr/local/go/src/runtime/proc.go:336 +0x1a

goroutine 3 gp=0xc000002e00 m=nil [GC sweep wait]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
	/usr/local/go/src/runtime/proc.go:435 +0xce fp=0xc00003d780 sp=0xc00003d760 pc=0x46eb4e
runtime.goparkunlock(...)
	/usr/local/go/src/runtime/proc.go:441
runtime.bgsweep(0xc00005c000)
	/usr/local/go/src/runtime/mgcsweep.go:276 +0x94 fp=0xc00003d7c8 sp=0xc00003d780 pc=0x427634
runtime.gcenable.gowrap1()
	/usr/local/go/src/runtime/mgc.go:204 +0x25 fp=0xc00003d7e0 sp=0xc00003d7c8 pc=0x41bb05
runtime.goexit({})
	/usr/local/go/src/runtime/asm_amd64.s:1700 +0x1 fp=0xc00003d7e8 sp=0xc00003d7e0 pc=0x475d01
created by runtime.gcenable in goroutine 1
	/usr/local/go/src/runtime/mgc.go:204 +0x66

goroutine 4 gp=0xc000002fc0 m=nil [GC scavenge wait]:
runtime.gopark(0xc00005c000?, 0x727b98?, 0x1?, 0x0?, 0xc000002fc0?)
	/usr/local/go/src/runtime/proc.go:435 +0xce fp=0xc00003df78 sp=0xc00003df58 pc=0x46eb4e
runtime.goparkunlock(...)
	/usr/local/go/src/runtime/proc.go:441
runtime.(*scavengerState).park(0x8d92e0)
	/usr/local/go/src/runtime/mgcscavenge.go:425 +0x49 fp=0xc00003dfa8 sp=0xc00003df78 pc=0x4250e9
runtime.bgscavenge(0xc00005c000)
	/usr/local/go/src/runtime/mgcscavenge.go:653 +0x3c fp=0xc00003dfc8 sp=0xc00003dfa8 pc=0x42565c
runtime.gcenable.gowrap2()
	/usr/local/go/src/runtime/mgc.go:205 +0x25 fp=0xc00003dfe0 sp=0xc00003dfc8 pc=0x41baa5
runtime.goexit({})
	/usr/local/go/src/runtime/asm_amd64.s:1700 +0x1 fp=0xc00003dfe8 sp=0xc00003dfe0 pc=0x475d01
created by runtime.gcenable in goroutine 1
	/usr/local/go/src/runtime/mgc.go:205 +0xa5

goroutine 5 gp=0xc000003500 m=nil [finalizer wait]:
runtime.gopark(0x8f9e40?, 0x490013?, 0x78?, 0xc6?, 0x413dde?)
	/usr/local/go/src/runtime/proc.go:435 +0xce fp=0xc00003c630 sp=0xc00003c610 pc=0x46eb4e
runtime.runfinq()
	/usr/local/go/src/runtime/mfinal.go:196 +0x107 fp=0xc00003c7e0 sp=0xc00003c630 pc=0x41aac7
runtime.goexit({})
	/usr/local/go/src/runtime/asm_amd64.s:1700 +0x1 fp=0xc00003c7e8 sp=0xc00003c7e0 pc=0x475d01
created by runtime.createfing in goroutine 1
	/usr/local/go/src/runtime/mfinal.go:166 +0x3d

goroutine 6 gp=0xc0000036c0 m=nil [chan receive]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
	/usr/local/go/src/runtime/proc.go:435 +0xce fp=0xc00003e718 sp=0xc00003e6f8 pc=0x46eb4e
runtime.chanrecv(0xc00006c0e0, 0x0, 0x1)
	/usr/local/go/src/runtime/chan.go:664 +0x445 fp=0xc00003e790 sp=0xc00003e718 pc=0x40d445
runtime.chanrecv1(0x0?, 0x0?)
	/usr/local/go/src/runtime/chan.go:506 +0x12 fp=0xc00003e7b8 sp=0xc00003e790 pc=0x40cff2
runtime.unique_runtime_registerUniqueMapCleanup.func2(...)
	/usr/local/go/src/runtime/mgc.go:1797
runtime.unique_runtime_registerUniqueMapCleanup.gowrap1()
	/usr/local/go/src/runtime/mgc.go:1800 +0x2f fp=0xc00003e7e0 sp=0xc00003e7b8 pc=0x41ec4f
runtime.goexit({})
	/usr/local/go/src/runtime/asm_amd64.s:1700 +0x1 fp=0xc00003e7e8 sp=0xc00003e7e0 pc=0x475d01
created by unique.runtime_registerUniqueMapCleanup in goroutine 1
	/usr/local/go/src/runtime/mgc.go:1795 +0x79

and noted that

Looks like cri-o is passing an invalid or nil bandwidth configuration, and the plugin does not handle this. Nil check is missing here, before checking field values: https://github.com/containernetworking/plugins/blob/v1.8.0/plugins/meta/bandwidth/main.go#L300-L302

The runtime config as reported (or created?) by CRI-O seems to be

RuntimeConfig:map[cbr0:{IP: MAC: PortMappings:[] Bandwidth:<nil> IpRanges:[] CgroupPath:kubepods-burstable-pod3a33a87e_5acb_4333_a8ec_22fd42b3f7e5.slice PodAnnotations:0xc00007ce68}]

Should the CNI plugin check for the nil value and handle it gracefully, or does this format violate some specification, making the problem primarily one on the CRI-O side?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions