Skip to content

Commit c7e18d8

Browse files
committed
qat: rework driver binding
The new_id based driver binding is failing on kernels 5.11+ when the QAT VF is not bound to any driver: attempts to write to new_id with the same device ID repeatedly error with "file exists". Move the new_id initialization to the beginning of the startup and write the enabled device IDs only once. This commit also fixes an issue where VF devices were not correctly detected in virtual machines where the VF was not bound to any driver. Signed-off-by: Mikko Ylinen <mikko.ylinen@intel.com>
1 parent b48ca7f commit c7e18d8

File tree

2 files changed

+155
-82
lines changed

2 files changed

+155
-82
lines changed

cmd/qat_plugin/dpdkdrv/dpdkdrv.go

Lines changed: 74 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,14 @@ const (
4141
pciDriverDirectory = "/sys/bus/pci/drivers"
4242
uioSuffix = "uio"
4343
iommuGroupSuffix = "iommu_group"
44-
newIDSuffix = "new_id"
45-
driverUnbindSuffix = "driver/unbind"
4644
vendorPrefix = "8086 "
4745
envVarPrefix = "QAT"
4846

4947
igbUio = "igb_uio"
5048
vfioPci = "vfio-pci"
49+
50+
// Period of device scans.
51+
scanPeriod = 5 * time.Second
5152
)
5253

5354
// QAT PCI VF Device ID -> kernel QAT VF device driver mappings.
@@ -63,6 +64,9 @@ var qatDeviceDriver = map[string]string{
6364

6465
// DevicePlugin represents vfio based QAT plugin.
6566
type DevicePlugin struct {
67+
scanTicker *time.Ticker
68+
scanDone chan bool
69+
6670
pciDriverDir string
6771
pciDeviceDir string
6872
dpdkDriver string
@@ -93,11 +97,36 @@ func newDevicePlugin(pciDriverDir, pciDeviceDir string, maxDevices int, kernelVf
9397
pciDeviceDir: pciDeviceDir,
9498
kernelVfDrivers: kernelVfDrivers,
9599
dpdkDriver: dpdkDriver,
100+
scanTicker: time.NewTicker(scanPeriod),
101+
scanDone: make(chan bool, 1),
102+
}
103+
}
104+
105+
func (dp *DevicePlugin) setupDeviceIDs() error {
106+
for devID, driver := range qatDeviceDriver {
107+
for _, enabledDriver := range dp.kernelVfDrivers {
108+
if driver != enabledDriver {
109+
continue
110+
}
111+
112+
err := writeToDriver(filepath.Join(dp.pciDriverDir, dp.dpdkDriver, "new_id"), vendorPrefix+devID)
113+
if err != nil && !errors.Is(err, os.ErrExist) {
114+
return errors.WithMessagef(err, "failed to set device ID %s for %s. Driver module not loaded?", devID, dp.dpdkDriver)
115+
}
116+
}
96117
}
118+
119+
return nil
97120
}
98121

99122
// Scan implements Scanner interface for vfio based QAT plugin.
100123
func (dp *DevicePlugin) Scan(notifier dpapi.Notifier) error {
124+
defer dp.scanTicker.Stop()
125+
126+
if err := dp.setupDeviceIDs(); err != nil {
127+
return err
128+
}
129+
101130
for {
102131
devTree, err := dp.scan()
103132
if err != nil {
@@ -106,7 +135,11 @@ func (dp *DevicePlugin) Scan(notifier dpapi.Notifier) error {
106135

107136
notifier.Notify(devTree)
108137

109-
time.Sleep(5 * time.Second)
138+
select {
139+
case <-dp.scanDone:
140+
return nil
141+
case <-dp.scanTicker.C:
142+
}
110143
}
111144
}
112145

@@ -197,34 +230,18 @@ func (dp *DevicePlugin) getDpdkMounts(dpdkDeviceName string) []pluginapi.Mount {
197230
}
198231
}
199232

200-
func (dp *DevicePlugin) getDeviceID(pciAddr string) (string, error) {
201-
devID, err := os.ReadFile(filepath.Join(dp.pciDeviceDir, filepath.Clean(pciAddr), "device"))
233+
func getDeviceID(device string) (string, error) {
234+
devID, err := os.ReadFile(filepath.Join(device, "device"))
202235
if err != nil {
203-
return "", errors.Wrapf(err, "Cannot obtain ID for the device %s", pciAddr)
236+
return "", errors.Wrapf(err, "failed to read device ID")
204237
}
205238

206239
return strings.TrimPrefix(string(bytes.TrimSpace(devID)), "0x"), nil
207240
}
208241

209-
// bindDevice unbinds given device from kernel driver and binds to DPDK driver.
210-
func (dp *DevicePlugin) bindDevice(vfBdf string) error {
211-
unbindDevicePath := filepath.Join(dp.pciDeviceDir, vfBdf, driverUnbindSuffix)
212-
213-
// Unbind from the kernel driver. IsNotExist means the device is not bound to any driver.
214-
if err := os.WriteFile(unbindDevicePath, []byte(vfBdf), 0600); !os.IsNotExist(err) {
215-
return errors.Wrapf(err, "Unbinding from kernel driver failed for the device %s", vfBdf)
216-
}
217-
218-
vfdevID, err := dp.getDeviceID(vfBdf)
219-
if err != nil {
220-
return err
221-
}
222-
223-
bindDevicePath := filepath.Join(dp.pciDriverDir, dp.dpdkDriver, newIDSuffix)
224-
//Bind to the the dpdk driver
225-
err = os.WriteFile(bindDevicePath, []byte(vendorPrefix+vfdevID), 0600)
226-
if err != nil {
227-
return errors.Wrapf(err, "Binding to the DPDK driver failed for the device %s", vfBdf)
242+
func writeToDriver(path, value string) error {
243+
if err := os.WriteFile(path, []byte(value), 0600); err != nil {
244+
return errors.Wrapf(err, "write to driver failed: %s", value)
228245
}
229246

230247
return nil
@@ -307,27 +324,43 @@ func (dp *DevicePlugin) getVfDevices() []string {
307324
qatPfDevices := make([]string, 0)
308325
qatVfDevices := make([]string, 0)
309326

310-
// Get PF BDFs bound to a PF driver
327+
// Get PF BDFs bound to a known QAT PF driver
311328
for _, vfDriver := range dp.kernelVfDrivers {
312329
pfDriver := strings.TrimSuffix(vfDriver, "vf")
313330
pattern := filepath.Join(dp.pciDriverDir, pfDriver, "????:??:??.?")
314331
qatPfDevices = append(qatPfDevices, getPciDevicesWithPattern(pattern)...)
315332
}
316333

317-
// Get VF devices belonging to a PF device
334+
// Get VF devices belonging to a valid QAT PF device
318335
for _, qatPfDevice := range qatPfDevices {
319336
pattern := filepath.Join(qatPfDevice, "virtfn*")
320337
qatVfDevices = append(qatVfDevices, getPciDevicesWithPattern(pattern)...)
321338
}
322339

323-
if len(qatVfDevices) > 0 {
340+
if len(qatPfDevices) > 0 {
341+
if len(qatVfDevices) >= dp.maxDevices {
342+
return qatVfDevices[:dp.maxDevices]
343+
}
344+
324345
return qatVfDevices
325346
}
326347

327-
// No PF devices found, running in a VM?
328-
for _, vfDriver := range append([]string{dp.dpdkDriver}, dp.kernelVfDrivers...) {
329-
pattern := filepath.Join(dp.pciDriverDir, vfDriver, "????:??:??.?")
330-
qatVfDevices = append(qatVfDevices, getPciDevicesWithPattern(pattern)...)
348+
// No PF devices with a QAT driver found, running in a VM?
349+
pattern := filepath.Join(dp.pciDeviceDir, "????:??:??.?")
350+
for _, pciDev := range getPciDevicesWithPattern(pattern) {
351+
devID, err := getDeviceID(pciDev)
352+
if err != nil {
353+
klog.Warningf("unable to read device id for device %s: %q", filepath.Base(pciDev), err)
354+
continue
355+
}
356+
357+
if dp.isValidVfDeviceID(devID) {
358+
qatVfDevices = append(qatVfDevices, pciDev)
359+
}
360+
}
361+
362+
if len(qatVfDevices) >= dp.maxDevices {
363+
return qatVfDevices[:dp.maxDevices]
331364
}
332365

333366
return qatVfDevices
@@ -352,22 +385,15 @@ func (dp *DevicePlugin) scan() (dpapi.DeviceTree, error) {
352385
for _, vfDevice := range dp.getVfDevices() {
353386
vfBdf := filepath.Base(vfDevice)
354387

355-
vfdevID, err := dp.getDeviceID(vfBdf)
356-
if err != nil {
357-
return nil, err
358-
}
359-
360-
if !dp.isValidVfDeviceID(vfdevID) {
361-
continue
362-
}
363-
364-
n = n + 1
365-
if n > dp.maxDevices {
366-
break
367-
}
388+
if drv := getCurrentDriver(vfDevice); drv != dp.dpdkDriver {
389+
if drv != "" {
390+
err := writeToDriver(filepath.Join(dp.pciDriverDir, drv, "unbind"), vfBdf)
391+
if err != nil {
392+
return nil, err
393+
}
394+
}
368395

369-
if getCurrentDriver(vfDevice) != dp.dpdkDriver {
370-
err = dp.bindDevice(vfBdf)
396+
err := writeToDriver(filepath.Join(dp.pciDriverDir, dp.dpdkDriver, "bind"), vfBdf)
371397
if err != nil {
372398
return nil, err
373399
}
@@ -380,6 +406,7 @@ func (dp *DevicePlugin) scan() (dpapi.DeviceTree, error) {
380406

381407
klog.V(1).Infof("Device %s found", vfBdf)
382408

409+
n = n + 1
383410
envs := map[string]string{
384411
fmt.Sprintf("%s%d", envVarPrefix, n): vfBdf,
385412
}

0 commit comments

Comments
 (0)