Commit 6aa1a47

Merge pull request #638 from uniemimu/fractional
gpu_plugin: fractional resource management
2 parents 37daf67 + f41ecf3 commit 6aa1a47

20 files changed (+877 −11 lines)

DEVEL.md

Lines changed: 7 additions & 0 deletions
@@ -64,6 +64,13 @@ Optionally, your device plugin may also implement the
 before they are sent to `kubelet`. To see an example, refer to the FPGA
 plugin which implements this interface to annotate its responses.
 
+If you want to implement the whole allocation functionality in your
+device plugin, you can implement the optional `deviceplugin.Allocator`
+interface. In that case `PostAllocate()` is not called. If your
+`deviceplugin.Allocator` implementation decides it needs to fall back to
+the default allocation functionality, it should return an error of type
+`deviceplugin.UseDefaultMethodError`.
+
 ### Logging
 
 The framework uses [`klog`](https://github.com/kubernetes/klog) as its logging
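
A minimal sketch of the pattern the new DEVEL.md text describes. The `myPlugin` type and its `wantCustomAllocation` field are hypothetical, but the `Allocate` signature and the `UseDefaultMethodError` fallback mirror the GPU plugin changes further down in this commit.

```go
// Sketch only: a device plugin implementing the optional deviceplugin.Allocator
// interface and falling back to the framework's default allocation logic.
package example

import (
	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"

	dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
)

type myPlugin struct {
	wantCustomAllocation bool // hypothetical switch for this sketch
}

// Allocate satisfies the optional deviceplugin.Allocator interface.
func (p *myPlugin) Allocate(request *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
	if !p.wantCustomAllocation {
		// Tell the framework to run its default allocation path instead.
		return nil, &dpapi.UseDefaultMethodError{}
	}

	// A real plugin would build its custom response here.
	return &pluginapi.AllocateResponse{}, nil
}
```
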

cmd/gpu_plugin/README.md

Lines changed: 24 additions & 0 deletions
@@ -119,6 +119,30 @@ $ kubectl apply -k deployments/gpu_plugin/overlays/nfd_labeled_nodes
 daemonset.apps/intel-gpu-plugin created
 ```
 
+The experimental fractional-resource feature can be enabled by running:
+
+```bash
+$ kubectl apply -k deployments/gpu_plugin/overlays/fractional_resources
+serviceaccount/resource-reader-sa created
+clusterrole.rbac.authorization.k8s.io/resource-reader created
+clusterrolebinding.rbac.authorization.k8s.io/resource-reader-rb created
+daemonset.apps/intel-gpu-plugin created
+```
+
+Using fractional GPU resources, such as GPU memory, requires that the cluster has node
+extended resources with the name prefix `gpu.intel.com/`. These can be created with NFD
+by running the hook installed by the plugin initcontainer. When fractional resources are
+enabled, the plugin lets a scheduler extender make card-selection decisions based on resource
+availability and the amount of extended resources requested in the pod spec.
+
+The scheduler extender must then annotate the pod objects with unique, increasing
+numeric timestamps in the `gas-ts` annotation and with per-container card selections in
+the `gas-container-cards` annotation. The latter uses `|` as the container separator and `,`
+as the card separator. Example for a pod with two containers, each getting two cards:
+`gas-container-cards:card0,card1|card2,card3`. Enabling fractional-resource support
+in the plugin without running such an annotation-adding scheduler extender in the cluster
+will only slow down GPU deployments, so do not enable this feature unnecessarily.
+
 > **Note**: It is also possible to run the GPU device plugin using a non-root user. To do this,
 the nodes' DAC rules must be configured to allow device plugin socket creation and kubelet registration.
 Furthermore, the deployment's `securityContext` must be configured with appropriate `runAsUser/runAsGroup`.
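
To make the annotation format above concrete, here is an illustrative, stand-alone sketch of splitting a `gas-container-cards` value on the documented separators (`|` between containers, `,` between cards). The helper name is hypothetical and is not part of the plugin or the scheduler extender.

```go
// Illustrative only: parse the gas-container-cards annotation format.
package main

import (
	"fmt"
	"strings"
)

// cardsPerContainer returns one slice of card names per container.
func cardsPerContainer(annotation string) [][]string {
	containers := strings.Split(annotation, "|")
	result := make([][]string, 0, len(containers))

	for _, c := range containers {
		result = append(result, strings.Split(c, ","))
	}

	return result
}

func main() {
	// The README example: two containers, two cards each.
	fmt.Println(cardsPerContainer("card0,card1|card2,card3"))
	// Prints: [[card0 card1] [card2 card3]]
}
```
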

cmd/gpu_plugin/gpu_plugin.go

Lines changed: 39 additions & 4 deletions
@@ -28,6 +28,7 @@ import (
 	"k8s.io/klog/v2"
 	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
 
+	"github.com/intel/intel-device-plugins-for-kubernetes/cmd/gpu_plugin/rm"
 	dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
 )
 
@@ -51,8 +52,9 @@ const (
 )
 
 type cliOptions struct {
-	sharedDevNum     int
-	enableMonitoring bool
+	sharedDevNum       int
+	enableMonitoring   bool
+	resourceManagement bool
 }
 
 type devicePlugin struct {
@@ -66,10 +68,12 @@ type devicePlugin struct {
 
 	scanTicker *time.Ticker
 	scanDone   chan bool
+
+	resMan rm.ResourceManager
 }
 
 func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugin {
-	return &devicePlugin{
+	dp := &devicePlugin{
 		sysfsDir:   sysfsDir,
 		devfsDir:   devfsDir,
 		options:    options,
@@ -78,6 +82,17 @@ func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugi
 		scanTicker: time.NewTicker(scanPeriod),
 		scanDone:   make(chan bool, 1), // buffered as we may send to it before Scan starts receiving from it
 	}
+
+	if options.resourceManagement {
+		var err error
+		dp.resMan, err = rm.NewResourceManager(monitorID, namespace+"/"+deviceType)
+		if err != nil {
+			klog.Errorf("Failed to create resource manager: %+v", err)
+			return nil
+		}
+	}
+
+	return dp
 }
 
 func (dp *devicePlugin) Scan(notifier dpapi.Notifier) error {
@@ -131,6 +146,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
 
 	var monitor []pluginapi.DeviceSpec
 	devTree := dpapi.NewDeviceTree()
+	rmDevInfos := rm.NewDeviceInfoMap()
 	for _, f := range files {
 		var nodes []pluginapi.DeviceSpec
 
@@ -179,6 +195,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
 				// Currently only one device type (i915) is supported.
 				// TODO: check model ID to differentiate device models.
 				devTree.AddDevice(deviceType, devID, deviceInfo)
+				rmDevInfos[devID] = rm.NewDeviceInfo(nodes, nil, nil)
 			}
 		}
 	}
@@ -188,18 +205,36 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
 		devTree.AddDevice(monitorType, monitorID, deviceInfo)
 	}
 
+	if dp.resMan != nil {
+		dp.resMan.SetDevInfos(rmDevInfos)
+	}
+
 	return devTree, nil
 }
 
+func (dp *devicePlugin) Allocate(request *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
+	if dp.resMan != nil {
+		return dp.resMan.ReallocateWithFractionalResources(request)
+	}
+
+	return nil, &dpapi.UseDefaultMethodError{}
+}
+
 func main() {
 	var opts cliOptions
 
 	flag.BoolVar(&opts.enableMonitoring, "enable-monitoring", false, "whether to enable 'i915_monitoring' (= all GPUs) resource")
+	flag.BoolVar(&opts.resourceManagement, "resource-manager", false, "fractional GPU resource management")
 	flag.IntVar(&opts.sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same GPU device")
 	flag.Parse()
 
 	if opts.sharedDevNum < 1 {
-		klog.Warning("The number of containers sharing the same GPU must greater than zero")
+		klog.Error("The number of containers sharing the same GPU must be greater than zero")
+		os.Exit(1)
+	}
+
+	if opts.sharedDevNum == 1 && opts.resourceManagement {
+		klog.Error("Trying to use fractional resources with shared-dev-num 1 is pointless")
 		os.Exit(1)
 	}
 
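
The `cmd/gpu_plugin/rm` package itself is not part of this excerpt. Inferred purely from the calls above and the mock in gpu_plugin_test.go below, the pieces the plugin depends on look roughly like the following sketch; it is not the actual `rm` source, and details such as the `DeviceInfo` layout and the two nil-valued `NewDeviceInfo` arguments are unknown here.

```go
// Sketch of the rm package surface, reconstructed from usage only.
package rm

import (
	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
)

// ResourceManager is what devicePlugin.resMan holds.
type ResourceManager interface {
	// Rewrites an allocation request based on the scheduler extender's
	// pod annotations (gas-ts, gas-container-cards).
	ReallocateWithFractionalResources(*pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error)
	// Receives the per-device information gathered by scan().
	SetDevInfos(DeviceInfoMap)
}

// DeviceInfo bundles the device nodes discovered for one GPU; the extra
// NewDeviceInfo arguments passed as nil in scan() are omitted because their
// types are not visible in this excerpt.
type DeviceInfo struct {
	nodes []pluginapi.DeviceSpec
}

// DeviceInfoMap maps a device ID to its DeviceInfo.
type DeviceInfoMap map[string]DeviceInfo
```
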

cmd/gpu_plugin/gpu_plugin_test.go

Lines changed: 35 additions & 0 deletions
@@ -21,7 +21,9 @@ import (
 	"testing"
 
 	"github.com/pkg/errors"
+	"k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
 
+	"github.com/intel/intel-device-plugins-for-kubernetes/cmd/gpu_plugin/rm"
 	dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
 )
 
@@ -43,6 +45,13 @@ func (n *mockNotifier) Notify(newDeviceTree dpapi.DeviceTree) {
 	n.scanDone <- true
 }
 
+type mockResourceManager struct{}
+
+func (m *mockResourceManager) ReallocateWithFractionalResources(*v1beta1.AllocateRequest) (*v1beta1.AllocateResponse, error) {
+	return &v1beta1.AllocateResponse{}, &dpapi.UseDefaultMethodError{}
+}
+func (m *mockResourceManager) SetDevInfos(rm.DeviceInfoMap) {}
+
 func createTestFiles(root string, devfsdirs, sysfsdirs []string, sysfsfiles map[string][]byte) (string, string, error) {
 	sysfs := path.Join(root, "sys")
 	devfs := path.Join(root, "dev")
@@ -65,6 +74,30 @@ func createTestFiles(root string, devfsdirs, sysfsdirs []string, sysfsfiles map[
 	return sysfs, devfs, nil
 }
 
+func TestNewDevicePlugin(t *testing.T) {
+	if newDevicePlugin("", "", cliOptions{sharedDevNum: 2, resourceManagement: false}) == nil {
+		t.Error("Failed to create plugin")
+	}
+	if newDevicePlugin("", "", cliOptions{sharedDevNum: 2, resourceManagement: true}) != nil {
+		t.Error("Unexpectedly managed to create resource management enabled plugin inside unit tests")
+	}
+}
+
+func TestAllocate(t *testing.T) {
+	plugin := newDevicePlugin("", "", cliOptions{sharedDevNum: 2, resourceManagement: false})
+	_, err := plugin.Allocate(&v1beta1.AllocateRequest{})
+	if _, ok := err.(*dpapi.UseDefaultMethodError); !ok {
+		t.Errorf("Unexpected return value: %+v", err)
+	}
+
+	// mock the rm
+	plugin.resMan = &mockResourceManager{}
+	_, err = plugin.Allocate(&v1beta1.AllocateRequest{})
+	if _, ok := err.(*dpapi.UseDefaultMethodError); !ok {
+		t.Errorf("Unexpected return value: %+v", err)
+	}
+}
+
 func TestScan(t *testing.T) {
 	tcases := []struct {
 		name string
@@ -203,6 +236,8 @@ func TestScan(t *testing.T) {
 			scanDone: plugin.scanDone,
 		}
 
+		plugin.resMan = &mockResourceManager{}
+
 		err = plugin.Scan(notifier)
 		// Scans in GPU plugin never fail
 		if err != nil {
