@@ -28,6 +28,7 @@ import (
2828 "k8s.io/klog/v2"
2929 pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
3030
31+ "github.com/intel/intel-device-plugins-for-kubernetes/cmd/gpu_plugin/rm"
3132 dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
3233)
3334
@@ -51,8 +52,9 @@ const (
5152)
5253
// cliOptions holds the values parsed from the plugin's command-line flags.
type cliOptions struct {
	// sharedDevNum is set by the "shared-dev-num" flag: the number of
	// containers sharing the same GPU device.
	sharedDevNum int
	// enableMonitoring is set by the "enable-monitoring" flag: whether to
	// enable the 'i915_monitoring' (= all GPUs) resource.
	enableMonitoring bool
	// resourceManagement is set by the "resource-manager" flag: fractional
	// GPU resource management.
	resourceManagement bool
}
5759
5860type devicePlugin struct {
@@ -66,10 +68,12 @@ type devicePlugin struct {
6668
6769 scanTicker * time.Ticker
6870 scanDone chan bool
71+
72+ resMan rm.ResourceManager
6973}
7074
7175func newDevicePlugin (sysfsDir , devfsDir string , options cliOptions ) * devicePlugin {
72- return & devicePlugin {
76+ dp := & devicePlugin {
7377 sysfsDir : sysfsDir ,
7478 devfsDir : devfsDir ,
7579 options : options ,
@@ -78,6 +82,17 @@ func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugi
7882 scanTicker : time .NewTicker (scanPeriod ),
7983 scanDone : make (chan bool , 1 ), // buffered as we may send to it before Scan starts receiving from it
8084 }
85+
86+ if options .resourceManagement {
87+ var err error
88+ dp .resMan , err = rm .NewResourceManager (monitorID , namespace + "/" + deviceType )
89+ if err != nil {
90+ klog .Errorf ("Failed to create resource manager: %+v" , err )
91+ return nil
92+ }
93+ }
94+
95+ return dp
8196}
8297
8398func (dp * devicePlugin ) Scan (notifier dpapi.Notifier ) error {
@@ -131,6 +146,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
131146
132147 var monitor []pluginapi.DeviceSpec
133148 devTree := dpapi .NewDeviceTree ()
149+ rmDevInfos := rm .NewDeviceInfoMap ()
134150 for _ , f := range files {
135151 var nodes []pluginapi.DeviceSpec
136152
@@ -179,6 +195,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
179195 // Currently only one device type (i915) is supported.
180196 // TODO: check model ID to differentiate device models.
181197 devTree .AddDevice (deviceType , devID , deviceInfo )
198+ rmDevInfos [devID ] = rm .NewDeviceInfo (nodes , nil , nil )
182199 }
183200 }
184201 }
@@ -188,18 +205,36 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
188205 devTree .AddDevice (monitorType , monitorID , deviceInfo )
189206 }
190207
208+ if dp .resMan != nil {
209+ dp .resMan .SetDevInfos (rmDevInfos )
210+ }
211+
191212 return devTree , nil
192213}
193214
215+ func (dp * devicePlugin ) Allocate (request * pluginapi.AllocateRequest ) (* pluginapi.AllocateResponse , error ) {
216+ if dp .resMan != nil {
217+ return dp .resMan .ReallocateWithFractionalResources (request )
218+ }
219+
220+ return nil , & dpapi.UseDefaultMethodError {}
221+ }
222+
194223func main () {
195224 var opts cliOptions
196225
197226 flag .BoolVar (& opts .enableMonitoring , "enable-monitoring" , false , "whether to enable 'i915_monitoring' (= all GPUs) resource" )
227+ flag .BoolVar (& opts .resourceManagement , "resource-manager" , false , "fractional GPU resource management" )
198228 flag .IntVar (& opts .sharedDevNum , "shared-dev-num" , 1 , "number of containers sharing the same GPU device" )
199229 flag .Parse ()
200230
201231 if opts .sharedDevNum < 1 {
202- klog .Warning ("The number of containers sharing the same GPU must greater than zero" )
232+ klog .Error ("The number of containers sharing the same GPU must greater than zero" )
233+ os .Exit (1 )
234+ }
235+
236+ if opts .sharedDevNum == 1 && opts .resourceManagement {
237+ klog .Error ("Trying to use fractional resources with shared-dev-num 1 is pointless" )
203238 os .Exit (1 )
204239 }
205240
0 commit comments