Skip to content

Commit fa5b9da

Browse files
uniemimu authored and tkatila committed
randomize cleanup interval and increase it to 20 minutes
Signed-off-by: Ukri Niemimuukko <ukri.niemimuukko@intel.com>
1 parent 09504b1 commit fa5b9da

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,8 @@ package rm
1616

1717
import (
1818
"context"
19+
"crypto/rand"
20+
"math/big"
1921
"os"
2022
"sort"
2123
"strconv"
@@ -141,17 +143,27 @@ func NewResourceManager(skipID, fullResourceName string) (ResourceManager, error
141143
prGetClientFunc: podresources.GetV1Client,
142144
assignments: make(map[string]podAssignmentDetails),
143145
retryTimeout: 1 * time.Second,
144-
cleanupInterval: 2 * time.Minute,
146+
cleanupInterval: 20 * time.Minute,
145147
}
146148

147149
klog.Info("GPU device plugin resource manager enabled")
148150

149151
go func() {
150-
ticker := time.NewTicker(rm.cleanupInterval)
152+
getRandDuration := func() time.Duration {
153+
cleanupIntervalSeconds := int(rm.cleanupInterval.Seconds())
154+
155+
n, _ := rand.Int(rand.Reader, big.NewInt(int64(cleanupIntervalSeconds)))
156+
157+
return rm.cleanupInterval/2 + time.Duration(n.Int64())*time.Second
158+
}
159+
160+
ticker := time.NewTicker(getRandDuration())
151161

152162
for range ticker.C {
153163
klog.V(4).Info("Running cleanup")
154164

165+
ticker.Reset(getRandDuration())
166+
155167
// Gather both running and pending pods. It might happen that
156168
// cleanup is triggered between GetPreferredAllocation and Allocate
157169
// and it would remove the assignment data for the soon-to-be allocated pod

0 commit comments

Comments
 (0)