@@ -34,203 +34,227 @@ import (
const HighNodeUtilizationPluginName = "HighNodeUtilization"
- // HighNodeUtilization evicts pods from under utilized nodes so that scheduler can schedule according to its plugin.
- // Note that CPU/Memory requests are used to calculate nodes' utilization and not the actual resource usage.
+ // this line makes sure that HighNodeUtilization implements the BalancePlugin
+ // interface.
+ var _ frameworktypes.BalancePlugin = &HighNodeUtilization{}
+
+ // HighNodeUtilization evicts pods from under utilized nodes so that scheduler
+ // can schedule according to its plugin. Note that CPU/Memory requests are used
+ // to calculate nodes' utilization and not the actual resource usage.
type HighNodeUtilization struct {
-     handle                   frameworktypes.Handle
-     args                     *HighNodeUtilizationArgs
-     podFilter                func(pod *v1.Pod) bool
-     underutilizationCriteria []interface{}
-     resourceNames            []v1.ResourceName
-     extendedResourceNames    []v1.ResourceName
-     targetThresholds         api.ResourceThresholds
-     usageClient              usageClient
+     handle         frameworktypes.Handle
+     args           *HighNodeUtilizationArgs
+     podFilter      func(pod *v1.Pod) bool
+     criteria       []any
+     resourceNames  []v1.ResourceName
+     highThresholds api.ResourceThresholds
+     usageClient    usageClient
}
- var _ frameworktypes.BalancePlugin = &HighNodeUtilization{}
-
- // NewHighNodeUtilization builds plugin from its arguments while passing a handle
- func NewHighNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plugin, error) {
-     highNodeUtilizatioArgs, ok := args.(*HighNodeUtilizationArgs)
+ // NewHighNodeUtilization builds plugin from its arguments while passing a handle.
+ func NewHighNodeUtilization(
+     genericArgs runtime.Object, handle frameworktypes.Handle,
+ ) (frameworktypes.Plugin, error) {
+     args, ok := genericArgs.(*HighNodeUtilizationArgs)
    if !ok {
-         return nil, fmt.Errorf("want args to be of type HighNodeUtilizationArgs, got %T", args)
+         return nil, fmt.Errorf(
+             "want args to be of type HighNodeUtilizationArgs, got %T",
+             genericArgs,
+         )
    }
-     targetThresholds := make(api.ResourceThresholds)
-     setDefaultForThresholds(highNodeUtilizatioArgs.Thresholds, targetThresholds)
-     resourceNames := getResourceNames(highNodeUtilizatioArgs.Thresholds)
-
-     underutilizationCriteria := []interface{}{
-         "CPU", highNodeUtilizatioArgs.Thresholds[v1.ResourceCPU],
-         "Mem", highNodeUtilizatioArgs.Thresholds[v1.ResourceMemory],
-         "Pods", highNodeUtilizatioArgs.Thresholds[v1.ResourcePods],
+     // this plugin worries only about thresholds but the nodeplugins
+     // package was made to take two thresholds into account, one for low
+     // and another for high usage. here we make sure we set the high
+     // threshold to the maximum value for all resources for which we have a
+     // threshold.
+     highThresholds := make(api.ResourceThresholds)
+     for rname := range args.Thresholds {
+         highThresholds[rname] = MaxResourcePercentage
    }
-     for name := range highNodeUtilizatioArgs.Thresholds {
-         if !nodeutil.IsBasicResource(name) {
-             underutilizationCriteria = append(underutilizationCriteria, string(name), int64(highNodeUtilizatioArgs.Thresholds[name]))
-         }
+
+     // criteria is a list of the thresholds used to determine if a node
+     // is underutilized. it is used only for logging purposes.
+     criteria := []any{}
+     for rname, rvalue := range args.Thresholds {
+         criteria = append(criteria, rname, rvalue)
    }
-     podFilter, err := podutil.NewOptions().
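+     // reuse the evictor's filter so that only pods the evictor considers
+     // evictable are taken into account by this plugin.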
+     podFilter, err := podutil.
+         NewOptions().
        WithFilter(handle.Evictor().Filter).
        BuildFilterFunc()
    if err != nil {
        return nil, fmt.Errorf("error initializing pod filter function: %v", err)
    }
-     extendedResourceNames := uniquifyResourceNames(
-         append(resourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods),
+     // resourceNames is a list of all resource names this plugin cares
+     // about. we care about the resources for which we have a threshold and
+     // all the resources we consider basic (cpu, memory, pods).
+     resourceNames := uniquifyResourceNames(
+         append(
+             getResourceNames(args.Thresholds),
+             v1.ResourceCPU,
+             v1.ResourceMemory,
+             v1.ResourcePods,
+         ),
    )

    return &HighNodeUtilization{
-         handle:                   handle,
-         args:                     highNodeUtilizatioArgs,
-         resourceNames:            resourceNames,
-         extendedResourceNames:    extendedResourceNames,
-         targetThresholds:         targetThresholds,
-         underutilizationCriteria: underutilizationCriteria,
-         podFilter:                podFilter,
-         usageClient:              newRequestedUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc()),
+         handle:         handle,
+         args:           args,
+         resourceNames:  resourceNames,
+         highThresholds: highThresholds,
+         criteria:       criteria,
+         podFilter:      podFilter,
+         usageClient: newRequestedUsageClient(
+             resourceNames,
+             handle.GetPodsAssignedToNodeFunc(),
+         ),
    }, nil
}
- // Name retrieves the plugin name
+ // Name retrieves the plugin name.
func (h *HighNodeUtilization) Name() string {
    return HighNodeUtilizationPluginName
}

- // Balance extension point implementation for the plugin
+ // Balance holds the main logic of the plugin. It evicts pods from under
+ // utilized nodes. The goal here is to concentrate pods onto a smaller
+ // number of nodes.
func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
    if err := h.usageClient.sync(ctx, nodes); err != nil {
        return &frameworktypes.Status{
            Err: fmt.Errorf("error getting node usage: %v", err),
        }
    }

+     // take a picture of the current state of the nodes; everything else
+     // here is based on this snapshot.
    nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, h.usageClient)
    capacities := referencedResourceListForNodesCapacity(nodes)

+     // node usages are not presented as percentages over the capacity.
+     // we need to normalize them to be able to compare them with the
+     // thresholds. thresholds are already provided by the user in
+     // percentage.
    usage, thresholds := assessNodesUsagesAndStaticThresholds(
-         nodesUsageMap,
-         capacities,
-         h.args.Thresholds,
-         h.targetThresholds,
+         nodesUsageMap, capacities, h.args.Thresholds, h.highThresholds,
    )

+     // classify nodes in two groups: underutilized and schedulable. we will
+     // later try to move pods from the first group to the second.
    nodeGroups := classifyNodeUsage(
-         usage,
-         thresholds,
+         usage, thresholds,
        []classifierFnc{
-             // underutilized nodes
+             // underutilized nodes.
            func(nodeName string, usage, threshold api.ResourceThresholds) bool {
                return isNodeBelowThreshold(usage, threshold)
            },
-             // every other node that is schedulable
+             // schedulable nodes.
            func(nodeName string, usage, threshold api.ResourceThresholds) bool {
                if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
-                     klog.V(2).InfoS("Node is unschedulable", "node", klog.KObj(nodesMap[nodeName]))
+                     klog.V(2).InfoS(
+                         "Node is unschedulable",
+                         "node", klog.KObj(nodesMap[nodeName]),
+                     )
                    return false
                }
                return true
            },
        },
    )

-     // convert groups node []NodeInfo
+     // the nodeplugin package works by means of NodeInfo structures. these
+     // structures hold information about the nodes. now that we have
+     // classified the nodes, we can build the NodeInfo structures for each
+     // group. NodeInfo structs carry usage and available resources for
+     // each node.
    nodeInfos := make([][]NodeInfo, 2)
    category := []string{"underutilized", "overutilized"}
    for i := range nodeGroups {
        for nodeName := range nodeGroups[i] {
            klog.InfoS(
-                 fmt.Sprintf("Node is %s", category[i]),
+                 "Node has been classified",
+                 "category", category[i],
                "node", klog.KObj(nodesMap[nodeName]),
                "usage", nodesUsageMap[nodeName],
                "usagePercentage", normalizer.Round(usage[nodeName]),
            )
            nodeInfos[i] = append(nodeInfos[i], NodeInfo{
                NodeUsage: NodeUsage{
                    node: nodesMap[nodeName],
-                     usage: nodesUsageMap[nodeName], // get back the original node usage
+                     usage: nodesUsageMap[nodeName],
                    allPods: podListMap[nodeName],
                },
-                 thresholds: NodeThresholds{
-                     lowResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][0], capacities[nodeName], h.extendedResourceNames),
-                     highResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][1], capacities[nodeName], h.extendedResourceNames),
-                 },
+                 available: capNodeCapacitiesToThreshold(
+                     nodesMap[nodeName],
+                     thresholds[nodeName][1],
+                     h.resourceNames,
+                 ),
            })
        }
    }

-     sourceNodes := nodeInfos[0]
-     highNodes := nodeInfos[1]
+     lowNodes, schedulableNodes := nodeInfos[0], nodeInfos[1]

-     // log message in one line
-     klog.V(1).InfoS("Criteria for a node below target utilization", h.underutilizationCriteria...)
-     klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(sourceNodes))
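+     // h.criteria alternates resource names and threshold values, so it is
+     // expanded below as the key/value pairs of the structured log entry.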
+     klog.V(1).InfoS("Criteria for a node below target utilization", h.criteria...)
+     klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))

-     if len(sourceNodes) == 0 {
-         klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
+     if len(lowNodes) == 0 {
+         klog.V(1).InfoS(
+             "No node is underutilized, nothing to do here, you might tune your thresholds further",
+         )
        return nil
    }
-     if len(sourceNodes) <= h.args.NumberOfNodes {
-         klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(sourceNodes), "numberOfNodes", h.args.NumberOfNodes)
+
+     if len(lowNodes) <= h.args.NumberOfNodes {
+         klog.V(1).InfoS(
+             "Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here",
+             "underutilizedNodes", len(lowNodes),
+             "numberOfNodes", h.args.NumberOfNodes,
+         )
        return nil
    }
-     if len(sourceNodes) == len(nodes) {
+
+     if len(lowNodes) == len(nodes) {
        klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
        return nil
    }
-     if len(highNodes) == 0 {
+
+     if len(schedulableNodes) == 0 {
        klog.V(1).InfoS("No node is available to schedule the pods, nothing to do here")
        return nil
    }

-     // stop if the total available usage has dropped to zero - no more pods can be scheduled
-     continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage api.ReferencedResourceList) bool {
-         for name := range totalAvailableUsage {
-             if totalAvailableUsage[name].CmpInt64(0) < 1 {
+     // stops the eviction process if the total available capacity has
+     // dropped to zero - no more pods can be scheduled. this will signal
+     // to stop if any of the available resources has dropped to zero.
+     continueEvictionCond := func(_ NodeInfo, avail api.ReferencedResourceList) bool {
+         for name := range avail {
+             if avail[name].CmpInt64(0) < 1 {
                return false
            }
        }
-
        return true
    }

-     // Sort the nodes by the usage in ascending order
-     sortNodesByUsage(sourceNodes, true)
+     // sorts the nodes by the usage in ascending order.
+     sortNodesByUsage(lowNodes, true)

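+     // finally, evict pods from the underutilized nodes so the scheduler can
+     // place them on the schedulable ones, stopping once continueEvictionCond
+     // reports that no destination capacity is left.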
    evictPodsFromSourceNodes(
        ctx,
        h.args.EvictableNamespaces,
-         sourceNodes,
-         highNodes,
+         lowNodes,
+         schedulableNodes,
        h.handle.Evictor(),
        evictions.EvictOptions{StrategyName: HighNodeUtilizationPluginName},
        h.podFilter,
-         h.extendedResourceNames,
+         h.resourceNames,
        continueEvictionCond,
        h.usageClient,
        nil,
    )

    return nil
}
-
- func setDefaultForThresholds(thresholds, targetThresholds api.ResourceThresholds) {
-     if _, ok := thresholds[v1.ResourcePods]; ok {
-         targetThresholds[v1.ResourcePods] = MaxResourcePercentage
-     }
-     if _, ok := thresholds[v1.ResourceCPU]; ok {
-         targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
-     }
-     if _, ok := thresholds[v1.ResourceMemory]; ok {
-         targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
-     }
-
-     for name := range thresholds {
-         if !nodeutil.IsBasicResource(name) {
-             targetThresholds[name] = MaxResourcePercentage
-         }
-     }
- }