
Commit 54d0a22

chore: comment the code and simplify some things
this commit adds comments to the low and high node utilization plugins. it also simplifies things a little where that was possible without affecting behavior too much.
1 parent: 87ba84b

File tree

3 files changed: +658 -393 lines


pkg/framework/plugins/nodeutilization/highnodeutilization.go

Lines changed: 120 additions & 96 deletions
@@ -34,203 +34,227 @@ import (
 
 const HighNodeUtilizationPluginName = "HighNodeUtilization"
 
-// HighNodeUtilization evicts pods from under utilized nodes so that scheduler can schedule according to its plugin.
-// Note that CPU/Memory requests are used to calculate nodes' utilization and not the actual resource usage.
+// this line makes sure that HighNodeUtilization implements the BalancePlugin
+// interface.
+var _ frameworktypes.BalancePlugin = &HighNodeUtilization{}
 
+// HighNodeUtilization evicts pods from underutilized nodes so that the
+// scheduler can schedule according to its plugin. Note that CPU/Memory
+// requests are used to calculate nodes' utilization, not the actual resource
+// usage.
 type HighNodeUtilization struct {
-    handle                   frameworktypes.Handle
-    args                     *HighNodeUtilizationArgs
-    podFilter                func(pod *v1.Pod) bool
-    underutilizationCriteria []interface{}
-    resourceNames            []v1.ResourceName
-    extendedResourceNames    []v1.ResourceName
-    targetThresholds         api.ResourceThresholds
-    usageClient              usageClient
+    handle         frameworktypes.Handle
+    args           *HighNodeUtilizationArgs
+    podFilter      func(pod *v1.Pod) bool
+    criteria       []any
+    resourceNames  []v1.ResourceName
+    highThresholds api.ResourceThresholds
+    usageClient    usageClient
 }
 
-var _ frameworktypes.BalancePlugin = &HighNodeUtilization{}
-
-// NewHighNodeUtilization builds plugin from its arguments while passing a handle
-func NewHighNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plugin, error) {
-    highNodeUtilizatioArgs, ok := args.(*HighNodeUtilizationArgs)
+// NewHighNodeUtilization builds the plugin from its arguments while passing a handle.
+func NewHighNodeUtilization(
+    genericArgs runtime.Object, handle frameworktypes.Handle,
+) (frameworktypes.Plugin, error) {
+    args, ok := genericArgs.(*HighNodeUtilizationArgs)
     if !ok {
-        return nil, fmt.Errorf("want args to be of type HighNodeUtilizationArgs, got %T", args)
+        return nil, fmt.Errorf(
+            "want args to be of type HighNodeUtilizationArgs, got %T",
+            genericArgs,
+        )
     }
 
-    targetThresholds := make(api.ResourceThresholds)
-    setDefaultForThresholds(highNodeUtilizatioArgs.Thresholds, targetThresholds)
-    resourceNames := getResourceNames(highNodeUtilizatioArgs.Thresholds)
-
-    underutilizationCriteria := []interface{}{
-        "CPU", highNodeUtilizatioArgs.Thresholds[v1.ResourceCPU],
-        "Mem", highNodeUtilizatioArgs.Thresholds[v1.ResourceMemory],
-        "Pods", highNodeUtilizatioArgs.Thresholds[v1.ResourcePods],
+    // this plugin receives only one set of thresholds, but the
+    // nodeutilization package was made to take two thresholds into account,
+    // one for low and another for high usage. here we make sure we set the
+    // high threshold to the maximum value for all resources for which we
+    // have a threshold.
+    highThresholds := make(api.ResourceThresholds)
+    for rname := range args.Thresholds {
+        highThresholds[rname] = MaxResourcePercentage
    }
-    for name := range highNodeUtilizatioArgs.Thresholds {
-        if !nodeutil.IsBasicResource(name) {
-            underutilizationCriteria = append(underutilizationCriteria, string(name), int64(highNodeUtilizatioArgs.Thresholds[name]))
-        }
+
+    // criteria holds the thresholds used to determine whether a node is
+    // underutilized. it is kept only for logging purposes.
+    criteria := []any{}
+    for rname, rvalue := range args.Thresholds {
+        criteria = append(criteria, rname, rvalue)
     }
 
-    podFilter, err := podutil.NewOptions().
+    podFilter, err := podutil.
+        NewOptions().
         WithFilter(handle.Evictor().Filter).
         BuildFilterFunc()
     if err != nil {
         return nil, fmt.Errorf("error initializing pod filter function: %v", err)
     }
 
-    extendedResourceNames := uniquifyResourceNames(
-        append(resourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods),
+    // resourceNames is a list of all resource names this plugin cares about:
+    // the resources for which we have a threshold plus all the resources we
+    // consider basic (cpu, memory, pods).
+    resourceNames := uniquifyResourceNames(
+        append(
+            getResourceNames(args.Thresholds),
+            v1.ResourceCPU,
+            v1.ResourceMemory,
+            v1.ResourcePods,
+        ),
     )
 
     return &HighNodeUtilization{
-        handle:                   handle,
-        args:                     highNodeUtilizatioArgs,
-        resourceNames:            resourceNames,
-        extendedResourceNames:    extendedResourceNames,
-        targetThresholds:         targetThresholds,
-        underutilizationCriteria: underutilizationCriteria,
-        podFilter:                podFilter,
-        usageClient:              newRequestedUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc()),
+        handle:         handle,
+        args:           args,
+        resourceNames:  resourceNames,
+        highThresholds: highThresholds,
+        criteria:       criteria,
+        podFilter:      podFilter,
+        usageClient: newRequestedUsageClient(
+            resourceNames,
+            handle.GetPodsAssignedToNodeFunc(),
+        ),
     }, nil
 }
 
-// Name retrieves the plugin name
+// Name retrieves the plugin name.
 func (h *HighNodeUtilization) Name() string {
     return HighNodeUtilizationPluginName
 }
 
-// Balance extension point implementation for the plugin
+// Balance holds the main logic of the plugin. It evicts pods from
+// underutilized nodes, concentrating pods in fewer nodes so that fewer
+// nodes are used.
 func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
     if err := h.usageClient.sync(ctx, nodes); err != nil {
         return &frameworktypes.Status{
             Err: fmt.Errorf("error getting node usage: %v", err),
         }
     }
 
+    // take a picture of the current state of the nodes; everything else
+    // here is based on this snapshot.
     nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, h.usageClient)
     capacities := referencedResourceListForNodesCapacity(nodes)
 
+    // node usages are not expressed as percentages of capacity. we need to
+    // normalize them to be able to compare them with the thresholds, which
+    // the user already provides in percentages.
     usage, thresholds := assessNodesUsagesAndStaticThresholds(
-        nodesUsageMap,
-        capacities,
-        h.args.Thresholds,
-        h.targetThresholds,
+        nodesUsageMap, capacities, h.args.Thresholds, h.highThresholds,
     )
 
+    // classify nodes into two groups: underutilized and schedulable. we will
+    // later try to move pods from the first group to the second.
     nodeGroups := classifyNodeUsage(
-        usage,
-        thresholds,
+        usage, thresholds,
         []classifierFnc{
-            // underutilized nodes
+            // underutilized nodes.
             func(nodeName string, usage, threshold api.ResourceThresholds) bool {
                 return isNodeBelowThreshold(usage, threshold)
             },
-            // every other node that is schedulable
+            // schedulable nodes.
             func(nodeName string, usage, threshold api.ResourceThresholds) bool {
                 if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
-                    klog.V(2).InfoS("Node is unschedulable", "node", klog.KObj(nodesMap[nodeName]))
+                    klog.V(2).InfoS(
+                        "Node is unschedulable",
+                        "node", klog.KObj(nodesMap[nodeName]),
+                    )
                     return false
                 }
                 return true
             },
         },
     )
 
-    // convert groups node []NodeInfo
+    // the nodeutilization package works by means of NodeInfo structures,
+    // which carry usage and available resources for each node. now that we
+    // have classified the nodes, we can build the NodeInfo structures for
+    // each group.
     nodeInfos := make([][]NodeInfo, 2)
     category := []string{"underutilized", "overutilized"}
     for i := range nodeGroups {
         for nodeName := range nodeGroups[i] {
             klog.InfoS(
-                fmt.Sprintf("Node is %s", category[i]),
+                "Node has been classified",
+                "category", category[i],
                 "node", klog.KObj(nodesMap[nodeName]),
                 "usage", nodesUsageMap[nodeName],
                 "usagePercentage", normalizer.Round(usage[nodeName]),
             )
             nodeInfos[i] = append(nodeInfos[i], NodeInfo{
                 NodeUsage: NodeUsage{
                     node:    nodesMap[nodeName],
-                    usage:   nodesUsageMap[nodeName], // get back the original node usage
+                    usage:   nodesUsageMap[nodeName],
                     allPods: podListMap[nodeName],
                 },
-                thresholds: NodeThresholds{
-                    lowResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][0], capacities[nodeName], h.extendedResourceNames),
-                    highResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][1], capacities[nodeName], h.extendedResourceNames),
-                },
+                available: capNodeCapacitiesToThreshold(
+                    nodesMap[nodeName],
+                    thresholds[nodeName][1],
+                    h.resourceNames,
+                ),
             })
         }
     }
 
-    sourceNodes := nodeInfos[0]
-    highNodes := nodeInfos[1]
+    lowNodes, schedulableNodes := nodeInfos[0], nodeInfos[1]
 
-    // log message in one line
-    klog.V(1).InfoS("Criteria for a node below target utilization", h.underutilizationCriteria...)
-    klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(sourceNodes))
+    klog.V(1).InfoS("Criteria for a node below target utilization", h.criteria...)
+    klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))
 
-    if len(sourceNodes) == 0 {
-        klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
+    if len(lowNodes) == 0 {
+        klog.V(1).InfoS(
+            "No node is underutilized, nothing to do here, you might tune your thresholds further",
+        )
         return nil
     }
-    if len(sourceNodes) <= h.args.NumberOfNodes {
-        klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(sourceNodes), "numberOfNodes", h.args.NumberOfNodes)
+
+    if len(lowNodes) <= h.args.NumberOfNodes {
+        klog.V(1).InfoS(
+            "Number of underutilized nodes is less than or equal to NumberOfNodes, nothing to do here",
+            "underutilizedNodes", len(lowNodes),
+            "numberOfNodes", h.args.NumberOfNodes,
+        )
         return nil
     }
-    if len(sourceNodes) == len(nodes) {
+
+    if len(lowNodes) == len(nodes) {
         klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
         return nil
     }
-    if len(highNodes) == 0 {
+
+    if len(schedulableNodes) == 0 {
         klog.V(1).InfoS("No node is available to schedule the pods, nothing to do here")
         return nil
     }
 
-    // stop if the total available usage has dropped to zero - no more pods can be scheduled
-    continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage api.ReferencedResourceList) bool {
-        for name := range totalAvailableUsage {
-            if totalAvailableUsage[name].CmpInt64(0) < 1 {
+    // stops the eviction process once the total available capacity has
+    // dropped to zero - no more pods can be scheduled. a stop is signaled
+    // as soon as any of the available resources drops to zero.
+    continueEvictionCond := func(_ NodeInfo, avail api.ReferencedResourceList) bool {
+        for name := range avail {
+            if avail[name].CmpInt64(0) < 1 {
                 return false
             }
         }
-
         return true
     }
 
-    // Sort the nodes by the usage in ascending order
-    sortNodesByUsage(sourceNodes, true)
+    // sorts the nodes by usage in ascending order.
+    sortNodesByUsage(lowNodes, true)
 
     evictPodsFromSourceNodes(
         ctx,
         h.args.EvictableNamespaces,
-        sourceNodes,
-        highNodes,
+        lowNodes,
+        schedulableNodes,
         h.handle.Evictor(),
         evictions.EvictOptions{StrategyName: HighNodeUtilizationPluginName},
         h.podFilter,
-        h.extendedResourceNames,
+        h.resourceNames,
         continueEvictionCond,
         h.usageClient,
         nil,
     )
 
     return nil
 }
-
-func setDefaultForThresholds(thresholds, targetThresholds api.ResourceThresholds) {
-    if _, ok := thresholds[v1.ResourcePods]; ok {
-        targetThresholds[v1.ResourcePods] = MaxResourcePercentage
-    }
-    if _, ok := thresholds[v1.ResourceCPU]; ok {
-        targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
-    }
-    if _, ok := thresholds[v1.ResourceMemory]; ok {
-        targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
-    }
-
-    for name := range thresholds {
-        if !nodeutil.IsBasicResource(name) {
-            targetThresholds[name] = MaxResourcePercentage
-        }
-    }
-}
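
A note on the simplification, with two small sketches (illustrations written for this page, not code from the repository). The removed setDefaultForThresholds helper special-cased pods, cpu, and memory before looping over the remaining resources, but every branch did the same thing: set the high threshold to MaxResourcePercentage for any resource that has a user-provided threshold. The inline loop in the new constructor is therefore equivalent. Below is a minimal standalone sketch of that effect; Percentage, ResourceThresholds, and the sample values are simplified stand-ins for the descheduler's api package, not the real definitions.

package main

import "fmt"

// simplified stand-ins for the descheduler's api types; these are
// assumptions for illustration, not the real definitions.
type Percentage float64
type ResourceThresholds map[string]Percentage

const MaxResourcePercentage = Percentage(100)

func main() {
    // user-provided thresholds, as in HighNodeUtilizationArgs.Thresholds.
    thresholds := ResourceThresholds{"cpu": 20, "memory": 20, "nvidia.com/gpu": 50}

    // the single loop that replaces setDefaultForThresholds: every resource
    // with a user threshold gets a high threshold of 100%.
    highThresholds := make(ResourceThresholds)
    for rname := range thresholds {
        highThresholds[rname] = MaxResourcePercentage
    }
    fmt.Println(highThresholds) // map[cpu:100 memory:100 nvidia.com/gpu:100]

    // criteria mirrors the new constructor: key/value pairs kept only for
    // logging.
    criteria := []any{}
    for rname, rvalue := range thresholds {
        criteria = append(criteria, rname, rvalue)
    }
    fmt.Println(criteria...)
}

The rewritten continueEvictionCond keeps the original CmpInt64(0) < 1 test, which stops eviction as soon as any available quantity is zero or negative. A small illustration using the real resource.Quantity type from k8s.io/apimachinery; the sample values are made up.

package main

import (
    "fmt"

    "k8s.io/apimachinery/pkg/api/resource"
)

func main() {
    // CmpInt64(0) returns -1, 0, or 1; eviction continues only while every
    // available quantity is strictly greater than zero.
    for _, s := range []string{"500m", "0", "-200m"} {
        q := resource.MustParse(s)
        fmt.Printf("available=%s continue=%v\n", s, q.CmpInt64(0) == 1)
    }
}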
