Skip to content

Commit cceabf8

Browse files
committed
Change auto-egress-ip pkt_mark to be based on VNID, avoid masqueradeBit
If the iptables rules get reordered after a flush, then it's possible that kube-proxy rules that look at MasqueradeBit will accidentally match a packet that was intended for an auto-egress-ip rule. To avoid that, change the code to avoid using that bit of pkt_mark. This means we now only have 31 bits to work with instead of 32, so make the mark be based on the (24-bit) VNID rather than the (32-bit) egress IP address.
1 parent 09939c6 commit cceabf8

File tree

6 files changed

+117
-35
lines changed

6 files changed

+117
-35
lines changed

pkg/cmd/server/kubernetes/network/sdn_linux.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ func NewSDNInterfaces(options configapi.NodeConfig, networkClient networkclient.
5555
KubeInformers: internalKubeInformers,
5656
NetworkInformers: internalNetworkInformers,
5757
IPTablesSyncPeriod: proxyconfig.IPTables.SyncPeriod.Duration,
58+
MasqueradeBit: proxyconfig.IPTables.MasqueradeBit,
5859
ProxyMode: proxyconfig.Mode,
5960
EnableHostports: enableHostports,
6061
Recorder: recorder,

pkg/network/node/egressip.go

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,9 @@ type namespaceEgress struct {
4040
type egressIPWatcher struct {
4141
sync.Mutex
4242

43-
localIP string
44-
oc *ovsController
43+
oc *ovsController
44+
localIP string
45+
masqueradeBit uint32
4546

4647
networkInformers networkinformers.SharedInformerFactory
4748
iptables *NodeIPTables
@@ -60,17 +61,21 @@ type egressIPWatcher struct {
6061
testModeChan chan string
6162
}
6263

63-
func newEgressIPWatcher(localIP string, oc *ovsController) *egressIPWatcher {
64-
return &egressIPWatcher{
65-
localIP: localIP,
64+
func newEgressIPWatcher(oc *ovsController, localIP string, masqueradeBit *int32) *egressIPWatcher {
65+
eip := &egressIPWatcher{
6666
oc: oc,
67+
localIP: localIP,
6768

6869
nodesByNodeIP: make(map[string]*nodeEgress),
6970
nodesByEgressIP: make(map[string]*nodeEgress),
7071

7172
namespacesByVNID: make(map[uint32]*namespaceEgress),
7273
namespacesByEgressIP: make(map[string]*namespaceEgress),
7374
}
75+
if masqueradeBit != nil {
76+
eip.masqueradeBit = 1 << uint32(*masqueradeBit)
77+
}
78+
return eip
7479
}
7580

7681
func (eip *egressIPWatcher) Start(networkInformers networkinformers.SharedInformerFactory, iptables *NodeIPTables) error {
@@ -88,13 +93,16 @@ func (eip *egressIPWatcher) Start(networkInformers networkinformers.SharedInform
8893
return nil
8994
}
9095

91-
func ipToHex(ip string) string {
92-
bytes := net.ParseIP(ip)
93-
if bytes == nil {
94-
return "invalid IP: shouldn't happen"
96+
// Convert vnid to a hex value that is not 0, does not have masqueradeBit set, and isn't
97+
// the same value as would be returned for any other valid vnid.
98+
func getMarkForVNID(vnid, masqueradeBit uint32) string {
99+
if vnid == 0 {
100+
vnid = 0xff000000
101+
}
102+
if (vnid & masqueradeBit) != 0 {
103+
vnid = (vnid | 0x01000000) ^ masqueradeBit
95104
}
96-
bytes = bytes.To4()
97-
return fmt.Sprintf("0x%02x%02x%02x%02x", bytes[0], bytes[1], bytes[2], bytes[3])
105+
return fmt.Sprintf("0x%08x", vnid)
98106
}
99107

100108
func (eip *egressIPWatcher) watchHostSubnets() {
@@ -167,14 +175,14 @@ func (eip *egressIPWatcher) maybeAddEgressIP(egressIP string) {
167175
return
168176
}
169177

170-
hex := ipToHex(egressIP)
178+
mark := getMarkForVNID(ns.vnid, eip.masqueradeBit)
171179
nodeIP := ""
172180

173181
if node != nil && !node.assignedIPs.Has(egressIP) {
174182
node.assignedIPs.Insert(egressIP)
175183
nodeIP = node.nodeIP
176184
if node.nodeIP == eip.localIP {
177-
if err := eip.assignEgressIP(egressIP, hex); err != nil {
185+
if err := eip.assignEgressIP(egressIP, mark); err != nil {
178186
glog.Errorf("Error assigning Egress IP %q: %v", egressIP, err)
179187
nodeIP = ""
180188
}
@@ -185,7 +193,7 @@ func (eip *egressIPWatcher) maybeAddEgressIP(egressIP string) {
185193
ns.assignedIP = egressIP
186194
ns.nodeIP = nodeIP
187195

188-
err := eip.oc.UpdateNamespaceEgressRules(ns.vnid, ns.nodeIP, hex)
196+
err := eip.oc.UpdateNamespaceEgressRules(ns.vnid, ns.nodeIP, mark)
189197
if err != nil {
190198
glog.Errorf("Error updating Namespace egress rules: %v", err)
191199
}
@@ -199,9 +207,9 @@ func (eip *egressIPWatcher) deleteEgressIP(egressIP string) {
199207
return
200208
}
201209

202-
hex := ipToHex(egressIP)
210+
mark := getMarkForVNID(ns.vnid, eip.masqueradeBit)
203211
if node.nodeIP == eip.localIP {
204-
if err := eip.releaseEgressIP(egressIP, hex); err != nil {
212+
if err := eip.releaseEgressIP(egressIP, mark); err != nil {
205213
glog.Errorf("Error releasing Egress IP %q: %v", egressIP, err)
206214
}
207215
}
@@ -217,7 +225,7 @@ func (eip *egressIPWatcher) deleteEgressIP(egressIP string) {
217225
err = eip.oc.UpdateNamespaceEgressRules(ns.vnid, "", "")
218226
} else {
219227
// Namespace still wants EgressIP but no node provides it
220-
err = eip.oc.UpdateNamespaceEgressRules(ns.vnid, "", hex)
228+
err = eip.oc.UpdateNamespaceEgressRules(ns.vnid, "", mark)
221229
}
222230
if err != nil {
223231
glog.Errorf("Error updating Namespace egress rules: %v", err)
@@ -293,7 +301,7 @@ func (eip *egressIPWatcher) deleteNamespaceEgress(vnid uint32) {
293301
delete(eip.namespacesByVNID, vnid)
294302
}
295303

296-
func (eip *egressIPWatcher) assignEgressIP(egressIP, egressHex string) error {
304+
func (eip *egressIPWatcher) assignEgressIP(egressIP, mark string) error {
297305
if egressIP == eip.localIP {
298306
return fmt.Errorf("desired egress IP %q is the node IP", egressIP)
299307
}
@@ -321,14 +329,14 @@ func (eip *egressIPWatcher) assignEgressIP(egressIP, egressHex string) error {
321329
}
322330
}
323331

324-
if err := eip.iptables.AddEgressIPRules(egressIP, egressHex); err != nil {
332+
if err := eip.iptables.AddEgressIPRules(egressIP, mark); err != nil {
325333
return fmt.Errorf("could not add egress IP iptables rule: %v", err)
326334
}
327335

328336
return nil
329337
}
330338

331-
func (eip *egressIPWatcher) releaseEgressIP(egressIP, egressHex string) error {
339+
func (eip *egressIPWatcher) releaseEgressIP(egressIP, mark string) error {
332340
if egressIP == eip.localIP {
333341
return nil
334342
}
@@ -353,7 +361,7 @@ func (eip *egressIPWatcher) releaseEgressIP(egressIP, egressHex string) error {
353361
}
354362
}
355363

356-
if err := eip.iptables.DeleteEgressIPRules(egressIP, egressHex); err != nil {
364+
if err := eip.iptables.DeleteEgressIPRules(egressIP, mark); err != nil {
357365
return fmt.Errorf("could not delete egress IP iptables rule: %v", err)
358366
}
359367

pkg/network/node/egressip_test.go

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ func TestEgressIP(t *testing.T) {
3131
if oc.localIP != "172.17.0.4" {
3232
panic("details of fake ovsController changed")
3333
}
34-
eip := newEgressIPWatcher("172.17.0.4", oc)
34+
masqBit := int32(0)
35+
eip := newEgressIPWatcher(oc, "172.17.0.4", &masqBit)
3536
eip.testModeChan = make(chan string, 10)
3637

3738
eip.updateNodeEgress("172.17.0.3", []string{})
@@ -160,7 +161,7 @@ func TestEgressIP(t *testing.T) {
160161
err = assertFlowChanges(origFlows, flows,
161162
flowChange{
162163
kind: flowAdded,
163-
match: []string{"table=100", "reg0=44", "0xac110066->pkt_mark", "goto_table:101"},
164+
match: []string{"table=100", "reg0=44", "0x0000002c->pkt_mark", "goto_table:101"},
164165
},
165166
)
166167
if err != nil {
@@ -195,7 +196,7 @@ func TestEgressIP(t *testing.T) {
195196
err = assertFlowChanges(origFlows, flows,
196197
flowChange{
197198
kind: flowAdded,
198-
match: []string{"table=100", "reg0=45", "0xac110067->pkt_mark", "goto_table:101"},
199+
match: []string{"table=100", "reg0=45", "0x0100002c->pkt_mark", "goto_table:101"},
199200
},
200201
)
201202
if err != nil {
@@ -216,7 +217,7 @@ func TestEgressIP(t *testing.T) {
216217
err = assertFlowChanges(origFlows, flows,
217218
flowChange{
218219
kind: flowRemoved,
219-
match: []string{"table=100", "reg0=44", "0xac110066->pkt_mark", "goto_table:101"},
220+
match: []string{"table=100", "reg0=44", "0x0000002c->pkt_mark", "goto_table:101"},
220221
},
221222
)
222223
if err != nil {
@@ -262,7 +263,7 @@ func TestEgressIP(t *testing.T) {
262263
err = assertFlowChanges(origFlows, flows,
263264
flowChange{
264265
kind: flowRemoved,
265-
match: []string{"table=100", "reg0=45", "0xac110067->pkt_mark", "goto_table:101"},
266+
match: []string{"table=100", "reg0=45", "0x0100002c->pkt_mark", "goto_table:101"},
266267
},
267268
flowChange{
268269
kind: flowAdded,
@@ -289,3 +290,74 @@ func TestEgressIP(t *testing.T) {
289290
t.Fatalf("Unexpected flow changes: %v", err)
290291
}
291292
}
293+
294+
func TestMarkForVNID(t *testing.T) {
295+
testcases := []struct {
296+
description string
297+
vnid uint32
298+
masqueradeBit uint32
299+
result uint32
300+
}{
301+
{
302+
description: "masqBit in VNID range, but not set in VNID",
303+
vnid: 0x000000aa,
304+
masqueradeBit: 0x00000001,
305+
result: 0x000000aa,
306+
},
307+
{
308+
description: "masqBit in VNID range, and set in VNID",
309+
vnid: 0x000000ab,
310+
masqueradeBit: 0x00000001,
311+
result: 0x010000aa,
312+
},
313+
{
314+
description: "masqBit in VNID range, VNID 0",
315+
vnid: 0x00000000,
316+
masqueradeBit: 0x00000001,
317+
result: 0xff000000,
318+
},
319+
{
320+
description: "masqBit outside of VNID range",
321+
vnid: 0x000000aa,
322+
masqueradeBit: 0x80000000,
323+
result: 0x000000aa,
324+
},
325+
{
326+
description: "masqBit outside of VNID range, VNID 0",
327+
vnid: 0x00000000,
328+
masqueradeBit: 0x80000000,
329+
result: 0x7f000000,
330+
},
331+
{
332+
description: "masqBit == bit 24",
333+
vnid: 0x000000aa,
334+
masqueradeBit: 0x01000000,
335+
result: 0x000000aa,
336+
},
337+
{
338+
description: "masqBit == bit 24, VNID 0",
339+
vnid: 0x00000000,
340+
masqueradeBit: 0x01000000,
341+
result: 0xfe000000,
342+
},
343+
{
344+
description: "no masqBit, ordinary VNID",
345+
vnid: 0x000000aa,
346+
masqueradeBit: 0x00000000,
347+
result: 0x000000aa,
348+
},
349+
{
350+
description: "no masqBit, VNID 0",
351+
vnid: 0x00000000,
352+
masqueradeBit: 0x00000000,
353+
result: 0xff000000,
354+
},
355+
}
356+
357+
for _, tc := range testcases {
358+
result := getMarkForVNID(tc.vnid, tc.masqueradeBit)
359+
if result != fmt.Sprintf("0x%08x", tc.result) {
360+
t.Fatalf("test %q expected %08x got %s", tc.description, tc.result, result)
361+
}
362+
}
363+
}

pkg/network/node/iptables.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,9 @@ func (n *NodeIPTables) getNodeIPTablesChains() []Chain {
212212
return chainArray
213213
}
214214

215-
func (n *NodeIPTables) AddEgressIPRules(egressIP, egressHex string) error {
215+
func (n *NodeIPTables) AddEgressIPRules(egressIP, mark string) error {
216216
for _, cidr := range n.clusterNetworkCIDR {
217-
_, err := n.ipt.EnsureRule(iptables.Prepend, iptables.TableNAT, iptables.Chain("OPENSHIFT-MASQUERADE"), "-s", cidr, "-m", "mark", "--mark", egressHex, "-j", "SNAT", "--to-source", egressIP)
217+
_, err := n.ipt.EnsureRule(iptables.Prepend, iptables.TableNAT, iptables.Chain("OPENSHIFT-MASQUERADE"), "-s", cidr, "-m", "mark", "--mark", mark, "-j", "SNAT", "--to-source", egressIP)
218218
if err != nil {
219219
return err
220220
}
@@ -223,9 +223,9 @@ func (n *NodeIPTables) AddEgressIPRules(egressIP, egressHex string) error {
223223
return err
224224
}
225225

226-
func (n *NodeIPTables) DeleteEgressIPRules(egressIP, egressHex string) error {
226+
func (n *NodeIPTables) DeleteEgressIPRules(egressIP, mark string) error {
227227
for _, cidr := range n.clusterNetworkCIDR {
228-
err := n.ipt.DeleteRule(iptables.TableNAT, iptables.Chain("OPENSHIFT-MASQUERADE"), "-s", cidr, "-m", "mark", "--mark", egressHex, "-j", "SNAT", "--to-source", egressIP)
228+
err := n.ipt.DeleteRule(iptables.TableNAT, iptables.Chain("OPENSHIFT-MASQUERADE"), "-s", cidr, "-m", "mark", "--mark", mark, "-j", "SNAT", "--to-source", egressIP)
229229
if err != nil {
230230
return err
231231
}

pkg/network/node/node.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ type OsdnNodeConfig struct {
8484

8585
IPTablesSyncPeriod time.Duration
8686
ProxyMode kubeproxyconfig.ProxyMode
87+
MasqueradeBit *int32
8788
}
8889

8990
type OsdnNode struct {
@@ -181,7 +182,7 @@ func New(c *OsdnNodeConfig) (network.NodeInterface, error) {
181182
hostSubnetMap: make(map[string]*networkapi.HostSubnet),
182183
kubeInformers: c.KubeInformers,
183184
networkInformers: c.NetworkInformers,
184-
egressIP: newEgressIPWatcher(c.SelfIP, oc),
185+
egressIP: newEgressIPWatcher(oc, c.SelfIP, c.MasqueradeBit),
185186

186187
runtimeEndpoint: c.RuntimeEndpoint,
187188
// 2 minutes is the current default value used in kubelet

pkg/network/node/ovscontroller.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ const (
3434
Vxlan0 = "vxlan0"
3535

3636
// rule versioning; increment each time flow rules change
37-
ruleVersion = 5
37+
ruleVersion = 6
3838

3939
ruleVersionTable = 253
4040
)
@@ -681,11 +681,11 @@ func (oc *ovsController) ensureTunMAC() error {
681681
return nil
682682
}
683683

684-
func (oc *ovsController) UpdateNamespaceEgressRules(vnid uint32, nodeIP, egressHex string) error {
684+
func (oc *ovsController) UpdateNamespaceEgressRules(vnid uint32, nodeIP, mark string) error {
685685
otx := oc.ovs.NewTransaction()
686686
otx.DeleteFlows("table=100, reg0=%d", vnid)
687687

688-
if egressHex == "" {
688+
if mark == "" {
689689
// Namespace no longer has an EgressIP; no VNID-specific rules needed
690690
} else if nodeIP == "" {
691691
// Namespace has Egress IP, but it is unavailable, so drop egress traffic
@@ -695,7 +695,7 @@ func (oc *ovsController) UpdateNamespaceEgressRules(vnid uint32, nodeIP, egressH
695695
if err := oc.ensureTunMAC(); err != nil {
696696
return err
697697
}
698-
otx.AddFlow("table=100, priority=100, reg0=%d, ip, actions=set_field:%s->eth_dst,set_field:%s->pkt_mark,goto_table:101", vnid, oc.tunMAC, egressHex)
698+
otx.AddFlow("table=100, priority=100, reg0=%d, ip, actions=set_field:%s->eth_dst,set_field:%s->pkt_mark,goto_table:101", vnid, oc.tunMAC, mark)
699699
} else {
700700
// Remote Egress IP; send via VXLAN
701701
otx.AddFlow("table=100, priority=100, reg0=%d, ip, actions=move:NXM_NX_REG0[]->NXM_NX_TUN_ID[0..31],set_field:%s->tun_dst,output:1", vnid, nodeIP)

0 commit comments

Comments
 (0)