3 Commits

Author SHA1 Message Date
eb00ba2c5a fix: Make Cilium IPv4 BGP network fully functional including proof-of-concept application (#11)
- Upgraded to
  - Talos v1.11 (newest)
  - Kubernetes v1.34
- Added test application for LB-IPAM
- Fixed IPv4 stack:
  - BGP advertisement of Services (ClusterIP, ExternalIP and LoadBalancerIP)
  - Fixed CoreDNS (integration between Talos HostDNS and Cilium BPF)

Co-authored-by: Marcel Straub <m@straubs.eu>
Reviewed-on: #11
2025-09-13 23:28:37 +02:00
18941f9eec Enable IPv4 BGP networking (#10)
- IPv4 networking with BGP peering to upstream router
- Ethernet interface MTU configurable (9000 bytes)

Co-authored-by: Marcel Straub <m@straubs.eu>
Reviewed-on: #10
2025-09-08 21:54:03 +02:00
700db1be08 initial network argocd project (#9)
Co-authored-by: Marcel Straub <m@straubs.eu>
Reviewed-on: #9
2025-09-07 16:04:43 +02:00
17 changed files with 335 additions and 44 deletions

View File

@@ -72,12 +72,14 @@ data "talos_machine_configuration" "this" {
templatefile("${path.module}/machine-config/control_plane.yaml.tftpl", {
mac_address = lower(each.value.mac_address)
vip = var.cluster.vip
cluster_mtu = var.cluster.mtu
extra_manifests = jsonencode(local.extra_manifests)
api_server = var.cluster.api_server
inline_manifests = jsonencode(terraform_data.bootstrap_inline_manifests.output)
}) :
templatefile("${path.module}/machine-config/worker.yaml.tftpl", {
mac_address = lower(each.value.mac_address)
cluster_mtu = var.cluster.mtu
})
]
}

View File

@@ -9,6 +9,12 @@ machine:
overridePath: true
%{endfor}
%{endif}
# https://github.com/siderolabs/talos/issues/10002
# Incompatibility with Cilium BPF routing and Talos Host DNS
features:
hostDNS:
enabled: true
forwardKubeDNSToHost: false
kubelet:
image: ghcr.io/siderolabs/kubelet:${kubernetes_version}
%{if kubelet != ""}
@@ -56,4 +62,11 @@ cluster:
proxy:
image: registry.k8s.io/kube-proxy:${kubernetes_version}
scheduler:
image: registry.k8s.io/kube-scheduler:${kubernetes_version}
image: registry.k8s.io/kube-scheduler:${kubernetes_version}
network:
podSubnets:
- 10.244.0.0/16
# - 2a13:fc80:1:d200::/64
serviceSubnets:
- 10.96.0.0/16
# - 2a13:fc80:1:d201::/64

View File

@@ -5,6 +5,7 @@ machine:
- deviceSelector:
hardwareAddr: ${mac_address}
dhcp: true
mtu: ${cluster_mtu}
%{ if vip != null }
vip:
ip: ${vip}
@@ -19,13 +20,15 @@ cluster:
controllerManager:
extraArgs:
bind-address: 0.0.0.0
node-cidr-mask-size-ipv4: "24"
# node-cidr-mask-size-ipv6: "112"
etcd:
extraArgs:
listen-metrics-urls: http://0.0.0.0:2381
scheduler:
extraArgs:
bind-address: 0.0.0.0
# Let's go with the default network
# Let's go with the default network
network:
cni:
name: none

View File

@@ -3,4 +3,5 @@ machine:
interfaces:
- deviceSelector:
hardwareAddr: ${mac_address}
dhcp: true
dhcp: true
mtu: ${cluster_mtu}

View File

@@ -17,6 +17,7 @@ variable "cluster" {
type = object({
name = string
vip = optional(string)
mtu = optional(number, 1500)
subnet_mask = optional(string, "24")
talos_machine_config_version = optional(string)
proxmox_cluster = string

View File

@@ -8,8 +8,9 @@ talos_cluster_config = {
# http_proxy = "http://100.64.0.1:3128"
# no_proxy = "10.0.0.0/8"
mtu = 9000
vip = "10.51.10.100"
kubernetes_version = "v1.33.3"
kubernetes_version = "v1.34.0"
gateway_api_version = "v1.3.0"
cilium = {

View File

@@ -1,6 +1,6 @@
# Talos OS image settings. Both version fields are pinned to v1.11.0; the
# superseded v1.10.7 assignments are dropped because an HCL object must not
# define the same attribute twice.
talos_image = {
  version        = "v1.11.0"
  update_version = "v1.11.0"
  schematic_path = "talos/image/schematic.yaml"
  #update_schematic_path = "talos/image/schematic.yaml"
}

View File

@@ -34,6 +34,7 @@ variable "talos_cluster_config" {
type = object({
name = string
vip = optional(string)
mtu = optional(number, 1500)
subnet_mask = optional(string, "24")
talos_machine_config_version = optional(string)
proxmox_cluster = string

View File

@@ -0,0 +1,34 @@
# Argo CD ApplicationSet that generates one Application per directory matched
# by 02-k8s/infra/network/* in the Git repository.
apiVersion: argoproj.io/v1alpha1
kind: ApplicationSet
metadata:
  name: network
  namespace: argocd
  labels:
    s5b.org: network
spec:
  generators:
    - git:
        repoURL: https://git.straubintra.net/s5b-public/k8s.git
        revision: HEAD
        directories:
          - path: 02-k8s/infra/network/*
  template:
    metadata:
      # Each generated Application is named after its directory.
      name: '{{ path.basename }}'
      labels:
        s5b.org: network
    spec:
      project: network
      source:
        repoURL: https://git.straubintra.net/s5b-public/k8s.git
        targetRevision: HEAD
        path: '{{ path }}'
      destination:
        name: in-cluster
        namespace: argocd
      syncPolicy:
        # Automatically revert drift and delete resources removed from Git.
        automated:
          selfHeal: true
          prune: true
        syncOptions:
          - ServerSideApply=true

View File

@@ -0,0 +1,120 @@
---
# Cluster-wide BGP configuration: every Linux node runs a BGP instance with
# local ASN 65001 and peers with the router in ASN 65000.
apiVersion: cilium.io/v2
kind: CiliumBGPClusterConfig
metadata:
  name: vy-eis-mk8-de-bgp
spec:
  nodeSelector:
    matchLabels:
      kubernetes.io/os: linux
  bgpInstances:
    - name: "ASN65001"
      localASN: 65001
      peers:
        - name: "vy-eis-mk8-de-1-v6"
          peerASN: 65000
          # Quoted so the colon-separated address is always read as a string.
          peerAddress: "2a13:fc80:1:a::1"
          peerConfigRef:
            name: "vy-eis-mk8-de-bgp-1-peer-config"
        # - name: "vy-eis-mk8-de-1-v4"
        #   peerASN: 65000
        #   peerAddress: 10.51.10.1
        #   peerConfigRef:
        #     name: "vy-eis-mk8-de-bgp-1-peer-config"
---
# Peer settings referenced by the BGP cluster config through peerConfigRef.
apiVersion: cilium.io/v2
kind: CiliumBGPPeerConfig
metadata:
  name: vy-eis-mk8-de-bgp-1-peer-config
spec:
  # Optimized timers for datacenter environments
  timers:
    connectRetryTimeSeconds: 5
    holdTimeSeconds: 9
    keepAliveTimeSeconds: 3
  # Enable graceful restart for zero-downtime operations
  gracefulRestart:
    enabled: true
    restartTimeSeconds: 15
  # Custom transport configuration
  transport:
    peerPort: 179
  # Address family configuration with advertisements; both families select
  # advertisements labeled advertise=datacenter-bgp.
  families:
    - afi: ipv4
      safi: unicast
      advertisements:
        matchLabels:
          advertise: "datacenter-bgp"
    - afi: ipv6
      safi: unicast
      advertisements:
        matchLabels:
          advertise: "datacenter-bgp"
---
# Routes announced to BGP peers. The label below is what the peer config's
# address families match on. Despite the resource name, this advertises both
# pod CIDRs and service addresses.
apiVersion: cilium.io/v2
kind: CiliumBGPAdvertisement
metadata:
  name: pod-cidr-advertisements
  labels:
    advertise: "datacenter-bgp"
spec:
  advertisements:
    - advertisementType: "PodCIDR"
      attributes:
        communities:
          standard: ["65001:100", "65001:200"]
          large: ["65001:100:1"]
          wellKnown: ["no-export"]
        localPreference: 200
    - advertisementType: "Service"
      service:
        addresses:
          - ClusterIP
          - ExternalIP
          - LoadBalancerIP
      selector:  # select all services by a dummy expression always matching
        matchExpressions:
          - {key: somekey, operator: NotIn, values: ['never-used-value']}
    # That at least works for the PodCIDR
    # - advertisementType: "PodCIDR"
    #   attributes:
    #     communities:
    #       standard: ["65001:100", "65001:200"]
    #       large: ["65001:100:1"]
    #       wellKnown: ["no-export"]
    #     localPreference: 200
    # # LoadBalancer services
    # - advertisementType: "Service"
    #   service:
    #     addresses:
    #       - LoadBalancerIP
    #   # selector:
    #   #   matchLabels:
    #   #     service-type: "external"
    #   attributes:
    #     communities:
    #       standard: ["65001:300"]
    #     localPreference: 150
    # # ClusterIP services for internal access
    # - advertisementType: "Service"
    #   service:
    #     addresses:
    #       - ClusterIP
    #   # selector:
    #   #   matchExpressions:
    #   #     - key: "internal-bgp"
    #   #       operator: "In"
    #   #       values: ["enabled"]
    #   attributes:
    #     communities:
    #       standard: ["65001:400"]
    #     localPreference: 100

View File

@@ -1,8 +1,15 @@
# LB-IPAM pools for LoadBalancer services. The earlier `internal-v4` name and
# its 10.51.10.64-10.51.10.71 range are dropped: they duplicated
# `metadata.name` and placed an internal range in the pool named public-v4-01.
apiVersion: cilium.io/v2
kind: CiliumLoadBalancerIPPool
metadata:
  name: public-v4-01
spec:
  blocks:
    - cidr: 185.83.87.48/29
---
apiVersion: cilium.io/v2
kind: CiliumLoadBalancerIPPool
metadata:
  name: public-v6-01
spec:
  blocks:
    - cidr: 2a13:fc80:0001:d201::/64

View File

@@ -4,6 +4,7 @@ kind: Kustomization
resources:
- announce.yaml
- ip-pool.yaml
- bgp.yaml
helmCharts:
- name: cilium

View File

@@ -18,42 +18,57 @@ cgroup:
enabled: false
hostRoot: /sys/fs/cgroup
bpf:
lbExternalClusterIP: true
# https://www.talos.dev/latest/talos-guides/network/host-dns/#forwarding-kube-dns-to-host-dns
# https://docs.cilium.io/en/stable/operations/performance/tuning/#ebpf-host-routing
bpf:
hostLegacyRouting: true
# hostLegacyRouting: true
# https://docs.cilium.io/en/stable/network/concepts/ipam/
ipam:
mode: kubernetes
multiPoolPreAllocation: ""
# mode: multi-pool
# operator:
# autoCreateCiliumPodIPPools:
# default:
# ipv6:
# cidrs:
# - 2a13:fc80:0001:d200::/64
# maskSize: 120
# # TODO: revisit
# # The service subnet CIDR.
# serviceSubnets:
# - 10.96.0.0/12
# - 2a13:fc80:0001:d201::/64
# routingMode: native
# k8s:
# requireIPv4PodCIDR: true
# requireIPv6PodCIDR: false
# bgpControlPlane:
# enabled: true
ipv4:
enabled: true
ipv6:
enabled: false
# ipv4:
# enabled: true
# ipv6:
# enabled: true
# Avoid encapsulation for direct access
routingMode: native
# enableIPv4Masquerade: false
# enableIPv6Masquerade: false
#Route distribution gets managed by BGP
bgpControlPlane:
enabled: true
# enable instead of bgpControlPlane
# l2announcements:
# enabled: false
# externalIPs:
# enabled: false
# Only BGP manages the routes
# autoDirectNodeRoutes: true
# directRoutingSkipUnreachable: true
# The whole internet is directly reachable from each pod
# ipv6NativeRoutingCIDR: ::/0
# Helm values use camelCase names; the dashed form is the agent flag name and
# is not a value the chart reads.
ipv4NativeRoutingCIDR: 10.0.0.0/8
# Disabling DNAT
enableIPv4Masquerade: false
enableIPv6Masquerade: false
enableIPv6BIGTCP: true
bandwidthManager:
enabled: true
bbr: true
#debug:
# enabled: true
operator:
rollOutPods: true
@@ -83,20 +98,11 @@ resources:
cpu: 200m
memory: 512Mi
#debug:
# enabled: true
# Increase rate limit when doing L2 announcements
k8sClientRateLimit:
qps: 20
burst: 100
l2announcements:
enabled: true
externalIPs:
enabled: true
loadBalancer:
# https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/#maglev-consistent-hashing
algorithm: maglev

View File

@@ -0,0 +1,9 @@
# Bundles the Argo CD project and ApplicationSet for the network stack and
# stamps both with common labels.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
# `labels` replaces the deprecated `commonLabels`; includeSelectors keeps the
# previous behavior of also labeling selectors and templates.
labels:
  - pairs:
      s5b.org: network
      app.kubernetes.io/managed-by: argocd
    includeSelectors: true
resources:
  - project.yaml
  - application-set.yaml

View File

@@ -0,0 +1,18 @@
# Argo CD project for the network stack: restricts the source to one Git
# repository and the destinations to three namespaces on any cluster.
apiVersion: argoproj.io/v1alpha1
kind: AppProject
metadata:
  name: network
  namespace: argocd
spec:
  sourceRepos:
    - 'https://git.straubintra.net/s5b-public/k8s.git'
  destinations:
    - namespace: 'argocd'
      server: '*'
    - namespace: 'kube-system'
      server: '*'
    - namespace: 'cilium-secrets'
      server: '*'
  # Any cluster-scoped resource kind may be managed by this project.
  clusterResourceWhitelist:
    - group: '*'
      kind: '*'

View File

@@ -0,0 +1,49 @@
# Proof-of-concept workload behind the LoadBalancer Service
# `hello-world-service`.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/name: load-balancer-example
  name: hello-world
  # Pinned to the same namespace as the Service, which can only select pods
  # in its own namespace.
  namespace: default
spec:
  replicas: 3
  selector:
    matchLabels:
      app.kubernetes.io/name: load-balancer-example
  template:
    metadata:
      labels:
        app.kubernetes.io/name: load-balancer-example
    spec:
      containers:
        - image: gcr.io/google-samples/hello-app:2.0
          name: hello-world
          ports:
            # Named port; the Service targets it by name.
            - containerPort: 8080
              name: http-web-svc
          # Container-level hardening: non-root, read-only root filesystem,
          # no privilege escalation, all capabilities dropped.
          securityContext:
            seccompProfile:
              type: "RuntimeDefault"
            allowPrivilegeEscalation: false
            runAsNonRoot: true
            readOnlyRootFilesystem: true
            runAsUser: 1000
            capabilities:
              drop: ["ALL"]
---
# LoadBalancer Service for the hello-world pods, handled by Cilium's BGP
# control plane via loadBalancerClass.
apiVersion: v1
kind: Service
metadata:
  name: hello-world-service
  namespace: default
  labels:
    app.kubernetes.io/name: load-balancer-example
spec:
  selector:
    app.kubernetes.io/name: load-balancer-example
  type: LoadBalancer
  loadBalancerClass: io.cilium/bgp-control-plane
  # ipFamilyPolicy: RequireDualStack
  ports:
    - protocol: TCP
      port: 80
      # Refers to the container port by its name.
      targetPort: http-web-svc

View File

@@ -8,6 +8,8 @@
### Network
#### Node Network
- IPv4 configuration uses DHCP with static MAC binding for easy bring-up
- IPv6 addresses are manually assigned
- DNS Zone: prod.k8.eis-mk8.de.s5b.org
@@ -16,6 +18,29 @@
|--|--|--|
|210|10.51.10.0/23|2a13:fc80:1:a::/64|
#### Pod Network
- IPv4 only
- IPv4 prefix: 10.244.0.0/16
- Potential IPv6 prefix: 2a13:fc80:0001:d200::/64
##### Important side-notes
- These networks must be whitelisted on the DNS resolver (VyOS) to allow recursive DNS
#### Service Network
- Dual-Stack
- IPv6 Prefix: 2a13:fc80:0001:d201::/64
- IPv4 Prefix: 185.83.87.48/28
#### BGP
|ASN|Who|
|--|--|
|65000|Upstream Router|
|65001|Cluster|
## How to use
### Prerequisites