20 Commits

5c2e53d66b Merge branch 'main' into feature/fix-cilium-network 2025-09-28 18:22:37 +02:00
eb00ba2c5a fix: Make Cilium IPv4 BGP network fully functional, including a proof-of-concept application (#11)
- Upgraded to
  - Talos v1.11 (newest)
  - Kubernetes v1.34
- Added test application for LB-IPAM
- Fixed IPv4 stack:
  - BGP advertisement of Services (ClusterIP, ExternalIP and LoadBalancerIP)
  - Fixed CoreDNS (integration between Talos HostDNS and Cilium BPF)

Co-authored-by: Marcel Straub <m@straubs.eu>
Reviewed-on: #11
2025-09-13 23:28:37 +02:00
2281216562 IPv4 service subnet to 10.96.0.0/16 (Marcel Straub) 2025-09-13 23:14:55 +02:00
31ade3ad23 Fixed LB-IPAM example application (Marcel Straub) 2025-09-13 23:08:27 +02:00
db6cc4b9a8 Demo application requests BGP control plane for load balancing (Marcel Straub) 2025-09-13 22:35:33 +02:00
1048f3d861 Configure BGP advertisement to export all routes (Marcel Straub) 2025-09-13 22:35:11 +02:00
01204cfd2a Add small example nginx application to test LBs (Marcel Straub) 2025-09-13 22:20:42 +02:00
57485cf6ad Fix: do not define the node IPv6 CIDR mask if no IPv6 address is assigned; otherwise the cluster ends up in a boot loop (Marcel Straub) 2025-09-13 21:57:32 +02:00
d4d327a3d0 Update to Talos v1.11 (Marcel Straub) 2025-09-13 21:13:10 +02:00
ea1b7d8df0 Documented Pod network setup (Marcel Straub) 2025-09-13 20:35:41 +02:00
70370e9514 Working IPv4 config (Marcel Straub) 2025-09-13 20:35:30 +02:00
9e7485cafc Cilium BPF vs. Talos Host DNS fix, see https://github.com/siderolabs/talos/issues/10002 (Marcel Straub) 2025-09-13 20:26:17 +02:00
18941f9eec Enable IPv4 BGP networking (#10)
- IPv4 networking with BGP peering to upstream router
- Ethernet interface MTU configurable (9000 bytes)

Co-authored-by: Marcel Straub <m@straubs.eu>
Reviewed-on: #10
2025-09-08 21:54:03 +02:00
700db1be08 Initial network ArgoCD project (#9)
Co-authored-by: Marcel Straub <m@straubs.eu>
Reviewed-on: #9
2025-09-07 16:04:43 +02:00
b3b4bb6361 Define kustomize config for Cilium including an L2 load balancer pool (#8)
Co-authored-by: Marcel Straub <m@straubs.eu>
Reviewed-on: #8
2025-09-07 15:58:09 +02:00
0bf1a4b536 Added small shell tool for fetching certs of SealedSecrets for backup purpose (#7)
Co-authored-by: Marcel Straub <m@straubs.eu>
Reviewed-on: #7
2025-09-07 15:49:23 +02:00
456692fae3 Added creation of sealed-secrets namespace (#6)
Co-authored-by: Marcel Straub <m@straubs.eu>
Reviewed-on: #6
2025-09-07 15:30:33 +02:00
daa76476a1 Sealed secrets, correct helm-chart version (#5)
Co-authored-by: Marcel Straub <m@straubs.eu>
Reviewed-on: #5
2025-09-07 15:27:13 +02:00
5d758337fd Try other kustomize.buildOptions for ArgoCD (#4)
Co-authored-by: Marcel Straub <m@straubs.eu>
Reviewed-on: #4
2025-09-07 15:19:53 +02:00
1a685314be Try fixing ArgoCD with Helm+Kustomize (#3)
Co-authored-by: Marcel Straub <m@straubs.eu>
Reviewed-on: #3
2025-09-07 11:51:36 +02:00
25 changed files with 407 additions and 54 deletions


@@ -72,12 +72,14 @@ data "talos_machine_configuration" "this" {
templatefile("${path.module}/machine-config/control_plane.yaml.tftpl", {
mac_address = lower(each.value.mac_address)
vip = var.cluster.vip
cluster_mtu = var.cluster.mtu
extra_manifests = jsonencode(local.extra_manifests)
api_server = var.cluster.api_server
inline_manifests = jsonencode(terraform_data.bootstrap_inline_manifests.output)
}) :
templatefile("${path.module}/machine-config/worker.yaml.tftpl", {
mac_address = lower(each.value.mac_address)
cluster_mtu = var.cluster.mtu
})
]
}


@@ -9,6 +9,12 @@ machine:
overridePath: true
%{endfor}
%{endif}
# https://github.com/siderolabs/talos/issues/10002
# Incompatibility with Cilium BPF routing and Talos Host DNS
features:
hostDNS:
enabled: true
forwardKubeDNSToHost: false
kubelet:
image: ghcr.io/siderolabs/kubelet:${kubernetes_version}
%{if kubelet != ""}
@@ -56,4 +62,11 @@ cluster:
proxy:
image: registry.k8s.io/kube-proxy:${kubernetes_version}
scheduler:
image: registry.k8s.io/kube-scheduler:${kubernetes_version}
network:
podSubnets:
- 10.244.0.0/16
# - 2a13:fc80:1:d200::/64
serviceSubnets:
- 10.96.0.0/16
# - 2a13:fc80:1:d201::/64
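A quick way to verify the new subnets once the nodes converge; a minimal sketch, assuming kubectl access to the cluster:
```shell
# Each node should hold a /24 slice of 10.244.0.0/16 (node-cidr-mask-size-ipv4)
kubectl get nodes -o custom-columns=NAME:.metadata.name,PODCIDR:.spec.podCIDR
# The apiserver flag should now show 10.96.0.0/16
kubectl cluster-info dump | grep -m1 service-cluster-ip-range
```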


@@ -5,6 +5,7 @@ machine:
- deviceSelector:
hardwareAddr: ${mac_address}
dhcp: true
mtu: ${cluster_mtu}
%{ if vip != null }
vip:
ip: ${vip}
@@ -19,13 +20,15 @@ cluster:
controllerManager:
extraArgs:
bind-address: 0.0.0.0
node-cidr-mask-size-ipv4: "24"
# node-cidr-mask-size-ipv6: "112"
etcd:
extraArgs:
listen-metrics-urls: http://0.0.0.0:2381
scheduler:
extraArgs:
bind-address: 0.0.0.0
# Let's go with the default network
network:
cni:
name: none


@@ -3,4 +3,5 @@ machine:
interfaces:
- deviceSelector:
hardwareAddr: ${mac_address}
dhcp: true
mtu: ${cluster_mtu}


@@ -17,6 +17,7 @@ variable "cluster" {
type = object({
name = string
vip = optional(string)
mtu = optional(number, 1500)
subnet_mask = optional(string, "24")
talos_machine_config_version = optional(string)
proxmox_cluster = string


@@ -8,8 +8,9 @@ talos_cluster_config = {
# http_proxy = "http://100.64.0.1:3128"
# no_proxy = "10.0.0.0/8"
mtu = 9000
vip = "10.51.10.100"
kubernetes_version = "v1.33.3"
kubernetes_version = "v1.34.0"
gateway_api_version = "v1.3.0"
cilium = {


@@ -1,6 +1,6 @@
talos_image = {
-version = "v1.10.7"
-update_version = "v1.10.7"
+version = "v1.11.0"
+update_version = "v1.11.0"
schematic_path = "talos/image/schematic.yaml"
#update_schematic_path = "talos/image/schematic.yaml"
}


@@ -34,6 +34,7 @@ variable "talos_cluster_config" {
type = object({
name = string
vip = optional(string)
mtu = optional(number, 1500)
subnet_mask = optional(string, "24")
talos_machine_config_version = optional(string)
proxmox_cluster = string

02-k8s/.gitignore (new vendored file, +3 lines)

@@ -0,0 +1,3 @@
# Kube Seal backup
kubeseal.cert
kubeseal.key


@@ -14,6 +14,12 @@ Deployment of ArgoCD itself:
kustomize build --enable-helm infra/controllers/argocd | kubectl apply -f -
```
You can uninstall it with the command
```shell
kustomize build --enable-helm infra/controllers/argocd | kubectl delete -f -
```
Configure ArgoCD to pull our setup from this repo:
```shell
@@ -34,4 +40,19 @@ kubectl port-forward svc/argocd-server -n argocd 8080:443
1. CLI Login with previously retrieved password and username ``admin``:
```shell
argocd login localhost:8080 --insecure
```
## Other
### What's keeping a namespace stuck in the Terminating state?
```shell
kubectl api-resources --verbs=list --namespaced -o name \
| xargs -n 1 kubectl get --show-kind --ignore-not-found -n argocd
```
### Delete ArgoCD applications stuck on finalizers
```shell
kubectl -nargocd patch applications/controllers -p '{"metadata": {"finalizers": null}}' --type merge
```

02-k8s/fetch_kubeseal_certs.sh (new executable file, +9 lines)

@@ -0,0 +1,9 @@
#!/usr/bin/bash
rm kubeseal.cert kubeseal.key 2> /dev/null
# public cert
(umask 0077 && kubeseal --controller-namespace=sealed-secrets --fetch-cert > kubeseal.cert)
# full key backup (contains the private sealing keys; store securely)
(umask 0077 && kubectl get secret -n sealed-secrets -l sealedsecrets.bitnami.com/sealed-secrets-key -o yaml > kubeseal.key)


@@ -4,7 +4,7 @@ metadata:
name: infrastructure
namespace: argocd
labels:
-dev.stonegarden: infrastructure
+s5b.org: infrastructure
spec:
generators:
- git:
@@ -16,7 +16,7 @@ spec:
metadata:
name: '{{ path.basename }}'
labels:
-dev.stonegarden: infrastructure
+s5b.org: infrastructure
spec:
project: infrastructure
source:


@@ -7,12 +7,9 @@ configs:
create: true
application.resourceTrackingMethod: annotation+label
admin.enabled: true
-kustomize.buildOptions: --enable-helm
+kustomize.buildOptions: "--enable-helm --load-restrictor LoadRestrictionsNone --enable-alpha-plugins --enable-exec"
# url: https://gitops.prod.k8.eis-mk8.de.s5b.org
cmp:
create: true
params:
controller.diff.server.side: true
server.insecure: false
@@ -84,12 +81,6 @@ repoServer:
enabled: true
containerSecurityContext:
readOnlyRootFilesystem: true
volumes:
- name: cmp-kustomize-build-with-helm
configMap:
name: argocd-cmp-cm
- name: cmp-tmp
emptyDir: { }
resources:
requests:
cpu: 100m


@@ -1,10 +1,13 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ns.yaml
helmCharts:
- name: sealed-secrets
repo: oci://registry-1.docker.io/bitnamicharts
-version: 2.17.4
+version: 2.5.19
releaseName: sealed-secrets-controller
namespace: sealed-secrets
includeCRDs: true


@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
name: sealed-secrets


@@ -0,0 +1,34 @@
apiVersion: argoproj.io/v1alpha1
kind: ApplicationSet
metadata:
name: network
namespace: argocd
labels:
s5b.org: network
spec:
generators:
- git:
repoURL: https://git.straubintra.net/s5b-public/k8s.git
revision: HEAD
directories:
- path: 02-k8s/infra/network/*
template:
metadata:
name: '{{ path.basename }}'
labels:
s5b.org: network
spec:
project: network
source:
repoURL: https://git.straubintra.net/s5b-public/k8s.git
targetRevision: HEAD
path: '{{ path }}'
destination:
name: in-cluster
namespace: argocd
syncPolicy:
automated:
selfHeal: true
prune: true
syncOptions:
- ServerSideApply=true


@@ -0,0 +1,8 @@
apiVersion: cilium.io/v2alpha1
kind: CiliumL2AnnouncementPolicy
metadata:
name: default-l2-announcement-policy
namespace: kube-system
spec:
externalIPs: true
loadBalancerIPs: true


@@ -0,0 +1,120 @@
---
apiVersion: cilium.io/v2
kind: CiliumBGPClusterConfig
metadata:
name: vy-eis-mk8-de-bgp
spec:
nodeSelector:
matchLabels:
kubernetes.io/os: linux
bgpInstances:
- name: "ASN65001"
localASN: 65001
peers:
- name: "vy-eis-mk8-de-1-v6"
peerASN: 65000
peerAddress: 2a13:fc80:1:a::1
peerConfigRef:
name: "vy-eis-mk8-de-bgp-1-peer-config"
# - name: "vy-eis-mk8-de-1-v4"
# peerASN: 65000
# peerAddress: 10.51.10.1
# peerConfigRef:
# name: "vy-eis-mk8-de-bgp-1-peer-config"
---
apiVersion: cilium.io/v2
kind: CiliumBGPPeerConfig
metadata:
name: vy-eis-mk8-de-bgp-1-peer-config
spec:
# Optimized timers for datacenter environments
timers:
connectRetryTimeSeconds: 5
holdTimeSeconds: 9
keepAliveTimeSeconds: 3
# Enable graceful restart for zero-downtime operations
gracefulRestart:
enabled: true
restartTimeSeconds: 15
# Custom transport configuration
transport:
peerPort: 179
# Address family configuration with advertisements
families:
- afi: ipv4
safi: unicast
advertisements:
matchLabels:
advertise: "datacenter-bgp"
- afi: ipv6
safi: unicast
advertisements:
matchLabels:
advertise: "datacenter-bgp"
---
apiVersion: cilium.io/v2
kind: CiliumBGPAdvertisement
metadata:
name: pod-cidr-advertisements
labels:
advertise: "datacenter-bgp"
spec:
advertisements:
- advertisementType: "PodCIDR"
attributes:
communities:
standard: ["65001:100", "65001:200"]
large: ["65001:100:1"]
wellKnown: ["no-export"]
localPreference: 200
- advertisementType: "Service"
service:
addresses:
- ClusterIP
- ExternalIP
- LoadBalancerIP
selector: # select all Services via a dummy expression that always matches
matchExpressions:
- {key: somekey, operator: NotIn, values: ['never-used-value']}
# That at least works for the PodCIDR
# - advertisementType: "PodCIDR"
# attributes:
# communities:
# standard: ["65001:100", "65001:200"]
# large: ["65001:100:1"]
# wellKnown: ["no-export"]
# localPreference: 200
# # LoadBalancer services
# - advertisementType: "Service"
# service:
# addresses:
# - LoadBalancerIP
# # selector:
# # matchLabels:
# # service-type: "external"
# attributes:
# communities:
# standard: ["65001:300"]
# localPreference: 150
# # ClusterIP services for internal access
# - advertisementType: "Service"
# service:
# addresses:
# - ClusterIP
# # selector:
# # matchExpressions:
# # - key: "internal-bgp"
# # operator: "In"
# # values: ["enabled"]
# attributes:
# communities:
# standard: ["65001:400"]
# localPreference: 100


@@ -0,0 +1,15 @@
apiVersion: cilium.io/v2
kind: CiliumLoadBalancerIPPool
metadata:
name: public-v4-01
spec:
blocks:
- cidr: 185.83.87.48/29
---
apiVersion: cilium.io/v2
kind: CiliumLoadBalancerIPPool
metadata:
name: public-v6-01
spec:
blocks:
- cidr: 2a13:fc80:0001:d201::/64
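Once applied, pool allocation can be inspected; a sketch, assuming the Cilium CRDs are installed:
```shell
# Shows each pool's CIDRs, how many IPs are in use, and any conflicts
kubectl get ciliumloadbalancerippools
```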


@@ -0,0 +1,16 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- announce.yaml
- ip-pool.yaml
- bgp.yaml
helmCharts:
- name: cilium
repo: https://helm.cilium.io
version: 1.18.0 # renovate: github-releases=cilium/cilium
releaseName: "cilium"
includeCRDs: true
namespace: kube-system
valuesFile: values.yaml


@@ -18,42 +18,57 @@ cgroup:
enabled: false
hostRoot: /sys/fs/cgroup
bpf:
lbExternalClusterIP: true
# https://www.talos.dev/latest/talos-guides/network/host-dns/#forwarding-kube-dns-to-host-dns
# https://docs.cilium.io/en/stable/operations/performance/tuning/#ebpf-host-routing
bpf:
-hostLegacyRouting: true
+# hostLegacyRouting: true
# https://docs.cilium.io/en/stable/network/concepts/ipam/
ipam:
mode: kubernetes
multiPoolPreAllocation: ""
# mode: multi-pool
# operator:
# autoCreateCiliumPodIPPools:
# default:
# ipv6:
# cidrs:
# - 2a13:fc80:0001:d200::/64
# maskSize: 120
# # TODO
# # The service subnet CIDR.
# serviceSubnets:
# - 10.96.0.0/12
# - 2a13:fc80:0001:d201::/64
# routingMode: native
# k8s:
# requireIPv4PodCIDR: true
# requireIPv6PodCIDR: false
# bgpControlPlane:
# enabled: true
ipv4:
enabled: true
ipv6:
enabled: false
# ipv4:
# enabled: true
# ipv6:
# enabled: true
# Avoid encapsulation for direct access
routingMode: native
# enableIPv4Masquerade: false
# enableIPv6Masquerade: false
# Route distribution is managed by BGP
bgpControlPlane:
enabled: true
# enable instead of bgpControlPlane
# l2announcements:
# enabled: false
# externalIPs:
# enabled: false
# Only BGP manages the routes
# auto-direct-node-routes: true
# direct-routing-skip-unreachable: true
# The whole internet is directly reachable from each pod
# ipv6-native-routing-cidr: ::/0
ipv4-native-routing-cidr: 10.0.0.0/8
# Disable masquerading (SNAT); BGP distributes the return routes
enableIPv4Masquerade: false
enableIPv6Masquerade: false
enableIPv6BIGTCP: true
bandwidthManager:
enabled: true
bbr: true
#debug:
# enabled: true
operator:
rollOutPods: true
@@ -83,20 +98,11 @@ resources:
cpu: 200m
memory: 512Mi
#debug:
# enabled: true
# Increase rate limit when doing L2 announcements
k8sClientRateLimit:
qps: 20
burst: 100
l2announcements:
enabled: true
externalIPs:
enabled: true
loadBalancer:
# https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/#maglev-consistent-hashing
algorithm: maglev
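After rolling these values out, the effective agent settings can be cross-checked; a sketch, assuming the cilium CLI is installed:
```shell
# Wait for the agent and operator to become ready
cilium status --wait
# Confirm native routing and disabled masquerading took effect
cilium config view | grep -E 'routing-mode|enable-ipv4-masquerade'
```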


@@ -0,0 +1,9 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
commonLabels:
s5b.org: network
app.kubernetes.io/managed-by: argocd
resources:
- project.yaml
- application-set.yaml


@@ -0,0 +1,18 @@
apiVersion: argoproj.io/v1alpha1
kind: AppProject
metadata:
name: network
namespace: argocd
spec:
sourceRepos:
- 'https://git.straubintra.net/s5b-public/k8s.git'
destinations:
- namespace: 'argocd'
server: '*'
- namespace: 'kube-system'
server: '*'
- namespace: 'cilium-secrets'
server: '*'
clusterResourceWhitelist:
- group: '*'
kind: '*'


@@ -0,0 +1,49 @@
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/name: load-balancer-example
name: hello-world
spec:
replicas: 3
selector:
matchLabels:
app.kubernetes.io/name: load-balancer-example
template:
metadata:
labels:
app.kubernetes.io/name: load-balancer-example
spec:
containers:
- image: gcr.io/google-samples/hello-app:2.0
name: hello-world
ports:
- containerPort: 8080
name: http-web-svc
securityContext:
seccompProfile:
type: "RuntimeDefault"
allowPrivilegeEscalation: false
runAsNonRoot: true
readOnlyRootFilesystem: true
runAsUser: 1000
capabilities:
drop: ["ALL"]
---
apiVersion: v1
kind: Service
metadata:
name: hello-world-service
namespace: default
labels:
app.kubernetes.io/name: load-balancer-example
spec:
selector:
app.kubernetes.io/name: load-balancer-example
type: LoadBalancer
loadBalancerClass: io.cilium/bgp-control-plane
# ipFamilyPolicy: RequireDualStack
ports:
- protocol: TCP
port: 80
targetPort: http-web-svc
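A quick end-to-end check for the example; a sketch, assuming the upstream router accepts the BGP routes (the address below is illustrative, taken from the public-v4-01 pool):
```shell
# EXTERNAL-IP should be filled from the CiliumLoadBalancerIPPool
kubectl get svc hello-world-service
# From a host that learns the BGP routes (illustrative address)
curl http://185.83.87.48/
```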


@@ -8,6 +8,8 @@
### Network
#### Node Network
- IPv4 configuration uses DHCP with static MAC binding for easy bring-up
- IPv6 addresses are manually assigned
- DNS Zone: prod.k8.eis-mk8.de.s5b.org
@@ -16,6 +18,29 @@
|VLAN|IPv4|IPv6|
|--|--|--|
|210|10.51.10.0/23|2a13:fc80:1:a::/64|
#### Pod Network
- IPv4 only
- IPv4 prefix: 10.244.0.0/16
- Potential IPv6 prefix: 2a13:fc80:0001:d200::/64
##### Important side-notes
- On the DNS resolver (VyOS), these networks must be whitelisted to allow recursive DNS; see the sketch below
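A minimal sketch of such a whitelist in VyOS syntax (statement names may differ between VyOS versions; the service CIDR is included here as an assumption):
```shell
set service dns forwarding allow-from '10.244.0.0/16'
set service dns forwarding allow-from '10.96.0.0/16'
commit
save
```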
#### Service Network
- Dual-Stack
- IPv6 Prefix: 2a13:fc80:0001:d201::/64
- IPv4 Prefix: 185.83.87.48/28
#### BGP
|ASN|Who|
|--|--|
|65000|Upstream Router|
|65001|Cluster|
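With the BGP control plane enabled, peering state can be verified from the cilium CLI; a sketch, assuming a recent cilium-cli:
```shell
# Session state per node and peer (ASN 65000 should be Established)
cilium bgp peers
# What the nodes actually advertise (PodCIDRs and service VIPs)
cilium bgp routes advertised ipv4 unicast
```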
## How to use
### Prerequisites