First edition that installs the cluster, but the cluster does not get healthy

9 .gitignore vendored
@@ -1,2 +1,9 @@
# EnvRC files contain secrets that must not be checked in
.envrc

# OpenTofu working files
.terraform
output/
*.tfstate
*.tfstate.*
**/*.tfplan

18 .yamllint.yaml Normal file
@@ -0,0 +1,18 @@
extends: default

yaml-files:
  - '*.yaml'
  - '*.yml'

ignore: |
  README.md
  LICENSE
  secret**.yaml

rules:
  document-start: { present: false }
  brackets:
    min-spaces-inside: 0
    max-spaces-inside: 10
  line-length:
    allow-non-breakable-inline-mappings: true

14 00-infrastructure/01-tofu/cluster_state.sh Executable file
@@ -0,0 +1,14 @@
#!/usr/bin/bash
# Show membership, etcd status, and overall health of the Talos cluster.
NODES="-n 10.51.10.101 -n 10.51.10.102 -n 10.51.10.103"
ENDPOINTS="--endpoints 10.51.10.100"
TALOSCONFIG="--talosconfig=output/talos-config.yaml"

PARAMETERS="${NODES} ${ENDPOINTS} ${TALOSCONFIG}"
echo " === Talos Cluster Members ==="
talosctl ${PARAMETERS} get members
echo
echo " === etcd status ==="
talosctl ${PARAMETERS} etcd status
echo
echo " === Cluster Health ==="
# The health check is run against a single control plane node.
talosctl ${ENDPOINTS} ${TALOSCONFIG} -n 10.51.10.101 health
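
Once the health check passes, the kubeconfig can be fetched the same way. A minimal sketch with `talosctl`, using the endpoint and config paths this repo generates (`kubeconfig` writes into the given directory):

```shell
# Fetch the kubeconfig from the first control plane node into output/
# (same --talosconfig and endpoint as the script above).
talosctl --talosconfig=output/talos-config.yaml \
  --endpoints 10.51.10.100 -n 10.51.10.101 \
  kubeconfig output/
```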

13 00-infrastructure/01-tofu/main.tf Normal file
@@ -0,0 +1,13 @@
# 00-infrastructure/01-tofu/main.tf

module "talos" {
  source = "./talos"

  providers = {
    proxmox = proxmox
  }

  image   = var.talos_image
  cluster = var.talos_cluster_config
  nodes   = var.talos_nodes
}

58 00-infrastructure/01-tofu/output.tf Normal file
@@ -0,0 +1,58 @@
resource "local_file" "talos_machine_secrets" {
  content = yamlencode({
    cluster    = module.talos.machine_secrets.cluster
    secrets    = module.talos.machine_secrets.secrets
    trustdinfo = module.talos.machine_secrets.trustdinfo
    certs = {
      etcd = {
        crt = module.talos.machine_secrets.certs.etcd.cert
        key = module.talos.machine_secrets.certs.etcd.key
      }
      k8s = {
        crt = module.talos.machine_secrets.certs.k8s.cert
        key = module.talos.machine_secrets.certs.k8s.key
      }
      k8saggregator = {
        crt = module.talos.machine_secrets.certs.k8s_aggregator.cert
        key = module.talos.machine_secrets.certs.k8s_aggregator.key
      }
      k8sserviceaccount = {
        key = module.talos.machine_secrets.certs.k8s_serviceaccount.key
      }
      os = {
        crt = module.talos.machine_secrets.certs.os.cert
        key = module.talos.machine_secrets.certs.os.key
      }
    }
  })
  filename = "output/talos-machine-secrets.yaml"
}

resource "local_file" "talos_machine_configs" {
  for_each        = module.talos.machine_config
  content         = each.value.machine_configuration
  filename        = "output/talos-machine-config-${each.key}.yaml"
  file_permission = "0600"
}

resource "local_file" "talos_config" {
  content         = module.talos.client_configuration.talos_config
  filename        = "output/talos-config.yaml"
  file_permission = "0600"
}

# resource "local_file" "kube_config" {
#   content         = module.talos.kube_config.kubeconfig_raw
#   filename        = "output/kube-config.yaml"
#   file_permission = "0600"
# }

# output "kube_config" {
#   value     = module.talos.kube_config.kubeconfig_raw
#   sensitive = true
# }

output "talos_config" {
  value     = module.talos.client_configuration.talos_config
  sensitive = true
}

30 00-infrastructure/01-tofu/providers.tf Normal file
@@ -0,0 +1,30 @@
terraform {
  required_providers {
    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = "2.38.0"
    }
    proxmox = {
      source  = "bpg/proxmox"
      version = "0.81.0"
    }
    talos = {
      source  = "siderolabs/talos"
      version = "0.8.1"
    }
    restapi = {
      source  = "Mastercard/restapi"
      version = "2.0.1"
    }
  }
}

provider "proxmox" {
  endpoint = var.proxmox.endpoint
  insecure = var.proxmox.insecure

  ssh {
    agent    = true
    username = var.proxmox.username_ssh
  }
}

6 00-infrastructure/01-tofu/proxmox.auto.tfvars Normal file
@@ -0,0 +1,6 @@
proxmox = {
  name         = "pve01"
  cluster_name = "homelab"
  endpoint     = "https://pve01.straubintra.net:8006"
  insecure     = true
}

127 00-infrastructure/01-tofu/talos/config.tf Normal file
@@ -0,0 +1,127 @@
locals {
  first_control_plane_node_ip = [for k, v in var.nodes : v.ip if v.machine_type == "controlplane"][0]
  kubernetes_endpoint         = coalesce(var.cluster.vip, local.first_control_plane_node_ip)
  extra_manifests = concat(var.cluster.extra_manifests, [
    "https://github.com/kubernetes-sigs/gateway-api/releases/download/${var.cluster.gateway_api_version}/standard-install.yaml",
    "https://raw.githubusercontent.com/kubernetes-sigs/gateway-api/${var.cluster.gateway_api_version}/config/crd/experimental/gateway.networking.k8s.io_tlsroutes.yaml",
    "https://raw.githubusercontent.com/prometheus-community/helm-charts/refs/heads/main/charts/kube-prometheus-stack/charts/crds/crds/crd-servicemonitors.yaml"
  ])
}

resource "talos_machine_secrets" "this" {
  // Changing talos_version causes trouble as new certs are created
}

data "talos_client_configuration" "this" {
  cluster_name         = var.cluster.name
  client_configuration = talos_machine_secrets.this.client_configuration
  nodes                = [for k, v in var.nodes : v.ip]
  endpoints            = [for k, v in var.nodes : v.ip if v.machine_type == "controlplane"]
}

resource "terraform_data" "cilium_bootstrap_inline_manifests" {
  input = [
    {
      name     = "cilium-bootstrap"
      contents = file("${path.root}/${var.cluster.cilium.bootstrap_manifest_path}")
    },
    {
      name = "cilium-values"
      contents = yamlencode({
        apiVersion = "v1"
        kind       = "ConfigMap"
        metadata = {
          name      = "cilium-values"
          namespace = "kube-system"
        }
        data = {
          "values.yaml" = file("${path.root}/${var.cluster.cilium.values_file_path}")
        }
      })
    }
  ]
}

data "talos_machine_configuration" "this" {
  for_each     = var.nodes
  cluster_name = var.cluster.name
  # This is the Kubernetes API Server endpoint.
  # ref - https://www.talos.dev/latest/introduction/prodnotes/#decide-the-kubernetes-endpoint
  cluster_endpoint = "https://${local.kubernetes_endpoint}:6443"
  # @formatter:off
  talos_version = var.cluster.talos_machine_config_version != null ? var.cluster.talos_machine_config_version : (each.value.update == true ? var.image.update_version : var.image.version)
  # @formatter:on
  machine_type    = each.value.machine_type
  machine_secrets = talos_machine_secrets.this.machine_secrets
  config_patches = [
    templatefile("${path.module}/machine-config/common.yaml.tftpl", {
      node_name          = each.value.host_node
      cluster_name       = var.cluster.proxmox_cluster
      kubernetes_version = var.cluster.kubernetes_version
      http_proxy         = var.cluster.http_proxy
      no_proxy           = var.cluster.no_proxy
      ntp_servers        = var.cluster.ntp_servers
      hostname           = each.key
      kubelet            = var.cluster.kubelet
    }), each.value.machine_type == "controlplane" ?
    templatefile("${path.module}/machine-config/control_plane.yaml.tftpl", {
      mac_address      = lower(each.value.mac_address)
      vip              = var.cluster.vip
      extra_manifests  = jsonencode(local.extra_manifests)
      api_server       = var.cluster.api_server
      inline_manifests = jsonencode(terraform_data.cilium_bootstrap_inline_manifests.output)
    }) :
    templatefile("${path.module}/machine-config/worker.yaml.tftpl", {
      mac_address = lower(each.value.mac_address)
    })
  ]
}

resource "talos_machine_configuration_apply" "this" {
  depends_on                  = [proxmox_virtual_environment_vm.this]
  for_each                    = var.nodes
  node                        = each.value.ip
  client_configuration        = talos_machine_secrets.this.client_configuration
  machine_configuration_input = data.talos_machine_configuration.this[each.key].machine_configuration
  lifecycle {
    # re-run config apply if vm changes
    replace_triggered_by = [proxmox_virtual_environment_vm.this[each.key]]
  }
}

resource "talos_machine_bootstrap" "this" {
  depends_on = [talos_machine_configuration_apply.this]
  # Bootstrap with the first control plane node.
  # VIP not yet available at this stage, so can't use var.cluster.vip
  # ref - https://www.talos.dev/v1.9/talos-guides/network/vip/#caveats
  node                 = local.first_control_plane_node_ip
  client_configuration = talos_machine_secrets.this.client_configuration
}

data "talos_cluster_health" "this" {
  depends_on = [
    talos_machine_configuration_apply.this,
    talos_machine_bootstrap.this
  ]
  skip_kubernetes_checks = false
  client_configuration   = data.talos_client_configuration.this.client_configuration
  control_plane_nodes    = [for k, v in var.nodes : v.ip if v.machine_type == "controlplane"]
  worker_nodes           = [for k, v in var.nodes : v.ip if v.machine_type == "worker"]
  endpoints              = data.talos_client_configuration.this.endpoints
  timeouts = {
    read = "10m"
  }
}

resource "talos_cluster_kubeconfig" "this" {
  depends_on = [
    talos_machine_bootstrap.this,
    data.talos_cluster_health.this
  ]
  # The kubeconfig endpoint will be populated from the talos_machine_configuration cluster_endpoint
  node                 = local.first_control_plane_node_ip
  client_configuration = talos_machine_secrets.this.client_configuration
  timeouts = {
    read = "1m"
  }
}
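
Since this commit's cluster never reports healthy, it helps to run the same check that `data "talos_cluster_health"` performs by hand and see which stage (etcd, static pods, nodes ready) actually hangs. A sketch using the generated client config and the node IPs from `talos_nodes.auto.tfvars`:

```shell
# Mirror the talos_cluster_health data source from the CLI; the command
# prints each health stage as it passes or times out.
talosctl --talosconfig=output/talos-config.yaml \
  -n 10.51.10.101 --endpoints 10.51.10.100 health \
  --control-plane-nodes 10.51.10.101,10.51.10.102,10.51.10.103 \
  --worker-nodes 10.51.11.1,10.51.11.2,10.51.11.3
```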

63 00-infrastructure/01-tofu/talos/image.tf Normal file
@@ -0,0 +1,63 @@
locals {
  version      = var.image.version
  schematic    = file("${path.root}/${var.image.schematic_path}")
  schematic_id = jsondecode(data.http.schematic_id.response_body)["id"]

  update_version        = coalesce(var.image.update_version, var.image.version)
  update_schematic_path = coalesce(var.image.update_schematic_path, var.image.schematic_path)
  update_schematic      = file("${path.root}/${local.update_schematic_path}")
  update_schematic_id   = jsondecode(data.http.updated_schematic_id.response_body)["id"]

  image_id        = "${local.schematic_id}_${local.version}"
  update_image_id = "${local.update_schematic_id}_${local.update_version}"

  # Comment the above 2 lines and un-comment the below 2 lines to use the provider schematic ID instead of the HTTP one
  # ref - https://github.com/vehagn/homelab/issues/106
  # image_id        = "${talos_image_factory_schematic.this.id}_${local.version}"
  # update_image_id = "${talos_image_factory_schematic.updated.id}_${local.update_version}"
}

data "http" "schematic_id" {
  url          = "${var.image.factory_url}/schematics"
  method       = "POST"
  request_body = local.schematic
}

data "http" "updated_schematic_id" {
  url          = "${var.image.factory_url}/schematics"
  method       = "POST"
  request_body = local.update_schematic
}

resource "talos_image_factory_schematic" "this" {
  schematic = local.schematic
}

resource "talos_image_factory_schematic" "updated" {
  schematic = local.update_schematic
}

# Note the ellipsis (...) after the for-loop. It collects values with the same key into a list.
# The key is purposely made up of the values (image_id contains both the schematic ID and the version);
# since all values under a key are therefore identical, we can simply select the first element of the value list.
# Improvements are welcome!
resource "proxmox_virtual_environment_download_file" "this" {
  for_each = {
    for k, v in var.nodes :
    "${v.host_node}_${v.update == true ? local.update_image_id : local.image_id}" => {
      host_node = v.host_node
      schematic = v.update == true ? talos_image_factory_schematic.updated.id : talos_image_factory_schematic.this.id
      version   = v.update == true ? local.update_version : local.version
    }...
  }

  node_name    = each.value[0].host_node
  content_type = "iso"
  datastore_id = var.image.proxmox_datastore

  file_name               = "talos-${each.value[0].schematic}-${each.value[0].version}-${var.image.platform}-${var.image.arch}.img"
  url                     = "${var.image.factory_url}/image/${each.value[0].schematic}/${each.value[0].version}/${var.image.platform}-${var.image.arch}.raw.gz"
  decompression_algorithm = "gz"
  overwrite               = false
  overwrite_unmanaged     = true
}

6 00-infrastructure/01-tofu/talos/image/schematic.yaml Normal file
@@ -0,0 +1,6 @@
customization:
  systemExtensions:
    officialExtensions:
      - siderolabs/amd-ucode
      - siderolabs/intel-ucode
      - siderolabs/qemu-guest-agent
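
This schematic is what `image.tf` POSTs to the Image Factory via the `data "http"` blocks; the same ID can be retrieved by hand when debugging image URLs. A minimal sketch against the public factory endpoint used in this repo:

```shell
# Returns e.g. {"id":"<schematic id>"}; image.tf embeds this id in the
# download URL together with the Talos version.
curl -s -X POST --data-binary @talos/image/schematic.yaml \
  https://factory.talos.dev/schematics
```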

86 00-infrastructure/01-tofu/talos/inline-manifests/cilium_install.yaml Normal file
@@ -0,0 +1,86 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cilium-install
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
  - kind: ServiceAccount
    name: cilium-install
    namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cilium-install
  namespace: kube-system
---
apiVersion: batch/v1
kind: Job
metadata:
  name: cilium-install
  namespace: kube-system
spec:
  backoffLimit: 10
  template:
    metadata:
      labels:
        app: cilium-install
    spec:
      restartPolicy: OnFailure
      tolerations:
        - operator: Exists
        - effect: NoSchedule
          operator: Exists
        - effect: NoExecute
          operator: Exists
        - effect: PreferNoSchedule
          operator: Exists
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoExecute
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: PreferNoSchedule
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/control-plane
                    operator: Exists
      serviceAccountName: cilium-install
      hostNetwork: true
      containers:
        - name: cilium-install
          image: quay.io/cilium/cilium-cli:v0.18.6 # renovate: github-releases=cilium/cilium-cli
          env:
            - name: KUBERNETES_SERVICE_HOST
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: status.podIP
            - name: KUBERNETES_SERVICE_PORT
              value: "6443"
          volumeMounts:
            - name: values
              mountPath: /root/app/values.yaml
              subPath: values.yaml
          command:
            - cilium
            - install
            - --version=v1.18.0 # renovate: github-releases=cilium/cilium
            - --set
            - kubeProxyReplacement=true
            - --values
            - values.yaml
      volumes:
        - name: values
          configMap:
            name: cilium-values

67 00-infrastructure/01-tofu/talos/machine-config/common.yaml.tftpl Normal file
@@ -0,0 +1,67 @@
machine:
  registries:
    mirrors:
      docker.io:
        endpoints:
          - http://harbor.prod.eis-mk8.de.s5b.org/v2/proxy-docker.io
        overridePath: true
      ghcr.io:
        endpoints:
          - http://harbor.prod.eis-mk8.de.s5b.org/v2/proxy-ghcr.io
        overridePath: true
      gcr.io:
        endpoints:
          - http://harbor.prod.eis-mk8.de.s5b.org/v2/proxy-gcr.io
        overridePath: true
      registry.k8s.io:
        endpoints:
          - http://harbor.prod.eis-mk8.de.s5b.org/v2/proxy-registry.k8s.io
        overridePath: true
  kubelet:
    image: ghcr.io/siderolabs/kubelet:${kubernetes_version}
%{if kubelet != ""}
${indent(4, kubelet)}
%{endif}
  nodeLabels:
    topology.kubernetes.io/region: ${cluster_name}
    topology.kubernetes.io/zone: ${node_name}
  network:
    hostname: ${hostname}
%{if length(ntp_servers) > 0 }
  time:
    servers:
%{ for addr in ntp_servers ~}
      - ${addr}
%{ endfor ~}
%{endif}
  sysctls:
    fs.inotify.max_user_watches: 1048576 # Watchdog
    fs.inotify.max_user_instances: 8192 # Watchdog
    net.core.default_qdisc: fq # 10Gb/s
    net.core.rmem_max: 67108864 # 10Gb/s | Cloudflared / QUIC
    net.core.wmem_max: 67108864 # 10Gb/s | Cloudflared / QUIC
    net.ipv4.tcp_congestion_control: bbr # 10Gb/s
    net.ipv4.tcp_fastopen: 3 # Send and accept data in the opening SYN packet
    net.ipv4.tcp_mtu_probing: 1 # 10Gb/s | Jumbo frames
    net.ipv4.tcp_rmem: 4096 87380 33554432 # 10Gb/s
    net.ipv4.tcp_wmem: 4096 65536 33554432 # 10Gb/s
    net.ipv4.tcp_window_scaling: 1 # 10Gb/s
    vm.nr_hugepages: 1024 # PostgreSQL
%{if http_proxy != ""}
  env:
    http_proxy: ${http_proxy}
    https_proxy: ${http_proxy}
%{if no_proxy != ""}
    no_proxy: ${no_proxy}
%{endif}
%{endif}

cluster:
  apiServer:
    image: registry.k8s.io/kube-apiserver:${kubernetes_version}
  controllerManager:
    image: registry.k8s.io/kube-controller-manager:${kubernetes_version}
  proxy:
    image: registry.k8s.io/kube-proxy:${kubernetes_version}
  scheduler:
    image: registry.k8s.io/kube-scheduler:${kubernetes_version}

39 00-infrastructure/01-tofu/talos/machine-config/control_plane.yaml.tftpl Normal file
@@ -0,0 +1,39 @@
# https://www.talos.dev/v1.10/reference/configuration/v1alpha1/config/
machine:
  network:
    interfaces:
      - deviceSelector:
          hardwareAddr: ${mac_address}
        dhcp: true
%{ if vip != null }
        vip:
          ip: ${vip}
%{ endif }

cluster:
  allowSchedulingOnControlPlanes: true
%{if api_server != ""}
  apiServer:
    ${indent(4, api_server)}
%{endif}
  controllerManager:
    extraArgs:
      bind-address: 0.0.0.0
  etcd:
    extraArgs:
      listen-metrics-urls: http://0.0.0.0:2381
  scheduler:
    extraArgs:
      bind-address: 0.0.0.0
  network:
    cni:
      name: none
  proxy:
    disabled: true
  discovery:
    enabled: true
    registries:
      service:
        disabled: false
  extraManifests: ${extra_manifests}
  inlineManifests: ${inline_manifests}

6 00-infrastructure/01-tofu/talos/machine-config/worker.yaml.tftpl Normal file
@@ -0,0 +1,6 @@
machine:
  network:
    interfaces:
      - deviceSelector:
          hardwareAddr: ${mac_address}
        dhcp: true

18 00-infrastructure/01-tofu/talos/output.tf Normal file
@@ -0,0 +1,18 @@
output "machine_secrets" {
  value     = talos_machine_secrets.this.machine_secrets
  sensitive = true
}

output "machine_config" {
  value = data.talos_machine_configuration.this
}

output "client_configuration" {
  value     = data.talos_client_configuration.this
  sensitive = true
}

# output "kube_config" {
#   value     = talos_cluster_kubeconfig.this
#   sensitive = true
# }

16 00-infrastructure/01-tofu/talos/providers.tf Normal file
@@ -0,0 +1,16 @@
terraform {
  required_providers {
    proxmox = {
      source  = "bpg/proxmox"
      version = ">=0.81.0"
    }
    talos = {
      source  = "siderolabs/talos"
      version = ">=0.8.1"
    }
    http = {
      source  = "hashicorp/http"
      version = ">=3.4.5"
    }
  }
}

55 00-infrastructure/01-tofu/talos/variables.tf Normal file
@@ -0,0 +1,55 @@
variable "image" {
  description = "Talos image configuration"
  type = object({
    factory_url           = optional(string, "https://factory.talos.dev")
    schematic_path        = string
    version               = string
    update_schematic_path = optional(string)
    update_version        = optional(string)
    arch                  = optional(string, "amd64")
    platform              = optional(string, "nocloud")
    proxmox_datastore     = optional(string, "local")
  })
}

variable "cluster" {
  description = "Cluster configuration"
  type = object({
    name                         = string
    vip                          = optional(string)
    subnet_mask                  = optional(string, "24")
    talos_machine_config_version = optional(string)
    proxmox_cluster              = string
    kubernetes_version           = string
    gateway_api_version          = string
    node_network_vlan            = optional(number)
    ntp_servers                  = optional(list(string), [])
    http_proxy                   = optional(string, "")
    no_proxy                     = optional(string, "")
    extra_manifests              = optional(list(string))
    kubelet                      = optional(string)
    api_server                   = optional(string, "")
    cilium = object({
      bootstrap_manifest_path = string
      values_file_path        = string
    })
  })
}

variable "nodes" {
  description = "Configuration for cluster nodes"
  type = map(object({
    host_node        = string
    machine_type     = string
    datastore_id     = optional(string, "ZFS")
    ip               = string
    dns              = optional(list(string))
    mac_address      = string
    vm_id            = number
    cpu              = number
    ram_dedicated    = number
    system_disk_size = optional(number, 60)
    update           = optional(bool, false)
    igpu             = optional(bool, false)
  }))
}

84 00-infrastructure/01-tofu/talos/virtual_machines.tf Normal file
@@ -0,0 +1,84 @@
resource "proxmox_virtual_environment_vm" "this" {
  for_each = var.nodes

  node_name = each.value.host_node

  name        = each.key
  description = each.value.machine_type == "controlplane" ? "Talos Control Plane" : "Talos Worker"
  tags        = each.value.machine_type == "controlplane" ? ["k8s", "control-plane"] : ["k8s", "worker"]
  on_boot     = true
  vm_id       = each.value.vm_id

  machine       = "q35"
  scsi_hardware = "virtio-scsi-single"
  bios          = "seabios"

  agent {
    enabled = true
  }

  cpu {
    cores = each.value.cpu
    type  = "host"
  }

  memory {
    dedicated = each.value.ram_dedicated
  }

  network_device {
    bridge      = "vmbr0"
    mac_address = each.value.mac_address
    vlan_id     = var.cluster.node_network_vlan
  }

  disk {
    datastore_id = each.value.datastore_id
    interface    = "scsi0"
    iothread     = true
    cache        = "writethrough"
    discard      = "on"
    ssd          = true
    file_format  = "raw"
    size         = each.value.system_disk_size
    file_id      = proxmox_virtual_environment_download_file.this["${each.value.host_node}_${each.value.update == true ? local.update_image_id : local.image_id}"].id
  }

  boot_order = ["scsi0"]

  operating_system {
    type = "l26" # Linux Kernel 2.6 - 6.X.
  }

  # We use DHCP with static mappings --> Not needed
  # initialization {
  #   datastore_id = each.value.datastore_id

  #   # Optional DNS Block. Update Nodes with a list value to use.
  #   dynamic "dns" {
  #     for_each = try(each.value.dns, null) != null ? { "enabled" = each.value.dns } : {}
  #     content {
  #       servers = each.value.dns
  #     }
  #   }

  #   ip_config {
  #     ipv4 {
  #       address = "${each.value.ip}/${var.cluster.subnet_mask}"
  #       gateway = var.cluster.gateway
  #     }
  #   }
  # }

  dynamic "hostpci" {
    for_each = each.value.igpu ? [1] : []
    content {
      # Passthrough iGPU
      device  = "hostpci0"
      mapping = "iGPU"
      pcie    = true
      rombar  = true
      xvga    = false
    }
  }
}

19 00-infrastructure/01-tofu/talos_cluster.auto.tfvars Normal file
@@ -0,0 +1,19 @@
talos_cluster_config = {
  name              = "eismk8-prod"
  proxmox_cluster   = "homelab"
  node_network_vlan = 210
  ntp_servers = [
    "2a13:fc80:1:f000::1"
  ]
  # http_proxy = "http://100.64.0.1:3128"
  # no_proxy   = "10.0.0.0/8"

  vip                 = "10.51.10.100"
  kubernetes_version  = "v1.33.3"
  gateway_api_version = "v1.3.0"

  cilium = {
    bootstrap_manifest_path = "talos/inline-manifests/cilium_install.yaml"
    values_file_path        = "../../02-k8s/infra/network/cilium/values.yaml"
  }
}

6 00-infrastructure/01-tofu/talos_image.auto.tfvars Normal file
@@ -0,0 +1,6 @@
talos_image = {
  version        = "v1.10.7"
  update_version = "v1.10.7"
  schematic_path = "talos/image/schematic.yaml"
  #update_schematic_path = "talos/image/schematic.yaml"
}

58 00-infrastructure/01-tofu/talos_nodes.auto.tfvars Normal file
@@ -0,0 +1,58 @@
talos_nodes = {
  # Controller Nodes
  "ctrl-01" = {
    host_node     = "pve01"
    machine_type  = "controlplane"
    ip            = "10.51.10.101"
    mac_address   = "BC:24:11:7B:76:3E"
    vm_id         = 301
    cpu           = 1
    ram_dedicated = 4096
  }
  "ctrl-02" = {
    host_node     = "pve02"
    machine_type  = "controlplane"
    ip            = "10.51.10.102"
    mac_address   = "BC:24:11:16:85:7D"
    vm_id         = 302
    cpu           = 1
    ram_dedicated = 4096
  }
  "ctrl-03" = {
    host_node     = "pve-oberon"
    machine_type  = "controlplane"
    ip            = "10.51.10.103"
    mac_address   = "BC:24:11:B8:B6:6F"
    vm_id         = 303
    cpu           = 1
    ram_dedicated = 4096
  }
  # Worker Nodes
  "worker-01" = {
    host_node     = "pve01"
    machine_type  = "worker"
    ip            = "10.51.11.1"
    mac_address   = "BC:24:11:E1:E9:AE"
    vm_id         = 311
    cpu           = 4
    ram_dedicated = 8192
  }
  "worker-02" = {
    host_node     = "pve02"
    machine_type  = "worker"
    ip            = "10.51.11.2"
    mac_address   = "BC:24:11:63:3A:85"
    vm_id         = 312
    cpu           = 4
    ram_dedicated = 8192
  }
  "worker-03" = {
    host_node     = "pve-oberon"
    machine_type  = "worker"
    ip            = "10.51.11.3"
    mac_address   = "BC:24:11:8E:75:0E"
    vm_id         = 313
    cpu           = 4
    ram_dedicated = 8192
  }
}

78 00-infrastructure/01-tofu/variables.tf Normal file
@@ -0,0 +1,78 @@
variable "proxmox" {
  description = "Proxmox provider configuration"
  type = object({
    name         = string
    cluster_name = string
    endpoint     = string
    insecure     = bool
    username_ssh = optional(string, "root")
  })
}

# variable "proxmox_api_token" {
#   description = "API token for Proxmox"
#   type        = string
#   sensitive   = true
# }

variable "talos_image" {
  description = "Talos image configuration"
  type = object({
    factory_url           = optional(string, "https://factory.talos.dev")
    version               = string
    schematic_path        = string
    update_version        = optional(string)
    update_schematic_path = optional(string)
    arch                  = optional(string, "amd64")
    platform              = optional(string, "nocloud")
    proxmox_datastore     = optional(string, "local")
  })
}

variable "talos_cluster_config" {
  description = "Talos cluster configuration"
  type = object({
    name                         = string
    vip                          = optional(string)
    subnet_mask                  = optional(string, "24")
    talos_machine_config_version = optional(string)
    proxmox_cluster              = string
    kubernetes_version           = string
    gateway_api_version          = string
    node_network_vlan            = optional(number)
    http_proxy                   = optional(string)
    no_proxy                     = optional(string)
    extra_manifests              = optional(list(string), [])
    kubelet                      = optional(string, "")
    api_server                   = optional(string)
    cilium = object({
      bootstrap_manifest_path = string
      values_file_path        = string
    })
  })
}

variable "talos_nodes" {
  type = map(
    object({
      host_node        = string
      machine_type     = string
      ip               = string
      dns              = optional(list(string))
      mac_address      = string
      vm_id            = number
      cpu              = number
      ram_dedicated    = number
      system_disk_size = optional(number, 60)
      update           = optional(bool, false)
      igpu             = optional(bool, false)
    })
  )
  validation {
    // @formatter:off
    condition     = length([for n in var.talos_nodes : n if contains(["controlplane", "worker"], n.machine_type)]) == length(var.talos_nodes)
    error_message = "Node machine_type must be either 'controlplane' or 'worker'."
    // @formatter:on
  }
}

00-infrastructure/README.md
@@ -6,6 +6,42 @@ Here you find everything to set up the VM infrastructure for the Talos cluster.

1. [Ansible playbooks for managing Proxmox hosts](./00-ansible-pve-hosts/README.md)

## Get started

### Setup Proxmox Cluster

TBD. For now, the Proxmox cluster is expected to exist already, including the following steps (see the sketch after this list):

- Creation of a terraform user
- Assignment of a role with the required privileges
- Creation of an API token
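
A sketch of those three steps with Proxmox's `pveum` CLI; the user name, role name, and privilege list below are assumptions (trim them to what the bpg/proxmox provider actually needs in your setup), not something this repo pins down yet:

```shell
# Run on a PVE node: role, user, ACL, and an API token for OpenTofu.
# The privilege list is an assumption, not verified against this repo.
pveum role add Terraform -privs "Datastore.Allocate Datastore.AllocateSpace Datastore.Audit \
  Sys.Audit Sys.Console Sys.Modify VM.Allocate VM.Audit VM.Clone VM.Config.CDROM \
  VM.Config.CPU VM.Config.Cloudinit VM.Config.Disk VM.Config.HWType VM.Config.Memory \
  VM.Config.Network VM.Config.Options VM.Migrate VM.Monitor VM.PowerMgmt SDN.Use"
pveum user add terraform@pve
pveum aclmod / -user terraform@pve -role Terraform
pveum user token add terraform@pve provider --privsep=0
```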

### Install OpenTofu

Via the apt repository:

```shell
sudo apt-get update
sudo apt-get install -y apt-transport-https ca-certificates curl gnupg
sudo install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://get.opentofu.org/opentofu.gpg | sudo tee /etc/apt/keyrings/opentofu.gpg >/dev/null
curl -fsSL https://packages.opentofu.org/opentofu/tofu/gpgkey | sudo gpg --no-tty --batch --dearmor -o /etc/apt/keyrings/opentofu-repo.gpg >/dev/null
sudo chmod a+r /etc/apt/keyrings/opentofu.gpg

echo \
  "deb [signed-by=/etc/apt/keyrings/opentofu.gpg,/etc/apt/keyrings/opentofu-repo.gpg] https://packages.opentofu.org/opentofu/tofu/any/ any main
deb-src [signed-by=/etc/apt/keyrings/opentofu.gpg,/etc/apt/keyrings/opentofu-repo.gpg] https://packages.opentofu.org/opentofu/tofu/any/ any main" | \
  sudo tee /etc/apt/sources.list.d/opentofu.list > /dev/null

sudo apt-get update
sudo apt-get install -y tofu
```

Alternatively, install the release package directly:

```shell
wget -O /tmp/tofu.deb https://github.com/opentofu/opentofu/releases/download/v1.10.5/tofu_1.10.5_amd64.deb
sudo dpkg -i /tmp/tofu.deb
rm /tmp/tofu.deb
sudo apt install -f
```
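
To verify the install and wire it up to this repo (directory path per this repository's layout):

```shell
tofu version
cd 00-infrastructure/01-tofu
tofu init
tofu plan
```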

## Literature

- [Talos Kubernetes on Proxmox using OpenTofu](https://blog.stonegarden.dev/articles/2024/08/talos-proxmox-tofu/)
- [Talos on Proxmox with Terraform (multiple node pools)](https://github.com/sergelogvinov/terraform-talos/tree/main/proxmox)
- [Infrastructure Automation: Provisioning VMs on Proxmox with Packer, OpenTOFU, GitLab, Vault, and Minio](https://medium.com/@avishkumar27/infrastructure-automation-provisioning-vms-on-proxmox-with-packer-opentofu-gitlab-vault-and-27fda7d73771)

152 02-k8s/infra/network/cilium/values.yaml Normal file
@@ -0,0 +1,152 @@
# https://github.com/cilium/cilium/blob/main/install/kubernetes/cilium/values.yaml
cluster:
  name: talos
  id: 1

kubeProxyReplacement: true

# Talos specific
k8sServiceHost: localhost
k8sServicePort: 7445
securityContext:
  capabilities:
    ciliumAgent: [ CHOWN, KILL, NET_ADMIN, NET_RAW, IPC_LOCK, SYS_ADMIN, SYS_RESOURCE, DAC_OVERRIDE, FOWNER, SETGID, SETUID ]
    cleanCiliumState: [ NET_ADMIN, SYS_ADMIN, SYS_RESOURCE ]

cgroup:
  autoMount:
    enabled: false
  hostRoot: /sys/fs/cgroup

# https://www.talos.dev/latest/talos-guides/network/host-dns/#forwarding-kube-dns-to-host-dns
# https://docs.cilium.io/en/stable/operations/performance/tuning/#ebpf-host-routing
bpf:
  hostLegacyRouting: true

# https://docs.cilium.io/en/stable/network/concepts/ipam/
ipam:
  mode: kubernetes
  multiPoolPreAllocation: ""

operator:
  rollOutPods: true
  prometheus:
    metricsService: true
    enabled: true
    port: 9963
    serviceMonitor:
      enabled: true
  dashboards:
    enabled: true
  resources:
    limits:
      cpu: 500m
      memory: 256Mi
    requests:
      cpu: 50m
      memory: 128Mi

# Roll out cilium agent pods automatically when ConfigMap is updated.
rollOutCiliumPods: true
resources:
  limits:
    cpu: 1000m
    memory: 1Gi
  requests:
    cpu: 200m
    memory: 512Mi

#debug:
#  enabled: true

# Increase rate limit when doing L2 announcements
k8sClientRateLimit:
  qps: 20
  burst: 100

l2announcements:
  enabled: true

externalIPs:
  enabled: true

loadBalancer:
  # https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/#maglev-consistent-hashing
  algorithm: maglev

gatewayAPI:
  enabled: true
  # enableAlpn: true
  # enableAppProtocol: true

envoy:
  prometheus:
    enabled: true
    port: "9964"
    serviceMonitor:
      enabled: true
  securityContext:
    capabilities:
      keepCapNetBindService: true
      envoy: [ NET_ADMIN, PERFMON, BPF ]

hubble:
  enabled: true
  metrics:
    enabled:
      - dns
      - drop
      - tcp
      - flow
      - port-distribution
      - icmp
      - "httpV2:exemplars=true;labelsContext=source_ip,source_namespace,source_workload,destination_ip,destination_namespace,destination_workload,traffic_direction;sourceContext=workload-name|reserved-identity;destinationContext=workload-name|reserved-identity"
    enableOpenMetrics: true
    port: 9965
    serviceMonitor:
      enabled: true
    dashboards:
      enabled: true
  relay:
    enabled: true
    rollOutPods: true
    prometheus:
      enabled: true
      port: 9966
      serviceMonitor:
        enabled: true
  ui:
    enabled: true
    rollOutPods: true

ingressController: { enabled: false }

clustermesh:
  apiserver:
    metrics:
      enabled: true
      port: 9962
      serviceMonitor:
        enabled: true

# mTLS
authentication:
  enabled: false
  mutual:
    spire:
      enabled: false
      install:
        server:
          dataStorage:
            storageClass: cilium-spire-sc

prometheus:
  metricsService: true
  enabled: true
  port: 9962
  serviceMonitor:
    enabled: true
    trustCRDsExist: true

dashboards:
  enabled: true

26 README.md
@@ -4,4 +4,28 @@

1. [Infrastructure](./00-infrastructure/README.md)

## Configuration

## Architecture

### Network

- IPv4 configuration uses DHCP with static MAC binding for easy bring-up
- IPv6 addresses are manually assigned
- DNS zone: prod.k8.eis-mk8.de.s5b.org

| VLAN | IPv4 | IPv6 |
|--|--|--|
| 210 | 10.51.10.0/23 | 2a13:fc80:1:a::/64 |

## How to use

### Prerequisites

#### Secrets handling

Use the ``direnv`` package to automatically load the environment variables for a directory, and keep the secrets in ``.envrc`` files. For ZSH, add

```shell
eval "$(direnv hook zsh)"
```

to your ``.zshrc`` for automatic loading on directory change.
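
As an illustration, an ``.envrc`` for ``00-infrastructure/01-tofu`` could carry the Proxmox credentials. The variable name assumes the bpg/proxmox provider's standard environment lookup, and the token value is a placeholder:

```shell
# Loaded by direnv on cd; .envrc is excluded from git via .gitignore.
# PROXMOX_VE_API_TOKEN format: user@realm!tokenid=<uuid> (bpg/proxmox).
export PROXMOX_VE_API_TOKEN='terraform@pve!provider=00000000-0000-0000-0000-000000000000'
```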