Building an IaC BIND9 DNS deployment on Kubernetes
For some time now I have been displeased with my DNS server setup. Previously I had a single Pi-hole instance running on a Raspberry Pi at one of my sites. This meant that private DNS resolution for the entire network was dependent on a single site and a single compute node. This Single Point of Failure (SPoF) is not something I was too happy with, and I did not want to point all my client networks at a single Raspberry Pi.
To remedy this, I wanted to build a distributed DNS server infrastructure, where each site could have one or more DNS servers running in HA, and the clients of that site would use the DNS server that is “closest” to them, while still allowing for failover to other DNS servers should the local one experience a failure. Furthermore, I wanted everything defined as code, so that I could easily redeploy the DNS service from a repo without losing any data.
I drew up this diagram as a rough idea of what I would like to achieve:
Here, each site would have at least one mini-server (like a Raspberry Pi) running k3s (a lightweight Kubernetes distribution) which would peer with the site routers using BGP. This would allow routing to be optimized based on the path selection on the site routers, and allow for failover to backup DNS servers should the local Raspberry Pi have a catastrophic failure.
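To give an idea of what the MetalLB side of such a BGP peering could look like, here is a minimal sketch. The ASNs, peer address and address pool below are hypothetical placeholders, not my actual values:

apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: dns-pool
  namespace: metallb-system
spec:
  addresses:
    - 10.120.2.10/32              # placeholder VIP for the DNS service
---
apiVersion: metallb.io/v1beta2
kind: BGPPeer
metadata:
  name: site-router
  namespace: metallb-system
spec:
  myASN: 64513                    # placeholder private ASN for the k3s node
  peerASN: 64512                  # placeholder private ASN of the site router
  peerAddress: 10.20.10.1         # placeholder address of the site router
---
apiVersion: metallb.io/v1beta1
kind: BGPAdvertisement
metadata:
  name: dns-bgp
  namespace: metallb-system
spec:
  ipAddressPools:
    - dns-pool

With every site announcing the same service IP, each router's path selection would prefer its local node and fall back to another site's advertisement if the local BGP session drops.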
To manage all of this I will be setting up IaC automation hosted in GitLab and executed on a gitlab-runner in the provider DC. This allows for version control and for pre-deploy checks to be executed, and adding new nodes is as simple as adding them to the .kube/config file.
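As an illustration, a kubeconfig with one context per site could look roughly like this (cluster names, addresses and credentials are made-up placeholders):

apiVersion: v1
kind: Config
clusters:
  - name: site-a                        # hypothetical site/cluster name
    cluster:
      server: https://10.20.10.30:6443
      certificate-authority-data: <base64 CA>
  - name: site-b                        # adding a new site means adding another entry here
    cluster:
      server: https://10.30.10.30:6443
      certificate-authority-data: <base64 CA>
contexts:
  - name: site-a
    context:
      cluster: site-a
      user: site-a
  - name: site-b
    context:
      cluster: site-b
      user: site-b
users:
  - name: site-a
    user:
      token: <service-account token>
  - name: site-b
    user:
      token: <service-account token>
current-context: site-a

The deploy step could then loop over these contexts with kubectl --context <name> apply to push the same manifests to every site.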
I will add the most relevant parts of the IaC configuration below, although currently I have only set up a single node for testing, and it is running the MetalLB load balancer in L2 mode rather than BGP mode.
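For completeness, an equivalent L2-mode announcement only needs an L2Advertisement instead of the BGP objects; a minimal sketch, assuming the same hypothetical dns-pool as above:

apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
  name: dns-l2
  namespace: metallb-system
spec:
  ipAddressPools:
    - dns-pool                          # pool holding the shared service IP, as sketched earlier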
IaC
.gitlab-ci.yml:
stages:
  - test
  - build
  - deploy

validate:
  stage: test
  image: python:3.11.10
  script:
    - pip install --break-system-packages -r requirements.txt &> /dev/null
    - python3 validate.py

build:
  stage: build
  image: python:3.11.10
  before_script:
    - pip install --break-system-packages -r ./requirements.txt &> /dev/null
  script:
    - sh build.sh
  artifacts:
    paths:
      - files
    expire_in: 1 day
  rules:
    - if: '$CI_COMMIT_BRANCH == "master"'

deploy:
  stage: deploy
  image: alpine:3.21.2
  environment: production
  before_script:
    - echo $KUBECONFIG >> .kube/config
    - apk update && apk add --no-cache curl
    - curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
    - install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
  script:
    - sh deploy.sh
  rules:
    - if: '$CI_COMMIT_BRANCH == "master"'

include:
  - template: Jobs/Secret-Detection.gitlab-ci.yml
requirements.txt:
ansible==11.1.0
kubernetes==31.0.0
validate.py:
#!/usr/bin/python3
import yaml
import ipaddress

# Whitelists used by the validation checks below
allowed_zones = [
    'gurfin.se',
]
allowed_types = [
    'A',
    'AAAA',
    'PTR',
]
allowed_states = [
    'absent',
    'present',
]

# Load the record definitions that the pipeline should validate
with open('vars/dns.yml', 'r') as file:
    dns = yaml.safe_load(file)


def trigger_error(msg):
    print('! ERROR TRIGGERED > '+msg)
    exit(1)  # Fail validate pipeline


def validate_record_type(record_type, record_value, record_name):
    # The value must be a valid IP address whose version matches the record type
    try:
        ip_obj = ipaddress.ip_address(record_value)
        if record_type == 'A':
            if ip_obj.version != 4: trigger_error('mismatch between IP-version and type selected for "'+record_name+'".')
        elif record_type == 'AAAA':
            if ip_obj.version != 6: trigger_error('mismatch between IP-version and type selected for "'+record_name+'".')
    except ValueError:
        trigger_error('invalid value for "'+record_name+'".')


def verify_private_dns(dns):
    all_private_records = []
    reversed_ips = []
    for record in dns['private']['records']:
        if 'name' not in record or len(record['name']) == 0: trigger_error('name missing for private dns record.')
        if 'zone' not in record or len(record['zone']) == 0: trigger_error('zone missing for private dns record.')
        if record['zone'] not in allowed_zones: trigger_error('the used zone for "'+record['name']+'" is not allowed. allowed zones are [ '+', '.join(allowed_zones)+' ].')
        if 'type' not in record or len(record['type']) == 0: trigger_error('type missing for private dns record.')
        if record['type'] not in allowed_types: trigger_error('the type used for "'+record['name']+'" is not allowed. allowed types are [ '+', '.join(allowed_types)+' ].')
        if 'value' not in record or len(record['value']) == 0: trigger_error('value missing for private dns record.')
        if 'auto_reverse' in record and not isinstance(record['auto_reverse'], bool): trigger_error('auto_reverse needs to be a bool. please review "'+record['name']+'".')
        if 'state' in record and record['state'] not in allowed_states: trigger_error('private record "'+record['name']+'" uses forbidden state. allowed states are [ '+', '.join(allowed_states)+' ].')
        validate_record_type(record['type'], record['value'], record['name'])
        if record['name'] in all_private_records: trigger_error('duplicate private records for "'+record['name']+'".')
        all_private_records.append(record['name'])
        if 'auto_reverse' in record and record['auto_reverse']:
            if record['value'] in reversed_ips: trigger_error('multiple reverse records for "'+record['value']+'".')
            reversed_ips.append(record['value'])


def verify_public_dns(dns):
    all_public_records = []
    for record in dns['public']['records']:
        if 'name' not in record or len(record['name']) == 0: trigger_error('name missing for public dns record.')
        if 'zone' not in record or len(record['zone']) == 0: trigger_error('zone missing for public dns record.')
        if record['zone'] not in allowed_zones: trigger_error('the used zone for "'+record['name']+'" is not allowed. allowed zones are [ '+', '.join(allowed_zones)+' ].')
        if 'type' not in record or len(record['type']) == 0: trigger_error('type missing for public dns record.')
        if record['type'] not in allowed_types: trigger_error('the type used for "'+record['name']+'" is not allowed. allowed types are [ '+', '.join(allowed_types)+' ].')
        if 'value' not in record or len(record['value']) == 0: trigger_error('value missing for public dns record.')
        if 'proxied' in record and not isinstance(record['proxied'], bool): trigger_error('proxied needs to be a bool. please review "'+record['name']+'".')
        if 'state' in record and record['state'] not in allowed_states: trigger_error('public record "'+record['name']+'" uses forbidden state. allowed states are [ '+', '.join(allowed_states)+' ].')
        if ipaddress.ip_address(record['value']).is_private: trigger_error('private IP used for public DNS.')
        if record['name'] in all_public_records: trigger_error('duplicate public records for "'+record['name']+'".')
        all_public_records.append(record['name'])


def main():
    verify_private_dns(dns['dns'])
    #verify_public_dns(dns['dns'])
    exit(0)


if __name__ == "__main__":
    main()
build.sh:
ansible-playbook playbooks/build.yml
playbooks/build.yml:
---
- name: Deploy DNS services
  hosts: localhost
  gather_facts: false
  strategy: linear
  vars_files:
    - ../vars/dns.yml
  tasks:
    - name: Build deploy file
      ansible.builtin.template:
        src: ../templates/deployment.j2
        dest: ../files/deployment.yml
        mode: '0644'
vars/dns.yml - example:
dns:
  private:
    records:
      # ===============
      # NEW RECORDS
      # ===============
      - name: gitlab
        zone: gurfin.se
        type: A
        value: 10.20.10.40
        auto_reverse: yes
templates/deployment.j2:
---
apiVersion: v1
kind: Namespace
metadata:
  name: gurfininfra-dns
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dns
  namespace: gurfininfra-dns
spec:
  selector:
    matchLabels:
      app: dns
  replicas: 3
  template:
    metadata:
      labels:
        app: dns
    spec:
      containers:
        - name: dns
          image: cytopia/bind
          ports:
            - containerPort: 53
          env:
            - name: DNS_A
              value: |
                {% for record in dns.private.records %}
                {% if record.type == 'A' %}
                {{ record.name }}.{{ record.zone }}={{ record.value }},
                {% endif %}
                {% endfor %}
            - name: DNS_PTR
              value: |
                {% for record in dns.private.records %}
                {%- if record.type == 'A' and 'auto_reverse' in record and record.auto_reverse %}
                {{ record.value }}={{ record.name }}.{{ record.zone }},
                {% endif %}
                {% endfor %}
            - name: ALLOW_QUERY
              value: any
            - name: DNS_FORWARDER
              value: '1.1.1.1,8.8.8.8'
---
apiVersion: v1
kind: Service
metadata:
  name: dns-service-udp
  namespace: gurfininfra-dns
  annotations:
    metallb.universe.tf/allow-shared-ip: "dnskey"
spec:
  selector:
    app: dns
  ports:
    - protocol: UDP
      port: 53
      targetPort: 53
  type: LoadBalancer
  loadBalancerIP: 10.120.2.10
---
apiVersion: v1
kind: Service
metadata:
  name: dns-service-tcp
  namespace: gurfininfra-dns
  annotations:
    metallb.universe.tf/allow-shared-ip: "dnskey"
spec:
  selector:
    app: dns
  ports:
    - protocol: TCP
      port: 53
      targetPort: 53
  type: LoadBalancer
  loadBalancerIP: 10.120.2.10
deploy.sh:
kubectl apply -f files/deployment.yml