commit 158e8740b8a68b0a8568646353381bc27cac8f1f Author: Timotej Lazar Date: Mon Dec 18 11:22:14 2023 +0100 Initial commit, squashed diff --git a/LICENSE b/LICENSE new file mode 120000 index 0000000..4761def --- /dev/null +++ b/LICENSE @@ -0,0 +1 @@ +UNLICENSE \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..80f9152 --- /dev/null +++ b/README.md @@ -0,0 +1,202 @@ +# FRI network + +Ansible playbooks to configure the FRI network. Network configuration resides in [NetBox](https://netbox.fri.uni-lj.si); an overview of core switches and servers can be found in the [topology view](https://netbox.fri.uni-lj.si/plugins/netbox_topology_views/topology/?filter_id=2&show_cables=on&show_logical_connections=on). + +## Setup + +Install dependencies with `pip install --user -r requirements.txt` or with the package manager. Since querying the API is not very fast, it is helpful to setup Ansible cache, for example by adding the following to `~/.profile` or similar: + + export ANSIBLE_INVENTORY_CACHE=True + export ANSIBLE_INVENTORY_CACHE_PLUGIN=jsonfile + export ANSIBLE_CACHE_PLUGIN_CONNECTION=~/.ansible/cache + +Devices are accessible on a separate network, reachable through a WireGuard tunnel. For device access an SSH key is required, with the public key authorized for `root` on each device. + +## Usage + +Create a read-only token in NetBox. 
Set variables required to access NetBox: + + # one for nb_inventory and one for nb_lookup + export NETBOX_API_KEY= + export NETBOX_TOKEN="${NETBOX_API_KEY}" + # same for both + export NETBOX_API= + +Run one-off tasks with (add `--key-file` or other options as necessary): + + ansible -i inventory.yml -m ping 'spine-*' + +Run a playbook with: + + ansible-playbook setup.yml -i inventory.yml -l 'spine-*' + +## NetBox data + +The following values are used throughout the network and should be defined in a site-wide [config context](https://netbox.fri.uni-lj.si/extras/config-contexts/?q=fri): + + * `dhcp`: DHCP server address + * `dns`: list of DNS server IPv4 addresses + * `dns6`: list of DNS server IPv6 addresses + * `domain`: site domain + * `nat`: list of IPv4 ranges used for SNAT and DNAT + * `ntp`: list of NTP server addresses + * `wg_ip`: public IPv4 address for wireguard connections, anycast between firewall nodes + * `wg_net`: client wireguard IPv4 addresses are assigned from this range + * `wg_net6`: client wireguard IPv6 addresses are assigned from this range + +### Common setup + +For most devices a management interface must be defined to run Ansible scripts, with at least the IP address and default gateway set: + + { + "name": "eth0", "type": { "value": "1000base-t" }, + "mgmt_only": true, + "mac_address": "98:03:9B:9C:2D:10", + "ip_addresses": [ { "address": "10.20.30.40/24" } ], + "custom_fields": { "gateway": { "address": "10.20.30.1/24" } } + } + +The MAC address is only used in some playbooks to set the interface name. All JSON samples in this document are subsets of the inventory as returned by Ansible. Omitted values should be set to null or empty unless stated otherwise (except for foreign keys such as `type` and `role`, where some values are omitted for brevity). 
+ +#### L1 setup + +To break out a port, create the appropriately named interfaces and disable the original interface: + + { "name": "swp14", "enabled": false }, + { "name": "swp14s0", "type": { "value": "25gbase-x-sfp28" } }, + { "name": "swp14s1", "type": { "value": "25gbase-x-sfp28" } }, + { "name": "swp14s2", "type": { "value": "25gbase-x-sfp28" } }, + { "name": "swp14s3", "type": { "value": "25gbase-x-sfp28" } } + +Note that for SN2700 switches only odd‐numbered ports may be broken out; the next even‐numbered port must be disabled as well as the original port in this case. The new ports can be used normally in further configuration. + +#### L3 setup + +For L3 devices the `asn` custom field must be set. For the fabric and core servers we use [private ASNs above 65000](https://netbox.fri.uni-lj.si/search/?q=65%5B0-9%5D%2B&obj_types=ipam.asn&lookup=iregex). + +Each L3 node should define IPv4 and IPv6 addresses on the loopback interface. These are displayed e.g. by traceroute. The IPv4 loopback address is also used as the BGP router ID. For MLAG switches specify the same [VXLAN anycast IP](https://docs.nvidia.com/networking-ethernet-software/cumulus-linux/Network-Virtualization/VXLAN-Active-Active-Mode/) on both peers with the anycast role. + + { + "name": "lo", "type": { "value": "virtual" }, + "ip_addresses": [ + { "address": "10.34.0.8/32", "role": { "value": "loopback" } }, + { "address": "2001:1470:fffd:3400::8/128", "role": { "value": "loopback" } }, + { "address": "10.34.0.7/32", "role": { "value": "anycast" } } + ] + } + + +Interfaces to L3 servers should have the tenant custom field defined: + + { + "name": "swp9", "type": { "value": "100gbase-x-qsfp28" }, + "custom_fields": { "tenant": { "slug": "lrk" } } + } + +The tenant determines which prefixes can be received on this interface. It is important that all user‐facing ports either have a tenant defined or are disabled. Interfaces without a tenant are assumed to connect to fabric and allow all prefixes.
TODO make previous sentence untrue and delete it + +#### L2 setup + +For leaf switches providing L2 access we must add a single `bridge` interface. If no VLANs are explicitly set, the bridge will allow any VLAN allowed on at least one of its ports. Otherwise it will only allow the specified VLANs. + + { + "name": "bridge", "type": { "value": "bridge" }, + "mode": { "value": "tagged" }, + "tagged_vlans": [ + { "name": "vlan-foo", "vid": 1234 }, + { "name": "vlan-bar", "vid": 1235 } + ] + } + +For dual-attached devices we form a MLAG between two leaf switches. Each leaf must have the `peer` context key set to the hostname of the other leaf. Create a bond named `peerlink` as one of the `bridge` ports, and assign it the interfaces for inter-switch links. For example [exit-1](https://netbox.fri.uni-lj.si/search/?q=exit-1&obj_types=dcim.device&lookup=iexact) with two links to [exit-2](https://netbox.fri.uni-lj.si/search/?q=exit-2&obj_types=dcim.device&lookup=iexact): + + { + "name": "peerlink", "type": { "value": "lag" }, + "bridge": { "name": "bridge" }, + "mode": { "value": "tagged" } + }, + { + "name": "swp29", + "lag": { "name": "peerlink" }, + "connected_endpoints": [ { "device": { "name": "exit-2" }, "name": "swp29" } ] + }, + { + "name": "swp30", + "lag": { "name": "peerlink" }, + "connected_endpoints": [ { "device": { "name": "exit-2" }, "name": "swp30" } ] + } + +For each dual‐attached L2 device (server or switch) first create a bond on each leaf. Note that, on Cumulus Linux on Mellanox switches, a bond must be created even if a single interface is used on a particular switch.
For example, the bond for [access-bdc-1](https://netbox.fri.uni-lj.si/search/?q=access-bdc-1&obj_types=dcim.device&lookup=iexact) on [exit-1](https://netbox.fri.uni-lj.si/search/?q=exit-1&obj_types=dcim.device&lookup=iexact): + + { + "name": "access-bdc-1", "type": { "value": "lag" }, + "bridge": { "name": "bridge" } + } + +Assign the new bond all interfaces connecting to the device (here the bond has the name of the attached L2 switch `access-bdc-1`): + + { + "name": "swp23s0", + "lag": { "name": "access-bdc-1" }, + "connected_endpoints": [ { "device": { "name": "access-bdc-1" }, "name": "ethernet 1/0/49" } ] + } + +If a bond with the same name (except `peerlink`) exists on both peer switches, a [MLAG ID](https://docs.nvidia.com/networking-ethernet-software/cumulus-linux/Layer-2/Multi-Chassis-Link-Aggregation-MLAG/#basic-configuration) is assigned automatically. In this case the (same) [VXLAN anycast IP](https://docs.nvidia.com/networking-ethernet-software/cumulus-linux/Network-Virtualization/VXLAN-Active-Active-Mode/#configure-vxlan-active-active) should be set on each leaf’s loopback interface. + +The bond interface can be set as an access or a tagged port by setting the `mode` attribute. Either `untagged_vlan` or `tagged_vlans` should be set as appropriate in this case. Otherwise the bond will allow all VLANs allowed by `bridge`. + +The device on the other end of the bond should use the active‐active 802.3ad (LACP) mode. + +### Access switches + +Currently all [access switches](https://netbox.fri.uni-lj.si/search/?q=access-%5Bbcr%5Ddc-%28poe-%29%3F%5B0-9%5D%2B&obj_types=dcim.device&lookup=iregex) are D-Link DGS-1510. Connection parameters are set for those device types in a [config context](https://netbox.fri.uni-lj.si/extras/config-contexts/1/) and applied automatically by Ansible. + +The config template supports configuring the port channels and tagging ports, but is otherwise limited to this setup. 
Further additions should attempt to preserve (fake) idempotency by filtering out unimportant differing lines. + +To set up a bonded interface to exit switches, configure these interfaces: + + { + "name": "port-channel 1", "type": { "value": "lag" }, + "mode": { "value": "tagged" }, + "tagged_vlans": [ + { "name": "vlan-foo", "vid": 1234 }, + { "name": "vlan-bar", "vid": 1235 } + ] + }, + { + "name": "ethernet 1/0/49", "lag": { "name": "port-channel 1" }, + "link_peers": { "device": { "name": "exit-1" } } + }, + { + "name": "ethernet 1/0/50", "lag": { "name": "port-channel 1" }, + "link_peers": { "device": { "name": "exit-2" } } + } + +To enable an access interface, tag it with the appropriate VLAN(s), for example: + + { + "name": "ethernet 1/0/10", + "mode": { "value": "access" }, + "untagged_vlan": { "vid": 1234 } + }, + { + "name": "ethernet 1/0/11", + "mode": { "value": "tagged" }, + "tagged_vlans": [{ "vid": 1234 }, { "vid": 1235 }] + } + +Interfaces marked as disabled are shut down. + +### Firewall + +The setup consists of two [firewall nodes](https://netbox.fri.uni-lj.si/search/?q=fw-%5B0-9%5D%2B&obj_types=dcim.device&lookup=iregex) and a [control node](https://netbox.fri.uni-lj.si/search/?q=zid&obj_types=virtualization.virtualmachine&lookup=iexact). + +For the firewall nodes, configure `mgmt0` and `lo` as usual for L3 devices. Additionally, the firewall nodes should define the following interfaces: + + { "name": "lan0" }, + { "name": "lan1" }, + { "name": "mgmt1", "ip_addresses": [{ "address": "fe80::1/64" }] }, + +The MAC address should be defined for each interface, as they are renamed by the OS. The `mgmt1` interface is used for synchronizing connection-tracking information and should use the `fe80::1/64` and `fe80::2/64` addresses for the first and second firewall node, respectively. + +Each firewall node should have a local config context with the keys `master` and `iface_sync` defining the names of the control node and the synchronization interface.
diff --git a/UNLICENSE b/UNLICENSE new file mode 100644 index 0000000..68a49da --- /dev/null +++ b/UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+ +For more information, please refer to https://unlicense.org diff --git a/ansible.cfg b/ansible.cfg new file mode 100644 index 0000000..43aa1a9 --- /dev/null +++ b/ansible.cfg @@ -0,0 +1,5 @@ +[defaults] +remote_user = root +vault_identity = network +ask_vault_pass = true +filter_plugins = filter_plugins diff --git a/filter_plugins/cumulus.py b/filter_plugins/cumulus.py new file mode 100644 index 0000000..7186cd9 --- /dev/null +++ b/filter_plugins/cumulus.py @@ -0,0 +1,56 @@ +#!/usr/bin/python + +import ipaddress +import itertools +import re + +class FilterModule(object): + '''Filters for Cumulus Linux switches''' + def __init__(self): + self.iface_regex = re.compile(r'swp([0-9]+)(?:s([0-9]+))?') + + def filters(self): + return { + 'cl_iface_index': self.cl_iface_index, + 'cl_clag_id': self.cl_clag_id, + 'cl_clag_sys_mac': self.cl_clag_sys_mac + } + + def cl_iface_index(self, interfaces): + ''' + Return interface index from its name, e.g. swp3s2 → 18 + + Interfaces may be broken out into up to four subinterfaces. Indexing + starts at 5 and proceeds as: + + swp1 swp1s0 swp1s1 swp1s2 swp1s3 swp2 swp2s0 … + ''' + + for interface in interfaces: + if m := self.iface_regex.fullmatch(interface.get('name', '')): + yield 5 * (int(m.group(1))) + (int(m.group(2))+1 if m.group(2) else 0) + + def cl_clag_id(self, interfaces): + ''' + Generate a clag-id from a list of interfaces making up a MLAG bond + + The clag-id for a bond must be between 1 and 65535. Generate it by + combining the indexes of the lowest-numbered interface on each switch. + This avoids manual ID assignment while keeping them mostly stable. + + This clag-id does not change unless the lowest-numbered interface for + the bond on any switch changes. IDs of other bonds are not affected.
+ ''' + clag_id = 0 + key = lambda i: i.get('device', {}).get('name') + for device, ifaces in itertools.groupby(sorted(interfaces, key=key), key): + clag_id = 256 * clag_id + min(self.cl_iface_index(ifaces)) + if 1 <= clag_id <= 65535: # sanity checking + return clag_id + + def cl_clag_sys_mac(self, address): + '''Generate a system MAC address for a MLAG with the given address''' + + index = int(ipaddress.ip_address(address)) % 2**16 + suffix = f'{index:04x}' + return f'44:38:39:ff:{suffix[:2]}:{suffix[2:]}' diff --git a/filter_plugins/netbox.py b/filter_plugins/netbox.py new file mode 100644 index 0000000..323fcda --- /dev/null +++ b/filter_plugins/netbox.py @@ -0,0 +1,39 @@ +#!/usr/bin/python + +class FilterModule(object): + '''Various utilities for manipulating NetBox data''' + def __init__(self): + self.virtual_iface_types = ('bridge', 'lag', 'virtual') + + def filters(self): + return { + 'iface_real': self.iface_real, + 'iface_peer': self.iface_peer, + 'iface_vlans': self.iface_vlans + } + + def iface_real(self, interfaces): + '''Return only non-virtual interfaces''' + for iface in interfaces: + if iface.get('type', {}).get('value') not in self.virtual_iface_types: + yield iface + + def iface_peer(self, interfaces): + '''Return the name of the device connected to this interface''' + for iface in interfaces: + endpoint = (iface.get('connected_endpoints') or [{}])[0] + if device := endpoint.get('device', {}).get('name'): + yield device + else: + yield None + + def iface_vlans(self, interfaces): + '''Returns a list of allowed VLANs for each interface''' + for iface in interfaces: + match iface.get('mode', {}).get('value'): + case 'access': + yield [iface.get('untagged_vlan')] if 'untagged_vlan' in iface else [] + case 'tagged': + yield iface.get('tagged_vlans', []) + case _: + yield [] diff --git a/group_vars/all/vars.yml b/group_vars/all/vars.yml new file mode 100644 index 0000000..0b73a21 --- /dev/null +++ b/group_vars/all/vars.yml @@ -0,0 +1 @@ +vlans: "{{ 
query('netbox.netbox.nb_lookup', 'vlans', api_filter='group=new-net', raw_data=true) | sort(attribute='vid') }}" diff --git a/inventory.sh b/inventory.sh new file mode 100755 index 0000000..f0613e3 --- /dev/null +++ b/inventory.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +# use this wrapper as your inventory if you have ansible-vault secrets in some config context + +# thank you Dameon Wagner you magnificent beast +# https://github.com/netbox-community/ansible_modules/discussions/551 + +# remove this file if/when this gets to your computer +# https://github.com/netbox-community/ansible_modules/pull/1114 + +ANSIBLE_ASK_VAULT_PASS=no ansible-inventory -i inventory.yml "${@}" diff --git a/inventory.yml b/inventory.yml new file mode 100644 index 0000000..7edb45c --- /dev/null +++ b/inventory.yml @@ -0,0 +1,15 @@ +plugin: netbox.netbox.nb_inventory +config_context: true +interfaces: true +flatten_config_context: true +flatten_custom_fields: true +flatten_local_context_data: true +fetch_all: false +plurals: false +device_query_filters: + - has_primary_ip: true +query_filters: + - tenant: 'fri-it' + - role: 'compute-node' + - role: 'firewall' + - role: 'switch' diff --git a/roles/access/handlers/main.yml b/roles/access/handlers/main.yml new file mode 100644 index 0000000..c905952 --- /dev/null +++ b/roles/access/handlers/main.yml @@ -0,0 +1,6 @@ +- name: write config + ansible.netcommon.cli_command: + command: "copy running-config startup-config" + prompt: "Destination filename startup-config\\? \\[y/n\\]: " + answer: "y" + when: "'handler' not in ansible_skip_tags" diff --git a/roles/access/tasks/main.yml b/roles/access/tasks/main.yml new file mode 100644 index 0000000..565958e --- /dev/null +++ b/roles/access/tasks/main.yml @@ -0,0 +1,7 @@ +- name: Set configuration + ansible.netcommon.cli_config: + config: "{{ lookup('template', 'config.j2') }}" + register: result + # These lines are not displayed by 'sho ru' and always reported as different, so ignore them. 
+ changed_when: result.commands | reject('match', '^(no shutdown|no switchport access vlan|switchport mode hybrid|interface .*)$') + notify: write config diff --git a/roles/access/templates/config.j2 b/roles/access/templates/config.j2 new file mode 100644 index 0000000..e828e3a --- /dev/null +++ b/roles/access/templates/config.j2 @@ -0,0 +1,68 @@ +terminal length default 0 +! +line console +! +line telnet +! +line ssh +! +port-channel load-balance src-dst-ip +! +ip ssh server +ssh user admin authentication-method publickey /c:/ansible.pub +! +vlan {{ vlans | map(attribute='vid') | vlan_parser | join(',') }} +! +{% for iface in interfaces | selectattr('enabled') | selectattr('lag') %} +interface {{ iface.name }} + channel-group 1 mode active +! +{% endfor %} +{% set mgmt = namespace(ip=false, gw=false) %} +{% for iface in interfaces | rejectattr('lag') %} +interface {{ iface.name }} +{% if iface.enabled and iface.mode %} +{% if iface.type.value != 'lag' %} + no shutdown +{% endif %} +{% if iface.mgmt_only and iface.ip_addresses %} +{% set mgmt.ip = iface.ip_addresses[0].address %} +{% if iface.custom_fields.gateway %} +{% set mgmt.gw = iface.custom_fields.gateway.address %} +{% endif %} +{% endif %} +{% if iface.mode.value == 'access' %} + switchport mode access +{% if iface.untagged_vlan and iface.untagged_vlan.vid != 1 %} + switchport access vlan {{ iface.untagged_vlan.vid }} +{% else %} + no switchport access vlan +{% endif %} +{% elif iface.mode.value == 'tagged' %} + switchport mode trunk + switchport trunk allowed vlan {{ (iface.tagged_vlans or vlans) | map(attribute='vid') | vlan_parser | join(',') }} +{% endif %} +{% else %} + shutdown +{% endif %} +! +{% endfor %} +{% if mgmt.ip %} +interface Vlan1 + ip address {{ mgmt.ip | ipaddr('address') }} {{ mgmt.ip | ipaddr('netmask') }} +{% endif %} +! +sntp enable +{% for address in ntp %} +sntp server {{ address }} +{% endfor %} +! +ntp access-group default nomodify noquery +! 
+{% if mgmt.gw %} +ip route 0.0.0.0 0.0.0.0 {{ mgmt.gw | ipaddr('address') }} primary +{% endif %} +! +no ddp +! +end diff --git a/roles/certbot_dns/tasks/main.yml b/roles/certbot_dns/tasks/main.yml new file mode 100644 index 0000000..17b9c55 --- /dev/null +++ b/roles/certbot_dns/tasks/main.yml @@ -0,0 +1,54 @@ +- name: Enable community package repo + lineinfile: + path: /etc/apk/repositories + regexp: '^# *(http.*/v[^/]*/community)' + line: '\1' + backrefs: yes + +- name: Install packages + package: + name: bind-tools,certbot,krb5,py3-pexpect + +- name: Configure kerberos + template: + dest: /etc/krb5.conf + src: krb5.conf.j2 + +- name: Copy DNS updater scripts for certbot + template: + dest: "/usr/local/bin/{{ item }}" + src: "{{ item }}.j2" + mode: 0700 + with_items: + - certbot-auth + - certbot-cleanup + +- name: Init kerberos keytab + expect: + command: ktutil + responses: + ".*:": + - "add_entry -password -p {{ ldap_user }} -k 1 -e aes256-cts-hmac-sha1-96" + - "{{ ldap_pass }}" + - "write_kt /etc/krb5.keytab" + - "exit" + args: + creates: /etc/krb5.keytab + +- name: Create LE account + command: + cmd: certbot register --agree-tos --register-unsafely-without-email + creates: /etc/letsencrypt/accounts/acme-v02.api.letsencrypt.org/directory/*/meta.json + +- name: Create LE certificate + command: + cmd: certbot certonly --quiet --manual --preferred-challenges=dns --manual-auth-hook certbot-auth --manual-cleanup-hook certbot-cleanup -d {{ fqdn }} + creates: "/etc/letsencrypt/renewal/{{ fqdn }}.conf" + +- name: Enable certbot renewal + cron: + name: "certbot renew" + job: "certbot renew --quiet" + user: root + hour: "2,14" + minute: "38" diff --git a/roles/certbot_dns/templates/certbot-auth.j2 b/roles/certbot_dns/templates/certbot-auth.j2 new file mode 100644 index 0000000..8400b5f --- /dev/null +++ b/roles/certbot_dns/templates/certbot-auth.j2 @@ -0,0 +1,13 @@ +#!/bin/sh + +dns={{ dns[0] }} +ldap_user={{ ldap_user }} +ttl=10 + +kinit -k -t /etc/krb5.keytab 
"${ldap_user}" +nsupdate -g <default in + neighbor fabric route-map default->fabric out + + import vrf outside + import vrf route-map default-import + exit-address-family + + address-family ipv6 unicast + redistribute connected route-map loopback + + neighbor fabric activate + neighbor fabric soft-reconfiguration inbound + neighbor fabric route-map fabric->default in + neighbor fabric route-map default->fabric out + + import vrf outside + import vrf route-map default-import + exit-address-family + + address-family l2vpn evpn + advertise-all-vni + advertise-default-gw + neighbor fabric activate + neighbor peerlink.4094 activate + exit-address-family + + +# Outside VRF. Direct route to the world, everything else goes to the firewall. +router bgp {{ asn.asn }} vrf outside + bgp bestpath as-path multipath-relax + + neighbor peerlink.4 interface remote-as external + neighbor peerlink.4 capability extended-nexthop + neighbor peerlink.4 bfd 3 150 150 + + neighbor firewall peer-group + neighbor firewall remote-as external + neighbor firewall capability extended-nexthop + +{% for iface in ifaces_firewall %} + neighbor {{ iface }}.4 interface peer-group firewall + neighbor {{ iface }}.4 bfd 3 150 150 +{% endfor %} + + address-family ipv4 unicast + neighbor peerlink.4 soft-reconfiguration inbound + neighbor peerlink.4 route-map peer.4->me in + neighbor peerlink.4 route-map me->peer.4 out + + neighbor firewall allowas-in 1 + neighbor firewall default-originate + neighbor firewall soft-reconfiguration inbound + neighbor firewall route-map outside->firewall out +{% for iface in ifaces_firewall %} + neighbor {{ iface }}.4 route-map firewall-{{ loop.index }}->outside in +{% endfor %} + + redistribute static + redistribute connected route-map loopback-outside + import vrf default + import vrf route-map outside-import + exit-address-family + + address-family ipv6 unicast + neighbor peerlink.4 activate + neighbor peerlink.4 allowas-in origin + neighbor peerlink.4 soft-reconfiguration 
inbound + neighbor peerlink.4 route-map peer.4->me in + neighbor peerlink.4 route-map me->peer.4 out + + neighbor firewall activate + neighbor firewall allowas-in 1 + neighbor firewall default-originate + neighbor firewall soft-reconfiguration inbound + neighbor firewall route-map outside->firewall out +{% for iface in ifaces_firewall %} + neighbor {{ iface }}.4 route-map firewall-{{ loop.index }}->outside in +{% endfor %} + + redistribute static + redistribute connected route-map loopback-outside + import vrf default + import vrf route-map outside-import + exit-address-family + + +# Inside VRF. Default route via firewall. Direct routes to servers and offices. +router bgp {{ asn.asn }} vrf inside + bgp bestpath as-path multipath-relax + + neighbor peerlink.2 interface remote-as external + neighbor peerlink.2 capability extended-nexthop + neighbor peerlink.2 bfd 3 150 150 + + neighbor firewall peer-group + neighbor firewall remote-as external + neighbor firewall capability extended-nexthop + +{% for iface in ifaces_firewall %} + neighbor {{ iface }}.2 interface peer-group firewall + neighbor {{ iface }}.2 bfd 3 150 150 +{% endfor %} + + address-family ipv4 unicast + neighbor peerlink.2 soft-reconfiguration inbound + neighbor peerlink.2 route-map peer.2->me in + neighbor peerlink.2 route-map me->peer.2 out + + neighbor firewall allowas-in 1 + neighbor firewall soft-reconfiguration inbound + neighbor firewall route-map inside->firewall out +{% for iface in ifaces_firewall %} + neighbor {{ iface }}.2 route-map firewall-{{ loop.index }}->inside in +{% endfor %} + + redistribute connected route-map loopback-inside +{% for vlan in my_vlans %} + import vrf {{ vlan.name }} +{% endfor %} + import vrf default + import vrf route-map inside-import + exit-address-family + + address-family ipv6 unicast + neighbor peerlink.2 activate + neighbor peerlink.2 soft-reconfiguration inbound + neighbor peerlink.2 route-map peer.2->me in + neighbor peerlink.2 route-map me->peer.2 out + + 
neighbor firewall activate + neighbor firewall allowas-in 1 + neighbor firewall soft-reconfiguration inbound + neighbor firewall route-map inside->firewall out +{% for iface in ifaces_firewall %} + neighbor {{ iface }}.2 route-map firewall-{{ loop.index }}->inside in +{% endfor %} + + redistribute connected route-map loopback-inside +{% for vlan in my_vlans %} + import vrf {{ vlan.name }} +{% endfor %} + import vrf default + import vrf route-map inside-import + exit-address-family + + +{% for vlan in my_vlans %} +# VRF for L2 network {{ vlan.name }}. Imports gateway from inside VRF. +router bgp {{ asn.asn }} vrf {{ vlan.name }} + bgp bestpath as-path multipath-relax + + address-family ipv4 unicast + redistribute connected + import vrf inside + import vrf route-map office-import + exit-address-family + + address-family ipv6 unicast + redistribute connected + import vrf inside + import vrf route-map office-import + exit-address-family + +{% endfor %} + +# Prefix lists. +ip prefix-list default permit 0.0.0.0/0 +ipv6 prefix-list default permit ::/0 + +ip prefix-list fabric permit 10.34.0.0/24 ge 32 +ipv6 prefix-list fabric permit 2001:1470:fffd:3400::/64 ge 128 + +{% for vlan in my_vlans %} +{% set prefixes = query('netbox.netbox.nb_lookup', 'prefixes', api_filter='vlan_id='~vlan.id, raw_data=true) %} +{% for prefix in prefixes %} +{% if prefix.family.value == 4 %} +ip prefix-list office permit {{ prefix.prefix }} ge 24 +{% else %} +ipv6 prefix-list office permit {{ prefix.prefix }} ge 64 +{% endif %} +{% endfor %} +{% endfor %} + +ip prefix-list vpn permit {{ wg_net | ipaddr('subnet') }} + +ip prefix-list nat permit {{ wg_ip | ipaddr('host') }} +{% for network in nat %} +ip prefix-list nat permit {{ network }} +{% endfor %} + +{% for prefix in query('netbox.netbox.nb_lookup', 'prefixes', raw_data=true, api_filter='role=bgp') | selectattr('tenant') %} +{% if prefix.family.value == 4 %} +ip prefix-list dc permit {{ prefix.prefix }} ge 32 +{% else %} +ipv6 prefix-list dc 
permit {{ prefix.prefix }} ge 64 +{% endif %} +{% endfor %} + +# Route maps for redistributing own IPs from various VRFs. +route-map loopback permit 1 + match interface lo +route-map loopback-inside permit 1 + match interface inside +route-map loopback-outside permit 1 + match interface outside + +# Route maps for importing between VRFs. +route-map default-import permit 10 + match ip address prefix-list default +route-map default-import permit 11 + match ipv6 address prefix-list default +route-map default-import permit 21 + match ipv6 address prefix-list office +route-map default-import permit 30 + match ip address prefix-list nat + +route-map outside-import permit 10 + match ip address prefix-list dc +route-map outside-import permit 11 + match ipv6 address prefix-list dc + +route-map office-import permit 10 + match ip address prefix-list default +route-map office-import permit 11 + match ipv6 address prefix-list default + +route-map inside-import permit 20 + match ip address prefix-list office +route-map inside-import permit 21 + match ipv6 address prefix-list office + +# Route maps for advertised and received routes. +# Inside ↔ fabric. +route-map default->fabric permit 10 + match ip address prefix-list default +route-map default->fabric permit 11 + match ipv6 address prefix-list default +route-map default->fabric permit 20 + match ip address prefix-list fabric + +route-map fabric->default permit 10 + match ip address prefix-list fabric +route-map fabric->default permit 20 + match ip address prefix-list dc +route-map fabric->default permit 21 + match ipv6 address prefix-list dc + +# Inside ↔ firewall. 
+route-map inside->firewall permit 1 + match interface lo +route-map inside->firewall permit 20 + match ip address prefix-list office +route-map inside->firewall permit 21 + match ipv6 address prefix-list office + +route-map firewall->inside permit 1 + match ip address prefix-list fabric +route-map firewall->inside permit 2 + match ipv6 address prefix-list fabric +route-map firewall->inside permit 10 + match ip address prefix-list default +route-map firewall->inside permit 11 + match ipv6 address prefix-list default + +# Outside ↔ firewall. +route-map outside->firewall permit 10 + match ip address prefix-list default +route-map outside->firewall permit 11 + match ipv6 address prefix-list default + +route-map firewall->outside permit 1 + match ip address prefix-list fabric +route-map firewall->outside permit 2 + match ipv6 address prefix-list fabric +route-map firewall->outside permit 21 + match ipv6 address prefix-list office +route-map firewall->outside permit 30 + match ip address prefix-list nat + +# Tag routes from each firewall. Set weight for primary to 200 and secondary to 100. +{% for firewall in ifaces_firewall %} +route-map firewall-{{ loop.index }}->inside permit 1 + set tag {{ loop.index }} + set weight {{ 100 * loop.index }} + call firewall->inside +route-map firewall-{{ loop.index }}->outside permit 1 + set tag {{ loop.index }} + set weight {{ 100 * loop.index }} + call firewall->outside +{% endfor %} + +# Backup routes over peer link are announced to the peer with BGP +# metrics 190 and 90. These values are copied to weights by receiving +# peer, to be used alongside local routes with weights 200 and 100. +# These are the route maps for peerlink in the inside VRF. 
+{% for firewall in ifaces_firewall %} +{% set metric = 100 * loop.index - 10 %} +route-map me->peer.2 permit {{ loop.index }} + match tag {{ loop.index }} + on-match goto 100 + set metric {{ metric }} +route-map peer.2->me permit {{ loop.index }} + match metric {{ metric }} + on-match goto 100 + set weight {{ metric }} +{% endfor %} + +# Advertised backup routes for paths that go through the firewall +# (default route). +route-map me->peer.2 permit 110 + match ip address prefix-list default +route-map me->peer.2 permit 111 + match ipv6 address prefix-list default + +# Received backup routes (same as above). +route-map peer.2->me permit 110 + match ip address prefix-list default +route-map peer.2->me permit 111 + match ipv6 address prefix-list default + +# These are the route maps for peerlink in the outside VRF. +{% for firewall in ifaces_firewall %} +{% set metric = 100 * loop.index - 10 %} +route-map me->peer.4 permit {{ loop.index }} + match tag {{ loop.index }} + on-match goto 100 + set metric {{ metric }} +route-map peer.4->me permit {{ loop.index }} + match metric {{ metric }} + on-match goto 100 + set weight {{ metric }} +{% endfor %} + +# Backup routes for uplink and paths that go through the firewall +# (default route and NAT/IPv6 addresses for office networks). +route-map me->peer.4 permit 110 + match ip address prefix-list default +route-map me->peer.4 permit 111 + match ipv6 address prefix-list default +route-map me->peer.4 permit 120 + match ip address prefix-list nat +route-map me->peer.4 permit 131 + match ipv6 address prefix-list office + +# Received backup routes (same as above). 
+route-map peer.4->me permit 110 + match ip address prefix-list default +route-map peer.4->me permit 111 + match ipv6 address prefix-list default +route-map peer.4->me permit 120 + match ip address prefix-list nat +route-map peer.4->me permit 131 + match ipv6 address prefix-list office diff --git a/roles/exit/templates/isc-dhcp-relay.j2 b/roles/exit/templates/isc-dhcp-relay.j2 new file mode 100644 index 0000000..1d86ef4 --- /dev/null +++ b/roles/exit/templates/isc-dhcp-relay.j2 @@ -0,0 +1,16 @@ +{% set bridge = interfaces | selectattr('type') | selectattr('type.value', '==', 'bridge') | first %} +{% set dhcp_networks = query('netbox.netbox.nb_lookup', 'prefixes', api_filter='role=dhcp-pool', raw_data=true) + | selectattr('vlan') | map(attribute='vlan.vid') | sort -%} + +# What servers should the DHCP relay forward requests to? +SERVERS="{{ dhcp }}" + +# On what interfaces should the DHCP relay (dhcrelay) serve DHCP requests? +# Always include the interface towards the DHCP server. +# This variable requires a -i for each interface configured above. +# This will be used in the actual dhcrelay command +# For example, "-i eth0 -i eth1" +INTF_CMD="{{ bridge.tagged_vlans | map(attribute='vid') | intersect(dhcp_networks) | sort | map('regex_replace', '^', '-id bridge.') | join(' ') }} -iu {{ iface_uplink }} -iu peerlink.4" + +# Additional options that are passed to the DHCP relay daemon?
+OPTIONS="-U outside" diff --git a/roles/exit/templates/keepalived.conf.j2 b/roles/exit/templates/keepalived.conf.j2 new file mode 100644 index 0000000..cd0c9f4 --- /dev/null +++ b/roles/exit/templates/keepalived.conf.j2 @@ -0,0 +1,22 @@ +{% set exits = [inventory_hostname, peer] -%} + +global_defs { + enable_script_security + script_user root +} + +vrrp_instance dhcrelay { + virtual_router_id 50 + virtual_ipaddress { 169.254.1.1/24 } + interface peerlink.4 +{% for exit in exits %} + @{{ exit }} priority {{ loop.index }} + @{{ exit }} unicast_src_ip {{ "169.254.1.0/24" | ipaddr(loop.index + 1) | ipaddr('address') }} +{% endfor %} + unicast_peer { +{% for exit in exits %} + @^{{ exit }} {{ "169.254.1.0/24" | ipaddr(loop.index + 1) | ipaddr('address') }} +{% endfor %} + } + notify /usr/local/bin/keepalive-service +} diff --git a/roles/exit/templates/networks.intf.j2 b/roles/exit/templates/networks.intf.j2 new file mode 100644 index 0000000..c2ade73 --- /dev/null +++ b/roles/exit/templates/networks.intf.j2 @@ -0,0 +1,35 @@ +{# Note that there must be exactly one VLAN-aware bridge. #} +{% set bridge = interfaces | selectattr('type') | selectattr('type.value', '==', 'bridge') | first %} +{% set my_vlans = bridge.tagged_vlans | sort(attribute='vid') -%} + +# VRFs. +{% for vlan in my_vlans %} +auto {{ vlan.name }} +iface {{ vlan.name }} + vrf-table auto + +{% endfor %} + +# Interfaces. 
+{% for vlan in my_vlans %} +{% set prefixes = query('netbox.netbox.nb_lookup', 'prefixes', api_filter='vlan_id='~vlan.id, raw_data=true) + | map(attribute='prefix') %} +auto {{ bridge.name }}.{{ vlan.vid }} +iface {{ bridge.name }}.{{ vlan.vid }} + vrf {{ vlan.name }} + mtu 9216 +{% if peer is defined %} +{% set my_index = inventory_hostname.split('-')[1]|int %} +{% for prefix in prefixes %} + address {{ prefix | ipaddr(1 + my_index) }} +{% endfor %} +{% if prefixes %} + address-virtual 00:00:5e:00:01:01 {{ prefixes | ipaddr(1) | join(' ') }} +{% endif %} +{% else %} +{% for prefix in prefixes %} + address {{ prefix }} +{% endfor %} +{% endif %} + +{% endfor %} diff --git a/roles/exit/templates/radvd.conf.j2 b/roles/exit/templates/radvd.conf.j2 new file mode 100644 index 0000000..f383924 --- /dev/null +++ b/roles/exit/templates/radvd.conf.j2 @@ -0,0 +1,16 @@ +{# Note that there must be exactly one VLAN-aware bridge. #} +{% set bridge = interfaces | selectattr('type') | selectattr('type.value', '==', 'bridge') | first %} +{% set my_vlans = bridge.tagged_vlans | sort(attribute='vid') -%} + +# Send IPv6 RAs from virtual router IP for each network. Also set DNS options. +# Both exits announce the same gateway, so don’t revoke it if we go down. 
+{% for vlan in my_vlans %} +interface bridge-{{ vlan.vid }}-v0 { + AdvSendAdvert on; + RemoveAdvOnExit off; + prefix ::/64; + RDNSS {{ dns6 | join(' ') }} { }; + DNSSL {{ domain }} { }; +}; + +{% endfor %} diff --git a/roles/fabric/handlers/main.yml b/roles/fabric/handlers/main.yml new file mode 100644 index 0000000..05892b5 --- /dev/null +++ b/roles/fabric/handlers/main.yml @@ -0,0 +1,16 @@ +- name: reload interfaces + command: + cmd: ifreload -a + when: "'handler' not in ansible_skip_tags" + +- name: reload switchd + service: + name: switchd + state: reloaded + when: "'handler' not in ansible_skip_tags" + +- name: restart frr + service: + name: frr + state: restarted + when: "'handler' not in ansible_skip_tags" diff --git a/roles/fabric/tasks/main.yml b/roles/fabric/tasks/main.yml new file mode 100644 index 0000000..d71f65c --- /dev/null +++ b/roles/fabric/tasks/main.yml @@ -0,0 +1,97 @@ +- name: Set hostname + hostname: + name: "{{ inventory_hostname }}" + +- name: Set hostname in /etc/hosts + lineinfile: + path: /etc/hosts + regexp: '^127.0.1.1\s+' + line: "127.0.1.1 {{ inventory_hostname }}" + +- name: Disable unneeded services + service: + name: '{{ item }}' + enabled: false + state: stopped + loop: + - nvued + - netqd@mgmt + +- name: Set up ports + template: + dest: /etc/cumulus/ports.conf + src: ports.conf.j2 + notify: reload switchd + +- name: Set up management VRF and interface + template: + dest: /etc/network/interfaces + src: interfaces.j2 + mode: 0644 + notify: reload interfaces + +- name: Set up loopback and switch interfaces + template: + dest: "/etc/network/interfaces.d/{{ item }}.intf" + src: "{{ item }}.intf.j2" + mode: 0644 + notify: reload interfaces + loop: + - loopback + - switch + +- name: Set up peerlink + template: + dest: "/etc/network/interfaces.d/peerlink.intf" + src: "peerlink.intf.j2" + mode: 0644 + notify: reload interfaces + when: "peer is defined" + +- name: Set up bridge + template: + dest: "/etc/network/interfaces.d/bridge.intf" + src:
"bridge.intf.j2" + mode: 0644 + notify: reload interfaces + when: "'bridge' in interfaces | map(attribute='name')" + +- name: Set up bonds + template: + dest: "/etc/network/interfaces.d/bond.intf" + src: "bond.intf.j2" + mode: 0644 + notify: reload interfaces + when: "'lag' in interfaces | map(attribute='type.value')" + +- name: Set nameservers for mgmt VRF + template: + dest: /etc/resolv.conf + src: resolv.conf.j2 + mode: 0644 + notify: reload interfaces + +- name: Disable SSH in default VRF + service: + name: ssh + enabled: no + state: stopped + +- name: Listen for SSH only in mgmt VRF + service: + name: ssh@mgmt + enabled: yes + state: started + +- name: Enable BGP for FRR + lineinfile: + path: /etc/frr/daemons + regexp: '^bgpd=' + line: 'bgpd=yes' + notify: restart frr + +- name: Enable FRR + service: + name: frr + enabled: yes + state: started diff --git a/roles/fabric/templates/bond.intf.j2 b/roles/fabric/templates/bond.intf.j2 new file mode 100644 index 0000000..0c87d0b --- /dev/null +++ b/roles/fabric/templates/bond.intf.j2 @@ -0,0 +1,23 @@ +{% for bond in interfaces | map(attribute='lag') | reject('none') | sort(attribute='name') | unique %} +{% set iface = interfaces | selectattr('id', '==', bond.id) | first %} +{% set members = interfaces | selectattr('lag') | selectattr('lag.name', '==', bond.name) -%} + +auto {{ bond.name }} +iface {{ bond.name }} + bond-slaves {{ members | map(attribute='name') | join(' ') }} +{% if iface.mode.value == 'access' and iface.untagged_vlan %} + bridge-access {{ iface.untagged_vlan.vid }} +{% elif iface.mode.value == 'tagged' and iface.tagged_vlans %} + bridge-vids {{ iface.tagged_vlans | map(attribute='vid') | join(' ') }} +{% endif %} + +{#- If the peer shares a bond with the same name, generate a clag-id for it unless the bonded link is to peer itself. 
#} +{% if peer is defined and peer %} +{% set peer_members = hostvars[peer].interfaces + | selectattr('lag') | selectattr('lag.name', '==', bond.name) %} +{% if peer_members | iface_peer | reject('eq', inventory_hostname) %} + clag-id {{ (members + peer_members) | cl_clag_id }} +{% endif %} +{% endif %} + +{% endfor %} diff --git a/roles/fabric/templates/bridge.intf.j2 b/roles/fabric/templates/bridge.intf.j2 new file mode 100644 index 0000000..8200efb --- /dev/null +++ b/roles/fabric/templates/bridge.intf.j2 @@ -0,0 +1,23 @@ +{# Note that there must be exactly one VLAN-aware bridge. #} +{% set bridge = interfaces | selectattr('type') | selectattr('type.value', '==', 'bridge') | first %} +{# interfaces (always bonds on Mellanox) that belong to this bridge #} +{% set ports = interfaces | selectattr('bridge') | selectattr('bridge.name', '==', bridge.name) %} +{# allowed VLANs can be specified on the bridge, any of its ports, or #} +{% set my_vlans = bridge.tagged_vlans or (ports | iface_vlans | flatten | sort | unique) or vlans %} +{% set my_vlan_ids = my_vlans | map(attribute='vid') | sort -%} + +auto {{ bridge.name }} +iface {{ bridge.name }} + bridge-ports {{ ports | map(attribute='name') | join(' ') }}{% if my_vlans %} vxlan{% endif +%} + bridge-vlan-aware yes + bridge-pvid 1 +{% if bridge.mode.value == 'tagged' and my_vlans %} + bridge-vids {{ my_vlan_ids | join(' ') }} +{% endif %} + +{% if my_vlans %} +auto vxlan +iface vxlan + bridge-vlan-vni-map {{ my_vlan_ids | zip(my_vlan_ids) | map('join', '=') | join(' ') }} + bridge-learning off +{% endif %} diff --git a/roles/fabric/templates/interfaces.j2 b/roles/fabric/templates/interfaces.j2 new file mode 100644 index 0000000..300853c --- /dev/null +++ b/roles/fabric/templates/interfaces.j2 @@ -0,0 +1,21 @@ +source /etc/network/interfaces.d/*.intf + +# Management VRF and interface.
+auto mgmt +iface mgmt + address 127.0.0.1/8 + address ::1/128 + vrf-table auto + +{% for iface in interfaces | selectattr('mgmt_only') | selectattr('enabled') %} +auto {{ iface.name }} +iface {{ iface.name }} + vrf mgmt +{% for ip in iface.ip_addresses | rejectattr('address', 'match', '^fe80::.*/64$' ) %} + address {{ ip.address }} +{% endfor %} +{% if iface.custom_fields.gateway %} + gateway {{ iface.custom_fields.gateway.address | ipaddr('address') }} +{% endif %} + +{% endfor %} diff --git a/roles/fabric/templates/loopback.intf.j2 b/roles/fabric/templates/loopback.intf.j2 new file mode 100644 index 0000000..926b4d0 --- /dev/null +++ b/roles/fabric/templates/loopback.intf.j2 @@ -0,0 +1,19 @@ +{% set addrs = interfaces | selectattr('name', '==', 'lo') | + map(attribute='ip_addresses') | first | selectattr('role') %} +{% set loopback = addrs | selectattr('role.value', '==', 'loopback') | + map(attribute='address') %} +{% set anycast = addrs | selectattr('role.value', '==', 'anycast') | + map(attribute='address') %} +auto lo +iface lo inet loopback +{% for address in loopback %} + address {{ address }} +{% endfor %} +{% if peer is defined %} +{% if loopback | ipv4 %} + vxlan-local-tunnelip {{ loopback | ipv4 | first | ipaddr('address') }} +{% endif %} +{% if anycast | ipv4 %} + clagd-vxlan-anycast-ip {{ anycast | ipv4 | first | ipaddr('address') }} +{% endif %} +{% endif %} diff --git a/roles/fabric/templates/peerlink.intf.j2 b/roles/fabric/templates/peerlink.intf.j2 new file mode 100644 index 0000000..f5a74d9 --- /dev/null +++ b/roles/fabric/templates/peerlink.intf.j2 @@ -0,0 +1,17 @@ +{% set peer_ip = hostvars[peer].interfaces + | selectattr('name', '==', 'lo') + | map(attribute='ip_addresses') | first + | selectattr('role') | selectattr('role.value', '==', 'loopback') + | map(attribute='address') | ipv4 | first | ipaddr('address') %} +{% set anycast_ip = interfaces + | selectattr('name', '==', 'lo') + | map(attribute='ip_addresses') | first + | selectattr('role') |
selectattr('role.value', '==', 'anycast') + | map(attribute='address') | ipv4 | first | ipaddr('address') -%} + +# Peer link to the other switch. +auto peerlink.4094 +iface peerlink.4094 + clagd-peer-ip linklocal + clagd-backup-ip {{ peer_ip }} + clagd-sys-mac {{ anycast_ip | cl_clag_sys_mac }} diff --git a/roles/fabric/templates/ports.conf.j2 b/roles/fabric/templates/ports.conf.j2 new file mode 100644 index 0000000..907b97b --- /dev/null +++ b/roles/fabric/templates/ports.conf.j2 @@ -0,0 +1,11 @@ +# https://docs.nvidia.com/networking-ethernet-software/cumulus-linux/Layer-1-and-Switch-Ports/Interface-Configuration-and-Management/Switch-Port-Attributes/#breakout-ports +{% for interface in interfaces | selectattr('name', 'match', '^swp[0-9]+$') %} +{{ interface.name|regex_replace('^swp', '') }}= +{%- if interfaces|selectattr('name', 'match', '^'+interface.name+'s[0-9]+$') %} +4x +{% elif not interface.enabled %} +disabled +{% else %} +1x +{% endif %} +{% endfor %} diff --git a/roles/fabric/templates/resolv.conf.j2 b/roles/fabric/templates/resolv.conf.j2 new file mode 100644 index 0000000..f03eb30 --- /dev/null +++ b/roles/fabric/templates/resolv.conf.j2 @@ -0,0 +1,3 @@ +{% for server in dns %} +nameserver {{ server }} # vrf mgmt +{% endfor %} \ No newline at end of file diff --git a/roles/fabric/templates/switch.intf.j2 b/roles/fabric/templates/switch.intf.j2 new file mode 100644 index 0000000..29b6c54 --- /dev/null +++ b/roles/fabric/templates/switch.intf.j2 @@ -0,0 +1,12 @@ +{% for iface in interfaces | iface_real | rejectattr('mgmt_only') | selectattr('enabled') %} +auto {{ iface.name }} +iface {{ iface.name }} +{% if iface.vrf %} + vrf {{ iface.vrf.name }} +{% endif %} + mtu {{ iface.mtu if iface.mtu else 9216 }} +{% for addr in iface.ip_addresses %} + address {{ addr.address }} +{% endfor %} + +{% endfor %} diff --git a/roles/firewall/files/conntrackd.conf b/roles/firewall/files/conntrackd.conf new file mode 100644 index 0000000..0fa08d9 --- /dev/null +++ 
b/roles/firewall/files/conntrackd.conf @@ -0,0 +1,2 @@ +# The init script for conntrackd wants this, not sure about conntrackd itself. +net.netfilter.nf_conntrack_tcp_be_liberal = 1 diff --git a/roles/firewall/files/sshd_config.friwall b/roles/firewall/files/sshd_config.friwall new file mode 100644 index 0000000..6cdd411 --- /dev/null +++ b/roles/firewall/files/sshd_config.friwall @@ -0,0 +1,15 @@ +# This is used by sshd in default VRF to receive configuration updates. Lock +# down to only allow executing the update script. + +# Only allow pubkey auth. +KbdInteractiveAuthentication no +PasswordAuthentication no +PermitRootLogin prohibit-password + +# Disable what we can. +AllowTcpForwarding no +GatewayPorts no +X11Forwarding no + +# And then disable everything else. +ForceCommand /usr/local/bin/update diff --git a/roles/firewall/files/update b/roles/firewall/files/update new file mode 100644 index 0000000..b08d49f --- /dev/null +++ b/roles/firewall/files/update @@ -0,0 +1,34 @@ +#!/bin/sh + +apply() { + cp -R /opt/config/etc/nftables.d /etc || return 1 + nft -I /etc/nftables.d -f /etc/nftables.nft || return 2 + cp -R /opt/config/etc/wireguard /etc || return 3 + wg syncconf wg /etc/wireguard/wg.conf || return 4 +} + +cleanup() { + rm -fr /opt/config +} + +# clean now and on exit +cleanup +trap cleanup EXIT + +mkdir -p /opt/config +tar xz -C /opt/config --warning=no-timestamp + +current="$(cat /opt/version 2>/dev/null || echo -1)" +next="$(cat /opt/config/version 2>/dev/null || echo -1)" +echo "Updating config from v${current} to v${next}" +if [ "${next:-0}" -ne "${current:-0}" ] ; then + echo "Applying config v${next}" + if apply ; then + echo "${next}" > /opt/version + echo "Applied config v${next}" + else + error="$?" 
+ echo "Could not apply config v${next}, error ${error}" + exit "${error}" + fi +fi diff --git a/roles/firewall/handlers/main.yml b/roles/firewall/handlers/main.yml new file mode 100644 index 0000000..46bf0d7 --- /dev/null +++ b/roles/firewall/handlers/main.yml @@ -0,0 +1,41 @@ +- name: enable interfaces + command: ifup --auto + when: "'handler' not in ansible_skip_tags" + +- name: mkinitfs + command: mkinitfs + when: "'handler' not in ansible_skip_tags" + +- name: reload frr + command: /usr/lib/frr/frr-reload.py --reload /etc/frr/frr.conf + when: "'handler' not in ansible_skip_tags" + +- name: reload nftables + service: + name: nftables + state: reloaded + when: "'handler' not in ansible_skip_tags" + +- name: restart conntrackd + service: + name: conntrackd + state: restarted + when: "'handler' not in ansible_skip_tags" + +- name: restart frr + service: + name: frr + state: restarted + when: "'handler' not in ansible_skip_tags" + +- name: reload sshd.friwall + service: + name: sshd.friwall + state: reloaded + when: "'handler' not in ansible_skip_tags" + +- name: restart sshd.friwall + service: + name: sshd.friwall + state: restarted + when: "'handler' not in ansible_skip_tags" diff --git a/roles/firewall/tasks/config.yml b/roles/firewall/tasks/config.yml new file mode 100644 index 0000000..89c4204 --- /dev/null +++ b/roles/firewall/tasks/config.yml @@ -0,0 +1,59 @@ +- name: Install packages for config updates + package: + name: tar + +- name: Limit SSH for config updates + copy: + dest: /etc/ssh/ + src: sshd_config.friwall + notify: reload sshd.friwall + +- name: Create SSH service for config updates + file: + path: /etc/init.d/sshd.friwall + src: /etc/init.d/sshd + state: link + +- name: Configure SSH service for config updates + copy: + dest: /etc/conf.d/sshd.friwall + content: | + cfgfile="/etc/ssh/sshd_config.friwall" + vrf="default" + notify: restart sshd.friwall + +- name: Enable SSH service for config updates + service: + name: sshd.friwall + enabled: yes + 
state: started + +- name: Install config updater + copy: + dest: /usr/local/bin/ + src: update + mode: 0700 + +- name: Get master SSH key + delegate_to: '{{ master }}' + command: "cat ~friwall/.ssh/id_ed25519.pub" + register: master_key + changed_when: false + +- name: Deploy master key on node + authorized_key: "user=root key={{ master_key.stdout }}" + +- name: Get my host SSH key + command: cat /etc/ssh/ssh_host_ed25519_key.pub + register: node_key + changed_when: false + +- name: Introduce myself to master + delegate_to: '{{ master }}' + become: yes + become_user: friwall + become_method: su + become_flags: "-s /bin/sh" # no login shell for user + known_hosts: + name: "{{ inventory_hostname }}" + key: "{{ inventory_hostname }},{{ interfaces | selectattr('name', '==', 'lo') | map(attribute='ip_addresses') | first | selectattr('role') | selectattr('role.value', '==', 'loopback') | map(attribute='address') | ipv4 | first | ipaddr('address') }} {{ node_key.stdout }}" # TODO make IP retrieval less terrifying diff --git a/roles/firewall/tasks/conntrackd.yml b/roles/firewall/tasks/conntrackd.yml new file mode 100644 index 0000000..4412574 --- /dev/null +++ b/roles/firewall/tasks/conntrackd.yml @@ -0,0 +1,36 @@ +- name: Install conntrack-tools + package: + name: conntrack-tools + +# Ensure the module is loaded before setting sysctl values. +- name: Autoload nf_conntrack + lineinfile: + dest: /etc/modules-load.d/netfilter.conf + line: nf_conntrack + create: yes + +# Set required sysctl values. 
+- name: Set sysctl values for conntrackd + copy: + dest: /etc/sysctl.d/ + src: conntrackd.conf + +- name: Set up conntrackd + template: + dest: /etc/conntrackd/conntrackd.conf + src: conntrackd.conf.j2 + mode: 0644 + notify: restart conntrackd + +- name: Run conntrackd in default VRF + lineinfile: + dest: /etc/conf.d/conntrackd + line: 'vrf="default"' + regexp: '^vrf=' + notify: restart conntrackd + +- name: Enable conntrackd + service: + name: conntrackd + enabled: yes + state: started diff --git a/roles/firewall/tasks/frr.yml b/roles/firewall/tasks/frr.yml new file mode 100644 index 0000000..d7450ec --- /dev/null +++ b/roles/firewall/tasks/frr.yml @@ -0,0 +1,48 @@ +- name: Enable sysctl service + service: + name: sysctl + enabled: yes + runlevel: boot + state: started + +- name: Enable community package repo + lineinfile: + path: /etc/apk/repositories + regexp: '^# *(http.*/v[^/]*/community)' + line: '\1' + backrefs: yes + +- name: Install FRR + package: + name: frr,frr-pythontools + state: latest + +- name: Set datacenter defaults + lineinfile: + path: /etc/frr/daemons + regexp: '^frr_profile=' + line: 'frr_profile="datacenter"' + notify: restart frr + +- name: Enable BGP and BFD + lineinfile: + path: /etc/frr/daemons + regexp: "^{{ item }}=" + line: "{{ item }}=yes" + loop: + - bfdd + - bgpd + notify: restart frr + +- name: Enable FRR service + service: + name: frr + enabled: yes + state: started + +- name: Copy FRR config + template: + dest: /etc/frr/frr.conf + src: frr.conf.j2 + mode: 0644 + notify: reload frr diff --git a/roles/firewall/tasks/main.yml b/roles/firewall/tasks/main.yml new file mode 100644 index 0000000..6bf28c2 --- /dev/null +++ b/roles/firewall/tasks/main.yml @@ -0,0 +1,64 @@ +- name: Update package cache + package: + update_cache: yes + +- name: Install packages + package: + name: bash,bonding,iproute2 + state: latest + +- name: Tell mdev to rename network interfaces + lineinfile: + path: /etc/mdev.conf + line: '-net/.* root:root 600 
@/sbin/nameif -s' + insertafter: '^# net devices' + notify: mkinitfs + +- name: Configure interface names + template: + dest: /etc/mactab + src: mactab.j2 + mode: 0644 + +- name: Create /etc/network/interfaces.d + file: + path: /etc/network/interfaces.d + state: directory + mode: 0755 + +- name: Set up interfaces + template: + dest: /etc/network/interfaces + src: interfaces.j2 + mode: 0644 + notify: enable interfaces + +- name: Set up management interfaces + import_tasks: mgmt.yml + +- name: Set up data interfaces + template: + dest: /etc/network/interfaces.d/fabric.intf + src: fabric.intf.j2 + mode: 0644 + notify: enable interfaces + +- name: Set up sysctls + template: + dest: /etc/sysctl.d/firewall.conf + src: sysctl.conf.j2 + +- name: Set up FRR + import_tasks: frr.yml + +- name: Set up wireguard + import_tasks: wireguard.yml + +- name: Set up nftables + import_tasks: nftables.yml + +- name: Set up conntrackd + import_tasks: conntrackd.yml + +- name: Set up configuration channel + import_tasks: config.yml diff --git a/roles/firewall/tasks/mgmt.yml b/roles/firewall/tasks/mgmt.yml new file mode 100644 index 0000000..bddee5e --- /dev/null +++ b/roles/firewall/tasks/mgmt.yml @@ -0,0 +1,25 @@ +- name: Set up management interfaces + template: + dest: /etc/network/interfaces.d/mgmt.intf + src: mgmt.intf.j2 + mode: 0644 + register: task_mgmt_interface + +- name: Run SSH in management VRF + lineinfile: + path: /etc/conf.d/sshd + line: "vrf=\"mgmt\"" + register: task_ssh_vrf + +- name: Reboot for new VRF + reboot: + when: task_mgmt_interface.changed or task_ssh_vrf.changed + register: task_reboot + +- name: Reset the connection + meta: reset_connection + +- name: Wait for the network device to reload + wait_for_connection: + delay: 10 + when: task_reboot.changed diff --git a/roles/firewall/tasks/nftables.yml b/roles/firewall/tasks/nftables.yml new file mode 100644 index 0000000..c39ce32 --- /dev/null +++ b/roles/firewall/tasks/nftables.yml @@ -0,0 +1,25 @@ +- name: 
Install nftables + package: + name: nftables + +- name: Copy nftables config + template: + dest: /etc/nftables.nft + src: nftables.nft.j2 + mode: 0644 + notify: reload nftables + +- name: Copy static nftables includes + template: + dest: '/etc/nftables.d/{{ item }}' + src: '{{ item }}.j2' + mode: 0644 + loop: + - interfaces.nft + notify: reload nftables + +- name: Enable nftables service + service: + name: nftables + enabled: yes + state: started diff --git a/roles/firewall/tasks/wireguard.yml b/roles/firewall/tasks/wireguard.yml new file mode 100644 index 0000000..d49a380 --- /dev/null +++ b/roles/firewall/tasks/wireguard.yml @@ -0,0 +1,26 @@ +# All firewall nodes share one external IP for wireguard connections. +# Private key and peer configuration is the same for all nodes. Peers +# connected to each node are installed in the routing table and +# distributed into fabric. + +- name: Install wireguard tools + package: + name: wireguard-tools + +- name: Create wireguard directory + file: + path: /etc/wireguard + state: directory + +- name: Touch wireguard config + file: + path: /etc/wireguard/wg.conf + state: touch + access_time: preserve + modification_time: preserve + +- name: Add wireguard interface + template: + dest: /etc/network/interfaces.d/wg.intf + src: wg.intf.j2 + notify: enable interfaces diff --git a/roles/firewall/templates/conntrackd.conf.j2 b/roles/firewall/templates/conntrackd.conf.j2 new file mode 100644 index 0000000..578f00d --- /dev/null +++ b/roles/firewall/templates/conntrackd.conf.j2 @@ -0,0 +1,50 @@ +{% set fw = inventory_hostname.split('-')[1]|int -%} + +Sync { + Mode FTFW { + # Add received rules immediately so we don’t need a + # signal on failover. + DisableExternalCache On + } + + UDP { + Interface {{ iface_sync }} + IPv6_address fe80::{{ fw }} + IPv6_Destination_Address fe80::{{ 2 if fw == 1 else 1 }} + Port 3780 + + # Recommended by manual. 
+ Checksum on + RcvSocketBuffer 1249280 + SndSocketBuffer 1249280 + } + + #Options { + # TCPWindowTracking Off + #} +} + +General { + UNIX { + Path /var/run/conntrackd.ctl + } + Syslog on + + # Recommended by manual. + HashLimit 524288 + NetlinkBufferSize 2097152 + NetlinkBufferSizeMaxGrowth 8388608 + + Filter From Kernelspace { + # Don’t replicate rules for traffic from/to firewall. + Address Ignore { + IPv4_address 127.0.0.1/8 + IPv6_address ::1/128 + IPv6_address fe80::/64 # link-local addresses + IPv4_address {{ wg_ip }} +{% for address in interfaces | map(attribute='ip_addresses') | flatten | sort(attribute='address') %} + IPv{{ address.family.value }}_address {{ address.address }} +{% endfor %} + } + } +} diff --git a/roles/firewall/templates/fabric.intf.j2 b/roles/firewall/templates/fabric.intf.j2 new file mode 100644 index 0000000..d0392d5 --- /dev/null +++ b/roles/firewall/templates/fabric.intf.j2 @@ -0,0 +1,12 @@ +{% for iface in interfaces | selectattr('name', 'match', '^lan') | map(attribute='name') %} +auto {{ iface }} +iface {{ iface }} + mtu 9216 + +auto {{ iface }}.2 +iface {{ iface }}.2 + +auto {{ iface }}.4 +iface {{ iface }}.4 + +{% endfor %} diff --git a/roles/firewall/templates/frr.conf.j2 b/roles/firewall/templates/frr.conf.j2 new file mode 100644 index 0000000..eccf513 --- /dev/null +++ b/roles/firewall/templates/frr.conf.j2 @@ -0,0 +1,141 @@ +{% set addrs = interfaces | selectattr('name', '==', 'lo') | + map(attribute='ip_addresses') | first | selectattr('role') %} +{% set loopback = addrs | selectattr('role.value', '==', 'loopback') | map(attribute='address') -%} + +frr defaults datacenter +service integrated-vtysh-config +log syslog + +# Without this frr and kernel ECMP routes sometimes get desynced when a link is +# lost and found. Maybe related to https://github.com/FRRouting/frr/issues/12239. 
+zebra nexthop-group keep 1 + +router-id {{ loopback | ipv4 | first | ipaddr('address') }} + +# Don’t announce anything at start until we get routes from all our peers. +# Without this packets might get dropped until all routes are synced. +bgp update-delay 10 + +bfd + profile fast + receive-interval 150 + transmit-interval 150 + +# Default VRF has two connections to each exit, one for inside and one +# for outside networks. The default route is received from the outside +# peers and distributed back to inside peers. Routes to office +# networks and NAT IPs are distributed to outside peers. +router bgp {{ asn.asn }} + # Allow multipathing through different ASs with equal path length. + bgp bestpath as-path multipath-relax + # NAT IPs are not on any interface so disable checking for it. + no bgp network import-check + +{% for group in ['inside', 'outside'] %} + neighbor {{ group }} peer-group + neighbor {{ group }} remote-as external + neighbor {{ group }} capability extended-nexthop +{% endfor %} + +{% for iface in interfaces | selectattr('name', 'match', '^lan') %} + neighbor {{ iface.name }}.2 interface peer-group inside + neighbor {{ iface.name }}.2 bfd profile fast + neighbor {{ iface.name }}.4 interface peer-group outside + neighbor {{ iface.name }}.4 bfd profile fast +{% endfor %} + + address-family ipv4 unicast +{% for network in nat %} + network {{ network }} +{% endfor %} + + redistribute connected route-map loopback + maximum-paths 16 + + neighbor outside soft-reconfiguration inbound + neighbor outside route-map outside->default in + neighbor outside route-map default->outside out + + neighbor inside allowas-in origin + neighbor inside default-originate + neighbor inside soft-reconfiguration inbound + neighbor inside route-map inside->default in + neighbor inside route-map default->inside out + exit-address-family + + address-family ipv6 unicast + redistribute connected route-map loopback + maximum-paths 16 + + neighbor outside activate + neighbor outside
soft-reconfiguration inbound + neighbor outside route-map outside->default in + neighbor outside route-map default->outside out + + neighbor inside activate + neighbor inside allowas-in origin + neighbor inside default-originate + neighbor inside soft-reconfiguration inbound + neighbor inside route-map inside->default in + neighbor inside route-map default->inside out + exit-address-family + +# Prefix lists. +ip prefix-list default permit 0.0.0.0/0 +ipv6 prefix-list default permit ::/0 + +ip prefix-list fabric permit 10.34.0.0/24 ge 32 + +{% for vlan in vlans %} +{% for prefix in query('netbox.netbox.nb_lookup', 'prefixes', api_filter='vlan_id='~vlan.id, raw_data=true) %} +{% if prefix.family.value == 4 %} +ip prefix-list office permit {{ prefix.prefix }} ge 24 +{% else %} +ipv6 prefix-list office permit {{ prefix.prefix }} ge 64 +{% endif %} +{% endfor %} +{% endfor %} + +ip prefix-list vpn permit {{ wg_net | ipaddr('subnet') }} + +{% for network in nat %} +ip prefix-list nat permit {{ network }} +{% endfor %} +{# TODO WG endpoint should probably be in a separate prefix-list. #} +ip prefix-list nat permit {{ wg_ip }} + +route-map loopback permit 1 + match interface lo + +# Get routes to offices and VPN users on other firewalls from inside peers. +route-map inside->default permit 10 + match ip address prefix-list fabric +route-map inside->default permit 20 + match ip address prefix-list office +route-map inside->default permit 21 + match ipv6 address prefix-list office + +# Send default route and VPN network to inside peers. +route-map default->inside permit 1 + match interface lo +route-map default->inside permit 20 + match ip address prefix-list default +route-map default->inside permit 21 + match ipv6 address prefix-list default +route-map default->inside permit 30 + match ip address prefix-list vpn + +# Get default route from outside peers. 
+route-map outside->default permit 10 + match ip address prefix-list default +route-map outside->default permit 11 + match ipv6 address prefix-list default + +# Send IPv6 office addresses and IPv4 NAT addresses to outside peers +# so inbound packets go through the firewall. +route-map default->outside permit 1 + match interface lo +route-map default->outside permit 11 + match ipv6 address prefix-list office +route-map default->outside permit 20 + match ip address prefix-list nat diff --git a/roles/firewall/templates/interfaces.j2 b/roles/firewall/templates/interfaces.j2 new file mode 100644 index 0000000..fae0f3d --- /dev/null +++ b/roles/firewall/templates/interfaces.j2 @@ -0,0 +1,10 @@ +{% set addrs = interfaces | selectattr('name', '==', 'lo') | map(attribute='ip_addresses') | first -%} + +source-directory /etc/network/interfaces.d + +auto lo +iface lo inet loopback + address {{ wg_ip }} +{% for address in addrs %} + address {{ address.address }} +{% endfor %} diff --git a/roles/firewall/templates/interfaces.nft.j2 b/roles/firewall/templates/interfaces.nft.j2 new file mode 100644 index 0000000..a8b7fda --- /dev/null +++ b/roles/firewall/templates/interfaces.nft.j2 @@ -0,0 +1,10 @@ +{% set ifaces_fabric = interfaces | selectattr('name', 'match', '^lan') | map(attribute='name') %} +set inside { + type iface_index + elements = { {{ ifaces_fabric | product(['2']) | map('join', '.') | join(', ') }}, wg } +} + +set outside { + type iface_index + elements = { {{ ifaces_fabric | product(['4']) | map('join', '.') | join(', ') }} } +} diff --git a/roles/firewall/templates/mactab.j2 b/roles/firewall/templates/mactab.j2 new file mode 100644 index 0000000..ae0bda9 --- /dev/null +++ b/roles/firewall/templates/mactab.j2 @@ -0,0 +1,3 @@ +{% for iface in interfaces | iface_real %} +{{ iface.name }} {{ iface.mac_address | lower }} +{% endfor %} \ No newline at end of file diff --git a/roles/firewall/templates/mgmt.intf.j2 b/roles/firewall/templates/mgmt.intf.j2 new file mode 
100644 index 0000000..04f99b4 --- /dev/null +++ b/roles/firewall/templates/mgmt.intf.j2 @@ -0,0 +1,24 @@ +auto mgmt +iface mgmt + pre-up ip link add $IFACE type vrf table 100 + up ip link set dev $IFACE up + post-down ip link del $IFACE + +{% for iface in interfaces | selectattr('name', 'match', '^mgmt') %} +auto {{ iface.name }} +iface {{ iface.name }} +{% if iface.vrf %} + requires {{ iface.vrf.name }} + pre-up ip link set $IFACE master {{ iface.vrf.name }} +{% endif %} +{% if iface.mtu %} + mtu {{ iface.mtu }} +{% endif %} +{% for addr in iface.ip_addresses %} + address {{ addr.address }} +{% endfor %} +{% if iface.custom_fields.gateway %} + up ip route add default via {{ iface.custom_fields.gateway.address | ipaddr('address') }}{% if iface.vrf %} vrf {{ iface.vrf.name }}{% endif %} +{% endif +%} + +{% endfor %} diff --git a/roles/firewall/templates/nftables.nft.j2 b/roles/firewall/templates/nftables.nft.j2 new file mode 100644 index 0000000..e380646 --- /dev/null +++ b/roles/firewall/templates/nftables.nft.j2 @@ -0,0 +1,117 @@ +#!/usr/sbin/nft -f +{% set ifaces_fabric = interfaces | selectattr('name', 'match', '^lan') | map(attribute='name') %} + +flush ruleset + +table inet filter { + include "/etc/nftables.d/interfaces.nft" + include "/etc/nftables.d/sets.nft*" + + set link { + type iface_index + elements = { {{ ifaces_fabric | product(['2', '4']) | map('join', '.') | join(', ') }} } + } + + chain input { + type filter hook input priority 0; policy drop + + ct state vmap { established : accept, related : accept, invalid : drop } \ + comment "Accept established streams and drop invalid connections" + + iif lo accept \ + comment "Accept any localhost traffic" + + iif mgmt tcp dport ssh accept \ + comment "Accept SSH from management VRF" + + tcp dport ssh ip saddr {{ hostvars[master]['ansible_host'] }} accept \ + comment "Accept SSH from firewall master" + + iif @link tcp dport bgp ip6 saddr fe80::/10 accept \ + comment "Accept link-local BGP on fabric links" + 
+ iif @link udp dport 3784 ip6 saddr fe80::/10 accept \ + comment "Accept link-local BFD on fabric links" + + iif @outside udp dport 51820 accept \ + comment "Accept WireGuard from outside" + + iif {{ iface_sync }} ip6 saddr fe80::/10 udp dport 3780 accept \ + comment "Accept connection tracking sync data" + + tcp dport auth reject with icmpx type port-unreachable \ + comment "Reject AUTH to make it fail fast" + + # ICMPv4 + ip protocol icmp icmp type { + echo-request, echo-reply, destination-unreachable, + parameter-problem, time-exceeded, + } accept \ + comment "Accept ICMP" + + # ICMPv6 + ip6 nexthdr icmpv6 icmpv6 type { + echo-request, echo-reply, destination-unreachable, + packet-too-big, parameter-problem, time-exceeded, + } accept \ + comment "Accept basic IPv6 functionality" + + ip6 nexthdr icmpv6 icmpv6 type { + nd-neighbor-solicit, nd-neighbor-advert, + nd-router-solicit, nd-router-advert, + } ip6 hoplimit 255 accept \ + comment "Allow IPv6 neighbor discovery" + } + + chain forward { + type filter hook forward priority filter; policy drop + + ct state { established, related } accept \ + comment "Forward all established and related traffic" + + ct status dnat accept \ + comment "Forward DNAT traffic for servers and suchlike" + + # Forward IPv4 to/from VPN users in the same network. +{% for vlan in vlans %} + iif @inside ip saddr @{{ vlan.name }} ip daddr @{{ vlan.name }} accept +{% endfor %} + + # Forward IPv6 to/from VPN users in the same network. +{% for vlan in vlans %} + iif @inside ip6 saddr @{{ vlan.name }}/6 ip6 daddr @{{ vlan.name }}/6 accept +{% endfor %} + + include "/etc/nftables.d/forward.nft*" + } + + chain output { + type filter hook output priority 0; policy accept + } +} + +table ip nat { + include "/etc/nftables.d/interfaces.nft" + include "/etc/nftables.d/sets.nft*" + include "/etc/nftables.d/netmap.nft*" + + # Ensure these maps exist even if empty. 
+ map netmap-in { type ipv4_addr : interval ipv4_addr; flags interval; } + map netmap-out { type ipv4_addr : interval ipv4_addr; flags interval; } + + chain postrouting { + type nat hook postrouting priority srcnat + + iif @inside oif @outside snat ip prefix to ip saddr map @netmap-out \ + comment "Static source NAT for 1:1 mapped addresses" + + include "/etc/nftables.d/nat.nft*" + } + + chain prerouting { + type nat hook prerouting priority dstnat + + dnat ip prefix to ip daddr map @netmap-in \ + comment "Static destination NAT for 1:1 mapped addresses" + } +} diff --git a/roles/firewall/templates/sysctl.conf.j2 b/roles/firewall/templates/sysctl.conf.j2 new file mode 100644 index 0000000..65ad5a7 --- /dev/null +++ b/roles/firewall/templates/sysctl.conf.j2 @@ -0,0 +1,19 @@ +# We are a router. +net.ipv4.ip_forward = 1 +net.ipv6.conf.all.forwarding = 1 + +# But not for management interfaces. +{% for iface in interfaces | selectattr('name', 'match', '^mgmt') %} +net.ipv4.conf.{{ iface.name }}.forwarding = 0 +net.ipv6.conf.{{ iface.name }}.forwarding = 0 +{% endfor %} + +# Zebra docs recommend these. +net.ipv6.conf.all.keep_addr_on_down = 1 +net.ipv6.route.skip_notify_on_dev_down = 1 + +# Do not send ICMP redirects. Happens because the firewall sees all office +# networks coming from the same routers, and gets confused as to why +# firewall is routing packets between them. 
+net.ipv4.conf.all.send_redirects = 0 +net.ipv4.conf.default.send_redirects = 0 diff --git a/roles/firewall/templates/wg.intf.j2 b/roles/firewall/templates/wg.intf.j2 new file mode 100644 index 0000000..6c295cc --- /dev/null +++ b/roles/firewall/templates/wg.intf.j2 @@ -0,0 +1,4 @@ +auto wg +iface wg inet static + use wireguard + address {{ wg_net }} diff --git a/roles/firewall_master/files/accept-fri.nft b/roles/firewall_master/files/accept-fri.nft new file mode 100644 index 0000000..cdc3976 --- /dev/null +++ b/roles/firewall_master/files/accept-fri.nft @@ -0,0 +1,16 @@ +table inet filter { + set fri { + typeof ip saddr; flags interval + elements = { 10.32.0.0/14, 192.168.0.0/16, 141.255.211.0/24, 193.2.76.0/24 } + } + + set fri/6 { + typeof ip6 saddr; flags interval + elements = { 2001:1470:fffd::/48 } + } + + chain input { + ip saddr @fri tcp dport { ssh, http, https } accept + ip6 saddr @fri/6 tcp dport { ssh, http, https } accept + } +} diff --git a/roles/firewall_master/files/friwall.ini b/roles/firewall_master/files/friwall.ini new file mode 100644 index 0000000..c6050c0 --- /dev/null +++ b/roles/firewall_master/files/friwall.ini @@ -0,0 +1,16 @@ +[uwsgi] +uid = friwall +gid = friwall + +socket = /run/friwall.socket +chown-socket = friwall:nginx +chmod-socket = 660 + +plugin = python3 +chdir = /srv/friwall/app +mount = /=wsgi:app +env = PYTHONUSERBASE=/srv/friwall/.local +env = HOME=/srv/friwall + +# Microsoft OIDC endpoint sends some fat‐ass headers. 
+buffer-size = 16384 diff --git a/roles/firewall_master/files/pusher.initd b/roles/firewall_master/files/pusher.initd new file mode 100755 index 0000000..4b85867 --- /dev/null +++ b/roles/firewall_master/files/pusher.initd @@ -0,0 +1,18 @@ +#!/sbin/openrc-run + +command="/srv/friwall/app/$RC_SVCNAME" +command_background="yes" +command_user="friwall" +command_group="nogroup" +directory="/srv/friwall" +pidfile="/run/$RC_SVCNAME.pid" + +depend() { + need net +} + +stop() { + ebegin "Stopping $RC_SVCNAME" + pkill -INT -g $(cat "$pidfile") && rm -f "$pidfile" + eend $? +} diff --git a/roles/firewall_master/files/uwsgi.ini b/roles/firewall_master/files/uwsgi.ini new file mode 100644 index 0000000..275e85e --- /dev/null +++ b/roles/firewall_master/files/uwsgi.ini @@ -0,0 +1,2 @@ +[uwsgi] +emperor = /etc/uwsgi/conf.d diff --git a/roles/firewall_master/handlers/main.yml b/roles/firewall_master/handlers/main.yml new file mode 100644 index 0000000..f3bc362 --- /dev/null +++ b/roles/firewall_master/handlers/main.yml @@ -0,0 +1,39 @@ +- name: restart interfaces + shell: ifdown --force --auto && ifup --auto + when: "'handler' not in ansible_skip_tags" + +- name: reload nftables + service: + name: nftables + state: reloaded + when: "'handler' not in ansible_skip_tags" + +- name: reload nginx + service: + name: nginx + state: reloaded + when: "'handler' not in ansible_skip_tags" + +- name: restart nginx + service: + name: nginx + state: restarted + when: "'handler' not in ansible_skip_tags" + +- name: restart pusher + service: + name: pusher + state: restarted + when: "'handler' not in ansible_skip_tags" + +- name: reload uwsgi + service: + name: uwsgi + state: reloaded + when: "'handler' not in ansible_skip_tags" + +- name: restart uwsgi + service: + name: uwsgi + state: restarted + when: "'handler' not in ansible_skip_tags" diff --git a/roles/firewall_master/tasks/mail.yml b/roles/firewall_master/tasks/mail.yml new file mode 100644 index 0000000..651c33a --- /dev/null +++ 
b/roles/firewall_master/tasks/mail.yml @@ -0,0 +1,9 @@ +- name: Install mail server + package: + name: opensmtpd + +- name: Enable mail server + service: + name: smtpd + enabled: yes + state: started diff --git a/roles/firewall_master/tasks/main.yml b/roles/firewall_master/tasks/main.yml new file mode 100644 index 0000000..c83dfed --- /dev/null +++ b/roles/firewall_master/tasks/main.yml @@ -0,0 +1,42 @@ +- name: Set up network interfaces + template: + dest: /etc/network/interfaces + src: interfaces.j2 + mode: 0644 + notify: restart interfaces + +- name: Install nftables + package: + name: nftables + +- name: Accept connections from FRI addresses + copy: + dest: /etc/nftables.d/ + src: accept-fri.nft + notify: reload nftables + +- name: Enable nftables + service: + name: nftables + enabled: yes + state: started + +- name: Install qemu guest agent + package: + name: qemu-guest-agent + +- name: Enable qemu guest agent + service: + name: qemu-guest-agent + enabled: yes + runlevel: boot + state: started + +- name: Set up mail server + import_tasks: mail.yml + +- name: Set up friwall user + import_tasks: user.yml + +- name: Set up web UI + import_tasks: web.yml diff --git a/roles/firewall_master/tasks/user.yml b/roles/firewall_master/tasks/user.yml new file mode 100644 index 0000000..f801b94 --- /dev/null +++ b/roles/firewall_master/tasks/user.yml @@ -0,0 +1,14 @@ +- name: Create friwall group + group: + name: friwall + system: yes + +- name: Create friwall user + user: + name: friwall + system: yes + home: /srv/friwall + shell: /sbin/nologin + generate_ssh_key: yes + ssh_key_comment: "{{ inventory_hostname }}" + ssh_key_type: ed25519 diff --git a/roles/firewall_master/tasks/web.yml b/roles/firewall_master/tasks/web.yml new file mode 100644 index 0000000..aa43ca9 --- /dev/null +++ b/roles/firewall_master/tasks/web.yml @@ -0,0 +1,110 @@ +--- +- name: Install packages + package: + name: git,inotify-tools,nginx,py3-pip,procps-ng,rsync,uwsgi,uwsgi-python3,wireguard-tools + 
+- name: Clone web files + become: yes + become_user: friwall + become_method: su + become_flags: "-s /bin/sh" + git: + repo: "{{ friwall_repo }}" + dest: /srv/friwall/app + force: yes + notify: reload uwsgi + +- name: Install requirements + become: yes + become_user: friwall + become_method: su + become_flags: '-s /bin/sh' + pip: + requirements: /srv/friwall/app/requirements.txt + extra_args: --user + +- name: Configure base settings + template: + dest: "/srv/friwall/{{ item }}" + src: "{{ item }}.j2" + owner: friwall + group: friwall + mode: 0600 + force: no + loop: + - nodes.json + - settings.json + notify: restart uwsgi + +- name: Configure list of networks + template: + dest: "/srv/friwall/networks.json" + src: "networks.json.j2" + owner: friwall + group: friwall + mode: 0600 + +- name: Configure uwsgi + copy: + dest: /etc/uwsgi/ + src: uwsgi.ini + notify: restart uwsgi + +- name: Configure uwsgi instance + copy: + dest: /etc/uwsgi/conf.d/ + src: friwall.ini + owner: friwall + group: friwall + +- name: Enable uwsgi + service: + name: uwsgi + enabled: yes + state: started + +- name: Configure nginx instance + template: + dest: /etc/nginx/http.d/friwall.conf + src: nginx.conf.j2 + notify: reload nginx + +- name: Run nginx in default VRF + lineinfile: + path: /etc/conf.d/nginx + line: "vrf=\"default\"" + notify: restart nginx + +- name: Enable nginx + service: + name: nginx + enabled: yes + state: started + +- name: Install config pusher initscript + copy: + dest: /etc/init.d/pusher + src: pusher.initd + mode: 0755 + notify: restart pusher + +- name: Enable config pusher service + service: + name: pusher + enabled: true + state: started + +- name: Regenerate config daily + cron: + name: "regenerate config" + job: "cd ~/app ; FLASK_APP=web python3 -m flask generate" + user: friwall + hour: "3" + minute: "33" + +- name: Try (re-)pushing config periodically + cron: + name: "push config" + job: "cd ~/app ; FLASK_APP=web python3 -m flask push" + user: friwall + 
minute: "*/15" diff --git a/roles/firewall_master/templates/interfaces.j2 b/roles/firewall_master/templates/interfaces.j2 new file mode 100644 index 0000000..d738c99 --- /dev/null +++ b/roles/firewall_master/templates/interfaces.j2 @@ -0,0 +1,14 @@ +auto lo +iface lo inet loopback + +{% for iface in interfaces %} +auto {{ iface.name }} +iface {{ iface.name }} inet static +{% for address in iface.ip_addresses %} + address {{ address.address }} +{% endfor %} +{% if iface.custom_fields.gateway %} + gateway {{ iface.custom_fields.gateway.address | ipaddr('address') }} +{% endif %} + +{% endfor %} diff --git a/roles/firewall_master/templates/networks.json.j2 b/roles/firewall_master/templates/networks.json.j2 new file mode 100644 index 0000000..229ad57 --- /dev/null +++ b/roles/firewall_master/templates/networks.json.j2 @@ -0,0 +1,21 @@ +{% set groups = vlans | map(attribute='name') | select('match', '.+-.+') + | map('split', '-') | map('first') | unique -%} +{% set prefixes = query('netbox.netbox.nb_lookup', 'prefixes', raw_data=true) %} + +{ +{% for vlan in vlans %} +{% set vlan_prefixes = prefixes | selectattr('vlan') | selectattr('vlan.id', '==', vlan.id) | map(attribute='prefix') %} + "{{ vlan.name }}": { + "ip": {{ vlan_prefixes | ipv4 | to_json }}, + "ip6": {{ vlan_prefixes | ipv6 | to_json }} + }{% if not loop.last or groups %},{% endif +%} +{% endfor %} + +{% for group in groups %} +{% set group_prefixes = prefixes | selectattr('vlan') | selectattr('vlan.name', 'match', '^'~group) | map(attribute='prefix') %} + "{{ group }}": { + "ip": {{ group_prefixes | ipv4 | to_json }}, + "ip6": {{ group_prefixes | ipv6 | to_json }} + }{% if not loop.last %},{% endif +%} +{% endfor %} +} diff --git a/roles/firewall_master/templates/nginx.conf.j2 b/roles/firewall_master/templates/nginx.conf.j2 new file mode 100644 index 0000000..b866797 --- /dev/null +++ b/roles/firewall_master/templates/nginx.conf.j2 @@ -0,0 +1,21 @@ +server { + listen 80; + listen [::]:80; + server_name {{ 
fqdn }}; + + return 301 https://$host$request_uri; +} + +server { + listen 443 ssl; + listen [::]:443 ssl; + server_name {{ fqdn }}; + + ssl_certificate /etc/letsencrypt/live/{{ fqdn }}/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/live/{{ fqdn }}/privkey.pem; + + location / { + uwsgi_pass unix:/run/friwall.socket; + include uwsgi_params; + } +} diff --git a/roles/firewall_master/templates/nodes.json.j2 b/roles/firewall_master/templates/nodes.json.j2 new file mode 100644 index 0000000..fa16b54 --- /dev/null +++ b/roles/firewall_master/templates/nodes.json.j2 @@ -0,0 +1,10 @@ +{% set nodes = query('netbox.netbox.nb_lookup', 'devices', raw_data=true) + | selectattr('config_context') | selectattr('config_context', 'contains', 'master') + | selectattr('config_context.master', '==', inventory_hostname) + | map(attribute='name') -%} + +{ +{% for node in nodes %} + "{{ hostvars[node].interfaces | selectattr('name', '==', 'lo') | map(attribute='ip_addresses') | first | selectattr('role') | selectattr('role.value', '==', 'loopback') | map(attribute='address') | ipv4 | first | ipaddr('address') }}": -1{{ '' if loop.last else ',' }} {# TODO help my eyes the goggles do nothing +#} +{% endfor %} +} diff --git a/roles/firewall_master/templates/settings.json.j2 b/roles/firewall_master/templates/settings.json.j2 new file mode 100644 index 0000000..b176b19 --- /dev/null +++ b/roles/firewall_master/templates/settings.json.j2 @@ -0,0 +1,11 @@ +{# Values are emitted with to_json so quotes and backslashes are escaped; strict JSON also forbids a comma after the last member. #} +{ + "ldap_host": {{ domain | to_json }}, + "ldap_user": {{ ldap_user | to_json }}, + "ldap_pass": {{ ldap_pass | to_json }}, + "ldap_base_dn": {{ ldap_base_dn | to_json }}, + "oidc_server": {{ oidc_server | to_json }}, + "oidc_client_id": {{ oidc_client_id | to_json }}, + "oidc_client_secret": {{ oidc_client_secret | to_json }}, + "wg_net": {{ wg_net | to_json }} +} \ No newline at end of file diff --git a/roles/leaf/handlers/main.yml b/roles/leaf/handlers/main.yml new file mode 100644 index 0000000..997284d --- /dev/null +++ b/roles/leaf/handlers/main.yml @@ -0,0 +1,4 @@ +- name: reload frr + 
command: + cmd: /usr/lib/frr/frr-reload + when: "'handler' not in ansible_skip_tags" diff --git a/roles/leaf/meta/main.yml b/roles/leaf/meta/main.yml new file mode 100644 index 0000000..eaa44ce --- /dev/null +++ b/roles/leaf/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: fabric diff --git a/roles/leaf/tasks/main.yml b/roles/leaf/tasks/main.yml new file mode 100644 index 0000000..c11dbde --- /dev/null +++ b/roles/leaf/tasks/main.yml @@ -0,0 +1,6 @@ +- name: Set up FRR + template: + dest: /etc/frr/frr.conf + src: frr.conf.j2 + mode: 0600 + notify: reload frr diff --git a/roles/leaf/templates/frr.conf.j2 b/roles/leaf/templates/frr.conf.j2 new file mode 100644 index 0000000..2e9f980 --- /dev/null +++ b/roles/leaf/templates/frr.conf.j2 @@ -0,0 +1,104 @@ +{% set lo_address = interfaces + | selectattr('name', '==', 'lo') + | map(attribute='ip_addresses') | first + | selectattr('role') | selectattr('role.value', '==', 'loopback') + | map(attribute='address') %} +{% set iface_bgp = interfaces + | iface_real | selectattr('enabled') + | rejectattr('mgmt_only') | rejectattr('lag') %} +{% set iface_server = iface_bgp | selectattr('custom_fields.tenant') %} +{% set iface_fabric = iface_bgp | difference(iface_server) | rejectattr('ip_addresses') %} +{% set my_tenants = iface_server | map(attribute='custom_fields.tenant.slug') | unique -%} + +frr defaults datacenter +log syslog informational +service integrated-vtysh-config + +# Route installation into kernels fails (rarely) without this option. +# It is not documented anywhere and appears to be a Cumulus extension. 
+zebra nexthop proto only + +router-id {{ lo_address | ipv4 | first | ipaddr('address') }} + +router bgp {{ asn.asn }} + bgp bestpath as-path multipath-relax + + neighbor fabric peer-group + neighbor fabric remote-as external + +{% for interface in iface_fabric %} + neighbor {{ interface.name }} interface peer-group fabric + neighbor {{ interface.name }} bfd 3 150 150 +{% endfor %} + +{% for tenant in my_tenants %} + neighbor dc-{{ tenant }} peer-group + neighbor dc-{{ tenant }} remote-as external +{% for interface in iface_server | selectattr('custom_fields.tenant.slug', '==', tenant) %} + neighbor {{ interface.name }} interface peer-group dc-{{ tenant }} + neighbor {{ interface.name }} bfd +{% endfor %} + +{% endfor %} + address-family ipv4 unicast + redistribute connected route-map loopbacks + neighbor fabric activate +{% for tenant in my_tenants %} + neighbor dc-{{ tenant }} activate + neighbor dc-{{ tenant }} route-map dc-{{ tenant }}->default in + neighbor dc-{{ tenant }} route-map default->dc out +{% endfor %} + exit-address-family + + address-family ipv6 unicast + redistribute connected route-map loopbacks + neighbor fabric activate +{% for tenant in my_tenants %} + neighbor dc-{{ tenant }} activate + neighbor dc-{{ tenant }} route-map dc-{{ tenant }}->default in + neighbor dc-{{ tenant }} route-map default->dc out +{% endfor %} + exit-address-family + + address-family l2vpn evpn + neighbor fabric activate +{% for iface in ifaces_evpn|default([]) %} + neighbor {{ iface }} activate +{% endfor %} +{% if interfaces | selectattr('mode') %} + advertise-all-vni +{% endif %} + exit-address-family + +route-map loopbacks permit 10 + match interface lo + +{% if my_tenants %} +ip prefix-list default permit 0.0.0.0/0 +ipv6 prefix-list default permit ::/0 + +{% for tenant in my_tenants %} +{% for prefix in query('netbox.netbox.nb_lookup', 'prefixes', raw_data=true, api_filter='tenant='~tenant) + | selectattr('role') | selectattr('role.slug', '==', 'bgp') | 
rejectattr('vlan') %} +{% if prefix.family.value == 4 %} +ip prefix-list dc-{{ tenant }} permit {{ prefix.prefix }} ge 32 +{% else %} +ipv6 prefix-list dc-{{ tenant }} permit {{ prefix.prefix }} ge 64 +{% endif %} +{% endfor %} +{% endfor %} + +# We only announce the default route to DC servers. +route-map default->dc permit 10 + match ip address prefix-list default +route-map default->dc permit 11 + match ipv6 address prefix-list default + +{% for tenant in my_tenants %} +route-map dc-{{ tenant }}->default permit 10 + match ip address prefix-list dc-{{ tenant }} +route-map dc-{{ tenant }}->default permit 11 + match ipv6 address prefix-list dc-{{ tenant }} + +{% endfor %} +{% endif %} diff --git a/roles/spine/handlers/main.yml b/roles/spine/handlers/main.yml new file mode 100644 index 0000000..6da5ef5 --- /dev/null +++ b/roles/spine/handlers/main.yml @@ -0,0 +1,3 @@ +- name: reload frr + command: /usr/lib/frr/frr-reload + when: "'handler' not in ansible_skip_tags" diff --git a/roles/spine/meta/main.yml b/roles/spine/meta/main.yml new file mode 100644 index 0000000..eaa44ce --- /dev/null +++ b/roles/spine/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: fabric diff --git a/roles/spine/tasks/main.yml b/roles/spine/tasks/main.yml new file mode 100644 index 0000000..c11dbde --- /dev/null +++ b/roles/spine/tasks/main.yml @@ -0,0 +1,6 @@ +- name: Set up FRR + template: + dest: /etc/frr/frr.conf + src: frr.conf.j2 + mode: 0600 + notify: reload frr diff --git a/roles/spine/templates/frr.conf.j2 b/roles/spine/templates/frr.conf.j2 new file mode 120000 index 0000000..c09820f --- /dev/null +++ b/roles/spine/templates/frr.conf.j2 @@ -0,0 +1 @@ +../../leaf/templates/frr.conf.j2 \ No newline at end of file diff --git a/setup.yml b/setup.yml new file mode 100644 index 0000000..b91c1e4 --- /dev/null +++ b/setup.yml @@ -0,0 +1,25 @@ +- hosts: spine-* + roles: + - spine + +- hosts: leaf-* + roles: + - leaf + +- hosts: exit-* + roles: + - exit + +- hosts: access-* + 
gather_facts: false + roles: + - access + +- hosts: fw-* + roles: + - firewall + +- hosts: zid + roles: + - certbot_dns + - firewall_master