Replace the prometheus/SNMP collector with telegraf reporting to InfluxDB.

- Remove the `collector` and `prometheus` roles and drop `collector` from mgmt-gw.
- Split the telegraf role into debian.yml (install, configure, enable) and
  token.yml (create a per-host InfluxDB write token and store it with `pass`).
- Add an influxdb_v2 output template and apply the telegraf role to ceph-* hosts.

diff --git a/roles/collector/README.md b/roles/collector/README.md
deleted file mode 100644
index 77c4f93..0000000
--- a/roles/collector/README.md
+++ /dev/null
@@ -1,18 +0,0 @@
-Set up metric collection with prometheus and telegraf as the SNMP proxy.
-
-NetBox config context should contain the lists `prometheus_hosts` and `snmp_hosts` with job definitions. Each entry should define `name` and `nb_filter` user to query hosts from NetBox. For example:
-
-    {
-        "prometheus_hosts": [
-            {
-                "name": "classroom",
-                "nb_filter": "role=desktop-computer status=active location=classroom"
-            }
-        ],
-        "snmp_hosts": [
-            {
-                "name": "switch",
-                "nb_filter": "role=switch name__isw=sw- status=active status=staged status=planned"
-            }
-        ]
-    }
diff --git a/roles/collector/files/prometheus-snmp.yml b/roles/collector/files/prometheus-snmp.yml
deleted file mode 100644
index b996b24..0000000
--- a/roles/collector/files/prometheus-snmp.yml
+++ /dev/null
@@ -1,4 +0,0 @@
-scrape_configs:
-  - job_name: "snmp"
-    static_configs:
-      - targets: ["localhost:9273"]
diff --git a/roles/collector/files/prometheus.nft b/roles/collector/files/prometheus.nft
deleted file mode 100644
index e0e8280..0000000
--- a/roles/collector/files/prometheus.nft
+++ /dev/null
@@ -1,12 +0,0 @@
-table inet filter {
-    chain output {
-        type filter hook output priority 0; policy accept;
-
-        skuid prometheus ct state { established, related } accept
-        skuid prometheus th dport domain accept
-        skuid prometheus tcp dport { 443, 9100 } accept comment "prometheus"
-        skuid prometheus ip daddr 127.0.0.1 tcp dport 9090 accept comment "prometheus self"
-        skuid prometheus ip daddr 127.0.0.1 tcp dport 9273 accept comment "telegraf snmp exporter"
-        skuid prometheus drop
-    }
-}
diff --git a/roles/collector/files/telegraf.nft b/roles/collector/files/telegraf.nft
deleted file mode 100644
index 3af3fed..0000000
--- a/roles/collector/files/telegraf.nft
+++ /dev/null
@@ -1,9 +0,0 @@
-table inet filter {
-    chain output {
-        type filter hook output priority 0; policy accept;
-
-        skuid telegraf ct state { established, related } accept
-        skuid telegraf th dport snmp accept
-        skuid telegraf drop
-    }
-}
diff --git a/roles/collector/handlers/main.yml b/roles/collector/handlers/main.yml
deleted file mode 100644
index 5d02988..0000000
--- a/roles/collector/handlers/main.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-- name: reload nftables
-  service:
-    name: nftables
-    state: reloaded
-  when: "'handler' not in ansible_skip_tags"
-
-- name: reload prometheus
-  service:
-    name: prometheus
-    state: reloaded
-  when: "'handler' not in ansible_skip_tags"
-
-- name: restart telegraf
-  service:
-    name: telegraf
-    state: restarted # seems to crash on reloads
-  when: "'handler' not in ansible_skip_tags"
diff --git a/roles/collector/meta/main.yml b/roles/collector/meta/main.yml
deleted file mode 100644
index 368b911..0000000
--- a/roles/collector/meta/main.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-dependencies:
-  - role: prometheus
-  - role: telegraf
diff --git a/roles/collector/tasks/main.yml b/roles/collector/tasks/main.yml
deleted file mode 100644
index a5176ba..0000000
--- a/roles/collector/tasks/main.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-# since this host likely has access to sensitive networks,
-# restrict the destinations where monitoring daemons can connect
-- name: Set up outbound firewall rules
-  copy:
-    dest: "/etc/nftables.d/{{ item }}.nft"
-    src: "{{ item }}.nft"
-  loop:
-    - prometheus
-    - telegraf
-  notify: reload nftables
-
-- name: Configure telegraf to expose SNMP data as prometheus metrics
-  template:
-    dest: "/etc/telegraf.conf.d/{{ item }}.conf"
-    src: "{{ item }}.conf.j2"
-  loop:
-    - output
-    - snmp
-  notify: restart telegraf
-
-- name: Configure prometheus to pull SNMP data
-  copy:
-    dest: "/etc/prometheus/conf.d/snmp.yml"
-    src: "prometheus-snmp.yml"
-  notify: reload prometheus
-
-- name: Configure prometheus to pull custom data
-  template:
-    dest: "/etc/prometheus/conf.d/{{ item.name }}.yml"
-    src: "prometheus-job.yml.j2"
-  loop: "{{ prometheus_hosts }}"
-  loop_control:
-    label: "{{ item.name }}"
-  notify: reload prometheus
diff --git a/roles/collector/templates/output.conf.j2 b/roles/collector/templates/output.conf.j2
deleted file mode 100644
index 6dbe53c..0000000
--- a/roles/collector/templates/output.conf.j2
+++ /dev/null
@@ -1,4 +0,0 @@
-[[outputs.prometheus_client]]
-listen = "127.0.0.1:9273"
-expiration_interval = "300s"
-tagexclude = ["mac?"] # temporary tags we don’t need to export
diff --git a/roles/collector/templates/prometheus-job.yml.j2 b/roles/collector/templates/prometheus-job.yml.j2
deleted file mode 100644
index 7e24f05..0000000
--- a/roles/collector/templates/prometheus-job.yml.j2
+++ /dev/null
@@ -1,20 +0,0 @@
-{% set devices = query("netbox.netbox.nb_lookup", "devices", api_filter="{{ item.nb_filter }}", raw_data=true)
-  | selectattr("primary_ip")
-  | map(attribute="name")
-  | map("extract", hostvars) -%}
-
-scrape_configs:
-  - job_name: "{{ item.name }}"
-    relabel_configs:
-      - source_labels: [__address__]
-        regex: '([^.]+).*'
-        target_label: name
-        replacement: ${1}
-    static_configs:
-      - targets:
-{% for address in devices
-  | selectattr("dns_name", "defined")
-  | map(attribute="dns_name")
-  | reject("none") | sort | unique %}
-        - "{{ address }}:9100"
-{% endfor %}
diff --git a/roles/collector/templates/snmp.conf.j2 b/roles/collector/templates/snmp.conf.j2
deleted file mode 100644
index dd4624d..0000000
--- a/roles/collector/templates/snmp.conf.j2
+++ /dev/null
@@ -1,106 +0,0 @@
-[[inputs.snmp]]
-  interval = "300s"
-  agent_host_tag = "source"
-  agents = [
-{% for item in snmp_hosts %}
-{% for address in query("netbox.netbox.nb_lookup", "devices", api_filter=item.nb_filter, raw_data=true)
-  | selectattr("primary_ip4") | map(attribute="primary_ip4.address")
-  | ipaddr("address") %}
-    "{{ address }}",
-{% endfor %}
-{% endfor %}
-  ]
-  version = 3
-  sec_level = "authPriv"
-  auth_protocol = "SHA"
-  priv_protocol = "DES"
-  sec_name = "{{ password.snmp_user }}"
-  auth_password = "{{ password.snmp_pass }}"
-  priv_password = "{{ password.snmp_pass }}"
-
-  fieldexclude = ["ifDescr", "ifSpecific"]
-
-  [[inputs.snmp.field]]
-    name = "hostname"
-    oid = "RFC1213-MIB::sysName.0"
-    is_tag = true
-
-  # interface table
-  [[inputs.snmp.table]]
-    name = "iface"
-    oid = "IF-MIB::ifTable"
-    inherit_tags = ["hostname"]
-
-    [[inputs.snmp.table.field]]
-      oid = "IF-MIB::ifName"
-
-    # rename counters to make prometheus happy
-    [[inputs.snmp.table.field]]
-      name = "in_total"
-      oid = "IF-MIB::ifInOctets"
-
-    [[inputs.snmp.table.field]]
-      name = "in_err_total"
-      oid = "IF-MIB::ifInErrors"
-
-    [[inputs.snmp.table.field]]
-      name = "out_total"
-      oid = "IF-MIB::ifOutOctets"
-
-    [[inputs.snmp.table.field]]
-      name = "out_err_total"
-      oid = "IF-MIB::ifOutErrors"
-
-  # MAC address table per VLAN
-  [[inputs.snmp.table]]
-    name = "fdb"
-    index_as_tag = true
-    inherit_tags = ["hostname"]
-
-    [[inputs.snmp.table.field]]
-      name = "ifIndex"
-      oid = "Q-BRIDGE-MIB::dot1qTpFdbPort"
-      is_tag = true
-
-    [[inputs.snmp.table.field]]
-      name = "entry"
-      oid = "Q-BRIDGE-MIB::dot1qTpFdbStatus"
-
-# look up interface name from its index
-# seems we need another SNMP connection for that
-[[processors.snmp_lookup]]
-  namepass = ["fdb", "iface"]
-  agent_tag = "source"
-  index_tag = "ifIndex"
-
-  version = 3
-  sec_level = "authPriv"
-  auth_protocol = "SHA"
-  priv_protocol = "DES"
-  sec_name = "{{ password.snmp_user }}"
-  auth_password = "{{ password.snmp_pass }}"
-  priv_password = "{{ password.snmp_pass }}"
-
-  [[processors.snmp_lookup.tag]]
-    oid = "IF-MIB::ifName"
-    name = "iface"
-
-# split index 42.1.2.3.10.11.12 into tags "vlan" and "mac1" to "mac6"
-[[processors.regex]]
-  namepass = ["fdb"]
-
-  [[processors.regex.tags]]
-    key = "index"
-    pattern = '^(?P<vlan>\d+)\.(?P<mac1>\d+)\.(?P<mac2>\d+)\.(?P<mac3>\d+)\.(?P<mac4>\d+)\.(?P<mac5>\d+)\.(?P<mac6>\d+)'
-
-# combine "mac*" tags into a single tag "mac" with value 01:02:03:0a:0b:0c
-[[processors.template]]
-  namepass = ["fdb"]
-  tagexclude = ["ifIndex", "index"]
-  tag = "mac"
-{% raw %}
-  template = '''{{
-    printf "%02x:%02x:%02x:%02x:%02x:%02x"
-    (.Tag "mac1"|int) (.Tag "mac2"|int) (.Tag "mac3"|int) (.Tag "mac4"|int) (.Tag "mac5"|int) (.Tag "mac6"|int)
-  }}'''
-{% endraw %}
diff --git a/roles/prometheus/README.md b/roles/prometheus/README.md
deleted file mode 100644
index 13309e1..0000000
--- a/roles/prometheus/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-Install and configure prometheus.
-
-Job definitions should be placed in /etc/prometheus/conf.d by roles using this one.
diff --git a/roles/prometheus/files/prometheus.yml b/roles/prometheus/files/prometheus.yml
deleted file mode 100644
index 2d54a25..0000000
--- a/roles/prometheus/files/prometheus.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-scrape_config_files:
-  - "conf.d/*.yml"
diff --git a/roles/prometheus/handlers/main.yml b/roles/prometheus/handlers/main.yml
deleted file mode 100644
index c85cc91..0000000
--- a/roles/prometheus/handlers/main.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-- name: reload prometheus
-  service:
-    name: prometheus
-    state: reloaded
-  when: "'handler' not in ansible_skip_tags"
diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml
deleted file mode 100644
index 9e44c4d..0000000
--- a/roles/prometheus/tasks/main.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-- name: Install packages
-  package:
-    name:
-      - prometheus
-
-- name: Create directory for prometheus configs
-  file:
-    path: /etc/prometheus/conf.d
-    state: directory
-
-- name: Configure prometheus
-  copy:
-    dest: /etc/prometheus/
-    src: prometheus.yml
-  notify: reload prometheus
-
-- name: Enable prometheus service
-  service:
-    name: prometheus
-    enabled: true
-    state: started
diff --git a/roles/telegraf/tasks/debian.yml b/roles/telegraf/tasks/debian.yml
new file mode 100644
index 0000000..a53989f
--- /dev/null
+++ b/roles/telegraf/tasks/debian.yml
@@ -0,0 +1,31 @@
+- name: Add influxdb repository
+  deb822_repository:
+    name: influxdata
+    uris: https://repos.influxdata.com/debian
+    suites: stable
+    components: main
+    architectures: amd64
+    signed_by: https://repos.influxdata.com/influxdata-archive.key
+  notify: update package cache
+
+- meta: flush_handlers
+
+- name: Install telegraf
+  package:
+    name: telegraf
+
+- name: Configure telegraf
+  when: not ansible_check_mode
+  template:
+    dest: /etc/telegraf/telegraf.d/output.conf
+    src: output.conf.j2
+    owner: telegraf
+    group: telegraf
+    mode: "0640"  # quoted so YAML keeps it a string instead of reading an octal integer
+  notify: restart telegraf
+
+- name: Enable telegraf
+  service:
+    name: telegraf
+    enabled: true
+    state: started
diff --git a/roles/telegraf/tasks/main.yml b/roles/telegraf/tasks/main.yml
index 6d4fea3..0193aa9 100644
--- a/roles/telegraf/tasks/main.yml
+++ b/roles/telegraf/tasks/main.yml
@@ -1,22 +1,11 @@
-- name: Add telegraf package repo on Debian
+- name: Get influxdb info
+  set_fact:
+    influxdb_info: '{{ lookup("passwordstore", "vm/"~influxdb_host, returnall=true, missing="empty") | from_yaml }}'
+
+- name: Create influxdb token for this host
+  include_tasks: token.yml
+  when: 'not ansible_check_mode and "influxdb_token" not in password'
+
+- name: Install telegraf on Debian
+  include_tasks: debian.yml
   when: ansible_os_family == "Debian"
-  deb822_repository:
-    name: influxdata
-    uris: https://repos.influxdata.com/debian
-    suites: stable
-    components: main
-    architectures: amd64
-    signed_by: https://repos.influxdata.com/influxdata-archive.key
-  notify: update package cache
-
-- meta: flush_handlers
-
-- name: Install telegraf
-  package:
-    name: telegraf
-
-- name: Enable telegraf service
-  service:
-    name: telegraf
-    enabled: true
-    state: started
diff --git a/roles/telegraf/tasks/token.yml b/roles/telegraf/tasks/token.yml
new file mode 100644
index 0000000..6343331
--- /dev/null
+++ b/roles/telegraf/tasks/token.yml
@@ -0,0 +1,58 @@
+- name: Get influxdb organization ID
+  delegate_to: localhost
+  uri:
+    url: '{{ influxdb_info.influxdb_url }}/api/v2/orgs'
+    headers:
+      Authorization: Token {{ influxdb_info.influxdb_operator_token }}
+  register: response
+
+- name: Parse influxdb orgID
+  set_fact:
+    influxdb_orgID: '{{ response.json.orgs | selectattr("name", "==", influxdb_info.influxdb_org) | map(attribute="id") | first }}'
+
+- name: Get influxdb bucket ID
+  delegate_to: localhost
+  uri:
+    url: '{{ influxdb_info.influxdb_url }}/api/v2/buckets?orgID={{ influxdb_orgID }}'
+    headers:
+      Authorization: Token {{ influxdb_info.influxdb_operator_token }}
+  register: response
+
+# The token must grant write access to the same bucket that output.conf.j2 writes to.
+- name: Parse influxdb bucketID
+  set_fact:
+    influxdb_bucketID: '{{ response.json.buckets | selectattr("name", "==", influxdb_info.influxdb_bucket | default("servers")) | map(attribute="id") | first }}'
+
+# The response and the fact derived from it contain the new token, so keep them out of logs.
+- name: Create influxdb token
+  delegate_to: localhost
+  no_log: true
+  uri:
+    url: '{{ influxdb_info.influxdb_url }}/api/v2/authorizations'
+    method: POST
+    body_format: json
+    status_code: 201
+    headers:
+      Authorization: Token {{ influxdb_info.influxdb_operator_token }}
+      Content-Type: application/json
+    body: |
+      {
+        "description": "{{ inventory_hostname }}",
+        "orgID": "{{ influxdb_orgID }}",
+        "permissions": [{ "action": "write", "resource": { "type": "buckets", "id": "{{ influxdb_bucketID }}" } }]
+      }
+  register: response
+
+- name: Parse influxdb token
+  no_log: true
+  set_fact:
+    influxdb_token: '{{ response.json.token }}'
+
+# Ansible’s passwordstore lookup plugin should be able to do that but is pretty broken,
+# so we do it manually.
+- name: Store influxdb token in password store
+  delegate_to: localhost
+  no_log: true
+  command:
+    cmd: 'pass insert --force --multiline {{ ("vm/" if is_virtual else "host/")~inventory_hostname }}'
+    stdin: '{{ password | to_nice_yaml(sort_keys=false) }}influxdb_token: {{ influxdb_token }}'
diff --git a/roles/telegraf/templates/output.conf.j2 b/roles/telegraf/templates/output.conf.j2
new file mode 100644
index 0000000..25ba0de
--- /dev/null
+++ b/roles/telegraf/templates/output.conf.j2
@@ -0,0 +1,5 @@
+[[outputs.influxdb_v2]]
+  urls = ["{{ influxdb_info.influxdb_url }}"]
+  organization = "{{ influxdb_info.influxdb_org }}"
+  bucket = "{{ influxdb_info.influxdb_bucket }}"
+  token = "{{ influxdb_token | default(password.influxdb_token) }}"
diff --git a/setup.yml b/setup.yml
index 273ffcc..f5457c3 100644
--- a/setup.yml
+++ b/setup.yml
@@ -16,12 +16,12 @@
 - hosts: ceph-*
   roles:
     - frr
+    - telegraf
     - ceph
 
 - hosts: mgmt-gw
   roles:
     - radvd # we are router for mgmt networks
-    - collector
 
 - hosts: proxmox-backup
   roles: