Add collector role
Sets up prometheus to pull metrics, with telegraf to process SNMP data.
This commit is contained in:
parent
6600a6fa36
commit
da3db8cc02
11 changed files with 228 additions and 0 deletions
18
roles/collector/README.md
Normal file
18
roles/collector/README.md
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
Set up metric collection with prometheus and telegraf as the SNMP proxy.
|
||||||
|
|
||||||
|
NetBox config context should contain the lists `prometheus_hosts` and `snmp_hosts` with job definitions. Each entry should define `name` and `nb_filter` used to query hosts from NetBox. For example:
|
||||||
|
|
||||||
|
{
|
||||||
|
"prometheus_hosts": [
|
||||||
|
{
|
||||||
|
"name": "classroom",
|
||||||
|
"nb_filter": "role=desktop-computer status=active location=classroom"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"snmp_hosts": [
|
||||||
|
{
|
||||||
|
"name": "switch",
|
||||||
|
"nb_filter": "role=switch name__isw=sw- status=active status=staged status=planned"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
4
roles/collector/files/prometheus-snmp.yml
Normal file
4
roles/collector/files/prometheus-snmp.yml
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
# Scrape job "snmp": a single static target on this host, port 9273.
scrape_configs:
  - job_name: "snmp"
    static_configs:
      - targets:
          - "localhost:9273"
|
12
roles/collector/files/prometheus.nft
Normal file
12
roles/collector/files/prometheus.nft
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
# Outbound filter for sockets owned by the prometheus user: a short allow
# list followed by a drop. Traffic from other users falls through to the
# chain policy (accept).
table inet filter {
    chain output {
        type filter hook output priority 0; policy accept;

        # packets belonging to connections that are already tracked
        skuid prometheus ct state { established, related } accept

        # name resolution
        skuid prometheus th dport domain accept

        # TCP ports prometheus may connect to on any destination
        skuid prometheus tcp dport { 443, 9100 } accept comment "prometheus"

        # loopback-only endpoints
        skuid prometheus ip daddr 127.0.0.1 tcp dport 9090 accept comment "prometheus self"
        skuid prometheus ip daddr 127.0.0.1 tcp dport 9273 accept comment "telegraf snmp exporter"

        # anything else sent by this user is discarded
        skuid prometheus drop
    }
}
|
9
roles/collector/files/telegraf.nft
Normal file
9
roles/collector/files/telegraf.nft
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
# Outbound filter for sockets owned by the telegraf user: only replies on
# tracked connections and traffic to the SNMP port are let through.
# Traffic from other users falls through to the chain policy (accept).
table inet filter {
    chain output {
        type filter hook output priority 0; policy accept;

        # packets belonging to connections that are already tracked
        skuid telegraf ct state { established, related } accept

        # SNMP queries
        skuid telegraf th dport snmp accept

        # anything else sent by this user is discarded
        skuid telegraf drop
    }
}
|
17
roles/collector/handlers/main.yml
Normal file
17
roles/collector/handlers/main.yml
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
# Reload the nftables service; not run when the "handler" tag is skipped.
- name: reload nftables
  service:
    name: nftables
    state: reloaded
  when:
    - "'handler' not in ansible_skip_tags"
|
||||||
|
|
||||||
|
# Reload the prometheus service; not run when the "handler" tag is skipped.
- name: reload prometheus
  service:
    name: prometheus
    state: reloaded
  when:
    - "'handler' not in ansible_skip_tags"
|
||||||
|
|
||||||
|
# Restart the telegraf service; not run when the "handler" tag is skipped.
- name: restart telegraf
  service:
    name: telegraf
    # a full restart is used because telegraf seems to crash on reloads
    state: restarted
  when:
    - "'handler' not in ansible_skip_tags"
|
3
roles/collector/meta/main.yml
Normal file
3
roles/collector/meta/main.yml
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
# Roles pulled in as dependencies of the collector role.
dependencies:
  - role: prometheus
  - role: telegraf
|
34
roles/collector/tasks/main.yml
Normal file
34
roles/collector/tasks/main.yml
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
# This host likely has access to sensitive networks, so the destinations
# the monitoring daemons may connect to are restricted. One nftables
# snippet is installed per daemon.
- name: Set up outbound firewall rules
  copy:
    src: "{{ item }}.nft"
    dest: "/etc/nftables.d/{{ item }}.nft"
  loop:
    - prometheus
    - telegraf
  notify: reload nftables
|
||||||
|
|
||||||
|
# Render telegraf's prometheus output and SNMP input configuration.
# The rendered snmp.conf embeds the SNMPv3 auth/priv passwords, so the files
# are made readable by the telegraf user only instead of inheriting the
# umask default (typically world-readable).
- name: Configure telegraf to expose SNMP data as prometheus metrics
  template:
    src: "{{ item }}.conf.j2"
    dest: "/etc/telegraf.conf.d/{{ item }}.conf"
    owner: telegraf
    mode: "0600"
  loop:
    - output
    - snmp
  notify: restart telegraf
|
||||||
|
|
||||||
|
# Install the static scrape job definition for SNMP data.
- name: Configure prometheus to pull SNMP data
  copy:
    src: "prometheus-snmp.yml"
    dest: "/etc/prometheus/conf.d/snmp.yml"
  notify: reload prometheus
|
||||||
|
|
||||||
|
# Render one scrape job file per entry of `prometheus_hosts`; the file is
# named after the entry's `name`, which is also used as the loop label.
- name: Configure prometheus to pull custom data
  template:
    src: "prometheus-job.yml.j2"
    dest: "/etc/prometheus/conf.d/{{ item.name }}.yml"
  loop: "{{ prometheus_hosts }}"
  loop_control:
    label: "{{ item.name }}"
  notify: reload prometheus
|
4
roles/collector/templates/output.conf.j2
Normal file
4
roles/collector/templates/output.conf.j2
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
# Expose collected metrics in prometheus format on the loopback interface.
[[outputs.prometheus_client]]
  listen = "127.0.0.1:9273"
  expiration_interval = "300s"
  # temporary tags we do not need to export
  tagexclude = ["mac?"]
|
20
roles/collector/templates/prometheus-job.yml.j2
Normal file
20
roles/collector/templates/prometheus-job.yml.j2
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
{#
  Scrape job for one entry of `prometheus_hosts` (available here as `item`).

  `devices` holds the hostvars of every NetBox device that matches the
  entry's `nb_filter` and has a primary IP. The filter is passed as a plain
  variable: template delimiters nested inside an expression are only a
  string literal and would depend on the lookup plugin templating it again.

  Targets are the defined, non-null `dns_name` values of those hosts,
  sorted and de-duplicated, on port 9100. The relabel rule copies the part
  of the target address before the first dot into the `name` label.

  The control tags below start at column 0 on purpose, so that no leading
  whitespace ends up in front of the rendered list items.
-#}
{% set devices = query("netbox.netbox.nb_lookup", "devices", api_filter=item.nb_filter, raw_data=true)
  | selectattr("primary_ip")
  | map(attribute="name")
  | map("extract", hostvars) -%}

scrape_configs:
  - job_name: "{{ item.name }}"
    relabel_configs:
      - source_labels: [__address__]
        regex: '([^.]+).*'
        target_label: name
        replacement: '${1}'
    static_configs:
      - targets:
{% for address in devices
  | selectattr("dns_name", "defined")
  | map(attribute="dns_name")
  | reject("none") | sort | unique %}
          - "{{ address }}:9100"
{% endfor %}
|
106
roles/collector/templates/snmp.conf.j2
Normal file
106
roles/collector/templates/snmp.conf.j2
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# SNMPv3 input. The agent list is built from NetBox: for every entry of
# `snmp_hosts`, each matching device that has a primary IPv4 address
# contributes that address (without the prefix length).
[[inputs.snmp]]
  interval = "300s"
  agent_host_tag = "source"
  agents = [
{% for job in snmp_hosts %}
{% for addr in query("netbox.netbox.nb_lookup", "devices", api_filter=job.nb_filter, raw_data=true)
  | selectattr("primary_ip4") | map(attribute="primary_ip4.address")
  | ipaddr("address") %}
    "{{ addr }}",
{% endfor %}
{% endfor %}
  ]

  # credentials and protocol settings
  version = 3
  sec_level = "authPriv"
  auth_protocol = "SHA"
  priv_protocol = "DES"
  sec_name = "{{ password.snmp_user }}"
  auth_password = "{{ password.snmp_pass }}"
  priv_password = "{{ password.snmp_pass }}"

  fieldexclude = ["ifDescr", "ifSpecific"]

  # system name, attached as the "hostname" tag
  [[inputs.snmp.field]]
    name = "hostname"
    oid = "RFC1213-MIB::sysName.0"
    is_tag = true

  # interface table
  [[inputs.snmp.table]]
    name = "iface"
    oid = "IF-MIB::ifTable"
    inherit_tags = ["hostname"]

    [[inputs.snmp.table.field]]
      oid = "IF-MIB::ifName"

    # counters are renamed to make prometheus happy
    [[inputs.snmp.table.field]]
      name = "in_total"
      oid = "IF-MIB::ifInOctets"

    [[inputs.snmp.table.field]]
      name = "in_err_total"
      oid = "IF-MIB::ifInErrors"

    [[inputs.snmp.table.field]]
      name = "out_total"
      oid = "IF-MIB::ifOutOctets"

    [[inputs.snmp.table.field]]
      name = "out_err_total"
      oid = "IF-MIB::ifOutErrors"

  # MAC address table per VLAN
  [[inputs.snmp.table]]
    name = "fdb"
    index_as_tag = true
    inherit_tags = ["hostname"]

    [[inputs.snmp.table.field]]
      name = "ifIndex"
      oid = "Q-BRIDGE-MIB::dot1qTpFdbPort"
      is_tag = true

    [[inputs.snmp.table.field]]
      name = "entry"
      oid = "Q-BRIDGE-MIB::dot1qTpFdbStatus"
||||||
|
# Resolve the "ifIndex" tag of fdb and iface metrics to the interface name,
# stored as the "iface" tag. This seems to need an SNMP connection of its
# own, hence the repeated credentials.
[[processors.snmp_lookup]]
  namepass = ["fdb", "iface"]
  agent_tag = "source"
  index_tag = "ifIndex"

  # credentials and protocol settings
  version = 3
  sec_level = "authPriv"
  auth_protocol = "SHA"
  priv_protocol = "DES"
  sec_name = "{{ password.snmp_user }}"
  auth_password = "{{ password.snmp_pass }}"
  priv_password = "{{ password.snmp_pass }}"

  [[processors.snmp_lookup.tag]]
    oid = "IF-MIB::ifName"
    name = "iface"
|
||||||
|
|
||||||
|
# Break the fdb "index" tag, e.g. 42.1.2.3.10.11.12, into the tags "vlan"
# and "mac1" through "mac6".
[[processors.regex]]
  namepass = ["fdb"]

  [[processors.regex.tags]]
    key = "index"
    pattern = '^(?P<vlan>\d+)\.(?P<mac1>\d+)\.(?P<mac2>\d+)\.(?P<mac3>\d+)\.(?P<mac4>\d+)\.(?P<mac5>\d+)\.(?P<mac6>\d+)'
|
||||||
|
|
||||||
|
# Join the "mac1".."mac6" tags into one "mac" tag formatted like
# 01:02:03:0a:0b:0c; the "ifIndex" and "index" tags are dropped.
# The raw block keeps Jinja from interpreting the Go template braces.
[[processors.template]]
  namepass = ["fdb"]
  tagexclude = ["ifIndex", "index"]
  tag = "mac"
{% raw %}
  template = '''{{
printf "%02x:%02x:%02x:%02x:%02x:%02x"
(.Tag "mac1"|int) (.Tag "mac2"|int) (.Tag "mac3"|int) (.Tag "mac4"|int) (.Tag "mac5"|int) (.Tag "mac6"|int)
}}'''
{% endraw %}
|
|
@ -21,6 +21,7 @@
|
||||||
- hosts: mgmt-gw
|
- hosts: mgmt-gw
|
||||||
roles:
|
roles:
|
||||||
- radvd # we are router for mgmt networks
|
- radvd # we are router for mgmt networks
|
||||||
|
- collector
|
||||||
|
|
||||||
- hosts: proxmox-backup
|
- hosts: proxmox-backup
|
||||||
roles:
|
roles:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue