Add collector role
Sets up prometheus to pull metrics, with telegraf to process SNMP data.
parent 6600a6fa36 · commit da3db8cc02
11 changed files with 228 additions and 0 deletions
roles/collector/README.md (new file)
@@ -0,0 +1,18 @@
Set up metric collection with prometheus and telegraf as the SNMP proxy.

NetBox config context should contain the lists `prometheus_hosts` and `snmp_hosts` with job definitions. Each entry should define `name` and an `nb_filter` used to query hosts from NetBox. For example:

{
  "prometheus_hosts": [
    {
      "name": "classroom",
      "nb_filter": "role=desktop-computer status=active location=classroom"
    }
  ],
  "snmp_hosts": [
    {
      "name": "switch",
      "nb_filter": "role=switch name__isw=sw- status=active status=staged status=planned"
    }
  ]
}
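For the `classroom` entry above, the `prometheus-job.yml.j2` template added later in this commit should render a scrape config roughly like the sketch below; the two targets are hypothetical placeholders for whatever the NetBox query returns.

scrape_configs:
  - job_name: "classroom"
    relabel_configs:
      - source_labels: [__address__]
        regex: '([^.]+).*'
        target_label: name
        replacement: ${1}
    static_configs:
      - targets:
          # hypothetical targets; the real list comes from the NetBox device query
          - "pc-01.example.org:9100"
          - "pc-02.example.org:9100"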
roles/collector/files/prometheus-snmp.yml (new file)
@@ -0,0 +1,4 @@
scrape_configs:
  - job_name: "snmp"
    static_configs:
      - targets: ["localhost:9273"]
roles/collector/files/prometheus.nft (new file)
@@ -0,0 +1,12 @@
table inet filter {
    chain output {
        type filter hook output priority 0; policy accept;

        skuid prometheus ct state { established, related } accept
        skuid prometheus th dport domain accept
        skuid prometheus tcp dport { 443, 9100 } accept comment "prometheus"
        skuid prometheus ip daddr 127.0.0.1 tcp dport 9090 accept comment "prometheus self"
        skuid prometheus ip daddr 127.0.0.1 tcp dport 9273 accept comment "telegraf snmp exporter"
        skuid prometheus drop
    }
}
roles/collector/files/telegraf.nft (new file)
@@ -0,0 +1,9 @@
table inet filter {
    chain output {
        type filter hook output priority 0; policy accept;

        skuid telegraf ct state { established, related } accept
        skuid telegraf th dport snmp accept
        skuid telegraf drop
    }
}
roles/collector/handlers/main.yml (new file)
@@ -0,0 +1,17 @@
- name: reload nftables
  service:
    name: nftables
    state: reloaded
  when: "'handler' not in ansible_skip_tags"

- name: reload prometheus
  service:
    name: prometheus
    state: reloaded
  when: "'handler' not in ansible_skip_tags"

- name: restart telegraf
  service:
    name: telegraf
    state: restarted  # seems to crash on reloads
  when: "'handler' not in ansible_skip_tags"
roles/collector/meta/main.yml (new file)
@@ -0,0 +1,3 @@
dependencies:
  - role: prometheus
  - role: telegraf
roles/collector/tasks/main.yml (new file)
@@ -0,0 +1,34 @@
# since this host likely has access to sensitive networks,
# restrict the destinations where monitoring daemons can connect
- name: Set up outbound firewall rules
  copy:
    dest: "/etc/nftables.d/{{ item }}.nft"
    src: "{{ item }}.nft"
  loop:
    - prometheus
    - telegraf
  notify: reload nftables

- name: Configure telegraf to expose SNMP data as prometheus metrics
  template:
    dest: "/etc/telegraf.conf.d/{{ item }}.conf"
    src: "{{ item }}.conf.j2"
  loop:
    - output
    - snmp
  notify: restart telegraf

- name: Configure prometheus to pull SNMP data
  copy:
    dest: "/etc/prometheus/conf.d/snmp.yml"
    src: "prometheus-snmp.yml"
  notify: reload prometheus

- name: Configure prometheus to pull custom data
  template:
    dest: "/etc/prometheus/conf.d/{{ item.name }}.yml"
    src: "prometheus-job.yml.j2"
  loop: "{{ prometheus_hosts }}"
  loop_control:
    label: "{{ item.name }}"
  notify: reload prometheus
roles/collector/templates/output.conf.j2 (new file)
@@ -0,0 +1,4 @@
[[outputs.prometheus_client]]
  listen = "127.0.0.1:9273"
  expiration_interval = "300s"
  tagexclude = ["mac?"]  # temporary tags we don't need to export
roles/collector/templates/prometheus-job.yml.j2 (new file)
@@ -0,0 +1,20 @@
{% set devices = query("netbox.netbox.nb_lookup", "devices", api_filter=item.nb_filter, raw_data=true)
   | selectattr("primary_ip")
   | map(attribute="name")
   | map("extract", hostvars) -%}

scrape_configs:
  - job_name: "{{ item.name }}"
    relabel_configs:
      - source_labels: [__address__]
        regex: '([^.]+).*'
        target_label: name
        replacement: ${1}
    static_configs:
      - targets:
{% for address in devices
   | selectattr("dns_name", "defined")
   | map(attribute="dns_name")
   | reject("none") | sort | unique %}
          - "{{ address }}:9100"
{% endfor %}
roles/collector/templates/snmp.conf.j2 (new file)
@@ -0,0 +1,106 @@
[[inputs.snmp]]
  interval = "300s"
  agent_host_tag = "source"
  agents = [
{% for item in snmp_hosts %}
{% for address in query("netbox.netbox.nb_lookup", "devices", api_filter=item.nb_filter, raw_data=true)
   | selectattr("primary_ip4") | map(attribute="primary_ip4.address")
   | ipaddr("address") %}
    "{{ address }}",
{% endfor %}
{% endfor %}
  ]
  version = 3
  sec_level = "authPriv"
  auth_protocol = "SHA"
  priv_protocol = "DES"
  sec_name = "{{ password.snmp_user }}"
  auth_password = "{{ password.snmp_pass }}"
  priv_password = "{{ password.snmp_pass }}"

  fieldexclude = ["ifDescr", "ifSpecific"]

  [[inputs.snmp.field]]
    name = "hostname"
    oid = "RFC1213-MIB::sysName.0"
    is_tag = true

  # interface table
  [[inputs.snmp.table]]
    name = "iface"
    oid = "IF-MIB::ifTable"
    inherit_tags = ["hostname"]

    [[inputs.snmp.table.field]]
      oid = "IF-MIB::ifName"

    # rename counters to make prometheus happy
    [[inputs.snmp.table.field]]
      name = "in_total"
      oid = "IF-MIB::ifInOctets"

    [[inputs.snmp.table.field]]
      name = "in_err_total"
      oid = "IF-MIB::ifInErrors"

    [[inputs.snmp.table.field]]
      name = "out_total"
      oid = "IF-MIB::ifOutOctets"

    [[inputs.snmp.table.field]]
      name = "out_err_total"
      oid = "IF-MIB::ifOutErrors"

  # MAC address table per VLAN
  [[inputs.snmp.table]]
    name = "fdb"
    index_as_tag = true
    inherit_tags = ["hostname"]

    [[inputs.snmp.table.field]]
      name = "ifIndex"
      oid = "Q-BRIDGE-MIB::dot1qTpFdbPort"
      is_tag = true

    [[inputs.snmp.table.field]]
      name = "entry"
      oid = "Q-BRIDGE-MIB::dot1qTpFdbStatus"

# look up interface name from its index
# seems we need another SNMP connection for that
[[processors.snmp_lookup]]
  namepass = ["fdb", "iface"]
  agent_tag = "source"
  index_tag = "ifIndex"

  version = 3
  sec_level = "authPriv"
  auth_protocol = "SHA"
  priv_protocol = "DES"
  sec_name = "{{ password.snmp_user }}"
  auth_password = "{{ password.snmp_pass }}"
  priv_password = "{{ password.snmp_pass }}"

  [[processors.snmp_lookup.tag]]
    oid = "IF-MIB::ifName"
    name = "iface"

# split index 42.1.2.3.10.11.12 into tags "vlan" and "mac1" to "mac6"
[[processors.regex]]
  namepass = ["fdb"]

  [[processors.regex.tags]]
    key = "index"
    pattern = '^(?P<vlan>\d+)\.(?P<mac1>\d+)\.(?P<mac2>\d+)\.(?P<mac3>\d+)\.(?P<mac4>\d+)\.(?P<mac5>\d+)\.(?P<mac6>\d+)'

# combine "mac*" tags into a single tag "mac" with value 01:02:03:0a:0b:0c
[[processors.template]]
  namepass = ["fdb"]
  tagexclude = ["ifIndex", "index"]
  tag = "mac"
{% raw %}
  template = '''{{
    printf "%02x:%02x:%02x:%02x:%02x:%02x"
    (.Tag "mac1"|int) (.Tag "mac2"|int) (.Tag "mac3"|int) (.Tag "mac4"|int) (.Tag "mac5"|int) (.Tag "mac6"|int)
  }}'''
{% endraw %}
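As a rough sketch of the resulting data: with telegraf's default measurement_field naming in the prometheus_client output, an entry from the fdb table above should surface as a series along these lines, where the switch, VLAN and MAC values are made up and the exact label set also depends on the agent's global tags:

fdb_entry{hostname="sw-01",source="10.0.0.1",vlan="42",mac="01:02:03:0a:0b:0c",iface="Gi1/0/24"} 3

The value is the raw dot1qTpFdbStatus code (3 = learned).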
@@ -21,6 +21,7 @@
 - hosts: mgmt-gw
   roles:
     - radvd # we are router for mgmt networks
+    - collector

 - hosts: proxmox-backup
   roles: