Compare commits
3 commits
f33a0b8a21
...
da3db8cc02
| Author | SHA1 | Date | |
|---|---|---|---|
| da3db8cc02 | |||
| 6600a6fa36 | |||
| d347fd7215 |
19 changed files with 280 additions and 100 deletions
18
roles/collector/README.md
Normal file
18
roles/collector/README.md
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
Set up metric collection with prometheus and telegraf as the SNMP proxy.
|
||||||
|
|
||||||
|
NetBox config context should contain the lists `prometheus_hosts` and `snmp_hosts` with job definitions. Each entry should define `name` and `nb_filter`, which is used to query hosts from NetBox. For example:
|
||||||
|
|
||||||
|
{
|
||||||
|
"prometheus_hosts": [
|
||||||
|
{
|
||||||
|
"name": "classroom",
|
||||||
|
"nb_filter": "role=desktop-computer status=active location=classroom"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"snmp_hosts": [
|
||||||
|
{
|
||||||
|
"name": "switch",
|
||||||
|
"nb_filter": "role=switch name__isw=sw- status=active status=staged status=planned"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
4
roles/collector/files/prometheus-snmp.yml
Normal file
4
roles/collector/files/prometheus-snmp.yml
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: "snmp"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["localhost:9273"]
|
||||||
12
roles/collector/files/prometheus.nft
Normal file
12
roles/collector/files/prometheus.nft
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
table inet filter {
|
||||||
|
chain output {
|
||||||
|
type filter hook output priority 0; policy accept;
|
||||||
|
|
||||||
|
skuid prometheus ct state { established, related } accept
|
||||||
|
skuid prometheus th dport domain accept
|
||||||
|
skuid prometheus tcp dport { 443, 9100 } accept comment "prometheus"
|
||||||
|
skuid prometheus ip daddr 127.0.0.1 tcp dport 9090 accept comment "prometheus self"
|
||||||
|
skuid prometheus ip daddr 127.0.0.1 tcp dport 9273 accept comment "telegraf snmp exporter"
|
||||||
|
skuid prometheus drop
|
||||||
|
}
|
||||||
|
}
|
||||||
9
roles/collector/files/telegraf.nft
Normal file
9
roles/collector/files/telegraf.nft
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
table inet filter {
|
||||||
|
chain output {
|
||||||
|
type filter hook output priority 0; policy accept;
|
||||||
|
|
||||||
|
skuid telegraf ct state { established, related } accept
|
||||||
|
skuid telegraf th dport snmp accept
|
||||||
|
skuid telegraf drop
|
||||||
|
}
|
||||||
|
}
|
||||||
17
roles/collector/handlers/main.yml
Normal file
17
roles/collector/handlers/main.yml
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
- name: reload nftables
|
||||||
|
service:
|
||||||
|
name: nftables
|
||||||
|
state: reloaded
|
||||||
|
when: "'handler' not in ansible_skip_tags"
|
||||||
|
|
||||||
|
- name: reload prometheus
|
||||||
|
service:
|
||||||
|
name: prometheus
|
||||||
|
state: reloaded
|
||||||
|
when: "'handler' not in ansible_skip_tags"
|
||||||
|
|
||||||
|
- name: restart telegraf
|
||||||
|
service:
|
||||||
|
name: telegraf
|
||||||
|
state: restarted # seems to crash on reloads
|
||||||
|
when: "'handler' not in ansible_skip_tags"
|
||||||
3
roles/collector/meta/main.yml
Normal file
3
roles/collector/meta/main.yml
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
dependencies:
|
||||||
|
- role: prometheus
|
||||||
|
- role: telegraf
|
||||||
34
roles/collector/tasks/main.yml
Normal file
34
roles/collector/tasks/main.yml
Normal file
|
|
@ -0,0 +1,34 @@
|
||||||
|
# since this host likely has access to sensitive networks,
# restrict the destinations where monitoring daemons can connect
- name: Set up outbound firewall rules
  copy:
    dest: "/etc/nftables.d/{{ item }}.nft"
    src: "{{ item }}.nft"
  loop:
    - prometheus
    - telegraf
  notify: reload nftables

# snmp.conf.j2 renders the SNMPv3 user name and passwords into the config,
# so keep the generated files readable by the telegraf user only.
- name: Configure telegraf to expose SNMP data as prometheus metrics
  template:
    dest: "/etc/telegraf.conf.d/{{ item }}.conf"
    src: "{{ item }}.conf.j2"
    owner: telegraf
    mode: "0600"
  loop:
    - output
    - snmp
  notify: restart telegraf

# static job that scrapes the local telegraf prometheus_client output
- name: Configure prometheus to pull SNMP data
  copy:
    dest: "/etc/prometheus/conf.d/snmp.yml"
    src: "prometheus-snmp.yml"
  notify: reload prometheus

# one scrape job file per entry in prometheus_hosts, named after the entry
- name: Configure prometheus to pull custom data
  template:
    dest: "/etc/prometheus/conf.d/{{ item.name }}.yml"
    src: "prometheus-job.yml.j2"
  loop: "{{ prometheus_hosts }}"
  loop_control:
    label: "{{ item.name }}"
  notify: reload prometheus
|
||||||
4
roles/collector/templates/output.conf.j2
Normal file
4
roles/collector/templates/output.conf.j2
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
[[outputs.prometheus_client]]
|
||||||
|
listen = "127.0.0.1:9273"
|
||||||
|
expiration_interval = "300s"
|
||||||
|
tagexclude = ["mac?"] # temporary tags we don’t need to export
|
||||||
20
roles/collector/templates/prometheus-job.yml.j2
Normal file
20
roles/collector/templates/prometheus-job.yml.j2
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
{#
  Render one prometheus scrape job for the loop item of the calling task.
  item.name      -> job name
  item.nb_filter -> NetBox API filter selecting the devices to scrape

  The filter is passed as a plain variable reference: moustaches nested inside
  a Jinja expression are not expanded by Jinja itself, so a quoted
  "{{ ... }}" would reach the lookup as literal text.
#}
{% set devices = query("netbox.netbox.nb_lookup", "devices", api_filter=item.nb_filter, raw_data=true)
    | selectattr("primary_ip")
    | map(attribute="name")
    | map("extract", hostvars) -%}

scrape_configs:
  - job_name: "{{ item.name }}"
    relabel_configs:
      # label "name" = first dot-separated component of the target address
      - source_labels: [__address__]
        regex: '([^.]+).*'
        target_label: name
        replacement: ${1}
    static_configs:
      - targets:
{# only hosts with a known DNS name are scraped, on port 9100 #}
{% for address in devices
    | selectattr("dns_name", "defined")
    | map(attribute="dns_name")
    | reject("none") | sort | unique %}
        - "{{ address }}:9100"
{% endfor %}
|
||||||
106
roles/collector/templates/snmp.conf.j2
Normal file
106
roles/collector/templates/snmp.conf.j2
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
[[inputs.snmp]]
|
||||||
|
interval = "300s"
|
||||||
|
agent_host_tag = "source"
|
||||||
|
agents = [
|
||||||
|
{% for item in snmp_hosts %}
|
||||||
|
{% for address in query("netbox.netbox.nb_lookup", "devices", api_filter=item.nb_filter, raw_data=true)
|
||||||
|
| selectattr("primary_ip4") | map(attribute="primary_ip4.address")
|
||||||
|
| ipaddr("address") %}
|
||||||
|
"{{ address }}",
|
||||||
|
{% endfor %}
|
||||||
|
{% endfor %}
|
||||||
|
]
|
||||||
|
version = 3
|
||||||
|
sec_level = "authPriv"
|
||||||
|
auth_protocol = "SHA"
|
||||||
|
priv_protocol = "DES"
|
||||||
|
sec_name = "{{ password.snmp_user }}"
|
||||||
|
auth_password = "{{ password.snmp_pass }}"
|
||||||
|
priv_password = "{{ password.snmp_pass }}"
|
||||||
|
|
||||||
|
fieldexclude = ["ifDescr", "ifSpecific"]
|
||||||
|
|
||||||
|
[[inputs.snmp.field]]
|
||||||
|
name = "hostname"
|
||||||
|
oid = "RFC1213-MIB::sysName.0"
|
||||||
|
is_tag = true
|
||||||
|
|
||||||
|
# interface table
|
||||||
|
[[inputs.snmp.table]]
|
||||||
|
name = "iface"
|
||||||
|
oid = "IF-MIB::ifTable"
|
||||||
|
inherit_tags = ["hostname"]
|
||||||
|
|
||||||
|
[[inputs.snmp.table.field]]
|
||||||
|
oid = "IF-MIB::ifName"
|
||||||
|
|
||||||
|
# rename counters to make prometheus happy
|
||||||
|
[[inputs.snmp.table.field]]
|
||||||
|
name = "in_total"
|
||||||
|
oid = "IF-MIB::ifInOctets"
|
||||||
|
|
||||||
|
[[inputs.snmp.table.field]]
|
||||||
|
name = "in_err_total"
|
||||||
|
oid = "IF-MIB::ifInErrors"
|
||||||
|
|
||||||
|
[[inputs.snmp.table.field]]
|
||||||
|
name = "out_total"
|
||||||
|
oid = "IF-MIB::ifOutOctets"
|
||||||
|
|
||||||
|
[[inputs.snmp.table.field]]
|
||||||
|
name = "out_err_total"
|
||||||
|
oid = "IF-MIB::ifOutErrors"
|
||||||
|
|
||||||
|
# MAC address table per VLAN
|
||||||
|
[[inputs.snmp.table]]
|
||||||
|
name = "fdb"
|
||||||
|
index_as_tag = true
|
||||||
|
inherit_tags = ["hostname"]
|
||||||
|
|
||||||
|
[[inputs.snmp.table.field]]
|
||||||
|
name = "ifIndex"
|
||||||
|
oid = "Q-BRIDGE-MIB::dot1qTpFdbPort"
|
||||||
|
is_tag = true
|
||||||
|
|
||||||
|
[[inputs.snmp.table.field]]
|
||||||
|
name = "entry"
|
||||||
|
oid = "Q-BRIDGE-MIB::dot1qTpFdbStatus"
|
||||||
|
|
||||||
|
# look up interface name from its index
|
||||||
|
# seems we need another SNMP connection for that
|
||||||
|
[[processors.snmp_lookup]]
|
||||||
|
namepass = ["fdb", "iface"]
|
||||||
|
agent_tag = "source"
|
||||||
|
index_tag = "ifIndex"
|
||||||
|
|
||||||
|
version = 3
|
||||||
|
sec_level = "authPriv"
|
||||||
|
auth_protocol = "SHA"
|
||||||
|
priv_protocol = "DES"
|
||||||
|
sec_name = "{{ password.snmp_user }}"
|
||||||
|
auth_password = "{{ password.snmp_pass }}"
|
||||||
|
priv_password = "{{ password.snmp_pass }}"
|
||||||
|
|
||||||
|
[[processors.snmp_lookup.tag]]
|
||||||
|
oid = "IF-MIB::ifName"
|
||||||
|
name = "iface"
|
||||||
|
|
||||||
|
# split index 42.1.2.3.10.11.12 into tags "vlan" and "mac1" to "mac6"
|
||||||
|
[[processors.regex]]
|
||||||
|
namepass = ["fdb"]
|
||||||
|
|
||||||
|
[[processors.regex.tags]]
|
||||||
|
key = "index"
|
||||||
|
pattern = '^(?P<vlan>\d+)\.(?P<mac1>\d+)\.(?P<mac2>\d+)\.(?P<mac3>\d+)\.(?P<mac4>\d+)\.(?P<mac5>\d+)\.(?P<mac6>\d+)'
|
||||||
|
|
||||||
|
# combine "mac*" tags into a single tag "mac" with value 01:02:03:0a:0b:0c
|
||||||
|
[[processors.template]]
|
||||||
|
namepass = ["fdb"]
|
||||||
|
tagexclude = ["ifIndex", "index"]
|
||||||
|
tag = "mac"
|
||||||
|
{% raw %}
|
||||||
|
template = '''{{
|
||||||
|
printf "%02x:%02x:%02x:%02x:%02x:%02x"
|
||||||
|
(.Tag "mac1"|int) (.Tag "mac2"|int) (.Tag "mac3"|int) (.Tag "mac4"|int) (.Tag "mac5"|int) (.Tag "mac6"|int)
|
||||||
|
}}'''
|
||||||
|
{% endraw %}
|
||||||
3
roles/prometheus/README.md
Normal file
3
roles/prometheus/README.md
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
Install and configure prometheus.
|
||||||
|
|
||||||
|
Job definitions should be placed in /etc/prometheus/conf.d by roles using this one.
|
||||||
2
roles/prometheus/files/prometheus.yml
Normal file
2
roles/prometheus/files/prometheus.yml
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
scrape_config_files:
|
||||||
|
- "conf.d/*.yml"
|
||||||
5
roles/prometheus/handlers/main.yml
Normal file
5
roles/prometheus/handlers/main.yml
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
- name: reload prometheus
|
||||||
|
service:
|
||||||
|
name: prometheus
|
||||||
|
state: reloaded
|
||||||
|
when: "'handler' not in ansible_skip_tags"
|
||||||
21
roles/prometheus/tasks/main.yml
Normal file
21
roles/prometheus/tasks/main.yml
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
- name: Install packages
|
||||||
|
package:
|
||||||
|
name:
|
||||||
|
- prometheus
|
||||||
|
|
||||||
|
- name: Create directory for prometheus configs
|
||||||
|
file:
|
||||||
|
path: /etc/prometheus/conf.d
|
||||||
|
state: directory
|
||||||
|
|
||||||
|
- name: Configure prometheus
|
||||||
|
copy:
|
||||||
|
dest: /etc/prometheus/
|
||||||
|
src: prometheus.yml
|
||||||
|
notify: reload prometheus
|
||||||
|
|
||||||
|
- name: Enable prometheus service
|
||||||
|
service:
|
||||||
|
name: prometheus
|
||||||
|
enabled: true
|
||||||
|
state: started
|
||||||
|
|
@ -1,31 +0,0 @@
|
||||||
- name: Add influxdb repository
|
|
||||||
deb822_repository:
|
|
||||||
name: influxdata
|
|
||||||
uris: https://repos.influxdata.com/debian
|
|
||||||
suites: stable
|
|
||||||
components: main
|
|
||||||
architectures: amd64
|
|
||||||
signed_by: https://repos.influxdata.com/influxdata-archive.key
|
|
||||||
notify: update package cache
|
|
||||||
|
|
||||||
- meta: flush_handlers
|
|
||||||
|
|
||||||
- name: Install telegraf
|
|
||||||
package:
|
|
||||||
name: telegraf
|
|
||||||
|
|
||||||
- name: Configure telegraf
|
|
||||||
when: not ansible_check_mode
|
|
||||||
template:
|
|
||||||
dest: /etc/telegraf/telegraf.d/output.conf
|
|
||||||
src: output.conf.j2
|
|
||||||
owner: telegraf
|
|
||||||
group: telegraf
|
|
||||||
mode: 0640
|
|
||||||
notify: restart telegraf
|
|
||||||
|
|
||||||
- name: Enable telegraf
|
|
||||||
service:
|
|
||||||
name: telegraf
|
|
||||||
enabled: true
|
|
||||||
state: started
|
|
||||||
|
|
@ -1,11 +1,22 @@
|
||||||
- name: Get influxdb info
|
- name: Add telegraf package repo on Debian
|
||||||
set_fact:
|
|
||||||
influxdb_info: '{{ lookup("passwordstore", "vm/"~influxdb_host, returnall=true, missing="empty") | from_yaml }}'
|
|
||||||
|
|
||||||
- name: Create influxdb token for this host
|
|
||||||
include_tasks: token.yml
|
|
||||||
when: 'not ansible_check_mode and "influxdb_token" not in password'
|
|
||||||
|
|
||||||
- name: Install telegraf on Debian
|
|
||||||
include_tasks: debian.yml
|
|
||||||
when: ansible_os_family == "Debian"
|
when: ansible_os_family == "Debian"
|
||||||
|
deb822_repository:
|
||||||
|
name: influxdata
|
||||||
|
uris: https://repos.influxdata.com/debian
|
||||||
|
suites: stable
|
||||||
|
components: main
|
||||||
|
architectures: amd64
|
||||||
|
signed_by: https://repos.influxdata.com/influxdata-archive.key
|
||||||
|
notify: update package cache
|
||||||
|
|
||||||
|
- meta: flush_handlers
|
||||||
|
|
||||||
|
- name: Install telegraf
|
||||||
|
package:
|
||||||
|
name: telegraf
|
||||||
|
|
||||||
|
- name: Enable telegraf service
|
||||||
|
service:
|
||||||
|
name: telegraf
|
||||||
|
enabled: true
|
||||||
|
state: started
|
||||||
|
|
|
||||||
|
|
@ -1,53 +0,0 @@
|
||||||
- name: Get influxdb organization ID
|
|
||||||
delegate_to: localhost
|
|
||||||
uri:
|
|
||||||
url: '{{ influxdb_info.influxdb_url }}/api/v2/orgs'
|
|
||||||
headers:
|
|
||||||
Authorization: Token {{ influxdb_info.influxdb_operator_token }}
|
|
||||||
register: response
|
|
||||||
|
|
||||||
- name: Parse influxdb orgID
|
|
||||||
set_fact:
|
|
||||||
influxdb_orgID: '{{ response.json.orgs | selectattr("name", "==", influxdb_info.influxdb_org) | map(attribute="id") | first }}'
|
|
||||||
|
|
||||||
- name: Get influxdb bucket ID
|
|
||||||
delegate_to: localhost
|
|
||||||
uri:
|
|
||||||
url: '{{ influxdb_info.influxdb_url }}/api/v2/buckets?orgID={{ influxdb_orgID }}'
|
|
||||||
headers:
|
|
||||||
Authorization: Token {{ influxdb_info.influxdb_operator_token }}
|
|
||||||
register: response
|
|
||||||
|
|
||||||
- name: Parse influxdb bucketID
|
|
||||||
set_fact:
|
|
||||||
influxdb_bucketID: '{{ response.json.buckets | selectattr("name", "==", "servers") | map(attribute="id") | first }}'
|
|
||||||
|
|
||||||
- name: Create influxdb token
|
|
||||||
delegate_to: localhost
|
|
||||||
uri:
|
|
||||||
url: '{{ influxdb_info.influxdb_url }}/api/v2/authorizations'
|
|
||||||
method: POST
|
|
||||||
body_format: json
|
|
||||||
status_code: 201
|
|
||||||
headers:
|
|
||||||
Authorization: Token {{ influxdb_info.influxdb_operator_token }}
|
|
||||||
Content-Type: application/json
|
|
||||||
body: |
|
|
||||||
{
|
|
||||||
"description": "{{ inventory_hostname }}",
|
|
||||||
"orgID": "{{ influxdb_orgID }}",
|
|
||||||
"permissions": [{ "action": "write", "resource": { "type": "buckets", "id": "{{ influxdb_bucketID }}" } }]
|
|
||||||
}
|
|
||||||
register: response
|
|
||||||
|
|
||||||
- name: Parse influxdb token
|
|
||||||
set_fact:
|
|
||||||
influxdb_token: '{{ response.json.token }}'
|
|
||||||
|
|
||||||
# Ansible’s passwordstore lookup plugin should be able to do that but is pretty broken,
|
|
||||||
# so we do it manually.
|
|
||||||
- name: Store influxdb token in password store
|
|
||||||
delegate_to: localhost
|
|
||||||
command:
|
|
||||||
cmd: 'pass insert --force --multiline {{ ("vm/" if is_virtual else "host/")~inventory_hostname }}'
|
|
||||||
stdin: '{{ password | to_nice_yaml(sort_keys=false) }}influxdb_token: {{ influxdb_token }}'
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
[[outputs.influxdb_v2]]
|
|
||||||
urls = ["{{ influxdb_info.influxdb_url }}"]
|
|
||||||
organization = "{{ influxdb_info.influxdb_org }}"
|
|
||||||
bucket = "{{ influxdb_info.influxdb_bucket }}"
|
|
||||||
token = "{{ influxdb_token | default(password.influxdb_token) }}"
|
|
||||||
|
|
@ -16,12 +16,12 @@
|
||||||
- hosts: ceph-*
|
- hosts: ceph-*
|
||||||
roles:
|
roles:
|
||||||
- frr
|
- frr
|
||||||
- telegraf
|
|
||||||
- ceph
|
- ceph
|
||||||
|
|
||||||
- hosts: mgmt-gw
|
- hosts: mgmt-gw
|
||||||
roles:
|
roles:
|
||||||
- radvd # we are router for mgmt networks
|
- radvd # we are router for mgmt networks
|
||||||
|
- collector
|
||||||
|
|
||||||
- hosts: proxmox-backup
|
- hosts: proxmox-backup
|
||||||
roles:
|
roles:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue