Compare commits
3 commits
f33a0b8a21
...
da3db8cc02
| Author | SHA1 | Date | |
|---|---|---|---|
| da3db8cc02 | |||
| 6600a6fa36 | |||
| d347fd7215 |
19 changed files with 280 additions and 100 deletions
18
roles/collector/README.md
Normal file
18
roles/collector/README.md
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
Set up metric collection with prometheus and telegraf as the SNMP proxy.
|
||||
|
||||
NetBox config context should contain the lists `prometheus_hosts` and `snmp_hosts` with job definitions. Each entry should define a `name` and the `nb_filter` used to query hosts from NetBox. For example:
|
||||
|
||||
{
|
||||
"prometheus_hosts": [
|
||||
{
|
||||
"name": "classroom",
|
||||
"nb_filter": "role=desktop-computer status=active location=classroom"
|
||||
}
|
||||
],
|
||||
"snmp_hosts": [
|
||||
{
|
||||
"name": "switch",
|
||||
"nb_filter": "role=switch name__isw=sw- status=active status=staged status=planned"
|
||||
}
|
||||
]
|
||||
}
|
||||
4
roles/collector/files/prometheus-snmp.yml
Normal file
4
roles/collector/files/prometheus-snmp.yml
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
scrape_configs:
|
||||
- job_name: "snmp"
|
||||
static_configs:
|
||||
- targets: ["localhost:9273"]
|
||||
12
roles/collector/files/prometheus.nft
Normal file
12
roles/collector/files/prometheus.nft
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
table inet filter {
|
||||
chain output {
|
||||
type filter hook output priority 0; policy accept;
|
||||
|
||||
skuid prometheus ct state { established, related } accept
|
||||
skuid prometheus th dport domain accept
|
||||
skuid prometheus tcp dport { 443, 9100 } accept comment "prometheus"
|
||||
skuid prometheus ip daddr 127.0.0.1 tcp dport 9090 accept comment "prometheus self"
|
||||
skuid prometheus ip daddr 127.0.0.1 tcp dport 9273 accept comment "telegraf snmp exporter"
|
||||
skuid prometheus drop
|
||||
}
|
||||
}
|
||||
9
roles/collector/files/telegraf.nft
Normal file
9
roles/collector/files/telegraf.nft
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
table inet filter {
|
||||
chain output {
|
||||
type filter hook output priority 0; policy accept;
|
||||
|
||||
skuid telegraf ct state { established, related } accept
|
||||
skuid telegraf th dport snmp accept
|
||||
skuid telegraf drop
|
||||
}
|
||||
}
|
||||
17
roles/collector/handlers/main.yml
Normal file
17
roles/collector/handlers/main.yml
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
- name: reload nftables
|
||||
service:
|
||||
name: nftables
|
||||
state: reloaded
|
||||
when: "'handler' not in ansible_skip_tags"
|
||||
|
||||
- name: reload prometheus
|
||||
service:
|
||||
name: prometheus
|
||||
state: reloaded
|
||||
when: "'handler' not in ansible_skip_tags"
|
||||
|
||||
- name: restart telegraf
|
||||
service:
|
||||
name: telegraf
|
||||
state: restarted # seems to crash on reloads
|
||||
when: "'handler' not in ansible_skip_tags"
|
||||
3
roles/collector/meta/main.yml
Normal file
3
roles/collector/meta/main.yml
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
dependencies:
|
||||
- role: prometheus
|
||||
- role: telegraf
|
||||
34
roles/collector/tasks/main.yml
Normal file
34
roles/collector/tasks/main.yml
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
# since this host likely has access to sensitive networks,
|
||||
# restrict the destinations where monitoring daemons can connect
|
||||
- name: Set up outbound firewall rules
|
||||
copy:
|
||||
dest: "/etc/nftables.d/{{ item }}.nft"
|
||||
src: "{{ item }}.nft"
|
||||
loop:
|
||||
- prometheus
|
||||
- telegraf
|
||||
notify: reload nftables
|
||||
|
||||
- name: Configure telegraf to expose SNMP data as prometheus metrics
|
||||
template:
|
||||
dest: "/etc/telegraf.conf.d/{{ item }}.conf"
|
||||
src: "{{ item }}.conf.j2"
|
||||
loop:
|
||||
- output
|
||||
- snmp
|
||||
notify: restart telegraf
|
||||
|
||||
- name: Configure prometheus to pull SNMP data
|
||||
copy:
|
||||
dest: "/etc/prometheus/conf.d/snmp.yml"
|
||||
src: "prometheus-snmp.yml"
|
||||
notify: reload prometheus
|
||||
|
||||
- name: Configure prometheus to pull custom data
|
||||
template:
|
||||
dest: "/etc/prometheus/conf.d/{{ item.name }}.yml"
|
||||
src: "prometheus-job.yml.j2"
|
||||
loop: "{{ prometheus_hosts }}"
|
||||
loop_control:
|
||||
label: "{{ item.name }}"
|
||||
notify: reload prometheus
|
||||
4
roles/collector/templates/output.conf.j2
Normal file
4
roles/collector/templates/output.conf.j2
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
[[outputs.prometheus_client]]
|
||||
listen = "127.0.0.1:9273"
|
||||
expiration_interval = "300s"
|
||||
tagexclude = ["mac?"] # temporary tags we don’t need to export
|
||||
20
roles/collector/templates/prometheus-job.yml.j2
Normal file
20
roles/collector/templates/prometheus-job.yml.j2
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
{#- Pass the filter variable directly: "{{ ... }}" inside a Jinja expression is just a string literal. -#}
{% set devices = query("netbox.netbox.nb_lookup", "devices", api_filter=item.nb_filter, raw_data=true)
|
||||
| selectattr("primary_ip")
|
||||
| map(attribute="name")
|
||||
| map("extract", hostvars) -%}
|
||||
|
||||
scrape_configs:
|
||||
- job_name: "{{ item.name }}"
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
regex: '([^.]+).*'
|
||||
target_label: name
|
||||
replacement: ${1}
|
||||
static_configs:
|
||||
- targets:
|
||||
{% for address in devices
|
||||
| selectattr("dns_name", "defined")
|
||||
| map(attribute="dns_name")
|
||||
| reject("none") | sort | unique %}
|
||||
- "{{ address }}:9100"
|
||||
{% endfor %}
|
||||
106
roles/collector/templates/snmp.conf.j2
Normal file
106
roles/collector/templates/snmp.conf.j2
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
[[inputs.snmp]]
|
||||
interval = "300s"
|
||||
agent_host_tag = "source"
|
||||
agents = [
|
||||
{% for item in snmp_hosts %}
|
||||
{% for address in query("netbox.netbox.nb_lookup", "devices", api_filter=item.nb_filter, raw_data=true)
|
||||
| selectattr("primary_ip4") | map(attribute="primary_ip4.address")
|
||||
| ansible.utils.ipaddr("address") %}
|
||||
"{{ address }}",
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
]
|
||||
version = 3
|
||||
sec_level = "authPriv"
|
||||
auth_protocol = "SHA"
|
||||
priv_protocol = "DES"
|
||||
sec_name = "{{ password.snmp_user }}"
|
||||
auth_password = "{{ password.snmp_pass }}"
|
||||
priv_password = "{{ password.snmp_pass }}"
|
||||
|
||||
fieldexclude = ["ifDescr", "ifSpecific"]
|
||||
|
||||
[[inputs.snmp.field]]
|
||||
name = "hostname"
|
||||
oid = "RFC1213-MIB::sysName.0"
|
||||
is_tag = true
|
||||
|
||||
# interface table
|
||||
[[inputs.snmp.table]]
|
||||
name = "iface"
|
||||
oid = "IF-MIB::ifTable"
|
||||
inherit_tags = ["hostname"]
|
||||
|
||||
[[inputs.snmp.table.field]]
|
||||
oid = "IF-MIB::ifName"
|
||||
|
||||
# rename counters to make prometheus happy
|
||||
[[inputs.snmp.table.field]]
|
||||
name = "in_total"
|
||||
oid = "IF-MIB::ifInOctets"
|
||||
|
||||
[[inputs.snmp.table.field]]
|
||||
name = "in_err_total"
|
||||
oid = "IF-MIB::ifInErrors"
|
||||
|
||||
[[inputs.snmp.table.field]]
|
||||
name = "out_total"
|
||||
oid = "IF-MIB::ifOutOctets"
|
||||
|
||||
[[inputs.snmp.table.field]]
|
||||
name = "out_err_total"
|
||||
oid = "IF-MIB::ifOutErrors"
|
||||
|
||||
# MAC address table per VLAN
|
||||
[[inputs.snmp.table]]
|
||||
name = "fdb"
|
||||
index_as_tag = true
|
||||
inherit_tags = ["hostname"]
|
||||
|
||||
[[inputs.snmp.table.field]]
|
||||
name = "ifIndex"
|
||||
oid = "Q-BRIDGE-MIB::dot1qTpFdbPort"
|
||||
is_tag = true
|
||||
|
||||
[[inputs.snmp.table.field]]
|
||||
name = "entry"
|
||||
oid = "Q-BRIDGE-MIB::dot1qTpFdbStatus"
|
||||
|
||||
# look up interface name from its index
|
||||
# seems we need another SNMP connection for that
|
||||
[[processors.snmp_lookup]]
|
||||
namepass = ["fdb", "iface"]
|
||||
agent_tag = "source"
|
||||
index_tag = "ifIndex"
|
||||
|
||||
version = 3
|
||||
sec_level = "authPriv"
|
||||
auth_protocol = "SHA"
|
||||
priv_protocol = "DES"
|
||||
sec_name = "{{ password.snmp_user }}"
|
||||
auth_password = "{{ password.snmp_pass }}"
|
||||
priv_password = "{{ password.snmp_pass }}"
|
||||
|
||||
[[processors.snmp_lookup.tag]]
|
||||
oid = "IF-MIB::ifName"
|
||||
name = "iface"
|
||||
|
||||
# split index 42.1.2.3.10.11.12 into tags "vlan" and "mac1" to "mac6"
|
||||
[[processors.regex]]
|
||||
namepass = ["fdb"]
|
||||
|
||||
[[processors.regex.tags]]
|
||||
key = "index"
|
||||
pattern = '^(?P<vlan>\d+)\.(?P<mac1>\d+)\.(?P<mac2>\d+)\.(?P<mac3>\d+)\.(?P<mac4>\d+)\.(?P<mac5>\d+)\.(?P<mac6>\d+)'
|
||||
|
||||
# combine "mac*" tags into a single tag "mac" with value 01:02:03:0a:0b:0c
|
||||
[[processors.template]]
|
||||
namepass = ["fdb"]
|
||||
tagexclude = ["ifIndex", "index"]
|
||||
tag = "mac"
|
||||
{% raw %}
|
||||
template = '''{{
|
||||
printf "%02x:%02x:%02x:%02x:%02x:%02x"
|
||||
(.Tag "mac1"|int) (.Tag "mac2"|int) (.Tag "mac3"|int) (.Tag "mac4"|int) (.Tag "mac5"|int) (.Tag "mac6"|int)
|
||||
}}'''
|
||||
{% endraw %}
|
||||
3
roles/prometheus/README.md
Normal file
3
roles/prometheus/README.md
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
Install and configure prometheus.
|
||||
|
||||
Job definitions should be placed in /etc/prometheus/conf.d by roles using this one.
|
||||
2
roles/prometheus/files/prometheus.yml
Normal file
2
roles/prometheus/files/prometheus.yml
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
scrape_config_files:
|
||||
- "conf.d/*.yml"
|
||||
5
roles/prometheus/handlers/main.yml
Normal file
5
roles/prometheus/handlers/main.yml
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
- name: reload prometheus
|
||||
service:
|
||||
name: prometheus
|
||||
state: reloaded
|
||||
when: "'handler' not in ansible_skip_tags"
|
||||
21
roles/prometheus/tasks/main.yml
Normal file
21
roles/prometheus/tasks/main.yml
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
- name: Install packages
|
||||
package:
|
||||
name:
|
||||
- prometheus
|
||||
|
||||
- name: Create directory for prometheus configs
|
||||
file:
|
||||
path: /etc/prometheus/conf.d
|
||||
state: directory
|
||||
|
||||
- name: Configure prometheus
|
||||
copy:
|
||||
dest: /etc/prometheus/
|
||||
src: prometheus.yml
|
||||
notify: reload prometheus
|
||||
|
||||
- name: Enable prometheus service
|
||||
service:
|
||||
name: prometheus
|
||||
enabled: true
|
||||
state: started
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
- name: Add influxdb repository
|
||||
deb822_repository:
|
||||
name: influxdata
|
||||
uris: https://repos.influxdata.com/debian
|
||||
suites: stable
|
||||
components: main
|
||||
architectures: amd64
|
||||
signed_by: https://repos.influxdata.com/influxdata-archive.key
|
||||
notify: update package cache
|
||||
|
||||
- meta: flush_handlers
|
||||
|
||||
- name: Install telegraf
|
||||
package:
|
||||
name: telegraf
|
||||
|
||||
- name: Configure telegraf
|
||||
when: not ansible_check_mode
|
||||
template:
|
||||
dest: /etc/telegraf/telegraf.d/output.conf
|
||||
src: output.conf.j2
|
||||
owner: telegraf
|
||||
group: telegraf
|
||||
mode: 0640
|
||||
notify: restart telegraf
|
||||
|
||||
- name: Enable telegraf
|
||||
service:
|
||||
name: telegraf
|
||||
enabled: true
|
||||
state: started
|
||||
|
|
@ -1,11 +1,22 @@
|
|||
- name: Get influxdb info
|
||||
set_fact:
|
||||
influxdb_info: '{{ lookup("passwordstore", "vm/"~influxdb_host, returnall=true, missing="empty") | from_yaml }}'
|
||||
|
||||
- name: Create influxdb token for this host
|
||||
include_tasks: token.yml
|
||||
when: 'not ansible_check_mode and "influxdb_token" not in password'
|
||||
|
||||
- name: Install telegraf on Debian
|
||||
include_tasks: debian.yml
|
||||
- name: Add telegraf package repo on Debian
|
||||
when: ansible_os_family == "Debian"
|
||||
deb822_repository:
|
||||
name: influxdata
|
||||
uris: https://repos.influxdata.com/debian
|
||||
suites: stable
|
||||
components: main
|
||||
architectures: amd64
|
||||
signed_by: https://repos.influxdata.com/influxdata-archive.key
|
||||
notify: update package cache
|
||||
|
||||
- meta: flush_handlers
|
||||
|
||||
- name: Install telegraf
|
||||
package:
|
||||
name: telegraf
|
||||
|
||||
- name: Enable telegraf service
|
||||
service:
|
||||
name: telegraf
|
||||
enabled: true
|
||||
state: started
|
||||
|
|
|
|||
|
|
@ -1,53 +0,0 @@
|
|||
- name: Get influxdb organization ID
|
||||
delegate_to: localhost
|
||||
uri:
|
||||
url: '{{ influxdb_info.influxdb_url }}/api/v2/orgs'
|
||||
headers:
|
||||
Authorization: Token {{ influxdb_info.influxdb_operator_token }}
|
||||
register: response
|
||||
|
||||
- name: Parse influxdb orgID
|
||||
set_fact:
|
||||
influxdb_orgID: '{{ response.json.orgs | selectattr("name", "==", influxdb_info.influxdb_org) | map(attribute="id") | first }}'
|
||||
|
||||
- name: Get influxdb bucket ID
|
||||
delegate_to: localhost
|
||||
uri:
|
||||
url: '{{ influxdb_info.influxdb_url }}/api/v2/buckets?orgID={{ influxdb_orgID }}'
|
||||
headers:
|
||||
Authorization: Token {{ influxdb_info.influxdb_operator_token }}
|
||||
register: response
|
||||
|
||||
- name: Parse influxdb bucketID
|
||||
set_fact:
|
||||
influxdb_bucketID: '{{ response.json.buckets | selectattr("name", "==", "servers") | map(attribute="id") | first }}'
|
||||
|
||||
- name: Create influxdb token
|
||||
delegate_to: localhost
|
||||
uri:
|
||||
url: '{{ influxdb_info.influxdb_url }}/api/v2/authorizations'
|
||||
method: POST
|
||||
body_format: json
|
||||
status_code: 201
|
||||
headers:
|
||||
Authorization: Token {{ influxdb_info.influxdb_operator_token }}
|
||||
Content-Type: application/json
|
||||
body: |
|
||||
{
|
||||
"description": "{{ inventory_hostname }}",
|
||||
"orgID": "{{ influxdb_orgID }}",
|
||||
"permissions": [{ "action": "write", "resource": { "type": "buckets", "id": "{{ influxdb_bucketID }}" } }]
|
||||
}
|
||||
register: response
|
||||
|
||||
- name: Parse influxdb token
|
||||
set_fact:
|
||||
influxdb_token: '{{ response.json.token }}'
|
||||
|
||||
# Ansible’s passwordstore lookup plugin should be able to do that but is pretty broken,
|
||||
# so we do it manually.
|
||||
- name: Store influxdb token in password store
|
||||
delegate_to: localhost
|
||||
command:
|
||||
cmd: 'pass insert --force --multiline {{ ("vm/" if is_virtual else "host/")~inventory_hostname }}'
|
||||
stdin: '{{ password | to_nice_yaml(sort_keys=false) }}influxdb_token: {{ influxdb_token }}'
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
[[outputs.influxdb_v2]]
|
||||
urls = ["{{ influxdb_info.influxdb_url }}"]
|
||||
organization = "{{ influxdb_info.influxdb_org }}"
|
||||
bucket = "{{ influxdb_info.influxdb_bucket }}"
|
||||
token = "{{ influxdb_token | default(password.influxdb_token) }}"
|
||||
|
|
@ -16,12 +16,12 @@
|
|||
- hosts: ceph-*
|
||||
roles:
|
||||
- frr
|
||||
- telegraf
|
||||
- ceph
|
||||
|
||||
- hosts: mgmt-gw
|
||||
roles:
|
||||
- radvd # we are router for mgmt networks
|
||||
- collector
|
||||
|
||||
- hosts: proxmox-backup
|
||||
roles:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue