Compare commits
No commits in common. "da3db8cc02cd2718c0364feff7a3bc15afa5fd29" and "f33a0b8a213782d04f0abed921825687e65fabd2" have entirely different histories.
da3db8cc02
...
f33a0b8a21
19 changed files with 100 additions and 280 deletions
|
|
@ -1,18 +0,0 @@
|
||||||
Set up metric collection with prometheus and telegraf as the SNMP proxy.
|
|
||||||
|
|
||||||
NetBox config context should contain the lists `prometheus_hosts` and `snmp_hosts` with job definitions. Each entry should define `name` and the `nb_filter` used to query hosts from NetBox. For example:
|
|
||||||
|
|
||||||
{
|
|
||||||
"prometheus_hosts": [
|
|
||||||
{
|
|
||||||
"name": "classroom",
|
|
||||||
"nb_filter": "role=desktop-computer status=active location=classroom"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"snmp_hosts": [
|
|
||||||
{
|
|
||||||
"name": "switch",
|
|
||||||
"nb_filter": "role=switch name__isw=sw- status=active status=staged status=planned"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
@ -1,4 +0,0 @@
|
||||||
scrape_configs:
|
|
||||||
- job_name: "snmp"
|
|
||||||
static_configs:
|
|
||||||
- targets: ["localhost:9273"]
|
|
||||||
|
|
@ -1,12 +0,0 @@
|
||||||
table inet filter {
|
|
||||||
chain output {
|
|
||||||
type filter hook output priority 0; policy accept;
|
|
||||||
|
|
||||||
skuid prometheus ct state { established, related } accept
|
|
||||||
skuid prometheus th dport domain accept
|
|
||||||
skuid prometheus tcp dport { 443, 9100 } accept comment "prometheus"
|
|
||||||
skuid prometheus ip daddr 127.0.0.1 tcp dport 9090 accept comment "prometheus self"
|
|
||||||
skuid prometheus ip daddr 127.0.0.1 tcp dport 9273 accept comment "telegraf snmp exporter"
|
|
||||||
skuid prometheus drop
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,9 +0,0 @@
|
||||||
table inet filter {
|
|
||||||
chain output {
|
|
||||||
type filter hook output priority 0; policy accept;
|
|
||||||
|
|
||||||
skuid telegraf ct state { established, related } accept
|
|
||||||
skuid telegraf th dport snmp accept
|
|
||||||
skuid telegraf drop
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,17 +0,0 @@
|
||||||
- name: reload nftables
|
|
||||||
service:
|
|
||||||
name: nftables
|
|
||||||
state: reloaded
|
|
||||||
when: "'handler' not in ansible_skip_tags"
|
|
||||||
|
|
||||||
- name: reload prometheus
|
|
||||||
service:
|
|
||||||
name: prometheus
|
|
||||||
state: reloaded
|
|
||||||
when: "'handler' not in ansible_skip_tags"
|
|
||||||
|
|
||||||
- name: restart telegraf
|
|
||||||
service:
|
|
||||||
name: telegraf
|
|
||||||
state: restarted # seems to crash on reloads
|
|
||||||
when: "'handler' not in ansible_skip_tags"
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
dependencies:
|
|
||||||
- role: prometheus
|
|
||||||
- role: telegraf
|
|
||||||
|
|
@ -1,34 +0,0 @@
|
||||||
# since this host likely has access to sensitive networks,
|
|
||||||
# restrict the destinations where monitoring daemons can connect
|
|
||||||
- name: Set up outbound firewall rules
|
|
||||||
copy:
|
|
||||||
dest: "/etc/nftables.d/{{ item }}.nft"
|
|
||||||
src: "{{ item }}.nft"
|
|
||||||
loop:
|
|
||||||
- prometheus
|
|
||||||
- telegraf
|
|
||||||
notify: reload nftables
|
|
||||||
|
|
||||||
- name: Configure telegraf to expose SNMP data as prometheus metrics
|
|
||||||
template:
|
|
||||||
dest: "/etc/telegraf.conf.d/{{ item }}.conf"
|
|
||||||
src: "{{ item }}.conf.j2"
|
|
||||||
loop:
|
|
||||||
- output
|
|
||||||
- snmp
|
|
||||||
notify: restart telegraf
|
|
||||||
|
|
||||||
- name: Configure prometheus to pull SNMP data
|
|
||||||
copy:
|
|
||||||
dest: "/etc/prometheus/conf.d/snmp.yml"
|
|
||||||
src: "prometheus-snmp.yml"
|
|
||||||
notify: reload prometheus
|
|
||||||
|
|
||||||
- name: Configure prometheus to pull custom data
|
|
||||||
template:
|
|
||||||
dest: "/etc/prometheus/conf.d/{{ item.name }}.yml"
|
|
||||||
src: "prometheus-job.yml.j2"
|
|
||||||
loop: "{{ prometheus_hosts }}"
|
|
||||||
loop_control:
|
|
||||||
label: "{{ item.name }}"
|
|
||||||
notify: reload prometheus
|
|
||||||
|
|
@ -1,4 +0,0 @@
|
||||||
[[outputs.prometheus_client]]
|
|
||||||
listen = "127.0.0.1:9273"
|
|
||||||
expiration_interval = "300s"
|
|
||||||
tagexclude = ["mac?"] # temporary tags we don’t need to export
|
|
||||||
|
|
@ -1,20 +0,0 @@
|
||||||
{# Look up the NetBox devices matching this job's filter, keep those that have
   a primary IP, and map their names to the corresponding hostvars entries.
   The filter is passed as a plain expression (as the SNMP template does)
   instead of a nested "{{ ... }}" string that depends on re-templating. -#}
{% set devices = query("netbox.netbox.nb_lookup", "devices", api_filter=item.nb_filter, raw_data=true)
  | selectattr("primary_ip")
  | map(attribute="name")
  | map("extract", hostvars) -%}
|
|
||||||
|
|
||||||
scrape_configs:
|
|
||||||
- job_name: "{{ item.name }}"
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__address__]
|
|
||||||
regex: '([^.]+).*'
|
|
||||||
target_label: name
|
|
||||||
replacement: ${1}
|
|
||||||
static_configs:
|
|
||||||
- targets:
|
|
||||||
{% for address in devices
|
|
||||||
| selectattr("dns_name", "defined")
|
|
||||||
| map(attribute="dns_name")
|
|
||||||
| reject("none") | sort | unique %}
|
|
||||||
- "{{ address }}:9100"
|
|
||||||
{% endfor %}
|
|
||||||
|
|
@ -1,106 +0,0 @@
|
||||||
[[inputs.snmp]]
|
|
||||||
interval = "300s"
|
|
||||||
agent_host_tag = "source"
|
|
||||||
agents = [
|
|
||||||
{% for item in snmp_hosts %}
|
|
||||||
{% for address in query("netbox.netbox.nb_lookup", "devices", api_filter=item.nb_filter, raw_data=true)
|
|
||||||
| selectattr("primary_ip4") | map(attribute="primary_ip4.address")
|
|
||||||
| ipaddr("address") %}
|
|
||||||
"{{ address }}",
|
|
||||||
{% endfor %}
|
|
||||||
{% endfor %}
|
|
||||||
]
|
|
||||||
version = 3
|
|
||||||
sec_level = "authPriv"
|
|
||||||
auth_protocol = "SHA"
|
|
||||||
priv_protocol = "DES"
|
|
||||||
sec_name = "{{ password.snmp_user }}"
|
|
||||||
auth_password = "{{ password.snmp_pass }}"
|
|
||||||
priv_password = "{{ password.snmp_pass }}"
|
|
||||||
|
|
||||||
fieldexclude = ["ifDescr", "ifSpecific"]
|
|
||||||
|
|
||||||
[[inputs.snmp.field]]
|
|
||||||
name = "hostname"
|
|
||||||
oid = "RFC1213-MIB::sysName.0"
|
|
||||||
is_tag = true
|
|
||||||
|
|
||||||
# interface table
|
|
||||||
[[inputs.snmp.table]]
|
|
||||||
name = "iface"
|
|
||||||
oid = "IF-MIB::ifTable"
|
|
||||||
inherit_tags = ["hostname"]
|
|
||||||
|
|
||||||
[[inputs.snmp.table.field]]
|
|
||||||
oid = "IF-MIB::ifName"
|
|
||||||
|
|
||||||
# rename counters to make prometheus happy
|
|
||||||
[[inputs.snmp.table.field]]
|
|
||||||
name = "in_total"
|
|
||||||
oid = "IF-MIB::ifInOctets"
|
|
||||||
|
|
||||||
[[inputs.snmp.table.field]]
|
|
||||||
name = "in_err_total"
|
|
||||||
oid = "IF-MIB::ifInErrors"
|
|
||||||
|
|
||||||
[[inputs.snmp.table.field]]
|
|
||||||
name = "out_total"
|
|
||||||
oid = "IF-MIB::ifOutOctets"
|
|
||||||
|
|
||||||
[[inputs.snmp.table.field]]
|
|
||||||
name = "out_err_total"
|
|
||||||
oid = "IF-MIB::ifOutErrors"
|
|
||||||
|
|
||||||
# MAC address table per VLAN
|
|
||||||
[[inputs.snmp.table]]
|
|
||||||
name = "fdb"
|
|
||||||
index_as_tag = true
|
|
||||||
inherit_tags = ["hostname"]
|
|
||||||
|
|
||||||
[[inputs.snmp.table.field]]
|
|
||||||
name = "ifIndex"
|
|
||||||
oid = "Q-BRIDGE-MIB::dot1qTpFdbPort"
|
|
||||||
is_tag = true
|
|
||||||
|
|
||||||
[[inputs.snmp.table.field]]
|
|
||||||
name = "entry"
|
|
||||||
oid = "Q-BRIDGE-MIB::dot1qTpFdbStatus"
|
|
||||||
|
|
||||||
# look up interface name from its index
|
|
||||||
# seems we need another SNMP connection for that
|
|
||||||
[[processors.snmp_lookup]]
|
|
||||||
namepass = ["fdb", "iface"]
|
|
||||||
agent_tag = "source"
|
|
||||||
index_tag = "ifIndex"
|
|
||||||
|
|
||||||
version = 3
|
|
||||||
sec_level = "authPriv"
|
|
||||||
auth_protocol = "SHA"
|
|
||||||
priv_protocol = "DES"
|
|
||||||
sec_name = "{{ password.snmp_user }}"
|
|
||||||
auth_password = "{{ password.snmp_pass }}"
|
|
||||||
priv_password = "{{ password.snmp_pass }}"
|
|
||||||
|
|
||||||
[[processors.snmp_lookup.tag]]
|
|
||||||
oid = "IF-MIB::ifName"
|
|
||||||
name = "iface"
|
|
||||||
|
|
||||||
# split index 42.1.2.3.10.11.12 into tags "vlan" and "mac1" to "mac6"
|
|
||||||
[[processors.regex]]
|
|
||||||
namepass = ["fdb"]
|
|
||||||
|
|
||||||
[[processors.regex.tags]]
|
|
||||||
key = "index"
|
|
||||||
pattern = '^(?P<vlan>\d+)\.(?P<mac1>\d+)\.(?P<mac2>\d+)\.(?P<mac3>\d+)\.(?P<mac4>\d+)\.(?P<mac5>\d+)\.(?P<mac6>\d+)'
|
|
||||||
|
|
||||||
# combine "mac*" tags into a single tag "mac" with value 01:02:03:0a:0b:0c
|
|
||||||
[[processors.template]]
|
|
||||||
namepass = ["fdb"]
|
|
||||||
tagexclude = ["ifIndex", "index"]
|
|
||||||
tag = "mac"
|
|
||||||
{% raw %}
|
|
||||||
template = '''{{
|
|
||||||
printf "%02x:%02x:%02x:%02x:%02x:%02x"
|
|
||||||
(.Tag "mac1"|int) (.Tag "mac2"|int) (.Tag "mac3"|int) (.Tag "mac4"|int) (.Tag "mac5"|int) (.Tag "mac6"|int)
|
|
||||||
}}'''
|
|
||||||
{% endraw %}
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
Install and configure prometheus.
|
|
||||||
|
|
||||||
Job definitions should be placed in /etc/prometheus/conf.d by roles using this one.
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
scrape_config_files:
|
|
||||||
- "conf.d/*.yml"
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
- name: reload prometheus
|
|
||||||
service:
|
|
||||||
name: prometheus
|
|
||||||
state: reloaded
|
|
||||||
when: "'handler' not in ansible_skip_tags"
|
|
||||||
|
|
@ -1,21 +0,0 @@
|
||||||
- name: Install packages
|
|
||||||
package:
|
|
||||||
name:
|
|
||||||
- prometheus
|
|
||||||
|
|
||||||
- name: Create directory for prometheus configs
|
|
||||||
file:
|
|
||||||
path: /etc/prometheus/conf.d
|
|
||||||
state: directory
|
|
||||||
|
|
||||||
- name: Configure prometheus
|
|
||||||
copy:
|
|
||||||
dest: /etc/prometheus/
|
|
||||||
src: prometheus.yml
|
|
||||||
notify: reload prometheus
|
|
||||||
|
|
||||||
- name: Enable prometheus service
|
|
||||||
service:
|
|
||||||
name: prometheus
|
|
||||||
enabled: true
|
|
||||||
state: started
|
|
||||||
31
roles/telegraf/tasks/debian.yml
Normal file
31
roles/telegraf/tasks/debian.yml
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
- name: Add influxdb repository
|
||||||
|
deb822_repository:
|
||||||
|
name: influxdata
|
||||||
|
uris: https://repos.influxdata.com/debian
|
||||||
|
suites: stable
|
||||||
|
components: main
|
||||||
|
architectures: amd64
|
||||||
|
signed_by: https://repos.influxdata.com/influxdata-archive.key
|
||||||
|
notify: update package cache
|
||||||
|
|
||||||
|
- meta: flush_handlers
|
||||||
|
|
||||||
|
- name: Install telegraf
|
||||||
|
package:
|
||||||
|
name: telegraf
|
||||||
|
|
||||||
|
# Render the telegraf output configuration from output.conf.j2.
# The file is owned by telegraf and not world-readable. Skipped in check mode.
- name: Configure telegraf
  when: not ansible_check_mode
  template:
    dest: /etc/telegraf/telegraf.d/output.conf
    src: output.conf.j2
    owner: telegraf
    group: telegraf
    # quoted so the mode is always handled as an octal string, never as a
    # plain integer
    mode: "0640"
  notify: restart telegraf
|
||||||
|
|
||||||
|
- name: Enable telegraf
|
||||||
|
service:
|
||||||
|
name: telegraf
|
||||||
|
enabled: true
|
||||||
|
state: started
|
||||||
|
|
@ -1,22 +1,11 @@
|
||||||
- name: Add telegraf package repo on Debian
|
- name: Get influxdb info
|
||||||
|
set_fact:
|
||||||
|
influxdb_info: '{{ lookup("passwordstore", "vm/"~influxdb_host, returnall=true, missing="empty") | from_yaml }}'
|
||||||
|
|
||||||
|
- name: Create influxdb token for this host
|
||||||
|
include_tasks: token.yml
|
||||||
|
when: 'not ansible_check_mode and "influxdb_token" not in password'
|
||||||
|
|
||||||
|
- name: Install telegraf on Debian
|
||||||
|
include_tasks: debian.yml
|
||||||
when: ansible_os_family == "Debian"
|
when: ansible_os_family == "Debian"
|
||||||
deb822_repository:
|
|
||||||
name: influxdata
|
|
||||||
uris: https://repos.influxdata.com/debian
|
|
||||||
suites: stable
|
|
||||||
components: main
|
|
||||||
architectures: amd64
|
|
||||||
signed_by: https://repos.influxdata.com/influxdata-archive.key
|
|
||||||
notify: update package cache
|
|
||||||
|
|
||||||
- meta: flush_handlers
|
|
||||||
|
|
||||||
- name: Install telegraf
|
|
||||||
package:
|
|
||||||
name: telegraf
|
|
||||||
|
|
||||||
- name: Enable telegraf service
|
|
||||||
service:
|
|
||||||
name: telegraf
|
|
||||||
enabled: true
|
|
||||||
state: started
|
|
||||||
|
|
|
||||||
53
roles/telegraf/tasks/token.yml
Normal file
53
roles/telegraf/tasks/token.yml
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
- name: Get influxdb organization ID
|
||||||
|
delegate_to: localhost
|
||||||
|
uri:
|
||||||
|
url: '{{ influxdb_info.influxdb_url }}/api/v2/orgs'
|
||||||
|
headers:
|
||||||
|
Authorization: Token {{ influxdb_info.influxdb_operator_token }}
|
||||||
|
register: response
|
||||||
|
|
||||||
|
- name: Parse influxdb orgID
|
||||||
|
set_fact:
|
||||||
|
influxdb_orgID: '{{ response.json.orgs | selectattr("name", "==", influxdb_info.influxdb_org) | map(attribute="id") | first }}'
|
||||||
|
|
||||||
|
- name: Get influxdb bucket ID
|
||||||
|
delegate_to: localhost
|
||||||
|
uri:
|
||||||
|
url: '{{ influxdb_info.influxdb_url }}/api/v2/buckets?orgID={{ influxdb_orgID }}'
|
||||||
|
headers:
|
||||||
|
Authorization: Token {{ influxdb_info.influxdb_operator_token }}
|
||||||
|
register: response
|
||||||
|
|
||||||
|
# Extract the ID of the bucket the token will be allowed to write to from the
# registered bucket listing. The bucket name is taken from
# influxdb_info.influxdb_bucket so it matches the bucket used in the telegraf
# output template; "servers" is kept as the fallback when that key is unset.
- name: Parse influxdb bucketID
  set_fact:
    influxdb_bucketID: '{{ response.json.buckets | selectattr("name", "==", influxdb_info.influxdb_bucket | default("servers")) | map(attribute="id") | first }}'
|
||||||
|
|
||||||
|
# Request a new authorization from the InfluxDB API, run from the control
# node. The authorization is described by this host's inventory name and
# grants write access to the bucket selected earlier. The task expects
# HTTP 201 and registers the reply for the next task to read the token from.
- name: Create influxdb token
  delegate_to: localhost
  no_log: true  # the registered response contains the newly created token
  uri:
    url: '{{ influxdb_info.influxdb_url }}/api/v2/authorizations'
    method: POST
    body_format: json
    status_code: 201
    headers:
      Authorization: Token {{ influxdb_info.influxdb_operator_token }}
      Content-Type: application/json
    # native mapping instead of a hand-written JSON string: the uri module
    # serializes it, so values are always escaped correctly
    body:
      description: "{{ inventory_hostname }}"
      orgID: "{{ influxdb_orgID }}"
      permissions:
        - action: write
          resource:
            type: buckets
            id: "{{ influxdb_bucketID }}"
  register: response
|
||||||
|
|
||||||
|
- name: Parse influxdb token
|
||||||
|
set_fact:
|
||||||
|
influxdb_token: '{{ response.json.token }}'
|
||||||
|
|
||||||
|
# Ansible’s passwordstore lookup plugin should be able to do that but is pretty broken,
|
||||||
|
# so we do it manually.
|
||||||
|
# Rewrite this host's password-store entry on the control node: the current
# `password` data is dumped as YAML and the new influxdb_token line is
# appended. The entry lives under vm/ or host/ depending on is_virtual.
- name: Store influxdb token in password store
  delegate_to: localhost
  no_log: true  # stdin carries the contents of `password` plus the new token
  command:
    cmd: 'pass insert --force --multiline {{ ("vm/" if is_virtual else "host/")~inventory_hostname }}'
    stdin: '{{ password | to_nice_yaml(sort_keys=false) }}influxdb_token: {{ influxdb_token }}'
|
||||||
5
roles/telegraf/templates/output.conf.j2
Normal file
5
roles/telegraf/templates/output.conf.j2
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
[[outputs.influxdb_v2]]
|
||||||
|
urls = ["{{ influxdb_info.influxdb_url }}"]
|
||||||
|
organization = "{{ influxdb_info.influxdb_org }}"
|
||||||
|
bucket = "{{ influxdb_info.influxdb_bucket }}"
|
||||||
|
token = "{{ influxdb_token | default(password.influxdb_token) }}"
|
||||||
|
|
@ -16,12 +16,12 @@
|
||||||
- hosts: ceph-*
|
- hosts: ceph-*
|
||||||
roles:
|
roles:
|
||||||
- frr
|
- frr
|
||||||
|
- telegraf
|
||||||
- ceph
|
- ceph
|
||||||
|
|
||||||
- hosts: mgmt-gw
|
- hosts: mgmt-gw
|
||||||
roles:
|
roles:
|
||||||
- radvd # we are router for mgmt networks
|
- radvd # we are router for mgmt networks
|
||||||
- collector
|
|
||||||
|
|
||||||
- hosts: proxmox-backup
|
- hosts: proxmox-backup
|
||||||
roles:
|
roles:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue