Refactored alertmanager
This commit is contained in:
parent
d374a3af1f
commit
1fda4c6fff
25
README.md
25
README.md
|
@ -1,24 +1,15 @@
|
|||
# grafana-docker-stack
|
||||
|
||||
For deploying Grafana, Prometheus and Node Exporter, make these steps:
|
||||
1. Deploy stack
|
||||
1. Clone repo
|
||||
```
|
||||
git clone https://github.com/digitalstudium/grafana-docker-stack.git
|
||||
```
|
||||
2. Change configs if nececcery (at least `configs/alertmanager.yml` to configure notification channels)
|
||||
3. Deploy stack
|
||||
```bash
|
||||
docker stack deploy -c grafana-docker-stack/docker-compose.yml monitoring
|
||||
```
|
||||
2. Add prometheus datasource with address `http://prometheus:9090` to grafana
|
||||
3. Add these three lines to the bottom of `/var/lib/docker/volumes/monitoring_prom-configs/_data/prometheus.yml` file, to `scrape_configs:` section:
|
||||
```
|
||||
- job_name: 'node-exporter'
|
||||
|
||||
static_configs:
|
||||
- targets: ['node-exporter:9100']
|
||||
```
|
||||
4. Reload prometheus config via this command:
|
||||
```
|
||||
docker ps | grep prometheus | awk '{print $1}' | xargs docker kill -s SIGHUP
|
||||
```
|
||||
5. Import this dashboard: https://grafana.com/grafana/dashboards/1860 to grafana.
|
||||
|
||||
That's it!
|
||||
|
||||
|
@ -28,8 +19,8 @@ If you want to add more servers to prometheus, make these steps:
|
|||
git clone https://github.com/digitalstudium/grafana-docker-stack.git
|
||||
docker stack deploy -c grafana-docker-stack/node-exporter.yml node-exporter
|
||||
```
|
||||
2. Add these servers to `/var/lib/docker/volumes/monitoring_prom-configs/_data/prometheus.yml` file to `- targets: ['node-exporter:9100']` list of `- job_name: 'node-exporter'` section, like `- targets: ['node-exporter:9100', 'server1:9100', 'server2:9100', '...']`
|
||||
2. Add these servers to `configs/prometheus/prometheus.yml` file to `- targets: ['node-exporter:9100']` list of `- job_name: 'node-exporter'` section, like `- targets: ['node-exporter:9100', 'server1:9100', 'server2:9100', '...']`
|
||||
3. Reload prometheus config via this command:
|
||||
```
|
||||
```bash
|
||||
docker ps | grep prometheus | awk '{print $1}' | xargs docker kill -s SIGHUP
|
||||
|
||||
```
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
route:
|
||||
receiver: 'telegram'
|
||||
routes:
|
||||
# All alerts with severity=warning or severity=critical
|
||||
# are dispatched to the telegram receiver.
|
||||
- receiver: 'telegram'
|
||||
group_wait: 10s
|
||||
matchers:
|
||||
- severity=~"warning|critical"
|
||||
|
||||
receivers:
|
||||
- name: 'telegram'
|
||||
telegram_configs:
|
||||
- bot_token: "00000:ddddddddddddddddddd"
|
||||
chat_id: -000000
|
||||
|
||||
# https://api.telegram.org/botINSERT_BOT_ID_HERE/getUpdates - to get chat_id
|
||||
# amtool --alertmanager.url=http://localhost:9093/ alert add alertname="Test alert" severity="warning" job="test-job" - to send test alert
|
|
@ -0,0 +1,799 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Simple exporter for cadvisor only",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": 14282,
|
||||
"graphTooltip": 0,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 8,
|
||||
"panels": [],
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "CPU",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 15,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null as zero",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "9.3.6",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "sum(rate(container_cpu_usage_seconds_total{instance=~\"$host\",name=~\"$container\",name=~\".+\"}[5m])) by (name) *100",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "{{name}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "CPU Usage",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:606",
|
||||
"format": "percent",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:607",
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 11,
|
||||
"panels": [],
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Memory",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 9
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null as zero",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "9.3.6",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "sum(container_memory_rss{instance=~\"$host\",name=~\"$container\",name=~\".+\"}) by (name)",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "{{name}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "Memory Usage",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:606",
|
||||
"format": "bytes",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:607",
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 9
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 14,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null as zero",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "9.3.6",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "sum(container_memory_cache{instance=~\"$host\",name=~\"$container\",name=~\".+\"}) by (name)",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "{{name}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "Memory Cached",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:606",
|
||||
"format": "bytes",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:607",
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 17
|
||||
},
|
||||
"id": 2,
|
||||
"panels": [],
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Network",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": false,
|
||||
"hideEmpty": false,
|
||||
"hideZero": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "9.3.6",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "sum(rate(container_network_receive_bytes_total{instance=~\"$host\",name=~\"$container\",name=~\".+\"}[5m])) by (name)",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "{{name}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "Received Network Traffic",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:674",
|
||||
"format": "Bps",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:675",
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 18
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "9.3.6",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "sum(rate(container_network_transmit_bytes_total{instance=~\"$host\",name=~\"$container\",name=~\".+\"}[5m])) by (name)",
|
||||
"interval": "",
|
||||
"legendFormat": "{{name}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "Sent Network Traffic",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:832",
|
||||
"format": "Bps",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:833",
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 26
|
||||
},
|
||||
"id": 19,
|
||||
"panels": [],
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Misc",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"filterable": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "id"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.width",
|
||||
"value": 260
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Running"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "d"
|
||||
},
|
||||
{
|
||||
"id": "decimals",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"id": "custom.displayMode",
|
||||
"value": "color-text"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "dark-green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 27
|
||||
},
|
||||
"id": 17,
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"sortBy": []
|
||||
},
|
||||
"pluginVersion": "7.4.5",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "(time() - container_start_time_seconds{instance=~\"$host\",name=~\"$container\",name=~\".+\"})/86400",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "{{name}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Containers Info",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "filterFieldsByName",
|
||||
"options": {
|
||||
"include": {
|
||||
"names": [
|
||||
"container_label_com_docker_compose_project",
|
||||
"container_label_com_docker_compose_project_working_dir",
|
||||
"image",
|
||||
"instance",
|
||||
"name",
|
||||
"Value",
|
||||
"container_label_com_docker_compose_service"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {},
|
||||
"indexByName": {},
|
||||
"renameByName": {
|
||||
"Value": "Running",
|
||||
"container_label_com_docker_compose_project": "Label",
|
||||
"container_label_com_docker_compose_project_working_dir": "Working dir",
|
||||
"container_label_com_docker_compose_service": "Service",
|
||||
"image": "Registry Image",
|
||||
"instance": "Instance",
|
||||
"name": "Name"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"cadvisor",
|
||||
"docker"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"definition": "label_values({__name__=~\"container.*\"},instance)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Host",
|
||||
"multi": false,
|
||||
"name": "host",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values({__name__=~\"container.*\"},instance)",
|
||||
"refId": "Prometheus-host-Variable-Query"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 5,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"definition": "label_values({__name__=~\"container.*\", instance=~\"$host\"},name)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Container",
|
||||
"multi": false,
|
||||
"name": "container",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values({__name__=~\"container.*\", instance=~\"$host\"},name)",
|
||||
"refId": "Prometheus-container-Variable-Query"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-5m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Cadvisor exporter",
|
||||
"uid": "pMEd7m0Mz",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,22 @@
|
|||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
# <string> an unique provider name
|
||||
- name: 'Unknown_provider'
|
||||
# <int> org id. will default to orgId 1 if not specified
|
||||
orgId: 1
|
||||
# <string, required> name of the dashboard folder. Required
|
||||
folder: ''
|
||||
# <string> folder UID. will be automatically generated if not specified
|
||||
folderUid: ''
|
||||
# <string, required> provider type. Required
|
||||
type: file
|
||||
# <bool> disable dashboard deletion
|
||||
disableDeletion: false
|
||||
# <bool> enable dashboard editing
|
||||
editable: true
|
||||
# <int> how often Grafana will scan for changed dashboards
|
||||
updateIntervalSeconds: 10
|
||||
options:
|
||||
# <string, required> path to dashboard files on disk. Required
|
||||
path: /etc/grafana/provisioning/dashboards
|
|
@ -0,0 +1,16 @@
|
|||
# config file version
|
||||
apiVersion: 1
|
||||
|
||||
# list of datasources to insert/update depending
|
||||
# whats available in the database
|
||||
datasources:
|
||||
# <string, required> name of the datasource. Required
|
||||
- name: Prometheus
|
||||
# <string, required> datasource type. Required
|
||||
type: prometheus
|
||||
# <string, required> access mode. direct or proxy. Required
|
||||
access: proxy
|
||||
# <int> org id. will default to orgId 1 if not specified
|
||||
orgId: 1
|
||||
# <string> url
|
||||
url: http://prometheus:9090
|
|
@ -36,26 +36,12 @@ groups:
|
|||
labels:
|
||||
severity: warning
|
||||
for: 2m
|
||||
- alert: Host out of inodes
|
||||
expr: 'node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0'
|
||||
annotations:
|
||||
description: Disk is almost running out of available inodes (< 10% left)
|
||||
labels:
|
||||
severity: warning
|
||||
for: 2m
|
||||
- alert: Host high CPU load
|
||||
expr: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80'
|
||||
annotations:
|
||||
description: CPU load is > 80%
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: Host physical component too hot
|
||||
expr: 'node_hwmon_temp_celsius > 75'
|
||||
annotations:
|
||||
description: "Physical hardware component too hot"
|
||||
labels:
|
||||
severity: warning
|
||||
for: 5m
|
||||
- alert: Host OOM kill detected
|
||||
expr: 'increase(node_vmstat_oom_kill[1m]) > 0'
|
||||
annotations:
|
|
@ -0,0 +1,36 @@
|
|||
# my global config
|
||||
global:
|
||||
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
|
||||
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
|
||||
# scrape_timeout is set to the global default (10s).
|
||||
|
||||
# Alertmanager configuration
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets:
|
||||
- alertmanager:9093
|
||||
|
||||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||
rule_files:
|
||||
- "alert_rules.yml"
|
||||
|
||||
# A scrape configuration containing exactly one endpoint to scrape:
|
||||
# Here it's Prometheus itself.
|
||||
scrape_configs:
|
||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
- job_name: "prometheus"
|
||||
|
||||
# metrics_path defaults to '/metrics'
|
||||
# scheme defaults to 'http'.
|
||||
|
||||
static_configs:
|
||||
- targets: ["localhost:9090"]
|
||||
|
||||
- job_name: "node-exporter"
|
||||
static_configs:
|
||||
- targets: ["node-exporter:9100"]
|
||||
|
||||
- job_name: "cadvisor"
|
||||
static_configs:
|
||||
- targets: ["cadvisor:8080"]
|
|
@ -3,43 +3,33 @@ version: "3.9"
|
|||
services:
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:8.0.6-ubuntu
|
||||
image: grafana/grafana:9.3.6-ubuntu
|
||||
ports:
|
||||
- "3000:3000"
|
||||
volumes:
|
||||
- grafana-data:/var/lib/grafana
|
||||
- grafana-configs:/etc/grafana
|
||||
- ./configs/grafana/datasources:/etc/grafana/provisioning/datasources/
|
||||
- ./configs/grafana/dashboards:/etc/grafana/provisioning/dashboards/
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:v2.28.1
|
||||
image: prom/prometheus:v2.42.0
|
||||
ports:
|
||||
- "9090:9090"
|
||||
volumes:
|
||||
- prom-data:/prometheus
|
||||
- prom-configs:/etc/prometheus
|
||||
- ./rules.yml:/etc/prometheus/rules.yml
|
||||
- ./configs/prometheus/alert_rules.yml:/etc/prometheus/alert_rules.yml
|
||||
- ./configs/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
|
||||
alertmanager:
|
||||
image: prom/alertmanager:v0.22.2
|
||||
image: prom/alertmanager:v0.25.0
|
||||
ports:
|
||||
- "9093:9093"
|
||||
volumes:
|
||||
- alert-data:/alertmanager
|
||||
- alert-configs:/etc/alertmanager
|
||||
|
||||
alertmanager-bot:
|
||||
image: metalmatze/alertmanager-bot:0.4.3
|
||||
environment:
|
||||
TELEGRAM_ADMIN: "<user_id>"
|
||||
TELEGRAM_TOKEN: "<bot_id>"
|
||||
ALERTMANAGER_URL: http://alertmanager:9093
|
||||
STORE: bolt
|
||||
BOLT_PATH: /data/bot.db
|
||||
volumes:
|
||||
- alert-bot-data:/data
|
||||
- ./configs/alertmanager.yml:/etc/alertmanager/alertmanager.yml
|
||||
|
||||
node-exporter:
|
||||
image: prom/node-exporter:v1.2.0
|
||||
image: prom/node-exporter:v1.5.0
|
||||
ports:
|
||||
- "9100:9100"
|
||||
volumes:
|
||||
|
@ -52,11 +42,17 @@ services:
|
|||
- '--collector.filesystem.mount-points-exclude'
|
||||
- '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'
|
||||
|
||||
cadvisor:
|
||||
image: gcr.io/cadvisor/cadvisor:v0.47.0
|
||||
ports:
|
||||
- 8080:8080
|
||||
volumes:
|
||||
- /:/rootfs:ro
|
||||
- /var/run:/var/run:rw
|
||||
- /sys:/sys:ro
|
||||
- /var/lib/docker/:/var/lib/docker:ro
|
||||
|
||||
volumes:
|
||||
grafana-data:
|
||||
grafana-configs:
|
||||
prom-data:
|
||||
prom-configs:
|
||||
alert-data:
|
||||
alert-configs:
|
||||
alert-bot-data:
|
|
@ -1,7 +1,7 @@
|
|||
version: "3.9"
|
||||
services:
|
||||
node-exporter:
|
||||
image: prom/node-exporter:v1.2.0
|
||||
image: prom/node-exporter:v1.5.0
|
||||
ports:
|
||||
- "9100:9100"
|
||||
volumes:
|
||||
|
|
Loading…
Reference in New Issue