You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2741 lines
94 KiB
2741 lines
94 KiB
apiVersion: v1
|
|
kind: Template
|
|
labels:
|
|
template: grafana-prometheus
|
|
message: See https://github.com/nmasse-itix/OpenShift-Docker-Images/tree/master/grafana for more details.
|
|
metadata:
|
|
annotations:
|
|
description: |-
|
|
The prometheus datasource for Grafana.
|
|
openshift.io/display-name: Prometheus
|
|
tags: instant-app
|
|
template.openshift.io/documentation-url: https://github.com/nmasse-itix/OpenShift-Docker-Images/tree/master/grafana
|
|
template.openshift.io/long-description: Prometheus datasource for Grafana
|
|
template.openshift.io/provider-display-name: Nicolas Massé
|
|
template.openshift.io/support-url: https://github.com/nmasse-itix/OpenShift-Docker-Images/issues
|
|
name: grafana-prometheus
|
|
objects:
|
|
- kind: ConfigMap
|
|
apiVersion: v1
|
|
metadata:
|
|
name: grafana-dashboards
|
|
namespace: ${NAMESPACE}
|
|
labels:
|
|
template: grafana-prometheus
|
|
data:
|
|
openshift-cluster.json: |-
|
|
{
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": "-- Grafana --",
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"description": "Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. Uses cAdvisor metrics only.",
|
|
"editable": true,
|
|
"gnetId": 315,
|
|
"graphTooltip": 0,
|
|
"hideControls": false,
|
|
"id": 2,
|
|
"links": [],
|
|
"refresh": "10s",
|
|
"rows": [
|
|
{
|
|
"collapse": false,
|
|
"height": "200px",
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 1,
|
|
"grid": {},
|
|
"height": "200px",
|
|
"id": 32,
|
|
"legend": {
|
|
"alignAsTable": false,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": false,
|
|
"sideWidth": 200,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"span": 12,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "sum (rate (container_network_receive_bytes_total{id=\"/\"}[5m]))",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "Received",
|
|
"metric": "network",
|
|
"refId": "A",
|
|
"step": 10
|
|
},
|
|
{
|
|
"expr": "- sum (rate (container_network_transmit_bytes_total{id=\"/\"}[5m]))",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "Sent",
|
|
"metric": "network",
|
|
"refId": "B",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "Network I/O pressure",
|
|
"tooltip": {
|
|
"msResolution": false,
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "cumulative"
|
|
},
|
|
"transparent": false,
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "Bps",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "Bps",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": false
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "Network I/O pressure",
|
|
"titleSize": "h6"
|
|
},
|
|
{
|
|
"collapse": false,
|
|
"height": "250px",
|
|
"panels": [
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": true,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"datasource": "prometheus",
|
|
"editable": true,
|
|
"error": false,
|
|
"format": "percent",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": true,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"height": "180px",
|
|
"id": 4,
|
|
"interval": null,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "50%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"span": 4,
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": "65, 90",
|
|
"title": "Cluster memory usage",
|
|
"transparent": false,
|
|
"type": "singlestat",
|
|
"valueFontSize": "80%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": true,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"format": "percent",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": true,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"height": "180px",
|
|
"id": 6,
|
|
"interval": null,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "50%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"span": 4,
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "sum (rate (container_cpu_usage_seconds_total{id=\"/\"}[5m])) / sum (machine_cpu_cores{}) * 100",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": "65, 90",
|
|
"title": "Cluster CPU usage (5m avg)",
|
|
"type": "singlestat",
|
|
"valueFontSize": "80%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": true,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"format": "percent",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": true,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"height": "180px",
|
|
"id": 7,
|
|
"interval": null,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "50%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"span": 4,
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "sum (container_fs_usage_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|mapper/[-a-zA-Z0-9]+)$\",id=\"/\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|mapper/[-a-zA-Z0-9]+)$\",id=\"/\"}) * 100",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "",
|
|
"metric": "",
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": "65, 90",
|
|
"title": "Cluster filesystem usage",
|
|
"type": "singlestat",
|
|
"valueFontSize": "80%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": false,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"format": "bytes",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": false,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"height": "1px",
|
|
"id": 9,
|
|
"interval": null,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "20%",
|
|
"prefix": "",
|
|
"prefixFontSize": "20%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"span": 2,
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": "",
|
|
"title": "Used",
|
|
"type": "singlestat",
|
|
"valueFontSize": "50%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": false,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"format": "bytes",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": false,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"height": "1px",
|
|
"id": 10,
|
|
"interval": null,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "50%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"span": 2,
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"})",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": "",
|
|
"title": "Total",
|
|
"type": "singlestat",
|
|
"valueFontSize": "50%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": false,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"format": "none",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": false,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"height": "1px",
|
|
"id": 11,
|
|
"interval": null,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": " cores",
|
|
"postfixFontSize": "30%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"span": 2,
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "sum (rate (container_cpu_usage_seconds_total{id=\"/\"}[5m]))",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": "",
|
|
"title": "Used",
|
|
"type": "singlestat",
|
|
"valueFontSize": "50%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": false,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"format": "none",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": false,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"height": "1px",
|
|
"id": 12,
|
|
"interval": null,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": " cores",
|
|
"postfixFontSize": "30%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"span": 2,
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"})",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": "",
|
|
"title": "Total",
|
|
"type": "singlestat",
|
|
"valueFontSize": "50%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": false,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"format": "bytes",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": false,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"height": "1px",
|
|
"id": 13,
|
|
"interval": null,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "50%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"span": 2,
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "sum (container_fs_usage_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|mapper/[-a-zA-Z0-9]+)$\",id=\"/\"})",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": "",
|
|
"title": "Used",
|
|
"type": "singlestat",
|
|
"valueFontSize": "50%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": false,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"format": "bytes",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": false,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"height": "1px",
|
|
"id": 14,
|
|
"interval": null,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "50%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"span": 2,
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "sum (container_fs_limit_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|mapper/[-a-zA-Z0-9]+)$\",id=\"/\"})",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": "",
|
|
"title": "Total",
|
|
"type": "singlestat",
|
|
"valueFontSize": "50%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "Total usage",
|
|
"titleSize": "h6"
|
|
},
|
|
{
|
|
"collapse": false,
|
|
"height": "250px",
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 3,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 0,
|
|
"grid": {},
|
|
"height": "",
|
|
"id": 17,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": true,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"span": 12,
|
|
"stack": false,
|
|
"steppedLine": true,
|
|
"targets": [
|
|
{
|
|
"expr": "sum (rate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (pod_name)",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{ pod_name }}",
|
|
"metric": "container_cpu",
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "Pods CPU usage (5m avg)",
|
|
"tooltip": {
|
|
"msResolution": true,
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "cumulative"
|
|
},
|
|
"transparent": false,
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "none",
|
|
"label": "cores",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": false
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "Pods CPU usage",
|
|
"titleSize": "h6"
|
|
},
|
|
{
|
|
"collapse": false,
|
|
"height": "250px",
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 3,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 0,
|
|
"grid": {},
|
|
"height": "",
|
|
"id": 23,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": true,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"span": 12,
|
|
"stack": false,
|
|
"steppedLine": true,
|
|
"targets": [
|
|
{
|
|
"expr": "sum (rate (container_cpu_usage_seconds_total{id=~\"^/system.slice.*\"}[5m])) by (systemd_service_name)",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{ systemd_service_name }}",
|
|
"metric": "container_cpu",
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "System services CPU usage (5m avg)",
|
|
"tooltip": {
|
|
"msResolution": true,
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "cumulative"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "none",
|
|
"label": "cores",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": false
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "System services CPU usage",
|
|
"titleSize": "h6"
|
|
},
|
|
{
|
|
"collapse": false,
|
|
"height": "250px",
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 3,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 0,
|
|
"grid": {},
|
|
"height": "",
|
|
"id": 24,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"hideEmpty": false,
|
|
"hideZero": false,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": true,
|
|
"show": true,
|
|
"sideWidth": null,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"span": 12,
|
|
"stack": false,
|
|
"steppedLine": true,
|
|
"targets": [
|
|
{
|
|
"expr": "sum (rate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\"}[5m])) by (container_name, pod_name)",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "pod: {{ pod_name }} | {{ container_name }}",
|
|
"metric": "container_cpu",
|
|
"refId": "A",
|
|
"step": 10
|
|
},
|
|
{
|
|
"expr": "sum (rate (container_cpu_usage_seconds_total{image!=\"\",name!~\"^k8s_.*\"}[5m])) by (kubernetes_io_hostname, name, image)",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
|
|
"metric": "container_cpu",
|
|
"refId": "B",
|
|
"step": 10
|
|
},
|
|
{
|
|
"expr": "sum (rate (container_cpu_usage_seconds_total{rkt_container_name!=\"\"}[5m])) by (kubernetes_io_hostname, rkt_container_name)",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
|
|
"metric": "container_cpu",
|
|
"refId": "C",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "Containers CPU usage (5m avg)",
|
|
"tooltip": {
|
|
"msResolution": true,
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "cumulative"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "none",
|
|
"label": "cores",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": false
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "Containers CPU usage",
|
|
"titleSize": "h6"
|
|
},
|
|
{
|
|
"collapse": true,
|
|
"height": "500px",
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 3,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 0,
|
|
"grid": {},
|
|
"id": 20,
|
|
"isNew": true,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"span": 12,
|
|
"stack": false,
|
|
"steppedLine": true,
|
|
"targets": [
|
|
{
|
|
"expr": "sum (rate (container_cpu_usage_seconds_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[1m])) by (id)",
|
|
"hide": false,
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{ id }}",
|
|
"metric": "container_cpu",
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "All processes CPU usage (1m avg)",
|
|
"tooltip": {
|
|
"msResolution": true,
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "cumulative"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"show": true
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "none",
|
|
"label": "cores",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": false
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "All processes CPU usage",
|
|
"titleSize": "h6"
|
|
},
|
|
{
|
|
"collapse": false,
|
|
"height": "250px",
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 0,
|
|
"grid": {},
|
|
"id": 25,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": true,
|
|
"show": true,
|
|
"sideWidth": 200,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"span": 12,
|
|
"stack": false,
|
|
"steppedLine": true,
|
|
"targets": [
|
|
{
|
|
"expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (pod_name)",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{ pod_name }}",
|
|
"metric": "container_memory_usage:sort_desc",
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "Pods memory usage",
|
|
"tooltip": {
|
|
"msResolution": false,
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "cumulative"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "bytes",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": false
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "Pods memory usage",
|
|
"titleSize": "h6"
|
|
},
|
|
{
|
|
"collapse": false,
|
|
"height": "250px",
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 0,
|
|
"grid": {},
|
|
"id": 26,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": true,
|
|
"show": true,
|
|
"sideWidth": 200,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"span": 12,
|
|
"stack": false,
|
|
"steppedLine": true,
|
|
"targets": [
|
|
{
|
|
"expr": "sum (container_memory_working_set_bytes{id=~\"^/system.slice/.*\"}) by (id)",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{ id }}",
|
|
"metric": "container_memory_usage:sort_desc",
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "System services memory usage",
|
|
"tooltip": {
|
|
"msResolution": false,
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "cumulative"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "bytes",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": false
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "System services memory usage",
|
|
"titleSize": "h6"
|
|
},
|
|
{
|
|
"collapse": false,
|
|
"height": "250px",
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 0,
|
|
"grid": {},
|
|
"id": 27,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": true,
|
|
"show": true,
|
|
"sideWidth": 200,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"span": 12,
|
|
"stack": false,
|
|
"steppedLine": true,
|
|
"targets": [
|
|
{
|
|
"expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}) by (container_name, pod_name)",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "pod: {{ pod_name }} | {{ container_name }}",
|
|
"metric": "container_memory_usage:sort_desc",
|
|
"refId": "A",
|
|
"step": 10
|
|
},
|
|
{
|
|
"expr": "sum (container_memory_working_set_bytes{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, name, image)",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
|
|
"metric": "container_memory_usage:sort_desc",
|
|
"refId": "B",
|
|
"step": 10
|
|
},
|
|
{
|
|
"expr": "sum (container_memory_working_set_bytes{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, rkt_container_name)",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
|
|
"metric": "container_memory_usage:sort_desc",
|
|
"refId": "C",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "Containers memory usage",
|
|
"tooltip": {
|
|
"msResolution": false,
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "cumulative"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "bytes",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": false
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "Containers memory usage",
|
|
"titleSize": "h6"
|
|
},
|
|
{
|
|
"collapse": false,
|
|
"height": "500px",
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 0,
|
|
"grid": {},
|
|
"id": 28,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sideWidth": 200,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"span": 12,
|
|
"stack": false,
|
|
"steppedLine": true,
|
|
"targets": [
|
|
{
|
|
"expr": "sum (container_memory_working_set_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) by (id)",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{ id }}",
|
|
"metric": "container_memory_usage:sort_desc",
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "All processes memory usage",
|
|
"tooltip": {
|
|
"msResolution": false,
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "cumulative"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "bytes",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": false
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "All processes memory usage",
|
|
"titleSize": "h6"
|
|
},
|
|
{
|
|
"collapse": false,
|
|
"height": "250px",
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 1,
|
|
"grid": {},
|
|
"id": 16,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": true,
|
|
"show": true,
|
|
"sideWidth": 200,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"span": 12,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (pod_name)",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "-> {{ pod_name }}",
|
|
"metric": "network",
|
|
"refId": "A",
|
|
"step": 10
|
|
},
|
|
{
|
|
"expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (pod_name)",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "<- {{ pod_name }}",
|
|
"metric": "network",
|
|
"refId": "B",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "Pods network I/O (5m avg)",
|
|
"tooltip": {
|
|
"msResolution": false,
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "cumulative"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "Bps",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": false
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "Pods network I/O",
|
|
"titleSize": "h6"
|
|
},
|
|
{
|
|
"collapse": false,
|
|
"height": "250px",
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 1,
|
|
"grid": {},
|
|
"id": 30,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": true,
|
|
"show": true,
|
|
"sideWidth": 200,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"span": 12,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (container_name, pod_name)",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "-> pod: {{ pod_name }} | {{ container_name }}",
|
|
"metric": "network",
|
|
"refId": "B",
|
|
"step": 10
|
|
},
|
|
{
|
|
"expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (container_name, pod_name)",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "<- pod: {{ pod_name }} | {{ container_name }}",
|
|
"metric": "network",
|
|
"refId": "D",
|
|
"step": 10
|
|
},
|
|
{
|
|
"expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name!~\"^k8s_.*\"}[5m])) by (kubernetes_io_hostname, name, image)",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
|
|
"metric": "network",
|
|
"refId": "A",
|
|
"step": 10
|
|
},
|
|
{
|
|
"expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name!~\"^k8s_.*\"}[5m])) by (kubernetes_io_hostname, name, image)",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})",
|
|
"metric": "network",
|
|
"refId": "C",
|
|
"step": 10
|
|
},
|
|
{
  "expr": "sum (rate (container_network_receive_bytes_total{rkt_container_name!=\"\"}[5m])) by (kubernetes_io_hostname, rkt_container_name)",
  "format": "time_series",
  "hide": false,
  "interval": "10s",
  "intervalFactor": 1,
  "legendFormat": "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
  "metric": "network",
  "refId": "E",
  "step": 10
},
|
|
{
|
|
"expr": "- sum (rate (container_network_transmit_bytes_total{rkt_container_name!=\"\"}[5m])) by (kubernetes_io_hostname, rkt_container_name)",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}",
|
|
"metric": "network",
|
|
"refId": "F",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "Containers network I/O (5m avg)",
|
|
"tooltip": {
|
|
"msResolution": false,
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "cumulative"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "Bps",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": false
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "Containers network I/O",
|
|
"titleSize": "h6"
|
|
},
|
|
{
|
|
"collapse": false,
|
|
"height": "500px",
|
|
"panels": [
|
|
{
  "aliasColors": {},
  "bars": false,
  "dashLength": 10,
  "dashes": false,
  "datasource": "prometheus",
  "decimals": 2,
  "editable": true,
  "error": false,
  "fill": 1,
  "grid": {},
  "id": 29,
  "legend": {
    "alignAsTable": true,
    "avg": true,
    "current": true,
    "max": false,
    "min": false,
    "rightSide": false,
    "show": true,
    "sideWidth": 200,
    "sort": "current",
    "sortDesc": true,
    "total": false,
    "values": true
  },
  "lines": true,
  "linewidth": 2,
  "links": [],
  "nullPointMode": "connected",
  "percentage": false,
  "pointradius": 5,
  "points": false,
  "renderer": "flot",
  "seriesOverrides": [],
  "spaceLength": 10,
  "span": 12,
  "stack": false,
  "steppedLine": false,
  "targets": [
    {
      "expr": "sum (rate (container_network_receive_bytes_total{id!=\"/\"}[5m])) by (id)",
      "format": "time_series",
      "interval": "10s",
      "intervalFactor": 1,
      "legendFormat": "-> {{ id }}",
      "metric": "network",
      "refId": "A",
      "step": 10
    },
    {
      "expr": "- sum (rate (container_network_transmit_bytes_total{id!=\"/\"}[5m])) by (id)",
      "format": "time_series",
      "interval": "10s",
      "intervalFactor": 1,
      "legendFormat": "<- {{ id }}",
      "metric": "network",
      "refId": "B",
      "step": 10
    }
  ],
  "thresholds": [],
  "timeFrom": null,
  "timeShift": null,
  "title": "All processes network I/O (5m avg)",
  "tooltip": {
    "msResolution": false,
    "shared": true,
    "sort": 2,
    "value_type": "cumulative"
  },
  "type": "graph",
  "xaxis": {
    "buckets": null,
    "mode": "time",
    "name": null,
    "show": true,
    "values": []
  },
  "yaxes": [
    {
      "format": "Bps",
      "label": null,
      "logBase": 1,
      "max": null,
      "min": null,
      "show": true
    },
    {
      "format": "short",
      "label": null,
      "logBase": 1,
      "max": null,
      "min": null,
      "show": false
    }
  ]
}
|
|
],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": false,
|
|
"title": "All processes network I/O",
|
|
"titleSize": "h6"
|
|
}
|
|
],
|
|
"schemaVersion": 14,
|
|
"style": "dark",
|
|
"tags": [
|
|
"kubernetes"
|
|
],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"allValue": ".*",
|
|
"current": {
|
|
"text": "All",
|
|
"value": "$__all"
|
|
},
|
|
"datasource": "prometheus",
|
|
"hide": 0,
|
|
"includeAll": true,
|
|
"label": null,
|
|
"multi": false,
|
|
"name": "Node",
|
|
"options": [],
|
|
"query": "label_values(kubernetes_io_hostname)",
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"sort": 0,
|
|
"tagValuesQuery": "",
|
|
"tags": [],
|
|
"tagsQuery": "",
|
|
"type": "query",
|
|
"useTags": false
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-5m",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {
|
|
"refresh_intervals": [
|
|
"5s",
|
|
"10s",
|
|
"30s",
|
|
"1m",
|
|
"5m",
|
|
"15m",
|
|
"30m",
|
|
"1h",
|
|
"2h",
|
|
"1d"
|
|
],
|
|
"time_options": [
|
|
"5m",
|
|
"15m",
|
|
"1h",
|
|
"6h",
|
|
"12h",
|
|
"24h",
|
|
"2d",
|
|
"7d",
|
|
"30d"
|
|
]
|
|
},
|
|
"timezone": "browser",
|
|
"title": "Kubernetes cluster monitoring (via Prometheus)",
|
|
"version": 2
|
|
}
|
|
|
|
# ConfigMap holding one datasource definition file (prometheus.yaml).
# NOTE(review): presumably mounted into Grafana's datasource provisioning
# directory; the consumer is not visible in this part of the template.
- apiVersion: v1
  kind: ConfigMap
  metadata:
    labels:
      template: grafana-prometheus
    name: grafana-datasources
    namespace: "${NAMESPACE}"
  data:
    # Declares a non-editable datasource named "prometheus" reached in proxy
    # access mode at http://prometheus:9090 (port 9090 of the "prometheus"
    # Service defined in this template).
    prometheus.yaml: |-
      datasources:
      - name: prometheus
        type: prometheus
        access: proxy
        url: http://prometheus:9090
        version: 1
        editable: false
|
|
|
|
# Session secret for the OAuth proxy in front of Prometheus. The StatefulSet
# mounts this Secret at /etc/proxy/secrets and passes the file to the
# prom-proxy container via -cookie-secret-file.
- kind: Secret
  apiVersion: v1
  metadata:
    labels:
      template: grafana-prometheus
    name: prometheus-proxy
    namespace: '${NAMESPACE}'
  stringData:
    # SESSION_SECRET is generated as 43 alphanumeric characters; the trailing
    # "=" brings the value to 44 characters.
    # NOTE(review): looks like base64 padding for a 32-byte key - confirm.
    session_secret: '${SESSION_SECRET}='
|
|
|
|
# Session secret for the OAuth proxy in front of the alert buffer. The
# StatefulSet mounts this Secret at /etc/proxy/secrets in the alerts-proxy
# container, which reads it via -cookie-secret-file.
- kind: Secret
  apiVersion: v1
  metadata:
    labels:
      template: grafana-prometheus
    name: alerts-proxy
    namespace: '${NAMESPACE}'
  stringData:
    # Same generated SESSION_SECRET value (plus trailing "=") as the
    # prometheus-proxy Secret.
    session_secret: '${SESSION_SECRET}='
|
|
|
|
|
|
# Service account the Prometheus pod runs as (serviceAccountName in the
# StatefulSet). Both OAuth proxies also use it as their OAuth client id
# (system:serviceaccount:${NAMESPACE}:prometheus).
- apiVersion: v1
  kind: ServiceAccount
  metadata:
    name: prometheus
    namespace: "${NAMESPACE}"
    labels:
      template: grafana-prometheus
    annotations:
      # One OAuth redirect reference per Route exposed by this template
      # ("prometheus" and "alerts").
      serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
      serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
  # Explicit empty list instead of a bare key (a bare `secrets:` parses as null).
  secrets: []
|
|
|
|
# Binds the "cluster-reader" cluster role to the prometheus service account
# in the template namespace. The binding is expressed both as a structured
# subject and through the userNames / groupNames lists.
- kind: ClusterRoleBinding
  apiVersion: v1
  metadata:
    name: prometheus-is-cluster-reader
  roleRef:
    name: cluster-reader
  subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: "${NAMESPACE}"
  userNames:
  - "system:serviceaccount:${NAMESPACE}:prometheus"
  groupNames:
  - "system:cluster-readers"
|
|
|
|
# Service in front of the Prometheus pod (selector app=prometheus).
#   443  -> 8443  OAuth proxy (prom-proxy container, HTTPS)
#   9090 -> 9090  Prometheus itself (plain HTTP)
- kind: Service
  apiVersion: v1
  metadata:
    name: prometheus
    namespace: "${NAMESPACE}"
    labels:
      app: prometheus
      template: grafana-prometheus
    annotations:
      # The StatefulSet mounts the secret named here (prometheus-tls) as the
      # proxy's TLS certificate and key.
      service.alpha.openshift.io/serving-cert-secret-name: prometheus-tls
      # Read by the 'kubernetes-service-endpoints' scrape job in
      # prometheus.yml: keep this service as a target and scrape it over https.
      prometheus.io/scrape: "true"
      prometheus.io/scheme: https
  spec:
    type: ClusterIP
    sessionAffinity: "None"
    selector:
      app: prometheus
    ports:
    - name: oauth-proxy
      protocol: TCP
      port: 443
      targetPort: 8443
    - name: prometheus
      protocol: TCP
      port: 9090
      targetPort: 9090
|
|
|
|
# Service in front of the alerts OAuth proxy. It selects the same pod as the
# "prometheus" Service (app=prometheus) and maps 443 -> 9443, the port the
# alerts-proxy container listens on.
- kind: Service
  apiVersion: v1
  metadata:
    name: alerts
    namespace: "${NAMESPACE}"
    labels:
      app: prometheus
      template: grafana-prometheus
    annotations:
      # The StatefulSet mounts the secret named here (prometheus-alerts-tls)
      # as the alerts proxy's TLS certificate and key.
      service.alpha.openshift.io/serving-cert-secret-name: prometheus-alerts-tls
  spec:
    type: ClusterIP
    sessionAffinity: "None"
    selector:
      app: prometheus
    ports:
    - name: oauth-proxy
      protocol: TCP
      port: 443
      targetPort: 9443
|
|
|
|
# Route exposing the "prometheus" Service through its oauth-proxy port.
# TLS is re-encrypted between the router and the proxy.
- kind: Route
  apiVersion: v1
  metadata:
    name: prometheus
    namespace: "${NAMESPACE}"
    labels:
      app: prometheus
      template: grafana-prometheus
  spec:
    # PROMETHEUS_ROUTE_HOSTNAME has no default value in this template.
    host: "${PROMETHEUS_ROUTE_HOSTNAME}"
    to:
      kind: Service
      name: prometheus
      weight: 100
    port:
      targetPort: oauth-proxy
    tls:
      termination: reencrypt
    wildcardPolicy: "None"
|
|
|
|
# Route exposing the "alerts" Service through its oauth-proxy port.
# TLS is re-encrypted between the router and the proxy.
- kind: Route
  apiVersion: v1
  metadata:
    name: alerts
    namespace: "${NAMESPACE}"
    labels:
      app: prometheus
      template: grafana-prometheus
  spec:
    # ALERTS_ROUTE_HOSTNAME has no default value in this template.
    host: "${ALERTS_ROUTE_HOSTNAME}"
    to:
      kind: Service
      name: alerts
      weight: 100
    port:
      targetPort: oauth-proxy
    tls:
      termination: reencrypt
    wildcardPolicy: "None"
|
|
|
|
# StatefulSet running one pod with five containers:
#   prom-proxy    OAuth proxy on :8443 forwarding to Prometheus (localhost:9090)
#   prometheus    Prometheus server on :9090
#   alerts-proxy  OAuth proxy on :9443 forwarding to the alert buffer (localhost:9099)
#   alert-buffer  alert buffer on :9099
#   alertmanager  Alertmanager on :9093
- apiVersion: apps/v1beta1
  kind: StatefulSet
  metadata:
    name: prometheus
    namespace: "${NAMESPACE}"
    labels:
      app: prometheus
  spec:
    updateStrategy:
      type: RollingUpdate
    podManagementPolicy: Parallel
    # A label selector only accepts matchLabels / matchExpressions; the stray
    # `provider: openshift` key that used to sit here was not a selector field.
    selector:
      matchLabels:
        app: prometheus
    template:
      metadata:
        name: prometheus
        labels:
          app: prometheus
      spec:
        serviceAccountName: prometheus
        containers:
        # Deploy Prometheus behind an oauth proxy
        - name: prom-proxy
          image: "${PROXY_IMAGE}"
          imagePullPolicy: IfNotPresent
          ports:
          - containerPort: 8443
            name: web
          args:
          - '-provider=openshift'
          - '-https-address=:8443'
          # Empty value: no plain-HTTP listen address is configured.
          - '-http-address='
          - '-email-domain=*'
          - '-upstream=http://localhost:9090'
          - '-client-id=system:serviceaccount:${NAMESPACE}:prometheus'
          - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}'
          - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}'
          - '-tls-cert=/etc/tls/private/tls.crt'
          - '-tls-key=/etc/tls/private/tls.key'
          - '-client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token'
          - '-cookie-secret-file=/etc/proxy/secrets/session_secret'
          - '-openshift-ca=/etc/pki/tls/cert.pem'
          - '-openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt'
          # NOTE(review): presumably lets requests matching ^/metrics through
          # without authentication so the endpoint can be scraped - confirm.
          - '-skip-auth-regex=^/metrics'
          volumeMounts:
          - mountPath: /etc/tls/private
            name: prometheus-tls
          - mountPath: /etc/proxy/secrets
            name: prometheus-secrets
          - mountPath: /prometheus
            name: prometheus-data

        - name: prometheus
          image: "${PROMETHEUS_IMAGE}"
          imagePullPolicy: IfNotPresent
          args:
          - '--storage.tsdb.retention=6h'
          - '--storage.tsdb.min-block-duration=2m'
          - '--config.file=/etc/prometheus/prometheus.yml'
          - '--web.listen-address=0.0.0.0:9090'
          volumeMounts:
          - mountPath: /etc/prometheus
            name: prometheus-config
          - mountPath: /prometheus
            name: prometheus-data

        # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy
        - name: alerts-proxy
          image: "${PROXY_IMAGE}"
          imagePullPolicy: IfNotPresent
          ports:
          - containerPort: 9443
            name: web
          args:
          - '-provider=openshift'
          - '-https-address=:9443'
          # Empty value: no plain-HTTP listen address is configured.
          - '-http-address='
          - '-email-domain=*'
          - '-upstream=http://localhost:9099'
          - '-client-id=system:serviceaccount:${NAMESPACE}:prometheus'
          - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}'
          - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}'
          - '-tls-cert=/etc/tls/private/tls.crt'
          - '-tls-key=/etc/tls/private/tls.key'
          - '-client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token'
          - '-cookie-secret-file=/etc/proxy/secrets/session_secret'
          - '-openshift-ca=/etc/pki/tls/cert.pem'
          - '-openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt'
          volumeMounts:
          - mountPath: /etc/tls/private
            name: alerts-tls
          - mountPath: /etc/proxy/secrets
            name: alerts-secrets

        - name: alert-buffer
          image: "${ALERTBUFFER_IMAGE}"
          imagePullPolicy: IfNotPresent
          args:
          - '--storage-path=/alert-buffer/messages.db'
          volumeMounts:
          - mountPath: /alert-buffer
            name: alert-buffer-data
          ports:
          - containerPort: 9099
            name: alert-buf

        - name: alertmanager
          image: "${ALERTMANAGER_IMAGE}"
          imagePullPolicy: IfNotPresent
          args:
          - '--config.file=/etc/alertmanager/alertmanager.yml'
          ports:
          - containerPort: 9093
            name: web
          volumeMounts:
          - mountPath: /etc/alertmanager
            name: alertmanager-config
          - mountPath: /alertmanager
            name: alertmanager-data

        restartPolicy: Always
        volumes:
        # Prometheus configuration (ConfigMap "prometheus": prometheus.yml + prometheus.rules)
        - name: prometheus-config
          configMap:
            # 420 decimal == 0644 octal
            defaultMode: 420
            name: prometheus
        - name: prometheus-secrets
          secret:
            secretName: prometheus-proxy
        - name: prometheus-tls
          secret:
            secretName: prometheus-tls
        # NOTE(review): the three PersistentVolumeClaims referenced below
        # (prometheus, prometheus-alertmanager, prometheus-alertbuffer) are not
        # defined in this part of the template - confirm they exist elsewhere.
        - name: prometheus-data
          persistentVolumeClaim:
            claimName: prometheus
        # Alertmanager configuration (ConfigMap "prometheus-alerts": alertmanager.yml)
        - name: alertmanager-config
          configMap:
            # 420 decimal == 0644 octal
            defaultMode: 420
            name: prometheus-alerts
        - name: alerts-secrets
          secret:
            secretName: alerts-proxy
        - name: alerts-tls
          secret:
            secretName: prometheus-alerts-tls
        - name: alertmanager-data
          persistentVolumeClaim:
            claimName: prometheus-alertmanager
        - name: alert-buffer-data
          persistentVolumeClaim:
            claimName: prometheus-alertbuffer
|
|
|
|
# Prometheus configuration. The StatefulSet mounts this ConfigMap at
# /etc/prometheus, and Prometheus is started with
# --config.file=/etc/prometheus/prometheus.yml.
- apiVersion: v1
  kind: ConfigMap
  metadata:
    labels:
      template: grafana-prometheus
    name: prometheus
    namespace: "${NAMESPACE}"
  data:
    # Rule file matched by the '*.rules' glob in prometheus.yml; it declares a
    # single group with no rules yet.
    prometheus.rules: |-
      groups:
      - name: example-rules
        interval: 30s # defaults to global interval
        rules:

    # Main configuration: 30s scrape/evaluation intervals, five Kubernetes
    # scrape jobs (API servers, nodes, controllers, cAdvisor, annotated
    # service endpoints) and the local Alertmanager on localhost:9093.
    prometheus.yml: |-
      global:
        scrape_interval: 30s
        evaluation_interval: 30s

      rule_files:
      - '*.rules'

      # A scrape configuration for running Prometheus on a Kubernetes cluster.
      # This uses separate scrape configs for cluster components (i.e. API server, node)
      # and services to allow each to use different authentication configs.
      #
      # Kubernetes labels will be added as Prometheus labels on metrics via the
      # `labelmap` relabeling action.

      # Scrape config for API servers.
      #
      # Kubernetes exposes API servers as endpoints to the default/kubernetes
      # service so this uses `endpoints` role and uses relabelling to only keep
      # the endpoints associated with the default/kubernetes service using the
      # default named port `https`. This works for single API server deployments as
      # well as HA API server deployments.
      scrape_configs:
      - job_name: 'kubernetes-apiservers'

        kubernetes_sd_configs:
        - role: endpoints

        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

        # Keep only the default/kubernetes service endpoints for the https port. This
        # will add targets for each API server which Kubernetes adds an endpoint to
        # the default/kubernetes service.
        relabel_configs:
        - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
          action: keep
          regex: default;kubernetes;https

      # Scrape config for nodes.
      #
      # Each node exposes a /metrics endpoint that contains operational metrics for
      # the Kubelet and other components.
      - job_name: 'kubernetes-nodes'

        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

        kubernetes_sd_configs:
        - role: node

        relabel_configs:
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)

      # Scrape config for controllers.
      #
      # Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
      # the controllers.
      #
      # TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via
      # endpoints.
      - job_name: 'kubernetes-controllers'

        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

        kubernetes_sd_configs:
        - role: endpoints

        # Keep only the default/kubernetes service endpoints for the https port, and then
        # set the port to 8444. This is the default configuration for the controllers on OpenShift
        # masters.
        relabel_configs:
        - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
          action: keep
          regex: default;kubernetes;https
        - source_labels: [__address__]
          action: replace
          target_label: __address__
          regex: (.+)(?::\d+)
          replacement: $1:8444

      # Scrape config for cAdvisor.
      #
      # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
      # reports container metrics for each running pod. Scrape those by default.
      - job_name: 'kubernetes-cadvisor'

        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

        metrics_path: /metrics/cadvisor

        kubernetes_sd_configs:
        - role: node

        relabel_configs:
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)

      # Scrape config for service endpoints.
      #
      # The relabeling allows the actual service scrape endpoint to be configured
      # via the following annotations:
      #
      # * `prometheus.io/scrape`: Only scrape services that have a value of `true`
      # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
      # to set this to `https` & most likely set the `tls_config` of the scrape config.
      # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
      # * `prometheus.io/port`: If the metrics are exposed on a different port to the
      # service then set this appropriately.
      - job_name: 'kubernetes-service-endpoints'

        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          # TODO: this should be per target
          insecure_skip_verify: true

        kubernetes_sd_configs:
        - role: endpoints

        relabel_configs:
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
          action: keep
          regex: true
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
          action: replace
          target_label: __scheme__
          regex: (https?)
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
          action: replace
          target_label: __metrics_path__
          regex: (.+)
        - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
          action: replace
          target_label: __address__
          regex: (.+)(?::\d+);(\d+)
          replacement: $1:$2
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username]
          action: replace
          target_label: __basic_auth_username__
          regex: (.+)
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password]
          action: replace
          target_label: __basic_auth_password__
          regex: (.+)
        - action: labelmap
          regex: __meta_kubernetes_service_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: kubernetes_name

      alerting:
        alertmanagers:
        - scheme: http
          static_configs:
          - targets:
            - "localhost:9093"
|
|
|
|
# Alertmanager configuration. The StatefulSet mounts this ConfigMap at
# /etc/alertmanager, and Alertmanager is started with
# --config.file=/etc/alertmanager/alertmanager.yml.
- kind: ConfigMap
  apiVersion: v1
  metadata:
    name: prometheus-alerts
    namespace: "${NAMESPACE}"
    labels:
      template: grafana-prometheus
  data:
    # Single route: every alert goes to the "alert-buffer-wh" webhook receiver,
    # which posts to the alert-buffer container on localhost:9099.
    alertmanager.yml: |-
      global:

      # The root route on which each incoming alert enters.
      route:
        # default route if none match
        receiver: alert-buffer-wh

        # The labels by which incoming alerts are grouped together. For example,
        # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
        # be batched into a single group.
        # TODO:
        group_by: []

        # All the above attributes are inherited by all child routes and can
        # be overwritten on each.

      receivers:
      - name: alert-buffer-wh
        webhook_configs:
        - url: http://localhost:9099/topics/alerts
|
|
|
|
|
|
##
|
|
## Template Parameters
|
|
##
|
|
parameters:
# Image used by both OAuth proxy containers (prom-proxy and alerts-proxy).
- name: PROXY_IMAGE
  displayName: "OAuth Proxy image"
  description: "The Docker image to use for the OAuth Proxy."
  value: "openshift3/oauth-proxy:v3.9"

# Image used by the prometheus container.
- name: PROMETHEUS_IMAGE
  displayName: "Prometheus image"
  description: "The Docker image to use for Prometheus."
  value: "openshift3/prometheus:v3.9"

# Image used by the alert-buffer container.
- name: ALERTBUFFER_IMAGE
  displayName: "Alert Buffer image"
  description: "The Docker image to use for the Alert Buffer."
  value: "openshift3/prometheus-alert-buffer:v3.9"

# Image used by the alertmanager container.
- name: ALERTMANAGER_IMAGE
  displayName: "Alert Manager image"
  description: "The Docker image to use for the Alert Manager."
  value: "openshift3/prometheus-alertmanager:v3.9"

# Host of the "prometheus" Route; no default value.
- name: PROMETHEUS_ROUTE_HOSTNAME
  displayName: "Hostname of the Prometheus Service"
  description: "The desired hostname of the route to the Prometheus service."

# Host of the "alerts" Route; no default value.
- name: ALERTS_ROUTE_HOSTNAME
  displayName: "Hostname of the Prometheus Alert Service"
  description: "The desired hostname of the route to the Prometheus Alert service."

# Generated value: 43 alphanumeric characters. Both proxy Secrets store it
# with a trailing "=" appended.
- name: SESSION_SECRET
  description: "The session secret for the proxy"
  generate: expression
  from: '[a-zA-Z0-9]{43}'

# Namespace substituted into every namespaced object of this template.
- name: NAMESPACE
  description: "The namespace to instantiate this template under. Defaults to 'openshift-metrics'."
  value: "openshift-metrics"
|
|
|