---
# OpenShift Template that deploys Grafana behind an OpenShift OAuth proxy.
#
# Objects defined below (all created in ${NAMESPACE}):
#   - PersistentVolumeClaim "grafana"   : backs the volume mounted at /var/lib/grafana/
#   - Secret "oauth-proxy"              : session secret read by the proxy container
#   - ServiceAccount "grafana"          : service account the pod and the proxy run as
#   - ConfigMaps "grafana-config", "grafana-datasources", "grafana-dashboards"
#   - ImageStream "grafana"             : stable / beta / master / custom tags
#   - DeploymentConfig "grafana"        : one pod with a grafana and a proxy container
#   - Service and Route "grafana"       : expose the proxy port 8443 (reencrypt TLS)
#   - ConfigMap "grafana-openshift-dashboard" : bundled dashboard JSON
apiVersion: v1
kind: Template
labels:
  template: grafana
metadata:
  annotations:
    description: |-
      Grafana allows you to query, visualize, alert on and understand your metrics no matter where they are stored.
      Create, explore, and share dashboards with your team and foster a data driven culture.
    openshift.io/display-name: Grafana
    tags: instant-app
    template.openshift.io/documentation-url: http://docs.grafana.org/
    template.openshift.io/long-description: A grafana distribution for OpenShift.
    template.openshift.io/provider-display-name: Grafana
    template.openshift.io/support-url: https://grafana.com/enterprise
  name: grafana

parameters:
  # Image used for the "proxy" container of the DeploymentConfig.
  - description: The Docker image to use for the OAuth Proxy.
    displayName: OAuth Proxy image
    name: PROXY_IMAGE
    value: openshift3/oauth-proxy:v3.9
    required: true
  # Optional; used as spec.host of the Route.
  - description: The desired hostname of the route to the Grafana service.
    displayName: Hostname of the Grafana Service
    name: GRAFANA_HOSTNAME
    required: false
  # Generated at instantiation time: 43 characters drawn from [a-zA-Z0-9].
  - description: The session secret for the proxy
    displayName: Session secret
    name: SESSION_SECRET
    generate: expression
    from: "[a-zA-Z0-9]{43}"
    required: true
  # Only referenced by the "custom" tag of the ImageStream.
  - description: The Grafana version to deploy
    displayName: Grafana version
    name: GRAFANA_CUSTOM_VERSION
    value: "5.1.4"
    required: true
  # Selects which ImageStream tag the DeploymentConfig image trigger follows.
  - description: The Grafana release to deploy, either 'stable', 'beta', 'master' or 'custom'
    displayName: Grafana release
    name: GRAFANA_RELEASE
    value: beta
    required: true
  - description: The namespace used to deploy this template
    displayName: Kubernetes Namespace
    name: NAMESPACE
    required: true
  - description: Volume size for the Grafana DB
    displayName: Volume Size
    name: GRAFANA_VOLUME_SIZE
    value: "1Gi"
    required: true

objects:
  # Storage for the "grafana-storage" volume of the DeploymentConfig.
  # The namespace is set explicitly, like on every other object in this
  # template, so the claim is created next to the DeploymentConfig using it.
  - apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: grafana
      namespace: "${NAMESPACE}"
    spec:
      accessModes:
        - ReadWriteOnce
      resources:
        requests:
          storage: "${GRAFANA_VOLUME_SIZE}"

  # Mounted at /etc/proxy/secrets and passed to the proxy via
  # --cookie-secret-file. The value is the 43 generated characters plus "=".
  # NOTE(review): presumably the trailing "=" gives the value the shape of a
  # base64-encoded key - confirm against the oauth-proxy documentation.
  - apiVersion: v1
    kind: Secret
    metadata:
      name: oauth-proxy
      namespace: "${NAMESPACE}"
    stringData:
      session_secret: "${SESSION_SECRET}="

  # Service account used by the pod (serviceAccountName) and named in the
  # proxy's --openshift-service-account argument. The annotation points the
  # OAuth redirect at the "grafana" Route.
  - apiVersion: v1
    kind: ServiceAccount
    metadata:
      name: grafana
      namespace: "${NAMESPACE}"
      annotations:
        serviceaccounts.openshift.io/oauth-redirectreference.proxy: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"grafana"}}'
    # Explicit empty list instead of a bare key (which parses as null).
    secrets: []

  # Main Grafana configuration, mounted at /etc/grafana/.
  # Grafana binds to 127.0.0.1 only; the proxy container in the same pod
  # forwards to it (--upstream=http://localhost:3000) and the user name is
  # taken from the X-Forwarded-User header.
  - kind: ConfigMap
    apiVersion: v1
    metadata:
      name: grafana-config
      namespace: "${NAMESPACE}"
    data:
      grafana.ini: |-
        [server]
        http_addr = 127.0.0.1
        [auth]
        disable_login_form = true
        disable_signout_menu = true
        [auth.basic]
        enabled = false
        [auth.proxy]
        enabled = true
        header_name = X-Forwarded-User
        [users]
        auto_assign_org = true
        auto_assign_org_role = Admin
        [log]
        mode = console

  # Datasource provisioning file, mounted at
  # /etc/grafana/provisioning/datasources.
  - kind: ConfigMap
    apiVersion: v1
    metadata:
      name: grafana-datasources
      namespace: "${NAMESPACE}"
    data:
      prometheus.yaml: |-
        apiVersion: 1
        datasources:
          - name: prometheus
            type: prometheus
            access: proxy
            url: http://prometheus:9090
            isDefault: false
            version: 1
            editable: true

  # Dashboard provider file, mounted at /etc/grafana/provisioning/dashboards.
  # Its path option matches the mount point of the
  # "grafana-openshift-dashboard" ConfigMap.
  - kind: ConfigMap
    apiVersion: v1
    metadata:
      name: grafana-dashboards
      namespace: "${NAMESPACE}"
    data:
      prometheus.yaml: |-
        apiVersion: 1
        providers:
          - name: 'default'
            orgId: 1
            folder: ''
            type: file
            disableDeletion: false
            updateIntervalSeconds: 300
            options:
              path: /usr/share/openshift-dashboards

  # One tag per supported GRAFANA_RELEASE value; every tag is re-imported on
  # a schedule (importPolicy.scheduled).
  - apiVersion: v1
    kind: ImageStream
    metadata:
      labels:
        build: grafana
      namespace: "${NAMESPACE}"
      name: grafana
    spec:
      dockerImageRepository: docker.io/grafana/grafana
      tags:
        - name: stable
          from:
            kind: DockerImage
            name: 'docker.io/grafana/grafana:5.1.4'
          importPolicy:
            scheduled: true
        - name: beta
          from:
            kind: DockerImage
            name: 'docker.io/grafana/grafana:5.2.0-beta3'
          importPolicy:
            scheduled: true
        - name: master
          from:
            kind: DockerImage
            name: 'docker.io/grafana/grafana:master'
          importPolicy:
            scheduled: true
        - name: custom
          from:
            kind: DockerImage
            name: 'docker.io/grafana/grafana:${GRAFANA_CUSTOM_VERSION}'
          importPolicy:
            scheduled: true

  # Single-replica pod with two containers: "grafana" and the OAuth "proxy".
  - apiVersion: v1
    kind: DeploymentConfig
    metadata:
      labels:
        app: grafana
      name: grafana
      namespace: "${NAMESPACE}"
    spec:
      replicas: 1
      selector:
        app: grafana
        deploymentconfig: grafana
      strategy:
        activeDeadlineSeconds: 21600
        resources: {}
        type: Recreate
      template:
        metadata:
          creationTimestamp: null
          labels:
            app: grafana
            deploymentconfig: grafana
        spec:
          containers:
            # Placeholder image: the ImageChange trigger at the end of this
            # object names this container and the grafana ImageStream tag.
            - image: " "
              imagePullPolicy: IfNotPresent
              name: grafana
              ports:
                - containerPort: 3000
                  protocol: TCP
              resources: {}
              securityContext: {}
              terminationMessagePath: /dev/termination-log
              volumeMounts:
                - mountPath: /etc/grafana/
                  name: grafana-config
                - mountPath: /var/log/grafana/
                  name: grafana-logs
                - mountPath: /var/lib/grafana/
                  name: grafana-storage
                - mountPath: /etc/grafana/provisioning/datasources
                  name: grafana-datasources
                - mountPath: /etc/grafana/provisioning/dashboards
                  name: grafana-dashboards
                - mountPath: /usr/share/openshift-dashboards
                  name: grafana-openshift-dashboard
            # Serves HTTPS on 8443 (plain HTTP listener disabled by the empty
            # --http-address) and forwards to Grafana on localhost:3000.
            - image: "${PROXY_IMAGE}"
              imagePullPolicy: IfNotPresent
              name: proxy
              args:
                - --provider=openshift
                - --https-address=:8443
                - --http-address=
                - --upstream=http://localhost:3000
                - --openshift-service-account=grafana
                - '--openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}'
                - --tls-cert=/etc/tls/private/tls.crt
                - --tls-key=/etc/tls/private/tls.key
                - --client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
                - --cookie-secret-file=/etc/proxy/secrets/session_secret
                - --openshift-ca=/etc/pki/tls/cert.pem
                - --openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
              ports:
                - containerPort: 8443
                  name: web
                  protocol: TCP
              resources: {}
              securityContext: {}
              terminationMessagePath: /dev/termination-log
              terminationMessagePolicy: File
              volumeMounts:
                - mountPath: /etc/tls/private
                  name: tls
                - mountPath: /etc/proxy/secrets
                  name: secrets
          dnsPolicy: ClusterFirst
          terminationGracePeriodSeconds: 30
          restartPolicy: Always
          serviceAccountName: grafana
          volumes:
            - name: secrets
              secret:
                secretName: oauth-proxy
            # Secret name matches the serving-cert annotation on the Service.
            - name: tls
              secret:
                secretName: grafana-tls
            - name: grafana-storage
              persistentVolumeClaim:
                claimName: grafana
            - name: grafana-logs
              emptyDir: {}
            - name: grafana-config
              configMap:
                name: grafana-config
            - name: grafana-datasources
              configMap:
                name: grafana-datasources
            - name: grafana-dashboards
              configMap:
                name: grafana-dashboards
            - name: grafana-openshift-dashboard
              configMap:
                name: grafana-openshift-dashboard
      test: false
      triggers:
        - type: ConfigChange
        - type: ImageChange
          imageChangeParams:
            automatic: true
            containerNames:
              - grafana
            from:
              kind: ImageStreamTag
              name: "grafana:${GRAFANA_RELEASE}"

  # Exposes the proxy port only. The annotation names the secret
  # ("grafana-tls") that the DeploymentConfig mounts at /etc/tls/private.
  - apiVersion: v1
    kind: Service
    metadata:
      labels:
        app: grafana
      name: grafana
      namespace: "${NAMESPACE}"
      annotations:
        service.alpha.openshift.io/serving-cert-secret-name: grafana-tls
    spec:
      ports:
        - name: oauth-proxy
          port: 8443
          protocol: TCP
          targetPort: 8443
      selector:
        app: grafana
        deploymentconfig: grafana
      sessionAffinity: None
      type: ClusterIP

  # External entry point; targets the Service port named "oauth-proxy".
  - apiVersion: v1
    kind: Route
    metadata:
      labels:
        app: grafana
      name: grafana
      namespace: "${NAMESPACE}"
    spec:
      host: "${GRAFANA_HOSTNAME}"
      port:
        targetPort: oauth-proxy
      to:
        kind: Service
        name: grafana
        weight: 100
      wildcardPolicy: None
      tls:
        termination: reencrypt

  # Dashboard definition mounted at /usr/share/openshift-dashboards.
  - kind: ConfigMap
    apiVersion: v1
    metadata:
      name: grafana-openshift-dashboard
      namespace: "${NAMESPACE}"
    data:
      # NOTE(review): the JSON document continues on the following lines of
      # this file. Every continuation line must be indented at least as far
      # as the first content line below to remain part of this block scalar.
      openshift-cluster.json: |-
        { "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "description": "Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. 
Uses cAdvisor metrics only.", "editable": true, "gnetId": 315, "graphTooltip": 0, "hideControls": false, "id": 2, "links": [], "refresh": "10s", "rows": [ { "collapse": false, "height": "200px", "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "fill": 1, "grid": {}, "height": "200px", "id": 32, "legend": { "alignAsTable": false, "avg": true, "current": true, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum (rate (container_network_receive_bytes_total{id=\"/\"}[5m]))", "format": "time_series", "interval": "10s", "intervalFactor": 1, "legendFormat": "Received", "metric": "network", "refId": "A", "step": 10 }, { "expr": "- sum (rate (container_network_transmit_bytes_total{id=\"/\"}[5m]))", "format": "time_series", "interval": "10s", "intervalFactor": 1, "legendFormat": "Sent", "metric": "network", "refId": "B", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Network I/O pressure", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "transparent": false, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Network I/O pressure", "titleSize": "h6" }, { "collapse": false, "height": 
"250px", "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "prometheus", "editable": true, "error": false, "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "height": "180px", "id": 4, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 4, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100", "interval": "10s", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "65, 90", "title": "Cluster memory usage", "transparent": false, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "height": "180px", "id": 6, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": 
"connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 4, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum (rate (container_cpu_usage_seconds_total{id=\"/\"}[5m])) / sum (machine_cpu_cores{}) * 100", "format": "time_series", "interval": "10s", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "65, 90", "title": "Cluster CPU usage (5m avg)", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "height": "180px", "id": 7, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 4, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|mapper/[-a-zA-Z0-9]+)$\",id=\"/\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|mapper/[-a-zA-Z0-9]+)$\",id=\"/\"}) * 100", "format": "time_series", "interval": "10s", "intervalFactor": 1, "legendFormat": "", "metric": "", 
"refId": "A", "step": 10 } ], "thresholds": "65, 90", "title": "Cluster filesystem usage", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "height": "1px", "id": 9, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "20%", "prefix": "", "prefixFontSize": "20%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", "interval": "10s", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "", "title": "Used", "type": "singlestat", "valueFontSize": "50%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "height": "1px", "id": 10, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", 
"value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"})", "interval": "10s", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "", "title": "Total", "type": "singlestat", "valueFontSize": "50%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "height": "1px", "id": 11, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": " cores", "postfixFontSize": "30%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum (rate (container_cpu_usage_seconds_total{id=\"/\"}[5m]))", "format": "time_series", "interval": "10s", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "", "title": "Used", "type": "singlestat", "valueFontSize": "50%", "valueMaps": [ { "op": "=", "text": "N/A", "value": 
"null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "height": "1px", "id": 12, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": " cores", "postfixFontSize": "30%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"})", "interval": "10s", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "", "title": "Total", "type": "singlestat", "valueFontSize": "50%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "height": "1px", "id": 13, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", 
"rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|mapper/[-a-zA-Z0-9]+)$\",id=\"/\"})", "format": "time_series", "interval": "10s", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "", "title": "Used", "type": "singlestat", "valueFontSize": "50%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "height": "1px", "id": 14, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum (container_fs_limit_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|mapper/[-a-zA-Z0-9]+)$\",id=\"/\"})", "format": "time_series", "interval": "10s", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "", "title": "Total", "type": "singlestat", "valueFontSize": "50%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": 
false, "title": "Total usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "prometheus", "decimals": 3, "editable": true, "error": false, "fill": 0, "grid": {}, "height": "", "id": 17, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum (rate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (pod_name)", "format": "time_series", "interval": "10s", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", "metric": "container_cpu", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Pods CPU usage (1m avg)", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "transparent": false, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": "cores", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Pods CPU usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "prometheus", "decimals": 3, "editable": true, "error": false, "fill": 0, "grid": {}, "height": "", "id": 23, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, 
"rightSide": true, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum (rate (container_cpu_usage_seconds_total{id=~\"^/system.slice.*\"}[5m])) by (systemd_service_name)", "format": "time_series", "hide": false, "interval": "10s", "intervalFactor": 1, "legendFormat": "{{ systemd_service_name }}", "metric": "container_cpu", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "System services CPU usage (5m avg)", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": "cores", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "System services CPU usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "prometheus", "decimals": 3, "editable": true, "error": false, "fill": 0, "grid": {}, "height": "", "id": 24, "legend": { "alignAsTable": true, "avg": true, "current": true, "hideEmpty": false, "hideZero": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, 
"span": 12, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum (rate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\"}[5m])) by (container_name, pod_name)", "format": "time_series", "hide": false, "interval": "10s", "intervalFactor": 1, "legendFormat": "pod: {{ pod_name }} | {{ container_name }}", "metric": "container_cpu", "refId": "A", "step": 10 }, { "expr": "sum (rate (container_cpu_usage_seconds_total{image!=\"\",name!~\"^k8s_.*\"}[5m])) by (kubernetes_io_hostname, name, image)", "format": "time_series", "hide": false, "interval": "10s", "intervalFactor": 1, "legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", "metric": "container_cpu", "refId": "B", "step": 10 }, { "expr": "sum (rate (container_cpu_usage_seconds_total{rkt_container_name!=\"\"}[5m])) by (kubernetes_io_hostname, rkt_container_name)", "format": "time_series", "interval": "10s", "intervalFactor": 1, "legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", "metric": "container_cpu", "refId": "C", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Containers CPU usage (5m avg)", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": "cores", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Containers CPU usage", "titleSize": "h6" }, { "collapse": true, "height": "500px", "panels": [ { "aliasColors": {}, "bars": false, "datasource": "prometheus", "decimals": 3, "editable": true, "error": false, "fill": 0, "grid": {}, "id": 20, "isNew": true, "legend": { "alignAsTable": true, "avg": true, 
"current": true, "max": false, "min": false, "rightSide": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 12, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum (rate (container_cpu_usage_seconds_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[1m])) by (id)", "hide": false, "interval": "10s", "intervalFactor": 1, "legendFormat": "{{ id }}", "metric": "container_cpu", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "All processes CPU usage (1m avg)", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "show": true }, "yaxes": [ { "format": "none", "label": "cores", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "All processes CPU usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "fill": 0, "grid": {}, "id": 25, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum 
(container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (pod_name)", "interval": "10s", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", "metric": "container_memory_usage:sort_desc", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Pods memory usage", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Pods memory usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "fill": 0, "grid": {}, "id": 26, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum (container_memory_working_set_bytes{id=~\"^/system.slice/.*\"}) by (id)", "format": "time_series", "interval": "10s", "intervalFactor": 1, "legendFormat": "{{ id }}", "metric": "container_memory_usage:sort_desc", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "System services memory usage", "tooltip": { "msResolution": false, "shared": true, 
"sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "System services memory usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "fill": 0, "grid": {}, "id": 27, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}) by (container_name, pod_name)", "interval": "10s", "intervalFactor": 1, "legendFormat": "pod: {{ pod_name }} | {{ container_name }}", "metric": "container_memory_usage:sort_desc", "refId": "A", "step": 10 }, { "expr": "sum (container_memory_working_set_bytes{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, name, image)", "interval": "10s", "intervalFactor": 1, "legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", "metric": "container_memory_usage:sort_desc", "refId": "B", "step": 10 }, { "expr": "sum 
(container_memory_working_set_bytes{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, rkt_container_name)", "interval": "10s", "intervalFactor": 1, "legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", "metric": "container_memory_usage:sort_desc", "refId": "C", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Containers memory usage", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Containers memory usage", "titleSize": "h6" }, { "collapse": false, "height": "500px", "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "fill": 0, "grid": {}, "id": 28, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum (container_memory_working_set_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) by (id)", "interval": "10s", "intervalFactor": 1, "legendFormat": "{{ id }}", "metric": "container_memory_usage:sort_desc", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "All 
processes memory usage", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "All processes memory usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "fill": 1, "grid": {}, "id": 16, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (pod_name)", "format": "time_series", "interval": "10s", "intervalFactor": 1, "legendFormat": "-> {{ pod_name }}", "metric": "network", "refId": "A", "step": 10 }, { "expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (pod_name)", "format": "time_series", "interval": "10s", "intervalFactor": 1, "legendFormat": "<- {{ pod_name }}", "metric": "network", "refId": "B", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Pods network I/O (5m avg)", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": 
"graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Pods network I/O", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "prometheus", "decimals": 2, "editable": true, "error": false, "fill": 1, "grid": {}, "id": 30, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (container_name, pod_name)", "format": "time_series", "hide": false, "interval": "10s", "intervalFactor": 1, "legendFormat": "-> pod: {{ pod_name }} | {{ container_name }}", "metric": "network", "refId": "B", "step": 10 }, { "expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (container_name, pod_name)", "format": "time_series", "hide": false, "interval": "10s", "intervalFactor": 1, "legendFormat": "<- pod: {{ pod_name }} | {{ container_name }}", "metric": "network", "refId": "D", "step": 10 }, { "expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name!~\"^k8s_.*\"}[5m])) by (kubernetes_io_hostname, name, image)", "format": "time_series", "hide": false, "interval": "10s", "intervalFactor": 1, 
"legendFormat": "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", "metric": "network", "refId": "A", "step": 10 }, { "expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name!~\"^k8s_.*\"}[5m])) by (kubernetes_io_hostname, name, image)", "format": "time_series", "hide": false, "interval": "10s", "intervalFactor": 1, "legendFormat": "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", "metric": "network", "refId": "C", "step": 10 }, { "expr": "sum (rate (container_network_transmit_bytes_total{rkt_container_name!=\"\"}[5m])) by (kubernetes_io_hostname, rkt_container_name)", "format": "time_series", "hide": false, "interval": "10s", "intervalFactor": 1, "legendFormat": "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", "metric": "network", "refId": "E", "step": 10 }, { "expr": "- sum (rate (container_network_transmit_bytes_total{rkt_container_name!=\"\"}[5m])) by (kubernetes_io_hostname, rkt_container_name)", "format": "time_series", "hide": false, "interval": "10s", "intervalFactor": 1, "legendFormat": "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", "metric": "network", "refId": "F", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Containers network I/O (5m avg)", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Containers network I/O", "titleSize": "h6" }, { "collapse": false, "height": "500px", "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "prometheus", 
"decimals": 2, "editable": true, "error": false, "fill": 1, "grid": {}, "id": 29, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum (rate (container_network_receive_bytes_total{id!=\"/\"}[5m])) by (id)", "format": "time_series", "interval": "10s", "intervalFactor": 1, "legendFormat": "-> {{ id }}", "metric": "network", "refId": "A", "step": 10 }, { "expr": "- sum (rate (container_network_transmit_bytes_total{id!=\"/\"}[5m])) by (id)", "format": "time_series", "interval": "10s", "intervalFactor": 1, "legendFormat": "<- {{ id }}", "metric": "network", "refId": "B", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "All processes network I/O (1m avg)", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "All processes network I/O", "titleSize": "h6" } ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes" ], "templating": { "list": [ { "allValue": ".*", "current": { "text": "All", "value": "$__all" }, "datasource": "prometheus", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "Node", "options": [], "query": "label_values(kubernetes_io_hostname)", 
"refresh": 1, "regex": "", "sort": 0, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-5m", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "browser", "title": "Kubernetes cluster monitoring (via Prometheus)", "version": 2 }