diff --git a/grafana/Dockerfile b/grafana/Dockerfile deleted file mode 100644 index 5418b3f..0000000 --- a/grafana/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM registry.access.redhat.com/rhel7:latest - -ADD grafana.repo /etc/yum.repos.d/grafana.repo -RUN yum update -y \ - && yum -y install grafana fontconfig freetype urw-fonts\ - && yum clean all \ - && rm -rf /var/cache/yum \ - && chmod -R ugo+rw,+X /var/lib/grafana/ /var/run/ /var/log/grafana /etc/grafana/ - -VOLUME /var/lib/grafana/ /var/run/grafana /var/log/grafana -WORKDIR /usr/share/grafana/ - -EXPOSE 3000 -ENTRYPOINT [ "/usr/sbin/grafana-server" ] -CMD [ "--pidfile=/var/run/grafana.pid", "--config=/etc/grafana/grafana.ini", "cfg:default.paths.data=/var/lib/grafana/", "cfg:default.paths.logs=/var/log/grafana", "cfg:default.paths.plugins=/var/lib/grafana/plugins" ] - diff --git a/grafana/README.md b/grafana/README.md index a2c83f8..85fe802 100644 --- a/grafana/README.md +++ b/grafana/README.md @@ -1,4 +1,4 @@ -# Grafana and Prometheus for OpenShift 3.7 +# Grafana and Prometheus for OpenShift 3.9 ## Description @@ -8,37 +8,66 @@ One of those sources is Prometheus. This project proposes ready-to-use templates to deploy Prometheus and Grafana on OpenShift. -## Deployment +## Pre-requisites -### Pre-requisites +Make sure you are cluster-admin on your OpenShift cluster. -First, make sure the is a `rhel7` imagestream in the `openshift` namespace. -``` -oc import-image -n openshift rhel7 --from registry.access.redhat.com/rhel7:7.4 --confirm +## Deploy Prometheus + +To deploy prometheus, process the `prometheus.yaml` template with at least the +`NAMESPACE` parameter. This parameter must be the name of the OpenShift +project where you want to deploy Prometheus. This parameter is required +to setup correctly the OpenShift authentication in Prometheus. 
+ +For instance, to deploy Prometheus in a project named "my-metrics", use: + +```sh +oc process -f prometheus.yaml -p NAMESPACE=my-metrics |oc create -n my-metrics -f - ``` -Then, make sure you are cluster-admin on your OpenShift cluster. +## Deploy Grafana -### Deploy Grafana and Prometheus +To deploy grafana, process the `grafana.yaml` template with at least the +`NAMESPACE` parameter. This parameter must be the name of the OpenShift +project where you want to deploy Grafana. This parameter is required +to setup correctly the OpenShift authentication in Grafana. +For instance, to deploy Grafana in a project named "my-metrics", use: + +```sh +oc process -f grafana.yaml -p NAMESPACE=my-metrics |oc create -n my-metrics -f - ``` -oc process -f grafana-prometheus-storage.yaml -p PVC_SIZE=1Gi |oc create -f - -oc process -f grafana-prometheus.yaml -p PROMETHEUS_ROUTE_HOSTNAME=prometheus.app.openshift.test -p ALERTS_ROUTE_HOSTNAME=alerts.app.openshift.test |oc create -f - -oc process -f grafana-base.yaml -p GRAFANA_ROUTE_HOSTNAME=grafana.app.openshift.test |oc create -f - + +### Choosing the version to deploy + +To deploy the latest stable release of Grafana: + +```sh +oc process -f grafana.yaml -p NAMESPACE=my-metrics -p GRAFANA_RELEASE=stable |oc create -n my-metrics -f - ``` -### Deploy only Grafana with its vanilla configuration +To deploy the latest beta release of Grafana: +```sh +oc process -f grafana.yaml -p NAMESPACE=my-metrics -p GRAFANA_RELEASE=beta |oc create -n my-metrics -f - ``` -oc process -f grafana-nodatasource.yaml |oc create -f - -oc process -f grafana-base.yaml |oc create -f - + +To deploy a custom version of Grafana: + +```sh +oc process -f grafana.yaml -p NAMESPACE=my-metrics -p GRAFANA_RELEASE=custom -p GRAFANA_CUSTOM_VERSION=4.1.2 |oc create -n my-metrics -f - ``` -## Configuration +### Misc. 
settings -Once deployed, connect to Grafana and add a datasource with the following configuration: -- Name: `prometheus` -- Type: `Prometheus` -- URL: `http://prometheus:9090` -- Access: `proxy` +To customize the hostname of the Grafana route, use: +```sh +oc process -f grafana.yaml -p NAMESPACE=my-metrics -p GRAFANA_HOSTNAME=grafana.acme.corp |oc create -n my-metrics -f - +``` + +By default, a 1Gb volume is reserved for grafana, if you want to use a different size: + +```sh +oc process -f grafana.yaml -p NAMESPACE=my-metrics -p GRAFANA_VOLUME_SIZE=10Gi |oc create -n my-metrics -f - +``` diff --git a/grafana/grafana-base.yaml b/grafana/grafana-app-311.yaml similarity index 62% rename from grafana/grafana-base.yaml rename to grafana/grafana-app-311.yaml index 5cd9700..6cd7947 100644 --- a/grafana/grafana-base.yaml +++ b/grafana/grafana-app-311.yaml @@ -1,28 +1,78 @@ apiVersion: v1 kind: Template labels: - template: grafana-base -message: See https://github.com/nmasse-itix/OpenShift-Docker-Images/tree/master/grafana for more details. + template: grafana metadata: annotations: description: |- - A grafana distribution for OpenShift. - openshift.io/display-name: Grafana (Base) + Grafana allows you to query, visualize, alert on and understand your metrics + no matter where they are stored. Create, explore, and share dashboards with + your team and foster a data driven culture. + openshift.io/display-name: Grafana tags: instant-app - template.openshift.io/documentation-url: https://github.com/nmasse-itix/OpenShift-Docker-Images/tree/master/grafana + template.openshift.io/documentation-url: http://docs.grafana.org/ template.openshift.io/long-description: A grafana distribution for OpenShift. 
- template.openshift.io/provider-display-name: Nicolas Massé - template.openshift.io/support-url: https://github.com/nmasse-itix/OpenShift-Docker-Images/issues - name: grafana-base + template.openshift.io/provider-display-name: Grafana + template.openshift.io/support-url: https://grafana.com/enterprise + name: grafana +parameters: +- description: The Docker image to use for the OAuth Proxy. + displayName: OAuth Proxy image + name: PROXY_IMAGE + value: registry.access.redhat.com/openshift3/oauth-proxy:v3.11 + required: true + +- description: The desired hostname of the route to the Grafana service. + displayName: Hostname of the Grafana Service + name: GRAFANA_HOSTNAME + required: false + +- description: The session secret for the proxy + name: SESSION_SECRET + generate: expression + from: "[a-zA-Z0-9]{43}" + required: true + +- description: The Grafana version to deploy + displayName: Grafana version + name: GRAFANA_CUSTOM_VERSION + value: 5.1.4 + required: true + +- description: The Grafana release to deploy, either 'stable', 'beta', 'master', 'custom' or 'redhat' + displayName: Grafana release + name: GRAFANA_RELEASE + value: master + required: true + +- description: The namespace used to deploy this template + displayName: Kubernetes Namespace + name: NAMESPACE + required: true + +- description: Volume size for the Grafana DB + displayName: Volume Size + name: GRAFANA_VOLUME_SIZE + value: "1Gi" + required: true + objects: +- apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: grafana + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${GRAFANA_VOLUME_SIZE} - apiVersion: v1 kind: Secret metadata: name: oauth-proxy namespace: "${NAMESPACE}" - labels: - template: grafana-base stringData: session_secret: "${SESSION_SECRET}=" @@ -31,8 +81,6 @@ objects: metadata: name: grafana namespace: ${NAMESPACE} - labels: - template: grafana-base annotations: serviceaccounts.openshift.io/oauth-redirectreference.proxy: 
'{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"grafana"}}' secrets: @@ -42,13 +90,8 @@ objects: metadata: name: grafana-config namespace: ${NAMESPACE} - labels: - template: grafana-base data: grafana.ini: |- - [paths] - datasources = /etc/grafana-datasources/ - [server] http_addr = 127.0.0.1 @@ -67,67 +110,90 @@ objects: auto_assign_org = true auto_assign_org_role = Admin - [analytics] - reporting_enabled = false - [log] mode = console - [dashboards.json] - enabled = true - path = /var/lib/grafana-dashboards +- kind: ConfigMap + apiVersion: v1 + metadata: + name: grafana-datasources + namespace: ${NAMESPACE} + data: + prometheus.yaml: |- + apiVersion: 1 + datasources: + - name: prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: false + version: 1 + editable: true + options: + path: '/usr/share/dashboards' -- apiVersion: v1 - kind: ImageStream +- kind: ConfigMap + apiVersion: v1 metadata: - labels: - build: grafana - template: grafana-base + name: grafana-dashboards namespace: ${NAMESPACE} - name: grafana + data: + prometheus.yaml: |- + apiVersion: 1 + providers: + - name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 300 - apiVersion: v1 - kind: BuildConfig + kind: ImageStream metadata: - name: grafana labels: build: grafana - template: grafana-base namespace: ${NAMESPACE} + name: grafana spec: - successfulBuildsHistoryLimit: 1 - failedBuildsHistoryLimit: 1 - nodeSelector: null - output: - to: - kind: ImageStreamTag - name: grafana:latest - namespace: ${NAMESPACE} - postCommit: {} - resources: {} - runPolicy: Serial - source: - git: - uri: ${GIT_REPO} - type: Git - contextDir: grafana - strategy: - dockerStrategy: + dockerImageRepository: docker.io/grafana/grafana + tags: + - name: stable from: - kind: ImageStreamTag - name: ${RHEL_IMAGE_STREAM_TAG} - namespace: ${RHEL_IMAGE_STREAM_NAMESPACE} - type: Docker - triggers: - - type: 
ConfigChange - - type: ImageChange + kind: DockerImage + name: 'docker.io/grafana/grafana:5.1.4' + importPolicy: + scheduled: true + - name: beta + from: + kind: DockerImage + name: 'docker.io/grafana/grafana:5.2.0-beta3' + importPolicy: + scheduled: true + - name: master + from: + kind: DockerImage + name: 'docker.io/grafana/grafana:master' + importPolicy: + scheduled: true + - name: custom + from: + kind: DockerImage + name: 'docker.io/grafana/grafana:${GRAFANA_CUSTOM_VERSION}' + importPolicy: + scheduled: true + - name: redhat + from: + kind: DockerImage + name: 'registry.access.redhat.com/openshift3/grafana:v3.11' + importPolicy: + scheduled: true - apiVersion: v1 kind: DeploymentConfig metadata: labels: app: grafana - template: grafana-base name: grafana namespace: ${NAMESPACE} spec: @@ -145,7 +211,6 @@ objects: labels: app: grafana deploymentconfig: grafana - template: grafana-base spec: containers: - image: " " @@ -160,11 +225,13 @@ objects: volumeMounts: - mountPath: /etc/grafana/ name: grafana-config - - mountPath: /etc/grafana-datasources - name: grafana-datasources + - mountPath: /var/log/grafana/ + name: grafana-logs - mountPath: /var/lib/grafana/ name: grafana-storage - - mountPath: /var/lib/grafana-dashboards + - mountPath: /etc/grafana/provisioning/datasources + name: grafana-datasources + - mountPath: /etc/grafana/provisioning/dashboards name: grafana-dashboards - image: ${PROXY_IMAGE} imagePullPolicy: IfNotPresent @@ -207,6 +274,9 @@ objects: secret: secretName: grafana-tls - name: grafana-storage + persistentVolumeClaim: + claimName: grafana + - name: grafana-logs emptyDir: {} - name: grafana-config configMap: @@ -227,14 +297,13 @@ objects: - grafana from: kind: ImageStreamTag - name: grafana:latest + name: grafana:${GRAFANA_RELEASE} - apiVersion: v1 kind: Service metadata: labels: app: grafana - template: grafana-base name: grafana namespace: ${NAMESPACE} annotations: @@ -256,11 +325,10 @@ objects: metadata: labels: app: grafana - template: 
grafana-base name: grafana namespace: ${NAMESPACE} spec: - host: ${GRAFANA_ROUTE_HOSTNAME} + host: ${GRAFANA_HOSTNAME} port: targetPort: oauth-proxy to: @@ -270,40 +338,3 @@ objects: wildcardPolicy: None tls: termination: reencrypt - -## -## Template Parameters -## -parameters: -- description: The GIT repository to use. - displayName: GIT Repo URL - name: GIT_REPO - value: https://github.com/nmasse-itix/OpenShift-Docker-Images.git - -- description: The OpenShift Namespace where the RHEL ImageStream resides. - displayName: RHEL ImageStream Namespace - name: RHEL_IMAGE_STREAM_NAMESPACE - value: openshift - -- description: Name of the ImageStreamTag to be used for the RHEL image. - displayName: RHEL ImageStreamTag - name: RHEL_IMAGE_STREAM_TAG - value: rhel7:7.4 - -- description: The Docker image to use for the OAuth Proxy. - displayName: OAuth Proxy image - name: PROXY_IMAGE - value: openshift3/oauth-proxy:v3.9 - -- description: The desired hostname of the route to the Grafana service. - displayName: Hostname of the Grafana Service - name: GRAFANA_ROUTE_HOSTNAME - -- description: The session secret for the proxy - name: SESSION_SECRET - generate: expression - from: "[a-zA-Z0-9]{43}" - -- description: The namespace to instantiate this template under. Defaults to 'openshift-metrics'. - name: NAMESPACE - value: openshift-metrics diff --git a/grafana/grafana-dashboard.json b/grafana/grafana-dashboard.json deleted file mode 100644 index 60a538d..0000000 --- a/grafana/grafana-dashboard.json +++ /dev/null @@ -1,2151 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. 
Uses cAdvisor metrics only.", - "editable": true, - "gnetId": 315, - "graphTooltip": 0, - "hideControls": false, - "id": 2, - "links": [], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "200px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "height": "200px", - "id": 32, - "legend": { - "alignAsTable": false, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum (rate (container_network_receive_bytes_total{id=\"/\"}[5m]))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Received", - "metric": "network", - "refId": "A", - "step": 10 - }, - { - "expr": "- sum (rate (container_network_transmit_bytes_total{id=\"/\"}[5m]))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Sent", - "metric": "network", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network I/O pressure", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "transparent": false, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": 
null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Network I/O pressure", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": true, - "error": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "height": "180px", - "id": 4, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster memory usage", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": 
"prometheus", - "decimals": 2, - "editable": true, - "error": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "height": "180px", - "id": 6, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum (rate (container_cpu_usage_seconds_total{id=\"/\"}[5m])) / sum (machine_cpu_cores{}) * 100", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster CPU usage (5m avg)", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "height": "180px", - "id": 7, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, 
- "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|mapper/[-a-zA-Z0-9]+)$\",id=\"/\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|mapper/[-a-zA-Z0-9]+)$\",id=\"/\"}) * 100", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "", - "metric": "", - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster filesystem usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "height": "1px", - "id": 9, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "20%", - "prefix": "", - "prefixFontSize": "20%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - 
"expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "", - "title": "Used", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "height": "1px", - "id": 10, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"})", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "", - "title": "Total", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": 
"prometheus", - "decimals": 2, - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "height": "1px", - "id": 11, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": " cores", - "postfixFontSize": "30%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum (rate (container_cpu_usage_seconds_total{id=\"/\"}[5m]))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "", - "title": "Used", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "height": "1px", - "id": 12, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": " cores", - "postfixFontSize": "30%", - 
"prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"})", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "", - "title": "Total", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "height": "1px", - "id": 13, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|mapper/[-a-zA-Z0-9]+)$\",id=\"/\"})", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "", - "title": "Used", - "type": 
"singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "height": "1px", - "id": 14, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum (container_fs_limit_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|mapper/[-a-zA-Z0-9]+)$\",id=\"/\"})", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "", - "title": "Total", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Total usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 3, - "editable": true, - "error": false, - 
"fill": 0, - "grid": {}, - "height": "", - "id": 17, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum (rate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (pod_name)", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ pod_name }}", - "metric": "container_cpu", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Pods CPU usage (1m avg)", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "transparent": false, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": "cores", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Pods CPU usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 3, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "height": "", - "id": 23, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": 
true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum (rate (container_cpu_usage_seconds_total{id=~\"^/system.slice.*\"}[5m])) by (systemd_service_name)", - "format": "time_series", - "hide": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ systemd_service_name }}", - "metric": "container_cpu", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "System services CPU usage (5m avg)", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": "cores", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "System services CPU usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 3, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "height": "", - "id": 24, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - 
"lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum (rate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\"}[5m])) by (container_name, pod_name)", - "format": "time_series", - "hide": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "pod: {{ pod_name }} | {{ container_name }}", - "metric": "container_cpu", - "refId": "A", - "step": 10 - }, - { - "expr": "sum (rate (container_cpu_usage_seconds_total{image!=\"\",name!~\"^k8s_.*\"}[5m])) by (kubernetes_io_hostname, name, image)", - "format": "time_series", - "hide": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", - "metric": "container_cpu", - "refId": "B", - "step": 10 - }, - { - "expr": "sum (rate (container_cpu_usage_seconds_total{rkt_container_name!=\"\"}[5m])) by (kubernetes_io_hostname, rkt_container_name)", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", - "metric": "container_cpu", - "refId": "C", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Containers CPU usage (5m avg)", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": "cores", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": 
null, - "repeatRowId": null, - "showTitle": false, - "title": "Containers CPU usage", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "500px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "datasource": "prometheus", - "decimals": 3, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "id": 20, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "span": 12, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum (rate (container_cpu_usage_seconds_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[1m])) by (id)", - "hide": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ id }}", - "metric": "container_cpu", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "All processes CPU usage (1m avg)", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "show": true - }, - "yaxes": [ - { - "format": "none", - "label": "cores", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "All processes CPU usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": 
false, - "fill": 0, - "grid": {}, - "id": 25, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (pod_name)", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ pod_name }}", - "metric": "container_memory_usage:sort_desc", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Pods memory usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Pods memory usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "id": 26, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - 
"sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum (container_memory_working_set_bytes{id=~\"^/system.slice/.*\"}) by (id)", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ id }}", - "metric": "container_memory_usage:sort_desc", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "System services memory usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "System services memory usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "id": 27, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": 
false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}) by (container_name, pod_name)", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "pod: {{ pod_name }} | {{ container_name }}", - "metric": "container_memory_usage:sort_desc", - "refId": "A", - "step": 10 - }, - { - "expr": "sum (container_memory_working_set_bytes{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, name, image)", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", - "metric": "container_memory_usage:sort_desc", - "refId": "B", - "step": 10 - }, - { - "expr": "sum (container_memory_working_set_bytes{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, rkt_container_name)", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", - "metric": "container_memory_usage:sort_desc", - "refId": "C", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Containers memory usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Containers memory 
usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "500px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "id": 28, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum (container_memory_working_set_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) by (id)", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ id }}", - "metric": "container_memory_usage:sort_desc", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "All processes memory usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "All processes memory usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - 
"decimals": 2, - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 16, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (pod_name)", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "-> {{ pod_name }}", - "metric": "network", - "refId": "A", - "step": 10 - }, - { - "expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (pod_name)", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "<- {{ pod_name }}", - "metric": "network", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Pods network I/O (5m avg)", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Pods network I/O", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": 
false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 30, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (container_name, pod_name)", - "format": "time_series", - "hide": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "-> pod: {{ pod_name }} | {{ container_name }}", - "metric": "network", - "refId": "B", - "step": 10 - }, - { - "expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\"}[5m])) by (container_name, pod_name)", - "format": "time_series", - "hide": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "<- pod: {{ pod_name }} | {{ container_name }}", - "metric": "network", - "refId": "D", - "step": 10 - }, - { - "expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name!~\"^k8s_.*\"}[5m])) by (kubernetes_io_hostname, name, image)", - "format": "time_series", - "hide": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", - "metric": "network", - "refId": "A", - "step": 10 - }, - { - "expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name!~\"^k8s_.*\"}[5m])) by (kubernetes_io_hostname, name, image)", - "format": "time_series", - "hide": false, - "interval": "10s", - "intervalFactor": 1, 
- "legendFormat": "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", - "metric": "network", - "refId": "C", - "step": 10 - }, - { - "expr": "sum (rate (container_network_transmit_bytes_total{rkt_container_name!=\"\"}[5m])) by (kubernetes_io_hostname, rkt_container_name)", - "format": "time_series", - "hide": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", - "metric": "network", - "refId": "E", - "step": 10 - }, - { - "expr": "- sum (rate (container_network_transmit_bytes_total{rkt_container_name!=\"\"}[5m])) by (kubernetes_io_hostname, rkt_container_name)", - "format": "time_series", - "hide": false, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", - "metric": "network", - "refId": "F", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Containers network I/O (5m avg)", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Containers network I/O", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "500px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "id": 29, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, 
- "rightSide": false, - "show": true, - "sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum (rate (container_network_receive_bytes_total{id!=\"/\"}[5m])) by (id)", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "-> {{ id }}", - "metric": "network", - "refId": "A", - "step": 10 - }, - { - "expr": "- sum (rate (container_network_transmit_bytes_total{id!=\"/\"}[5m])) by (id)", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "<- {{ id }}", - "metric": "network", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "All processes network I/O (1m avg)", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "All processes network I/O", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes" - ], - "templating": { - "list": [ - { - "allValue": ".*", - "current": { - "text": "All", - "value": "$__all" - }, - "datasource": "prometheus", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "Node", - "options": [], - "query": 
"label_values(kubernetes_io_hostname)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-5m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kubernetes cluster monitoring (via Prometheus)", - "version": 2 - } diff --git a/grafana/grafana-nodatasource.yaml b/grafana/grafana-nodatasource.yaml deleted file mode 100644 index b30b51f..0000000 --- a/grafana/grafana-nodatasource.yaml +++ /dev/null @@ -1,43 +0,0 @@ -apiVersion: v1 -kind: Template -labels: - template: grafana-nodatasource -message: See https://github.com/nmasse-itix/OpenShift-Docker-Images/tree/master/grafana for more details. -metadata: - annotations: - description: |- - The default configmaps for a vanilla Grafana. - openshift.io/display-name: Grafana (Vanilla configuration) - tags: instant-app - template.openshift.io/documentation-url: https://github.com/nmasse-itix/OpenShift-Docker-Images/tree/master/grafana - template.openshift.io/long-description: Vanilla Grafana - template.openshift.io/provider-display-name: Nicolas Massé - template.openshift.io/support-url: https://github.com/nmasse-itix/OpenShift-Docker-Images/issues - name: grafana-nodatasource -objects: - -- kind: ConfigMap - apiVersion: v1 - metadata: - name: grafana-dashboards - namespace: ${NAMESPACE} - labels: - template: grafana-nodatasource - data: - -- kind: ConfigMap - apiVersion: v1 - metadata: - name: grafana-datasources - namespace: ${NAMESPACE} - labels: - template: grafana-nodatasource - data: - -## -## Template Parameters -## -parameters: -- description: The namespace to instantiate this template under. Defaults to 'openshift-metrics'. 
- name: NAMESPACE - value: openshift-metrics diff --git a/grafana/grafana-prometheus-storage.yaml b/grafana/grafana-prometheus-storage.yaml deleted file mode 100644 index 1af7f71..0000000 --- a/grafana/grafana-prometheus-storage.yaml +++ /dev/null @@ -1,69 +0,0 @@ -apiVersion: v1 -kind: Template -labels: - template: grafana-prometheus-storage -message: See https://github.com/nmasse-itix/OpenShift-Docker-Images/tree/master/grafana for more details. -metadata: - annotations: - description: |- - The Persistent Volumes for Prometheus. - openshift.io/display-name: Prometheus (storage) - tags: instant-app - template.openshift.io/documentation-url: https://github.com/nmasse-itix/OpenShift-Docker-Images/tree/master/grafana - template.openshift.io/long-description: Persistent Volumes for Prometheus - template.openshift.io/provider-display-name: Nicolas Massé - template.openshift.io/support-url: https://github.com/nmasse-itix/OpenShift-Docker-Images/issues - name: grafana-prometheus-storage -objects: - -- apiVersion: v1 - kind: PersistentVolumeClaim - metadata: - name: prometheus - labels: - template: grafana-prometheus-storage - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: ${PVC_SIZE} - -- apiVersion: v1 - kind: PersistentVolumeClaim - metadata: - name: prometheus-alertbuffer - labels: - template: grafana-prometheus-storage - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: ${PVC_SIZE} - -- apiVersion: v1 - kind: PersistentVolumeClaim - metadata: - name: prometheus-alertmanager - labels: - template: grafana-prometheus-storage - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: ${PVC_SIZE} - -## -## Template Parameters -## -parameters: - -- description: The namespace to instantiate this template under. Defaults to 'openshift-metrics'. - name: NAMESPACE - value: openshift-metrics - -- description: The Storage Class to use to request the Persistent Volumes. 
- name: PVC_SIZE - value: "10Gi" diff --git a/grafana/grafana.repo b/grafana/grafana.repo deleted file mode 100644 index 91bf789..0000000 --- a/grafana/grafana.repo +++ /dev/null @@ -1,9 +0,0 @@ -[grafana] -name=grafana -baseurl=https://packagecloud.io/grafana/testing/el/6/$basearch -repo_gpgcheck=1 -enabled=1 -gpgcheck=1 -gpgkey=https://packagecloud.io/gpg.key https://grafanarel.s3.amazonaws.com/RPM-GPG-KEY-grafana -sslverify=1 -sslcacert=/etc/pki/tls/certs/ca-bundle.crt diff --git a/grafana/grafana-prometheus.yaml b/grafana/grafana.yaml similarity index 82% rename from grafana/grafana-prometheus.yaml rename to grafana/grafana.yaml index 3bdb07f..9e35cf2 100644 --- a/grafana/grafana-prometheus.yaml +++ b/grafana/grafana.yaml @@ -1,27 +1,348 @@ apiVersion: v1 kind: Template labels: - template: grafana-prometheus -message: See https://github.com/nmasse-itix/OpenShift-Docker-Images/tree/master/grafana for more details. + template: grafana metadata: annotations: description: |- - The prometheus datasource for Grafana. - openshift.io/display-name: Prometheus + Grafana allows you to query, visualize, alert on and understand your metrics + no matter where they are stored. Create, explore, and share dashboards with + your team and foster a data driven culture. + openshift.io/display-name: Grafana tags: instant-app - template.openshift.io/documentation-url: https://github.com/nmasse-itix/OpenShift-Docker-Images/tree/master/grafana - template.openshift.io/long-description: Prometheus datasource for Grafana - template.openshift.io/provider-display-name: Nicolas Massé - template.openshift.io/support-url: https://github.com/nmasse-itix/OpenShift-Docker-Images/issues - name: grafana-prometheus + template.openshift.io/documentation-url: http://docs.grafana.org/ + template.openshift.io/long-description: A grafana distribution for OpenShift. 
+ template.openshift.io/provider-display-name: Grafana + template.openshift.io/support-url: https://grafana.com/enterprise + name: grafana +parameters: +- description: The Docker image to use for the OAuth Proxy. + displayName: OAuth Proxy image + name: PROXY_IMAGE + value: openshift3/oauth-proxy:v3.9 + required: true + +- description: The desired hostname of the route to the Grafana service. + displayName: Hostname of the Grafana Service + name: GRAFANA_HOSTNAME + required: false + +- description: The session secret for the proxy + name: SESSION_SECRET + generate: expression + from: "[a-zA-Z0-9]{43}" + required: true + +- description: The Grafana version to deploy + displayName: Grafana version + name: GRAFANA_CUSTOM_VERSION + value: 5.1.4 + required: true + +- description: The Grafana release to deploy, either 'stable', 'beta', 'master' or 'custom' + displayName: Grafana release + name: GRAFANA_RELEASE + value: beta + required: true + +- description: The namespace used to deploy this template + displayName: Kubernetes Namespace + name: NAMESPACE + required: true + +- description: Volume size for the Grafana DB + displayName: Volume Size + name: GRAFANA_VOLUME_SIZE + value: "1Gi" + required: true + objects: +- apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: grafana + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${GRAFANA_VOLUME_SIZE} + +- apiVersion: v1 + kind: Secret + metadata: + name: oauth-proxy + namespace: "${NAMESPACE}" + stringData: + session_secret: "${SESSION_SECRET}=" + +- apiVersion: v1 + kind: ServiceAccount + metadata: + name: grafana + namespace: ${NAMESPACE} + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.proxy: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"grafana"}}' + secrets: + +- kind: ConfigMap + apiVersion: v1 + metadata: + name: grafana-config + namespace: ${NAMESPACE} + data: + grafana.ini: |- + [server] + http_addr = 127.0.0.1 + 
+ [auth] + disable_login_form = true + disable_signout_menu = true + + [auth.basic] + enabled = false + + [auth.proxy] + enabled = true + header_name = X-Forwarded-User + + [users] + auto_assign_org = true + auto_assign_org_role = Admin + + [log] + mode = console + +- kind: ConfigMap + apiVersion: v1 + metadata: + name: grafana-datasources + namespace: ${NAMESPACE} + data: + prometheus.yaml: |- + apiVersion: 1 + datasources: + - name: prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: false + version: 1 + editable: true + - kind: ConfigMap apiVersion: v1 metadata: name: grafana-dashboards namespace: ${NAMESPACE} + data: + prometheus.yaml: |- + apiVersion: 1 + providers: + - name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 300 + options: + path: /usr/share/openshift-dashboards + +- apiVersion: v1 + kind: ImageStream + metadata: + labels: + build: grafana + namespace: ${NAMESPACE} + name: grafana + spec: + dockerImageRepository: docker.io/grafana/grafana + tags: + - name: stable + from: + kind: DockerImage + name: 'docker.io/grafana/grafana:5.1.4' + importPolicy: + scheduled: true + - name: beta + from: + kind: DockerImage + name: 'docker.io/grafana/grafana:5.2.0-beta3' + importPolicy: + scheduled: true + - name: master + from: + kind: DockerImage + name: 'docker.io/grafana/grafana:master' + importPolicy: + scheduled: true + - name: custom + from: + kind: DockerImage + name: 'docker.io/grafana/grafana:${GRAFANA_CUSTOM_VERSION}' + importPolicy: + scheduled: true + +- apiVersion: v1 + kind: DeploymentConfig + metadata: + labels: + app: grafana + name: grafana + namespace: ${NAMESPACE} + spec: + replicas: 1 + selector: + app: grafana + deploymentconfig: grafana + strategy: + activeDeadlineSeconds: 21600 + resources: {} + type: Recreate + template: + metadata: + creationTimestamp: null + labels: + app: grafana + deploymentconfig: grafana + spec: + containers: + - image: " " + 
imagePullPolicy: IfNotPresent + name: grafana + ports: + - containerPort: 3000 + protocol: TCP + resources: {} + securityContext: {} + terminationMessagePath: /dev/termination-log + volumeMounts: + - mountPath: /etc/grafana/ + name: grafana-config + - mountPath: /var/log/grafana/ + name: grafana-logs + - mountPath: /var/lib/grafana/ + name: grafana-storage + - mountPath: /etc/grafana/provisioning/datasources + name: grafana-datasources + - mountPath: /etc/grafana/provisioning/dashboards + name: grafana-dashboards + - mountPath: /usr/share/openshift-dashboards + name: grafana-openshift-dashboard + - image: ${PROXY_IMAGE} + imagePullPolicy: IfNotPresent + name: proxy + args: + - --provider=openshift + - --https-address=:8443 + - --http-address= + - --upstream=http://localhost:3000 + - --openshift-service-account=grafana + - '--openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}' + - --tls-cert=/etc/tls/private/tls.crt + - --tls-key=/etc/tls/private/tls.key + - --client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - --cookie-secret-file=/etc/proxy/secrets/session_secret + - --openshift-ca=/etc/pki/tls/cert.pem + - --openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + ports: + - containerPort: 8443 + name: web + protocol: TCP + resources: {} + securityContext: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /etc/tls/private + name: tls + - mountPath: /etc/proxy/secrets + name: secrets + dnsPolicy: ClusterFirst + terminationGracePeriodSeconds: 30 + restartPolicy: Always + serviceAccountName: grafana + volumes: + - name: secrets + secret: + secretName: oauth-proxy + - name: tls + secret: + secretName: grafana-tls + - name: grafana-storage + persistentVolumeClaim: + claimName: grafana + - name: grafana-logs + emptyDir: {} + - name: grafana-config + configMap: + name: grafana-config + - name: 
grafana-datasources + configMap: + name: grafana-datasources + - name: grafana-dashboards + configMap: + name: grafana-dashboards + - name: grafana-openshift-dashboard + configMap: + name: grafana-openshift-dashboard + test: false + triggers: + - type: ConfigChange + - type: ImageChange + imageChangeParams: + automatic: true + containerNames: + - grafana + from: + kind: ImageStreamTag + name: grafana:${GRAFANA_RELEASE} + +- apiVersion: v1 + kind: Service + metadata: + labels: + app: grafana + name: grafana + namespace: ${NAMESPACE} + annotations: + service.alpha.openshift.io/serving-cert-secret-name: grafana-tls + spec: + ports: + - name: oauth-proxy + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app: grafana + deploymentconfig: grafana + sessionAffinity: None + type: ClusterIP + +- apiVersion: v1 + kind: Route + metadata: labels: - template: grafana-prometheus + app: grafana + name: grafana + namespace: ${NAMESPACE} + spec: + host: ${GRAFANA_HOSTNAME} + port: + targetPort: oauth-proxy + to: + kind: Service + name: grafana + weight: 100 + wildcardPolicy: None + tls: + termination: reencrypt + +- kind: ConfigMap + apiVersion: v1 + metadata: + name: grafana-openshift-dashboard + namespace: ${NAMESPACE} data: openshift-cluster.json: |- { @@ -2175,567 +2496,3 @@ objects: "title": "Kubernetes cluster monitoring (via Prometheus)", "version": 2 } - -- kind: ConfigMap - apiVersion: v1 - metadata: - name: grafana-datasources - namespace: ${NAMESPACE} - labels: - template: grafana-prometheus - data: - prometheus.yaml: |- - datasources: - - name: prometheus - type: prometheus - access: proxy - url: http://prometheus:9090 - version: 1 - editable: false - -- apiVersion: v1 - kind: Secret - metadata: - name: prometheus-proxy - namespace: "${NAMESPACE}" - labels: - template: grafana-prometheus - stringData: - session_secret: "${SESSION_SECRET}=" - -- apiVersion: v1 - kind: Secret - metadata: - name: alerts-proxy - namespace: "${NAMESPACE}" - labels: - template: 
grafana-prometheus - stringData: - session_secret: "${SESSION_SECRET}=" - - -- apiVersion: v1 - kind: ServiceAccount - metadata: - name: prometheus - namespace: ${NAMESPACE} - labels: - template: grafana-prometheus - annotations: - serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' - serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' - secrets: - -- apiVersion: v1 - kind: ClusterRoleBinding - metadata: - name: prometheus-is-cluster-reader - groupNames: - - system:cluster-readers - roleRef: - name: cluster-reader - subjects: - - kind: ServiceAccount - name: prometheus - namespace: ${NAMESPACE} - userNames: - - system:serviceaccount:${NAMESPACE}:prometheus - -- apiVersion: v1 - kind: Service - metadata: - labels: - app: prometheus - template: grafana-prometheus - name: prometheus - namespace: ${NAMESPACE} - annotations: - service.alpha.openshift.io/serving-cert-secret-name: prometheus-tls - prometheus.io/scrape: 'true' - prometheus.io/scheme: https - spec: - ports: - - name: oauth-proxy - port: 443 - protocol: TCP - targetPort: 8443 - - name: prometheus - port: 9090 - protocol: TCP - targetPort: 9090 - selector: - app: prometheus - sessionAffinity: None - type: ClusterIP - -- apiVersion: v1 - kind: Service - metadata: - labels: - app: prometheus - template: grafana-prometheus - name: alerts - namespace: ${NAMESPACE} - annotations: - service.alpha.openshift.io/serving-cert-secret-name: prometheus-alerts-tls - spec: - ports: - - name: oauth-proxy - port: 443 - protocol: TCP - targetPort: 9443 - selector: - app: prometheus - sessionAffinity: None - type: ClusterIP - -- apiVersion: v1 - kind: Route - metadata: - labels: - app: prometheus - template: grafana-prometheus - name: prometheus - namespace: ${NAMESPACE} - spec: - host: 
${PROMETHEUS_ROUTE_HOSTNAME} - port: - targetPort: oauth-proxy - to: - kind: Service - name: prometheus - weight: 100 - wildcardPolicy: None - tls: - termination: reencrypt - -- apiVersion: v1 - kind: Route - metadata: - labels: - app: prometheus - template: grafana-prometheus - name: alerts - namespace: ${NAMESPACE} - spec: - host: ${ALERTS_ROUTE_HOSTNAME} - port: - targetPort: oauth-proxy - to: - kind: Service - name: alerts - weight: 100 - wildcardPolicy: None - tls: - termination: reencrypt - -- apiVersion: apps/v1beta1 - kind: StatefulSet - metadata: - name: prometheus - namespace: ${NAMESPACE} - labels: - app: prometheus - spec: - updateStrategy: - type: RollingUpdate - podManagementPolicy: Parallel - selector: - provider: openshift - matchLabels: - app: prometheus - template: - metadata: - name: prometheus - labels: - app: prometheus - spec: - serviceAccountName: prometheus - containers: - # Deploy Prometheus behind an oauth proxy - - name: prom-proxy - image: "${PROXY_IMAGE}" - imagePullPolicy: IfNotPresent - ports: - - containerPort: 8443 - name: web - args: - - -provider=openshift - - -https-address=:8443 - - -http-address= - - -email-domain=* - - -upstream=http://localhost:9090 - - -client-id=system:serviceaccount:${NAMESPACE}:prometheus - - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}' - - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}' - - -tls-cert=/etc/tls/private/tls.crt - - -tls-key=/etc/tls/private/tls.key - - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token - - -cookie-secret-file=/etc/proxy/secrets/session_secret - - -openshift-ca=/etc/pki/tls/cert.pem - - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt - - -skip-auth-regex=^/metrics - volumeMounts: - - mountPath: /etc/tls/private - name: prometheus-tls - - mountPath: /etc/proxy/secrets - name: 
prometheus-secrets - - mountPath: /prometheus - name: prometheus-data - - - name: prometheus - args: - - --storage.tsdb.retention=6h - - --storage.tsdb.min-block-duration=2m - - --config.file=/etc/prometheus/prometheus.yml - - --web.listen-address=0.0.0.0:9090 - image: "${PROMETHEUS_IMAGE}" - imagePullPolicy: IfNotPresent - volumeMounts: - - mountPath: /etc/prometheus - name: prometheus-config - - mountPath: /prometheus - name: prometheus-data - - # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy - - name: alerts-proxy - image: "${PROXY_IMAGE}" - imagePullPolicy: IfNotPresent - ports: - - containerPort: 9443 - name: web - args: - - -provider=openshift - - -https-address=:9443 - - -http-address= - - -email-domain=* - - -upstream=http://localhost:9099 - - -client-id=system:serviceaccount:${NAMESPACE}:prometheus - - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}' - - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}' - - -tls-cert=/etc/tls/private/tls.crt - - -tls-key=/etc/tls/private/tls.key - - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token - - -cookie-secret-file=/etc/proxy/secrets/session_secret - - -openshift-ca=/etc/pki/tls/cert.pem - - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt - volumeMounts: - - mountPath: /etc/tls/private - name: alerts-tls - - mountPath: /etc/proxy/secrets - name: alerts-secrets - - - name: alert-buffer - args: - - --storage-path=/alert-buffer/messages.db - image: "${ALERTBUFFER_IMAGE}" - imagePullPolicy: IfNotPresent - volumeMounts: - - mountPath: /alert-buffer - name: alert-buffer-data - ports: - - containerPort: 9099 - name: alert-buf - - - name: alertmanager - args: - - --config.file=/etc/alertmanager/alertmanager.yml - image: "${ALERTMANAGER_IMAGE}" - imagePullPolicy: IfNotPresent - ports: - - containerPort: 
9093 - name: web - volumeMounts: - - mountPath: /etc/alertmanager - name: alertmanager-config - - mountPath: /alertmanager - name: alertmanager-data - - restartPolicy: Always - volumes: - - name: prometheus-config - configMap: - defaultMode: 420 - name: prometheus - - name: prometheus-secrets - secret: - secretName: prometheus-proxy - - name: prometheus-tls - secret: - secretName: prometheus-tls - - name: prometheus-data - persistentVolumeClaim: - claimName: prometheus - - name: alertmanager-config - configMap: - defaultMode: 420 - name: prometheus-alerts - - name: alerts-secrets - secret: - secretName: alerts-proxy - - name: alerts-tls - secret: - secretName: prometheus-alerts-tls - - name: alertmanager-data - persistentVolumeClaim: - claimName: prometheus-alertmanager - - name: alert-buffer-data - persistentVolumeClaim: - claimName: prometheus-alertbuffer - -- kind: ConfigMap - apiVersion: v1 - metadata: - name: prometheus - namespace: ${NAMESPACE} - labels: - template: grafana-prometheus - data: - prometheus.rules: |- - groups: - - name: example-rules - interval: 30s # defaults to global interval - rules: - - prometheus.yml: |- - global: - scrape_interval: 30s - evaluation_interval: 30s - - rule_files: - - '*.rules' - - # A scrape configuration for running Prometheus on a Kubernetes cluster. - # This uses separate scrape configs for cluster components (i.e. API server, node) - # and services to allow each to use different authentication configs. - # - # Kubernetes labels will be added as Prometheus labels on metrics via the - # `labelmap` relabeling action. - - # Scrape config for API servers. - # - # Kubernetes exposes API servers as endpoints to the default/kubernetes - # service so this uses `endpoints` role and uses relabelling to only keep - # the endpoints associated with the default/kubernetes service using the - # default named port `https`. This works for single API server deployments as - # well as HA API server deployments. 
- scrape_configs: - - job_name: 'kubernetes-apiservers' - - kubernetes_sd_configs: - - role: endpoints - - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - - # Keep only the default/kubernetes service endpoints for the https port. This - # will add targets for each API server which Kubernetes adds an endpoint to - # the default/kubernetes service. - relabel_configs: - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: default;kubernetes;https - - # Scrape config for nodes. - # - # Each node exposes a /metrics endpoint that contains operational metrics for - # the Kubelet and other components. - - job_name: 'kubernetes-nodes' - - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - - kubernetes_sd_configs: - - role: node - - relabel_configs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - - # Scrape config for controllers. - # - # Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for - # the controllers. - # - # TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via - # endpoints. - - job_name: 'kubernetes-controllers' - - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - - kubernetes_sd_configs: - - role: endpoints - - # Keep only the default/kubernetes service endpoints for the https port, and then - # set the port to 8444. This is the default configuration for the controllers on OpenShift - # masters. 
- relabel_configs: - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: default;kubernetes;https - - source_labels: [__address__] - action: replace - target_label: __address__ - regex: (.+)(?::\d+) - replacement: $1:8444 - - # Scrape config for cAdvisor. - # - # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that - # reports container metrics for each running pod. Scrape those by default. - - job_name: 'kubernetes-cadvisor' - - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - - metrics_path: /metrics/cadvisor - - kubernetes_sd_configs: - - role: node - - relabel_configs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - - # Scrape config for service endpoints. - # - # The relabeling allows the actual service scrape endpoint to be configured - # via the following annotations: - # - # * `prometheus.io/scrape`: Only scrape services that have a value of `true` - # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need - # to set this to `https` & most likely set the `tls_config` of the scrape config. - # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. - # * `prometheus.io/port`: If the metrics are exposed on a different port to the - # service then set this appropriately. - - job_name: 'kubernetes-service-endpoints' - - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - # TODO: this should be per target - insecure_skip_verify: true - - kubernetes_sd_configs: - - role: endpoints - - relabel_configs: - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] - action: replace - target_label: __scheme__ - regex: (https?) 
- - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] - action: replace - target_label: __address__ - regex: (.+)(?::\d+);(\d+) - replacement: $1:$2 - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username] - action: replace - target_label: __basic_auth_username__ - regex: (.+) - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password] - action: replace - target_label: __basic_auth_password__ - regex: (.+) - - action: labelmap - regex: __meta_kubernetes_service_label_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: kubernetes_namespace - - source_labels: [__meta_kubernetes_service_name] - action: replace - target_label: kubernetes_name - - alerting: - alertmanagers: - - scheme: http - static_configs: - - targets: - - "localhost:9093" - -- kind: ConfigMap - apiVersion: v1 - metadata: - name: prometheus-alerts - namespace: ${NAMESPACE} - labels: - template: grafana-prometheus - data: - alertmanager.yml: |- - global: - - # The root route on which each incoming alert enters. - route: - # default route if none match - receiver: alert-buffer-wh - - # The labels by which incoming alerts are grouped together. For example, - # multiple alerts coming in for cluster=A and alertname=LatencyHigh would - # be batched into a single group. - # TODO: - group_by: [] - - # All the above attributes are inherited by all child routes and can - # overwritten on each. - - receivers: - - name: alert-buffer-wh - webhook_configs: - - url: http://localhost:9099/topics/alerts - - -## -## Template Parameters -## -parameters: -- description: The Docker image to use for the OAuth Proxy. - displayName: OAuth Proxy image - name: PROXY_IMAGE - value: openshift3/oauth-proxy:v3.9 - -- description: The Docker image to use for Prometheus. 
- displayName: Prometheus image - name: PROMETHEUS_IMAGE - value: openshift3/prometheus:v3.9 - -- description: The Docker image to use for the Alert Buffer. - displayName: Alert Buffer image - name: ALERTBUFFER_IMAGE - value: openshift3/prometheus-alert-buffer:v3.9 - -- description: The Docker image to use for the Alert Manager. - displayName: Alert Manager image - name: ALERTMANAGER_IMAGE - value: openshift3/prometheus-alertmanager:v3.9 - -- description: The desired hostname of the route to the Prometheus service. - displayName: Hostname of the Prometheus Service - name: PROMETHEUS_ROUTE_HOSTNAME - -- description: The desired hostname of the route to the Prometheus Alert service. - displayName: Hostname of the Prometheus Alert Service - name: ALERTS_ROUTE_HOSTNAME - -- description: The session secret for the proxy - name: SESSION_SECRET - generate: expression - from: "[a-zA-Z0-9]{43}" - -- description: The namespace to instantiate this template under. Defaults to 'openshift-metrics'. - name: NAMESPACE - value: openshift-metrics diff --git a/grafana/prometheus-app-311.yaml b/grafana/prometheus-app-311.yaml new file mode 100644 index 0000000..c5d7253 --- /dev/null +++ b/grafana/prometheus-app-311.yaml @@ -0,0 +1,517 @@ +apiVersion: v1 +kind: Template +labels: + template: prometheus +metadata: + annotations: + description: |- + From metrics to insight, + power your metrics and alerting with a leading open-source monitoring solution. 
+ openshift.io/display-name: Prometheus + tags: instant-app + template.openshift.io/documentation-url: https://prometheus.io/docs/ + template.openshift.io/long-description: Prometheus + template.openshift.io/support-url: https://github.com/prometheus/prometheus/issues + name: prometheus +objects: + +- apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: prometheus + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${PROMETHEUS_VOLUME_SIZE} + +- apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: prometheus-alertbuffer + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${ALERTBUFFER_VOLUME_SIZE} + +- apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: prometheus-alertmanager + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${ALERTMANAGER_VOLUME_SIZE} + +- apiVersion: v1 + kind: Secret + metadata: + name: prometheus-proxy + namespace: "${NAMESPACE}" + labels: + template: grafana-prometheus + stringData: + session_secret: "${SESSION_SECRET}=" + +- apiVersion: v1 + kind: Secret + metadata: + name: alerts-proxy + namespace: "${NAMESPACE}" + stringData: + session_secret: "${SESSION_SECRET}=" + +- apiVersion: v1 + kind: ServiceAccount + metadata: + name: prometheus + namespace: ${NAMESPACE} + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' + serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' + secrets: + +- apiVersion: v1 + kind: ClusterRoleBinding + metadata: + name: prometheus-in-${NAMESPACE}-is-cluster-reader + groupNames: + - system:cluster-readers + roleRef: + name: cluster-reader + subjects: + - kind: ServiceAccount + name: prometheus + namespace: ${NAMESPACE} + userNames: + - 
system:serviceaccount:${NAMESPACE}:prometheus + +- apiVersion: v1 + kind: Service + metadata: + labels: + app: prometheus + name: prometheus + namespace: ${NAMESPACE} + annotations: + service.alpha.openshift.io/serving-cert-secret-name: prometheus-tls + prometheus.io/scrape: 'true' + prometheus.io/scheme: https + spec: + ports: + - name: oauth-proxy + port: 443 + protocol: TCP + targetPort: 8443 + - name: prometheus + port: 9090 + protocol: TCP + targetPort: 9090 + selector: + app: prometheus + sessionAffinity: None + type: ClusterIP + +- apiVersion: v1 + kind: Service + metadata: + labels: + app: prometheus + name: alerts + namespace: ${NAMESPACE} + annotations: + service.alpha.openshift.io/serving-cert-secret-name: prometheus-alerts-tls + spec: + ports: + - name: oauth-proxy + port: 443 + protocol: TCP + targetPort: 9443 + selector: + app: prometheus + sessionAffinity: None + type: ClusterIP + +- apiVersion: v1 + kind: Route + metadata: + labels: + app: prometheus + name: prometheus + namespace: ${NAMESPACE} + spec: + host: ${PROMETHEUS_HOSTNAME} + port: + targetPort: oauth-proxy + to: + kind: Service + name: prometheus + weight: 100 + wildcardPolicy: None + tls: + termination: reencrypt + +- apiVersion: v1 + kind: Route + metadata: + labels: + app: prometheus + name: alerts + namespace: ${NAMESPACE} + spec: + host: ${ALERTS_HOSTNAME} + port: + targetPort: oauth-proxy + to: + kind: Service + name: alerts + weight: 100 + wildcardPolicy: None + tls: + termination: reencrypt + +- apiVersion: apps/v1beta1 + kind: StatefulSet + metadata: + name: prometheus + namespace: ${NAMESPACE} + labels: + app: prometheus + spec: + updateStrategy: + type: RollingUpdate + podManagementPolicy: Parallel + selector: + provider: openshift + matchLabels: + app: prometheus + template: + metadata: + name: prometheus + labels: + app: prometheus + spec: + serviceAccountName: prometheus + containers: + # Deploy Prometheus behind an oauth proxy + - name: prom-proxy + image: "${PROXY_IMAGE}" + 
imagePullPolicy: IfNotPresent + ports: + - containerPort: 8443 + name: web + args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:9090 + - -client-id=system:serviceaccount:${NAMESPACE}:prometheus + - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + - -skip-auth-regex=^/metrics + volumeMounts: + - mountPath: /etc/tls/private + name: prometheus-tls + - mountPath: /etc/proxy/secrets + name: prometheus-secrets + - mountPath: /prometheus + name: prometheus-data + + - name: prometheus + args: + - --storage.tsdb.retention=3h + - --storage.tsdb.min-block-duration=2m + - --config.file=/etc/prometheus/prometheus.yml + - --web.listen-address=0.0.0.0:9090 + image: "${PROMETHEUS_IMAGE}" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /etc/prometheus + name: prometheus-config + - mountPath: /prometheus + name: prometheus-data + + # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy + - name: alerts-proxy + image: "${PROXY_IMAGE}" + imagePullPolicy: IfNotPresent + ports: + - containerPort: 9443 + name: web + args: + - -provider=openshift + - -https-address=:9443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:9099 + - -client-id=system:serviceaccount:${NAMESPACE}:prometheus + - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}' + - 
'-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + volumeMounts: + - mountPath: /etc/tls/private + name: alerts-tls + - mountPath: /etc/proxy/secrets + name: alerts-secrets + + - name: alert-buffer + args: + - --storage-path=/alert-buffer/messages.db + image: "${ALERTBUFFER_IMAGE}" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /alert-buffer + name: alert-buffer-data + ports: + - containerPort: 9099 + name: alert-buf + + - name: alertmanager + args: + - --config.file=/etc/alertmanager/alertmanager.yml + image: "${ALERTMANAGER_IMAGE}" + imagePullPolicy: IfNotPresent + ports: + - containerPort: 9093 + name: web + volumeMounts: + - mountPath: /etc/alertmanager + name: alertmanager-config + - mountPath: /alertmanager + name: alertmanager-data + + restartPolicy: Always + volumes: + - name: prometheus-config + configMap: + defaultMode: 420 + name: prometheus + - name: prometheus-secrets + secret: + secretName: prometheus-proxy + - name: prometheus-tls + secret: + secretName: prometheus-tls + - name: prometheus-data + persistentVolumeClaim: + claimName: prometheus + - name: alertmanager-config + configMap: + defaultMode: 420 + name: prometheus-alerts + - name: alerts-secrets + secret: + secretName: alerts-proxy + - name: alerts-tls + secret: + secretName: prometheus-alerts-tls + - name: alertmanager-data + persistentVolumeClaim: + claimName: prometheus-alertmanager + - name: alert-buffer-data + persistentVolumeClaim: + claimName: prometheus-alertbuffer + +- kind: ConfigMap + apiVersion: v1 + metadata: + name: prometheus + namespace: ${NAMESPACE} + data: 
+ prometheus.rules: |- + groups: + - name: example-rules + interval: 30s # defaults to global interval + rules: + + prometheus.yml: |- + global: + scrape_interval: 30s + evaluation_interval: 30s + + rule_files: + - '*.rules' + + # A scrape configuration for running Prometheus on a Kubernetes cluster. + # This uses separate scrape configs for cluster components (i.e. API server, node) + # and services to allow each to use different authentication configs. + # + # Kubernetes labels will be added as Prometheus labels on metrics via the + # `labelmap` relabeling action. + scrape_configs: + + # Scrape config for service endpoints. + # + # The relabeling allows the actual service scrape endpoint to be configured + # via the following annotations: + # + # * `prometheus.io/scrape`: Only scrape services that have a value of `true` + # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need + # to set this to `https` & most likely set the `tls_config` of the scrape config. + # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. + # * `prometheus.io/port`: If the metrics are exposed on a different port to the + # service then set this appropriately. + - job_name: 'kubernetes-service-endpoints' + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + # TODO: this should be per target + insecure_skip_verify: true + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) 
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + target_label: __address__ + regex: (.+)(?::\d+);(\d+) + replacement: $1:$2 + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username] + action: replace + target_label: __basic_auth_username__ + regex: (.+) + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password] + action: replace + target_label: __basic_auth_password__ + regex: (.+) + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: kubernetes_name + + alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: + - "localhost:9093" + +- kind: ConfigMap + apiVersion: v1 + metadata: + name: prometheus-alerts + namespace: ${NAMESPACE} + labels: + template: grafana-prometheus + data: + alertmanager.yml: |- + global: + + # The root route on which each incoming alert enters. + route: + # default route if none match + receiver: alert-buffer-wh + + # The labels by which incoming alerts are grouped together. For example, + # multiple alerts coming in for cluster=A and alertname=LatencyHigh would + # be batched into a single group. + # TODO: + group_by: [] + + # All the above attributes are inherited by all child routes and can + # overwritten on each. + + receivers: + - name: alert-buffer-wh + webhook_configs: + - url: http://localhost:9099/topics/alerts + + +## +## Template Parameters +## +parameters: +- description: The Docker image to use for the OAuth Proxy. 
+ displayName: OAuth Proxy image + name: PROXY_IMAGE + value: registry.access.redhat.com/openshift3/oauth-proxy:v3.11 + required: true + +- description: The Docker image to use for Prometheus. + displayName: Prometheus image + name: PROMETHEUS_IMAGE + value: registry.access.redhat.com/openshift3/prometheus:v3.11 + required: true + +- description: The Docker image to use for the Alert Buffer. + displayName: Alert Buffer image + name: ALERTBUFFER_IMAGE + value: registry.access.redhat.com/openshift3/prometheus-alert-buffer:v3.11 + required: true + +- description: The Docker image to use for the Alert Manager. + displayName: Alert Manager image + name: ALERTMANAGER_IMAGE + value: registry.access.redhat.com/openshift3/prometheus-alertmanager:v3.11 + required: true + +- description: The desired hostname of the route to the Prometheus service. + displayName: Hostname of the Prometheus Service + name: PROMETHEUS_HOSTNAME + required: false + +- description: The desired hostname of the route to the Prometheus Alert service. 
+ displayName: Hostname of the Prometheus Alert Service + name: ALERTS_HOSTNAME + required: false + +- description: The session secret for the proxy + name: SESSION_SECRET + generate: expression + from: "[a-zA-Z0-9]{43}" + required: true + +- description: The namespace used to deploy this template + name: NAMESPACE + required: true + +- description: Volume size for Prometheus + displayName: Prometheus Volume Size + name: PROMETHEUS_VOLUME_SIZE + value: "1Gi" + required: true + +- description: Volume size for the Alert Buffer + displayName: Alert Buffer Volume Size + name: ALERTBUFFER_VOLUME_SIZE + value: "1Gi" + required: true + +- description: Volume size for the Alert Manager + displayName: Alert Manager Volume Size + name: ALERTMANAGER_VOLUME_SIZE + value: "1Gi" + required: true diff --git a/grafana/prometheus.yaml b/grafana/prometheus.yaml new file mode 100644 index 0000000..a9ddd55 --- /dev/null +++ b/grafana/prometheus.yaml @@ -0,0 +1,610 @@ +apiVersion: v1 +kind: Template +labels: + template: prometheus +metadata: + annotations: + description: |- + From metrics to insight, + power your metrics and alerting with a leading open-source monitoring solution. 
+ openshift.io/display-name: Prometheus + tags: instant-app + template.openshift.io/documentation-url: https://prometheus.io/docs/ + template.openshift.io/long-description: Prometheus + template.openshift.io/support-url: https://github.com/prometheus/prometheus/issues + name: prometheus +objects: + +- apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: prometheus + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${PROMETHEUS_VOLUME_SIZE} + +- apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: prometheus-alertbuffer + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${ALERTBUFFER_VOLUME_SIZE} + +- apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: prometheus-alertmanager + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${ALERTMANAGER_VOLUME_SIZE} + +- apiVersion: v1 + kind: Secret + metadata: + name: prometheus-proxy + namespace: "${NAMESPACE}" + labels: + template: grafana-prometheus + stringData: + session_secret: "${SESSION_SECRET}=" + +- apiVersion: v1 + kind: Secret + metadata: + name: alerts-proxy + namespace: "${NAMESPACE}" + stringData: + session_secret: "${SESSION_SECRET}=" + +- apiVersion: v1 + kind: ServiceAccount + metadata: + name: prometheus + namespace: ${NAMESPACE} + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' + serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' + secrets: + +- apiVersion: v1 + kind: ClusterRoleBinding + metadata: + name: prometheus-in-${NAMESPACE}-is-cluster-reader + groupNames: + - system:cluster-readers + roleRef: + name: cluster-reader + subjects: + - kind: ServiceAccount + name: prometheus + namespace: ${NAMESPACE} + userNames: + - 
system:serviceaccount:${NAMESPACE}:prometheus
+
+# Service in front of the Prometheus pod.
+#  * port 443  -> 8443: the "prom-proxy" OAuth proxy sidecar (TLS).
+#  * port 9090 -> 9090: the Prometheus container itself (plain HTTP).
+# The serving-cert annotation names the secret (prometheus-tls) that the
+# proxy container mounts at /etc/tls/private.
+- apiVersion: v1
+  kind: Service
+  metadata:
+    labels:
+      app: prometheus
+    name: prometheus
+    namespace: ${NAMESPACE}
+    annotations:
+      service.alpha.openshift.io/serving-cert-secret-name: prometheus-tls
+      prometheus.io/scrape: 'true'
+      prometheus.io/scheme: https
+      # Pin self-scraping to the TLS proxy port. Without it the
+      # "kubernetes-service-endpoints" job also discovers the plain-HTTP
+      # 9090 endpoint and tries to scrape it over https, which always fails.
+      prometheus.io/port: '8443'
+  spec:
+    ports:
+    - name: oauth-proxy
+      port: 443
+      protocol: TCP
+      targetPort: 8443
+    - name: prometheus
+      port: 9090
+      protocol: TCP
+      targetPort: 9090
+    selector:
+      app: prometheus
+    sessionAffinity: None
+    type: ClusterIP
+
+# Service in front of the "alerts-proxy" OAuth proxy sidecar (port 9443),
+# whose upstream is the alert buffer on localhost:9099.
+- apiVersion: v1
+  kind: Service
+  metadata:
+    labels:
+      app: prometheus
+    name: alerts
+    namespace: ${NAMESPACE}
+    annotations:
+      service.alpha.openshift.io/serving-cert-secret-name: prometheus-alerts-tls
+  spec:
+    ports:
+    - name: oauth-proxy
+      port: 443
+      protocol: TCP
+      targetPort: 9443
+    selector:
+      app: prometheus
+    sessionAffinity: None
+    type: ClusterIP
+
+# Re-encrypting route to the "oauth-proxy" port of the prometheus Service.
+# PROMETHEUS_HOSTNAME is optional (required: false).
+- apiVersion: v1
+  kind: Route
+  metadata:
+    labels:
+      app: prometheus
+    name: prometheus
+    namespace: ${NAMESPACE}
+  spec:
+    host: ${PROMETHEUS_HOSTNAME}
+    port:
+      targetPort: oauth-proxy
+    to:
+      kind: Service
+      name: prometheus
+      weight: 100
+    wildcardPolicy: None
+    tls:
+      termination: reencrypt
+
+# Re-encrypting route to the "oauth-proxy" port of the alerts Service.
+# ALERTS_HOSTNAME is optional (required: false).
+- apiVersion: v1
+  kind: Route
+  metadata:
+    labels:
+      app: prometheus
+    name: alerts
+    namespace: ${NAMESPACE}
+  spec:
+    host: ${ALERTS_HOSTNAME}
+    port:
+      targetPort: oauth-proxy
+    to:
+      kind: Service
+      name: alerts
+      weight: 100
+    wildcardPolicy: None
+    tls:
+      termination: reencrypt
+
+# Single pod template running five containers: prom-proxy, prometheus,
+# alerts-proxy, alert-buffer and alertmanager. The containers talk to each
+# other over localhost (see the -upstream flags and the embedded configs).
+- apiVersion: apps/v1beta1
+  kind: StatefulSet
+  metadata:
+    name: prometheus
+    namespace: ${NAMESPACE}
+    labels:
+      app: prometheus
+  spec:
+    updateStrategy:
+      type: RollingUpdate
+    podManagementPolicy: Parallel
+    # A label selector only supports matchLabels / matchExpressions;
+    # it must match the pod template labels below.
+    selector:
+      matchLabels:
+        app: prometheus
+    template:
+      metadata:
+        name: prometheus
+        labels:
+          app: prometheus
+      spec:
+        serviceAccountName: prometheus
+        containers:
+        # Deploy Prometheus behind an oauth proxy
+        - name: prom-proxy
+          image: "${PROXY_IMAGE}"
+          imagePullPolicy: IfNotPresent
+          ports:
+          - containerPort: 8443
+            name: web
+          args:
+          - -provider=openshift
+          - -https-address=:8443
+          - -http-address=
+          - -email-domain=*
+          - -upstream=http://localhost:9090
+          - -client-id=system:serviceaccount:${NAMESPACE}:prometheus
+          - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}'
+          - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}'
+          - -tls-cert=/etc/tls/private/tls.crt
+          - -tls-key=/etc/tls/private/tls.key
+          - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+          - -cookie-secret-file=/etc/proxy/secrets/session_secret
+          - -openshift-ca=/etc/pki/tls/cert.pem
+          - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+          # /metrics is served without authentication through the proxy.
+          - -skip-auth-regex=^/metrics
+          volumeMounts:
+          - mountPath: /etc/tls/private
+            name: prometheus-tls
+          - mountPath: /etc/proxy/secrets
+            name: prometheus-secrets
+          - mountPath: /prometheus
+            name: prometheus-data
+
+        - name: prometheus
+          args:
+          - --storage.tsdb.retention=6h
+          - --storage.tsdb.min-block-duration=2m
+          - --config.file=/etc/prometheus/prometheus.yml
+          # Listens on all interfaces, so port 9090 of the prometheus Service
+          # reaches Prometheus without going through the OAuth proxy.
+          - --web.listen-address=0.0.0.0:9090
+          image: "${PROMETHEUS_IMAGE}"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+          - mountPath: /etc/prometheus
+            name: prometheus-config
+          - mountPath: /prometheus
+            name: prometheus-data
+
+        # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy
+        - name: alerts-proxy
+          image: "${PROXY_IMAGE}"
+          imagePullPolicy: IfNotPresent
+          ports:
+          - containerPort: 9443
+            name: web
+          args:
+          - -provider=openshift
+          - -https-address=:9443
+          - -http-address=
+          - -email-domain=*
+          - -upstream=http://localhost:9099
+          - -client-id=system:serviceaccount:${NAMESPACE}:prometheus
+          - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}'
+          - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}'
+          - -tls-cert=/etc/tls/private/tls.crt
+          - -tls-key=/etc/tls/private/tls.key
+          - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+          - -cookie-secret-file=/etc/proxy/secrets/session_secret
+          - -openshift-ca=/etc/pki/tls/cert.pem
+          - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+          volumeMounts:
+          - mountPath: /etc/tls/private
+            name: alerts-tls
+          - mountPath: /etc/proxy/secrets
+            name: alerts-secrets
+
+        - name: alert-buffer
+          args:
+          - --storage-path=/alert-buffer/messages.db
+          image: "${ALERTBUFFER_IMAGE}"
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+          - mountPath: /alert-buffer
+            name: alert-buffer-data
+          ports:
+          - containerPort: 9099
+            name: alert-buf
+
+        - name: alertmanager
+          args:
+          - --config.file=/etc/alertmanager/alertmanager.yml
+          image: "${ALERTMANAGER_IMAGE}"
+          imagePullPolicy: IfNotPresent
+          ports:
+          - containerPort: 9093
+            name: web
+          volumeMounts:
+          - mountPath: /etc/alertmanager
+            name: alertmanager-config
+          - mountPath: /alertmanager
+            name: alertmanager-data
+
+        restartPolicy: Always
+        # The secrets and claims referenced here are not defined in this
+        # part of the template.
+        volumes:
+        - name: prometheus-config
+          configMap:
+            defaultMode: 420
+            name: prometheus
+        - name: prometheus-secrets
+          secret:
+            secretName: prometheus-proxy
+        - name: prometheus-tls
+          secret:
+            secretName: prometheus-tls
+        - name: prometheus-data
+          persistentVolumeClaim:
+            claimName: prometheus
+        - name: alertmanager-config
+          configMap:
+            defaultMode: 420
+            name: prometheus-alerts
+        - name: alerts-secrets
+          secret:
+            secretName: alerts-proxy
+        - name: alerts-tls
+          secret:
+            secretName: prometheus-alerts-tls
+        - name: alertmanager-data
+          persistentVolumeClaim:
+            claimName: prometheus-alertmanager
+        - name: alert-buffer-data
+          persistentVolumeClaim:
+            claimName: prometheus-alertbuffer
+
+# Prometheus configuration, mounted at /etc/prometheus in the prometheus
+# container. Everything below the "|-" markers is file content, not template
+# structure: keep its indentation intact.
+- kind: ConfigMap
+  apiVersion: v1
+  metadata:
+    name: prometheus
+    namespace: ${NAMESPACE}
+  data:
+    prometheus.rules: |-
+      groups:
+      - name: example-rules
+        interval: 30s # defaults to global interval
+        rules:
+
+    prometheus.yml: |-
+      global:
+        scrape_interval: 30s
+        evaluation_interval: 30s
+
+      rule_files:
+        - '*.rules'
+
+      # A scrape configuration for running Prometheus on a Kubernetes cluster.
+      # This uses separate scrape configs for cluster components (i.e. API server, node)
+      # and services to allow each to use different authentication configs.
+      #
+      # Kubernetes labels will be added as Prometheus labels on metrics via the
+      # `labelmap` relabeling action.
+
+      # Scrape config for API servers.
+      #
+      # Kubernetes exposes API servers as endpoints to the default/kubernetes
+      # service so this uses `endpoints` role and uses relabelling to only keep
+      # the endpoints associated with the default/kubernetes service using the
+      # default named port `https`. This works for single API server deployments as
+      # well as HA API server deployments.
+      scrape_configs:
+      - job_name: 'kubernetes-apiservers'
+
+        kubernetes_sd_configs:
+        - role: endpoints
+
+        scheme: https
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+        # Keep only the default/kubernetes service endpoints for the https port. This
+        # will add targets for each API server which Kubernetes adds an endpoint to
+        # the default/kubernetes service.
+        relabel_configs:
+        - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+          action: keep
+          regex: default;kubernetes;https
+
+      # Scrape config for nodes.
+      #
+      # Each node exposes a /metrics endpoint that contains operational metrics for
+      # the Kubelet and other components.
+      - job_name: 'kubernetes-nodes'
+
+        scheme: https
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+        kubernetes_sd_configs:
+        - role: node
+
+        relabel_configs:
+        - action: labelmap
+          regex: __meta_kubernetes_node_label_(.+)
+
+      # Scrape config for controllers.
+      #
+      # Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
+      # the controllers.
+      #
+      # TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via
+      # endpoints.
+      - job_name: 'kubernetes-controllers'
+
+        scheme: https
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+        kubernetes_sd_configs:
+        - role: endpoints
+
+        # Keep only the default/kubernetes service endpoints for the https port, and then
+        # set the port to 8444. This is the default configuration for the controllers on OpenShift
+        # masters.
+        relabel_configs:
+        - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+          action: keep
+          regex: default;kubernetes;https
+        - source_labels: [__address__]
+          action: replace
+          target_label: __address__
+          regex: (.+)(?::\d+)
+          replacement: $1:8444
+
+      # Scrape config for cAdvisor.
+      #
+      # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
+      # reports container metrics for each running pod. Scrape those by default.
+      - job_name: 'kubernetes-cadvisor'
+
+        scheme: https
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+        metrics_path: /metrics/cadvisor
+
+        kubernetes_sd_configs:
+        - role: node
+
+        relabel_configs:
+        - action: labelmap
+          regex: __meta_kubernetes_node_label_(.+)
+
+      # Scrape config for service endpoints.
+      #
+      # The relabeling allows the actual service scrape endpoint to be configured
+      # via the following annotations:
+      #
+      # * `prometheus.io/scrape`: Only scrape services that have a value of `true`
+      # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
+      # to set this to `https` & most likely set the `tls_config` of the scrape config.
+      # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
+      # * `prometheus.io/port`: If the metrics are exposed on a different port to the
+      # service then set this appropriately.
+      - job_name: 'kubernetes-service-endpoints'
+
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+          # TODO: this should be per target
+          insecure_skip_verify: true
+
+        kubernetes_sd_configs:
+        - role: endpoints
+
+        relabel_configs:
+        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
+          action: keep
+          regex: true
+        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
+          action: replace
+          target_label: __scheme__
+          regex: (https?)
+        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
+          action: replace
+          target_label: __metrics_path__
+          regex: (.+)
+        - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
+          action: replace
+          target_label: __address__
+          regex: (.+)(?::\d+);(\d+)
+          replacement: $1:$2
+        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username]
+          action: replace
+          target_label: __basic_auth_username__
+          regex: (.+)
+        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password]
+          action: replace
+          target_label: __basic_auth_password__
+          regex: (.+)
+        - action: labelmap
+          regex: __meta_kubernetes_service_label_(.+)
+        - source_labels: [__meta_kubernetes_namespace]
+          action: replace
+          target_label: kubernetes_namespace
+        - source_labels: [__meta_kubernetes_service_name]
+          action: replace
+          target_label: kubernetes_name
+
+      alerting:
+        alertmanagers:
+        - scheme: http
+          static_configs:
+          - targets:
+            - "localhost:9093"
+
+# Alertmanager configuration, mounted at /etc/alertmanager in the
+# alertmanager container. All alerts go to the alert buffer webhook on
+# localhost:9099.
+- kind: ConfigMap
+  apiVersion: v1
+  metadata:
+    name: prometheus-alerts
+    namespace: ${NAMESPACE}
+    labels:
+      template: grafana-prometheus
+  data:
+    alertmanager.yml: |-
+      global:
+
+      # The root route on which each incoming alert enters.
+      route:
+        # default route if none match
+        receiver: alert-buffer-wh
+
+        # The labels by which incoming alerts are grouped together. For example,
+        # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
+        # be batched into a single group.
+        # TODO:
+        group_by: []
+
+        # All the above attributes are inherited by all child routes and can
+        # overwritten on each.
+
+      receivers:
+      - name: alert-buffer-wh
+        webhook_configs:
+        - url: http://localhost:9099/topics/alerts
+
+
+##
+## Template Parameters
+##
+parameters:
+- description: The Docker image to use for the OAuth Proxy.
+  displayName: OAuth Proxy image
+  name: PROXY_IMAGE
+  value: openshift3/oauth-proxy:v3.9
+  required: true
+
+- description: The Docker image to use for Prometheus.
+  displayName: Prometheus image
+  name: PROMETHEUS_IMAGE
+  value: openshift3/prometheus:v3.9
+  required: true
+
+- description: The Docker image to use for the Alert Buffer.
+  displayName: Alert Buffer image
+  name: ALERTBUFFER_IMAGE
+  value: openshift3/prometheus-alert-buffer:v3.9
+  required: true
+
+- description: The Docker image to use for the Alert Manager.
+  displayName: Alert Manager image
+  name: ALERTMANAGER_IMAGE
+  value: openshift3/prometheus-alertmanager:v3.9
+  required: true
+
+- description: The desired hostname of the route to the Prometheus service.
+  displayName: Hostname of the Prometheus Service
+  name: PROMETHEUS_HOSTNAME
+  required: false
+
+- description: The desired hostname of the route to the Prometheus Alert service.
+  displayName: Hostname of the Prometheus Alert Service
+  name: ALERTS_HOSTNAME
+  required: false
+
+- description: The session secret for the proxy
+  name: SESSION_SECRET
+  generate: expression
+  from: "[a-zA-Z0-9]{43}"
+  required: true
+
+- description: The namespace used to deploy this template
+  name: NAMESPACE
+  required: true
+
+- description: Volume size for Prometheus
+  displayName: Prometheus Volume Size
+  name: PROMETHEUS_VOLUME_SIZE
+  value: "1Gi"
+  required: true
+
+- description: Volume size for the Alert Buffer
+  displayName: Alert Buffer Volume Size
+  name: ALERTBUFFER_VOLUME_SIZE
+  value: "1Gi"
+  required: true
+
+- description: Volume size for the Alert Manager
+  displayName: Alert Manager Volume Size
+  name: ALERTMANAGER_VOLUME_SIZE
+  value: "1Gi"
+  required: true