From 10b6498f0548545efe0d55c22432b94194397f9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20Mass=C3=A9?=
Date: Fri, 3 Aug 2018 15:09:55 +0200
Subject: [PATCH] regenerates the service serving certificates

---
 .../README.md                                 | 93 +++++++++++++++++++
 .../regenerate-service-certificates.yaml      | 31 +++++++
 2 files changed, 124 insertions(+)
 create mode 100644 Regenerate-the-Service-Serving-Certificates/README.md
 create mode 100644 Regenerate-the-Service-Serving-Certificates/regenerate-service-certificates.yaml

diff --git a/Regenerate-the-Service-Serving-Certificates/README.md b/Regenerate-the-Service-Serving-Certificates/README.md
new file mode 100644
index 0000000..55e2549
--- /dev/null
+++ b/Regenerate-the-Service-Serving-Certificates/README.md
@@ -0,0 +1,93 @@
+# Troubleshooting certificates in OpenShift
+
+## Context
+
+OpenShift can issue TLS certificates for any service deployed in OpenShift.
+Those certificates can then be used by pods to serve content over TLS.
+
+When re-deploying certificates using the `redeploy-certificates.yml` playbook, the
+OpenShift Service Signer CA is re-generated, thus invalidating all the
+previously generated certificates.
+
+The first symptom is usually the Web Console not working anymore (502 HTTP Status Code).
+
+## Diagnosis
+
+Move to the OpenShift Web Console project:
+
+```sh
+oc project openshift-web-console
+```
+
+Check that the pods are deployed and running:
+
+```raw
+# oc get pods
+NAME                          READY     STATUS    RESTARTS   AGE
+webconsole-56c6745c85-4rpsk   1/1       Running   6          125d
+```
+
+In the logs of the web console, you should see explicit error messages:
+
+```raw
+# oc logs -f webconsole-56c6745c85-4rpsk
+I0803 09:46:40.437363 1 start.go:201] OpenShift Web Console Version: v3.9.14
+I0803 09:46:40.437533 1 serve.go:89] Serving securely on 0.0.0.0:8443
+I0803 09:48:30.824930 1 logs.go:41] http: TLS handshake error from 10.128.0.1:54128: remote error: tls: bad certificate
+I0803 09:51:15.502322 1 logs.go:41] http: TLS handshake error from 10.128.0.1:34450: remote error: tls: bad certificate
+I0803 09:51:17.643076 1 logs.go:41] http: TLS handshake error from 10.128.0.1:34554: remote error: tls: bad certificate
+```
+
+Output the webconsole certificate:
+
+```sh
+oc get secret webconsole-serving-cert -o jsonpath='{ .data.tls\.crt }' |base64 -d |openssl x509 -noout -text
+```
+
+Check against the OpenShift Service Signer certificate on the master:
+
+```sh
+openssl x509 -noout -text -in /etc/origin/master/service-signer.crt
+```
+
+The `Issuer DN` in the output of the first command must match the `Subject DN` in the output of the second command.
+
+If they are different, you need to re-generate the webconsole certificates
+(as well as all the other service certificates).
+
+## Regenerate the Web Console certificates
+
+Delete the `webconsole-serving-cert` certificate and touch the `webconsole` service:
+
+```sh
+oc delete secret webconsole-serving-cert
+oc patch service webconsole --type=json -p '[ { "op": "remove", "path": "/metadata/annotations/service.alpha.openshift.io~1serving-cert-signed-by" } ]'
+```
+
+Re-deploy the webconsole with the new certificates:
+
+```sh
+oc delete pods -l webconsole=true
+```
+
+## Regenerate all the other Service Serving Certificates
+
+You can get a list of all the impacted services with:
+
+```sh
+oc get services --all-namespaces -o jsonpath='{range .items[?(@.metadata.annotations.service\.alpha\.openshift\.io/serving-cert-secret-name)]}{.metadata.namespace} {.metadata.name} {.metadata.annotations.service\.alpha\.openshift\.io/serving-cert-secret-name}{"\n"}{end}'
+```
+
+Since the list would be quite long, proper automation is needed.
+
+You can use the provided [ansible playbook](regenerate-service-certificates.yaml)
+to regenerate all the certificates and re-deploy all the pods behind the affected
+services:
+
+```sh
+ansible-playbook regenerate-service-certificates.yaml
+```
+
+## References
+
+- [Service Serving Certificate Secrets](https://docs.openshift.com/container-platform/3.9/dev_guide/secrets.html#service-serving-certificate-secrets)
\ No newline at end of file
diff --git a/Regenerate-the-Service-Serving-Certificates/regenerate-service-certificates.yaml b/Regenerate-the-Service-Serving-Certificates/regenerate-service-certificates.yaml
new file mode 100644
index 0000000..569697f
--- /dev/null
+++ b/Regenerate-the-Service-Serving-Certificates/regenerate-service-certificates.yaml
@@ -0,0 +1,50 @@
+---
+# Regenerates the service serving certificates of every OpenShift service.
+#
+# For each service carrying the
+# "service.alpha.openshift.io/serving-cert-secret-name" annotation, the play:
+#   1. deletes the secret named by that annotation,
+#   2. removes the "serving-cert-signed-by" annotation from the service,
+#   3. pauses for 10 seconds,
+#   4. deletes the pods matched by the service selector.
+#
+# Every step shells out to the "oc" client on localhost.
+
+- name: Regenerates the service serving certificates
+  gather_facts: false
+  hosts: localhost
+  tasks:
+    - name: Get a list of all services
+      command: oc get services --all-namespaces -o json
+      register: oc_get_services
+
+    - block:
+        - name: Delete the secret bound to the service
+          command: oc delete secret {{ item.secret }} -n {{ item.namespace }}
+          with_items: '{{ services }}'
+
+        - name: Touch the service so that the secret gets regenerated
+          command: oc patch service {{ item.service }} -n {{ item.namespace }} --type=json -p '[{"op":"remove","path":"/metadata/annotations/service.alpha.openshift.io~1serving-cert-signed-by"}]'
+          with_items: '{{ services }}'
+
+        - name: Wait for a few seconds, to let OpenShift regenerates all the certificates
+          pause:
+            seconds: 10
+
+        - name: Delete the pods behind each updated service so that they pick up the new certificate
+          # Pods live in the namespace of the service, hence item.namespace.
+          command: oc delete pods -n {{ item.namespace }} -l {{ selectors }}
+          vars:
+            # Join every selector label with commas into a single -l value, so
+            # that only pods carrying all of the labels are selected.
+            selectors: '{% for k, v in item.selectors.items() %}{{ k }}={{ v }}{% if not loop.last %},{% endif %}{% endfor %}'
+          with_items: '{{ services }}'
+          # A service without a selector has no labels to select pods with;
+          # skipping it also avoids calling .items() on a null selector.
+          when: item.selectors
+      vars:
+        # Parsed JSON output of "oc get services".
+        data: '{{ oc_get_services.stdout |from_json }}'
+        # One entry per service that has the serving-cert-secret-name
+        # annotation: service name, namespace, secret name and selector.
+        services: '{{ data|json_query(''items[?metadata.annotations."service.alpha.openshift.io/serving-cert-secret-name"].{ "service": metadata.name, "namespace": metadata.namespace, "secret": metadata.annotations."service.alpha.openshift.io/serving-cert-secret-name", "selectors": spec.selector }'') }}'