You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
154 lines
5.4 KiB
154 lines
5.4 KiB
"""Test PostgreSQL automatic crash recovery.

Scenarios covered:
  1. Container crash (SIGKILL via ``podman kill``) → systemd restarts the
     service automatically (Restart=always, RestartSec=10).
  2. Full VM reboot (``systemctl reboot``) → all services start cleanly and
     data is intact.

All tests share the module-scoped ``postgresql_vm`` fixture.  Because some
tests are destructive (they kill the container), they are intentionally
sequenced: create data → crash → verify recovery → create more data →
reboot → verify recovery.
"""
|
|
|
|
import time
|
|
|
|
from helpers import run_sql
|
|
|
|
# Witness rows: each (table, value) pair is inserted before a disruptive step
# and queried again afterwards to prove that the data survived recovery.

# Inserted before the container is killed (scenario 1).
CRASH_WITNESS_TABLE = "crash_witness"
CRASH_WITNESS_VALUE = "before_crash"

# Inserted before the VM is rebooted (scenario 2).
REBOOT_WITNESS_TABLE = "reboot_witness"
REBOOT_WITNESS_VALUE = "before_reboot"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Scenario 1: container crash
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_server_running_before_crash(pg_host):
    """Check the precondition for the crash scenario.

    ``postgresql-server.service`` has to be reported as running before the
    container is killed; otherwise the crash test proves nothing.
    """
    server_unit = pg_host.service("postgresql-server.service")
    assert server_unit.is_running
|
|
|
|
|
|
def test_create_data_before_crash(postgresql_vm, test_ssh_key):
    """Create the crash witness table and insert the row that must survive.

    Both statements are sent to ``run_sql`` as a single SQL string.
    """
    create_table = (
        f"CREATE TABLE IF NOT EXISTS {CRASH_WITNESS_TABLE} "
        "(id SERIAL PRIMARY KEY, message TEXT NOT NULL); "
    )
    insert_witness = (
        f"INSERT INTO {CRASH_WITNESS_TABLE} (message) "
        f"VALUES ('{CRASH_WITNESS_VALUE}');"
    )
    run_sql(postgresql_vm, test_ssh_key, create_table + insert_witness)
|
|
|
|
|
|
def test_kill_postgresql_container(postgresql_vm, test_ssh_key):
    """Crash PostgreSQL by sending SIGKILL to its container.

    The signal is delivered with ``podman kill`` to the container named
    ``postgresql-server``.  Systemd is expected to notice the exit and
    restart the service once RestartSec=10 seconds have elapsed.
    """
    kill_command = "podman kill --signal SIGKILL postgresql-server"
    postgresql_vm.ssh_run(kill_command, test_ssh_key)
|
|
|
|
|
|
def test_service_restarts_automatically(postgresql_vm, test_ssh_key):
    """postgresql-server.service must be active again after the crash.

    Waits up to 120 seconds: systemd waits RestartSec=10 s before
    restarting, then the container start-up and health check take
    additional time.
    """
    # Brief pause to let systemd register the exit before we start polling.
    time.sleep(5)
    # Pass the key by keyword, consistent with the other wait_for_service
    # call in this module.
    postgresql_vm.wait_for_service(
        "postgresql-server.service", ssh_key=test_ssh_key, timeout=120
    )
|
|
|
|
|
|
def test_data_intact_after_crash_recovery(postgresql_vm, test_ssh_key):
    """Verify the crash witness row is still readable after the restart.

    Selects the witness message and checks that it appears in the text
    returned by ``run_sql``.
    """
    witness_query = (
        f"SELECT message FROM {CRASH_WITNESS_TABLE} "
        f"WHERE message = '{CRASH_WITNESS_VALUE}'"
    )
    result = run_sql(postgresql_vm, test_ssh_key, witness_query)
    assert CRASH_WITNESS_VALUE in result, (
        f"Crash witness row not found after recovery. Query returned: {result!r}"
    )
|
|
|
|
|
|
def test_target_still_active_after_crash(pg_host):
    """Confirm ``postgresql.target`` is still running after the recovery.

    Restarting the container must not leave the target inactive.
    """
    target_unit = pg_host.service("postgresql.target")
    assert target_unit.is_running
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Scenario 2: VM reboot (``systemctl reboot``)
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_create_data_before_reboot(postgresql_vm, test_ssh_key):
    """Create the reboot witness table and insert the row that must survive.

    Both statements are sent to ``run_sql`` as a single SQL string.
    """
    statements = [
        f"CREATE TABLE IF NOT EXISTS {REBOOT_WITNESS_TABLE} ",
        "(id SERIAL PRIMARY KEY, message TEXT NOT NULL); ",
        f"INSERT INTO {REBOOT_WITNESS_TABLE} (message) ",
        f"VALUES ('{REBOOT_WITNESS_VALUE}');",
    ]
    run_sql(postgresql_vm, test_ssh_key, "".join(statements))
|
|
|
|
|
|
def test_reboot_vm(postgresql_vm, test_ssh_key):
    """Reboot the guest OS gracefully with ``systemctl reboot``.

    The SSH connection is expected to drop while the machine goes down.
    The command is issued with ``check=False`` -- presumably so that the
    dropped session is not reported as a failure (confirm against
    ``ssh_run``).
    """
    reboot_command = "systemctl reboot"
    postgresql_vm.ssh_run(reboot_command, test_ssh_key, check=False)

    # Give the VM time to go down before anything polls for SSH again.
    shutdown_grace_seconds = 15
    time.sleep(shutdown_grace_seconds)
|
|
|
|
|
|
def test_ssh_available_after_reboot(postgresql_vm, test_ssh_key):
    """Wait for SSH to come back after the reboot.

    The wait is capped at 300 seconds (5 minutes).
    """
    ssh_timeout_seconds = 300

    # Drop the cached IP so that wait_ssh re-probes it.
    postgresql_vm._ip = None
    postgresql_vm.wait_ssh(ssh_key=test_ssh_key, timeout=ssh_timeout_seconds)
|
|
|
|
|
|
def test_postgresql_target_active_after_reboot(postgresql_vm, test_ssh_key):
    """Wait for ``postgresql.target`` to become active after the reboot.

    The target is expected to come up on its own at boot (enabled in
    ignition); the wait is capped at 300 seconds.
    """
    wait_options = {"ssh_key": test_ssh_key, "timeout": 300}
    postgresql_vm.wait_for_service("postgresql.target", **wait_options)
|
|
|
|
|
|
def test_data_intact_after_reboot(postgresql_vm, test_ssh_key):
    """Verify the reboot witness row is still readable after boot.

    Selects the witness message and checks that it appears in the text
    returned by ``run_sql``.
    """
    witness_query = (
        f"SELECT message FROM {REBOOT_WITNESS_TABLE} "
        f"WHERE message = '{REBOOT_WITNESS_VALUE}'"
    )
    result = run_sql(postgresql_vm, test_ssh_key, witness_query)
    assert REBOOT_WITNESS_VALUE in result, (
        f"Reboot witness row not found. Query returned: {result!r}"
    )
|
|
|
|
|
|
def test_crash_witness_also_intact_after_reboot(postgresql_vm, test_ssh_key):
    """Data written before the crash must also survive the subsequent reboot.

    Re-runs the crash witness query after the reboot and checks that the
    witness message appears in the text returned by ``run_sql``.
    """
    output = run_sql(
        postgresql_vm,
        test_ssh_key,
        f"SELECT message FROM {CRASH_WITNESS_TABLE} "
        f"WHERE message = '{CRASH_WITNESS_VALUE}'",
    )
    # Include the raw query output on failure, like the other data checks in
    # this module, so a missing row can be diagnosed from the test report.
    assert CRASH_WITNESS_VALUE in output, (
        f"Crash witness row not found after reboot. Query returned: {output!r}"
    )
|
|
|