Collection of cookbooks for Podman Quadlets
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

154 lines
5.4 KiB

"""Test PostgreSQL automatic crash recovery.
Scenarios covered:
1. Container crash (SIGKILL via ``podman kill``) → systemd restarts the
service automatically (Restart=always, RestartSec=10).
2. VM reboot (graceful ``systemctl reboot``) → all services start cleanly and data is intact.
All tests share the module-scoped ``postgresql_vm`` fixture. Because some
tests are destructive (they kill the container), they are intentionally
sequenced: create data → crash → verify recovery → create more data →
reboot → verify recovery.
"""
import time
from helpers import run_sql
# Witness data: each table/value pair is inserted before a disruptive step and
# queried again afterwards to prove that the row survived the recovery.
# Pair used by the container-crash scenario (also re-checked after the reboot).
CRASH_WITNESS_TABLE = "crash_witness"
CRASH_WITNESS_VALUE = "before_crash"
# Pair used by the VM-reboot scenario.
REBOOT_WITNESS_TABLE = "reboot_witness"
REBOOT_WITNESS_VALUE = "before_reboot"
# ---------------------------------------------------------------------------
# Scenario 1: container crash
# ---------------------------------------------------------------------------
def test_server_running_before_crash(pg_host):
    """Check that the crash scenario starts from a healthy service.

    The later recovery checks are only meaningful if
    ``postgresql-server.service`` is reported as running before the
    container is killed.
    """
    server_unit = pg_host.service("postgresql-server.service")
    assert server_unit.is_running
def test_create_data_before_crash(postgresql_vm, test_ssh_key):
    """Write the witness row that the crash-recovery checks look for later.

    Creates ``CRASH_WITNESS_TABLE`` when it does not exist yet and inserts one
    row whose ``message`` column holds ``CRASH_WITNESS_VALUE``. Both
    statements are sent to ``run_sql`` as a single SQL string.
    """
    create_table = (
        f"CREATE TABLE IF NOT EXISTS {CRASH_WITNESS_TABLE} "
        "(id SERIAL PRIMARY KEY, message TEXT NOT NULL); "
    )
    insert_row = (
        f"INSERT INTO {CRASH_WITNESS_TABLE} (message) "
        f"VALUES ('{CRASH_WITNESS_VALUE}');"
    )
    run_sql(postgresql_vm, test_ssh_key, create_table + insert_row)
def test_kill_postgresql_container(postgresql_vm, test_ssh_key):
    """Crash the database by SIGKILLing the ``postgresql-server`` container.

    The ``podman kill`` command is executed on the VM through ``ssh_run``.
    Per the module docstring the unit is configured with Restart=always and
    RestartSec=10, so systemd is expected to bring the service back on its
    own; that is verified by the tests that follow, not here.
    """
    kill_command = "podman kill --signal SIGKILL postgresql-server"
    postgresql_vm.ssh_run(kill_command, test_ssh_key)
def test_service_restarts_automatically(postgresql_vm, test_ssh_key):
    """Wait for ``postgresql-server.service`` to come back after the crash.

    Polling is allowed up to 120 seconds. That budget has to cover the
    RestartSec=10 delay systemd applies before restarting, plus the container
    start-up and health check.
    """
    server_unit = "postgresql-server.service"
    restart_deadline = 120

    # Short head start so systemd can register the container exit before the
    # first poll.
    time.sleep(5)

    postgresql_vm.wait_for_service(
        server_unit,
        test_ssh_key,
        timeout=restart_deadline,
    )
def test_data_intact_after_crash_recovery(postgresql_vm, test_ssh_key):
    """Confirm the pre-crash witness row is still readable after recovery.

    Selects the witness message from ``CRASH_WITNESS_TABLE`` and fails, with
    the raw query output in the message, if ``CRASH_WITNESS_VALUE`` is absent.
    """
    witness_query = (
        f"SELECT message FROM {CRASH_WITNESS_TABLE} "
        f"WHERE message = '{CRASH_WITNESS_VALUE}'"
    )
    result = run_sql(postgresql_vm, test_ssh_key, witness_query)

    assert CRASH_WITNESS_VALUE in result, (
        f"Crash witness row not found after recovery. Query returned: {result!r}"
    )
def test_target_still_active_after_crash(pg_host):
    """Check that ``postgresql.target`` is still running after the recovery.

    Only the container was killed, so the enclosing target is expected to
    stay up across the automatic restart.
    """
    target_unit = pg_host.service("postgresql.target")
    assert target_unit.is_running
# ---------------------------------------------------------------------------
# Scenario 2: VM reboot (graceful ``systemctl reboot``)
# ---------------------------------------------------------------------------
def test_create_data_before_reboot(postgresql_vm, test_ssh_key):
    """Write the witness row that the post-reboot checks look for later.

    Creates ``REBOOT_WITNESS_TABLE`` when it does not exist yet and inserts
    one row whose ``message`` column holds ``REBOOT_WITNESS_VALUE``. Both
    statements are sent to ``run_sql`` as a single SQL string.
    """
    create_table = (
        f"CREATE TABLE IF NOT EXISTS {REBOOT_WITNESS_TABLE} "
        "(id SERIAL PRIMARY KEY, message TEXT NOT NULL); "
    )
    insert_row = (
        f"INSERT INTO {REBOOT_WITNESS_TABLE} (message) "
        f"VALUES ('{REBOOT_WITNESS_VALUE}');"
    )
    run_sql(postgresql_vm, test_ssh_key, create_table + insert_row)
def test_reboot_vm(postgresql_vm, test_ssh_key):
    """Reboot the VM with ``systemctl reboot`` and give it time to go down.

    The command is sent with ``check=False`` because the SSH session is
    expected to drop while the machine shuts down.
    """
    reboot_command = "systemctl reboot"
    shutdown_grace_seconds = 15

    postgresql_vm.ssh_run(reboot_command, test_ssh_key, check=False)

    # Let the VM actually go down first, so the next test's SSH polling does
    # not reach the not-yet-rebooted system.
    time.sleep(shutdown_grace_seconds)
def test_ssh_available_after_reboot(postgresql_vm, test_ssh_key):
    """Wait up to 300 seconds (5 minutes) for SSH to answer after the reboot."""
    ssh_deadline = 300

    # Drop the cached address so that wait_ssh looks it up again.
    # NOTE(review): this pokes a private attribute of the VM helper; the
    # re-probe behaviour is assumed from the original comment — confirm.
    postgresql_vm._ip = None

    postgresql_vm.wait_ssh(ssh_key=test_ssh_key, timeout=ssh_deadline)
def test_postgresql_target_active_after_reboot(postgresql_vm, test_ssh_key):
    """Wait for ``postgresql.target`` to become active again after the reboot.

    Nothing starts the target manually here; it is expected to come up by
    itself at boot (enabled via ignition). Up to 300 seconds are allowed.
    """
    target_unit = "postgresql.target"
    boot_deadline = 300

    postgresql_vm.wait_for_service(
        target_unit,
        ssh_key=test_ssh_key,
        timeout=boot_deadline,
    )
def test_data_intact_after_reboot(postgresql_vm, test_ssh_key):
    """Confirm the pre-reboot witness row is still readable after boot.

    Selects the witness message from ``REBOOT_WITNESS_TABLE`` and fails, with
    the raw query output in the message, if ``REBOOT_WITNESS_VALUE`` is absent.
    """
    witness_query = (
        f"SELECT message FROM {REBOOT_WITNESS_TABLE} "
        f"WHERE message = '{REBOOT_WITNESS_VALUE}'"
    )
    result = run_sql(postgresql_vm, test_ssh_key, witness_query)

    assert REBOOT_WITNESS_VALUE in result, (
        f"Reboot witness row not found. Query returned: {result!r}"
    )
def test_crash_witness_also_intact_after_reboot(postgresql_vm, test_ssh_key):
    """Confirm the pre-crash witness row also survived the later reboot.

    Runs the same query as the crash-recovery check against
    ``CRASH_WITNESS_TABLE`` and fails if ``CRASH_WITNESS_VALUE`` is absent
    from the output.
    """
    output = run_sql(
        postgresql_vm,
        test_ssh_key,
        f"SELECT message FROM {CRASH_WITNESS_TABLE} "
        f"WHERE message = '{CRASH_WITNESS_VALUE}'",
    )
    # Include the raw query output on failure, like the sibling data checks,
    # so a lost row can be diagnosed from the test report alone.
    assert CRASH_WITNESS_VALUE in output, (
        f"Crash witness row not found after reboot. Query returned: {output!r}"
    )