diff --git a/Makefile b/Makefile index e9417a3..a0250ba 100644 --- a/Makefile +++ b/Makefile @@ -6,8 +6,8 @@ all: syntax-test lint unit-test e2e-test release syntax-test: @echo "Running syntax tests..." - @/bin/bash -nv src/zvirt - @/bin/bash -nv src/lib/core.sh + @/bin/bash -nv src/bin/zvirt + @/bin/bash -nv src/lib/zvirt/core.sh prerequisites: @echo "Installing prerequisites..." diff --git a/README.md b/README.md index b535fba..80c058c 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Zvirt takes snapshots of Libvirt domains using ZFS. It supports both crash-consistent and live snapshots. -At the end, all components of a domain (Domain definition, TPM, NVRAM, VirtioFS, ZFS snapshots of the underlying storage volumes) are captured as a set of consistent ZFS snapshots. +At the end, all components of a domain - Domain definition, TPM, NVRAM, VirtioFS, disks (either files on a ZFS dataset or raw zvols) - are captured as a set of consistent ZFS snapshots. ## Features diff --git a/src/bin/zvirt b/src/bin/zvirt index 866d701..cc5b80f 100755 --- a/src/bin/zvirt +++ b/src/bin/zvirt @@ -51,12 +51,13 @@ case "$action" in revert_snapshots || fatal "Failed to revert snapshots." ;; list) - if [ ${#domains[@]} -eq 0 ]; then - # Get all domains - mapfile -t domains < <(virsh list --all --name | grep -v '^$') - fi + preflight_checks "$action" "${domains[@]}" || fatal "Pre-flight checks failed." list_snapshots "${domains[@]}" || fatal "Failed to list snapshots." ;; + prune) + preflight_checks "$action" "${domains[@]}" || fatal "Pre-flight checks failed." + prune_snapshots "${domains[@]}" || fatal "Failed to prune snapshots." + ;; *) fatal "Unknown action '$action'." 
;; diff --git a/src/lib/zvirt/core.sh b/src/lib/zvirt/core.sh index 32f1f50..f7dfdd6 100644 --- a/src/lib/zvirt/core.sh +++ b/src/lib/zvirt/core.sh @@ -33,11 +33,13 @@ Options: -d DOMAIN specify domain name (you can specify multiple -d options) -s SNAPSHOT specify snapshot name -b batch mode (pause all domains, take snapshots, then resume all domains) + -k N keep at most N snapshots per domain (used with 'prune' action) Actions: snapshot take a snapshot of the specified domain(s) revert revert to a snapshot of the specified domain(s) list list snapshots of the specified domain(s) (or all domains if none specified) + prune prune old snapshots of the specified domain(s) according to retention policy Examples: Take a crash-consistent snapshot of domain 'vm1' named 'backup1': @@ -54,6 +56,9 @@ Examples: List snapshots of all domains: ${0##*/} list + + Prune snapshots of all domains, keeping at most 5 snapshots: + ${0##*/} prune -k 5 EOF } @@ -66,6 +71,7 @@ function init_global_variables () { action="" batch=0 live=0 + keep=0 # Cache for domain parameters to avoid redundant calls to the zfs command declare -gA domain_params_cache=( ) @@ -83,7 +89,7 @@ function parse_args () { OPTIND=1 # Reset in case getopts has been used previously in the shell. - while getopts "h?blvd:s:" opt; do + while getopts "h?blvd:s:k:" opt; do case "$opt" in h|\?) show_help @@ -99,6 +105,8 @@ function parse_args () { ;; l) live=1 ;; + k) keep="$OPTARG" + ;; *) show_help >&2 exit 1 ;; @@ -114,6 +122,11 @@ function parse_args () { should_exit=1 fi + if [ ${#domains[@]} -eq 0 ]; then + # Get all domains + mapfile -t domains < <(virsh list --all --name | grep -v '^$') + fi + case "$action" in snapshot) if [ ${#domains[@]} -eq 0 ] || [ -z "$snapshot_name" ]; then @@ -139,6 +152,12 @@ function parse_args () { ;; list) ;; + prune) + if [ "$keep" -le 0 ]; then + echo "Error: The -k option with a positive integer value must be specified for the 'prune' action." 
+ should_exit=1 + fi + ;; *) echo "Error: Unsupported action '$action'." should_exit=1 @@ -164,7 +183,10 @@ function domain_exists () { function domain_checks () { local action="$1" local domain="$2" - local snapshot_name="$3" + local snapshot_name + if [ "$action" == "snapshot" ] || [ "$action" == "revert" ]; then + snapshot_name="$3" + fi local error=0 local state="" @@ -194,8 +216,6 @@ function domain_checks () { if [ -z "$zfs_mountpoint" ] || [[ ! "$zfs_mountpoint" =~ ^/ ]]; then error "$domain: Wrong ZFS mountpoint for dataset '$zfs_dataset': '$zfs_mountpoint'." ; error=1 -# elif [ ! -d "$zfs_mountpoint" ]; then -# error "$domain: ZFS mountpoint '$zfs_mountpoint' does not exist." ; error=1 fi state=$(domain_state "$domain") @@ -205,6 +225,7 @@ function domain_checks () { domain_params_cache["$domain/dataset"]="${zfs_dataset}" domain_params_cache["$domain/mountpoint"]="${zfs_mountpoint}" domain_params_cache["$domain/zvols"]="${zfs_zvols[*]}" + domain_params_cache["$domain/snapshots"]="${zfs_dataset_snapshots[*]}" case "$action" in snapshot) @@ -251,7 +272,15 @@ function domain_checks () { fi done ;; + list) + ;; + prune) + if [ ${#zfs_dataset_snapshots[@]} -le "$keep" ]; then + log_verbose "$domain: No snapshots to prune (total: ${#zfs_dataset_snapshots[@]}, keep: $keep)." + fi + ;; *) + # Should not reach here due to prior validation error "$domain: Unknown action '$action'." ;; esac @@ -381,13 +410,20 @@ function resume_all_domains () { # Performs pre-flight checks for all specified domains according to the action. function preflight_checks () { local action="$1" ; shift - local snapshot_name="$1" ; shift + local snapshot_name + if [ "$action" == "snapshot" ] || [ "$action" == "revert" ]; then + snapshot_name="$1" ; shift + fi local error=0 local domains=( "$@" ) for domain in "${domains[@]}"; do log_verbose "$domain: Performing domain pre-flight checks for $action..." - if ! 
domain_checks "$action" "$domain" "$snapshot_name"; then + local -a domain_checks_args=( "$action" "$domain" ) + if [ "$action" == "snapshot" ] || [ "$action" == "revert" ]; then + domain_checks_args+=( "$snapshot_name" ) + fi + if ! domain_checks "${domain_checks_args[@]}"; then error=1 fi done @@ -395,6 +431,7 @@ function preflight_checks () { return $error } + # Removes the save file for the specified domain. function remove_save_file () { local domain="$1" @@ -510,18 +547,35 @@ function revert_snapshots () { # Lists snapshots for all specified domains. function list_snapshots () { local domains=( "$@" ) - local zfs_datasets - local zfs_dataset local domain + local snapshot for domain in "${domains[@]}"; do - zfs_datasets=( $(get_zfs_datasets_from_domain "$domain") ) - if [ ${#zfs_datasets[@]} -ne 1 ]; then - error "$domain: Wrong number of ZFS datasets (${#zfs_datasets[@]}) found." ; return 1 - fi - zfs_dataset="${zfs_datasets[0]:-}" - echo "Snapshots for domain '$domain':" - get_zfs_snapshots_from_dataset "$zfs_dataset" | sed 's/^/ - /' + for snapshot in ${domain_params_cache["$domain/snapshots"]}; do + echo " - $snapshot" + done + done +} + +# Prunes old snapshots for all specified domains according to the retention policy. 
+function prune_snapshots () { + local domains=( "$@" ) + local dataset snapshots domain + # Refuse a 'keep' that is not a positive integer: a non-numeric value typically evaluates to 0 inside $(( )), which would make the '@%' range below cover every snapshot. A failed 'zfs destroy' returns 1 so the failure is not masked by later domains. + [[ "$keep" =~ ^[1-9][0-9]*$ ]] || { error "Invalid retention count '$keep': a positive integer is required." ; return 1 ; } + + for domain in "${domains[@]}"; do + snapshots=( ${domain_params_cache["$domain/snapshots"]} ) + dataset="${domain_params_cache["$domain/dataset"]}" + if [ "${#snapshots[@]}" -le "$keep" ]; then + continue + fi + local first_to_delete_idx=$(( ${#snapshots[@]} - keep - 1 )) + local first_to_delete="${snapshots[$first_to_delete_idx]}" + if [ -z "$first_to_delete" ]; then + continue + fi + zfs destroy -r "${dataset}@%${first_to_delete}" || return 1 done } diff --git a/test/e2e/zvirt.bats b/test/e2e/zvirt.bats index b357bf5..346b9c6 100644 --- a/test/e2e/zvirt.bats +++ b/test/e2e/zvirt.bats @@ -228,6 +228,153 @@ teardown() { e2e_test_debug_log "setup: provisioning completed" } +@test "zvirt: prune snapshots" { + # Take five snapshots in a row, each time creating and deleting a witness file + for snap in s1 s2 s3 s4 s5; do + # Create witness files in all three domains before taking snapshots + qemu_exec standard touch /test/rootfs/witness-file.$snap + qemu_exec with-fs touch /test/virtiofs/witness-file.$snap + qemu_exec with-zvol touch /test/zvol/witness-file.$snap + + # Verify that the witness files exist in the virtiofs host mount + run test -f /srv/with-fs/witness-file.$snap + assert_success + + # Take crash-consistent snapshots for all three domains + run zvirt snapshot -d standard -d with-zvol -d with-fs -s $snap + assert_success + + # Verify that the domains are still running + run virsh domstate standard + assert_success + assert_output "running" + run virsh domstate with-fs + assert_success + assert_output "running" + run virsh domstate with-zvol + assert_success + assert_output "running" + + # Assert that the files created before the snapshot exist + run qemu_exec standard ls -1 /test/rootfs + assert_success + assert_output "witness-file.$snap" + run qemu_exec with-fs ls -1 /test/virtiofs + assert_success + assert_output "witness-file.$snap" + run qemu_exec with-zvol 
ls -1 /test/zvol + assert_success + assert_output "witness-file.$snap" + + # Delete the witness files + run qemu_exec standard rm /test/rootfs/witness-file.$snap + assert_success + run qemu_exec with-fs rm /test/virtiofs/witness-file.$snap + assert_success + run qemu_exec with-zvol rm /test/zvol/witness-file.$snap + assert_success + + # Sync all filesystems + run qemu_exec standard sync + assert_success + run qemu_exec with-fs sync + assert_success + run qemu_exec with-zvol sync + assert_success + + # Wait a moment to ensure all writes are flushed + sleep 2 + + # Verify that the witness files have been deleted in the virtiofs host mount + run test -f /srv/with-fs/witness-file.$snap + assert_failure + done + + # List snapshots and verify that all five (s1..s5) exist for every domain + run zvirt list -d standard -d with-zvol -d with-fs + assert_success + assert_output "Snapshots for domain 'standard': + - s1 + - s2 + - s3 + - s4 + - s5 +Snapshots for domain 'with-zvol': + - s1 + - s2 + - s3 + - s4 + - s5 +Snapshots for domain 'with-fs': + - s1 + - s2 + - s3 + - s4 + - s5" + + # Prune snapshots to keep only the latest two + run zvirt prune -k 2 -d standard -d with-zvol -d with-fs + assert_success + + # List snapshots again and verify that only s4 and s5 remain + run zvirt list -d standard -d with-zvol -d with-fs + assert_success + assert_output "Snapshots for domain 'standard': + - s4 + - s5 +Snapshots for domain 'with-zvol': + - s4 + - s5 +Snapshots for domain 'with-fs': + - s4 + - s5" + + # Stop all domains + run virsh destroy standard + assert_success + run virsh destroy with-fs + assert_success + run virsh destroy with-zvol + assert_success + + # Revert all three domains to snapshot s4 (the -b batch flag is not passed) + run zvirt revert -d standard -d with-zvol -d with-fs -s s4 + assert_success + + # Check all domains have been shut off + run virsh domstate standard + assert_success + assert_output "shut off" + run virsh domstate with-fs + assert_success + assert_output "shut off" + run virsh domstate with-zvol + assert_success + assert_output "shut 
off" + + # Start all domains + run virsh start standard + assert_success + run virsh start with-fs + assert_success + run virsh start with-zvol + assert_success + + # Wait for all domains to be fully ready + readiness_wait + + # Verify that the s4 witness files are back after the revert (they were deleted after s4 was taken) + run qemu_exec standard ls -1 /test/rootfs + assert_success + assert_output "witness-file.s4" + run qemu_exec with-fs ls -1 /test/virtiofs + assert_success + assert_output "witness-file.s4" + run qemu_exec with-zvol ls -1 /test/zvol + assert_success + assert_output "witness-file.s4" +} + @test "zvirt: take live snapshot in batch mode" { # Create witness files in all three domains before taking snapshots qemu_exec standard touch /test/rootfs/witness-file diff --git a/test/unit/core.bats b/test/unit/core.bats index 0cb4077..bda9ff4 100644 --- a/test/unit/core.bats +++ b/test/unit/core.bats @@ -19,7 +19,7 @@ setup() { # and with access to the domain_params_cache associative array in_bash() { local vars="" - for var in domain_params_cache snapshot_name domains verbose action batch live; do + for var in domain_params_cache snapshot_name domains verbose action batch live keep; do if declare -p "${var}" &>/dev/null; then vars+="$(declare -p "${var}") ; " fi @@ -504,22 +504,7 @@ data/domains/baz/virtiofs" @test "list_snapshots: nominal case" { # Mock the underlying tools - get_zfs_datasets_from_domain() { - if [[ "$*" == "foo" ]]; then - echo "data/domains/foo" - return 0 - fi - return 1 - } - get_zfs_snapshots_from_dataset() { - if [[ "$*" == "data/domains/foo" ]]; then - echo "snapshot1 -snapshot2" - return 0 - fi - return 1 - } - export -f get_zfs_datasets_from_domain get_zfs_snapshots_from_dataset + declare -A domain_params_cache=( ["foo/snapshots"]="snapshot1 snapshot2" ["bar/snapshots"]="snapshot3 snapshot4" ) # Run the test run in_bash list_snapshots foo @@ -529,6 +514,42 @@ snapshot2" - snapshot2" } +@test "prune_snapshots: nominal case" { + # Seed the domain parameter cache and mock the zfs command + declare -A 
domain_params_cache=( ["foo/snapshots"]="s1 s2 s3 s4 s5" ["bar/snapshots"]="s1 s2 s3 s4 s5" ["baz/snapshots"]="s1" ["foo/dataset"]="data/domains/foo" ["bar/dataset"]="data/domains/bar" ["baz/dataset"]="data/domains/baz" ) + zfs_destroy_mock="$(mock_create)" + zfs() { + if [[ "$*" == "destroy -r data/domains/foo@%s3" ]] || [[ "$*" == "destroy -r data/domains/bar@%s2" ]]; then + $zfs_destroy_mock "$@" + return $? + fi + return 1 + } + export -f zfs + export zfs_destroy_mock + + # Run the test + keep=2 + run in_bash prune_snapshots foo + assert_success + [[ "$(mock_get_call_num ${zfs_destroy_mock})" -eq 1 ]] # 5 snapshots, keep=2: one range destroy up to s3 (first mock call) + + keep=3 + run in_bash prune_snapshots bar + assert_success + [[ "$(mock_get_call_num ${zfs_destroy_mock})" -eq 2 ]] # 5 snapshots, keep=3: one range destroy up to s2 (call count is cumulative) + + keep=5 + run in_bash prune_snapshots bar + assert_success + [[ "$(mock_get_call_num ${zfs_destroy_mock})" -eq 2 ]] # 5 snapshots, keep=5: nothing to prune, call count unchanged + + keep=1 + run in_bash prune_snapshots baz + assert_success + [[ "$(mock_get_call_num ${zfs_destroy_mock})" -eq 2 ]] # 1 snapshot, keep=1: nothing to prune, call count unchanged +} + @test "preflight_checks: nominal case" { # Mock the underlying tools domain_checks() { diff --git a/test/unit/usage.bats b/test/unit/usage.bats index 65b02f0..d14f8f0 100644 --- a/test/unit/usage.bats +++ b/test/unit/usage.bats @@ -11,7 +11,7 @@ setup() { init_global_variables parse_args "$@" ret=$? 
- declare -p action batch live verbose domains snapshot_name + declare -p action batch live verbose domains snapshot_name keep return $ret } } @@ -65,3 +65,22 @@ setup() { assert_output --partial 'live="0"' } +@test "call_parse_args: prune snapshots for all domains" { + virsh() { + if [[ "$*" == "list --all --name" ]]; then + echo -e "foo\nbar" + return 0 + fi + return 1 + } + + run call_parse_args prune -k 5 # no -d given: domains must be filled from the mocked 'virsh list --all --name' + assert_success + assert_output --partial 'action="prune"' + assert_output --partial 'domains=([0]="foo" [1]="bar")' + assert_output --partial 'keep="5"' + + run call_parse_args prune # no -k given: keep stays at its initial value of 0 and must be rejected + assert_failure + assert_output --partial "The -k option with a positive integer value must be specified for the 'prune' action" +}