mirror of https://github.com/nmasse-itix/zvirt.git
commit
297041fb57
2 changed files with 509 additions and 0 deletions
@ -0,0 +1,22 @@ |
|||||
|
# Zvirt = Libvirt ZFS snapshots |
||||
|
|
||||
|
## Purpose |
||||
|
|
||||
|
Zvirt takes snapshots of Libvirt domains using ZFS. |
||||
|
It supports both crash-consistent and live snapshots. |
||||
|
|
||||
|
At the end, all components of a domain (domain definition, TPM, NVRAM, VirtioFS and the underlying storage volumes) are captured as a set of consistent ZFS snapshots.
||||
|
|
||||
|
## Features |
||||
|
|
||||
|
- Take snapshots of Libvirt domains using ZFS. |
||||
|
- Support both crash-consistent and live snapshots. |
||||
|
- Support batch mode (pause all domains, take snapshots, then resume all domains) |
||||
|
|
||||
|
## License |
||||
|
|
||||
|
MIT License |
||||
|
|
||||
|
## Author |
||||
|
|
||||
|
Nicolas Massé |
||||
@ -0,0 +1,487 @@ |
|||||
|
#!/bin/bash |
||||
|
|
||||
|
# This script takes or reverts snapshots of libvirt domains using ZFS.
||||
|
# |
||||
|
# It can take two kinds of snapshots: crash-consistent snapshots and live snapshots. |
||||
|
# |
||||
|
# - Crash-consistent snapshots are taken when the VM is powered off or powered on. |
||||
|
# They capture only the disk state at the time of the snapshot. |
||||
|
# In particular, the following items are NOT included in crash-consistent snapshots: TPM, NVRAM, domain definition (XML).
||||
|
# |
||||
|
# Taking crash-consistent snapshots makes use of ZFS filesystem snapshots of the underlying storage volumes |
||||
|
# |
||||
|
# Restoring from crash-consistent snapshots involves destroying the domain, reverting the ZFS snapshots and |
||||
|
# restarting the domain. |
||||
|
# |
||||
|
# - Live snapshots are taken while the VM is running and capture the entire state of the VM, including memory, |
||||
|
# CPU state, TPM, NVRAM, and domain definition (XML). |
||||
|
# |
||||
|
# Taking live snapshots makes use of libvirt's "save" functionality. The domain is paused, its state is saved |
||||
|
# to disk, the ZFS snapshots of the underlying storage volumes are taken, and then the domain is resumed. |
||||
|
# |
||||
|
# Restoring from live snapshots involves destroying the domain, reverting the ZFS snapshots, restoring the saved state, |
||||
|
# and restarting the domain. |
||||
|
# |
||||
|
|
||||
|
set -Eeuo pipefail |
||||
|
|
||||
|
# Make sure output won't be altered by locale settings |
||||
|
export LANG=C |
||||
|
export LC_ALL=C |
||||
|
|
||||
|
function show_help () { |
||||
|
cat << EOF |
||||
|
Usage: ${0##*/} action [-h] [-l] [-v] -d <domain_name> -s <snapshot_name> |
||||
|
|
||||
|
Options: |
||||
|
-h display this help and exit |
||||
|
-v verbose mode |
||||
|
-l live snapshot mode (default is crash-consistent) |
||||
|
-d DOMAIN specify domain name (you can specify multiple -d options) |
||||
|
-s SNAPSHOT specify snapshot name |
||||
|
-b batch mode (pause all domains, take snapshots, then resume all domains) |
||||
|
|
||||
|
Actions: |
||||
|
snapshot take a snapshot of the specified domain(s) |
||||
|
revert revert to a snapshot of the specified domain(s) |
||||
|
list list snapshots of the specified domain(s) (or all domains if none specified) |
||||
|
|
||||
|
Examples: |
||||
|
Take a crash-consistent snapshot of domain 'vm1' named 'backup1': |
||||
|
${0##*/} snapshot -d vm1 -s backup1 |
||||
|
|
||||
|
Take a live snapshot of domains 'vm1' and 'vm2' in batch mode, named 'livebackup': |
||||
|
${0##*/} snapshot -l -b -d vm1 -d vm2 -s livebackup |
||||
|
|
||||
|
Revert domain 'vm1' to snapshot 'backup1': |
||||
|
${0##*/} revert -d vm1 -s backup1 |
||||
|
|
||||
|
List snapshots of domain 'vm1': |
||||
|
${0##*/} list -d vm1 |
||||
|
|
||||
|
List snapshots of all domains: |
||||
|
${0##*/} list |
||||
|
EOF |
||||
|
} |
||||
|
|
||||
|
# Initialize our own variables: |
||||
|
snapshot_name="" |
||||
|
domains=() |
||||
|
verbose=0 |
||||
|
action="" |
||||
|
batch=0 |
||||
|
live=0 |
||||
|
|
||||
|
# Try to get the action from the first positional argument |
||||
|
if [ -n "${1:-}" ] && [[ ! "${1:-}" =~ ^- ]]; then |
||||
|
action="${1:-}" |
||||
|
shift || true |
||||
|
fi |
||||
|
|
||||
|
OPTIND=1 # Reset in case getopts has been used previously in the shell. |
||||
|
|
||||
|
while getopts "h?blvd:s:" opt; do |
||||
|
case "$opt" in |
||||
|
h|\?) |
||||
|
show_help |
||||
|
exit 0 |
||||
|
;; |
||||
|
v) verbose=1 |
||||
|
;; |
||||
|
d) domains+=( "$OPTARG" ) |
||||
|
;; |
||||
|
s) snapshot_name="$OPTARG" |
||||
|
;; |
||||
|
b) batch=1 |
||||
|
;; |
||||
|
l) live=1 |
||||
|
;; |
||||
|
*) show_help >&2 |
||||
|
exit 1 |
||||
|
;; |
||||
|
esac |
||||
|
done |
||||
|
|
||||
|
shift $((OPTIND-1)) |
||||
|
|
||||
|
[ "${1:-}" = "--" ] && shift |
||||
|
|
||||
|
should_exit=0 |
||||
|
if [ $# -ne 0 ]; then |
||||
|
echo "Error: Unexpected positional arguments: $*" |
||||
|
should_exit=1 |
||||
|
fi |
||||
|
|
||||
|
case "$action" in |
||||
|
snapshot) |
||||
|
if [ ${#domains[@]} -eq 0 ] || [ -z "$snapshot_name" ]; then |
||||
|
echo "Error: Domain name(s) and snapshot name must be specified." |
||||
|
should_exit=1 |
||||
|
fi |
||||
|
|
||||
|
if [ "$batch" -eq 1 ] && [ "$live" -ne 1 ]; then |
||||
|
echo "Error: Batch mode requires live snapshot mode." |
||||
|
should_exit=1 |
||||
|
fi |
||||
|
|
||||
|
if [[ ! "$snapshot_name" =~ ^[a-zA-Z0-9._-]+$ ]]; then |
||||
|
echo "Error: Snapshot name '$snapshot_name' contains invalid characters. Only alphanumeric characters, dots (.), underscores (_) and hyphens (-) are allowed." |
||||
|
should_exit=1 |
||||
|
fi |
||||
|
;; |
||||
|
revert) |
||||
|
if [ ${#domains[@]} -eq 0 ] || [ -z "$snapshot_name" ]; then |
||||
|
echo "Error: Domain name(s) and snapshot name must be specified." |
||||
|
should_exit=1 |
||||
|
fi |
||||
|
|
||||
|
if [ "$live" -eq 1 ]; then |
||||
|
echo "Error: Live mode is only supported for the 'snapshot' action." |
||||
|
should_exit=1 |
||||
|
fi |
||||
|
;; |
||||
|
list) |
||||
|
;; |
||||
|
*) |
||||
|
echo "Error: Unsupported action '$action'." |
||||
|
should_exit=1 |
||||
|
;; |
||||
|
esac |
||||
|
|
||||
|
if [ $should_exit -ne 0 ]; then |
||||
|
echo |
||||
|
show_help >&2 |
||||
|
exit 1 |
||||
|
fi |
||||
|
|
||||
|
# Reports a verbose message to stdout if verbose mode is enabled. |
||||
|
function log_verbose () { |
||||
|
if [ "$verbose" -eq 1 ]; then |
||||
|
echo "$@" 2>&1 |
||||
|
fi |
||||
|
} |
||||
|
|
||||
|
# Reports a fatal error message to stderr and exits with a non-zero exit code. |
||||
|
function fatal () { |
||||
|
echo "Error: $@" 2>&1 |
||||
|
exit 1 |
||||
|
} |
||||
|
|
||||
|
# Reports an error message to stderr. |
||||
|
function error () { |
||||
|
echo "Error: $@" 2>&1 |
||||
|
} |
||||
|
|
||||
|
# Checks if the specified domain exists. |
||||
|
function domain_exists () { |
||||
|
local domain="$1" |
||||
|
if virsh dominfo "$domain" &> /dev/null; then |
||||
|
return 0 |
||||
|
else |
||||
|
return 1 |
||||
|
fi |
||||
|
} |
||||
|
|
||||
|
declare -A domain_params_cache=( ) |
||||
|
|
||||
|
# Performs various checks on the specified domain before taking or reverting a snapshot. |
||||
|
# All the checks are performed according to the specified action (snapshot or revert). |
||||
|
# Any errors are reported via stderr and the function returns a non-zero exit code. |
||||
|
function domain_checks () { |
||||
|
local action="$1" |
||||
|
local domain="$2" |
||||
|
local snapshot_name="$3" |
||||
|
local error=0 |
||||
|
local state="" |
||||
|
|
||||
|
if ! domain_exists "$domain"; then |
||||
|
error "Domain '$domain' does not exist." |
||||
|
return 1 # There is no point in continuing checks if the domain does not exist |
||||
|
fi |
||||
|
|
||||
|
# ZFS dataset checks |
||||
|
zfs_datasets=( $(get_zfs_datasets_from_domain "$domain") ) |
||||
|
if [ ${#zfs_datasets[@]} -ne 1 ]; then |
||||
|
error "$domain: Wrong number of ZFS datasets (${#zfs_datasets[@]}) found." ; error=1 |
||||
|
fi |
||||
|
zfs_dataset="${zfs_datasets[0]:-}" |
||||
|
|
||||
|
# Zvols checks |
||||
|
zfs_zvols=( $(get_zfs_zvols_from_domain "$domain") ) |
||||
|
for zvol in "${zfs_zvols[@]}"; do |
||||
|
# Check if zvol is a child of $zfs_dataset |
||||
|
if [[ "$zvol" != "$zfs_dataset"* ]]; then |
||||
|
error "$domain: ZFS zvol '$zvol' is not a child of dataset '$zfs_dataset'." ; error=1 |
||||
|
fi |
||||
|
done |
||||
|
|
||||
|
zfs_dataset_snapshots=( $(get_zfs_snapshots_from_dataset "${zfs_dataset}") ) |
||||
|
zfs_mountpoint=$(zfs get -H -o value mountpoint "${zfs_dataset}") |
||||
|
|
||||
|
if [ -z "$zfs_mountpoint" ] || [[ ! "$zfs_mountpoint" =~ ^/ ]]; then |
||||
|
error "$domain: Wrong ZFS mountpoint for dataset '$zfs_dataset': '$zfs_mountpoint'." ; error=1 |
||||
|
elif [ ! -d "$zfs_mountpoint" ]; then |
||||
|
error "$domain: ZFS mountpoint '$zfs_mountpoint' does not exist." ; error=1 |
||||
|
fi |
||||
|
|
||||
|
state=$(domain_state "$domain") |
||||
|
|
||||
|
case "$action" in |
||||
|
snapshot) |
||||
|
# Check domain state |
||||
|
if [ "$state" != "shut off" ] && [ "$state" != "running" ]; then |
||||
|
error "$domain: Domain must be either 'shut off' or 'running' to take a snapshot (current state: '$state')." ; error=1 |
||||
|
fi |
||||
|
|
||||
|
# Check if live snapshot requested on powered-off domain |
||||
|
if [ "$live" -eq 1 ] && [ "$state" != "running" ]; then |
||||
|
log_verbose "$domain: Live snapshot requested but domain is not running." |
||||
|
fi |
||||
|
|
||||
|
# Check if snapshot already exists |
||||
|
if printf '%s\n' "${zfs_dataset_snapshots[@]}" | grep -Fqx "$snapshot_name" ; then |
||||
|
error "$domain: Snapshot '$snapshot_name' already exists." ; error=1 |
||||
|
fi |
||||
|
for zvol in "${zfs_zvols[@]}"; do |
||||
|
zfs_zvol_snapshots=( $(get_zfs_snapshots_from_dataset "$zvol") ) |
||||
|
if printf '%s\n' "${zfs_zvol_snapshots[@]}" | grep -Fqx "$snapshot_name" ; then |
||||
|
error "$domain: Snapshot '$snapshot_name' already exists for ZFS zvol '$zvol'." ; error=1 |
||||
|
fi |
||||
|
done |
||||
|
|
||||
|
# Check if save file already exists for live snapshot |
||||
|
if [ -f "${zfs_mountpoint}/domain.save" ]; then |
||||
|
error "$domain: Save file '${zfs_mountpoint}/domain.save' already exists." ; error=1 |
||||
|
fi |
||||
|
;; |
||||
|
revert) |
||||
|
# Check domain state |
||||
|
if [ "$state" != "shut off" ]; then |
||||
|
error "$domain: Domain must be 'shut off' to revert a snapshot (current state: '$state')." ; error=1 |
||||
|
fi |
||||
|
|
||||
|
# Check if snapshot exists |
||||
|
if ! printf '%s\n' "${zfs_dataset_snapshots[@]}" | grep -Fqx "$snapshot_name" ; then |
||||
|
error "$domain: Snapshot '$snapshot_name' does not exist for domain '$domain'." ; error=1 |
||||
|
fi |
||||
|
for zvol in "${zfs_zvols[@]}"; do |
||||
|
zfs_zvol_snapshots=( $(get_zfs_snapshots_from_dataset "$zvol") ) |
||||
|
if ! printf '%s\n' "${zfs_zvol_snapshots[@]}" | grep -Fqx "$snapshot_name" ; then |
||||
|
error "$domain: Snapshot '$snapshot_name' does not exist for ZFS zvol '$zvol'." ; error=1 |
||||
|
fi |
||||
|
done |
||||
|
;; |
||||
|
*) |
||||
|
error "$domain: Unknown action '$action'." |
||||
|
;; |
||||
|
esac |
||||
|
|
||||
|
if [ $error -ne 0 ]; then |
||||
|
error "$domain: Domain checks failed." |
||||
|
return 1 |
||||
|
fi |
||||
|
|
||||
|
# Store those values in cache for later use |
||||
|
domain_params_cache["$domain"]=( "${state}" "${zfs_dataset}" "$zfs_mountpoint" "${zfs_zvols[*]}" ) |
||||
|
|
||||
|
return 0 |
||||
|
} |
||||
|
|
||||
|
# Gets the current state of the specified domain. |
||||
|
function domain_state () { |
||||
|
local domain="$1" |
||||
|
virsh domstate "$domain" |
||||
|
} |
||||
|
|
||||
|
# Gets the list of ZFS datasets used by the specified domain (excluding zvols) |
||||
|
function get_zfs_datasets_from_domain () { |
||||
|
local domain="$1" |
||||
|
virsh domblklist "$domain" --details | awk '$1 == "file" && $2 == "disk" { print $4 }' | while read -r file; do df --output=source "$file" | tail -n 1; done | sort | uniq |
||||
|
} |
||||
|
|
||||
|
# Gets the list of ZFS zvols used by the specified domain |
||||
|
function get_zfs_zvols_from_domain () { |
||||
|
local domain="$1" |
||||
|
virsh domblklist "$domain" --details | awk '$1 == "block" && $2 == "disk" && $4 ~ /^\/dev\/zvol\// { print gsub(/\/dev\/zvol\//, "", $4) }' |
||||
|
} |
||||
|
|
||||
|
# Gets the list of ZFS snapshots for the specified dataset. |
||||
|
function get_zfs_snapshots_from_dataset () { |
||||
|
local dataset="$1" |
||||
|
zfs list -H -t snapshot -o name "$dataset" | awk -F'@' '{print $2}' |
||||
|
} |
||||
|
|
||||
|
# Takes a live snapshot of the specified domain. |
||||
|
function take_live_snapshot () { |
||||
|
local domain="$1" |
||||
|
local snapshot="$2" |
||||
|
|
||||
|
log_verbose "$domain: Taking live snapshot '$snapshot'..." |
||||
|
zfs_dataset="${domain_params_cache["$domain"][1]}" |
||||
|
zfs_mountpoint="${domain_params_cache["$domain"][2]}" |
||||
|
virsh save "$domain" "${zfs_mountpoint}/domain.save" --running --verbose --image-format raw |
||||
|
zfs snapshot -r "${zfs_dataset}@${snapshot}" |
||||
|
} |
||||
|
|
||||
|
# Takes a crash-consistent snapshot of the specified domain. |
||||
|
function take_crash_consistent_snapshot () { |
||||
|
local domain="$1" |
||||
|
local snapshot="$2" |
||||
|
|
||||
|
log_verbose "$domain: Taking crash-consistent snapshot '$snapshot'..." |
||||
|
zfs_dataset="${domain_params_cache["$domain"][1]}" |
||||
|
zfs_mountpoint="${domain_params_cache["$domain"][2]}" |
||||
|
zfs snapshot -r "${zfs_dataset}@${snapshot}" |
||||
|
} |
||||
|
|
||||
|
# Reverts the specified snapshot for the given domain. |
||||
|
function revert_snapshot () { |
||||
|
local domain="$1" |
||||
|
local snapshot="$2" |
||||
|
|
||||
|
log_verbose "$domain: Reverting snapshot '$snapshot'..." |
||||
|
zfs_dataset="${domain_params_cache["$domain"][1]}" |
||||
|
zfs_mountpoint="${domain_params_cache["$domain"][2]}" |
||||
|
zfs list -H -r -o name "$zfs_dataset" | while read dataset; do |
||||
|
zfs rollback -Rrf "$dataset@$snapshot" |
||||
|
done |
||||
|
} |
||||
|
|
||||
|
# Restores a saved domain. |
||||
|
function restore_domain () { |
||||
|
local domain="$1" |
||||
|
|
||||
|
log_verbose "$domain: Restoring live snapshot..." |
||||
|
zfs_dataset="${domain_params_cache["$domain"][1]}" |
||||
|
zfs_mountpoint="${domain_params_cache["$domain"][2]}" |
||||
|
virsh_restore_opts=( ) |
||||
|
if [ "$batch" -eq 1 ]; then |
||||
|
virsh_restore_opts+=( "--paused" ) |
||||
|
else |
||||
|
virsh_restore_opts+=( "--running" ) |
||||
|
fi |
||||
|
virsh restore "${zfs_mountpoint}/domain.save" --verbose "${virsh_restore_opts[@]}" |
||||
|
} |
||||
|
|
||||
|
# Pauses all domains in the list. |
||||
|
function pause_all_domains () { |
||||
|
for domain in "${domains[@]}"; do |
||||
|
log_verbose "$domain: Pausing domain..." |
||||
|
state="${domain_params_cache["$domain"][0]}" |
||||
|
if [ "$state" == "running" ]; then |
||||
|
virsh suspend "$domain" |
||||
|
fi |
||||
|
done |
||||
|
} |
||||
|
|
||||
|
# Resumes all domains in the list. |
||||
|
function resume_all_domains () { |
||||
|
for domain in "${domains[@]}"; do |
||||
|
log_verbose "$domain: Resuming domain..." |
||||
|
state="${domain_params_cache["$domain"][0]}" |
||||
|
case "$(domain_state "$domain")" in |
||||
|
paused) |
||||
|
virsh resume "$domain" || true |
||||
|
;; |
||||
|
"shut off") |
||||
|
virsh start "$domain" || true |
||||
|
;; |
||||
|
*) |
||||
|
continue |
||||
|
;; |
||||
|
esac |
||||
|
done |
||||
|
} |
||||
|
|
||||
|
# Performs pre-flight checks for all specified domains according to the action. |
||||
|
function preflight_checks () { |
||||
|
local action="$1" |
||||
|
local error=0 |
||||
|
|
||||
|
for domain in "${domains[@]}"; do |
||||
|
log_verbose "$domain: Performing domain pre-flight checks for $action..." |
||||
|
if ! domain_checks "$action" "$domain" "$snapshot_name"; then |
||||
|
error=1 |
||||
|
fi |
||||
|
done |
||||
|
|
||||
|
return $error |
||||
|
} |
||||
|
|
||||
|
# Takes snapshots for all specified domains. |
||||
|
function take_snapshots () { |
||||
|
if [ "$batch" -eq 1 ]; then |
||||
|
pause_all_domains |
||||
|
fi |
||||
|
|
||||
|
for domain in "${domains[@]}"; do |
||||
|
state="${domain_params_cache["$domain"][0]}" |
||||
|
if [ "$live" -eq 1 ]; then |
||||
|
take_live_snapshot "$domain" "$snapshot_name" |
||||
|
restore_domain "$domain" |
||||
|
else |
||||
|
take_crash_consistent_snapshot "$domain" "$snapshot_name" |
||||
|
fi |
||||
|
done |
||||
|
|
||||
|
if [ "$batch" -eq 1 ]; then |
||||
|
resume_all_domains |
||||
|
fi |
||||
|
|
||||
|
return $error |
||||
|
} |
||||
|
|
||||
|
# Reverts snapshots for all specified domains. |
||||
|
function revert_snapshots () { |
||||
|
for domain in "${domains[@]}"; do |
||||
|
revert_snapshot "$domain" "$snapshot_name" |
||||
|
restore_domain "$domain" |
||||
|
done |
||||
|
|
||||
|
if [ "$batch" -eq 1 ]; then |
||||
|
resume_all_domains |
||||
|
fi |
||||
|
} |
||||
|
|
||||
|
# Lists snapshots for all specified domains. |
||||
|
function list_snapshots () { |
||||
|
local domains=( "$@" ) |
||||
|
local zfs_dataset="" |
||||
|
local zfs_mountpoint="" |
||||
|
|
||||
|
for domain in "${domains[@]}"; do |
||||
|
|
||||
|
zfs_datasets=( $(get_zfs_datasets_from_domain "$domain") ) |
||||
|
if [ ${#zfs_datasets[@]} -ne 1 ]; then |
||||
|
error "$domain: Wrong number of ZFS datasets (${#zfs_datasets[@]}) found." ; return 1 |
||||
|
fi |
||||
|
zfs_dataset="${zfs_datasets[0]:-}" |
||||
|
zfs_mountpoint=$(zfs get -H -o value mountpoint "${zfs_dataset}") |
||||
|
|
||||
|
echo "Snapshots for domain '$domain':" |
||||
|
zfs list -H -t snapshot -o name "$zfs_dataset" | awk -F'@' '{print $2}' |
||||
|
} |
||||
|
|
||||
|
preflight_checks "$action" || fatal "Pre-flight checks failed." |
||||
|
|
||||
|
case "$action" in |
||||
|
snapshot) |
||||
|
take_snapshots || fatal "Failed to take snapshots." |
||||
|
;; |
||||
|
revert) |
||||
|
revert_snapshots || fatal "Failed to revert snapshots." |
||||
|
;; |
||||
|
list) |
||||
|
if [ ${#domains[@]} -eq 0 ]; then |
||||
|
# Get all domains |
||||
|
mapfile -t domains < <(virsh list --all --name | grep -v '^$') |
||||
|
fi |
||||
|
list_snapshots "${domains[@]}" || fatal "Failed to list snapshots." |
||||
|
;; |
||||
|
*) |
||||
|
fatal "Unknown action '$action'." |
||||
|
;; |
||||
|
esac |
||||
|
|
||||
|
log_verbose "Operation '$action' completed successfully." |
||||
|
exit 0 |
||||
Loading…
Reference in new issue