diff --git a/btv b/btv index 309e091..206c75c 100755 --- a/btv +++ b/btv @@ -8,9 +8,14 @@ import datetime import json import os import shlex +import shutil import sys +import time +import socket +import urllib.request CONFIG = "/etc/btv/config.ini" +LOCKFILE = "/run/lock/btv/serialization.lock" # ------------------------------------------------------------------------------ # Global @@ -33,6 +38,20 @@ def chdir(new_dir): finally: os.chdir(previous_dir) +@contextmanager +def lockfile(path): + d = os.path.dirname(path) + + if not os.path.exists(d): + os.makedirs(d) + + f = open(path, "w") + + try: + yield + finally: + os.remove(path) + # ------------------------------------------------------------------------------ # Functions @@ -172,15 +191,17 @@ def serialize(snap, outdir, key, snap_from=None): Snap and snap_from are Snapshot objects. + A lockfile is kept for the duration of the process. + Returns 0 on success. """ ## prepare directories ## if snap_from: - name = "%s diff from %s" %(snap.name, snap_from.name) + name = "%s to %s" %(snap_from.name, snap.name) else: - name = "%s full" %(snap.name) + name = snap.name directory = os.path.join(outdir, name) os.makedirs(directory) @@ -196,44 +217,67 @@ def serialize(snap, outdir, key, snap_from=None): return 1 - ## serialize each subvolume or subvolume pair + ## serialization (most expensive) ## - for subvolume in snap.subvolumes: - if snap_from: - btrfs_send = 'btrfs send -p "%s" "%s"' %( - os.path.join(snap_from.path, subvolume), - os.path.join(snap.path, subvolume) - ) - else: - btrfs_send = 'btrfs send "%s"' %( - os.path.join(snap.path, subvolume) - ) - - error = os.system('%s | zstd | openssl enc -e -aes-256-cbc -pbkdf2 -salt -pass "file:%s" > "%s.btrfs.zst.aes"' %( - btrfs_send, - cfg.get("crypto", "keyfile"), - os.path.join(directory, subvolume) - )) - - if error: - print(" !! 
failed to serialize %s" %(subvolume)) - return error + with lockfile(LOCKFILE): + + for subvolume in snap.subvolumes: + if snap_from: + btrfs_send = 'btrfs send -p "%s" "%s"' %( + os.path.join(snap_from.path, subvolume), + os.path.join(snap.path, subvolume) + ) + else: + btrfs_send = 'btrfs send "%s"' %( + os.path.join(snap.path, subvolume) + ) + + error = os.system('%s | zstd | openssl enc -e -aes-256-cbc -pbkdf2 -salt -pass "file:%s" | hash-pipe sha512 "%s.btrfs.zst.aes" "%s/manifest.sha512" > "%s.btrfs.zst.aes"' %( + btrfs_send, + cfg.get("crypto", "keyfile"), + subvolume, + directory, + os.path.join(directory, subvolume) + )) + + if error: + print(" !! failed to serialize %s" %(subvolume)) + return error - ## calculate checksums and add a self-check executable - ## FIXME calculate this on the fly, re-reading is expensive - previous_wd = os.getcwd() - os.chdir(directory) - os.system('sha512sum "%s" > manifest.sha512' %( - '" "'.join("%s.btrfs.zst.aes" %(s) for s in snap.subvolumes) - )) + ## final touches + ## - with open("check-integrity.sh", "w") as f: + ## add self-check and unpack executables + with open(os.path.join(directory, "check-integrity.sh"), "w") as f: f.write("#! 
/bin/sh\n\nsha512sum --check manifest.sha512\n") + os.chmod(f.name, 0o500) - os.chmod("check-integrity.sh", 0o500) + unpack_path = os.path.join(directory, "unpack.sh") + shutil.copy("/usr/share/btv/unpack.sh", unpack_path) + os.chmod(unpack_path, 0o500) + + ## fix permissions and ownership of created objects + outdir_stat = os.stat(outdir) + os.chown(directory, outdir_stat.st_uid, outdir_stat.st_gid) + os.chmod(directory, 0o700) + + for file in os.listdir(directory): + path = os.path.join(directory, file) + os.chown(path, outdir_stat.st_uid, outdir_stat.st_gid) + + if path.endswith(".aes") or path.endswith(".sha512"): + os.chmod(path, 0o400) + + return 0 +def ping(url): + try: + urllib.request.urlopen(url, timeout=10) + except socket.error as e: + print("Ping failed: %s" %(e)) + # ------------------------------------------------------------------------------ # Verbs @@ -253,30 +297,37 @@ def do_create(args): ## determine the rank of the new snapshot ## - snaps_since_rank_1 = 1 - snaps_since_rank_2 = 1 - - for snap in list_snapshots(): - if snap.rank == 1: - snaps_since_rank_1 = 1 - else: - snaps_since_rank_1 += 1 + if "--rank2" in args: + snapshot.rank = 2 + else: + snaps_since_rank_1 = 1 + snaps_since_rank_2 = 1 - if snap.rank == 2: - snaps_since_rank_1 = 1 - snaps_since_rank_2 = 1 - else: - snaps_since_rank_2 += 1 - - ## promote the snapshot - if snaps_since_rank_2 >= cfg.getint("snap", "rank_2_interval"): - if "--process" in args: - snapshot.rank = 2 - else: - print("!!! Rank 2 snapshot is due, please enable --process") + for snap in list_snapshots(): + if snap.rank == 1: + snaps_since_rank_1 = 1 + else: + snaps_since_rank_1 += 1 + + if snap.rank == 2: + snaps_since_rank_1 = 1 + snaps_since_rank_2 = 1 + else: + snaps_since_rank_2 += 1 + + ## promote the snapshot + if snaps_since_rank_2 >= cfg.getint("snap", "rank_2_interval"): + if "--process" in args: + snapshot.rank = 2 + else: + print("!!! 
Rank 2 snapshot is due, please enable --process") + snapshot.rank = 1 + elif snaps_since_rank_1 >= cfg.getint("snap", "rank_1_interval"): snapshot.rank = 1 - elif snaps_since_rank_1 >= cfg.getint("snap", "rank_1_interval"): - snapshot.rank = 1 + + ping_url = cfg.get("monitoring", "rank2_start_url") + if snapshot.rank == 2 and ping_url: + ping(ping_url) ## create the snapshot itself ## @@ -299,7 +350,6 @@ def do_create(args): ## do optional processing ## if snapshot.rank == 2: - ## snapshot serialization # if there's a previous Rank 2 snapshot, compute a diff against it # if not or if the process fails, demote this snap to Rank 1 @@ -323,23 +373,39 @@ def do_create(args): else: print("!!! no previous Rank 2 snapshot, please create one using btv stream") snapshot.rank = 1 - - ## garbage collection - do_gc() ## save all snapshot metadata snapshot.dump() print(">>> Snapshot created: rank %d %s" %(snapshot.rank, snapshot.name)) + + ## garbage collection + do_gc() + + ping_url = cfg.get("monitoring", "rank2_end_url") + if snapshot.rank == 2 and ping_url: + ping(ping_url) def do_list(args): """ Print a list of existing snapshots. """ + snaps = list_snapshots() + + counts = [ + len([s for s in snaps if s.rank == 0]), + len([s for s in snaps if s.rank == 1]), + len([s for s in snaps if s.rank == 2]) + ] + + print(" > %d/%d Rank 0 snapshots" %(counts[0], cfg.getint("gc", "rank_0_count"))) + print(" > %d/%d Rank 1 snapshots" %(counts[1], cfg.getint("gc", "rank_1_count"))) + print(" > %d/%d Rank 2 snapshots" %(counts[2], cfg.getint("gc", "rank_2_count"))) + print() print("Rank Name") - for snap in list_snapshots(): + for snap in snaps: print(" %d %s %s" %(snap.rank, snap.name, ", ".join(snap.notes))) def do_drop(args): @@ -360,59 +426,151 @@ def do_drop(args): def do_stream(args): """ - Stream the snapshot args[0] into dir args[1]. + args are either - The affected snapshot is then promoted to Rank 2. 
+ "diff" SNAPSHOT_FROM SNAPSHOT_TO OUTPUT_DIR + + or + + "full" SNAPSHOT OUTPUT_DIR + + SNAPSHOT_FROM must be Rank 2. + + Streams the full or diff snapshot into OUTPUT_DIR. + + The SNAPSHOT or SNAPSHOT_TO is then promoted to Rank 2. """ - snapshot = get_snapshot_by_name(args[0]) + ## args interpretation and validation + ## + if not args: + raise UsageError("no args") - if not snapshot: - print("!!! %s is not a snapshot" %(args[0])) + if args[0] == "diff": + if len(args) != 4: + raise UsageError("'stream diff' requires exactly 3 arguments") + + snap_from_name, snap_name, output_dir = args[1:] + snap_from = get_snapshot_by_name(snap_from_name) + + if not snap_from: + print("!!! %s is not a snapshot" %(snap_from_name)) + sys.exit(2) + + if snap_from.rank != 2: + print("!!! source snapshot must be Rank 2, %s is %d" %(snap_from.name, snap_from.rank)) + print(' > Hint: btv stream full %s "%s"' %(snap_from.name, output_dir)) + sys.exit(3) + + elif args[0] == "full": + if len(args) != 3: + raise UsageError("'stream full' requires exactly 2 arguments") + + snap_name, output_dir = args[1:] + snap_from = None + + else: + raise UsageError("'stream' type is either 'full' or 'diff'") + + snap = get_snapshot_by_name(snap_name) + + if not snap: + print("!!! %s is not a snapshot" %(snap_name)) sys.exit(2) - directory = args[1] - - if not os.path.isdir(directory): - print("!!! %s is not a directory" %(directory)) + if not os.path.isdir(output_dir): + print("!!! %s is not a directory" %(output_dir)) sys.exit(2) - print(">>> Serializing %s into %s" %(snapshot.name, directory)) + if snap_from and snap_from.name >= snap.name: + print("!!! 
source snapshot is younger than target snapshot") + sys.exit(3) + + ## serialization work + ## + if snap_from: + comment = "diff %s -> %s" %(snap_from.name, snap.name) + else: + comment = "full %s" %(snap.name) + + print(">>> Serializing %s into %s" %(comment, output_dir)) error = serialize( - snapshot, - directory, - cfg.get("crypto", "keyfile") + snap, + output_dir, + cfg.get("crypto", "keyfile"), + snap_from ) if error: print("!!! serialization failed") else: - snapshot.rank = 2 - snapshot.notes.add("fully streamed") - snapshot.dump() + snap.rank = 2 + snap.notes.add("manually streamed") - print("<<< %s serialized and promoted to Rank %d" %(snapshot.name, snapshot.rank)) + if not snap_from: + snap.notes.add("fully streamed") + snap.dump() + + print("<<< %s serialized and promoted to Rank 2" %(snap.name)) def do_gc(args=None): """ Drops old snapshots. + + If the only arg is "greedy", drops ALL snapshots except the youngest + Rank 2. If it's a number, drops that many oldest snapshots. """ - counts = [0, 0, 0] # Rank 0, 1, 2 - limits = [ - cfg.getint("gc", "rank_0_count"), - cfg.getint("gc", "rank_1_count"), - cfg.getint("gc", "rank_2_count") - ] - - for snap in reversed(list_snapshots()): - counts[snap.rank] += 1 - - if counts[snap.rank] > limits[snap.rank]: - print(" >> delete Rank %d %s" %(snap.rank, snap.name)) + if args: + if args[0] == "greedy": + newest = list_snapshots(2)[-1] - snap.drop() + if newest: + print(">>> Dropping all snapshots except the newest Rank 2 in 5 s...") + time.sleep(5) + + for snap in list_snapshots(): + if snap.name != newest.name: + snap.drop() + + else: + print("!!! no Rank 2 snapshot exists") + sys.exit(1) + + else: + try: + count = int(args[0]) + except ValueError: + print("!!! 
%s is not a count of snapshots to delete" %(args[0])) + sys.exit(1) + + snaps = list_snapshots()[:count] + + for snap in snaps: + print(" %d %s %s" %(snap.rank, snap.name, ", ".join(snap.notes))) + + print(">>> These snapshots will be dropped in 5 s...") + time.sleep(5) + + for snap in snaps: + snap.drop() + + else: + counts = [0, 0, 0] # Rank 0, 1, 2 + limits = [ + cfg.getint("gc", "rank_0_count"), + cfg.getint("gc", "rank_1_count"), + cfg.getint("gc", "rank_2_count") + ] + + for snap in reversed(list_snapshots()): + counts[snap.rank] += 1 + + if counts[snap.rank] > limits[snap.rank]: + print(" >> delete Rank %d %s" %(snap.rank, snap.name)) + + snap.drop() verb_router = { "snapshot": do_create, @@ -430,12 +588,17 @@ def print_help(): {cmd} VERB [args] Verbs: - snapshot [--process] - Create a snapshot. If --process is passed, do all optional work. + snapshot [--process] [--rank2] + Create a snapshot. If --process is passed, do all optional work. If + --rank2 is passed, the new snapshot is automatically promoted to Rank 2 + and --process is implied. - stream SNAPSHOT OUTPUT_DIR + stream full SNAPSHOT OUTPUT_DIR Streams the SNAPSHOT into OUTPUT_DIR as a full (milestone) bin. + stream diff SNAPSHOT_FROM SNAPSHOT_TO OUTPUT_DIR + Streams the difference between SNAPSHOTs into OUTPUT_DIR as a diff bin. + list List snapshots and their rank. @@ -444,6 +607,12 @@ Verbs: gc Drops old local snapshots based on garbage collector settings. + + gc greedy + Drop ALL snapshots except the newest Rank 2. + + gc COUNT + Drop COUNT oldest snapshots. """.format(cmd=sys.argv[0])) if __name__ == "__main__": @@ -460,7 +629,8 @@ if __name__ == "__main__": try: verb_router[verb](sys.argv[2:]) - except UsageError: + except UsageError as e: + print("!!! 
%s" %(e)) print() print_help() sys.exit(1) diff --git a/cfg/config.ini b/cfg/config.ini index f5b72b0..f3dc5a9 100644 --- a/cfg/config.ini +++ b/cfg/config.ini @@ -30,9 +30,15 @@ dir = /mnt/pool/subvol/_backup [gc] # How many snapshots of each Rank to keep around. -# Note: pruning is only performed when a new Rank 2 snapshot is attempted. # Default values are 25, 36, and 42, i.e. 5 hours for Rank 0, 2 days for Rank 1, # and a week for Rank 2. rank_0_count=25 rank_1_count=36 rank_2_count=42 + +[monitoring] +# GETs this URL before starting a rank2 snapshot +rank2_start_url = + +# GETs this URL after successfully completing a rank2 snapshot +rank2_end_url = diff --git a/systemd/btv-backup.service b/systemd/btv-backup.service index aa1354c..d63bec8 100644 --- a/systemd/btv-backup.service +++ b/systemd/btv-backup.service @@ -1,5 +1,6 @@ [Unit] -Description=Do a filesystem snapshot, optionally offsite backup +Description=Filesystem snapshot (with offsite backup) +ConditionPathExists=!/run/lock/btv/serialization.lock [Service] Type=oneshot diff --git a/unpack.sh b/unpack.sh new file mode 100644 index 0000000..f78a8ba --- /dev/null +++ b/unpack.sh @@ -0,0 +1,24 @@ +#! /bin/zsh + +TIMESTAMP=($(basename "$(pwd)")) +OUTDIR="$1" +KEYFILE="$2" + +function die { + >&2 echo "$2" + exit $1 +} + +[[ "$0" != "./unpack.sh" ]] && die 1 "This can only be executed from the snapshot directory itself." +[[ ! -d "$OUTDIR" ]] && die 1 "The first argument must be a directory to unpack subvolumes into." +[[ ! -f "$KEYFILE" ]] && die 1 "The second argument must be a readable keyfile." +./check-integrity.sh || die 2 "This snapshot failed integrity checks." + +### end of checks + +for ARCHIVE in *btrfs.zst.aes +do + openssl enc -d -aes-256-cbc -pbkdf2 -salt -pass "file:$KEYFILE" < "$ARCHIVE" | zstd -d | btrfs receive "$OUTDIR" || die 3 "Failed to unpack subvolume." 
+ SUBVOL_NAME=${ARCHIVE%%.btrfs.zst.aes} + mv "${OUTDIR}/${SUBVOL_NAME}" "${OUTDIR}/${SUBVOL_NAME}.${TIMESTAMP[1]}" || die 4 "Failed to rename subvolume." +done