Skip to content

Commit

Permalink
feat: implement backup status tracking and error handling in module-b…
Browse files Browse the repository at this point in the history
…ackup
  • Loading branch information
stephdl committed Nov 18, 2024
1 parent 23bbb6f commit 3cc5dab
Showing 1 changed file with 34 additions and 13 deletions.
47 changes: 34 additions & 13 deletions core/imageroot/usr/local/agent/bin/module-backup
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,25 @@ import os
import os.path
import subprocess
import time
import atexit

# Status record that exit_handler() writes to Redis when the script exits.
# The three counters default to zero so the record always carries them.
# NOTE(review): the key names look like the fields of `restic stats --json`
# output, which replaces this dict later in the script -- confirm.
backup_status = {}
backup_status['total_size'] = 0
backup_status['total_file_count'] = 0
backup_status['snapshots_count'] = 0
# Failure counter, incremented at each error site and stored as 'errors'.
errors = 0
# Script start time, in whole seconds since the epoch.
time_start = int(time.time())
# 0 means "not finished yet": exit_handler() falls back to the current time.
time_end = 0

def exit_handler():
    """Publish the final backup status to Redis when the interpreter exits.

    Registered with ``atexit``, so it runs on normal completion as well as
    on ``sys.exit()``. It stores the error counter and the start/end
    timestamps in the module-level ``backup_status`` dict, then writes that
    dict to the ``module/{module_id}/backup_status/{backup_id}`` hash.
    """
    # The handler is registered before backup_id is parsed from sys.argv.
    # If the script terminates that early the Redis key cannot be built:
    # report it instead of raising NameError during interpreter shutdown.
    script_globals = globals()
    if 'module_id' not in script_globals or 'backup_id' not in script_globals:
        print("[WARNING] backup status not recorded: module_id or backup_id is not set.", file=sys.stderr)
        return

    backup_status['errors'] = errors
    backup_status['start'] = time_start
    backup_status['end'] = time_end or int(time.time()) # If not set, use current time

    wrdb = agent.redis_connect(privileged = True)
    try:
        wrdb.hset(f"module/{module_id}/backup_status/{backup_id}", mapping=backup_status)
    finally:
        # Close the connection even if the write fails.
        wrdb.close()

atexit.register(exit_handler)

backup_id = int(sys.argv[1])
rdb = agent.redis_connect(host='127.0.0.1') # Connect to local replica
Expand Down Expand Up @@ -64,6 +83,7 @@ except PermissionError:
except Exception as ex:
print("[ERROR] module-dump-state failed.", ex, file=sys.stderr)
print("[ERROR] module-backup aborted.", file=sys.stderr)
errors += 1
sys.exit(1)

podman_args = [
Expand Down Expand Up @@ -101,11 +121,14 @@ if os.path.isfile(install_dir + "/etc/state-exclude.conf"):
if agent.run_restic(rdb, repository, repopath, [], ["snapshots"], stdout=subprocess.DEVNULL).returncode == 0:
print(f"Repository {repository} is present at path {repopath}", file=sys.stderr)
else:
print(f"Initializing repository {repository} at path {repopath}", file=sys.stderr)
agent.run_restic(rdb, repository, repopath, [], ["init"]).check_returncode()
try:
print(f"Initializing repository {repository} at path {repopath}", file=sys.stderr)
agent.run_restic(rdb, repository, repopath, [], ["init"]).check_returncode()
except subprocess.CalledProcessError as ex:
print("[ERROR] restic init failed.", ex, file=sys.stderr)
errors += 1
sys.exit(1)

time_start = int(time.time())
errors = 0
try:
# Run the backup
agent.run_restic(rdb, repository, repopath, podman_args, ["backup"] + backup_args).check_returncode()
Expand All @@ -114,20 +137,18 @@ try:
agent.run_restic(rdb, repository, repopath, [], ["forget", "--prune", "--keep-last=" + obackup['retention']]).check_returncode()

except subprocess.CalledProcessError as ex:
errors = 1
print("[ERROR] restic backup failed.", ex, file=sys.stderr)
errors += 1

# Advertise the backup status
time_end = int(time.time())
wrdb = agent.redis_connect(privileged = True)
backup_status = {}
stats_proc = agent.run_restic(rdb, repository, repopath, [], ["stats", "--json", "latest"], text=True, stdout=subprocess.PIPE)
if stats_proc.returncode == 0:
backup_status = json.loads(stats_proc.stdout)
backup_status['start'] = time_start
backup_status['end'] = time_end
backup_status['errors'] = errors
wrdb.hset(f"module/{module_id}/backup_status/{backup_id}", mapping=backup_status)
wrdb.close()
else:
print("[ERROR] restic stats failed.", file=sys.stderr)
errors += 1

time_end = int(time.time())

try:
ometa = {}
Expand Down

0 comments on commit 3cc5dab

Please sign in to comment.