backup-bot-two/backupbot.py

275 lines
10 KiB
Python
Executable File

#!/usr/bin/python3
import os
import click
import json
import subprocess
import logging
import docker
import restic
from datetime import datetime, timezone
from restic.errors import ResticFailedError
from pathlib import Path
from shutil import copyfile, rmtree
# logging.basicConfig(level=logging.INFO)
VOLUME_PATH = "/var/lib/docker/volumes/"
SECRET_PATH = '/secrets/'
SERVICE = None
@click.group()
@click.option('-l', '--log', 'loglevel')
@click.option('service', '--host', '-h', envvar='SERVICE')
@click.option('repository', '--repo', '-r', envvar='RESTIC_REPO', required=True)
def cli(loglevel, service, repository):
global SERVICE
if service:
SERVICE = service.replace('.', '_')
if repository:
os.environ['RESTIC_REPO'] = repository
if loglevel:
numeric_level = getattr(logging, loglevel.upper(), None)
if not isinstance(numeric_level, int):
raise ValueError('Invalid log level: %s' % loglevel)
logging.basicConfig(level=numeric_level)
export_secrets()
init_repo()
def init_repo():
repo = os.environ['RESTIC_REPO']
logging.debug(f"set restic repository location: {repo}")
restic.repository = repo
restic.password_file = '/var/run/secrets/restic_password'
try:
restic.cat.config()
except ResticFailedError as error:
if 'unable to open config file' in str(error):
result = restic.init()
logging.info(f"Initialized restic repo: {result}")
else:
raise error
def export_secrets():
for env in os.environ:
if env.endswith('FILE') and not "COMPOSE_FILE" in env:
logging.debug(f"exported secret: {env}")
with open(os.environ[env]) as file:
secret = file.read()
os.environ[env.removesuffix('_FILE')] = secret
# logging.debug(f"Read secret value: {secret}")
@cli.command()
def create():
pre_commands, post_commands, backup_paths, apps = get_backup_cmds()
copy_secrets(apps)
backup_paths.append(SECRET_PATH)
run_commands(pre_commands)
backup_volumes(backup_paths, apps)
run_commands(post_commands)
def get_backup_cmds():
client = docker.from_env()
container_by_service = {
c.labels['com.docker.swarm.service.name']: c for c in client.containers.list()}
backup_paths = set()
backup_apps = set()
pre_commands = {}
post_commands = {}
services = client.services.list()
for s in services:
labels = s.attrs['Spec']['Labels']
if (backup := labels.get('backupbot.backup')) and bool(backup):
stack_name = labels['com.docker.stack.namespace']
if SERVICE and SERVICE != stack_name:
continue
backup_apps.add(stack_name)
container = container_by_service.get(s.name)
if not container:
logging.error(
f"Container {s.name} is not running, hooks can not be executed")
if prehook := labels.get('backupbot.backup.pre-hook'):
pre_commands[container] = prehook
if posthook := labels.get('backupbot.backup.post-hook'):
post_commands[container] = posthook
backup_paths = backup_paths.union(
Path(VOLUME_PATH).glob(f"{stack_name}_*"))
return pre_commands, post_commands, list(backup_paths), list(backup_apps)
def copy_secrets(apps):
rmtree(SECRET_PATH, ignore_errors=True)
os.mkdir(SECRET_PATH)
client = docker.from_env()
container_by_service = {
c.labels['com.docker.swarm.service.name']: c for c in client.containers.list()}
services = client.services.list()
for s in services:
app_name = s.attrs['Spec']['Labels']['com.docker.stack.namespace']
if (app_name in apps and
(app_secs := s.attrs['Spec']['TaskTemplate']['ContainerSpec'].get('Secrets'))):
if not container_by_service.get(s.name):
logging.error(
f"Container {s.name} is not running, secrets can not be copied.")
continue
container_id = container_by_service[s.name].id
for sec in app_secs:
src = f'/var/lib/docker/containers/{container_id}/mounts/secrets/{sec["SecretID"]}'
dst = SECRET_PATH + sec['SecretName']
copyfile(src, dst)
def run_commands(commands):
for container, command in commands.items():
if not command:
continue
# Use bash's pipefail to return exit codes inside a pipe to prevent silent failure
command = command.removeprefix('bash -c \'').removeprefix('sh -c \'')
command = command.removesuffix('\'')
command = f"bash -c 'set -o pipefail;{command}'"
result = container.exec_run(command)
logging.info(f"run command in {container.name}")
logging.info(command)
if result.exit_code:
logging.error(
f"Failed to run command {command} in {container.name}: {result.output.decode()}")
else:
logging.info(result.output.decode())
def backup_volumes(backup_paths, apps, dry_run=False):
result = restic.backup(backup_paths, dry_run=dry_run, tags=apps)
print(result)
logging.info(result)
@cli.command()
@click.option('snapshot', '--snapshot', '-s', envvar='SNAPSHOT', default='latest')
@click.option('target', '--target', '-t', envvar='TARGET', default='/')
@click.option('noninteractive', '--noninteractive', envvar='NONINTERACTIVE', default=False)
def restore(snapshot, target, noninteractive):
# Todo: recommend to shutdown the container
service_paths = VOLUME_PATH
if SERVICE:
service_paths = service_paths + f'{SERVICE}_*'
snapshots = restic.snapshots(snapshot_id=snapshot)
if not snapshot:
logging.error("No Snapshots with ID {snapshots}")
exit(1)
if not noninteractive:
snapshot_date = datetime.fromisoformat(snapshots[0]['time'])
delta = datetime.now(tz=timezone.utc) - snapshot_date
print(f"You are going to restore Snapshot {snapshot} of {service_paths} at {target}")
print(f"This snapshot is {delta} old")
print(f"THIS COMMAND WILL IRREVERSIBLY OVERWRITES {target}{service_paths.removeprefix('/')}")
prompt = input("Type YES (uppercase) to continue: ")
if prompt != 'YES':
logging.error("Restore aborted")
exit(1)
print(f"Restoring Snapshot {snapshot} of {service_paths} at {target}")
result = restic.restore(snapshot_id=snapshot,
include=service_paths, target_dir=target)
logging.debug(result)
@cli.command()
def snapshots():
snapshots = restic.snapshots()
for snap in snapshots:
if not SERVICE or (tags := snap.get('tags')) and SERVICE in tags:
print(snap['time'], snap['id'])
@cli.command()
@click.option('snapshot', '--snapshot', '-s', envvar='SNAPSHOT', default='latest')
@click.option('path', '--path', '-p', envvar='INCLUDE_PATH')
def ls(snapshot, path):
results = list_files(snapshot, path)
for r in results:
if r.get('path'):
print(f"{r['ctime']}\t{r['path']}")
def list_files(snapshot, path):
cmd = restic.cat.base_command() + ['ls']
if SERVICE:
cmd = cmd + ['--tag', SERVICE]
cmd.append(snapshot)
if path:
cmd.append(path)
output = restic.internal.command_executor.execute(cmd)
output = output.replace('}\n{', '}|{')
results = list(map(json.loads, output.split('|')))
return results
@cli.command()
@click.option('snapshot', '--snapshot', '-s', envvar='SNAPSHOT', default='latest')
@click.option('path', '--path', '-p', envvar='INCLUDE_PATH')
@click.option('volumes', '--volumes', '-v', is_flag=True)
@click.option('secrets', '--secrets', '-c', is_flag=True)
def download(snapshot, path, volumes, secrets):
if sum(map(bool, [path, volumes, secrets])) != 1:
logging.error("Please specify exactly one of '--path', '--volumes', '--secrets'")
exit(1)
if path:
path = path.removesuffix('/')
files = list_files(snapshot, path)
filetype = [f.get('type') for f in files if f.get('path') == path][0]
filename = "/tmp/" + Path(path).name
if filetype == 'dir':
filename = filename + ".tar"
output = dump(snapshot, path)
with open(filename, "wb") as file:
file.write(output)
print(filename)
elif volumes:
if not SERVICE:
logging.error("Please specify '--host' when using '--volumes'")
exit(1)
filename = f"/tmp/{SERVICE}.tar"
files = list_files(snapshot, VOLUME_PATH)
for f in files[1:]:
path = f[ 'path' ]
if SERVICE in path and f['type'] == 'dir':
content = dump(snapshot, path)
# Concatenate tar files (extract with tar -xi)
with open(filename, "ab") as file:
file.write(content)
elif secrets:
if not SERVICE:
logging.error("Please specify '--host' when using '--secrets'")
exit(1)
filename = f"/tmp/SECRETS_{SERVICE}.json"
files = list_files(snapshot, SECRET_PATH)
secrets = {}
for f in files[1:]:
path = f[ 'path' ]
if SERVICE in path and f['type'] == 'file':
secret = dump(snapshot, path).decode()
secret_name = path.removeprefix(f'{SECRET_PATH}{SERVICE}_')
secrets[secret_name] = secret
with open(filename, "w") as file:
json.dump(secrets, file)
print(filename)
def dump(snapshot, path):
cmd = restic.cat.base_command() + ['dump']
if SERVICE:
cmd = cmd + ['--tag', SERVICE]
cmd = cmd +[snapshot, path]
logging.debug(f"Dumping {path} from snapshot '{snapshot}'")
output = subprocess.run(cmd, capture_output=True)
if output.returncode:
logging.error(f"error while dumping {path} from snapshot '{snapshot}': {output.stderr}")
exit(1)
return output.stdout
if __name__ == '__main__':
cli()