backup-bot-two/backupbot.py

329 lines
12 KiB
Python
Raw Permalink Normal View History

2023-09-06 23:41:03 +00:00
#!/usr/bin/python3
import os
import click
import json
import subprocess
import logging
import docker
import restic
import tarfile
import io
2023-10-03 20:04:15 +00:00
from datetime import datetime, timezone
2023-09-06 23:41:03 +00:00
from restic.errors import ResticFailedError
from pathlib import Path
from shutil import copyfile, rmtree
2023-09-22 14:50:45 +00:00
# logging.basicConfig(level=logging.INFO)
2023-09-06 23:41:03 +00:00
# Directory that is searched for the "<stack name>_*" volume directories to back up.
VOLUME_PATH = "/var/lib/docker/volumes/"
# Staging directory the service secrets are copied into; it is added to the
# backup paths so the secrets end up in the snapshot as well.
SECRET_PATH = '/secrets/'
2023-09-06 23:41:03 +00:00
# Selected app, set by cli() from --host / the SERVICE environment variable
# with dots replaced by underscores; stays None when no app was selected.
SERVICE = None
2023-09-22 14:50:45 +00:00
2023-09-06 23:41:03 +00:00
@click.group()
@click.option('-l', '--log', 'loglevel')
@click.option('service', '--host', '-h', envvar='SERVICE')
@click.option('repository', '--repo', '-r', envvar='RESTIC_REPOSITORY', required=True)
def cli(loglevel, service, repository):
    # Entry point shared by all sub-commands: remembers the selected app,
    # publishes the repository location, configures logging and finally
    # exports the secrets and prepares the restic repository.
    # (Kept as comments on purpose: a docstring would show up in --help.)
    global SERVICE
    if service:
        # Stored with underscores instead of dots.
        SERVICE = service.replace('.', '_')
    if repository:
        os.environ['RESTIC_REPOSITORY'] = repository
    if loglevel:
        level = getattr(logging, loglevel.upper(), None)
        if isinstance(level, int):
            logging.basicConfig(level=level)
        else:
            raise ValueError('Invalid log level: %s' % loglevel)
    export_secrets()
    init_repo()
2023-09-06 23:41:03 +00:00
2023-10-03 20:39:06 +00:00
def init_repo():
    """Configure the restic module and make sure the repository exists.

    The repository location is read from the ``RESTIC_REPOSITORY`` environment
    variable and the password file is fixed to
    ``/var/run/secrets/restic_password``. If reading the repository config
    fails because the config file cannot be opened, the repository is
    initialized; any other restic failure is re-raised.
    """
    repo_location = os.environ['RESTIC_REPOSITORY']
    logging.debug(f"set restic repository location: {repo_location}")
    restic.repository = repo_location
    restic.password_file = '/var/run/secrets/restic_password'
    try:
        restic.cat.config()
    except ResticFailedError as error:
        # Only a missing config file is taken as "repository not created yet".
        if 'unable to open config file' not in str(error):
            raise error
        init_result = restic.init()
        logging.info(f"Initialized restic repo: {init_result}")
2023-09-22 14:50:45 +00:00
2023-09-06 23:41:03 +00:00
def export_secrets():
    """Export the content of every ``*_FILE`` secret file as an environment variable.

    For each environment variable whose name ends with ``_FILE`` (and does not
    contain ``COMPOSE_FILE``), the file it points to is read and its content is
    stored in the variable of the same name without the ``_FILE`` suffix, e.g.
    ``FOO_FILE=/run/secrets/foo`` yields ``FOO=<content of the file>``.

    Raises:
        OSError: if a referenced secret file cannot be opened.
    """
    # Iterate over a snapshot of the names because the loop adds new variables.
    for env in list(os.environ):
        # Require the full "_FILE" suffix: a name that merely ends in "FILE"
        # (e.g. "PROFILE") would otherwise be overwritten with file content,
        # because there is no "_FILE" suffix to strip from it.
        if not env.endswith('_FILE') or 'COMPOSE_FILE' in env:
            continue
        target = env.removesuffix('_FILE')
        if not target:
            # A variable literally named "_FILE" has no usable target name.
            continue
        with open(os.environ[env]) as file:
            secret = file.read()
        os.environ[target] = secret
        logging.debug(f"exported secret: {env}")
2023-09-06 23:41:03 +00:00
2023-09-22 14:50:45 +00:00
2023-09-06 23:41:03 +00:00
@cli.command()
def create():
    """Create a backup of the volumes and secrets of all backup-enabled apps."""
    pre_commands, post_commands, backup_paths, apps = get_backup_cmds()
    copy_secrets(apps)
    # The copied secrets are backed up together with the volumes.
    backup_paths.append(SECRET_PATH)
    run_commands(pre_commands)
    try:
        backup_volumes(backup_paths, apps)
    finally:
        # backup_volumes() exits the process on failure; the post-hooks must
        # still run so that whatever the pre-hooks set up is cleaned up again.
        run_commands(post_commands)
2023-09-22 14:50:45 +00:00
2023-09-06 23:41:03 +00:00
def get_backup_cmds():
    """Collect everything needed for a backup run from the Docker services.

    Every service whose ``backupbot.backup`` label is enabled contributes its
    stack name, all directories below ``VOLUME_PATH`` whose name starts with
    ``<stack name>_`` and, if a container of the service is running, its
    ``backupbot.backup.pre-hook`` / ``backupbot.backup.post-hook`` commands.

    Returns:
        A tuple ``(pre_commands, post_commands, backup_paths, backup_apps)``:
        two dicts mapping a container to the hook command to run in it, a
        list of volume paths and a list of stack names.
    """
    client = docker.from_env()
    # Containers that are not part of a swarm service carry no service label;
    # skip them instead of failing with a KeyError.
    container_by_service = {}
    for c in client.containers.list():
        service_name = c.labels.get('com.docker.swarm.service.name')
        if service_name:
            container_by_service[service_name] = c
    backup_paths = set()
    backup_apps = set()
    pre_commands = {}
    post_commands = {}
    for s in client.services.list():
        labels = s.attrs['Spec'].get('Labels') or {}
        # Label values are strings, so "false" would be truthy; treat the
        # usual negative spellings (and a missing label) as disabled.
        backup = str(labels.get('backupbot.backup', '')).strip().lower()
        if backup in ('', 'false', '0', 'no', 'off'):
            continue
        stack_name = labels.get('com.docker.stack.namespace')
        if not stack_name:
            logging.error(
                f"Service {s.name} has no stack namespace label, it can not be backed up")
            continue
        # Uncomment the following lines to back up only a specific service.
        # This will unfortunately decrease restic performance.
        # if SERVICE and SERVICE != stack_name:
        #     continue
        backup_apps.add(stack_name)
        backup_paths.update(Path(VOLUME_PATH).glob(f"{stack_name}_*"))
        container = container_by_service.get(s.name)
        if not container:
            logging.error(
                f"Container {s.name} is not running, hooks can not be executed")
            continue
        if prehook := labels.get('backupbot.backup.pre-hook'):
            pre_commands[container] = prehook
        if posthook := labels.get('backupbot.backup.post-hook'):
            post_commands[container] = posthook
    return pre_commands, post_commands, list(backup_paths), list(backup_apps)
2023-09-22 14:50:45 +00:00
def copy_secrets(apps):
    """Copy the secrets of the given apps into a fresh ``SECRET_PATH`` directory.

    For every service that belongs to one of ``apps`` and declares secrets,
    the secret files mounted for its running container are copied to
    ``SECRET_PATH`` under their secret name. Services without a running
    container and secrets whose file is missing are logged and skipped.

    Args:
        apps: Stack names whose secrets should be copied.
    """
    # TODO: check if it is deployed
    rmtree(SECRET_PATH, ignore_errors=True)
    # rmtree is best effort, so the directory may still exist afterwards.
    os.makedirs(SECRET_PATH, exist_ok=True)
    client = docker.from_env()
    # Containers that are not part of a swarm service carry no service label;
    # skip them instead of failing with a KeyError.
    container_by_service = {}
    for c in client.containers.list():
        service_name = c.labels.get('com.docker.swarm.service.name')
        if service_name:
            container_by_service[service_name] = c
    for s in client.services.list():
        spec = s.attrs['Spec']
        # Services that were not deployed as part of a stack have no namespace.
        app_name = (spec.get('Labels') or {}).get('com.docker.stack.namespace')
        if app_name not in apps:
            continue
        app_secs = spec['TaskTemplate']['ContainerSpec'].get('Secrets')
        if not app_secs:
            continue
        container = container_by_service.get(s.name)
        if not container:
            logging.error(
                f"Container {s.name} is not running, secrets can not be copied.")
            continue
        for sec in app_secs:
            src = f'/var/lib/docker/containers/{container.id}/mounts/secrets/{sec["SecretID"]}'
            if not Path(src).exists():
                logging.error(
                    f"For the secret {sec['SecretName']} the file {src} does not exist for {s.name}")
                continue
            dst = SECRET_PATH + sec['SecretName']
            copyfile(src, dst)
2023-09-06 23:41:03 +00:00
def run_commands(commands):
    """Run the given hook commands inside their containers.

    Each command is unwrapped from a leading ``bash -c`` / ``sh -c`` and from
    one pair of surrounding quotes, then executed through ``bash -c`` with
    ``pipefail`` enabled so that a failure inside a pipe is not silently lost.
    Failures are logged; they do not abort the remaining commands.

    Args:
        commands: Mapping of container object to the command string to run in
            it. Empty commands are skipped.
    """
    for container, command in commands.items():
        if not command:
            continue
        # Remove bash/sh wrapping; strip the whitespace left behind so that
        # the quote check below sees the first real character.
        command = command.removeprefix('bash -c').removeprefix('sh -c').strip()
        # Remove quotes surrounding the command
        if len(command) >= 2 and command[0] == command[-1] and command[0] in ("'", '"'):
            command = command[1:-1]
        # Use bash's pipefail to return exit codes inside a pipe to prevent silent failure
        script = f"set -o pipefail;{command}"
        logging.info(f"run command in {container.name}:")
        logging.info(script)
        # Pass an argument list so the script reaches bash unchanged even if
        # it contains quotes of its own.
        result = container.exec_run(['bash', '-c', script])
        if result.exit_code:
            logging.error(
                f"Failed to run command {script} in {container.name}: {result.output.decode()}")
        else:
            logging.info(result.output.decode())
2023-09-22 14:50:45 +00:00
2023-09-06 23:41:03 +00:00
def backup_volumes(backup_paths, apps, dry_run=False):
    """Back up ``backup_paths`` with restic, tagging the snapshot with ``apps``.

    The restic result is printed and logged on success. If restic fails, the
    failure is logged and the process exits with status 1.

    Args:
        backup_paths: Paths handed to restic for backup.
        apps: Names used as snapshot tags.
        dry_run: Forwarded to restic's ``dry_run`` option.
    """
    try:
        outcome = restic.backup(backup_paths, dry_run=dry_run, tags=apps)
    except ResticFailedError as error:
        logging.error(f"Backup failed for {apps}. Could not Backup these paths: {backup_paths}")
        logging.error(error)
        exit(1)
    else:
        print(outcome)
        logging.info(outcome)
2023-09-06 23:41:03 +00:00
2023-09-22 14:50:45 +00:00
2023-09-06 23:41:03 +00:00
@cli.command()
@click.option('snapshot', '--snapshot', '-s', envvar='SNAPSHOT', default='latest')
@click.option('target', '--target', '-t', envvar='TARGET', default='/')
@click.option('noninteractive', '--noninteractive', envvar='NONINTERACTIVE', default=False)
def restore(snapshot, target, noninteractive):
    """Restore the backed up volumes from a snapshot into the target directory.

    With --host only the volumes of that app are restored. Unless
    --noninteractive is set, the restore has to be confirmed by typing YES.
    """
    # Todo: recommend to shutdown the container
    service_paths = VOLUME_PATH
    if SERVICE:
        service_paths = service_paths + f'{SERVICE}_*'
    snapshots = restic.snapshots(snapshot_id=snapshot)
    # Check the lookup result, not the requested id (which always has a value).
    if not snapshots:
        logging.error(f"No Snapshots with ID {snapshot}")
        exit(1)
    if not noninteractive:
        snapshot_date = datetime.fromisoformat(snapshots[0]['time'])
        delta = datetime.now(tz=timezone.utc) - snapshot_date
        print(
            f"You are going to restore Snapshot {snapshot} of {service_paths} at {target}")
        print(f"This snapshot is {delta} old")
        print(
            f"THIS COMMAND WILL IRREVERSIBLY OVERWRITES {target}{service_paths.removeprefix('/')}")
        prompt = input("Type YES (uppercase) to continue: ")
        if prompt != 'YES':
            logging.error("Restore aborted")
            exit(1)
    print(f"Restoring Snapshot {snapshot} of {service_paths} at {target}")
    result = restic.restore(snapshot_id=snapshot,
                            include=service_paths, target_dir=target)
    logging.debug(result)
2023-09-06 23:41:03 +00:00
@cli.command()
def snapshots():
    # Print time and id of every snapshot; when an app is selected only the
    # snapshots tagged with it are shown. Warn if nothing was printed.
    # (Kept as comments on purpose: a docstring would show up in --help.)
    found_any = False
    for entry in restic.snapshots():
        if SERVICE and not ((tags := entry.get('tags')) and SERVICE in tags):
            continue
        print(entry['time'], entry['id'])
        found_any = True
    if found_any:
        return
    err_msg = "No Snapshots found"
    if SERVICE:
        err_msg += f' for app {SERVICE}'
    logging.warning(err_msg)
2023-09-06 23:41:03 +00:00
2023-09-22 14:50:45 +00:00
2023-09-06 23:41:03 +00:00
@cli.command()
@click.option('snapshot', '--snapshot', '-s', envvar='SNAPSHOT', default='latest')
@click.option('path', '--path', '-p', envvar='INCLUDE_PATH')
def ls(snapshot, path):
    # Print "<ctime>\t<path>" for every listed entry that has a path; entries
    # without one are skipped.
    # (Kept as comments on purpose: a docstring would show up in --help.)
    for entry in list_files(snapshot, path):
        if not entry.get('path'):
            continue
        print(f"{entry['ctime']}\t{entry['path']}")
2023-09-22 14:50:45 +00:00
2023-09-06 23:41:03 +00:00
def _parse_json_stream(output):
    """Parse a string of whitespace-separated JSON documents into a list."""
    decoder = json.JSONDecoder()
    results = []
    pos = 0
    end = len(output)
    while pos < end:
        # raw_decode does not accept leading whitespace, so skip it manually.
        if output[pos].isspace():
            pos += 1
            continue
        document, pos = decoder.raw_decode(output, pos)
        results.append(document)
    return results


def list_files(snapshot, path):
    """List the entries of a snapshot via ``restic ls``.

    Args:
        snapshot: Snapshot id (or ``latest``) to list.
        path: Optional path inside the snapshot to restrict the listing to.

    Returns:
        The parsed JSON objects of the restic output, in output order.

    Raises:
        ResticFailedError: for any restic failure other than a missing
            snapshot, which is logged and exits the process with status 1.
    """
    cmd = restic.cat.base_command() + ['ls']
    if SERVICE:
        cmd = cmd + ['--tag', SERVICE]
    cmd.append(snapshot)
    if path:
        cmd.append(path)
    try:
        output = restic.internal.command_executor.execute(cmd)
    except ResticFailedError as error:
        if 'no snapshot found' not in str(error):
            raise
        err_msg = f'There is no snapshot {snapshot}'
        if SERVICE:
            err_msg += f' for the app {SERVICE}'
        logging.error(err_msg)
        exit(1)
    # Decode the objects one after another instead of splitting the text on a
    # marker character, which would corrupt entries whose path contains it.
    return _parse_json_stream(output)
2023-09-22 14:50:45 +00:00
2023-09-06 23:41:03 +00:00
def _tar_member(name, data):
    """Pair ``data`` with a ``TarInfo`` entry called ``name`` sized to fit it."""
    tarinfo = tarfile.TarInfo(name=name)
    tarinfo.size = len(data)
    return data, tarinfo


@cli.command()
@click.option('snapshot', '--snapshot', '-s', envvar='SNAPSHOT', default='latest')
@click.option('path', '--path', '-p', envvar='INCLUDE_PATH')
@click.option('volumes', '--volumes', '-v', is_flag=True)
@click.option('secrets', '--secrets', '-c', is_flag=True)
def download(snapshot, path, volumes, secrets):
    """Write files from a snapshot to /tmp/backup.tar.gz.

    Downloads the given path, the volumes and/or the secrets of the app
    selected with --host. Without any of --path, --volumes and --secrets,
    both volumes and secrets are downloaded.
    """
    if not any([path, volumes, secrets]):
        volumes = secrets = True
    # Validate up front so nothing is dumped before an inevitable abort.
    if volumes and not SERVICE:
        logging.error("Please specify '--host' when using '--volumes'")
        exit(1)
    if secrets and not SERVICE:
        logging.error("Please specify '--host' when using '--secrets'")
        exit(1)
    file_dumps = []
    if path:
        path = path.removesuffix('/')
        binary_output = dump(snapshot, path)
        files = list_files(snapshot, path)
        # The listing may lack an entry for the exact path; treat it as a
        # plain file then instead of failing with an IndexError.
        filetype = next(
            (f.get('type') for f in files if f.get('path') == path), None)
        filename = Path(path).name
        if filetype == 'dir':
            filename = filename + ".tar"
        file_dumps.append(_tar_member(filename, binary_output))
    # Volumes and secrets are named "<app>_<name>"; include the separator so
    # another app whose name merely starts with this one is not picked up.
    app_prefix = f'{SERVICE}_'
    if volumes:
        # The first listed object is skipped - presumably the snapshot header
        # that precedes the file entries; verify against the restic output.
        for f in list_files(snapshot, VOLUME_PATH)[1:]:
            volume_path = f['path']
            if Path(volume_path).name.startswith(app_prefix) and f['type'] == 'dir':
                binary_output = dump(snapshot, volume_path)
                file_dumps.append(
                    _tar_member(f"{Path(volume_path).name}.tar", binary_output))
    if secrets:
        secret_values = {}
        for f in list_files(snapshot, SECRET_PATH)[1:]:
            secret_path = f['path']
            if Path(secret_path).name.startswith(app_prefix) and f['type'] == 'file':
                secret = dump(snapshot, secret_path).decode()
                secret_name = secret_path.removeprefix(f'{SECRET_PATH}{SERVICE}_')
                secret_values[secret_name] = secret
        # All secrets of the app are bundled into a single JSON document.
        binary_output = json.dumps(secret_values).encode()
        file_dumps.append(_tar_member(f"{SERVICE}.json", binary_output))
    with tarfile.open('/tmp/backup.tar.gz', "w:gz") as tar:
        print("Writing files to /tmp/backup.tar.gz...")
        for binary_output, tarinfo in file_dumps:
            tar.addfile(tarinfo, fileobj=io.BytesIO(binary_output))
    size = get_formatted_size('/tmp/backup.tar.gz')
    print(f"Backup has been written to /tmp/backup.tar.gz with a size of {size}")
def get_formatted_size(file_path):
    """Return the size of ``file_path`` as a human readable string.

    The size is scaled by factors of 1024 to the largest fitting unit out of
    Bytes, KB, MB, GB and TB and rounded to three decimals, e.g. ``"1.5 KB"``.
    Anything of 1024 TB or more is still reported in TB.

    Raises:
        OSError: if the size of the file cannot be determined.
    """
    file_size = os.path.getsize(file_path)
    units = ['Bytes', 'KB', 'MB', 'GB', 'TB']
    # Only scale down while a larger unit is left; dividing once more after
    # the last unit would report a value that is 1024 times too small.
    for unit in units[:-1]:
        if file_size < 1024:
            return f"{round(file_size, 3)} {unit}"
        file_size /= 1024
    return f"{round(file_size, 3)} {units[-1]}"
2023-10-12 10:50:10 +00:00
def dump(snapshot, path):
    """Return the raw bytes restic writes to stdout when dumping ``path``.

    Args:
        snapshot: Snapshot id (or ``latest``) to read from.
        path: Path inside the snapshot to dump.

    Returns:
        The captured standard output of the restic process as ``bytes``.
        If restic exits with a non-zero status, the error is logged and the
        process exits with status 1.
    """
    cmd = restic.cat.base_command() + ['dump']
    if SERVICE:
        # Restrict the snapshot lookup to the selected app.
        cmd += ['--tag', SERVICE]
    cmd += [snapshot, path]
    print(f"Dumping {path} from snapshot '{snapshot}'")
    completed = subprocess.run(cmd, capture_output=True)
    if completed.returncode == 0:
        return completed.stdout
    logging.error(
        f"error while dumping {path} from snapshot '{snapshot}': {completed.stderr}")
    exit(1)
2023-09-06 23:41:03 +00:00
# Start the command line interface when the file is executed as a script.
if __name__ == '__main__':
    cli()