From 15275b257192ff49a96a64600ee5df7d0566506f Mon Sep 17 00:00:00 2001 From: Moritz Date: Thu, 23 Nov 2023 18:44:17 +0100 Subject: [PATCH] structured json logging with -m flag --- backupbot.py | 87 ++++++++++++++++++++++++++++++++++----------------- entrypoint.sh | 4 +-- 2 files changed, 60 insertions(+), 31 deletions(-) diff --git a/backupbot.py b/backupbot.py index 1ffc739..88c1557 100755 --- a/backupbot.py +++ b/backupbot.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 import os +import sys import click import json import subprocess @@ -9,22 +10,40 @@ import docker import restic import tarfile import io +from pythonjsonlogger import jsonlogger from datetime import datetime, timezone from restic.errors import ResticFailedError from pathlib import Path from shutil import copyfile, rmtree -# logging.basicConfig(level=logging.INFO) VOLUME_PATH = "/var/lib/docker/volumes/" SECRET_PATH = '/secrets/' SERVICE = None +logger = logging.getLogger("backupbot") +logging.addLevelName(55, 'SUMMARY') +setattr(logging, 'SUMMARY', 55) +setattr(logger, 'summary', lambda message, *args, ** + kwargs: logger.log(55, message, *args, **kwargs)) + + +def handle_exception(exc_type, exc_value, exc_traceback): + if issubclass(exc_type, KeyboardInterrupt): + sys.__excepthook__(exc_type, exc_value, exc_traceback) + return + logger.critical("Uncaught exception", exc_info=( + exc_type, exc_value, exc_traceback)) + + +sys.excepthook = handle_exception + @click.group() @click.option('-l', '--log', 'loglevel') +@click.option('-m', '--machine-logs', 'machine_logs', is_flag=True) @click.option('service', '--host', '-h', envvar='SERVICE') @click.option('repository', '--repo', '-r', envvar='RESTIC_REPOSITORY', required=True) -def cli(loglevel, service, repository): +def cli(loglevel, service, repository, machine_logs): global SERVICE if service: SERVICE = service.replace('.', '_') @@ -34,14 +53,21 @@ def cli(loglevel, service, repository): numeric_level = getattr(logging, loglevel.upper(), None) if not isinstance(numeric_level, int): raise ValueError('Invalid log level: %s' % loglevel) - logging.basicConfig(level=numeric_level) + logger.setLevel(numeric_level) + if machine_logs: + logHandler = logging.StreamHandler() + formatter = jsonlogger.JsonFormatter( + "%(levelname)s %(filename)s %(lineno)s %(process)d %(message)s", rename_fields={"levelname": "message_type"}) + logHandler.setFormatter(formatter) + logger.addHandler(logHandler) + export_secrets() init_repo() def init_repo(): repo = os.environ['RESTIC_REPOSITORY'] - logging.debug(f"set restic repository location: {repo}") + logger.debug(f"set restic repository location: {repo}") restic.repository = repo restic.password_file = '/var/run/secrets/restic_password' try: @@ -49,7 +75,7 @@ def init_repo(): except ResticFailedError as error: if 'unable to open config file' in str(error): result = restic.init() - logging.info(f"Initialized restic repo: {result}") + logger.info(f"Initialized restic repo: {result}") else: raise error @@ -57,11 +83,11 @@ def init_repo(): def export_secrets(): for env in os.environ: if env.endswith('FILE') and not "COMPOSE_FILE" in env: - logging.debug(f"exported secret: {env}") + logger.debug(f"exported secret: {env}") with open(os.environ[env]) as file: secret = file.read() os.environ[env.removesuffix('_FILE')] = secret - # logging.debug(f"Read secret value: {secret}") + # logger.debug(f"Read secret value: {secret}") @cli.command() @@ -86,6 +112,7 @@ def get_backup_cmds(): for s in services: labels = s.attrs['Spec']['Labels'] if (backup := labels.get('backupbot.backup')) and bool(backup): + # volumes: s.attrs['Spec']['TaskTemplate']['ContainerSpec']['Mounts'][0]['Source'] stack_name = labels['com.docker.stack.namespace'] # Remove this lines to backup only a specific service # This will unfortenately decrease restice performance @@ -94,8 +121,8 @@ def get_backup_cmds(): backup_apps.add(stack_name) backup_paths = backup_paths.union( Path(VOLUME_PATH).glob(f"{stack_name}_*")) - if not (container:= container_by_service.get(s.name)): - logging.error( + if not (container := container_by_service.get(s.name)): + logger.error( f"Container {s.name} is not running, hooks can not be executed") continue if prehook := labels.get('backupbot.backup.pre-hook'): @@ -106,7 +133,7 @@ def get_backup_cmds(): def copy_secrets(apps): - #TODO: check if it is deployed + # TODO: check if it is deployed rmtree(SECRET_PATH, ignore_errors=True) os.mkdir(SECRET_PATH) client = docker.from_env() @@ -118,14 +145,15 @@ def copy_secrets(apps): if (app_name in apps and (app_secs := s.attrs['Spec']['TaskTemplate']['ContainerSpec'].get('Secrets'))): if not container_by_service.get(s.name): - logging.error( + logger.error( f"Container {s.name} is not running, secrets can not be copied.") continue container_id = container_by_service[s.name].id for sec in app_secs: src = f'/var/lib/docker/containers/{container_id}/mounts/secrets/{sec["SecretID"]}' if not Path(src).exists(): - logging.error(f"For the secret {sec['SecretName']} the file {src} does not exist for {s.name}") + logger.error( + f"For the secret {sec['SecretName']} the file {src} does not exist for {s.name}") continue dst = SECRET_PATH + sec['SecretName'] copyfile(src, dst) @@ -142,24 +170,24 @@ def run_commands(commands): command = command[1:-1] # Use bash's pipefail to return exit codes inside a pipe to prevent silent failure command = f"bash -c 'set -o pipefail;{command}'" - logging.info(f"run command in {container.name}:") - logging.info(command) + logger.info(f"run command in {container.name}:") + logger.info(command) result = container.exec_run(command) if result.exit_code: - logging.error( + logger.error( f"Failed to run command {command} in {container.name}: {result.output.decode()}") else: - logging.info(result.output.decode()) + logger.info(result.output.decode()) def backup_volumes(backup_paths, apps, dry_run=False): try: result = restic.backup(backup_paths, dry_run=dry_run, tags=apps) - print(result) - logging.info(result) + logger.summary("backup finished", extra=result) except ResticFailedError as error: - logging.error(f"Backup failed for {apps}. Could not Backup these paths: {backup_paths}") - logging.error(error) + logger.error( + f"Backup failed for {apps}. Could not Backup these paths: {backup_paths}") + logger.error(error, exc_info=True) exit(1) @@ -174,7 +202,7 @@ def restore(snapshot, target, noninteractive): service_paths = service_paths + f'{SERVICE}_*' snapshots = restic.snapshots(snapshot_id=snapshot) if not snapshot: - logging.error("No Snapshots with ID {snapshots}") + logger.error("No Snapshots with ID {snapshots}") exit(1) if not noninteractive: snapshot_date = datetime.fromisoformat(snapshots[0]['time']) @@ -186,12 +214,12 @@ def restore(snapshot, target, noninteractive): f"THIS COMMAND WILL IRREVERSIBLY OVERWRITES {target}{service_paths.removeprefix('/')}") prompt = input("Type YES (uppercase) to continue: ") if prompt != 'YES': - logging.error("Restore aborted") + logger.error("Restore aborted") exit(1) print(f"Restoring Snapshot {snapshot} of {service_paths} at {target}") result = restic.restore(snapshot_id=snapshot, include=service_paths, target_dir=target) - logging.debug(result) + logger.debug(result) @cli.command() @@ -206,7 +234,7 @@ def snapshots(): err_msg = "No Snapshots found" if SERVICE: err_msg += f' for app {SERVICE}' - logging.warning(err_msg) + logger.warning(err_msg) @cli.command() @@ -233,7 +261,7 @@ def list_files(snapshot, path): err_msg = f'There is no snapshot {snapshot}' if SERVICE: err_msg += f'for the app {SERVICE}' - logging.error(err_msg) + logger.error(err_msg) exit(1) else: raise error @@ -264,7 +292,7 @@ def download(snapshot, path, volumes, secrets): file_dumps.append((binary_output, tarinfo)) if volumes: if not SERVICE: - logging.error("Please specify '--host' when using '--volumes'") + logger.error("Please specify '--host' when using '--volumes'") exit(1) files = list_files(snapshot, VOLUME_PATH) for f in files[1:]: @@ -277,7 +305,7 @@ def download(snapshot, path, volumes, secrets): file_dumps.append((binary_output, tarinfo)) if secrets: if not SERVICE: - logging.error("Please specify '--host' when using '--secrets'") + logger.error("Please specify '--host' when using '--secrets'") exit(1) filename = f"{SERVICE}.json" files = list_files(snapshot, SECRET_PATH) @@ -297,7 +325,8 @@ def download(snapshot, path, volumes, secrets): for binary_output, tarinfo in file_dumps: tar.addfile(tarinfo, fileobj=io.BytesIO(binary_output)) size = get_formatted_size('/tmp/backup.tar.gz') - print(f"Backup has been written to /tmp/backup.tar.gz with a size of {size}") + print( + f"Backup has been written to /tmp/backup.tar.gz with a size of {size}") def get_formatted_size(file_path): @@ -318,7 +347,7 @@ def dump(snapshot, path): print(f"Dumping {path} from snapshot '{snapshot}'") output = subprocess.run(cmd, capture_output=True) if output.returncode: - logging.error( + logger.error( f"error while dumping {path} from snapshot '{snapshot}': {output.stderr}") exit(1) return output.stdout diff --git a/entrypoint.sh b/entrypoint.sh index 8d1048c..932f2b0 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -5,7 +5,7 @@ set -e -o pipefail apk add --upgrade --no-cache restic bash python3 py3-pip # Todo use requirements file with specific versions -pip install click==8.1.7 docker==6.1.3 resticpy==1.0.2 +pip install click==8.1.7 docker==6.1.3 resticpy==1.0.2 python-json-logger==2.0.7 if [ -n "$SSH_HOST_KEY" ] then @@ -14,7 +14,7 @@ fi cron_schedule="${CRON_SCHEDULE:?CRON_SCHEDULE not set}" -echo "$cron_schedule backup create" | crontab - +echo "$cron_schedule backup --machine-logs create" | crontab - crontab -l crond -f -d8 -L /dev/stdout