structured json logging with -m flag
continuous-integration/drone/push Build is failing Details

This commit is contained in:
Moritz 2023-11-23 18:44:17 +01:00
parent 4befebba38
commit 15275b2571
2 changed files with 60 additions and 31 deletions

View File

@ -1,6 +1,7 @@
#!/usr/bin/python3 #!/usr/bin/python3
import os import os
import sys
import click import click
import json import json
import subprocess import subprocess
@ -9,22 +10,40 @@ import docker
import restic import restic
import tarfile import tarfile
import io import io
from pythonjsonlogger import jsonlogger
from datetime import datetime, timezone from datetime import datetime, timezone
from restic.errors import ResticFailedError from restic.errors import ResticFailedError
from pathlib import Path from pathlib import Path
from shutil import copyfile, rmtree from shutil import copyfile, rmtree
# logging.basicConfig(level=logging.INFO)
VOLUME_PATH = "/var/lib/docker/volumes/" VOLUME_PATH = "/var/lib/docker/volumes/"
SECRET_PATH = '/secrets/' SECRET_PATH = '/secrets/'
SERVICE = None SERVICE = None
# Module-wide logger plus a custom SUMMARY log level.  SUMMARY sits above
# CRITICAL (50) so the final backup report is always emitted, even when the
# configured log level suppresses everything else.
SUMMARY_LEVEL = 55
logger = logging.getLogger("backupbot")
logging.addLevelName(SUMMARY_LEVEL, 'SUMMARY')
setattr(logging, 'SUMMARY', SUMMARY_LEVEL)


def _log_summary(message, *args, **kwargs):
    """Log *message* at the custom SUMMARY level (named function instead of
    an assigned lambda, per PEP 8 E731)."""
    logger.log(SUMMARY_LEVEL, message, *args, **kwargs)


# Exposed as logger.summary(...) so call sites read like the stock
# logger.info(...)/logger.error(...) helpers.
setattr(logger, 'summary', _log_summary)


def handle_exception(exc_type, exc_value, exc_traceback):
    """Global excepthook: route uncaught exceptions through the logger.

    KeyboardInterrupt keeps the interpreter's default behaviour (clean
    Ctrl-C); every other exception is logged as CRITICAL with the full
    traceback attached via ``exc_info`` so it also appears in the JSON
    log stream when machine logging is enabled.
    """
    if issubclass(exc_type, KeyboardInterrupt):
        sys.__excepthook__(exc_type, exc_value, exc_traceback)
        return
    logger.critical("Uncaught exception", exc_info=(
        exc_type, exc_value, exc_traceback))


sys.excepthook = handle_exception
@click.group() @click.group()
@click.option('-l', '--log', 'loglevel') @click.option('-l', '--log', 'loglevel')
@click.option('-m', '--machine-logs', 'machine_logs', is_flag=True)
@click.option('service', '--host', '-h', envvar='SERVICE') @click.option('service', '--host', '-h', envvar='SERVICE')
@click.option('repository', '--repo', '-r', envvar='RESTIC_REPOSITORY', required=True) @click.option('repository', '--repo', '-r', envvar='RESTIC_REPOSITORY', required=True)
def cli(loglevel, service, repository): def cli(loglevel, service, repository, machine_logs):
global SERVICE global SERVICE
if service: if service:
SERVICE = service.replace('.', '_') SERVICE = service.replace('.', '_')
@ -34,14 +53,21 @@ def cli(loglevel, service, repository):
numeric_level = getattr(logging, loglevel.upper(), None) numeric_level = getattr(logging, loglevel.upper(), None)
if not isinstance(numeric_level, int): if not isinstance(numeric_level, int):
raise ValueError('Invalid log level: %s' % loglevel) raise ValueError('Invalid log level: %s' % loglevel)
logging.basicConfig(level=numeric_level) logger.setLevel(numeric_level)
if machine_logs:
logHandler = logging.StreamHandler()
formatter = jsonlogger.JsonFormatter(
"%(levelname)s %(filename)s %(lineno)s %(process)d %(message)s", rename_fields={"levelname": "message_type"})
logHandler.setFormatter(formatter)
logger.addHandler(logHandler)
export_secrets() export_secrets()
init_repo() init_repo()
def init_repo(): def init_repo():
repo = os.environ['RESTIC_REPOSITORY'] repo = os.environ['RESTIC_REPOSITORY']
logging.debug(f"set restic repository location: {repo}") logger.debug(f"set restic repository location: {repo}")
restic.repository = repo restic.repository = repo
restic.password_file = '/var/run/secrets/restic_password' restic.password_file = '/var/run/secrets/restic_password'
try: try:
@ -49,7 +75,7 @@ def init_repo():
except ResticFailedError as error: except ResticFailedError as error:
if 'unable to open config file' in str(error): if 'unable to open config file' in str(error):
result = restic.init() result = restic.init()
logging.info(f"Initialized restic repo: {result}") logger.info(f"Initialized restic repo: {result}")
else: else:
raise error raise error
@ -57,11 +83,11 @@ def init_repo():
def export_secrets(): def export_secrets():
for env in os.environ: for env in os.environ:
if env.endswith('FILE') and not "COMPOSE_FILE" in env: if env.endswith('FILE') and not "COMPOSE_FILE" in env:
logging.debug(f"exported secret: {env}") logger.debug(f"exported secret: {env}")
with open(os.environ[env]) as file: with open(os.environ[env]) as file:
secret = file.read() secret = file.read()
os.environ[env.removesuffix('_FILE')] = secret os.environ[env.removesuffix('_FILE')] = secret
# logging.debug(f"Read secret value: {secret}") # logger.debug(f"Read secret value: {secret}")
@cli.command() @cli.command()
@ -86,6 +112,7 @@ def get_backup_cmds():
for s in services: for s in services:
labels = s.attrs['Spec']['Labels'] labels = s.attrs['Spec']['Labels']
if (backup := labels.get('backupbot.backup')) and bool(backup): if (backup := labels.get('backupbot.backup')) and bool(backup):
# volumes: s.attrs['Spec']['TaskTemplate']['ContainerSpec']['Mounts'][0]['Source']
stack_name = labels['com.docker.stack.namespace'] stack_name = labels['com.docker.stack.namespace']
# Remove these lines to back up only a specific service # Remove these lines to back up only a specific service
# This will unfortunately decrease restic performance # This will unfortunately decrease restic performance
@ -94,8 +121,8 @@ def get_backup_cmds():
backup_apps.add(stack_name) backup_apps.add(stack_name)
backup_paths = backup_paths.union( backup_paths = backup_paths.union(
Path(VOLUME_PATH).glob(f"{stack_name}_*")) Path(VOLUME_PATH).glob(f"{stack_name}_*"))
if not (container:= container_by_service.get(s.name)): if not (container := container_by_service.get(s.name)):
logging.error( logger.error(
f"Container {s.name} is not running, hooks can not be executed") f"Container {s.name} is not running, hooks can not be executed")
continue continue
if prehook := labels.get('backupbot.backup.pre-hook'): if prehook := labels.get('backupbot.backup.pre-hook'):
@ -106,7 +133,7 @@ def get_backup_cmds():
def copy_secrets(apps): def copy_secrets(apps):
#TODO: check if it is deployed # TODO: check if it is deployed
rmtree(SECRET_PATH, ignore_errors=True) rmtree(SECRET_PATH, ignore_errors=True)
os.mkdir(SECRET_PATH) os.mkdir(SECRET_PATH)
client = docker.from_env() client = docker.from_env()
@ -118,14 +145,15 @@ def copy_secrets(apps):
if (app_name in apps and if (app_name in apps and
(app_secs := s.attrs['Spec']['TaskTemplate']['ContainerSpec'].get('Secrets'))): (app_secs := s.attrs['Spec']['TaskTemplate']['ContainerSpec'].get('Secrets'))):
if not container_by_service.get(s.name): if not container_by_service.get(s.name):
logging.error( logger.error(
f"Container {s.name} is not running, secrets can not be copied.") f"Container {s.name} is not running, secrets can not be copied.")
continue continue
container_id = container_by_service[s.name].id container_id = container_by_service[s.name].id
for sec in app_secs: for sec in app_secs:
src = f'/var/lib/docker/containers/{container_id}/mounts/secrets/{sec["SecretID"]}' src = f'/var/lib/docker/containers/{container_id}/mounts/secrets/{sec["SecretID"]}'
if not Path(src).exists(): if not Path(src).exists():
logging.error(f"For the secret {sec['SecretName']} the file {src} does not exist for {s.name}") logger.error(
f"For the secret {sec['SecretName']} the file {src} does not exist for {s.name}")
continue continue
dst = SECRET_PATH + sec['SecretName'] dst = SECRET_PATH + sec['SecretName']
copyfile(src, dst) copyfile(src, dst)
@ -142,24 +170,24 @@ def run_commands(commands):
command = command[1:-1] command = command[1:-1]
# Use bash's pipefail to return exit codes inside a pipe to prevent silent failure # Use bash's pipefail to return exit codes inside a pipe to prevent silent failure
command = f"bash -c 'set -o pipefail;{command}'" command = f"bash -c 'set -o pipefail;{command}'"
logging.info(f"run command in {container.name}:") logger.info(f"run command in {container.name}:")
logging.info(command) logger.info(command)
result = container.exec_run(command) result = container.exec_run(command)
if result.exit_code: if result.exit_code:
logging.error( logger.error(
f"Failed to run command {command} in {container.name}: {result.output.decode()}") f"Failed to run command {command} in {container.name}: {result.output.decode()}")
else: else:
logging.info(result.output.decode()) logger.info(result.output.decode())
def backup_volumes(backup_paths, apps, dry_run=False): def backup_volumes(backup_paths, apps, dry_run=False):
try: try:
result = restic.backup(backup_paths, dry_run=dry_run, tags=apps) result = restic.backup(backup_paths, dry_run=dry_run, tags=apps)
print(result) logger.summary("backup finished", extra=result)
logging.info(result)
except ResticFailedError as error: except ResticFailedError as error:
logging.error(f"Backup failed for {apps}. Could not Backup these paths: {backup_paths}") logger.error(
logging.error(error) f"Backup failed for {apps}. Could not Backup these paths: {backup_paths}")
logger.error(error, exc_info=True)
exit(1) exit(1)
@ -174,7 +202,7 @@ def restore(snapshot, target, noninteractive):
service_paths = service_paths + f'{SERVICE}_*' service_paths = service_paths + f'{SERVICE}_*'
snapshots = restic.snapshots(snapshot_id=snapshot) snapshots = restic.snapshots(snapshot_id=snapshot)
if not snapshot: if not snapshot:
logging.error("No Snapshots with ID {snapshots}") logger.error("No Snapshots with ID {snapshots}")
exit(1) exit(1)
if not noninteractive: if not noninteractive:
snapshot_date = datetime.fromisoformat(snapshots[0]['time']) snapshot_date = datetime.fromisoformat(snapshots[0]['time'])
@ -186,12 +214,12 @@ def restore(snapshot, target, noninteractive):
f"THIS COMMAND WILL IRREVERSIBLY OVERWRITES {target}{service_paths.removeprefix('/')}") f"THIS COMMAND WILL IRREVERSIBLY OVERWRITES {target}{service_paths.removeprefix('/')}")
prompt = input("Type YES (uppercase) to continue: ") prompt = input("Type YES (uppercase) to continue: ")
if prompt != 'YES': if prompt != 'YES':
logging.error("Restore aborted") logger.error("Restore aborted")
exit(1) exit(1)
print(f"Restoring Snapshot {snapshot} of {service_paths} at {target}") print(f"Restoring Snapshot {snapshot} of {service_paths} at {target}")
result = restic.restore(snapshot_id=snapshot, result = restic.restore(snapshot_id=snapshot,
include=service_paths, target_dir=target) include=service_paths, target_dir=target)
logging.debug(result) logger.debug(result)
@cli.command() @cli.command()
@ -206,7 +234,7 @@ def snapshots():
err_msg = "No Snapshots found" err_msg = "No Snapshots found"
if SERVICE: if SERVICE:
err_msg += f' for app {SERVICE}' err_msg += f' for app {SERVICE}'
logging.warning(err_msg) logger.warning(err_msg)
@cli.command() @cli.command()
@ -233,7 +261,7 @@ def list_files(snapshot, path):
err_msg = f'There is no snapshot {snapshot}' err_msg = f'There is no snapshot {snapshot}'
if SERVICE: if SERVICE:
err_msg += f'for the app {SERVICE}' err_msg += f'for the app {SERVICE}'
logging.error(err_msg) logger.error(err_msg)
exit(1) exit(1)
else: else:
raise error raise error
@ -264,7 +292,7 @@ def download(snapshot, path, volumes, secrets):
file_dumps.append((binary_output, tarinfo)) file_dumps.append((binary_output, tarinfo))
if volumes: if volumes:
if not SERVICE: if not SERVICE:
logging.error("Please specify '--host' when using '--volumes'") logger.error("Please specify '--host' when using '--volumes'")
exit(1) exit(1)
files = list_files(snapshot, VOLUME_PATH) files = list_files(snapshot, VOLUME_PATH)
for f in files[1:]: for f in files[1:]:
@ -277,7 +305,7 @@ def download(snapshot, path, volumes, secrets):
file_dumps.append((binary_output, tarinfo)) file_dumps.append((binary_output, tarinfo))
if secrets: if secrets:
if not SERVICE: if not SERVICE:
logging.error("Please specify '--host' when using '--secrets'") logger.error("Please specify '--host' when using '--secrets'")
exit(1) exit(1)
filename = f"{SERVICE}.json" filename = f"{SERVICE}.json"
files = list_files(snapshot, SECRET_PATH) files = list_files(snapshot, SECRET_PATH)
@ -297,7 +325,8 @@ def download(snapshot, path, volumes, secrets):
for binary_output, tarinfo in file_dumps: for binary_output, tarinfo in file_dumps:
tar.addfile(tarinfo, fileobj=io.BytesIO(binary_output)) tar.addfile(tarinfo, fileobj=io.BytesIO(binary_output))
size = get_formatted_size('/tmp/backup.tar.gz') size = get_formatted_size('/tmp/backup.tar.gz')
print(f"Backup has been written to /tmp/backup.tar.gz with a size of {size}") print(
f"Backup has been written to /tmp/backup.tar.gz with a size of {size}")
def get_formatted_size(file_path): def get_formatted_size(file_path):
@ -318,7 +347,7 @@ def dump(snapshot, path):
print(f"Dumping {path} from snapshot '{snapshot}'") print(f"Dumping {path} from snapshot '{snapshot}'")
output = subprocess.run(cmd, capture_output=True) output = subprocess.run(cmd, capture_output=True)
if output.returncode: if output.returncode:
logging.error( logger.error(
f"error while dumping {path} from snapshot '{snapshot}': {output.stderr}") f"error while dumping {path} from snapshot '{snapshot}': {output.stderr}")
exit(1) exit(1)
return output.stdout return output.stdout

View File

@ -5,7 +5,7 @@ set -e -o pipefail
apk add --upgrade --no-cache restic bash python3 py3-pip apk add --upgrade --no-cache restic bash python3 py3-pip
# Todo use requirements file with specific versions # Todo use requirements file with specific versions
pip install click==8.1.7 docker==6.1.3 resticpy==1.0.2 pip install click==8.1.7 docker==6.1.3 resticpy==1.0.2 python-json-logger==2.0.7
if [ -n "$SSH_HOST_KEY" ] if [ -n "$SSH_HOST_KEY" ]
then then
@ -14,7 +14,7 @@ fi
cron_schedule="${CRON_SCHEDULE:?CRON_SCHEDULE not set}" cron_schedule="${CRON_SCHEDULE:?CRON_SCHEDULE not set}"
echo "$cron_schedule backup create" | crontab - echo "$cron_schedule backup --machine-logs create" | crontab -
crontab -l crontab -l
crond -f -d8 -L /dev/stdout crond -f -d8 -L /dev/stdout