59 Commits

Author SHA1 Message Date
333b7ec16d chore: publish 2.0.1+2.1.1-beta release 2024-10-15 18:56:43 +02:00
aeccd605ee improve logging 2024-10-15 18:14:17 +02:00
f877186a57 list restore commands before executing them 2024-10-15 18:14:17 +02:00
9dc239b199 add --no_commands flag for restore 2024-10-15 18:14:11 +02:00
43548273fe chore: publish 2.1.0-beta release 2024-09-18 12:58:10 +02:00
5a0467dbdd change drone runner: create only images from releases 2024-09-18 12:29:08 +02:00
3aefae61c0 add host while preserving filechange detection #53 2024-09-17 19:47:40 +02:00
ac7c5fb50d restore: execute hooks, filter for volumes and container 2024-09-17 17:49:37 +02:00
cc59087b8c restic_restore: extends restic.restore to use multiple --include paths 2024-09-17 17:48:20 +02:00
eb7c35d4cd get_backup_details/add_backup_paths: filter by volumes 2024-09-17 17:45:30 +02:00
249772ec03 filter snapshots by app 2024-09-17 17:45:27 +02:00
45af6e8b5e parse_backup_labels: read restore/backup hooks, filter by container 2024-09-17 17:44:25 +02:00
f7207cdf36 refactor: move restore function 2024-09-17 14:04:52 +02:00
241fe3ce92 remove old get_backup_cmds 2024-09-17 13:54:51 +02:00
b8d61d01cd add_backup_paths: extract and merge paths from includes/excludes 2024-09-17 13:44:26 +02:00
6ac781c7e6 refactor parse_excludes_includes: save all label infos in settings
without path extraction
2024-09-17 13:43:03 +02:00
197cabf564 check backup path existence 2024-09-17 13:39:45 +02:00
fe35f1ede8 implement volume excludes and path includes 2024-09-13 19:24:32 +02:00
f254a365f2 Revert "Revert "Removed redundant code formatting""
This reverts commit e09e1b476c.
2024-08-21 14:24:14 +02:00
0d15765673 Revert "Added extra secret step to REST server instructions"
This step is already included in the deployment process by the command `abra app secret generate -a <backupbot_name>` and it applies to every setup, not only the REST server.

This reverts commit 72688dc42c.
2024-08-21 14:23:48 +02:00
e09e1b476c Revert "Removed redundant code formatting"
This reverts commit 10e460ff2d.
2024-08-21 11:59:10 +01:00
72688dc42c Added extra secret step to REST server instructions 2024-08-19 17:08:41 +01:00
10e460ff2d Removed redundant code formatting 2024-08-19 14:53:48 +01:00
f2d0b92fa3 chore: publish 1.0.0+2.0.0-beta release 2024-08-14 14:47:12 +02:00
cc049b858b README: new image location 2024-07-05 16:30:40 +02:00
b7bc8ed58f fix: ignore non-swarm containers #52 2024-07-05 14:41:50 +02:00
68e37f5c23 Merge pull request 'Add dockerfile, and compose.yml to use it' (#49) from feature/dockerfile into main
Reviewed-on: coop-cloud/backup-bot-two#49
2024-06-01 03:36:07 +00:00
3wc 4d39d84733 Switch ENTRYPOINT to try to resolve loop on start 2024-06-01 03:35:52 +00:00
3wc e5b9bc0446 Update requirements 2024-06-01 03:35:52 +00:00
3wc ec4c4509dc Make entrypoint executable 2024-06-01 03:35:52 +00:00
3wc 26162a9e38 Add --break-system-packages, surely we don't need a virtualenv 2024-06-01 03:35:52 +00:00
3wc bd581fd8d7 Move entrypoint script into Docker image 2024-06-01 03:35:52 +00:00
3wc e77432e3ab Move /entrypoint.sh to Dockerfile 2024-06-01 03:35:52 +00:00
3wc 001a654e37 Remove redundant stuff from entrypoint 2024-06-01 03:35:52 +00:00
3wc c5574edc54 Whoops, wrong image 2024-06-01 03:35:52 +00:00
3wc 50e4d68717 Switch to backup-bot-two image 2024-06-01 03:35:52 +00:00
3wc c7830ceb6f Whoops skip shellcheck 2024-06-01 03:35:52 +00:00
3wc b6f859efbb Reinstate Docker image 2024-06-01 03:35:52 +00:00
7f14698824 change loglevel to warning for non-running containers 2024-05-06 11:31:40 +02:00
2a9a98172f Add debug infos 2024-04-30 15:27:17 +02:00
282215cf9c Add debug infos 2024-04-30 14:59:59 +02:00
ae7a14b6f1 Fix usage of RESTIC_REPOSITORY_FILE #51 2024-04-30 14:51:46 +02:00
8acdb20e5b Fix loghandler 2024-04-29 14:18:32 +02:00
5582744073 Fix usage of RESTIC_REPOSITORY_FILE #51 2024-04-29 14:16:13 +02:00
3wc 84d606fa80 Add CHANGELOG.md
[ci skip]
2024-04-09 22:51:09 -03:00
7865907811 fix push notification precedence race condition 2024-03-08 15:42:00 +01:00
dc66c02e23 make run_cron cmd independent from push_success_notifiaction 2024-02-13 11:53:27 +01:00
f730c70bfe feat: add retry option 2024-01-18 18:01:30 +01:00
faa7ae3dd1 fix Readme 2024-01-17 20:36:06 +01:00
79eeec428a Push Notifications #24 2024-01-16 19:40:31 +01:00
4164760dc6 Specify secret and volume download via env, fixes #44 2024-01-11 18:46:58 +01:00
e644679b8b Clearer service name in warning message. Fixes #46 2024-01-11 18:39:26 +01:00
0c587ac926 add spaces for missing snapshot, fixes #45 2024-01-11 18:34:58 +01:00
65686cd891 Fix python package install error 2023-12-19 01:16:12 +01:00
ac055c932e fix: remove bash/sh wrapping 2023-12-13 18:27:12 +01:00
64328c79b1 make --noninteractive a flag 2023-12-12 13:39:26 +01:00
15275b2571 structured json logging with -m flag 2023-11-23 20:16:15 +01:00
4befebba38 Merge pull request 'fix removing quotes' (#40) from p4u1/backup-bot-two:fix-quotes into main
Reviewed-on: coop-cloud/backup-bot-two#40
2023-11-11 08:15:12 +00:00
d2087a441e fix removing quotes 2023-11-11 08:55:12 +01:00
10 changed files with 380 additions and 163 deletions

.drone.yml

@@ -2,11 +2,16 @@
kind: pipeline
name: linters
steps:
- name: run shellcheck
image: koalaman/shellcheck-alpine
commands:
- shellcheck backup.sh
trigger:
branch:
- main
- name: publish image
image: plugins/docker
settings:
username: 3wordchant
password:
from_secret: git_coopcloud_tech_token_3wc
repo: git.coopcloud.tech/coop-cloud/backup-bot-two
tags: ${DRONE_SEMVER_BUILD}
registry: git.coopcloud.tech
when:
event:
include:
- tag

.env.sample

@@ -8,6 +8,11 @@ RESTIC_REPOSITORY=/backups/restic
CRON_SCHEDULE='30 3 * * *'
# Push Notifications
#PUSH_URL_START=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=start
#PUSH_URL_SUCCESS=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=OK
#PUSH_URL_FAIL=https://status.example.com/api/push/xxxxxxxxxx?status=down&msg=fail
# swarm-cronjob, instead of built-in cron
#COMPOSE_FILE="$COMPOSE_FILE:compose.swarm-cronjob.yml"

CHANGELOG.md Normal file (+6 lines)

@@ -0,0 +1,6 @@
# Change log
## 2.0.0 (unreleased)
- Rewrite from Bash to Python
- Add support for push notifications (#24)

Dockerfile Normal file (+11 lines)

@@ -0,0 +1,11 @@
FROM docker:24.0.7-dind
RUN apk add --upgrade --no-cache restic bash python3 py3-pip py3-click py3-docker-py py3-json-logger curl
# Todo use requirements file with specific versions
RUN pip install --break-system-packages resticpy==1.0.2
COPY backupbot.py /usr/bin/backup
COPY entrypoint.sh /entrypoint.sh
ENTRYPOINT /entrypoint.sh

README.md

@@ -10,7 +10,7 @@ Automatically take backups from all volumes of running Docker Swarm services and
* **Category**: Utilities
* **Status**: 0, work-in-progress
* **Image**: [`thecoopcloud/backup-bot-two`](https://hub.docker.com/r/thecoopcloud/backup-bot-two), 4, upstream
* **Image**: [`git.coopcloud.tech/coop-cloud/backup-bot-two`](https://git.coopcloud.tech/coop-cloud/-/packages/container/backup-bot-two), 4, upstream
* **Healthcheck**: No
* **Backups**: N/A
* **Email**: N/A
@@ -38,12 +38,12 @@ Backupbot II tries to help, by
* `abra app new backup-bot-two`
* `abra app config <app-name>`
- set storage options. Either configure `CRON_SCHEDULE`, or set up `swarm-cronjob`
* `abra app secret generate -a <app_name>`
* `abra app secret generate -a <backupbot_name>`
* `abra app deploy <app-name>`
## Configuration
By default, Backupbot stores backups locally in the repository `/backups/restic`, which is accessible as a volume at `/var/lib/docker/volumes/<app_name>_backups/_data/restic/`
By default, Backupbot stores backups locally in the repository `/backups/restic`, which is accessible as a volume at `/var/lib/docker/volumes/<backupbot_name>_backups/_data/restic/`
The backup location can be changed using the `RESTIC_REPOSITORY` env variable.
@@ -57,7 +57,7 @@ AWS_ACCESS_KEY_ID=<MY_ACCESS_KEY>
COMPOSE_FILE="$COMPOSE_FILE:compose.s3.yml"
```
and add your `<SECRET_ACCESS_KEY>` as docker secret:
`abra app secret insert <app_name> aws_secret_access_key v1 <SECRET_ACCESS_KEY>`
`abra app secret insert <backupbot_name> aws_secret_access_key v1 <SECRET_ACCESS_KEY>`
See [restic s3 docs](https://restic.readthedocs.io/en/latest/030_preparing_a_new_repo.html#amazon-s3) for more information.
@@ -79,7 +79,7 @@ Add the key to your `authorized_keys`:
`ssh-copy-id -i backupkey <user>@<hostname>`
Add your `SSH_KEY` as docker secret:
```
abra app secret insert <app_name> ssh_key v1 """$(cat backupkey)
abra app secret insert <backupbot_name> ssh_key v1 """$(cat backupkey)
"""
```
> Attention: This command needs to be executed exactly as stated above, because it places a trailing newline at the end; if this is missing, you will get the following error: `Load key "/run/secrets/ssh_key": error in libcrypto`
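One way to verify the trailing newline before inserting the key (a quick local check, assuming the key file is named `backupkey` as above — a correct file ends with a newline, so the last byte should print as `\n`):
```
tail -c1 backupkey | od -An -c   # should print: \n
```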
@@ -95,67 +95,81 @@ COMPOSE_FILE="$COMPOSE_FILE:compose.secret.yml"
```
Add your REST server URL as secret:
```
`abra app secret insert <app_name> restic_repo v1 "rest:https://user:pass@host:8000/"`
abra app secret insert <backupbot_name> restic_repo v1 "rest:https://user:pass@host:8000/"
```
The secret will overwrite the `RESTIC_REPOSITORY` variable.
See [restic REST docs](https://restic.readthedocs.io/en/latest/030_preparing_a_new_repo.html#rest-server) for more information.
## Push notifications
The following env variables can be used to set up push notifications for backups: `PUSH_URL_START` is requested just before the backup starts, `PUSH_URL_SUCCESS` is requested only if the backup was successful, and `PUSH_URL_FAIL` is requested if the backup fails.
Each variable is optional and independent of the others.
```
PUSH_URL_START=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=start
PUSH_URL_SUCCESS=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=OK
PUSH_URL_FAIL=https://status.example.com/api/push/xxxxxxxxxx?status=down&msg=fail
```
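Each URL is simply fetched with `curl`, so a configured endpoint can be tested by hand before relying on it (the URL below is the placeholder from the example above):
```
curl -fsS 'https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=start' && echo "push endpoint reachable"
```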
## Usage
Run the cronjob that creates a backup, including push notifications and Docker logging:
`abra app cmd <backupbot_name> app run_cron`
Create a backup of all apps:
`abra app run <app_name> app -- backup create`
`abra app run <backupbot_name> app -- backup create`
> The apps to be backed up need to be deployed
Create an individual backup:
`abra app run <app_name> app -- backup --host <target_app_name> create`
`abra app run <backupbot_name> app -- backup --host <target_app_name> create`
Create a backup to a local repository:
`abra app run <app_name> app -- backup create -r /backups/restic`
`abra app run <backupbot_name> app -- backup create -r /backups/restic`
> It is recommended to shut down/undeploy an app before restoring the data
Restore the latest snapshot of all included apps:
`abra app run <app_name> app -- backup restore`
`abra app run <backupbot_name> app -- backup restore`
Restore a specific snapshot of an individual app:
`abra app run <app_name> app -- backup --host <target_app_name> restore --snapshot <snapshot_id>`
`abra app run <backupbot_name> app -- backup --host <target_app_name> restore --snapshot <snapshot_id>`
Show all snapshots:
`abra app run <app_name> app -- backup snapshots`
`abra app run <backupbot_name> app -- backup snapshots`
Show all snapshots containing a specific app:
`abra app run <app_name> app -- backup --host <target_app_name> snapshots`
`abra app run <backupbot_name> app -- backup --host <target_app_name> snapshots`
Show all files inside the latest snapshot (can be very verbose):
`abra app run <app_name> app -- backup ls`
`abra app run <backupbot_name> app -- backup ls`
Show specific files inside a selected snapshot:
`abra app run <app_name> app -- backup ls --snapshot <snapshot_id> --path /var/lib/docker/volumes/`
`abra app run <backupbot_name> app -- backup ls --snapshot <snapshot_id> --path /var/lib/docker/volumes/`
Download files from a snapshot:
```
filename=$(abra app run <app_name> app -- backup download --snapshot <snapshot_id> --path <absolute_path>)
abra app cp <app_name> app:$filename .
filename=$(abra app run <backupbot_name> app -- backup download --snapshot <snapshot_id> --path <absolute_path>)
abra app cp <backupbot_name> app:$filename .
```
## Run restic
```
abra app run <app_name> app bash
abra app run <backupbot_name> app bash
export AWS_SECRET_ACCESS_KEY=$(cat $AWS_SECRET_ACCESS_KEY_FILE)
export RESTIC_PASSWORD=$(cat $RESTIC_PASSWORD_FILE)
restic snapshots
```

abra.sh

@@ -1,3 +1,10 @@
export ENTRYPOINT_VERSION=v1
export BACKUPBOT_VERSION=v1
export SSH_CONFIG_VERSION=v1
run_cron () {
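# run the configured cron job once, right now: save the schedule (first five
# crontab fields), reschedule the job to every minute, wait until it has
# written /tmp/backup.log, then restore the original schedule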
schedule="$(crontab -l | tr -s " " | cut -d ' ' -f-5)"
rm -f /tmp/backup.log
echo "* * * * * $(crontab -l | tr -s " " | cut -d ' ' -f6-)" | crontab -
while [ ! -f /tmp/backup.log ]; do sleep 1; done
echo "$schedule $(crontab -l | tr -s " " | cut -d ' ' -f6-)" | crontab -
}

backupbot.py

@@ -1,6 +1,7 @@
#!/usr/bin/python3
import os
import sys
import click
import json
import subprocess
@@ -9,22 +10,40 @@ import docker
import restic
import tarfile
import io
from pythonjsonlogger import jsonlogger
from datetime import datetime, timezone
from restic.errors import ResticFailedError
from pathlib import Path
from shutil import copyfile, rmtree
# logging.basicConfig(level=logging.INFO)
VOLUME_PATH = "/var/lib/docker/volumes/"
SECRET_PATH = '/secrets/'
SERVICE = None
SERVICE = 'ALL'
logger = logging.getLogger("backupbot")
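# custom SUMMARY level (55) sits above CRITICAL (50), so summary lines are always emitted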
logging.addLevelName(55, 'SUMMARY')
setattr(logging, 'SUMMARY', 55)
setattr(logger, 'summary', lambda message, *args, **
kwargs: logger.log(55, message, *args, **kwargs))
def handle_exception(exc_type, exc_value, exc_traceback):
if issubclass(exc_type, KeyboardInterrupt):
sys.__excepthook__(exc_type, exc_value, exc_traceback)
return
logger.critical("Uncaught exception", exc_info=(
exc_type, exc_value, exc_traceback))
sys.excepthook = handle_exception
@click.group()
@click.option('-l', '--log', 'loglevel')
@click.option('-m', '--machine-logs', 'machine_logs', is_flag=True)
@click.option('service', '--host', '-h', envvar='SERVICE')
@click.option('repository', '--repo', '-r', envvar='RESTIC_REPOSITORY', required=True)
def cli(loglevel, service, repository):
@click.option('repository', '--repo', '-r', envvar='RESTIC_REPOSITORY')
def cli(loglevel, service, repository, machine_logs):
global SERVICE
if service:
SERVICE = service.replace('.', '_')
@@ -34,22 +53,33 @@ def cli(loglevel, service, repository):
numeric_level = getattr(logging, loglevel.upper(), None)
if not isinstance(numeric_level, int):
raise ValueError('Invalid log level: %s' % loglevel)
logging.basicConfig(level=numeric_level)
logger.setLevel(numeric_level)
logHandler = logging.StreamHandler()
if machine_logs:
formatter = jsonlogger.JsonFormatter(
"%(levelname)s %(filename)s %(lineno)s %(process)d %(message)s", rename_fields={"levelname": "message_type"})
logHandler.setFormatter(formatter)
logger.addHandler(logHandler)
export_secrets()
init_repo()
def init_repo():
repo = os.environ['RESTIC_REPOSITORY']
logging.debug(f"set restic repository location: {repo}")
restic.repository = repo
if repo:= os.environ.get('RESTIC_REPOSITORY_FILE'):
# RESTIC_REPOSITORY_FILE and RESTIC_REPOSITORY are mutually exclusive
del os.environ['RESTIC_REPOSITORY']
else:
repo = os.environ['RESTIC_REPOSITORY']
restic.repository = repo
logger.debug(f"set restic repository location: {repo}")
restic.password_file = '/var/run/secrets/restic_password'
try:
restic.cat.config()
except ResticFailedError as error:
if 'unable to open config file' in str(error):
result = restic.init()
logging.info(f"Initialized restic repo: {result}")
logger.info(f"Initialized restic repo: {result}")
else:
raise error
@@ -57,77 +87,234 @@ def init_repo():
def export_secrets():
for env in os.environ:
if env.endswith('FILE') and not "COMPOSE_FILE" in env:
logging.debug(f"exported secret: {env}")
logger.debug(f"exported secret: {env}")
with open(os.environ[env]) as file:
secret = file.read()
os.environ[env.removesuffix('_FILE')] = secret
# logging.debug(f"Read secret value: {secret}")
# logger.debug(f"Read secret value: {secret}")
@cli.command()
def create():
pre_commands, post_commands, backup_paths, apps = get_backup_cmds()
@click.option('retries', '--retries', '-r', envvar='RETRIES', default=1)
def create(retries):
app_settings = parse_backup_labels()
pre_commands, post_commands, backup_paths, apps = get_backup_details(app_settings)
copy_secrets(apps)
backup_paths.append(SECRET_PATH)
backup_paths.append(Path(SECRET_PATH))
run_commands(pre_commands)
backup_volumes(backup_paths, apps)
backup_volumes(backup_paths, apps, int(retries))
run_commands(post_commands)
def get_backup_cmds():
@cli.command()
@click.option('snapshot', '--snapshot', '-s', envvar='SNAPSHOT', default='latest')
@click.option('target', '--target', '-t', envvar='TARGET', default='/')
@click.option('noninteractive', '--noninteractive', envvar='NONINTERACTIVE', is_flag=True)
@click.option('volumes', '--volumes', '-v', envvar='VOLUMES', multiple=True)
@click.option('container', '--container', '-c', envvar='CONTAINER', multiple=True)
@click.option('no_commands', '--no-commands', envvar='NO_COMMANDS', is_flag=True)
def restore(snapshot, target, noninteractive, volumes, container, no_commands):
app_settings = parse_backup_labels('restore', container)
if SERVICE != 'ALL':
app_settings = {SERVICE: app_settings[SERVICE]}
pre_commands, post_commands, backup_paths, apps = get_backup_details(app_settings, volumes)
snapshots = get_snapshots(snapshot_id=snapshot)
if not snapshots:
logger.error(f"No snapshots with ID {snapshot} for {apps} found.")
exit(1)
if not noninteractive:
snapshot_date = datetime.fromisoformat(snapshots[0]['time'])
delta = datetime.now(tz=timezone.utc) - snapshot_date
print(f"You are going to restore Snapshot {snapshot} of {apps} at {target}")
print("The following volume paths will be restored:")
for p in backup_paths:
print(f'\t{p}')
if not no_commands:
print("The following commands will be executed:")
for container, cmd in list(pre_commands.items()) + list(post_commands.items()):
print(f"\t{container.labels['com.docker.swarm.service.name']}:\t{cmd}")
print(f"This snapshot is {delta} old")
print("\nTHIS COMMAND WILL IRREVERSIBLY OVERWRITES FILES")
prompt = input("Type YES (uppercase) to continue: ")
if prompt != 'YES':
logger.error("Restore aborted")
exit(1)
print(f"Restoring Snapshot {snapshot} at {target}")
if not no_commands and pre_commands:
print(f"Run pre commands.")
run_commands(pre_commands)
result = restic_restore(snapshot_id=snapshot, include=backup_paths, target_dir=target)
if not no_commands and post_commands:
print(f"Run post commands.")
run_commands(post_commands)
logger.debug(result)
def restic_restore(snapshot_id='latest', include=[], target_dir=None):
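# resticpy's restic.restore takes a single include path, so build the raw
# restic command here to pass multiple --include flags (see commit cc59087b8c)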
cmd = restic.cat.base_command() + ['restore', snapshot_id]
for path in include:
cmd.extend(['--include', path])
if target_dir:
cmd.extend(['--target', target_dir])
return restic.internal.command_executor.execute(cmd)
def get_snapshots(snapshot_id=None):
if snapshot_id and snapshot_id != 'latest':
snapshots = restic.snapshots(snapshot_id=snapshot_id)
if SERVICE not in snapshots[0]['tags']:
logger.error(f'Snapshot with ID {snapshot_id} does not contain {SERVICE}')
exit(1)
else:
snapshots = restic.snapshots()
snapshots = list(filter(lambda x: x.get('tags') and SERVICE in x.get('tags'), snapshots))
if snapshot_id == 'latest':
return snapshots[-1:]
else:
return snapshots
def parse_backup_labels(hook_type='backup', selected_container=[]):
client = docker.from_env()
container_by_service = {
c.labels['com.docker.swarm.service.name']: c for c in client.containers.list()}
backup_paths = set()
backup_apps = set()
pre_commands = {}
post_commands = {}
c.labels.get('com.docker.swarm.service.name'): c for c in client.containers.list()}
services = client.services.list()
app_settings = {}
for s in services:
labels = s.attrs['Spec']['Labels']
specs = s.attrs['Spec']
labels = specs['Labels']
stack_name = labels['com.docker.stack.namespace']
container_name = s.name.removeprefix(f"{stack_name}_")
settings = app_settings[stack_name] = app_settings.get(stack_name) or {}
if (backup := labels.get('backupbot.backup')) and bool(backup):
stack_name = labels['com.docker.stack.namespace']
# Remove these lines to back up only a specific service
# This will unfortunately decrease restic performance
# if SERVICE and SERVICE != stack_name:
# continue
backup_apps.add(stack_name)
backup_paths = backup_paths.union(
Path(VOLUME_PATH).glob(f"{stack_name}_*"))
if not (container:= container_by_service.get(s.name)):
logging.error(
f"Container {s.name} is not running, hooks can not be executed")
settings['enabled'] = True
if selected_container and container_name not in selected_container:
logger.debug(f"Skipping {s.name} because it's not a selected container")
continue
if mounts:= specs['TaskTemplate']['ContainerSpec'].get('Mounts'):
volumes = parse_volumes(stack_name, mounts)
volumes.update(settings.get('volumes') or {})
settings['volumes'] = volumes
excluded_volumes, included_volume_paths = parse_excludes_includes(labels)
settings['excluded_volumes'] = excluded_volumes.union(settings.get('excluded_volumes') or set())
settings['included_volume_paths'] = included_volume_paths.union(settings.get('included_volume_paths') or set())
if container := container_by_service.get(s.name):
if command := labels.get(f'backupbot.{hook_type}.pre-hook'):
if not (pre_hooks:= settings.get('pre_hooks')):
pre_hooks = settings['pre_hooks'] = {}
pre_hooks[container] = command
if command := labels.get(f'backupbot.{hook_type}.post-hook'):
if not (post_hooks:= settings.get('post_hooks')):
post_hooks = settings['post_hooks'] = {}
post_hooks[container] = command
else:
logger.debug(f"Container {s.name} is not running.")
if labels.get(f'backupbot.{hook_type}.pre-hook') or labels.get(f'backupbot.{hook_type}.post-hook'):
logger.error(f"Container {s.name} contain hooks but it's not running")
return app_settings
def get_backup_details(app_settings, volumes=[]):
backup_paths = set()
backup_apps = []
pre_hooks= {}
post_hooks = {}
for app, settings in app_settings.items():
if settings.get('enabled'):
if SERVICE != 'ALL' and SERVICE != app:
continue
if prehook := labels.get('backupbot.backup.pre-hook'):
pre_commands[container] = prehook
if posthook := labels.get('backupbot.backup.post-hook'):
post_commands[container] = posthook
return pre_commands, post_commands, list(backup_paths), list(backup_apps)
backup_apps.append(app)
add_backup_paths(backup_paths, settings, app, volumes)
if hooks:= settings.get('pre_hooks'):
pre_hooks.update(hooks)
if hooks:= settings.get('post_hooks'):
post_hooks.update(hooks)
return pre_hooks, post_hooks, list(backup_paths), backup_apps
def add_backup_paths(backup_paths, settings, app, selected_volumes):
if (volumes := settings.get('volumes')):
if includes:= settings.get('included_volume_paths'):
included_volumes = list(zip(*includes))[0]
for volume, rel_paths in includes:
if not (volume_path:= volumes.get(volume)):
logger.error(f'Can not find volume with the name {volume}')
continue
if selected_volumes and volume not in selected_volumes:
logger.debug(f'Skipping {volume}:{rel_paths} because the volume is not selected')
continue
for p in rel_paths:
absolute_path = Path(f"{volume_path}/{p}")
backup_paths.add(absolute_path)
else:
included_volumes = []
excluded_volumes = settings.get('excluded_volumes') or []
for name, path in volumes.items():
if selected_volumes and name not in selected_volumes:
logger.debug(f'Skipping volume: {name} because the volume is not selected')
continue
if name in excluded_volumes:
logger.debug(f'Skipping volume: {name} because the volume is excluded')
continue
if name in included_volumes:
logger.debug(f'Skipping volume: {name} because a path is selected')
continue
backup_paths.add(path)
else:
logger.warning(f"{app} does not contain any volumes")
def parse_volumes(stack_name, mounts):
volumes = {}
for m in mounts:
if m['Type'] != 'volume':
continue
relative_path = m['Source']
name = relative_path.removeprefix(stack_name + '_')
absolute_path = Path(f"{VOLUME_PATH}{relative_path}/_data/")
volumes[name] = absolute_path
return volumes
def parse_excludes_includes(labels):
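# labels this function understands (volume names below are hypothetical):
#   backupbot.backup.volumes.cache: "true"            -> exclude volume "cache"
#   backupbot.backup.volumes.data.path: "conf,files"  -> back up only conf and files inside volume "data"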
excluded_volumes = set()
included_volume_paths = set()
for label, value in labels.items():
if label.startswith('backupbot.backup.volumes.'):
volume_name = label.removeprefix('backupbot.backup.volumes.').removesuffix('.path')
if label.endswith('path'):
relative_paths = tuple(value.split(','))
included_volume_paths.add((volume_name, relative_paths))
elif bool(value):
excluded_volumes.add(volume_name)
return excluded_volumes, included_volume_paths
def copy_secrets(apps):
#TODO: check if it is deployed
# TODO: check if it is deployed
rmtree(SECRET_PATH, ignore_errors=True)
os.mkdir(SECRET_PATH)
client = docker.from_env()
container_by_service = {
c.labels['com.docker.swarm.service.name']: c for c in client.containers.list()}
c.labels.get('com.docker.swarm.service.name'): c for c in client.containers.list()}
services = client.services.list()
for s in services:
app_name = s.attrs['Spec']['Labels']['com.docker.stack.namespace']
if (app_name in apps and
(app_secs := s.attrs['Spec']['TaskTemplate']['ContainerSpec'].get('Secrets'))):
if not container_by_service.get(s.name):
logging.error(
logger.warning(
f"Container {s.name} is not running, secrets can not be copied.")
continue
container_id = container_by_service[s.name].id
for sec in app_secs:
src = f'/var/lib/docker/containers/{container_id}/mounts/secrets/{sec["SecretID"]}'
if not Path(src).exists():
logging.error(f"For the secret {sec['SecretName']} the file {src} does not exist for {s.name}")
logger.error(
f"For the secret {sec['SecretName']} the file {src} does not exist for {s.name}")
continue
dst = SECRET_PATH + sec['SecretName']
logger.debug(f"Copy Secret {sec['SecretName']}")
copyfile(src, dst)
@@ -136,77 +323,64 @@ def run_commands(commands):
if not command:
continue
# Remove bash/sh wrapping
command = command.removeprefix('bash -c').removeprefix('sh -c')
command = command.removeprefix('bash -c').removeprefix('sh -c').removeprefix(' ')
# Remove quotes surrounding the command
if (len(command) >= 2 and command[0] == command[-1] and (command[0] == "'" or command[0] == '"')):
command[1:-1]
command = command[1:-1]
# Use bash's pipefail to return exit codes inside a pipe to prevent silent failure
command = f"bash -c 'set -o pipefail;{command}'"
logging.info(f"run command in {container.name}:")
logging.info(command)
logger.info(f"run command in {container.name}:")
logger.info(command)
result = container.exec_run(command)
if result.exit_code:
logging.error(
logger.error(
f"Failed to run command {command} in {container.name}: {result.output.decode()}")
else:
logging.info(result.output.decode())
logger.info(result.output.decode())
def backup_volumes(backup_paths, apps, dry_run=False):
try:
result = restic.backup(backup_paths, dry_run=dry_run, tags=apps)
print(result)
logging.info(result)
except ResticFailedError as error:
logging.error(f"Backup failed for {apps}. Could not Backup these paths: {backup_paths}")
logging.error(error)
exit(1)
def backup_volumes(backup_paths, apps, retries, dry_run=False):
while True:
try:
logger.info("Backup these paths:")
logger.debug("\n".join(map(str, backup_paths)))
backup_paths = list(filter(path_exists, backup_paths))
cmd = restic.cat.base_command()
parent = get_snapshots('latest')
if parent:
# https://restic.readthedocs.io/en/stable/040_backup.html#file-change-detection
cmd.extend(['--parent', parent[0]['short_id']])
tags = set(apps + [SERVICE])
logger.info("Start volume backup")
result = restic.internal.backup.run(cmd, backup_paths, dry_run=dry_run, tags=tags)
logger.summary("backup finished", extra=result)
return
except ResticFailedError as error:
logger.error(f"Backup failed for {SERVICE}.")
logger.error(error, exc_info=True)
if retries > 0:
retries -= 1
else:
exit(1)
@cli.command()
@click.option('snapshot', '--snapshot', '-s', envvar='SNAPSHOT', default='latest')
@click.option('target', '--target', '-t', envvar='TARGET', default='/')
@click.option('noninteractive', '--noninteractive', envvar='NONINTERACTIVE', default=False)
def restore(snapshot, target, noninteractive):
# Todo: recommend to shutdown the container
service_paths = VOLUME_PATH
if SERVICE:
service_paths = service_paths + f'{SERVICE}_*'
snapshots = restic.snapshots(snapshot_id=snapshot)
if not snapshot:
logging.error("No Snapshots with ID {snapshots}")
exit(1)
if not noninteractive:
snapshot_date = datetime.fromisoformat(snapshots[0]['time'])
delta = datetime.now(tz=timezone.utc) - snapshot_date
print(
f"You are going to restore Snapshot {snapshot} of {service_paths} at {target}")
print(f"This snapshot is {delta} old")
print(
f"THIS COMMAND WILL IRREVERSIBLY OVERWRITES {target}{service_paths.removeprefix('/')}")
prompt = input("Type YES (uppercase) to continue: ")
if prompt != 'YES':
logging.error("Restore aborted")
exit(1)
print(f"Restoring Snapshot {snapshot} of {service_paths} at {target}")
result = restic.restore(snapshot_id=snapshot,
include=service_paths, target_dir=target)
logging.debug(result)
def path_exists(path):
if not path.exists():
logger.error(f'{path} does not exist')
return path.exists()
@cli.command()
def snapshots():
snapshots = restic.snapshots()
no_snapshots = True
snapshots = get_snapshots()
for snap in snapshots:
if not SERVICE or (tags := snap.get('tags')) and SERVICE in tags:
print(snap['time'], snap['id'])
no_snapshots = False
if no_snapshots:
print(snap['time'], snap['id'])
if not snapshots:
err_msg = "No Snapshots found"
if SERVICE:
err_msg += f' for app {SERVICE}'
logging.warning(err_msg)
if SERVICE != 'ALL':
service_name = SERVICE.replace('_', '.')
err_msg += f' for app {service_name}'
logger.warning(err_msg)
@cli.command()
@@ -221,8 +395,7 @@ def ls(snapshot, path):
def list_files(snapshot, path):
cmd = restic.cat.base_command() + ['ls']
if SERVICE:
cmd = cmd + ['--tag', SERVICE]
cmd = cmd + ['--tag', SERVICE]
cmd.append(snapshot)
if path:
cmd.append(path)
@@ -230,10 +403,10 @@ def list_files(snapshot, path):
output = restic.internal.command_executor.execute(cmd)
except ResticFailedError as error:
if 'no snapshot found' in str(error):
err_msg = f'There is no snapshot {snapshot}'
if SERVICE:
err_msg += f'for the app {SERVICE}'
logging.error(err_msg)
err_msg = f'There is no snapshot "{snapshot}"'
if SERVICE != 'ALL':
err_msg += f' for the app "{SERVICE}"'
logger.error(err_msg)
exit(1)
else:
raise error
@@ -245,8 +418,8 @@
@cli.command()
@click.option('snapshot', '--snapshot', '-s', envvar='SNAPSHOT', default='latest')
@click.option('path', '--path', '-p', envvar='INCLUDE_PATH')
@click.option('volumes', '--volumes', '-v', is_flag=True)
@click.option('secrets', '--secrets', '-c', is_flag=True)
@click.option('volumes', '--volumes', '-v', envvar='VOLUMES')
@click.option('secrets', '--secrets', '-c', is_flag=True, envvar='SECRETS')
def download(snapshot, path, volumes, secrets):
file_dumps = []
if not any([path, volumes, secrets]):
@@ -263,8 +436,8 @@ def download(snapshot, path, volumes, secrets):
tarinfo.size = len(binary_output)
file_dumps.append((binary_output, tarinfo))
if volumes:
if not SERVICE:
logging.error("Please specify '--host' when using '--volumes'")
if SERVICE == 'ALL':
logger.error("Please specify '--host' when using '--volumes'")
exit(1)
files = list_files(snapshot, VOLUME_PATH)
for f in files[1:]:
@@ -276,8 +449,8 @@ def download(snapshot, path, volumes, secrets):
tarinfo.size = len(binary_output)
file_dumps.append((binary_output, tarinfo))
if secrets:
if not SERVICE:
logging.error("Please specify '--host' when using '--secrets'")
if SERVICE == 'ALL':
logger.error("Please specify '--host' when using '--secrets'")
exit(1)
filename = f"{SERVICE}.json"
files = list_files(snapshot, SECRET_PATH)
@@ -297,7 +470,8 @@ def download(snapshot, path, volumes, secrets):
for binary_output, tarinfo in file_dumps:
tar.addfile(tarinfo, fileobj=io.BytesIO(binary_output))
size = get_formatted_size('/tmp/backup.tar.gz')
print(f"Backup has been written to /tmp/backup.tar.gz with a size of {size}")
print(
f"Backup has been written to /tmp/backup.tar.gz with a size of {size}")
def get_formatted_size(file_path):
@@ -312,13 +486,12 @@ def get_formatted_size(file_path):
def dump(snapshot, path):
cmd = restic.cat.base_command() + ['dump']
if SERVICE:
cmd = cmd + ['--tag', SERVICE]
cmd = cmd + ['--tag', SERVICE]
cmd = cmd + [snapshot, path]
print(f"Dumping {path} from snapshot '{snapshot}'")
output = subprocess.run(cmd, capture_output=True)
if output.returncode:
logging.error(
logger.error(
f"error while dumping {path} from snapshot '{snapshot}': {output.stderr}")
exit(1)
return output.stdout

compose.yml

@@ -2,7 +2,7 @@
version: "3.8"
services:
app:
image: docker:24.0.7-dind
image: git.coopcloud.tech/coop-cloud/backup-bot-two:2.1.1-beta
volumes:
- "/var/run/docker.sock:/var/run/docker.sock"
- "/var/lib/docker/volumes/:/var/lib/docker/volumes/"
@@ -16,17 +16,10 @@ services:
- restic_password
deploy:
labels:
- coop-cloud.${STACK_NAME}.version=0.1.0+latest
- coop-cloud.${STACK_NAME}.version=2.0.1+2.1.1-beta
- coop-cloud.${STACK_NAME}.timeout=${TIMEOUT:-300}
- coop-cloud.backupbot.enabled=true
configs:
- source: entrypoint
target: /entrypoint.sh
mode: 0555
- source: backupbot
target: /usr/bin/backup
mode: 0555
entrypoint: ['/entrypoint.sh']
#entrypoint: ['tail', '-f','/dev/null']
healthcheck:
test: "pgrep crond"
interval: 30s
@@ -41,11 +34,3 @@ secrets:
volumes:
backups:
configs:
entrypoint:
name: ${STACK_NAME}_entrypoint_${ENTRYPOINT_VERSION}
file: entrypoint.sh
backupbot:
name: ${STACK_NAME}_backupbot_${BACKUPBOT_VERSION}
file: backupbot.py

entrypoint.sh Normal file → Executable file (24 lines)

@@ -1,11 +1,6 @@
#!/bin/sh
set -e -o pipefail
apk add --upgrade --no-cache restic bash python3 py3-pip
# Todo use requirements file with specific versions
pip install click==8.1.7 docker==6.1.3 resticpy==1.0.2
set -e
if [ -n "$SSH_HOST_KEY" ]
then
@@ -14,7 +9,22 @@ fi
cron_schedule="${CRON_SCHEDULE:?CRON_SCHEDULE not set}"
echo "$cron_schedule backup create" | crontab -
if [ -n "$PUSH_URL_START" ]
then
push_start_notification="curl -s '$PUSH_URL_START' &&"
fi
if [ -n "$PUSH_URL_FAIL" ]
then
push_fail_notification="|| curl -s '$PUSH_URL_FAIL'"
fi
if [ -n "$PUSH_URL_SUCCESS" ]
then
push_notification=" && (grep -q 'backup finished' /tmp/backup.log && curl -s '$PUSH_URL_SUCCESS' $push_fail_notification)"
fi
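# assemble the final crontab line: optional start ping, then the backup itself
# logged to /tmp/backup.log, then a success ping only if the log contains
# "backup finished" (falling back to the failure ping otherwise)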
echo "$cron_schedule $push_start_notification backup --machine-logs create 2>&1 | tee /tmp/backup.log $push_notification" | crontab -
crontab -l
crond -f -d8 -L /dev/stdout

release/1.0.0+2.0.0-beta Normal file (+1 line)

@@ -0,0 +1 @@
This is the first beta release of the new backup-bot-two rewrite in Python. Be aware when updating: it can break. Please read the README and update your config accordingly.