Compare commits

..

1 Commits

Author SHA1 Message Date
a9128e8450 first steps dockerized 2021-11-08 16:58:25 +01:00
24 changed files with 119 additions and 1246 deletions

View File

@ -1,17 +0,0 @@
---
kind: pipeline
name: linters
steps:
- name: publish image
image: plugins/docker
settings:
username: 3wordchant
password:
from_secret: git_coopcloud_tech_token_3wc
repo: git.coopcloud.tech/coop-cloud/backup-bot-two
tags: ${DRONE_SEMVER_BUILD}
registry: git.coopcloud.tech
when:
event:
include:
- tag

View File

@ -1,40 +0,0 @@
TYPE=backup-bot-two
SECRET_RESTIC_PASSWORD_VERSION=v1
COMPOSE_FILE=compose.yml
RESTIC_REPOSITORY=/backups/restic
CRON_SCHEDULE='30 3 * * *'
# Push Notifiactions
#PUSH_PROMETHEUS_URL=https://pushgateway.example.com/metrics/job/backup
# or
#PUSH_URL_START=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=start
#PUSH_URL_SUCCESS=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=OK
#PUSH_URL_FAIL=https://status.example.com/api/push/xxxxxxxxxx?status=down&msg=fail
# Push Basic Auth
#COMPOSE_FILE="$COMPOSE_FILE:compose.pushbasicauth.yml"
#SECRET_PUSH_BASICAUTH=v1
# swarm-cronjob, instead of built-in cron
#COMPOSE_FILE="$COMPOSE_FILE:compose.swarm-cronjob.yml"
# SSH storage
#SECRET_SSH_KEY_VERSION=v1
#SSH_HOST_KEY="hostname ssh-rsa AAAAB3...
#COMPOSE_FILE="$COMPOSE_FILE:compose.ssh.yml"
# S3 storage
#SECRET_AWS_SECRET_ACCESS_KEY_VERSION=v1
#AWS_ACCESS_KEY_ID=something-secret
#COMPOSE_FILE="$COMPOSE_FILE:compose.s3.yml"
# Secret restic repository
# use a secret to store the RESTIC_REPOSITORY if the repository location contains a secret value
# i.E rest:https://user:SECRET_PASSWORD@host:8000/
# it overwrites the RESTIC_REPOSITORY variable
#SECRET_RESTIC_REPO_VERSION=v1
#COMPOSE_FILE="$COMPOSE_FILE:compose.secret.yml"

View File

@ -1,17 +0,0 @@
export RESTIC_HOST="user@domain.tld"
export RESTIC_PASSWORD_FILE=/run/secrets/restic-password
export BACKUP_DEST=/backups
export SERVER_NAME=domain.tld
export DOCKER_CONTEXT=$SERVER_NAME
# uncomment either this:
#export SSH_KEY_FILE=~/.ssh/id_rsa
# or this:
#export AWS_SECRET_ACCESS_KEY_FILE=s3
#export AWS_ACCESS_KEY_ID=easter-october-emphatic-tug-urgent-customer
# or this:
#export HTTPS_PASSWORD_FILE=/run/secrets/https_password
# optionally limit subset of services for testing
#export SERVICES_OVERRIDE="ghost_domain_tld_app ghost_domain_tld_db"

1
.gitignore vendored
View File

@ -1 +0,0 @@
.venv

View File

@ -1,6 +0,0 @@
# Change log
## 2.0.0 (unreleased)
- Rewrite from Bash to Python
- Add support for push notifications (#24)

View File

@ -1,11 +1,14 @@
FROM docker:24.0.7-dind
FROM alpine:3
RUN apk add --upgrade --no-cache restic bash python3 py3-pip py3-click py3-docker-py py3-json-logger curl
ENV RESTIC_PASSWORD_FILE=
ENV RESTIC_REPOSITORY=
ENV AWS_ACCESS_KEY_ID=
ENV AWS_SECRET_ACCESS_KEY=
# Todo use requirements file with specific versions
RUN pip install --break-system-packages resticpy==1.0.2
RUN apk add --update --no-cache docker-cli bash jq restic
COPY backupbot.py /usr/bin/backup
COPY entrypoint.sh /entrypoint.sh
RUN echo "* * * * * /backup.sh" | crontab -
RUN crontab -l
COPY backup.sh /
ENTRYPOINT /entrypoint.sh
ENTRYPOINT ["crond", "-f", "-L", "/dev/stdout"]

301
README.md
View File

@ -1,286 +1,45 @@
# Backupbot II
# Backupbot II: This Time It's Easily Configurable
[![Build Status](https://build.coopcloud.tech/api/badges/coop-cloud/backup-bot-two/status.svg)](https://build.coopcloud.tech/coop-cloud/backup-bot-two)
Automatically backup files from running Docker Swarm services based on labels.
_This Time, It's Easily Configurable_
## TODO
Automatically take backups from all volumes of running Docker Swarm services and runs pre- and post commands.
- [ ] Make a Docker image of this
- [ ] Rip out or improve Restic stuff
- [ ] Add secret handling for database backups
- [ ] Continuous linting with shellcheck
<!-- metadata -->
## Label format
* **Category**: Utilities
* **Status**: 0, work-in-progress
* **Image**: [`git.coopcloud.tech/coop-cloud/backup-bot-two`](https://git.coopcloud.tech/coop-cloud/-/packages/container/backup-bot-two), 4, upstream
* **Healthcheck**: No
* **Backups**: N/A
* **Email**: N/A
* **Tests**: No
* **SSO**: N/A
(Haven't done secrets yet, here are two options)
<!-- endmetadata -->
## Background
There are lots of Docker volume backup systems; all of them have one or both of these limitations:
- You need to define all the volumes to back up in the configuration system
- Backups require services to be stopped to take consistent copies
Backupbot II tries to help, by
1. **letting you define backups using Docker labels**, so you can **easily collect your backups for use with another system** like docker-volume-backup.
2. **running pre- and post-commands** before and after backups, for example to use database tools to take a backup from a running service.
## Deployment
### With Co-op Cloud
* `abra app new backup-bot-two`
* `abra app config <app-name>`
- set storage options. Either configure `CRON_SCHEDULE`, or set up `swarm-cronjob`
* `abra app secret generate -a <backupbot_name>`
* `abra app deploy <app-name>`
## Configuration
Per default Backupbot stores the backups locally in the repository `/backups/restic`, which is accessible as volume at `/var/lib/docker/volumes/<backupbot_name>_backups/_data/restic/`
The backup location can be changed using the `RESTIC_REPOSITORY` env variable.
### S3 Storage
To use S3 storage as backup location set the following envs:
v1:
```
RESTIC_REPOSITORY=s3:<S3-SERVICE-URL>/<BUCKET-NAME>
SECRET_AWS_SECRET_ACCESS_KEY_VERSION=v1
AWS_ACCESS_KEY_ID=<MY_ACCESS_KEY>
COMPOSE_FILE="$COMPOSE_FILE:compose.s3.yml"
```
and add your `<SECRET_ACCESS_KEY>` as docker secret:
`abra app secret insert <backupbot_name> aws_secret_access_key v1 <SECRET_ACCESS_KEY>`
See [restic s3 docs](https://restic.readthedocs.io/en/latest/030_preparing_a_new_repo.html#amazon-s3) for more information.
### SFTP Storage
> With sftp it is not possible to prevent the backupbot from deleting backups in case of a compromised machine. Therefore we recommend to use S3, REST or rclone server without delete permissions.
To use SFTP storage as backup location set the following envs:
```
RESTIC_REPOSITORY=sftp:user@host:/restic-repo-path
SECRET_SSH_KEY_VERSION=v1
SSH_HOST_KEY="hostname ssh-rsa AAAAB3...
COMPOSE_FILE="$COMPOSE_FILE:compose.ssh.yml"
```
To get the `SSH_HOST_KEY` run the following command `ssh-keyscan <hostname>`
Generate an ssh keypair: `ssh-keygen -t ed25519 -f backupkey -P ''`
Add the key to your `authorized_keys`:
`ssh-copy-id -i backupkey <user>@<hostname>`
Add your `SSH_KEY` as docker secret:
```
abra app secret insert <backupbot_name> ssh_key v1 """$(cat backupkey)
"""
```
> Attention: This command needs to be executed exactly as stated above, because it places a trailing newline at the end, if this is missing you will get the following error: `Load key "/run/secrets/ssh_key": error in libcrypto`
### Restic REST server Storage
You can simply set the `RESTIC_REPOSITORY` variable to your REST server URL `rest:http://host:8000/`.
If you access the REST server with a password `rest:https://user:pass@host:8000/` you should hide the whole URL containing the password inside a secret.
Uncomment these lines:
```
SECRET_RESTIC_REPO_VERSION=v1
COMPOSE_FILE="$COMPOSE_FILE:compose.secret.yml"
```
Add your REST server url as secret:
```
abra app secret insert <backupbot_name> restic_repo v1 "rest:https://user:pass@host:8000/"
```
The secret will overwrite the `RESTIC_REPOSITORY` variable.
See [restic REST docs](https://restic.readthedocs.io/en/latest/030_preparing_a_new_repo.html#rest-server) for more information.
## Push notifications
It is possible to configure three push events, that may trigger on the backup cronjob. Those can be used to detect failures from mointoring systems.
The events are:
- start
- success
- fail
### Using a Prometheus Push Gateway
[A prometheus push gateway](https://git.coopcloud.tech/coop-cloud/monitoring-ng#setup-push-gateway) can be used by setting the following env variables:
- `PUSH_PROMETHEUS_URL=pushgateway.example.com/metrics/job/backup`
### Using custom URLs
The following env variables can be used to setup push notifications for backups. `PUSH_URL_START` is requested just before the backups starts, `PUSH_URL_SUCCESS` is only requested if the backup was successful and if the backup fails `PUSH_URL_FAIL` will be requested.
Each variable is optional and independent of the other.
```
PUSH_URL_START=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=start
PUSH_URL_SUCCESS=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=OK
PUSH_URL_FAIL=https://status.example.com/api/push/xxxxxxxxxx?status=down&msg=fail
```
### Push endpoint behind basic auth
Insert the basic auth secret
`abra app secret insert <backupbot_name> push_basicauth v1 "user:password"`
Enable basic auth in the env file, by uncommenting the following line:
```
#COMPOSE_FILE="$COMPOSE_FILE:compose.pushbasicauth.yml"
#SECRET_PUSH_BASICAUTH=v1
```
## Usage
Run the cronjob that creates a backup, including the push notifications and docker logging:
`abra app cmd <backupbot_name> app run_cron`
Create a backup of all apps:
`abra app run <backupbot_name> app -- backup create`
> The apps to backup up need to be deployed
Create an individual backup:
`abra app run <backupbot_name> app -- backup --host <target_app_name> create`
Create a backup to a local repository:
`abra app run <backupbot_name> app -- backup create -r /backups/restic`
> It is recommended to shutdown/undeploy an app before restoring the data
Restore the latest snapshot of all including apps:
`abra app run <backupbot_name> app -- backup restore`
Restore a specific snapshot of an individual app:
`abra app run <backupbot_name> app -- backup --host <target_app_name> restore --snapshot <snapshot_id>`
Show all snapshots:
`abra app run <backupbot_name> app -- backup snapshots`
Show all snapshots containing a specific app:
`abra app run <backupbot_name> app -- backup --host <target_app_name> snapshots`
Show all files inside the latest snapshot (can be very verbose):
`abra app run <backupbot_name> app -- backup ls`
Show specific files inside a selected snapshot:
`abra app run <backupbot_name> app -- backup ls --snapshot <snapshot_id> /var/lib/docker/volumes/`
Download files from a snapshot:
```
filename=$(abra app run <backupbot_name> app -- backup download --snapshot <snapshot_id> --path <absolute_path>)
abra app cp <backupbot_name> app:$filename .
```
## Run restic
```
abra app run <backupbot_name> app bash
export AWS_SECRET_ACCESS_KEY=$(cat $AWS_SECRET_ACCESS_KEY_FILE)
export RESTIC_PASSWORD=$(cat $RESTIC_PASSWORD_FILE)
restic snapshots
```
## Recipe Configuration
Like Traefik, or `swarm-cronjob`, Backupbot II uses access to the Docker socket to read labels from running Docker Swarm services:
1. Add `ENABLE_BACKUPS=true` to .env.sample
2. Add backupbot labels to the compose file
```
services:
db:
deploy:
labels:
backupbot.backup: "${ENABLE_BACKUPS:-true}"
backupbot.backup.pre-hook: "/pg_backup.sh backup"
backupbot.backup.volumes.db.path: "backup.sql"
backupbot.restore.post-hook: '/pg_backup.sh restore'
backupbot.backup.volumes.redis: "false"
backupbot.backup: "true"
backupbot.backup.repos: "$some_thing"
backupbot.backup.at: "* * * * *"
backupbot.backup.pre-hook: 'mysqldump -u root -p"$(cat /run/secrets/db_root_password)" -f /tmp/dump/dump.db'
backupbot.backup.post-hook: "rm -rf /tmp/dump/dump.db"
backupbot.backup.path: "/tmp/dump/"
```
v2:
```
deploy:
labels:
backupbot.backup: "true"
backupbot.backup.repos: "$some_thing"
backupbot.backup.at: "* * * * *"
backupbot.backup.post-hook: "rm -rf /tmp/dump/dump.db"
backupbot.backup.secrets": "db_root_password",
backupbot.backup.pre-hook: 'mysqldump -u root -p"$DB_ROOT_PASSWORD" -f /tmp/dump/dump.db'
```
- `backupbot.backup` -- set to `true` to back up this service (REQUIRED)
- this is the only required backup label, per default it will backup all volumes
- `backupbot.backup.volumes.<volume_name>.path` -- only backup the listed relative paths from `<volume_name>`
- `backupbot.backup.volumes.<volume_name>: false` -- exclude <volume_name> from the backup
- `backupbot.backup.pre-hook` -- command to run before copying files
- i.e. save all database dumps into the volumes
- `backupbot.backup.post-hook` -- command to run after copying files
- `backupbot.restore.pre-hook` -- command to run before restoring files
- `backupbot.restore.post-hook` -- command to run after restoring files
- i.e. read all database dumps from the volumes
## Questions:
3. (Optional) add backup/restore scripts to the compose file
- Should frequency be configurable per service, centrally, or both?
```
services:
db:
configs:
- source: pg_backup
target: /pg_backup.sh
mode: 0555
configs:
pg_backup:
name: ${STACK_NAME}_pg_backup_${PG_BACKUP_VERSION}
file: pg_backup.sh
- "backupbot.backup.at: "* * * * *"
```
Version the config file in `abra.sh`:
```
export PG_BACKUP_VERSION=v1
```
As in the above example, you can reference Docker Secrets, e.g. for looking up database passwords, by reading the files in `/run/secrets` directly.
[abra]: https://git.autonomic.zone/autonomic-cooperative/abra
## Backupbot Development
1. Copy modified backupbot.py into the container:
```
cp backupbot.py /tmp/backupbot.py; git stash; abra app cp <backupbot_name> /tmp/backupbot.py app:/usr/bin/backupbot.py; git checkout main; git stash pop
```
2. Testing stuff with the python interpreter inside the container:
```
abra app run <backupbot_name> app bash
cd /usr/bin/
python
from backupbot import *
```
### Versioning
- App version: changes to `backup.py` (build a new image)
- Co-op Cloud package version: changes to recipe.
For example, starting with 1.0.0+2.0.0:
"patch" change to recipe: 1.0.1+2.0.0
"patch" change to backup.py: increment both, so 1.1.0+2.0.1
because bumping the image version would result in a minor recipe release
https://git.coopcloud.tech/coop-cloud/backup-bot-two/issues/4

11
abra.sh
View File

@ -1,11 +0,0 @@
export SSH_CONFIG_VERSION=v1
export ENTRYPOINT_VERSION=v17
export CRONJOB_VERSION=v2
run_cron () {
schedule="$(crontab -l | tr -s " " | cut -d ' ' -f-5)"
rm -f /tmp/backup.log
echo "* * * * * $(crontab -l | tr -s " " | cut -d ' ' -f6-)" | crontab -
while [ ! -f /tmp/backup.log ]; do sleep 1; done
echo "$schedule $(crontab -l | tr -s " " | cut -d ' ' -f6-)" | crontab -
}

59
backup.sh Executable file
View File

@ -0,0 +1,59 @@
#!/bin/bash
### FIXME: just for testing
echo $RESTIC_PASSWORD_FILE
echo $RESTIC_REPOSITORY
echo $AWS_ACCESS_KEY_ID
echo $AWS_SECRET_ACCESS_KEY
export DOCKER_CONTEXT=default
mkdir /tmp/backups
backup_path=/tmp/backups
mapfile -t services < <(docker service ls --format '{{ .Name }}')
# FIXME: just for testing
services=("cloud_local_db")
for service in "${services[@]}"; do
echo "service: $service"
details=$(docker service inspect "$service" --format "{{ json .Spec.Labels }}")
if echo "$details" | jq -r '.["backupbot.backup"]' | grep -q 'true'; then
pre=$(echo "$details" | jq -r '.["backupbot.backup.pre-hook"]')
post=$(echo "$details" | jq -r '.["backupbot.backup.post-hook"]')
path=$(echo "$details" | jq -r '.["backupbot.backup.path"]')
if [ "$path" = "null" ]; then
echo "ERROR: missing 'path' for $service"
continue # or maybe exit?
fi
container=$(docker container ls -f "name=$service" --format '{{ .ID }}')
echo "backing up $service"
test -d "$backup_path/$service" || mkdir "$backup_path/$service"
if [ "$pre" != "null" ]; then
# run the precommand
# shellcheck disable=SC2086
docker exec "$container" $pre
fi
# run the backup
docker cp -a "$container:$path" "$backup_path/$service"
if [ "$post" != "null" ]; then
# run the postcommand
# shellcheck disable=SC2086
docker exec "$container" $post
fi
fi
# Check if restic repo exists
if [ -z "$(restic cat config)" ] 2>/dev/null; then
echo "initializing restic repo"
restic init "$backup_path"
fi
restic backup --tag coop-cloud "$backup_path" # --quiet
done

View File

@ -1,649 +0,0 @@
#!/usr/bin/python3
import os
import sys
import click
import json
import subprocess
import logging
import docker
import restic
import tarfile
import io
from pythonjsonlogger import jsonlogger
from datetime import datetime, timezone
from restic.errors import ResticFailedError
from pathlib import Path
from shutil import copyfile, rmtree
VOLUME_PATH = "/var/lib/docker/volumes/"
SECRET_PATH = "/secrets/"
SERVICE = "ALL"
logger = logging.getLogger("backupbot")
logging.addLevelName(55, "SUMMARY")
setattr(logging, "SUMMARY", 55)
setattr(
logger,
"summary",
lambda message, *args, **kwargs: logger.log(55, message, *args, **kwargs),
)
def handle_exception(exc_type, exc_value, exc_traceback):
if issubclass(exc_type, KeyboardInterrupt):
sys.__excepthook__(exc_type, exc_value, exc_traceback)
return
logger.critical("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
sys.excepthook = handle_exception
@click.group()
@click.option("-l", "--log", "loglevel")
@click.option(
"-m", "--machine-logs", "machine_logs", is_flag=True, envvar="MACHINE_LOGS"
)
@click.option("service", "--host", "-h", envvar="SERVICE")
@click.option("repository", "--repo", "-r", envvar="RESTIC_REPOSITORY")
def cli(loglevel, service, repository, machine_logs):
global SERVICE
if service:
SERVICE = service.replace(".", "_")
if repository:
os.environ["RESTIC_REPOSITORY"] = repository
if loglevel:
numeric_level = getattr(logging, loglevel.upper(), None)
if not isinstance(numeric_level, int):
raise ValueError("Invalid log level: %s" % loglevel)
logger.setLevel(numeric_level)
logHandler = logging.StreamHandler()
if machine_logs:
formatter = jsonlogger.JsonFormatter(
"%(levelname)s %(filename)s %(lineno)s %(process)d %(message)s",
rename_fields={"levelname": "message_type"},
)
logHandler.setFormatter(formatter)
logger.addHandler(logHandler)
export_secrets()
init_repo()
def init_repo():
if repo := os.environ.get("RESTIC_REPOSITORY_FILE"):
# RESTIC_REPOSITORY_FILE and RESTIC_REPOSITORY are mutually exclusive
del os.environ["RESTIC_REPOSITORY"]
else:
repo = os.environ["RESTIC_REPOSITORY"]
restic.repository = repo
logger.debug(f"set restic repository location: {repo}")
restic.password_file = "/var/run/secrets/restic_password"
try:
restic.cat.config()
except ResticFailedError as error:
if "unable to open config file" in str(error):
result = restic.init()
logger.info(f"Initialized restic repo: {result}")
else:
raise error
def export_secrets():
for env in os.environ:
if env.endswith("FILE") and not "COMPOSE_FILE" in env:
logger.debug(f"exported secret: {env}")
with open(os.environ[env]) as file:
secret = file.read()
os.environ[env.removesuffix("_FILE")] = secret
# logger.debug(f"Read secret value: {secret}")
@cli.command()
@click.option("retries", "--retries", "-r", envvar="RETRIES", default=1)
def create(retries):
app_settings = parse_backup_labels()
pre_commands, post_commands, backup_paths, apps_versions = get_backup_details(
app_settings
)
copy_secrets(apps_versions)
backup_paths.append(Path(SECRET_PATH))
run_commands(pre_commands)
backup_volumes(backup_paths, apps_versions, int(retries))
run_commands(post_commands)
@cli.command()
@click.option("snapshot_id", "--snapshot", "-s", envvar="SNAPSHOT", default="latest")
@click.option("target", "--target", "-t", envvar="TARGET", default="/")
@click.option(
"noninteractive", "--noninteractive", envvar="NONINTERACTIVE", is_flag=True
)
@click.option("volumes", "--volumes", "-v", envvar="VOLUMES", multiple=True)
@click.option("container", "--container", "-c", envvar="CONTAINER", multiple=True)
@click.option("no_commands", "--no-commands", envvar="NO_COMMANDS", is_flag=True)
def restore(snapshot_id, target, noninteractive, volumes, container, no_commands):
app_settings = parse_backup_labels("restore", container)
if SERVICE != "ALL":
if not app_settings.get(SERVICE):
logger.error(
f"The app {SERVICE} is not running, use the restore-path argument to restore paths of undeployed apps"
)
exit(1)
app_settings = {SERVICE: app_settings.get(SERVICE)}
pre_commands, post_commands, backup_paths, apps_versions = get_backup_details(
app_settings, volumes
)
snapshots = get_snapshots(snapshot_id)
if not snapshots:
logger.error(
f"No Snapshots with ID {snapshot_id} for {apps_versions.keys()} found."
)
exit(1)
snapshot = snapshots[0]
snapshot_id = snapshot["short_id"]
if not noninteractive:
print(f"Snapshot to restore: \t{snapshot_id}")
restore_app_versions = app_versions_from_tags(snapshot.get("tags"))
print("Apps:")
for app, version in apps_versions.items():
restore_version = restore_app_versions.get(app)
print(f"\t{app} \t {restore_version}")
if version != restore_version:
print(f"WARNING!!! The running app is deployed with version {version}")
print("The following volume paths will be restored:")
for p in backup_paths:
print(f"\t{p}")
if not no_commands:
print("The following commands will be executed:")
for container, cmd in list(pre_commands.items()) + list(
post_commands.items()
):
print(f"\t{container.labels['com.docker.swarm.service.name']}:\t{cmd}")
snapshot_date = datetime.fromisoformat(snapshot["time"])
delta = datetime.now(tz=timezone.utc) - snapshot_date
print(f"This snapshot is {delta} old")
print("\nTHIS COMMAND WILL IRREVERSIBLY OVERWRITES FILES")
prompt = input("Type YES (uppercase) to continue: ")
if prompt != "YES":
logger.error("Restore aborted")
exit(1)
print(f"Restoring Snapshot {snapshot_id} at {target}")
if not no_commands and pre_commands:
print(f"Run pre commands.")
run_commands(pre_commands)
if backup_paths:
result = restic_restore(
snapshot_id=snapshot_id, include=backup_paths, target_dir=target
)
logger.debug(result)
else:
print("No paths to restore.")
if not no_commands and post_commands:
print(f"Run post commands.")
run_commands(post_commands)
@cli.command()
@click.option("snapshot_id", "--snapshot", "-s", envvar="SNAPSHOT", default="latest")
@click.option("target", "--target", "-t", envvar="TARGET", default="/")
@click.option(
"noninteractive", "--noninteractive", envvar="NONINTERACTIVE", is_flag=True
)
@click.argument("paths", nargs=-1, required=True, envvar="INCLUDE_PATH")
def restore_path(snapshot_id, target, noninteractive, paths):
"""PATHS: list of paths to restore"""
snapshots = get_snapshots(snapshot_id)
if not snapshots:
logger.error(f"No Snapshots with ID {snapshot_id} for app {SERVICE} found.")
exit(1)
snapshot = snapshots[0]
snapshot_id = snapshot["short_id"]
if not noninteractive:
print(f"Snapshot to restore: \t{snapshot_id}")
restore_app_versions = app_versions_from_tags(snapshot.get("tags"))
print("Apps:")
for app, version in restore_app_versions.items():
if SERVICE == "ALL" or SERVICE == app:
print(f"\t{app} \t {version}")
print("The following paths will be restored:")
for p in paths:
print(f"\t{p}")
snapshot_date = datetime.fromisoformat(snapshot["time"])
delta = datetime.now(tz=timezone.utc) - snapshot_date
print(f"This snapshot is {delta} old")
print("\nTHIS COMMAND WILL IRREVERSIBLY OVERWRITES FILES")
prompt = input("Type YES (uppercase) to continue: ")
if prompt != "YES":
logger.error("Restore aborted")
exit(1)
print(f"Restoring Snapshot {snapshot_id} at {target}")
result = restic_restore(snapshot_id=snapshot_id, include=paths, target_dir=target)
logger.debug(result)
def restic_restore(snapshot_id, include=[], target_dir=None):
cmd = restic.cat.base_command() + ["restore", snapshot_id]
for path in include:
cmd.extend(["--include", path])
if target_dir:
cmd.extend(["--target", target_dir])
return restic.internal.command_executor.execute(cmd)
def get_snapshots(snapshot_id=None):
if snapshot_id and snapshot_id != "latest":
snapshots = restic.snapshots(snapshot_id=snapshot_id)
if not SERVICE in app_versions_from_tags(snapshots[0].get("tags")):
logger.error(f"Snapshot with ID {snapshot_id} does not contain {SERVICE}")
exit(1)
else:
snapshots = restic.snapshots()
snapshots = list(
filter(
lambda x: SERVICE in app_versions_from_tags(x.get("tags")), snapshots
)
)
if snapshot_id == "latest":
return snapshots[-1:]
else:
return snapshots
def app_versions_from_tags(tags):
if tags:
app_versions = map(lambda x: x.split(":"), tags)
return {i[0]: i[1] if len(i) > 1 else None for i in app_versions}
else:
return {}
def str2bool(value: str) -> bool:
return value.lower() in ("yes", "true", "t", "1")
def parse_backup_labels(hook_type="backup", selected_container=[]):
client = docker.from_env()
container_by_service = {
c.labels.get("com.docker.swarm.service.name"): c
for c in client.containers.list()
}
services = client.services.list()
app_settings = {}
for s in services:
specs = s.attrs["Spec"]
labels = specs["Labels"]
stack_name = labels["com.docker.stack.namespace"]
container_name = s.name.removeprefix(f"{stack_name}_")
version = labels.get(f"coop-cloud.{stack_name}.version")
settings = app_settings[stack_name] = app_settings.get(stack_name) or {}
if (backup := labels.get("backupbot.backup")) and str2bool(backup):
settings["enabled"] = True
if version:
settings["version"] = version
if selected_container and container_name not in selected_container:
logger.debug(f"Skipping {s.name} because it's not a selected container")
continue
if mounts := specs["TaskTemplate"]["ContainerSpec"].get("Mounts"):
volumes = parse_volumes(stack_name, mounts)
volumes.update(settings.get("volumes") or {})
settings["volumes"] = volumes
excluded_volumes, included_volume_paths = parse_excludes_includes(labels)
settings["excluded_volumes"] = excluded_volumes.union(
settings.get("excluded_volumes") or set()
)
settings["included_volume_paths"] = included_volume_paths.union(
settings.get("included_volume_paths") or set()
)
if container := container_by_service.get(s.name):
if command := labels.get(f"backupbot.{hook_type}.pre-hook"):
if not (pre_hooks := settings.get("pre_hooks")):
pre_hooks = settings["pre_hooks"] = {}
pre_hooks[container] = command
if command := labels.get(f"backupbot.{hook_type}.post-hook"):
if not (post_hooks := settings.get("post_hooks")):
post_hooks = settings["post_hooks"] = {}
post_hooks[container] = command
else:
logger.debug(f"Container {s.name} is not running.")
if labels.get(f"backupbot.{hook_type}.pre-hook") or labels.get(
f"backupbot.{hook_type}.post-hook"
):
logger.error(f"Container {s.name} contain hooks but it's not running")
return app_settings
def get_backup_details(app_settings, volumes=[]):
backup_paths = set()
backup_apps_versions = {}
pre_hooks = {}
post_hooks = {}
for app, settings in app_settings.items():
if settings.get("enabled"):
if SERVICE != "ALL" and SERVICE != app:
continue
backup_apps_versions[app] = settings.get("version")
add_backup_paths(backup_paths, settings, app, volumes)
if hooks := settings.get("pre_hooks"):
pre_hooks.update(hooks)
if hooks := settings.get("post_hooks"):
post_hooks.update(hooks)
return pre_hooks, post_hooks, list(backup_paths), backup_apps_versions
def add_backup_paths(backup_paths, settings, app, selected_volumes):
if volumes := settings.get("volumes"):
if includes := settings.get("included_volume_paths"):
included_volumes = list(zip(*includes))[0]
for volume, rel_paths in includes:
if not (volume_path := volumes.get(volume)):
logger.error(
f"Can not find volume with the name {volume} for {app}"
)
continue
if selected_volumes and volume not in selected_volumes:
logger.debug(
f"Skipping {volume}:{rel_paths} because the volume is not selected"
)
continue
for p in rel_paths:
absolute_path = Path(f"{volume_path}/{p}")
backup_paths.add(absolute_path)
else:
included_volumes = []
excluded_volumes = settings.get("excluded_volumes") or []
for name, path in volumes.items():
if selected_volumes and name not in selected_volumes:
logger.debug(
f"Skipping volume: {name} because the volume is not selected"
)
continue
if name in excluded_volumes:
logger.debug(f"Skipping volume: {name} because the volume is excluded")
continue
if name in included_volumes:
logger.debug(f"Skipping volume: {name} because a path is selected")
continue
backup_paths.add(path)
else:
logger.warning(f"{app} does not contain any volumes")
def parse_volumes(stack_name, mounts):
volumes = {}
for m in mounts:
if m["Type"] != "volume":
continue
relative_path = m["Source"]
name = relative_path.removeprefix(stack_name + "_")
absolute_path = Path(f"{VOLUME_PATH}{relative_path}/_data/")
volumes[name] = absolute_path
return volumes
def parse_excludes_includes(labels):
excluded_volumes = set()
included_volume_paths = set()
for label, value in labels.items():
if label.startswith("backupbot.backup.volumes."):
volume_name = label.removeprefix("backupbot.backup.volumes.").removesuffix(
".path"
)
if label.endswith("path"):
relative_paths = tuple(value.split(","))
included_volume_paths.add((volume_name, relative_paths))
elif not str2bool(value):
excluded_volumes.add(volume_name)
return excluded_volumes, included_volume_paths
def copy_secrets(apps):
# TODO: check if it is deployed
rmtree(SECRET_PATH, ignore_errors=True)
os.mkdir(SECRET_PATH)
client = docker.from_env()
container_by_service = {
c.labels.get("com.docker.swarm.service.name"): c
for c in client.containers.list()
}
services = client.services.list()
for s in services:
app_name = s.attrs["Spec"]["Labels"]["com.docker.stack.namespace"]
if app_name in apps and (
app_secs := s.attrs["Spec"]["TaskTemplate"]["ContainerSpec"].get("Secrets")
):
if not container_by_service.get(s.name):
logger.warning(
f"Container {s.name} is not running, secrets can not be copied."
)
continue
container_id = container_by_service[s.name].id
for sec in app_secs:
src = f"/var/lib/docker/containers/{container_id}/mounts/secrets/{sec['SecretID']}"
if not Path(src).exists():
logger.error(
f"For the secret {sec['SecretName']} the file {src} does not exist for {s.name}"
)
continue
dst = SECRET_PATH + sec["SecretName"]
logger.debug(f"Copy Secret {sec['SecretName']}")
copyfile(src, dst)
def run_commands(commands):
for container, command in commands.items():
if not command:
continue
# Remove bash/sh wrapping
command = (
command.removeprefix("bash -c").removeprefix("sh -c").removeprefix(" ")
)
# Remove quotes surrounding the command
if (
len(command) >= 2
and command[0] == command[-1]
and (command[0] == "'" or command[0] == '"')
):
command = command[1:-1]
# Use bash's pipefail to return exit codes inside a pipe to prevent silent failure
command = f"bash -c 'set -o pipefail;{command}'"
logger.info(f"run command in {container.name}:")
logger.info(command)
result = container.exec_run(command)
if result.exit_code:
logger.error(
f"Failed to run command {command} in {container.name}: {result.output.decode()}"
)
else:
logger.debug(result.output.decode())
def backup_volumes(backup_paths, apps_versions, retries, dry_run=False):
while True:
try:
logger.info("Backup these paths:")
logger.info("\n".join(map(str, backup_paths)))
backup_paths = list(filter(path_exists, backup_paths))
cmd = restic.cat.base_command()
parent = get_snapshots("latest")
if parent:
# https://restic.readthedocs.io/en/stable/040_backup.html#file-change-detection
cmd.extend(["--parent", parent[0]["short_id"]])
tags = [f"{app}:{version}" for app, version in apps_versions.items()]
if SERVICE == "ALL":
tags.append(SERVICE)
logger.info("Start volume backup")
result = restic.internal.backup.run(
cmd, backup_paths, dry_run=dry_run, tags=tags
)
logger.summary("backup finished", extra=result)
return
except ResticFailedError as error:
logger.error(f"Backup failed for {SERVICE}.")
logger.error(error, exc_info=True)
if retries > 0:
retries -= 1
else:
exit(1)
def path_exists(path):
if not path.exists():
logger.error(f"{path} does not exist")
return path.exists()
@cli.command()
def snapshots():
snapshots = get_snapshots()
for snap in snapshots:
output = [snap["time"].split(".")[0], snap["short_id"]]
if tags := snap.get("tags"):
app_versions = app_versions_from_tags(tags)
if version := app_versions.get(SERVICE):
output.append(version)
print(*output)
if not snapshots:
err_msg = "No Snapshots found"
if SERVICE != "ALL":
service_name = SERVICE.replace("_", ".")
err_msg += f" for app {service_name}"
logger.warning(err_msg)
@cli.command()
@click.option("snapshot", "--snapshot", "-s", envvar="SNAPSHOT", default="latest")
@click.option("show_all", "--all", "-a", envvar="SHOW_ALL", is_flag=True)
@click.option("timestamps", "--timestamps", "-t", envvar="TIMESTAMPS", is_flag=True)
@click.argument(
"path", required=False, default="/var/lib/docker/volumes/", envvar="INCLUDE_PATH"
)
def ls(snapshot, show_all, timestamps, path):
if snapshot == "latest":
latest_snapshot = get_snapshots("latest")
if not latest_snapshot:
logger.error(f"There is no latest snapshot for {SERVICE}")
exit(1)
snapshot = latest_snapshot[0]["short_id"]
if show_all:
path = None
results = list_files(snapshot, path)
for r in results:
if r.get("path"):
if timestamps:
print(f"{r['ctime']}\t{r['path']}")
else:
print(f"{r['path']}")
def list_files(snapshot, path):
cmd = restic.cat.base_command() + ["ls"]
cmd.append(snapshot)
if path:
cmd.append(path)
try:
output = restic.internal.command_executor.execute(cmd)
except ResticFailedError as error:
if "no snapshot found" in str(error):
err_msg = f'There is no snapshot "{snapshot}"'
if SERVICE != "ALL":
err_msg += f' for the app "{SERVICE}"'
logger.error(err_msg)
exit(1)
else:
raise error
output = output.replace("}\n{", "}|{")
results = list(map(json.loads, output.split("|")))
return results
@cli.command()
@click.option("snapshot", "--snapshot", "-s", envvar="SNAPSHOT", default="latest")
@click.option("path", "--path", "-p", envvar="INCLUDE_PATH")
@click.option("volumes", "--volumes", "-v", envvar="VOLUMES")
@click.option("secrets", "--secrets", "-c", is_flag=True, envvar="SECRETS")
def download(snapshot, path, volumes, secrets):
file_dumps = []
if snapshot == "latest":
latest_snapshot = get_snapshots("latest")
if not latest_snapshot:
logger.error(f"There is no latest snapshot for {SERVICE}")
exit(1)
snapshot = latest_snapshot[0]["short_id"]
if not any([path, volumes, secrets]):
volumes = secrets = True
if path:
path = path.removesuffix("/")
binary_output = dump(snapshot, path)
files = list_files(snapshot, path)
filetype = [f.get("type") for f in files if f.get("path") == path][0]
filename = Path(path).name
if filetype == "dir":
filename = filename + ".tar"
tarinfo = tarfile.TarInfo(name=filename)
tarinfo.size = len(binary_output)
file_dumps.append((binary_output, tarinfo))
if volumes:
if SERVICE == "ALL":
logger.error("Please specify '--host' when using '--volumes'")
exit(1)
files = list_files(snapshot, VOLUME_PATH)
for f in files[1:]:
path = f["path"]
if Path(path).name.startswith(SERVICE) and f["type"] == "dir":
binary_output = dump(snapshot, path)
filename = f"{Path(path).name}.tar"
tarinfo = tarfile.TarInfo(name=filename)
tarinfo.size = len(binary_output)
file_dumps.append((binary_output, tarinfo))
if secrets:
if SERVICE == "ALL":
logger.error("Please specify '--host' when using '--secrets'")
exit(1)
filename = f"{SERVICE}.json"
files = list_files(snapshot, SECRET_PATH)
secrets = {}
for f in files[1:]:
path = f["path"]
if Path(path).name.startswith(SERVICE) and f["type"] == "file":
secret = dump(snapshot, path).decode()
secret_name = path.removeprefix(f"{SECRET_PATH}{SERVICE}_")
secrets[secret_name] = secret
binary_output = json.dumps(secrets).encode()
tarinfo = tarfile.TarInfo(name=filename)
tarinfo.size = len(binary_output)
file_dumps.append((binary_output, tarinfo))
with tarfile.open("/tmp/backup.tar.gz", "w:gz") as tar:
print(f"Writing files to /tmp/backup.tar.gz...")
for binary_output, tarinfo in file_dumps:
tar.addfile(tarinfo, fileobj=io.BytesIO(binary_output))
size = get_formatted_size("/tmp/backup.tar.gz")
print(f"Backup has been written to /tmp/backup.tar.gz with a size of {size}")
def get_formatted_size(file_path):
file_size = os.path.getsize(file_path)
units = ["Bytes", "KB", "MB", "GB", "TB"]
for unit in units:
if file_size < 1024:
return f"{round(file_size, 3)} {unit}"
file_size /= 1024
return f"{round(file_size, 3)} {units[-1]}"
def dump(snapshot, path):
cmd = restic.cat.base_command() + ["dump"]
cmd = cmd + [snapshot, path]
print(f"Dumping {path} from snapshot '{snapshot}'")
output = subprocess.run(cmd, capture_output=True)
if output.returncode:
logger.error(
f"error while dumping {path} from snapshot '{snapshot}': {output.stderr}"
)
exit(1)
return output.stdout
if __name__ == "__main__":
cli()

View File

@ -1,11 +0,0 @@
---
version: "3.8"
services:
app:
secrets:
- push_basicauth
secrets:
push_basicauth:
external: true
name: ${STACK_NAME}_push_basicauth_${SECRET_PUSH_BASICAUTH}

View File

@ -1,14 +0,0 @@
---
version: "3.8"
services:
app:
environment:
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY_FILE=/run/secrets/aws_secret_access_key
secrets:
- aws_secret_access_key
secrets:
aws_secret_access_key:
external: true
name: ${STACK_NAME}_aws_secret_access_key_${SECRET_AWS_SECRET_ACCESS_KEY_VERSION}

View File

@ -1,13 +0,0 @@
---
version: "3.8"
services:
app:
environment:
- RESTIC_REPOSITORY_FILE=/run/secrets/restic_repo
secrets:
- restic_repo
secrets:
restic_repo:
external: true
name: ${STACK_NAME}_restic_repo_${SECRET_RESTIC_REPO_VERSION}

View File

@ -1,23 +0,0 @@
---
version: "3.8"
services:
app:
environment:
- SSH_KEY_FILE=/run/secrets/ssh_key
- SSH_HOST_KEY
secrets:
- source: ssh_key
mode: 0400
configs:
- source: ssh_config
target: /root/.ssh/config
secrets:
ssh_key:
external: true
name: ${STACK_NAME}_ssh_key_${SECRET_SSH_KEY_VERSION}
configs:
ssh_config:
name: ${STACK_NAME}_ssh_config_${SSH_CONFIG_VERSION}
file: ssh_config

View File

@ -1,15 +0,0 @@
---
version: "3.8"
services:
app:
deploy:
mode: replicated
replicas: 0
labels:
- "swarm.cronjob.enable=true"
# Note(3wc): every 5m, testing
- "swarm.cronjob.schedule=*/5 * * * *"
# Note(3wc): blank label to be picked up by `abra recipe sync`
restart_policy:
condition: none
entrypoint: [ "/usr/bin/backup.sh" ]

View File

@ -1,51 +0,0 @@
---
version: "3.8"
services:
app:
image: git.coopcloud.tech/coop-cloud/backup-bot-two:2.3.0-beta
volumes:
- "/var/run/docker.sock:/var/run/docker.sock"
- "/var/lib/docker/volumes/:/var/lib/docker/volumes/"
- "/var/lib/docker/containers/:/var/lib/docker/containers/:ro"
- backups:/backups
environment:
- CRON_SCHEDULE
- RESTIC_REPOSITORY
- RESTIC_PASSWORD_FILE=/run/secrets/restic_password
secrets:
- restic_password
configs:
- source: entrypoint
target: /entrypoint.sh
mode: 666
- source: cronjob
target: /cronjob.sh
mode: 666
deploy:
labels:
- coop-cloud.${STACK_NAME}.version=2.3.0+2.3.0-beta
- coop-cloud.${STACK_NAME}.timeout=${TIMEOUT:-300}
- coop-cloud.backupbot.enabled=true
#entrypoint: ['tail', '-f','/dev/null']
healthcheck:
test: "pgrep crond"
interval: 30s
timeout: 10s
retries: 10
start_period: 5m
secrets:
restic_password:
external: true
name: ${STACK_NAME}_restic_password_${SECRET_RESTIC_PASSWORD_VERSION}
configs:
entrypoint:
name: ${STACK_NAME}_entrypoint_${ENTRYPOINT_VERSION}
file: entrypoint.sh
cronjob:
name: ${STACK_NAME}_cronjob_${CRONJOB_VERSION}
file: cronjob.sh
volumes:
backups:

View File

@ -1,40 +0,0 @@
#!/bin/sh
set -e
CURL_OPTS="-s"
# Check for basic auth
if [ -n "$(cat /run/secrets/push_basicauth)" ]
then
CURL_OPTS="$CURL_OPTS -u $(cat /run/secrets/push_basicauth)"
fi
if [ -n "$PUSH_PROMETHEUS_URL" ]
then
push_start_notification="(echo 'backup 1' | curl $CURL_OPTS --data-binary @- $PUSH_PROMETHEUS_URL)"
push_success_notification="(echo 'backup 0' | curl $CURL_OPTS --data-binary @- $PUSH_PROMETHEUS_URL)"
push_fail_notification="(echo 'backup -1' | curl $CURL_OPTS --data-binary @- $PUSH_PROMETHEUS_URL)"
else
if [ -n "$PUSH_URL_START" ]
then
push_start_notification="curl $CURL_OPTS '$PUSH_URL_START'"
fi
if [ -n "$PUSH_URL_FAIL" ]
then
push_fail_notification="curl $CURL_OPTS '$PUSH_URL_FAIL'"
fi
if [ -n "$PUSH_URL_SUCCESS" ]
then
push_success_notification="curl $CURL_OPTS '$PUSH_URL_SUCCESS'"
fi
fi
eval "$push_start_notification"
if [ "$(backup --machine-logs create 2>&1 | tee /tmp/backup.log && (grep -q 'backup finished' /tmp/backup.log))" ]
then
eval "$push_success_notification"
else
eval "$push_fail_notification"
fi

20
docker-compose.yml Normal file
View File

@ -0,0 +1,20 @@
version: "3.8"
services:
app:
build: .
environment:
RESTIC_PASSWORD_FILE: "/run/secrets/restic_password"
RESTIC_REPOSITORY: "s3:http://172.20.0.2:9000/backup" # https://restic.readthedocs.io/en/stable/030_preparing_a_new_repo.html
AWS_ACCESS_KEY_ID: "root"
AWS_SECRET_ACCESS_KEY: "foobar"
secrets:
- restic_password
volumes:
- /var/run/docker.sock:/var/run/docker.sock
secrets:
restic_password:
#external: true
#name: restic_password
file: restic_password

View File

@ -1,15 +0,0 @@
#!/bin/sh
set -e
if [ -n "$SSH_HOST_KEY" ]
then
echo "$SSH_HOST_KEY" > /root/.ssh/known_hosts
fi
cron_schedule="${CRON_SCHEDULE:?CRON_SCHEDULE not set}"
echo "$cron_schedule /cronjob.sh" | crontab -
crontab -l
crond -f -d8 -L /dev/stdout

View File

@ -1,34 +0,0 @@
#!/bin/bash
set -e
BACKUP_FILE='/var/lib/postgresql/data/backup.sql'
function backup {
export PGPASSWORD=$(cat $POSTGRES_PASSWORD_FILE)
pg_dump -U ${POSTGRES_USER} ${POSTGRES_DB} > $BACKUP_FILE
}
function restore {
cd /var/lib/postgresql/data/
restore_config(){
# Restore allowed connections
cat pg_hba.conf.bak > pg_hba.conf
su postgres -c 'pg_ctl reload'
}
# Don't allow any other connections than local
cp pg_hba.conf pg_hba.conf.bak
echo "local all all trust" > pg_hba.conf
su postgres -c 'pg_ctl reload'
trap restore_config EXIT INT TERM
# Recreate Database
psql -U ${POSTGRES_USER} -d postgres -c "DROP DATABASE ${POSTGRES_DB} WITH (FORCE);"
createdb -U ${POSTGRES_USER} ${POSTGRES_DB}
psql -U ${POSTGRES_USER} -d ${POSTGRES_DB} -1 -f $BACKUP_FILE
trap - EXIT INT TERM
restore_config
}
$@

View File

@ -1 +0,0 @@
This is the first beta release of the new backup-bot-two rewrite in python. Be aware when updating, it can break. Please read the readme and update your config according to it.

View File

@ -1,3 +0,0 @@
Breaking Change: the variables `SERVER_NAME` and `RESTIC_HOST` are merged into `RESTIC_REPOSITORY`. The format can be looked up here: https://restic.readthedocs.io/en/stable/030_preparing_a_new_repo.html
ssh/sftp: `sftp:user@host:/repo-path`
S3: `s3:https://s3.example.com/bucket_name`

View File

@ -1,3 +0,0 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json"
}

View File

@ -1,4 +0,0 @@
Host *
IdentityFile /run/secrets/ssh_key
ServerAliveInterval 60
ServerAliveCountMax 240