forked from coop-cloud/backup-bot-two
Compare commits
138 Commits
bb2-classi
...
main
Author | SHA1 | Date | |
---|---|---|---|
54e32ab422 | |||
4cda3c1018 | |||
f7f46d7b7b | |||
c1902b2dbc | |||
f40eb00435 | |||
3eea69ddee | |||
f1661c04e7 | |||
4b4371ed3f | |||
1214f59c79 | |||
8798e2feb5 | |||
119787ed39 | |||
141bedb069 | |||
14b55bbc79 | |||
ebcb0d42c5 | |||
dccc93ac6b | |||
826bec925f | |||
49dd989302 | |||
2f965a93dc | |||
4054d3417e | |||
f8cfcef029 | |||
4a49c4a7f0 | |||
79cdec6705 | |||
2bc9400807 | |||
9b141a5185 | |||
6ff2312090 | |||
8b66b80332 | |||
c9b04db7a0 | |||
333b7ec16d | |||
aeccd605ee | |||
f877186a57 | |||
9dc239b199 | |||
43548273fe | |||
5a0467dbdd | |||
3aefae61c0 | |||
ac7c5fb50d | |||
cc59087b8c | |||
eb7c35d4cd | |||
249772ec03 | |||
45af6e8b5e | |||
f7207cdf36 | |||
241fe3ce92 | |||
b8d61d01cd | |||
6ac781c7e6 | |||
197cabf564 | |||
fe35f1ede8 | |||
f254a365f2 | |||
0d15765673 | |||
e09e1b476c | |||
72688dc42c | |||
10e460ff2d | |||
f2d0b92fa3 | |||
cc049b858b | |||
b7bc8ed58f | |||
68e37f5c23 | |||
4d39d84733 | |||
e5b9bc0446 | |||
ec4c4509dc | |||
26162a9e38 | |||
bd581fd8d7 | |||
e77432e3ab | |||
001a654e37 | |||
c5574edc54 | |||
50e4d68717 | |||
c7830ceb6f | |||
b6f859efbb | |||
7f14698824 | |||
2a9a98172f | |||
282215cf9c | |||
ae7a14b6f1 | |||
8acdb20e5b | |||
5582744073 | |||
84d606fa80 | |||
7865907811 | |||
dc66c02e23 | |||
f730c70bfe | |||
faa7ae3dd1 | |||
79eeec428a | |||
4164760dc6 | |||
e644679b8b | |||
0c587ac926 | |||
65686cd891 | |||
ac055c932e | |||
64328c79b1 | |||
15275b2571 | |||
4befebba38 | |||
d2087a441e | |||
f4d96b0875 | |||
c73bbe8c0d | |||
ff2b5a25a2 | |||
e186813a49 | |||
37cb51674f | |||
2ea59b4230 | |||
354f964e7d | |||
2bb27aadc4 | |||
66e1c9617d | |||
79d19e7ac5 | |||
359140781e | |||
8750ec1813 | |||
8e76ad591e | |||
a3faa5d51f | |||
a3f27fa6ba | |||
fe5d846c5f | |||
79b7a01dda | |||
f8a8547b70 | |||
192b1f1d9c | |||
4c2304a962 | |||
69e7f07978 | |||
d25688f312 | |||
b3cbb8bb46 | |||
bb1237f9ad | |||
972a2c2314 | |||
4240318d20 | |||
c3f3d1a6fe | |||
ab6c06d423 | |||
9398e0d83d | |||
6fc62b5516 | |||
1f06af95eb | |||
15a552ef8b | |||
5d4def6143 | |||
ebc0ea5d84 | |||
488c59f667 | |||
825565451a | |||
6fa9440c76 | |||
33ce3c58aa | |||
06ad03c1d5 | |||
bd8398e7dd | |||
75a93c5456 | |||
d32337cf3a | |||
61ffb67686 | |||
a86ac15363 | |||
5fa8f821c1 | |||
203719c224 | |||
3009159c82 | |||
28334a4241 | |||
447a808849 | |||
42ae6a6b9b | |||
3261d67dca | |||
6355f3572f |
25
.drone.yml
25
.drone.yml
@ -2,27 +2,16 @@
|
||||
kind: pipeline
|
||||
name: linters
|
||||
steps:
|
||||
- name: run shellcheck
|
||||
image: koalaman/shellcheck-alpine
|
||||
commands:
|
||||
- shellcheck backup.sh
|
||||
|
||||
- name: publish image
|
||||
image: plugins/docker
|
||||
settings:
|
||||
auto_tag: true
|
||||
username: thecoopcloud
|
||||
username: 3wordchant
|
||||
password:
|
||||
from_secret: thecoopcloud_password
|
||||
repo: thecoopcloud/backup-bot-two
|
||||
tags: latest
|
||||
depends_on:
|
||||
- run shellcheck
|
||||
from_secret: git_coopcloud_tech_token_3wc
|
||||
repo: git.coopcloud.tech/coop-cloud/backup-bot-two
|
||||
tags: ${DRONE_SEMVER_BUILD}
|
||||
registry: git.coopcloud.tech
|
||||
when:
|
||||
event:
|
||||
exclude:
|
||||
- pull_request
|
||||
|
||||
trigger:
|
||||
branch:
|
||||
- main
|
||||
include:
|
||||
- tag
|
||||
|
27
.env.sample
27
.env.sample
@ -4,11 +4,20 @@ SECRET_RESTIC_PASSWORD_VERSION=v1
|
||||
|
||||
COMPOSE_FILE=compose.yml
|
||||
|
||||
SERVER_NAME=example.com
|
||||
RESTIC_HOST=minio.example.com
|
||||
RESTIC_REPOSITORY=/backups/restic
|
||||
|
||||
CRON_SCHEDULE='*/5 * * * *'
|
||||
REMOVE_BACKUP_VOLUME_AFTER_UPLOAD=1
|
||||
CRON_SCHEDULE='30 3 * * *'
|
||||
|
||||
# Push Notifications
|
||||
#PUSH_PROMETHEUS_URL=https://pushgateway.example.com/metrics/job/backup
|
||||
# or
|
||||
#PUSH_URL_START=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=start
|
||||
#PUSH_URL_SUCCESS=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=OK
|
||||
#PUSH_URL_FAIL=https://status.example.com/api/push/xxxxxxxxxx?status=down&msg=fail
|
||||
|
||||
# Push Basic Auth
|
||||
#COMPOSE_FILE="$COMPOSE_FILE:compose.pushbasicauth.yml"
|
||||
#SECRET_PUSH_BASICAUTH=v1
|
||||
|
||||
# swarm-cronjob, instead of built-in cron
|
||||
#COMPOSE_FILE="$COMPOSE_FILE:compose.swarm-cronjob.yml"
|
||||
@ -23,7 +32,9 @@ REMOVE_BACKUP_VOLUME_AFTER_UPLOAD=1
|
||||
#AWS_ACCESS_KEY_ID=something-secret
|
||||
#COMPOSE_FILE="$COMPOSE_FILE:compose.s3.yml"
|
||||
|
||||
# HTTPS storage
|
||||
#SECRET_HTTPS_PASSWORD_VERSION=v1
|
||||
#COMPOSE_FILE="$COMPOSE_FILE:compose.https.yml"
|
||||
#RESTIC_USER=<somebody>
|
||||
# Secret restic repository
|
||||
# use a secret to store the RESTIC_REPOSITORY if the repository location contains a secret value
|
||||
# e.g. rest:https://user:SECRET_PASSWORD@host:8000/
|
||||
# it overwrites the RESTIC_REPOSITORY variable
|
||||
#SECRET_RESTIC_REPO_VERSION=v1
|
||||
#COMPOSE_FILE="$COMPOSE_FILE:compose.secret.yml"
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1 @@
|
||||
/testing
|
||||
.venv
|
||||
|
6
CHANGELOG.md
Normal file
6
CHANGELOG.md
Normal file
@ -0,0 +1,6 @@
|
||||
# Change log
|
||||
|
||||
## 2.0.0 (unreleased)
|
||||
|
||||
- Rewrite from Bash to Python
|
||||
- Add support for push notifications (#24)
|
18
Dockerfile
18
Dockerfile
@ -1,13 +1,11 @@
|
||||
FROM docker:24.0.6-dind
|
||||
FROM docker:24.0.7-dind
|
||||
|
||||
RUN apk add --upgrade --no-cache \
|
||||
bash \
|
||||
curl \
|
||||
jq \
|
||||
restic
|
||||
RUN apk add --upgrade --no-cache restic bash python3 py3-pip py3-click py3-docker-py py3-json-logger curl
|
||||
|
||||
COPY backup.sh /usr/bin/backup.sh
|
||||
COPY setup-cron.sh /usr/bin/setup-cron.sh
|
||||
RUN chmod +x /usr/bin/backup.sh /usr/bin/setup-cron.sh
|
||||
# Todo use requirements file with specific versions
|
||||
RUN pip install --break-system-packages resticpy==1.0.2
|
||||
|
||||
ENTRYPOINT [ "/usr/bin/setup-cron.sh" ]
|
||||
COPY backupbot.py /usr/bin/backup
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
|
||||
ENTRYPOINT /entrypoint.sh
|
||||
|
280
README.md
280
README.md
@ -4,7 +4,21 @@
|
||||
|
||||
_This Time, It's Easily Configurable_
|
||||
|
||||
Automatically take backups from running Docker Swarm services into a volume.
|
||||
Automatically take backups from all volumes of running Docker Swarm services and run pre- and post-commands.
|
||||
|
||||
<!-- metadata -->
|
||||
|
||||
* **Category**: Utilities
|
||||
* **Status**: 0, work-in-progress
|
||||
* **Image**: [`git.coopcloud.tech/coop-cloud/backup-bot-two`](https://git.coopcloud.tech/coop-cloud/-/packages/container/backup-bot-two), 4, upstream
|
||||
* **Healthcheck**: No
|
||||
* **Backups**: N/A
|
||||
* **Email**: N/A
|
||||
* **Tests**: No
|
||||
* **SSO**: N/A
|
||||
|
||||
<!-- endmetadata -->
|
||||
|
||||
|
||||
## Background
|
||||
|
||||
@ -20,53 +34,253 @@ Backupbot II tries to help, by
|
||||
|
||||
### With Co-op Cloud
|
||||
|
||||
1. Set up Docker Swarm and [`abra`][abra]
|
||||
2. `abra app new backup-bot-two`
|
||||
3. `abra app config <your-app-name>`, and set storage options. Either configure `CRON_SCHEDULE`, or set up `swarm-cronjob`
|
||||
4. `abra app secret generate <your-app-name> restic-password v1`, optionally with `--pass` before `<your-app-name>` to save the generated secret in `pass`.
|
||||
5. `abra app secret insert <your-app-name> ssh-key v1 ...` or similar, to load required secrets.
|
||||
4. `abra app deploy <your-app-name>`
|
||||
|
||||
<!-- metadata -->
|
||||
|
||||
* **Category**: Utilities
|
||||
* **Status**: 0, work-in-progress
|
||||
* **Image**: [`thecoopcloud/backup-bot-two`](https://hub.docker.com/r/thecoopcloud/backup-bot-two), 4, upstream
|
||||
* **Healthcheck**: No
|
||||
* **Backups**: N/A
|
||||
* **Email**: N/A
|
||||
* **Tests**: No
|
||||
* **SSO**: N/A
|
||||
|
||||
<!-- endmetadata -->
|
||||
* `abra app new backup-bot-two`
|
||||
* `abra app config <app-name>`
|
||||
- set storage options. Either configure `CRON_SCHEDULE`, or set up `swarm-cronjob`
|
||||
* `abra app secret generate -a <backupbot_name>`
|
||||
* `abra app deploy <app-name>`
|
||||
|
||||
## Configuration
|
||||
|
||||
Per default Backupbot stores the backups locally in the repository `/backups/restic`, which is accessible as volume at `/var/lib/docker/volumes/<backupbot_name>_backups/_data/restic/`
|
||||
|
||||
The backup location can be changed using the `RESTIC_REPOSITORY` env variable.
|
||||
|
||||
### S3 Storage
|
||||
|
||||
To use S3 storage as backup location set the following envs:
|
||||
```
|
||||
RESTIC_REPOSITORY=s3:<S3-SERVICE-URL>/<BUCKET-NAME>
|
||||
SECRET_AWS_SECRET_ACCESS_KEY_VERSION=v1
|
||||
AWS_ACCESS_KEY_ID=<MY_ACCESS_KEY>
|
||||
COMPOSE_FILE="$COMPOSE_FILE:compose.s3.yml"
|
||||
```
|
||||
and add your `<SECRET_ACCESS_KEY>` as docker secret:
|
||||
`abra app secret insert <backupbot_name> aws_secret_access_key v1 <SECRET_ACCESS_KEY>`
|
||||
|
||||
See [restic s3 docs](https://restic.readthedocs.io/en/latest/030_preparing_a_new_repo.html#amazon-s3) for more information.
|
||||
|
||||
### SFTP Storage
|
||||
|
||||
> With sftp it is not possible to prevent the backupbot from deleting backups in case of a compromised machine. Therefore we recommend to use S3, REST or rclone server without delete permissions.
|
||||
|
||||
To use SFTP storage as backup location set the following envs:
|
||||
```
|
||||
RESTIC_REPOSITORY=sftp:user@host:/restic-repo-path
|
||||
SECRET_SSH_KEY_VERSION=v1
|
||||
SSH_HOST_KEY="hostname ssh-rsa AAAAB3..."
|
||||
COMPOSE_FILE="$COMPOSE_FILE:compose.ssh.yml"
|
||||
```
|
||||
To get the `SSH_HOST_KEY` run the following command `ssh-keyscan <hostname>`
|
||||
|
||||
Generate an ssh keypair: `ssh-keygen -t ed25519 -f backupkey -P ''`
|
||||
Add the key to your `authorized_keys`:
|
||||
`ssh-copy-id -i backupkey <user>@<hostname>`
|
||||
Add your `SSH_KEY` as docker secret:
|
||||
```
|
||||
abra app secret insert <backupbot_name> ssh_key v1 """$(cat backupkey)
|
||||
"""
|
||||
```
|
||||
> Attention: This command needs to be executed exactly as stated above, because it places a trailing newline at the end, if this is missing you will get the following error: `Load key "/run/secrets/ssh_key": error in libcrypto`
|
||||
|
||||
### Restic REST server Storage
|
||||
|
||||
You can simply set the `RESTIC_REPOSITORY` variable to your REST server URL `rest:http://host:8000/`.
|
||||
If you access the REST server with a password `rest:https://user:pass@host:8000/` you should hide the whole URL containing the password inside a secret.
|
||||
Uncomment these lines:
|
||||
```
|
||||
SECRET_RESTIC_REPO_VERSION=v1
|
||||
COMPOSE_FILE="$COMPOSE_FILE:compose.secret.yml"
|
||||
```
|
||||
Add your REST server url as secret:
|
||||
```
|
||||
abra app secret insert <backupbot_name> restic_repo v1 "rest:https://user:pass@host:8000/"
|
||||
```
|
||||
The secret will overwrite the `RESTIC_REPOSITORY` variable.
|
||||
|
||||
|
||||
See [restic REST docs](https://restic.readthedocs.io/en/latest/030_preparing_a_new_repo.html#rest-server) for more information.
|
||||
|
||||
## Push notifications
|
||||
|
||||
It is possible to configure three push events that may trigger on the backup cronjob. They can be used by monitoring systems to detect failures.
|
||||
The events are:
|
||||
- start
|
||||
- success
|
||||
- fail
|
||||
|
||||
### Using a Prometheus Push Gateway
|
||||
|
||||
[A prometheus push gateway](https://git.coopcloud.tech/coop-cloud/monitoring-ng#setup-push-gateway) can be used by setting the following env variables:
|
||||
- `PUSH_PROMETHEUS_URL=pushgateway.example.com/metrics/job/backup`
|
||||
|
||||
### Using custom URLs
|
||||
|
||||
The following env variables can be used to set up push notifications for backups. `PUSH_URL_START` is requested just before the backup starts, `PUSH_URL_SUCCESS` is only requested if the backup was successful, and `PUSH_URL_FAIL` is requested if the backup fails.
|
||||
Each variable is optional and independent of the other.
|
||||
|
||||
```
|
||||
PUSH_URL_START=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=start
|
||||
PUSH_URL_SUCCESS=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=OK
|
||||
PUSH_URL_FAIL=https://status.example.com/api/push/xxxxxxxxxx?status=down&msg=fail
|
||||
```
|
||||
|
||||
### Push endpoint behind basic auth
|
||||
|
||||
Insert the basic auth secret
|
||||
`abra app secret insert <backupbot_name> push_basicauth v1 "user:password"`
|
||||
|
||||
Enable basic auth in the env file by uncommenting the following lines:
|
||||
```
|
||||
#COMPOSE_FILE="$COMPOSE_FILE:compose.pushbasicauth.yml"
|
||||
#SECRET_PUSH_BASICAUTH=v1
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
Run the cronjob that creates a backup, including the push notifications and docker logging:
|
||||
`abra app cmd <backupbot_name> app run_cron`
|
||||
|
||||
Create a backup of all apps:
|
||||
|
||||
`abra app run <backupbot_name> app -- backup create`
|
||||
|
||||
> The apps to back up need to be deployed
|
||||
|
||||
Create an individual backup:
|
||||
|
||||
`abra app run <backupbot_name> app -- backup --host <target_app_name> create`
|
||||
|
||||
Create a backup to a local repository:
|
||||
|
||||
`abra app run <backupbot_name> app -- backup create -r /backups/restic`
|
||||
|
||||
> It is recommended to shutdown/undeploy an app before restoring the data
|
||||
|
||||
Restore the latest snapshot of all included apps:
|
||||
|
||||
`abra app run <backupbot_name> app -- backup restore`
|
||||
|
||||
Restore a specific snapshot of an individual app:
|
||||
|
||||
`abra app run <backupbot_name> app -- backup --host <target_app_name> restore --snapshot <snapshot_id>`
|
||||
|
||||
Show all snapshots:
|
||||
|
||||
`abra app run <backupbot_name> app -- backup snapshots`
|
||||
|
||||
Show all snapshots containing a specific app:
|
||||
|
||||
`abra app run <backupbot_name> app -- backup --host <target_app_name> snapshots`
|
||||
|
||||
Show all files inside the latest snapshot (can be very verbose):
|
||||
|
||||
`abra app run <backupbot_name> app -- backup ls`
|
||||
|
||||
Show specific files inside a selected snapshot:
|
||||
|
||||
`abra app run <backupbot_name> app -- backup ls --snapshot <snapshot_id> /var/lib/docker/volumes/`
|
||||
|
||||
Download files from a snapshot:
|
||||
|
||||
```
|
||||
filename=$(abra app run <backupbot_name> app -- backup download --snapshot <snapshot_id> --path <absolute_path>)
|
||||
abra app cp <backupbot_name> app:$filename .
|
||||
```
|
||||
|
||||
## Run restic
|
||||
|
||||
```
|
||||
abra app run <backupbot_name> app bash
|
||||
export AWS_SECRET_ACCESS_KEY=$(cat $AWS_SECRET_ACCESS_KEY_FILE)
|
||||
export RESTIC_PASSWORD=$(cat $RESTIC_PASSWORD_FILE)
|
||||
restic snapshots
|
||||
```
|
||||
|
||||
## Recipe Configuration
|
||||
|
||||
Like Traefik, or `swarm-cronjob`, Backupbot II uses access to the Docker socket to read labels from running Docker Swarm services:
|
||||
|
||||
1. Add `ENABLE_BACKUPS=true` to .env.sample
|
||||
|
||||
2. Add backupbot labels to the compose file
|
||||
|
||||
```
|
||||
services:
|
||||
db:
|
||||
deploy:
|
||||
labels:
|
||||
backupbot.backup: "true"
|
||||
backupbot.backup.pre-hook: 'mysqldump -u root -p"$(cat /run/secrets/db_root_password)" -f /tmp/dump/dump.db'
|
||||
backupbot.backup.post-hook: "rm -rf /tmp/dump/dump.db"
|
||||
backupbot.backup.path: "/tmp/dump/,/etc/foo/"
|
||||
backupbot.backup: "${ENABLE_BACKUPS:-true}"
|
||||
backupbot.backup.pre-hook: "/pg_backup.sh backup"
|
||||
backupbot.backup.volumes.db.path: "backup.sql"
|
||||
backupbot.restore.post-hook: '/pg_backup.sh restore'
|
||||
backupbot.backup.volumes.redis: "false"
|
||||
```
|
||||
|
||||
- `backupbot.backup` -- set to `true` to back up this service (REQUIRED)
|
||||
- `backupbot.backup.path` -- comma separated list of file paths within the service to copy (REQUIRED)
|
||||
- `backupbot.backup.pre-hook` -- command to run before copying files (optional)
|
||||
- `backupbot.backup.post-hook` -- command to run after copying files (optional)
|
||||
- this is the only required backup label, per default it will backup all volumes
|
||||
- `backupbot.backup.volumes.<volume_name>.path` -- only backup the listed relative paths from `<volume_name>`
|
||||
- `backupbot.backup.volumes.<volume_name>: false` -- exclude <volume_name> from the backup
|
||||
- `backupbot.backup.pre-hook` -- command to run before copying files
|
||||
- i.e. save all database dumps into the volumes
|
||||
- `backupbot.backup.post-hook` -- command to run after copying files
|
||||
- `backupbot.restore.pre-hook` -- command to run before restoring files
|
||||
- `backupbot.restore.post-hook` -- command to run after restoring files
|
||||
- i.e. read all database dumps from the volumes
|
||||
|
||||
3. (Optional) add backup/restore scripts to the compose file
|
||||
|
||||
```
|
||||
services:
|
||||
db:
|
||||
configs:
|
||||
- source: pg_backup
|
||||
target: /pg_backup.sh
|
||||
mode: 0555
|
||||
|
||||
|
||||
configs:
|
||||
pg_backup:
|
||||
name: ${STACK_NAME}_pg_backup_${PG_BACKUP_VERSION}
|
||||
file: pg_backup.sh
|
||||
```
|
||||
|
||||
Version the config file in `abra.sh`:
|
||||
|
||||
```
|
||||
export PG_BACKUP_VERSION=v1
|
||||
```
|
||||
|
||||
As in the above example, you can reference Docker Secrets, e.g. for looking up database passwords, by reading the files in `/run/secrets` directly.
|
||||
|
||||
## Development
|
||||
|
||||
1. Install `direnv`
|
||||
2. `cp .envrc.sample .envrc`
|
||||
3. Edit `.envrc` as appropriate, including setting `DOCKER_CONTEXT` to a remote Docker context, if you're not running a swarm server locally.
|
||||
4. Run `./backup.sh` -- you can add the `--skip-backup` or `--skip-upload` options if you just want to test one other step
|
||||
|
||||
[abra]: https://git.autonomic.zone/autonomic-cooperative/abra
|
||||
|
||||
## Backupbot Development
|
||||
|
||||
1. Copy modified backupbot.py into the container:
|
||||
|
||||
```
|
||||
cp backupbot.py /tmp/backupbot.py; git stash; abra app cp <backupbot_name> /tmp/backupbot.py app:/usr/bin/backupbot.py; git checkout main; git stash pop
|
||||
```
|
||||
|
||||
2. Testing stuff with the python interpreter inside the container:
|
||||
|
||||
```
|
||||
abra app run <backupbot_name> app bash
|
||||
cd /usr/bin/
|
||||
python
|
||||
from backupbot import *
|
||||
```
|
||||
|
||||
### Versioning
|
||||
|
||||
- App version: changes to `backupbot.py` (build a new image)
|
||||
- Co-op Cloud package version: changes to recipe.
|
||||
|
||||
For example, starting with 1.0.0+2.0.0:
|
||||
"patch" change to recipe: 1.0.1+2.0.0
|
||||
"patch" change to backupbot.py: increment both, so 1.1.0+2.0.1
|
||||
because bumping the image version would result in a minor recipe release
|
||||
|
||||
https://git.coopcloud.tech/coop-cloud/backup-bot-two/issues/4
|
||||
|
11
abra.sh
Normal file
11
abra.sh
Normal file
@ -0,0 +1,11 @@
|
||||
export SSH_CONFIG_VERSION=v1
|
||||
export ENTRYPOINT_VERSION=v17
|
||||
export CRONJOB_VERSION=v2
|
||||
|
||||
run_cron () {
    # Trigger the installed cron job once, right away, then restore its
    # original schedule.

    # The first five crontab fields are the schedule; keep them for later.
    schedule="$(crontab -l | tr -s " " | cut -d ' ' -f-5)"

    # Start without a log file: its (re)appearance is what the loop below
    # waits for (presumably written by the cron job itself -- verify).
    rm -f /tmp/backup.log

    # Re-install the same command (fields 6 onwards) on an every-minute schedule.
    echo "* * * * * $(crontab -l | tr -s " " | cut -d ' ' -f6-)" | crontab -

    until [ -f /tmp/backup.log ]; do
        sleep 1
    done

    # Put the remembered schedule back in front of the unchanged command.
    echo "$schedule $(crontab -l | tr -s " " | cut -d ' ' -f6-)" | crontab -
}
|
139
backup.sh
139
backup.sh
@ -1,139 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
server_name="${SERVER_NAME:?SERVER_NAME not set}"
|
||||
|
||||
restic_password_file="${RESTIC_PASSWORD_FILE:?RESTIC_PASSWORD_FILE not set}"
|
||||
|
||||
restic_host="${RESTIC_HOST:?RESTIC_HOST not set}"
|
||||
|
||||
backup_path="${BACKUP_DEST:?BACKUP_DEST not set}"
|
||||
|
||||
# shellcheck disable=SC2153
|
||||
ssh_key_file="${SSH_KEY_FILE}"
|
||||
s3_key_file="${AWS_SECRET_ACCESS_KEY_FILE}"
|
||||
# shellcheck disable=SC2153
|
||||
https_password_file="${HTTPS_PASSWORD_FILE}"
|
||||
|
||||
restic_repo=
|
||||
restic_extra_options=
|
||||
|
||||
if [ -n "$ssh_key_file" ] && [ -f "$ssh_key_file" ]; then
|
||||
restic_repo="sftp:$restic_host:/$server_name"
|
||||
|
||||
# Only check server against provided SSH_HOST_KEY, if set
|
||||
if [ -n "$SSH_HOST_KEY" ]; then
|
||||
tmpfile=$(mktemp)
|
||||
echo "$SSH_HOST_KEY" >>"$tmpfile"
|
||||
echo "using host key $SSH_HOST_KEY"
|
||||
ssh_options="-o 'UserKnownHostsFile $tmpfile'"
|
||||
elif [ "$SSH_HOST_KEY_DISABLE" = "1" ]; then
|
||||
echo "disabling SSH host key checking"
|
||||
ssh_options="-o 'StrictHostKeyChecking=No'"
|
||||
else
|
||||
echo "neither SSH_HOST_KEY nor SSH_HOST_KEY_DISABLE set"
|
||||
fi
|
||||
restic_extra_options="sftp.command=ssh $ssh_options -i $ssh_key_file $restic_host -s sftp"
|
||||
fi
|
||||
|
||||
if [ -n "$s3_key_file" ] && [ -f "$s3_key_file" ] && [ -n "$AWS_ACCESS_KEY_ID" ]; then
|
||||
AWS_SECRET_ACCESS_KEY="$(cat "${s3_key_file}")"
|
||||
export AWS_SECRET_ACCESS_KEY
|
||||
restic_repo="s3:$restic_host:/$server_name"
|
||||
fi
|
||||
|
||||
if [ -n "$https_password_file" ] && [ -f "$https_password_file" ]; then
|
||||
HTTPS_PASSWORD="$(cat "${https_password_file}")"
|
||||
export HTTPS_PASSWORD
|
||||
restic_user="${RESTIC_USER:?RESTIC_USER not set}"
|
||||
restic_repo="rest:https://$restic_user:$HTTPS_PASSWORD@$restic_host"
|
||||
fi
|
||||
|
||||
if [ -z "$restic_repo" ]; then
|
||||
echo "you must configure either SFTP, S3, or HTTPS storage, see README"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "restic_repo: $restic_repo"
|
||||
|
||||
# Pre-bake-in some default restic options
|
||||
_restic() {
|
||||
if [ -z "$restic_extra_options" ]; then
|
||||
# shellcheck disable=SC2068
|
||||
restic -p "$restic_password_file" \
|
||||
--quiet -r "$restic_repo" \
|
||||
$@
|
||||
else
|
||||
# shellcheck disable=SC2068
|
||||
restic -p "$restic_password_file" \
|
||||
--quiet -r "$restic_repo" \
|
||||
-o "$restic_extra_options" \
|
||||
$@
|
||||
fi
|
||||
}
|
||||
|
||||
if [ -n "$SERVICES_OVERRIDE" ]; then
|
||||
# this is fine because docker service names should never include spaces or
|
||||
# glob characters
|
||||
# shellcheck disable=SC2206
|
||||
services=($SERVICES_OVERRIDE)
|
||||
else
|
||||
mapfile -t services < <(docker service ls --format '{{ .Name }}')
|
||||
fi
|
||||
|
||||
if [[ \ $*\ != *\ --skip-backup\ * ]]; then
|
||||
rm -rf "${backup_path}"
|
||||
|
||||
for service in "${services[@]}"; do
|
||||
echo "service: $service"
|
||||
details=$(docker service inspect "$service" --format "{{ json .Spec.Labels }}")
|
||||
if echo "$details" | jq -r '.["backupbot.backup"]' | grep -q 'true'; then
|
||||
pre=$(echo "$details" | jq -r '.["backupbot.backup.pre-hook"]')
|
||||
post=$(echo "$details" | jq -r '.["backupbot.backup.post-hook"]')
|
||||
path=$(echo "$details" | jq -r '.["backupbot.backup.path"]')
|
||||
|
||||
if [ "$path" = "null" ]; then
|
||||
echo "ERROR: missing 'path' for $service"
|
||||
continue # or maybe exit?
|
||||
fi
|
||||
|
||||
container=$(docker container ls -f "name=$service" --format '{{ .ID }}')
|
||||
|
||||
echo "backing up $service"
|
||||
|
||||
if [ "$pre" != "null" ]; then
|
||||
# run the precommand
|
||||
# shellcheck disable=SC2086
|
||||
docker exec "$container" sh -c "$pre"
|
||||
fi
|
||||
|
||||
# run the backup
|
||||
for p in ${path//,/ }; do
|
||||
# creates the parent folder, so `docker cp` has reliable behaviour no matter if $p ends with `/` or `/.`
|
||||
dir=$backup_path/$service/$(dirname "$p")
|
||||
test -d "$dir" || mkdir -p "$dir"
|
||||
docker cp -a "$container:$p" "$dir/$(basename "$p")"
|
||||
done
|
||||
|
||||
if [ "$post" != "null" ]; then
|
||||
# run the postcommand
|
||||
# shellcheck disable=SC2086
|
||||
docker exec "$container" sh -c "$post"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
# check if restic repo exists, initialise if not
|
||||
if [ -z "$(_restic cat config)" ] 2>/dev/null; then
|
||||
echo "initializing restic repo"
|
||||
_restic init
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ \ $*\ != *\ --skip-upload\ * ]]; then
|
||||
_restic backup --host "$server_name" --tag coop-cloud "$backup_path"
|
||||
|
||||
if [ "$REMOVE_BACKUP_VOLUME_AFTER_UPLOAD" -eq 1 ]; then
|
||||
echo "Cleaning up ${backup_path}"
|
||||
rm -rf "${backup_path:?}"/*
|
||||
fi
|
||||
fi
|
649
backupbot.py
Executable file
649
backupbot.py
Executable file
@ -0,0 +1,649 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import click
|
||||
import json
|
||||
import subprocess
|
||||
import logging
|
||||
import docker
|
||||
import restic
|
||||
import tarfile
|
||||
import io
|
||||
from pythonjsonlogger import jsonlogger
|
||||
from datetime import datetime, timezone
|
||||
from restic.errors import ResticFailedError
|
||||
from pathlib import Path
|
||||
from shutil import copyfile, rmtree
|
||||
|
||||
# Filesystem locations used by the backup commands.
VOLUME_PATH = "/var/lib/docker/volumes/"
SECRET_PATH = "/secrets/"
# Service selected for the current invocation; "ALL" means no restriction.
SERVICE = "ALL"

logger = logging.getLogger("backupbot")

# Register a custom SUMMARY level (55, i.e. above CRITICAL) and expose it both
# as ``logging.SUMMARY`` and as a ``logger.summary(...)`` convenience method.
logging.addLevelName(55, "SUMMARY")
logging.SUMMARY = 55


def _log_summary(message, *args, **kwargs):
    """Emit ``message`` on the backupbot logger at the SUMMARY level."""
    return logger.log(55, message, *args, **kwargs)


logger.summary = _log_summary
|
||||
|
||||
|
||||
def handle_exception(exc_type, exc_value, exc_traceback):
    """Exception hook that routes uncaught exceptions through the logger.

    KeyboardInterrupt is passed to the interpreter's original hook
    (``sys.__excepthook__``); every other exception is logged at CRITICAL
    level together with its traceback.
    """
    if not issubclass(exc_type, KeyboardInterrupt):
        logger.critical(
            "Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback)
        )
        return
    sys.__excepthook__(exc_type, exc_value, exc_traceback)
|
||||
|
||||
|
||||
sys.excepthook = handle_exception
|
||||
|
||||
|
||||
@click.group()
@click.option("-l", "--log", "loglevel")
@click.option(
    "-m", "--machine-logs", "machine_logs", is_flag=True, envvar="MACHINE_LOGS"
)
@click.option("service", "--host", "-h", envvar="SERVICE")
@click.option("repository", "--repo", "-r", envvar="RESTIC_REPOSITORY")
def cli(loglevel, service, repository, machine_logs):
    # Root command group: applies the global options, configures logging,
    # then exports the secrets and prepares the restic repository.
    # (Kept as a comment on purpose: a docstring would show up in --help.)
    global SERVICE

    # Dots in the given host name are replaced by underscores before it is
    # stored as the selected service.
    if service:
        SERVICE = service.replace(".", "_")

    # An explicit --repo is written to the environment, where init_repo()
    # reads it.
    if repository:
        os.environ["RESTIC_REPOSITORY"] = repository

    if loglevel:
        level = getattr(logging, loglevel.upper(), None)
        if not isinstance(level, int):
            raise ValueError("Invalid log level: %s" % loglevel)
        logger.setLevel(level)

    # NOTE(review): source indentation was lost; the handler is assumed to be
    # attached unconditionally, with JSON formatting only for --machine-logs.
    handler = logging.StreamHandler()
    if machine_logs:
        handler.setFormatter(
            jsonlogger.JsonFormatter(
                "%(levelname)s %(filename)s %(lineno)s %(process)d %(message)s",
                rename_fields={"levelname": "message_type"},
            )
        )
    logger.addHandler(handler)

    export_secrets()
    init_repo()
|
||||
|
||||
|
||||
def init_repo():
    """Configure the restic repository and initialise it when it is empty.

    The repository location comes from ``RESTIC_REPOSITORY_FILE`` if that
    variable is set, otherwise from ``RESTIC_REPOSITORY``.

    Raises:
        KeyError: neither ``RESTIC_REPOSITORY_FILE`` nor ``RESTIC_REPOSITORY``
            is set.
        ResticFailedError: reading the repository config failed for any
            reason other than "unable to open config file".
    """
    if repo := os.environ.get("RESTIC_REPOSITORY_FILE"):
        # RESTIC_REPOSITORY_FILE and RESTIC_REPOSITORY are mutually exclusive.
        # pop() with a default instead of `del`, so a missing
        # RESTIC_REPOSITORY does not raise KeyError.
        os.environ.pop("RESTIC_REPOSITORY", None)
    else:
        repo = os.environ["RESTIC_REPOSITORY"]
        # NOTE(review): source indentation was lost; this assignment is
        # assumed to belong to the else branch, since in the other branch
        # `repo` holds the path of the secret file, not a repository location.
        restic.repository = repo
    logger.debug(f"set restic repository location: {repo}")
    restic.password_file = "/var/run/secrets/restic_password"
    try:
        restic.cat.config()
    except ResticFailedError as error:
        # A missing config file is treated as "repository not initialised yet".
        if "unable to open config file" in str(error):
            result = restic.init()
            logger.info(f"Initialized restic repo: {result}")
        else:
            # Bare raise keeps the original traceback intact.
            raise
|
||||
|
||||
|
||||
def export_secrets():
    """Export file-backed secrets as plain environment variables.

    For every environment variable named ``<NAME>_FILE`` (names containing
    ``COMPOSE_FILE`` are skipped) the file it points to is read and its
    content is stored, unmodified, in ``<NAME>``.

    Raises:
        OSError: a referenced file cannot be opened or read.
    """
    # Iterate over a snapshot of the names: the loop adds variables itself.
    for env in list(os.environ):
        # Require the full "_FILE" suffix. A bare "FILE" check also matches
        # names such as LOGFILE, which removesuffix("_FILE") leaves untouched,
        # so the variable would be overwritten with the content of its file.
        if not env.endswith("_FILE") or "COMPOSE_FILE" in env:
            continue
        logger.debug(f"exported secret: {env}")
        with open(os.environ[env]) as file:
            secret = file.read()
        # The secret value itself is deliberately never logged.
        os.environ[env.removesuffix("_FILE")] = secret
|
||||
|
||||
|
||||
@cli.command()
@click.option("retries", "--retries", "-r", envvar="RETRIES", default=1)
def create(retries):
    # Create a backup: collect the backup settings, copy the secrets, run the
    # pre-hooks, back up the volumes plus SECRET_PATH, then run the post-hooks.
    # (Kept as a comment on purpose: a docstring would show up in --help.)
    settings = parse_backup_labels()
    details = get_backup_details(settings)
    pre_commands, post_commands, backup_paths, apps_versions = details

    copy_secrets(apps_versions)
    # The secrets directory is backed up along with the volume paths.
    backup_paths.append(Path(SECRET_PATH))

    run_commands(pre_commands)
    backup_volumes(backup_paths, apps_versions, int(retries))
    run_commands(post_commands)
|
||||
|
||||
|
||||
@cli.command()
@click.option("snapshot_id", "--snapshot", "-s", envvar="SNAPSHOT", default="latest")
@click.option("target", "--target", "-t", envvar="TARGET", default="/")
@click.option(
    "noninteractive", "--noninteractive", envvar="NONINTERACTIVE", is_flag=True
)
@click.option("volumes", "--volumes", "-v", envvar="VOLUMES", multiple=True)
@click.option("container", "--container", "-c", envvar="CONTAINER", multiple=True)
@click.option("no_commands", "--no-commands", envvar="NO_COMMANDS", is_flag=True)
def restore(snapshot_id, target, noninteractive, volumes, container, no_commands):
    # Restore the volume paths of the selected app(s) from a snapshot and run
    # the restore pre-/post-hooks around it (unless --no-commands is given).
    # In interactive mode a summary is printed and "YES" must be typed first.
    # Exits with status 1 if the selected app has no settings, no snapshot is
    # found, or the confirmation is declined.
    # (Kept as comments on purpose: a docstring would show up in --help.)
    app_settings = parse_backup_labels("restore", container)
    if SERVICE != "ALL":
        if not app_settings.get(SERVICE):
            logger.error(
                f"The app {SERVICE} is not running, use the restore-path argument to restore paths of undeployed apps"
            )
            # sys.exit instead of the site-provided exit(), which is meant for
            # the interactive shell and is not guaranteed to exist.
            sys.exit(1)
        app_settings = {SERVICE: app_settings.get(SERVICE)}
    pre_commands, post_commands, backup_paths, apps_versions = get_backup_details(
        app_settings, volumes
    )
    snapshots = get_snapshots(snapshot_id)
    if not snapshots:
        logger.error(
            f"No Snapshots with ID {snapshot_id} for {apps_versions.keys()} found."
        )
        sys.exit(1)
    snapshot = snapshots[0]
    snapshot_id = snapshot["short_id"]
    if not noninteractive:
        print(f"Snapshot to restore: \t{snapshot_id}")
        restore_app_versions = app_versions_from_tags(snapshot.get("tags"))
        print("Apps:")
        for app, version in apps_versions.items():
            restore_version = restore_app_versions.get(app)
            print(f"\t{app} \t {restore_version}")
            # Warn when the deployed version differs from the snapshot's one.
            if version != restore_version:
                print(f"WARNING!!! The running app is deployed with version {version}")
        print("The following volume paths will be restored:")
        for p in backup_paths:
            print(f"\t{p}")
        if not no_commands:
            print("The following commands will be executed:")
            # Distinct loop name so the `container` option is not shadowed.
            for hook_container, cmd in list(pre_commands.items()) + list(
                post_commands.items()
            ):
                print(
                    f"\t{hook_container.labels['com.docker.swarm.service.name']}:\t{cmd}"
                )
        snapshot_date = datetime.fromisoformat(snapshot["time"])
        delta = datetime.now(tz=timezone.utc) - snapshot_date
        print(f"This snapshot is {delta} old")
        print("\nTHIS COMMAND WILL IRREVERSIBLY OVERWRITES FILES")
        prompt = input("Type YES (uppercase) to continue: ")
        if prompt != "YES":
            logger.error("Restore aborted")
            sys.exit(1)
    print(f"Restoring Snapshot {snapshot_id} at {target}")
    if not no_commands and pre_commands:
        print("Run pre commands.")
        run_commands(pre_commands)
    if backup_paths:
        result = restic_restore(
            snapshot_id=snapshot_id, include=backup_paths, target_dir=target
        )
        logger.debug(result)
    else:
        print("No paths to restore.")
    if not no_commands and post_commands:
        print("Run post commands.")
        run_commands(post_commands)
|
||||
|
||||
|
||||
@cli.command()
@click.option("snapshot_id", "--snapshot", "-s", envvar="SNAPSHOT", default="latest")
@click.option("target", "--target", "-t", envvar="TARGET", default="/")
@click.option(
    "noninteractive", "--noninteractive", envvar="NONINTERACTIVE", is_flag=True
)
@click.argument("paths", nargs=-1, required=True, envvar="INCLUDE_PATH")
def restore_path(snapshot_id, target, noninteractive, paths):
    """PATHS: list of paths to restore"""
    # Restores the given paths from a snapshot without consulting any service
    # labels and without running hooks. In interactive mode a summary is
    # printed and "YES" must be typed before anything is written.
    # Exits with status 1 if no snapshot is found or confirmation is declined.
    snapshots = get_snapshots(snapshot_id)
    if not snapshots:
        logger.error(f"No Snapshots with ID {snapshot_id} for app {SERVICE} found.")
        # sys.exit instead of the site-provided exit(), which is meant for the
        # interactive shell and is not guaranteed to exist.
        sys.exit(1)
    snapshot = snapshots[0]
    snapshot_id = snapshot["short_id"]
    if not noninteractive:
        print(f"Snapshot to restore: \t{snapshot_id}")
        restore_app_versions = app_versions_from_tags(snapshot.get("tags"))
        print("Apps:")
        for app, version in restore_app_versions.items():
            # Only list the apps matching the current service selection.
            if SERVICE == "ALL" or SERVICE == app:
                print(f"\t{app} \t {version}")
        print("The following paths will be restored:")
        for p in paths:
            print(f"\t{p}")
        snapshot_date = datetime.fromisoformat(snapshot["time"])
        delta = datetime.now(tz=timezone.utc) - snapshot_date
        print(f"This snapshot is {delta} old")
        print("\nTHIS COMMAND WILL IRREVERSIBLY OVERWRITES FILES")
        prompt = input("Type YES (uppercase) to continue: ")
        if prompt != "YES":
            logger.error("Restore aborted")
            sys.exit(1)
    print(f"Restoring Snapshot {snapshot_id} at {target}")
    result = restic_restore(snapshot_id=snapshot_id, include=paths, target_dir=target)
    logger.debug(result)
|
||||
|
||||
|
||||
def restic_restore(snapshot_id, include=[], target_dir=None):
    """Run ``restic restore`` for one snapshot and return the executor's result.

    Args:
        snapshot_id: ID of the snapshot to restore.
        include: paths passed to restic, each as its own ``--include`` option.
        target_dir: directory passed as ``--target``; omitted when falsy.
    """
    argv = restic.cat.base_command() + ["restore", snapshot_id]
    # One "--include <path>" pair per requested path, in the given order.
    argv += [token for item in include for token in ("--include", item)]
    if target_dir:
        argv += ["--target", target_dir]
    return restic.internal.command_executor.execute(argv)
|
||||
|
||||
|
||||
def get_snapshots(snapshot_id=None):
    """Return the restic snapshots that belong to the selected SERVICE.

    Args:
        snapshot_id: ``None`` to list every matching snapshot, ``"latest"``
            to get only the last matching one, or a concrete snapshot ID.

    Returns:
        A list of snapshot dicts. For ``"latest"`` the list has at most one
        element (the last entry in the order restic reported them). The list
        is empty when nothing matches.

    Exits the process with status 1 when an explicitly requested snapshot
    exists but is not tagged with SERVICE.
    """
    if snapshot_id and snapshot_id != "latest":
        snapshots = restic.snapshots(snapshot_id=snapshot_id)
        if not snapshots:
            # Nothing came back for this ID: let the caller report it instead
            # of crashing with an IndexError on snapshots[0].
            return []
        if SERVICE not in app_versions_from_tags(snapshots[0].get("tags")):
            logger.error(f"Snapshot with ID {snapshot_id} does not contain {SERVICE}")
            exit(1)
    else:
        # Keep only snapshots whose tags mention SERVICE.
        snapshots = [
            snap
            for snap in restic.snapshots()
            if SERVICE in app_versions_from_tags(snap.get("tags"))
        ]
    if snapshot_id == "latest":
        # Slice (not index) so that an empty list stays an empty list.
        return snapshots[-1:]
    return snapshots
|
||||
|
||||
|
||||
def app_versions_from_tags(tags):
    """Turn snapshot tags of the form ``app:version`` into ``{app: version}``.

    A tag without a colon maps to ``None``. For a tag with several colons only
    the text between the first and second colon is used as the version.
    A falsy ``tags`` value yields an empty dict.
    """
    versions = {}
    for tag in tags or ():
        pieces = tag.split(":")
        versions[pieces[0]] = pieces[1] if len(pieces) > 1 else None
    return versions
|
||||
|
||||
|
||||
def str2bool(value: str) -> bool:
    """Return True when ``value`` is one of yes/true/t/1 (case-insensitive)."""
    truthy_words = ("yes", "true", "t", "1")
    lowered = value.lower()
    return lowered in truthy_words
|
||||
|
||||
|
||||
def parse_backup_labels(hook_type="backup", selected_container=[]):
    """Collect backup settings per stack from the labels of all swarm services.

    Args:
        hook_type: middle part of the hook label names that are read, i.e.
            ``backupbot.<hook_type>.pre-hook`` / ``.post-hook``.
        selected_container: optional list of service names (without the
            ``<stack>_`` prefix); when non-empty, volumes and hooks are only
            collected for these services.

    Returns:
        dict mapping stack name to a settings dict that may contain the keys
        ``enabled``, ``version``, ``volumes``, ``excluded_volumes``,
        ``included_volume_paths``, ``pre_hooks`` and ``post_hooks``.
    """
    client = docker.from_env()
    # Look up running containers by the swarm service they belong to.
    container_by_service = {
        c.labels.get("com.docker.swarm.service.name"): c
        for c in client.containers.list()
    }
    services = client.services.list()
    app_settings = {}
    for s in services:
        specs = s.attrs["Spec"]
        labels = specs["Labels"]
        stack_name = labels["com.docker.stack.namespace"]
        container_name = s.name.removeprefix(f"{stack_name}_")
        version = labels.get(f"coop-cloud.{stack_name}.version")
        # All services of one stack share (and accumulate into) one settings dict.
        settings = app_settings[stack_name] = app_settings.get(stack_name) or {}
        if (backup := labels.get("backupbot.backup")) and str2bool(backup):
            settings["enabled"] = True
        if version:
            settings["version"] = version
        # "enabled" and "version" are recorded above even for services that
        # are filtered out here; only volumes and hooks are skipped.
        if selected_container and container_name not in selected_container:
            logger.debug(f"Skipping {s.name} because it's not a selected container")
            continue
        if mounts := specs["TaskTemplate"]["ContainerSpec"].get("Mounts"):
            volumes = parse_volumes(stack_name, mounts)
            # Entries already stored for this stack win over the new ones.
            volumes.update(settings.get("volumes") or {})
            settings["volumes"] = volumes
            excluded_volumes, included_volume_paths = parse_excludes_includes(labels)
            settings["excluded_volumes"] = excluded_volumes.union(
                settings.get("excluded_volumes") or set()
            )
            settings["included_volume_paths"] = included_volume_paths.union(
                settings.get("included_volume_paths") or set()
            )
        if container := container_by_service.get(s.name):
            # Hooks are stored keyed by the container object they run in.
            if command := labels.get(f"backupbot.{hook_type}.pre-hook"):
                if not (pre_hooks := settings.get("pre_hooks")):
                    pre_hooks = settings["pre_hooks"] = {}
                pre_hooks[container] = command
            if command := labels.get(f"backupbot.{hook_type}.post-hook"):
                if not (post_hooks := settings.get("post_hooks")):
                    post_hooks = settings["post_hooks"] = {}
                post_hooks[container] = command
        else:
            logger.debug(f"Container {s.name} is not running.")
            # A hook without a container cannot be executed; report it loudly.
            if labels.get(f"backupbot.{hook_type}.pre-hook") or labels.get(
                f"backupbot.{hook_type}.post-hook"
            ):
                logger.error(f"Container {s.name} contain hooks but it's not running")
    return app_settings
|
||||
|
||||
|
||||
def get_backup_details(app_settings, volumes=[]):
    """Aggregate hooks, paths and versions of every enabled, selected app.

    Args:
        app_settings: mapping of app name to its settings dict.
        volumes: optional volume names forwarded to ``add_backup_paths`` to
            restrict which volumes are considered.

    Returns:
        Tuple ``(pre_hooks, post_hooks, backup_paths, backup_apps_versions)``
        where ``backup_paths`` is a list and the other three are dicts.
    """
    collected_paths = set()
    versions_by_app = {}
    before, after = {}, {}
    for app, settings in app_settings.items():
        if not settings.get("enabled"):
            continue
        # Honour the global service selection; "ALL" matches every app.
        if SERVICE not in ("ALL", app):
            continue
        versions_by_app[app] = settings.get("version")
        add_backup_paths(collected_paths, settings, app, volumes)
        before.update(settings.get("pre_hooks") or {})
        after.update(settings.get("post_hooks") or {})
    return before, after, list(collected_paths), versions_by_app
|
||||
|
||||
|
||||
def add_backup_paths(backup_paths, settings, app, selected_volumes):
    """Add the paths that should be backed up for one app to ``backup_paths``.

    Args:
        backup_paths: set that is extended in place.
        settings: the app's settings dict; the keys ``volumes``,
            ``included_volume_paths`` and ``excluded_volumes`` are read.
        app: app name, only used in log messages.
        selected_volumes: when non-empty, only volumes with these names are
            considered.
    """
    if volumes := settings.get("volumes"):
        if includes := settings.get("included_volume_paths"):
            # `includes` holds (volume, rel_paths) pairs; take the volume names.
            included_volumes = list(zip(*includes))[0]
            for volume, rel_paths in includes:
                if not (volume_path := volumes.get(volume)):
                    logger.error(
                        f"Can not find volume with the name {volume} for {app}"
                    )
                    continue
                if selected_volumes and volume not in selected_volumes:
                    logger.debug(
                        f"Skipping {volume}:{rel_paths} because the volume is not selected"
                    )
                    continue
                # Only the listed sub-paths of this volume are backed up.
                for p in rel_paths:
                    absolute_path = Path(f"{volume_path}/{p}")
                    backup_paths.add(absolute_path)
        else:
            included_volumes = []
        excluded_volumes = settings.get("excluded_volumes") or []
        for name, path in volumes.items():
            if selected_volumes and name not in selected_volumes:
                logger.debug(
                    f"Skipping volume: {name} because the volume is not selected"
                )
                continue
            if name in excluded_volumes:
                logger.debug(f"Skipping volume: {name} because the volume is excluded")
                continue
            # Volumes with explicit include paths were handled above; adding
            # the whole volume here would override that narrower selection.
            if name in included_volumes:
                logger.debug(f"Skipping volume: {name} because a path is selected")
                continue
            backup_paths.add(path)
    else:
        logger.warning(f"{app} does not contain any volumes")
|
||||
|
||||
|
||||
def parse_volumes(stack_name, mounts):
    """Map the short names of a service's volume mounts to their data paths.

    Only mounts with ``Type == "volume"`` are used. The key is the mount's
    ``Source`` without a leading ``<stack_name>_``; the value is
    ``VOLUME_PATH + Source + "/_data/"`` as a ``Path``.
    """
    stack_prefix = stack_name + "_"
    return {
        mount["Source"].removeprefix(stack_prefix): Path(
            f"{VOLUME_PATH}{mount['Source']}/_data/"
        )
        for mount in mounts
        if mount["Type"] == "volume"
    }
|
||||
|
||||
|
||||
def parse_excludes_includes(labels):
    """Read the per-volume backup labels of one service.

    Two label forms below the prefix ``backupbot.backup.volumes.`` are
    recognised:

    * ``<volume>.path = "a,b"``: back up only these comma-separated relative
      paths of the volume.
    * ``<volume> = <bool>``: a value that ``str2bool`` treats as false
      excludes the whole volume.

    Args:
        labels: mapping of label name to label value.

    Returns:
        Tuple ``(excluded_volumes, included_volume_paths)``: a set of volume
        names and a set of ``(volume_name, (rel_path, ...))`` tuples.
    """
    prefix = "backupbot.backup.volumes."
    path_suffix = ".path"
    excluded_volumes = set()
    included_volume_paths = set()
    for label, value in labels.items():
        if not label.startswith(prefix):
            continue
        volume_name = label.removeprefix(prefix).removesuffix(path_suffix)
        # Test for the full ".path" suffix, the same one that is stripped
        # above. Testing only for "path" misread volumes whose own name ends
        # in "path" (e.g. "datapath") as include-path labels.
        if label.endswith(path_suffix):
            relative_paths = tuple(value.split(","))
            included_volume_paths.add((volume_name, relative_paths))
        elif not str2bool(value):
            excluded_volumes.add(volume_name)
    return excluded_volumes, included_volume_paths
|
||||
|
||||
|
||||
def copy_secrets(apps):
    """Copy the docker secrets of the given apps into SECRET_PATH.

    SECRET_PATH is removed and recreated first. For every service whose stack
    namespace is in ``apps`` and that declares secrets, each secret file is
    copied from the running container's mount directory to
    ``SECRET_PATH + <SecretName>``.

    Args:
        apps: collection of stack names whose secrets should be copied.
    """
    # TODO: check if it is deployed
    # Start from an empty directory so stale secrets are not kept around.
    rmtree(SECRET_PATH, ignore_errors=True)
    os.mkdir(SECRET_PATH)
    client = docker.from_env()
    # Look up running containers by the swarm service they belong to.
    container_by_service = {
        c.labels.get("com.docker.swarm.service.name"): c
        for c in client.containers.list()
    }
    services = client.services.list()
    for s in services:
        app_name = s.attrs["Spec"]["Labels"]["com.docker.stack.namespace"]
        if app_name in apps and (
            app_secs := s.attrs["Spec"]["TaskTemplate"]["ContainerSpec"].get("Secrets")
        ):
            # The secret files are read from the container's mount directory,
            # so a service without a running container has to be skipped.
            if not container_by_service.get(s.name):
                logger.warning(
                    f"Container {s.name} is not running, secrets can not be copied."
                )
                continue
            container_id = container_by_service[s.name].id
            for sec in app_secs:
                src = f"/var/lib/docker/containers/{container_id}/mounts/secrets/{sec['SecretID']}"
                if not Path(src).exists():
                    logger.error(
                        f"For the secret {sec['SecretName']} the file {src} does not exist for {s.name}"
                    )
                    continue
                # NOTE(review): plain concatenation; assumes SECRET_PATH ends
                # with a path separator. Confirm at its definition.
                dst = SECRET_PATH + sec["SecretName"]
                logger.debug(f"Copy Secret {sec['SecretName']}")
                copyfile(src, dst)
|
||||
|
||||
|
||||
def run_commands(commands):
    """Run each hook command inside its container via ``bash -c``.

    Args:
        commands: mapping of container object to the command string taken
            from the hook label. Empty commands are skipped.

    A leading ``bash -c`` / ``sh -c`` wrapper and one pair of surrounding
    quotes are stripped, then the command is run under ``set -o pipefail``.
    Failures are logged; they do not abort the remaining commands.
    """
    for container, command in commands.items():
        if not command:
            continue
        # Remove bash/sh wrapping
        command = (
            command.removeprefix("bash -c").removeprefix("sh -c").removeprefix(" ")
        )
        # Remove quotes surrounding the command
        if (
            len(command) >= 2
            and command[0] == command[-1]
            and (command[0] == "'" or command[0] == '"')
        ):
            command = command[1:-1]
        # Use bash's pipefail to return exit codes inside a pipe to prevent silent failure
        script = f"set -o pipefail;{command}"
        # Human-readable form, used for log messages only.
        shown = f"bash -c '{script}'"
        logger.info(f"run command in {container.name}:")
        logger.info(shown)
        # Pass an argv list instead of one string: when the wrapped string is
        # split again, any single quote inside the hook would end the outer
        # '...' early and mangle the command.
        result = container.exec_run(["bash", "-c", script])
        if result.exit_code:
            logger.error(
                f"Failed to run command {shown} in {container.name}: {result.output.decode()}"
            )
        else:
            logger.debug(result.output.decode())
|
||||
|
||||
|
||||
def backup_volumes(backup_paths, apps_versions, retries, dry_run=False):
    """Back up the given paths with restic, retrying on restic failures.

    Args:
        backup_paths: paths to back up; entries rejected by ``path_exists``
            are dropped before restic is called.
        apps_versions: mapping of app name to version, stored on the snapshot
            as ``app:version`` tags.
        retries: number of additional attempts after a ``ResticFailedError``.
        dry_run: forwarded to the restic backup call.

    Returns after the first successful run. Exits the process with status 1
    once a run fails and no retries are left.
    """
    while True:
        try:
            logger.info("Backup these paths:")
            logger.info("\n".join(map(str, backup_paths)))
            backup_paths = list(filter(path_exists, backup_paths))
            cmd = restic.cat.base_command()
            parent = get_snapshots("latest")
            if parent:
                # https://restic.readthedocs.io/en/stable/040_backup.html#file-change-detection
                cmd.extend(["--parent", parent[0]["short_id"]])
            tags = [f"{app}:{version}" for app, version in apps_versions.items()]
            # Tag full backups with "ALL" so they match when SERVICE is "ALL".
            if SERVICE == "ALL":
                tags.append(SERVICE)
            logger.info("Start volume backup")
            result = restic.internal.backup.run(
                cmd, backup_paths, dry_run=dry_run, tags=tags
            )
            # NOTE(review): `summary` is not a standard logging method; it is
            # presumably a custom level registered elsewhere in this file.
            logger.summary("backup finished", extra=result)
            return
        except ResticFailedError as error:
            logger.error(f"Backup failed for {SERVICE}.")
            logger.error(error, exc_info=True)
            if retries > 0:
                # Loop around and try the whole backup again.
                retries -= 1
            else:
                exit(1)
|
||||
|
||||
|
||||
def path_exists(path):
    """Return whether ``path`` exists, logging an error when it does not.

    Args:
        path: object with an ``exists()`` method, e.g. ``pathlib.Path``.
    """
    # Check once, so the logged error and the returned value cannot disagree
    # if the path appears or disappears between two separate checks.
    found = path.exists()
    if not found:
        logger.error(f"{path} does not exist")
    return found
|
||||
|
||||
|
||||
@cli.command()
def snapshots():
    # CLI command: print one line per snapshot of the selected SERVICE
    # (timestamp without fractional seconds, short ID and, when the tags
    # carry one, the app version). Warn when there are no snapshots.
    found = get_snapshots()
    for snap in found:
        columns = [snap["time"].split(".")[0], snap["short_id"]]
        tags = snap.get("tags")
        if tags:
            version = app_versions_from_tags(tags).get(SERVICE)
            if version:
                columns.append(version)
        print(*columns)
    if found:
        return
    err_msg = "No Snapshots found"
    if SERVICE != "ALL":
        service_name = SERVICE.replace("_", ".")
        err_msg += f" for app {service_name}"
    logger.warning(err_msg)
|
||||
|
||||
|
||||
@cli.command()
@click.option("snapshot", "--snapshot", "-s", envvar="SNAPSHOT", default="latest")
@click.option("show_all", "--all", "-a", envvar="SHOW_ALL", is_flag=True)
@click.option("timestamps", "--timestamps", "-t", envvar="TIMESTAMPS", is_flag=True)
@click.argument(
    "path", required=False, default="/var/lib/docker/volumes/", envvar="INCLUDE_PATH"
)
def ls(snapshot, show_all, timestamps, path):
    # CLI command: print the paths stored in a snapshot, optionally prefixed
    # with their ctime. With --all the path filter is dropped.
    if snapshot == "latest":
        newest = get_snapshots("latest")
        if not newest:
            logger.error(f"There is no latest snapshot for {SERVICE}")
            exit(1)
        snapshot = newest[0]["short_id"]
    listing_root = None if show_all else path
    for entry in list_files(snapshot, listing_root):
        # Records without a "path" key are not file entries; skip them.
        if not entry.get("path"):
            continue
        if timestamps:
            line = f"{entry['ctime']}\t{entry['path']}"
        else:
            line = f"{entry['path']}"
        print(line)
|
||||
|
||||
|
||||
def list_files(snapshot, path):
    """Run ``restic ls`` and return the decoded JSON records.

    Args:
        snapshot: snapshot ID to list.
        path: optional path inside the snapshot; omitted when falsy.

    Returns:
        List of dicts, one per JSON line printed by restic.

    Raises:
        ResticFailedError: for any restic failure other than a missing
            snapshot. A missing snapshot is logged and exits with status 1.
    """
    cmd = restic.cat.base_command() + ["ls", snapshot]
    if path:
        cmd.append(path)
    try:
        output = restic.internal.command_executor.execute(cmd)
    except ResticFailedError as error:
        if "no snapshot found" in str(error):
            err_msg = f'There is no snapshot "{snapshot}"'
            if SERVICE != "ALL":
                err_msg += f' for the app "{SERVICE}"'
            logger.error(err_msg)
            exit(1)
        else:
            # Re-raise unchanged, keeping the original traceback.
            raise
    # Decode one JSON document per line. The previous approach joined the
    # records with "|" and split on it again, which broke on any file name
    # containing "|". Split on "\n" only: JSON escapes newlines inside
    # strings, whereas str.splitlines() would also split on other characters
    # that may legally occur in a file name.
    return [json.loads(line) for line in output.split("\n") if line.strip()]
|
||||
|
||||
|
||||
@cli.command()
@click.option("snapshot", "--snapshot", "-s", envvar="SNAPSHOT", default="latest")
@click.option("path", "--path", "-p", envvar="INCLUDE_PATH")
@click.option("volumes", "--volumes", "-v", envvar="VOLUMES")
@click.option("secrets", "--secrets", "-c", is_flag=True, envvar="SECRETS")
def download(snapshot, path, volumes, secrets):
    # CLI command: dump a path, the volumes and/or the secrets of SERVICE
    # from a snapshot and pack everything into /tmp/backup.tar.gz.
    # Each entry of file_dumps is (raw bytes, TarInfo describing them).
    file_dumps = []
    if snapshot == "latest":
        latest_snapshot = get_snapshots("latest")
        if not latest_snapshot:
            logger.error(f"There is no latest snapshot for {SERVICE}")
            exit(1)
        snapshot = latest_snapshot[0]["short_id"]
    # Without any selection, download volumes and secrets.
    if not any([path, volumes, secrets]):
        volumes = secrets = True
    if path:
        path = path.removesuffix("/")
        binary_output = dump(snapshot, path)
        files = list_files(snapshot, path)
        # Look up the listing entry for exactly this path to learn its type.
        # NOTE(review): raises IndexError when no entry matches the path.
        filetype = [f.get("type") for f in files if f.get("path") == path][0]
        filename = Path(path).name
        # NOTE(review): presumably restic dumps a directory as a tar archive,
        # hence the ".tar" suffix. Confirm against the restic docs.
        if filetype == "dir":
            filename = filename + ".tar"
        tarinfo = tarfile.TarInfo(name=filename)
        tarinfo.size = len(binary_output)
        file_dumps.append((binary_output, tarinfo))
    if volumes:
        if SERVICE == "ALL":
            logger.error("Please specify '--host' when using '--volumes'")
            exit(1)
        files = list_files(snapshot, VOLUME_PATH)
        # NOTE(review): files[0] is skipped; presumably it is not a file entry
        # (e.g. a snapshot header record). Confirm.
        for f in files[1:]:
            # `path` is reused as a loop variable from here on.
            path = f["path"]
            if Path(path).name.startswith(SERVICE) and f["type"] == "dir":
                binary_output = dump(snapshot, path)
                filename = f"{Path(path).name}.tar"
                tarinfo = tarfile.TarInfo(name=filename)
                tarinfo.size = len(binary_output)
                file_dumps.append((binary_output, tarinfo))
    if secrets:
        if SERVICE == "ALL":
            logger.error("Please specify '--host' when using '--secrets'")
            exit(1)
        filename = f"{SERVICE}.json"
        files = list_files(snapshot, SECRET_PATH)
        # The flag is no longer needed; reuse the name for the collected
        # {secret name: secret value} mapping.
        secrets = {}
        for f in files[1:]:
            path = f["path"]
            if Path(path).name.startswith(SERVICE) and f["type"] == "file":
                secret = dump(snapshot, path).decode()
                secret_name = path.removeprefix(f"{SECRET_PATH}{SERVICE}_")
                secrets[secret_name] = secret
        # All secrets go into one JSON document inside the archive.
        binary_output = json.dumps(secrets).encode()
        tarinfo = tarfile.TarInfo(name=filename)
        tarinfo.size = len(binary_output)
        file_dumps.append((binary_output, tarinfo))
    with tarfile.open("/tmp/backup.tar.gz", "w:gz") as tar:
        print(f"Writing files to /tmp/backup.tar.gz...")
        for binary_output, tarinfo in file_dumps:
            tar.addfile(tarinfo, fileobj=io.BytesIO(binary_output))
    size = get_formatted_size("/tmp/backup.tar.gz")
    print(f"Backup has been written to /tmp/backup.tar.gz with a size of {size}")
|
||||
|
||||
|
||||
def get_formatted_size(file_path):
    """Return the size of ``file_path`` as a human-readable string.

    The size is expressed in the largest unit (Bytes, KB, MB, GB, TB; factor
    1024) in which it is below 1024, rounded to three decimals. Sizes of
    1024 TB and more are reported in TB.

    Args:
        file_path: path of an existing file.

    Raises:
        OSError: if the file does not exist or is inaccessible.
    """
    size = os.path.getsize(file_path)
    units = ["Bytes", "KB", "MB", "GB", "TB"]
    # Only step through the units that have a larger one after them. Dividing
    # again after the last unit would label a value in PB as "TB".
    for unit in units[:-1]:
        if size < 1024:
            return f"{round(size, 3)} {unit}"
        size /= 1024
    return f"{round(size, 3)} {units[-1]}"
|
||||
|
||||
|
||||
def dump(snapshot, path):
    """Run ``restic dump`` for ``path`` in ``snapshot`` and return its stdout.

    Returns the captured standard output as bytes. When the command exits
    with a non-zero status the error is logged and the process exits with
    status 1.
    """
    argv = restic.cat.base_command() + ["dump", snapshot, path]
    print(f"Dumping {path} from snapshot '{snapshot}'")
    completed = subprocess.run(argv, capture_output=True)
    if completed.returncode:
        logger.error(
            f"error while dumping {path} from snapshot '{snapshot}': {completed.stderr}"
        )
        exit(1)
    return completed.stdout
|
||||
|
||||
|
||||
# Run the click command group when the file is executed as a script.
if __name__ == "__main__":
    cli()
|
@ -1,15 +0,0 @@
|
||||
---
|
||||
version: "3.8"
|
||||
services:
|
||||
app:
|
||||
environment:
|
||||
- HTTPS_PASSWORD_FILE=/run/secrets/https_password
|
||||
- RESTIC_USER
|
||||
secrets:
|
||||
- source: https_password
|
||||
mode: 0400
|
||||
|
||||
secrets:
|
||||
https_password:
|
||||
external: true
|
||||
name: ${STACK_NAME}_https_password_${SECRET_HTTPS_PASSWORD_VERSION}
|
11
compose.pushbasicauth.yml
Normal file
11
compose.pushbasicauth.yml
Normal file
@ -0,0 +1,11 @@
|
||||
---
# Compose overlay: gives the app service access to the `push_basicauth` secret.
version: "3.8"
services:
  app:
    secrets:
      - push_basicauth

secrets:
  push_basicauth:
    # The secret must already exist in the swarm; it is not created here.
    external: true
    # NOTE(review): the other secrets in this repository use a `_VERSION`
    # suffix on the variable (e.g. SECRET_RESTIC_REPO_VERSION); confirm that
    # SECRET_PUSH_BASICAUTH is the intended variable name.
    name: ${STACK_NAME}_push_basicauth_${SECRET_PUSH_BASICAUTH}
|
13
compose.secret.yml
Normal file
13
compose.secret.yml
Normal file
@ -0,0 +1,13 @@
|
||||
---
# Compose overlay: passes the restic repository location via a secret file
# instead of a plain environment variable.
version: "3.8"
services:
  app:
    environment:
      # Path of the mounted `restic_repo` secret inside the container.
      - RESTIC_REPOSITORY_FILE=/run/secrets/restic_repo
    secrets:
      - restic_repo

secrets:
  restic_repo:
    # The secret must already exist in the swarm; it is not created here.
    external: true
    name: ${STACK_NAME}_restic_repo_${SECRET_RESTIC_REPO_VERSION}
|
@ -5,12 +5,19 @@ services:
|
||||
environment:
|
||||
- SSH_KEY_FILE=/run/secrets/ssh_key
|
||||
- SSH_HOST_KEY
|
||||
- SSH_HOST_KEY_DISABLE
|
||||
secrets:
|
||||
- source: ssh_key
|
||||
mode: 0400
|
||||
configs:
|
||||
- source: ssh_config
|
||||
target: /root/.ssh/config
|
||||
|
||||
secrets:
|
||||
ssh_key:
|
||||
external: true
|
||||
name: ${STACK_NAME}_ssh_key_${SECRET_SSH_KEY_VERSION}
|
||||
|
||||
configs:
|
||||
ssh_config:
|
||||
name: ${STACK_NAME}_ssh_config_${SSH_CONFIG_VERSION}
|
||||
file: ssh_config
|
||||
|
50
compose.yml
50
compose.yml
@ -2,34 +2,50 @@
|
||||
version: "3.8"
|
||||
services:
|
||||
app:
|
||||
image: thecoopcloud/backup-bot-two:latest
|
||||
# build: .
|
||||
image: git.coopcloud.tech/coop-cloud/backup-bot-two:2.3.0-beta
|
||||
volumes:
|
||||
- "/var/run/docker.sock:/var/run/docker.sock"
|
||||
- "backups:/backups"
|
||||
- "/var/lib/docker/volumes/:/var/lib/docker/volumes/"
|
||||
- "/var/lib/docker/containers/:/var/lib/docker/containers/:ro"
|
||||
- backups:/backups
|
||||
environment:
|
||||
- CRON_SCHEDULE
|
||||
- RESTIC_REPO
|
||||
- RESTIC_REPOSITORY
|
||||
- RESTIC_PASSWORD_FILE=/run/secrets/restic_password
|
||||
- BACKUP_DEST=/backups
|
||||
- RESTIC_HOST
|
||||
- SERVER_NAME
|
||||
- REMOVE_BACKUP_VOLUME_AFTER_UPLOAD=1
|
||||
secrets:
|
||||
- restic_password
|
||||
configs:
|
||||
- source: entrypoint
|
||||
target: /entrypoint.sh
|
||||
mode: 666
|
||||
- source: cronjob
|
||||
target: /cronjob.sh
|
||||
mode: 666
|
||||
deploy:
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.services.${STACK_NAME}.loadbalancer.server.port=8008"
|
||||
- "traefik.http.routers.${STACK_NAME}.rule="
|
||||
- "traefik.http.routers.${STACK_NAME}.entrypoints=web-secure"
|
||||
- "traefik.http.routers.${STACK_NAME}.tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||
- coop-cloud.${STACK_NAME}.version=0.1.0+latest
|
||||
|
||||
volumes:
|
||||
backups:
|
||||
- coop-cloud.${STACK_NAME}.version=2.3.0+2.3.0-beta
|
||||
- coop-cloud.${STACK_NAME}.timeout=${TIMEOUT:-300}
|
||||
- coop-cloud.backupbot.enabled=true
|
||||
#entrypoint: ['tail', '-f','/dev/null']
|
||||
healthcheck:
|
||||
test: "pgrep crond"
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 10
|
||||
start_period: 5m
|
||||
|
||||
secrets:
|
||||
restic_password:
|
||||
external: true
|
||||
name: ${STACK_NAME}_restic_password_${SECRET_RESTIC_PASSWORD_VERSION}
|
||||
|
||||
configs:
|
||||
entrypoint:
|
||||
name: ${STACK_NAME}_entrypoint_${ENTRYPOINT_VERSION}
|
||||
file: entrypoint.sh
|
||||
cronjob:
|
||||
name: ${STACK_NAME}_cronjob_${CRONJOB_VERSION}
|
||||
file: cronjob.sh
|
||||
|
||||
volumes:
|
||||
backups:
|
||||
|
40
cronjob.sh
Executable file
40
cronjob.sh
Executable file
@ -0,0 +1,40 @@
|
||||
#!/bin/sh
# Cron entry point: run one backup and report start / success / failure to
# the configured push endpoints.

set -e

CURL_OPTS="-s"
# Check for basic auth
# Test that the secret file exists first, so a deployment without the
# optional secret does not print a "cat: No such file" error on every run.
if [ -f /run/secrets/push_basicauth ] && [ -n "$(cat /run/secrets/push_basicauth)" ]
then
    CURL_OPTS="$CURL_OPTS -u $(cat /run/secrets/push_basicauth)"
fi

# Build the notification commands as strings; they are run with `eval` below.
# A notification that is not configured stays empty and evaluates to a no-op.
if [ -n "$PUSH_PROMETHEUS_URL" ]
then
    push_start_notification="(echo 'backup 1' | curl $CURL_OPTS --data-binary @- $PUSH_PROMETHEUS_URL)"
    push_success_notification="(echo 'backup 0' | curl $CURL_OPTS --data-binary @- $PUSH_PROMETHEUS_URL)"
    push_fail_notification="(echo 'backup -1' | curl $CURL_OPTS --data-binary @- $PUSH_PROMETHEUS_URL)"
else
    if [ -n "$PUSH_URL_START" ]
    then
        push_start_notification="curl $CURL_OPTS '$PUSH_URL_START'"
    fi

    if [ -n "$PUSH_URL_FAIL" ]
    then
        push_fail_notification="curl $CURL_OPTS '$PUSH_URL_FAIL'"
    fi

    if [ -n "$PUSH_URL_SUCCESS" ]
    then
        push_success_notification="curl $CURL_OPTS '$PUSH_URL_SUCCESS'"
    fi
fi

eval "$push_start_notification"
# Decide on the exit status of grep, i.e. on whether the log contains the
# 'backup finished' summary line. The previous `[ "$( ... )" ]` only tested
# that the backup printed any output, so a failed backup was still reported
# as a success. Running the pipeline directly also lets `tee` pass the backup
# log on to stdout instead of it being swallowed by a command substitution.
if backup --machine-logs create 2>&1 | tee /tmp/backup.log && grep -q 'backup finished' /tmp/backup.log
then
    eval "$push_success_notification"
else
    eval "$push_fail_notification"
fi
|
15
entrypoint.sh
Executable file
15
entrypoint.sh
Executable file
@ -0,0 +1,15 @@
|
||||
#!/bin/sh
# Container entry point: optionally install the SSH host key, register the
# backup cron job and run crond.

set -e

# Pin the repository server's host key when one is provided.
if [ -n "$SSH_HOST_KEY" ]
then
    echo "$SSH_HOST_KEY" > /root/.ssh/known_hosts
fi

# Abort with an error message when CRON_SCHEDULE is unset or empty.
cron_schedule="${CRON_SCHEDULE:?CRON_SCHEDULE not set}"

# Replace the crontab with the single backup job, then print it.
echo "$cron_schedule /cronjob.sh" | crontab -
crontab -l

# NOTE(review): presumably -f keeps crond in the foreground as the
# container's main process; confirm against the crond implementation in the
# image.
crond -f -d8 -L /dev/stdout
|
34
pg_backup.sh
Normal file
34
pg_backup.sh
Normal file
@ -0,0 +1,34 @@
|
||||
#!/bin/bash
# Backup / restore helper for a PostgreSQL container.
# Usage: pg_backup.sh backup | pg_backup.sh restore

set -e

BACKUP_FILE='/var/lib/postgresql/data/backup.sql'

# Dump the database named by POSTGRES_DB into BACKUP_FILE.
function backup {
    # Assign and export separately: `export VAR=$(cmd)` returns the status of
    # `export`, so a failing `cat` would go unnoticed despite `set -e`.
    PGPASSWORD=$(cat "$POSTGRES_PASSWORD_FILE")
    export PGPASSWORD
    pg_dump -U "${POSTGRES_USER}" "${POSTGRES_DB}" > "$BACKUP_FILE"
}

# Drop and recreate POSTGRES_DB, then load BACKUP_FILE into it.
function restore {
    cd /var/lib/postgresql/data/
    restore_config(){
        # Restore allowed connections
        cat pg_hba.conf.bak > pg_hba.conf
        su postgres -c 'pg_ctl reload'
    }
    # Don't allow any other connections than local
    cp pg_hba.conf pg_hba.conf.bak
    echo "local all all trust" > pg_hba.conf
    su postgres -c 'pg_ctl reload'
    # Make sure the original access rules come back even if a step below fails.
    trap restore_config EXIT INT TERM

    # Recreate Database
    psql -U "${POSTGRES_USER}" -d postgres -c "DROP DATABASE ${POSTGRES_DB} WITH (FORCE);"
    createdb -U "${POSTGRES_USER}" "${POSTGRES_DB}"
    # -1 loads the dump in a single transaction.
    psql -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -1 -f "$BACKUP_FILE"

    # Success: drop the trap and restore the configuration explicitly.
    trap - EXIT INT TERM
    restore_config
}

# Dispatch to the function named on the command line. Quoted so that the
# arguments are not subject to word splitting or globbing.
"$@"
|
1
release/1.0.0+2.0.0-beta
Normal file
1
release/1.0.0+2.0.0-beta
Normal file
@ -0,0 +1 @@
|
||||
This is the first beta release of the new backup-bot-two rewrite in Python. Be aware that updating can break your existing setup. Please read the README and update your config accordingly.
|
3
release/1.0.0+latest
Normal file
3
release/1.0.0+latest
Normal file
@ -0,0 +1,3 @@
|
||||
Breaking Change: the variables `SERVER_NAME` and `RESTIC_HOST` are merged into `RESTIC_REPOSITORY`. The format can be looked up here: https://restic.readthedocs.io/en/stable/030_preparing_a_new_repo.html
|
||||
ssh/sftp: `sftp:user@host:/repo-path`
|
||||
S3: `s3:https://s3.example.com/bucket_name`
|
@ -1,11 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
set -o pipefail
|
||||
|
||||
cron_schedule="${CRON_SCHEDULE:?CRON_SCHEDULE not set}"
|
||||
|
||||
echo "$cron_schedule /usr/bin/backup.sh" | crontab -
|
||||
crontab -l
|
||||
|
||||
crond -f -d8 -L /dev/stdout
|
4
ssh_config
Normal file
4
ssh_config
Normal file
@ -0,0 +1,4 @@
|
||||
Host *
|
||||
IdentityFile /run/secrets/ssh_key
|
||||
ServerAliveInterval 60
|
||||
ServerAliveCountMax 240
|
Loading…
x
Reference in New Issue
Block a user