Compare commits

...

86 Commits
main ... main

Author SHA1 Message Date
54e32ab422
backupbot formatting 2025-05-14 12:26:20 +02:00
4cda3c1018
make some logoutput more useful 2025-04-24 01:13:16 +02:00
f7f46d7b7b Merge pull request 'feat: Adds monitoring setup for prometheus push gateway' (#69) from prom-mon into main
Reviewed-on: coop-cloud/backup-bot-two#69
2025-01-14 13:34:34 +00:00
c1902b2dbc feat: Adds monitoring setup for prometheus push gateway 2024-12-30 14:08:05 +01:00
f40eb00435 Cleaner output for snapshots closes #63 2024-10-29 12:29:35 +01:00
3eea69ddee fix(restore): don't restore everything if no backup_path is given. 2024-10-28 17:24:41 +01:00
f1661c04e7 refactor(ls): INCLUDE_PATH parsing via click 2024-10-28 17:01:52 +01:00
4b4371ed3f add restore-path argument for undeployed apps closes #59 2024-10-28 17:01:14 +01:00
1214f59c79 expose MACHINE_LOGS flag as env 2024-10-28 16:08:56 +01:00
8798e2feb5 abra.sh: remove old BACKUPBOT_VERSION 2024-10-24 18:26:36 +02:00
119787ed39 chore: publish 2.3.0+2.3.0-beta release 2024-10-24 18:23:32 +02:00
141bedb069 feat(ls): add --timestamps flag #37 2024-10-24 18:09:33 +02:00
14b55bbc79 feat(ls): default to /var/lib/docker/volumes/ if no path is given #37 2024-10-24 17:58:38 +02:00
ebcb0d42c5 feat(ls): default to show selected paths, --all flag to show all #37 2024-10-24 17:54:34 +02:00
dccc93ac6b optimize logging 2024-10-24 17:44:54 +02:00
826bec925f add example pg_backup.sh script 2024-10-24 17:03:02 +02:00
49dd989302 update README 2024-10-22 22:01:18 +02:00
2f965a93dc fix: select correct latest snapshot to restore 2024-10-22 21:30:12 +02:00
4054d3417e fix backup label parsing 2024-10-22 17:24:20 +02:00
f8cfcef029 refactor: move latest snapshot checking 2024-10-22 14:18:46 +02:00
4a49c4a7f0 fix download command / dump function 2024-10-22 14:18:15 +02:00
79cdec6705 chore: publish 2.2.0+2.2.1-beta release 2024-10-16 17:29:53 +02:00
2bc9400807 fix README 2024-10-16 17:21:20 +02:00
9b141a5185 Add versioning infos to README 2024-10-16 17:18:57 +02:00
6ff2312090 fix restore output 2024-10-16 17:13:19 +02:00
8b66b80332 chore: publish 2.1.0+2.2.0-beta release 2024-10-16 17:01:56 +02:00
c9b04db7a0 add app versions into restic backup tags 2024-10-16 16:53:19 +02:00
333b7ec16d chore: publish 2.0.1+2.1.1-beta release 2024-10-15 18:56:43 +02:00
aeccd605ee improve logging 2024-10-15 18:14:17 +02:00
f877186a57 list restore commands before executing it 2024-10-15 18:14:17 +02:00
9dc239b199 add --no_commands flag for restore 2024-10-15 18:14:11 +02:00
43548273fe chore: publish 2.1.0-beta release 2024-09-18 12:58:10 +02:00
5a0467dbdd change drone runner: create only images from releases 2024-09-18 12:29:08 +02:00
3aefae61c0 add host while preserving filechange detection #53 2024-09-17 19:47:40 +02:00
ac7c5fb50d restore: execute hooks, filter for volumes and container 2024-09-17 17:49:37 +02:00
cc59087b8c restic_restore: extends restic.restore to use multiple --include paths 2024-09-17 17:48:20 +02:00
eb7c35d4cd get_backup_details/add_backup_paths: filter by volumes 2024-09-17 17:45:30 +02:00
249772ec03 filter snapshots by app 2024-09-17 17:45:27 +02:00
45af6e8b5e parse_backup_labels: read restore/backup hooks, filter by container 2024-09-17 17:44:25 +02:00
f7207cdf36 refactor: move restore function 2024-09-17 14:04:52 +02:00
241fe3ce92 remove old get_backup_cmds 2024-09-17 13:54:51 +02:00
b8d61d01cd add_backup_paths: extract and merge paths from includes/excludes 2024-09-17 13:44:26 +02:00
6ac781c7e6 refactor parse_excludes_includes: save all label infos in settings
without path extraction
2024-09-17 13:43:03 +02:00
197cabf564 check backup path existence 2024-09-17 13:39:45 +02:00
fe35f1ede8 implement volume excludes and path includes 2024-09-13 19:24:32 +02:00
f254a365f2 Revert "Revert "Removed redundant code formatting""
This reverts commit e09e1b476c6776ef9c289c47082dde60754628e7.
2024-08-21 14:24:14 +02:00
0d15765673 Revert "Added extra secret step to REST server instructions"
This step is already included in the deployment process by the command `abra app secret generate -a <backupbot_name>` and it applies to every setup, not only the REST server.

This reverts commit 72688dc42c31b0df14a60a56cdd1bc9aa69e91fc.
2024-08-21 14:23:48 +02:00
e09e1b476c
Revert "Removed redundant code formatting"
This reverts commit 10e460ff2d5dbcd843338c45d1444d9f0efe9769.
2024-08-21 11:59:10 +01:00
72688dc42c
Added extra secret step to REST server instructions 2024-08-19 17:08:41 +01:00
10e460ff2d
Removed redundant code formatting 2024-08-19 14:53:48 +01:00
f2d0b92fa3 chore: publish 1.0.0+2.0.0-beta release 2024-08-14 14:47:12 +02:00
cc049b858b README: new image location 2024-07-05 16:30:40 +02:00
b7bc8ed58f fix: ignore none swarm container #52 2024-07-05 14:41:50 +02:00
68e37f5c23 Merge pull request 'Add dockerfile, and compose.yml to use it' (#49) from feature/dockerfile into main
Reviewed-on: coop-cloud/backup-bot-two#49
2024-06-01 03:36:07 +00:00
3wc
4d39d84733 Switch ENTRYPOINT to try to resolve loop on start 2024-06-01 03:35:52 +00:00
3wc
e5b9bc0446 Update requirements 2024-06-01 03:35:52 +00:00
3wc
ec4c4509dc Make entrypoint executable 2024-06-01 03:35:52 +00:00
3wc
26162a9e38 Add --break-system-packages, surely we don't need a virtualenv 2024-06-01 03:35:52 +00:00
3wc
bd581fd8d7 Move entrypoint script into Docker image 2024-06-01 03:35:52 +00:00
3wc
e77432e3ab Move /entrypoint.sh to Dockerfile 2024-06-01 03:35:52 +00:00
3wc
001a654e37 Remove redundant stuff from entrypoint 2024-06-01 03:35:52 +00:00
3wc
c5574edc54 Whoops, wrong image 2024-06-01 03:35:52 +00:00
3wc
50e4d68717 Switch to backup-bot-two image 2024-06-01 03:35:52 +00:00
3wc
c7830ceb6f Whoops skip shellcheck 2024-06-01 03:35:52 +00:00
3wc
b6f859efbb Reinstate Docker image 2024-06-01 03:35:52 +00:00
7f14698824 change loglever to warning for not running container 2024-05-06 11:31:40 +02:00
2a9a98172f Add debug infos 2024-04-30 15:27:17 +02:00
282215cf9c Add debug infos 2024-04-30 14:59:59 +02:00
ae7a14b6f1 Fix usage of RESTIC_REPOSITORY_FILE #51 2024-04-30 14:51:46 +02:00
8acdb20e5b Fix loghandler 2024-04-29 14:18:32 +02:00
5582744073 Fix usage of RESTIC_REPOSITORY_FILE #51 2024-04-29 14:16:13 +02:00
3wc
84d606fa80 Add CHANGELOG.md
[ci skip]
2024-04-09 22:51:09 -03:00
7865907811 fix push notification precendence race condition 2024-03-08 15:42:00 +01:00
dc66c02e23 make run_cron cmd independent from push_success_notifiaction 2024-02-13 11:53:27 +01:00
f730c70bfe feat: add retry option 2024-01-18 18:01:30 +01:00
faa7ae3dd1 fix Readme 2024-01-17 20:36:06 +01:00
79eeec428a Push Notifications #24 2024-01-16 19:40:31 +01:00
4164760dc6 Sepcify secret and volume donwload via env, fixes #44 2024-01-11 18:46:58 +01:00
e644679b8b Clearer service name in warning message. Fixes #46 2024-01-11 18:39:26 +01:00
0c587ac926 add spaces for missing snapshot, fixes #45 2024-01-11 18:34:58 +01:00
65686cd891 Fix python package install error 2023-12-19 01:16:12 +01:00
ac055c932e fix: remove bash/sh wrapping 2023-12-13 18:27:12 +01:00
64328c79b1 make --noninteractive a flag 2023-12-12 13:39:26 +01:00
15275b2571 structured json logging with -m flag 2023-11-23 20:16:15 +01:00
4befebba38 Merge pull request 'fix removing quotes' (#40) from p4u1/backup-bot-two:fix-quotes into main
Reviewed-on: coop-cloud/backup-bot-two#40
2023-11-11 08:15:12 +00:00
d2087a441e fix removing quotes 2023-11-11 08:55:12 +01:00
14 changed files with 758 additions and 213 deletions

View File

@ -2,11 +2,16 @@
kind: pipeline kind: pipeline
name: linters name: linters
steps: steps:
- name: run shellcheck - name: publish image
image: koalaman/shellcheck-alpine image: plugins/docker
commands: settings:
- shellcheck backup.sh username: 3wordchant
password:
trigger: from_secret: git_coopcloud_tech_token_3wc
branch: repo: git.coopcloud.tech/coop-cloud/backup-bot-two
- main tags: ${DRONE_SEMVER_BUILD}
registry: git.coopcloud.tech
when:
event:
include:
- tag

View File

@ -8,6 +8,17 @@ RESTIC_REPOSITORY=/backups/restic
CRON_SCHEDULE='30 3 * * *' CRON_SCHEDULE='30 3 * * *'
# Push Notifications
#PUSH_PROMETHEUS_URL=https://pushgateway.example.com/metrics/job/backup
# or
#PUSH_URL_START=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=start
#PUSH_URL_SUCCESS=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=OK
#PUSH_URL_FAIL=https://status.example.com/api/push/xxxxxxxxxx?status=down&msg=fail
# Push Basic Auth
#COMPOSE_FILE="$COMPOSE_FILE:compose.pushbasicauth.yml"
#SECRET_PUSH_BASICAUTH=v1
# swarm-cronjob, instead of built-in cron # swarm-cronjob, instead of built-in cron
#COMPOSE_FILE="$COMPOSE_FILE:compose.swarm-cronjob.yml" #COMPOSE_FILE="$COMPOSE_FILE:compose.swarm-cronjob.yml"

2
.gitignore vendored
View File

@ -1 +1 @@
/testing .venv

6
CHANGELOG.md Normal file
View File

@ -0,0 +1,6 @@
# Change log
## 2.0.0 (unreleased)
- Rewrite from Bash to Python
- Add support for push notifications (#24)

11
Dockerfile Normal file
View File

@ -0,0 +1,11 @@
# Base image: Docker-in-Docker, pinned to 24.0.7.
FROM docker:24.0.7-dind
# Install restic, bash, curl and Python 3 together with the click, docker-py and json-logger packages from apk.
RUN apk add --upgrade --no-cache restic bash python3 py3-pip py3-click py3-docker-py py3-json-logger curl
# Todo use requirements file with specific versions
# --break-system-packages lets pip install resticpy into the system Python instead of a virtualenv.
RUN pip install --break-system-packages resticpy==1.0.2
# The backup CLI is installed as /usr/bin/backup; the entrypoint script lives at /entrypoint.sh.
COPY backupbot.py /usr/bin/backup
COPY entrypoint.sh /entrypoint.sh
# Shell-form ENTRYPOINT: the script is started through /bin/sh -c.
ENTRYPOINT /entrypoint.sh

148
README.md
View File

@ -10,7 +10,7 @@ Automatically take backups from all volumes of running Docker Swarm services and
* **Category**: Utilities * **Category**: Utilities
* **Status**: 0, work-in-progress * **Status**: 0, work-in-progress
* **Image**: [`thecoopcloud/backup-bot-two`](https://hub.docker.com/r/thecoopcloud/backup-bot-two), 4, upstream * **Image**: [`git.coopcloud.tech/coop-cloud/backup-bot-two`](https://git.coopcloud.tech/coop-cloud/-/packages/container/backup-bot-two), 4, upstream
* **Healthcheck**: No * **Healthcheck**: No
* **Backups**: N/A * **Backups**: N/A
* **Email**: N/A * **Email**: N/A
@ -38,12 +38,12 @@ Backupbot II tries to help, by
* `abra app new backup-bot-two` * `abra app new backup-bot-two`
* `abra app config <app-name>` * `abra app config <app-name>`
- set storage options. Either configure `CRON_SCHEDULE`, or set up `swarm-cronjob` - set storage options. Either configure `CRON_SCHEDULE`, or set up `swarm-cronjob`
* `abra app secret generate -a <app_name>` * `abra app secret generate -a <backupbot_name>`
* `abra app deploy <app-name>` * `abra app deploy <app-name>`
## Configuration ## Configuration
Per default Backupbot stores the backups locally in the repository `/backups/restic`, which is accessible as volume at `/var/lib/docker/volumes/<app_name>_backups/_data/restic/` Per default Backupbot stores the backups locally in the repository `/backups/restic`, which is accessible as volume at `/var/lib/docker/volumes/<backupbot_name>_backups/_data/restic/`
The backup location can be changed using the `RESTIC_REPOSITORY` env variable. The backup location can be changed using the `RESTIC_REPOSITORY` env variable.
@ -57,7 +57,7 @@ AWS_ACCESS_KEY_ID=<MY_ACCESS_KEY>
COMPOSE_FILE="$COMPOSE_FILE:compose.s3.yml" COMPOSE_FILE="$COMPOSE_FILE:compose.s3.yml"
``` ```
and add your `<SECRET_ACCESS_KEY>` as docker secret: and add your `<SECRET_ACCESS_KEY>` as docker secret:
`abra app secret insert <app_name> aws_secret_access_key v1 <SECRET_ACCESS_KEY>` `abra app secret insert <backupbot_name> aws_secret_access_key v1 <SECRET_ACCESS_KEY>`
See [restic s3 docs](https://restic.readthedocs.io/en/latest/030_preparing_a_new_repo.html#amazon-s3) for more information. See [restic s3 docs](https://restic.readthedocs.io/en/latest/030_preparing_a_new_repo.html#amazon-s3) for more information.
@ -79,7 +79,7 @@ Add the key to your `authorized_keys`:
`ssh-copy-id -i backupkey <user>@<hostname>` `ssh-copy-id -i backupkey <user>@<hostname>`
Add your `SSH_KEY` as docker secret: Add your `SSH_KEY` as docker secret:
``` ```
abra app secret insert <app_name> ssh_key v1 """$(cat backupkey) abra app secret insert <backupbot_name> ssh_key v1 """$(cat backupkey)
""" """
``` ```
> Attention: This command needs to be executed exactly as stated above, because it places a trailing newline at the end, if this is missing you will get the following error: `Load key "/run/secrets/ssh_key": error in libcrypto` > Attention: This command needs to be executed exactly as stated above, because it places a trailing newline at the end, if this is missing you will get the following error: `Load key "/run/secrets/ssh_key": error in libcrypto`
@ -95,67 +95,104 @@ COMPOSE_FILE="$COMPOSE_FILE:compose.secret.yml"
``` ```
Add your REST server url as secret: Add your REST server url as secret:
``` ```
`abra app secret insert <app_name> restic_repo v1 "rest:https://user:pass@host:8000/"` abra app secret insert <backupbot_name> restic_repo v1 "rest:https://user:pass@host:8000/"
``` ```
The secret will overwrite the `RESTIC_REPOSITORY` variable. The secret will overwrite the `RESTIC_REPOSITORY` variable.
See [restic REST docs](https://restic.readthedocs.io/en/latest/030_preparing_a_new_repo.html#rest-server) for more information. See [restic REST docs](https://restic.readthedocs.io/en/latest/030_preparing_a_new_repo.html#rest-server) for more information.
## Push notifications
It is possible to configure three push events that may be triggered by the backup cronjob. They can be used by monitoring systems to detect failures.
The events are:
- start
- success
- fail
### Using a Prometheus Push Gateway
[A prometheus push gateway](https://git.coopcloud.tech/coop-cloud/monitoring-ng#setup-push-gateway) can be used by setting the following env variable:
- `PUSH_PROMETHEUS_URL=pushgateway.example.com/metrics/job/backup`
### Using custom URLs
The following env variables can be used to set up push notifications for backups. `PUSH_URL_START` is requested just before the backup starts, `PUSH_URL_SUCCESS` is only requested if the backup was successful, and if the backup fails `PUSH_URL_FAIL` will be requested.
Each variable is optional and independent of the other.
```
PUSH_URL_START=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=start
PUSH_URL_SUCCESS=https://status.example.com/api/push/xxxxxxxxxx?status=up&msg=OK
PUSH_URL_FAIL=https://status.example.com/api/push/xxxxxxxxxx?status=down&msg=fail
```
### Push endpoint behind basic auth
Insert the basic auth secret
`abra app secret insert <backupbot_name> push_basicauth v1 "user:password"`
Enable basic auth in the env file by uncommenting the following lines:
```
#COMPOSE_FILE="$COMPOSE_FILE:compose.pushbasicauth.yml"
#SECRET_PUSH_BASICAUTH=v1
```
## Usage ## Usage
Run the cronjob that creates a backup, including the push notifications and docker logging:
`abra app cmd <backupbot_name> app run_cron`
Create a backup of all apps: Create a backup of all apps:
`abra app run <app_name> app -- backup create` `abra app run <backupbot_name> app -- backup create`
> The apps to backup up need to be deployed > The apps to backup up need to be deployed
Create an individual backup: Create an individual backup:
`abra app run <app_name> app -- backup --host <target_app_name> create` `abra app run <backupbot_name> app -- backup --host <target_app_name> create`
Create a backup to a local repository: Create a backup to a local repository:
`abra app run <app_name> app -- backup create -r /backups/restic` `abra app run <backupbot_name> app -- backup create -r /backups/restic`
> It is recommended to shutdown/undeploy an app before restoring the data > It is recommended to shutdown/undeploy an app before restoring the data
Restore the latest snapshot of all including apps: Restore the latest snapshot of all including apps:
`abra app run <app_name> app -- backup restore` `abra app run <backupbot_name> app -- backup restore`
Restore a specific snapshot of an individual app: Restore a specific snapshot of an individual app:
`abra app run <app_name> app -- backup --host <target_app_name> restore --snapshot <snapshot_id>` `abra app run <backupbot_name> app -- backup --host <target_app_name> restore --snapshot <snapshot_id>`
Show all snapshots: Show all snapshots:
`abra app run <app_name> app -- backup snapshots` `abra app run <backupbot_name> app -- backup snapshots`
Show all snapshots containing a specific app: Show all snapshots containing a specific app:
`abra app run <app_name> app -- backup --host <target_app_name> snapshots` `abra app run <backupbot_name> app -- backup --host <target_app_name> snapshots`
Show all files inside the latest snapshot (can be very verbose): Show all files inside the latest snapshot (can be very verbose):
`abra app run <app_name> app -- backup ls` `abra app run <backupbot_name> app -- backup ls`
Show specific files inside a selected snapshot: Show specific files inside a selected snapshot:
`abra app run <app_name> app -- backup ls --snapshot <snapshot_id> --path /var/lib/docker/volumes/` `abra app run <backupbot_name> app -- backup ls --snapshot <snapshot_id> /var/lib/docker/volumes/`
Download files from a snapshot: Download files from a snapshot:
``` ```
filename=$(abra app run <app_name> app -- backup download --snapshot <snapshot_id> --path <absolute_path>) filename=$(abra app run <backupbot_name> app -- backup download --snapshot <snapshot_id> --path <absolute_path>)
abra app cp <app_name> app:$filename . abra app cp <backupbot_name> app:$filename .
``` ```
## Run restic ## Run restic
``` ```
abra app run <app_name> app bash abra app run <backupbot_name> app bash
export AWS_SECRET_ACCESS_KEY=$(cat $AWS_SECRET_ACCESS_KEY_FILE) export AWS_SECRET_ACCESS_KEY=$(cat $AWS_SECRET_ACCESS_KEY_FILE)
export RESTIC_PASSWORD=$(cat $RESTIC_PASSWORD_FILE) export RESTIC_PASSWORD=$(cat $RESTIC_PASSWORD_FILE)
restic snapshots restic snapshots
@ -165,20 +202,85 @@ restic snapshots
Like Traefik, or `swarm-cronjob`, Backupbot II uses access to the Docker socket to read labels from running Docker Swarm services: Like Traefik, or `swarm-cronjob`, Backupbot II uses access to the Docker socket to read labels from running Docker Swarm services:
1. Add `ENABLE_BACKUPS=true` to .env.sample
2. Add backupbot labels to the compose file
``` ```
services: services:
db: db:
deploy: deploy:
labels: labels:
backupbot.backup: ${BACKUP:-"true"} backupbot.backup: "${ENABLE_BACKUPS:-true}"
backupbot.backup.pre-hook: 'mysqldump -u root -p"$(cat /run/secrets/db_root_password)" -f /volume_path/dump.db' backupbot.backup.pre-hook: "/pg_backup.sh backup"
backupbot.backup.post-hook: "rm -rf /volume_path/dump.db" backupbot.backup.volumes.db.path: "backup.sql"
backupbot.restore.post-hook: '/pg_backup.sh restore'
backupbot.backup.volumes.redis: "false"
``` ```
- `backupbot.backup` -- set to `true` to back up this service (REQUIRED) - `backupbot.backup` -- set to `true` to back up this service (REQUIRED)
- `backupbot.backup.pre-hook` -- command to run before copying files (optional), save all dumps into the volumes - this is the only required backup label, per default it will backup all volumes
- `backupbot.backup.post-hook` -- command to run after copying files (optional) - `backupbot.backup.volumes.<volume_name>.path` -- only backup the listed relative paths from `<volume_name>`
- `backupbot.backup.volumes.<volume_name>: false` -- exclude <volume_name> from the backup
- `backupbot.backup.pre-hook` -- command to run before copying files
- e.g. save all database dumps into the volumes
- `backupbot.backup.post-hook` -- command to run after copying files
- `backupbot.restore.pre-hook` -- command to run before restoring files
- `backupbot.restore.post-hook` -- command to run after restoring files
- e.g. read all database dumps from the volumes
3. (Optional) add backup/restore scripts to the compose file
```
services:
db:
configs:
- source: pg_backup
target: /pg_backup.sh
mode: 0555
configs:
pg_backup:
name: ${STACK_NAME}_pg_backup_${PG_BACKUP_VERSION}
file: pg_backup.sh
```
Version the config file in `abra.sh`:
```
export PG_BACKUP_VERSION=v1
```
As in the above example, you can reference Docker Secrets, e.g. for looking up database passwords, by reading the files in `/run/secrets` directly. As in the above example, you can reference Docker Secrets, e.g. for looking up database passwords, by reading the files in `/run/secrets` directly.
[abra]: https://git.autonomic.zone/autonomic-cooperative/abra [abra]: https://git.autonomic.zone/autonomic-cooperative/abra
## Backupbot Development
1. Copy modified backupbot.py into the container:
```
cp backupbot.py /tmp/backupbot.py; git stash; abra app cp <backupbot_name> /tmp/backupbot.py app:/usr/bin/backupbot.py; git checkout main; git stash pop
```
2. Testing stuff with the python interpreter inside the container:
```
abra app run <backupbot_name> app bash
cd /usr/bin/
python
from backupbot import *
```
### Versioning
- App version: changes to `backupbot.py` (build a new image)
- Co-op Cloud package version: changes to recipe.
For example, starting with 1.0.0+2.0.0:
"patch" change to recipe: 1.0.1+2.0.0
"patch" change to backupbot.py: increment both, so 1.1.0+2.0.1
because bumping the image version would result in a minor recipe release
https://git.coopcloud.tech/coop-cloud/backup-bot-two/issues/4

12
abra.sh
View File

@ -1,3 +1,11 @@
export ENTRYPOINT_VERSION=v1
export BACKUPBOT_VERSION=v1
export SSH_CONFIG_VERSION=v1 export SSH_CONFIG_VERSION=v1
export ENTRYPOINT_VERSION=v17
export CRONJOB_VERSION=v2
# Trigger the backup cronjob right away and wait until it has run.
#
# The crontab entry is temporarily rewritten to fire every minute. Once
# /tmp/backup.log exists (presumably written by the cron job itself - confirm
# against the entrypoint), the crontab that was active before is reinstalled
# unchanged.
#
# Outputs: diagnostics on stderr
# Returns: non-zero if the crontab cannot be read or holds no command,
#          otherwise the status of the final `crontab -` call
run_cron () {
    local original_crontab job_command

    # Read the crontab once so exactly this text can be put back afterwards.
    original_crontab="$(crontab -l)" || {
        echo "run_cron: unable to read the crontab" >&2
        return 1
    }

    # Everything after the five schedule fields is the command to execute.
    job_command="$(printf '%s\n' "$original_crontab" | tr -s " " | cut -d ' ' -f6-)"
    if [ -z "$job_command" ]; then
        # Without a command the wait loop below would never terminate.
        echo "run_cron: crontab contains no command to run" >&2
        return 1
    fi

    rm -f /tmp/backup.log
    printf '* * * * * %s\n' "$job_command" | crontab -
    while [ ! -f /tmp/backup.log ]; do sleep 1; done

    # Restore the original entry verbatim instead of rebuilding it from the
    # whitespace-squeezed fields.
    printf '%s\n' "$original_crontab" | crontab -
}

View File

@ -1,6 +1,7 @@
#!/usr/bin/python3 #!/usr/bin/python3
import os import os
import sys
import click import click
import json import json
import subprocess import subprocess
@ -9,125 +10,424 @@ import docker
import restic import restic
import tarfile import tarfile
import io import io
from pythonjsonlogger import jsonlogger
from datetime import datetime, timezone from datetime import datetime, timezone
from restic.errors import ResticFailedError from restic.errors import ResticFailedError
from pathlib import Path from pathlib import Path
from shutil import copyfile, rmtree from shutil import copyfile, rmtree
# logging.basicConfig(level=logging.INFO)
VOLUME_PATH = "/var/lib/docker/volumes/" VOLUME_PATH = "/var/lib/docker/volumes/"
SECRET_PATH = '/secrets/' SECRET_PATH = "/secrets/"
SERVICE = None SERVICE = "ALL"
# Named logger used by the commands in this file.
logger = logging.getLogger("backupbot")
# Register a custom SUMMARY level with the numeric value 55, which ranks above
# logging.CRITICAL (50), and expose it as logging.SUMMARY.
logging.addLevelName(55, "SUMMARY")
setattr(logging, "SUMMARY", 55)
# Add logger.summary(message, ...) as a shortcut for logger.log(55, message, ...).
setattr(
logger,
"summary",
lambda message, *args, **kwargs: logger.log(55, message, *args, **kwargs),
)
def handle_exception(exc_type, exc_value, exc_traceback):
    """Report an uncaught exception through the backupbot logger.

    Takes the three arguments Python passes to ``sys.excepthook``.
    ``KeyboardInterrupt`` is forwarded to the interpreter's default hook;
    every other exception is logged at CRITICAL level with its traceback.
    """
    if not issubclass(exc_type, KeyboardInterrupt):
        logger.critical(
            "Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback)
        )
        return
    sys.__excepthook__(exc_type, exc_value, exc_traceback)
sys.excepthook = handle_exception
@click.group() @click.group()
@click.option('-l', '--log', 'loglevel') @click.option("-l", "--log", "loglevel")
@click.option('service', '--host', '-h', envvar='SERVICE') @click.option(
@click.option('repository', '--repo', '-r', envvar='RESTIC_REPOSITORY', required=True) "-m", "--machine-logs", "machine_logs", is_flag=True, envvar="MACHINE_LOGS"
def cli(loglevel, service, repository): )
@click.option("service", "--host", "-h", envvar="SERVICE")
@click.option("repository", "--repo", "-r", envvar="RESTIC_REPOSITORY")
def cli(loglevel, service, repository, machine_logs):
global SERVICE global SERVICE
if service: if service:
SERVICE = service.replace('.', '_') SERVICE = service.replace(".", "_")
if repository: if repository:
os.environ['RESTIC_REPOSITORY'] = repository os.environ["RESTIC_REPOSITORY"] = repository
if loglevel: if loglevel:
numeric_level = getattr(logging, loglevel.upper(), None) numeric_level = getattr(logging, loglevel.upper(), None)
if not isinstance(numeric_level, int): if not isinstance(numeric_level, int):
raise ValueError('Invalid log level: %s' % loglevel) raise ValueError("Invalid log level: %s" % loglevel)
logging.basicConfig(level=numeric_level) logger.setLevel(numeric_level)
logHandler = logging.StreamHandler()
if machine_logs:
formatter = jsonlogger.JsonFormatter(
"%(levelname)s %(filename)s %(lineno)s %(process)d %(message)s",
rename_fields={"levelname": "message_type"},
)
logHandler.setFormatter(formatter)
logger.addHandler(logHandler)
export_secrets() export_secrets()
init_repo() init_repo()
def init_repo(): def init_repo():
repo = os.environ['RESTIC_REPOSITORY'] if repo := os.environ.get("RESTIC_REPOSITORY_FILE"):
logging.debug(f"set restic repository location: {repo}") # RESTIC_REPOSITORY_FILE and RESTIC_REPOSITORY are mutually exclusive
restic.repository = repo del os.environ["RESTIC_REPOSITORY"]
restic.password_file = '/var/run/secrets/restic_password' else:
repo = os.environ["RESTIC_REPOSITORY"]
restic.repository = repo
logger.debug(f"set restic repository location: {repo}")
restic.password_file = "/var/run/secrets/restic_password"
try: try:
restic.cat.config() restic.cat.config()
except ResticFailedError as error: except ResticFailedError as error:
if 'unable to open config file' in str(error): if "unable to open config file" in str(error):
result = restic.init() result = restic.init()
logging.info(f"Initialized restic repo: {result}") logger.info(f"Initialized restic repo: {result}")
else: else:
raise error raise error
def export_secrets(): def export_secrets():
for env in os.environ: for env in os.environ:
if env.endswith('FILE') and not "COMPOSE_FILE" in env: if env.endswith("FILE") and not "COMPOSE_FILE" in env:
logging.debug(f"exported secret: {env}") logger.debug(f"exported secret: {env}")
with open(os.environ[env]) as file: with open(os.environ[env]) as file:
secret = file.read() secret = file.read()
os.environ[env.removesuffix('_FILE')] = secret os.environ[env.removesuffix("_FILE")] = secret
# logging.debug(f"Read secret value: {secret}") # logger.debug(f"Read secret value: {secret}")
@cli.command() @cli.command()
def create(): @click.option("retries", "--retries", "-r", envvar="RETRIES", default=1)
pre_commands, post_commands, backup_paths, apps = get_backup_cmds() def create(retries):
copy_secrets(apps) app_settings = parse_backup_labels()
backup_paths.append(SECRET_PATH) pre_commands, post_commands, backup_paths, apps_versions = get_backup_details(
app_settings
)
copy_secrets(apps_versions)
backup_paths.append(Path(SECRET_PATH))
run_commands(pre_commands) run_commands(pre_commands)
backup_volumes(backup_paths, apps) backup_volumes(backup_paths, apps_versions, int(retries))
run_commands(post_commands) run_commands(post_commands)
def get_backup_cmds(): @cli.command()
# restore: restore the volume paths of the selected app(s) from a restic
# snapshot. Unless --noninteractive is given, the snapshot, app versions,
# paths and hook commands are listed first and the user has to type YES.
# Pre/post hook commands run around the restore unless --no-commands is set.
# (Kept as comments on purpose: a docstring would become the click help text.)
@click.option("snapshot_id", "--snapshot", "-s", envvar="SNAPSHOT", default="latest")
@click.option("target", "--target", "-t", envvar="TARGET", default="/")
@click.option(
    "noninteractive", "--noninteractive", envvar="NONINTERACTIVE", is_flag=True
)
@click.option("volumes", "--volumes", "-v", envvar="VOLUMES", multiple=True)
@click.option("container", "--container", "-c", envvar="CONTAINER", multiple=True)
@click.option("no_commands", "--no-commands", envvar="NO_COMMANDS", is_flag=True)
def restore(snapshot_id, target, noninteractive, volumes, container, no_commands):
    app_settings = parse_backup_labels("restore", container)
    if SERVICE != "ALL":
        # Narrow the settings down to the single app selected via --host.
        service_settings = app_settings.get(SERVICE)
        if not service_settings:
            logger.error(
                f"The app {SERVICE} is not running, use the restore-path argument to restore paths of undeployed apps"
            )
            exit(1)
        app_settings = {SERVICE: service_settings}

    details = get_backup_details(app_settings, volumes)
    pre_commands, post_commands, backup_paths, apps_versions = details

    snapshots = get_snapshots(snapshot_id)
    if not snapshots:
        logger.error(
            f"No Snapshots with ID {snapshot_id} for {apps_versions.keys()} found."
        )
        exit(1)
    snapshot = snapshots[0]
    snapshot_id = snapshot["short_id"]
    run_hooks = not no_commands

    if not noninteractive:
        # Show what is about to happen and ask for explicit confirmation.
        print(f"Snapshot to restore: \t{snapshot_id}")
        snapshot_versions = app_versions_from_tags(snapshot.get("tags"))
        print("Apps:")
        for app, deployed_version in apps_versions.items():
            snapshot_version = snapshot_versions.get(app)
            print(f"\t{app} \t {snapshot_version}")
            if deployed_version != snapshot_version:
                print(
                    f"WARNING!!! The running app is deployed with version {deployed_version}"
                )
        print("The following volume paths will be restored:")
        for path in backup_paths:
            print(f"\t{path}")
        if run_hooks:
            print("The following commands will be executed:")
            for hook_container, cmd in [
                *pre_commands.items(),
                *post_commands.items(),
            ]:
                service_name = hook_container.labels["com.docker.swarm.service.name"]
                print(f"\t{service_name}:\t{cmd}")
        snapshot_date = datetime.fromisoformat(snapshot["time"])
        age = datetime.now(tz=timezone.utc) - snapshot_date
        print(f"This snapshot is {age} old")
        print("\nTHIS COMMAND WILL IRREVERSIBLY OVERWRITES FILES")
        if input("Type YES (uppercase) to continue: ") != "YES":
            logger.error("Restore aborted")
            exit(1)

    print(f"Restoring Snapshot {snapshot_id} at {target}")
    if run_hooks and pre_commands:
        print("Run pre commands.")
        run_commands(pre_commands)
    if not backup_paths:
        print("No paths to restore.")
    else:
        result = restic_restore(
            snapshot_id=snapshot_id, include=backup_paths, target_dir=target
        )
        logger.debug(result)
    if run_hooks and post_commands:
        print("Run post commands.")
        run_commands(post_commands)
# restore-path: restore explicitly listed paths from a snapshot without
# consulting the labels of running services. Unless --noninteractive is given,
# the snapshot, its app versions and the paths are listed first and the user
# has to type YES. The docstring below is the click help text.
@cli.command()
@click.option("snapshot_id", "--snapshot", "-s", envvar="SNAPSHOT", default="latest")
@click.option("target", "--target", "-t", envvar="TARGET", default="/")
@click.option(
    "noninteractive", "--noninteractive", envvar="NONINTERACTIVE", is_flag=True
)
@click.argument("paths", nargs=-1, required=True, envvar="INCLUDE_PATH")
def restore_path(snapshot_id, target, noninteractive, paths):
    """PATHS: list of paths to restore"""
    matches = get_snapshots(snapshot_id)
    if not matches:
        logger.error(f"No Snapshots with ID {snapshot_id} for app {SERVICE} found.")
        exit(1)
    snapshot = matches[0]
    snapshot_id = snapshot["short_id"]

    if not noninteractive:
        # Show what is about to happen and ask for explicit confirmation.
        print(f"Snapshot to restore: \t{snapshot_id}")
        snapshot_versions = app_versions_from_tags(snapshot.get("tags"))
        print("Apps:")
        for app, version in snapshot_versions.items():
            # With --host set, only list the selected app.
            if SERVICE in ("ALL", app):
                print(f"\t{app} \t {version}")
        print("The following paths will be restored:")
        for path in paths:
            print(f"\t{path}")
        snapshot_date = datetime.fromisoformat(snapshot["time"])
        age = datetime.now(tz=timezone.utc) - snapshot_date
        print(f"This snapshot is {age} old")
        print("\nTHIS COMMAND WILL IRREVERSIBLY OVERWRITES FILES")
        if input("Type YES (uppercase) to continue: ") != "YES":
            logger.error("Restore aborted")
            exit(1)

    print(f"Restoring Snapshot {snapshot_id} at {target}")
    outcome = restic_restore(snapshot_id=snapshot_id, include=paths, target_dir=target)
    logger.debug(outcome)
def restic_restore(snapshot_id, include=None, target_dir=None):
    """Run ``restic restore`` for one snapshot with several ``--include`` paths.

    The command line is built on top of ``restic.cat.base_command()`` and run
    through resticpy's command executor.

    Args:
        snapshot_id: ID of the snapshot to restore.
        include: Iterable of paths; each one is passed as its own
            ``--include`` argument. ``None`` or empty adds no filter.
        target_dir: Passed as ``--target``; omitted when falsy.

    Returns:
        Whatever ``restic.internal.command_executor.execute`` returns.
    """
    cmd = restic.cat.base_command() + ["restore", snapshot_id]
    # ``None`` replaces the former mutable ``[]`` default.
    for path in include or ():
        # Callers may hand over pathlib.Path objects (create() appends
        # Path(SECRET_PATH) to its path list); keep the argv purely textual.
        cmd.extend(["--include", str(path)])
    if target_dir:
        cmd.extend(["--target", target_dir])
    return restic.internal.command_executor.execute(cmd)
def get_snapshots(snapshot_id=None):
    """Return the restic snapshots relevant for the selected SERVICE.

    Args:
        snapshot_id: ``None`` to list every snapshot tagged with SERVICE,
            ``"latest"`` for only the last of those, or a concrete snapshot
            ID to look up.

    Returns:
        A list of snapshot dicts as returned by ``restic.snapshots``. The list
        is empty when nothing matches; for ``"latest"`` it holds at most one
        entry.

    Exits the process with status 1 when a concrete snapshot ID is found but
    its tags do not contain SERVICE.
    """
    if snapshot_id and snapshot_id != "latest":
        snapshots = restic.snapshots(snapshot_id=snapshot_id)
        if not snapshots:
            # Nothing matched the ID: return the empty list so that the
            # callers' own "No Snapshots with ID ..." handling applies instead
            # of failing with an IndexError below.
            return snapshots
        if SERVICE not in app_versions_from_tags(snapshots[0].get("tags")):
            logger.error(f"Snapshot with ID {snapshot_id} does not contain {SERVICE}")
            exit(1)
        return snapshots

    # No concrete ID: keep only the snapshots whose tags mention SERVICE.
    snapshots = [
        snapshot
        for snapshot in restic.snapshots()
        if SERVICE in app_versions_from_tags(snapshot.get("tags"))
    ]
    if snapshot_id == "latest":
        # Slicing keeps the result a list and tolerates an empty one.
        return snapshots[-1:]
    return snapshots
def app_versions_from_tags(tags):
    """Turn snapshot tags into a mapping of app name to version.

    Each tag is read as ``<app>:<version>``. Only the first colon separates
    the two parts, so a version that itself contains ``:`` is kept intact.
    A tag without a colon maps to ``None``.

    Args:
        tags: Iterable of tag strings, or ``None``/empty.

    Returns:
        dict mapping app name to version string (or ``None``); empty when
        there are no tags.
    """
    versions = {}
    for tag in tags or ():
        app, separator, version = tag.partition(":")
        versions[app] = version if separator else None
    return versions
def str2bool(value: str) -> bool:
    """Interpret a label value as a boolean.

    Returns True for "yes", "true", "t" and "1", compared case-insensitively
    and ignoring surrounding whitespace; every other string is False.
    """
    # Strip first so that values such as " true" or "true\n" still count as true.
    return value.strip().lower() in ("yes", "true", "t", "1")
def parse_backup_labels(hook_type="backup", selected_container=[]):
client = docker.from_env() client = docker.from_env()
container_by_service = { container_by_service = {
c.labels['com.docker.swarm.service.name']: c for c in client.containers.list()} c.labels.get("com.docker.swarm.service.name"): c
backup_paths = set() for c in client.containers.list()
backup_apps = set() }
pre_commands = {}
post_commands = {}
services = client.services.list() services = client.services.list()
app_settings = {}
for s in services: for s in services:
labels = s.attrs['Spec']['Labels'] specs = s.attrs["Spec"]
if (backup := labels.get('backupbot.backup')) and bool(backup): labels = specs["Labels"]
stack_name = labels['com.docker.stack.namespace'] stack_name = labels["com.docker.stack.namespace"]
# Remove this lines to backup only a specific service container_name = s.name.removeprefix(f"{stack_name}_")
# This will unfortenately decrease restice performance version = labels.get(f"coop-cloud.{stack_name}.version")
# if SERVICE and SERVICE != stack_name: settings = app_settings[stack_name] = app_settings.get(stack_name) or {}
# continue if (backup := labels.get("backupbot.backup")) and str2bool(backup):
backup_apps.add(stack_name) settings["enabled"] = True
backup_paths = backup_paths.union( if version:
Path(VOLUME_PATH).glob(f"{stack_name}_*")) settings["version"] = version
if not (container:= container_by_service.get(s.name)): if selected_container and container_name not in selected_container:
logging.error( logger.debug(f"Skipping {s.name} because it's not a selected container")
f"Container {s.name} is not running, hooks can not be executed") continue
if mounts := specs["TaskTemplate"]["ContainerSpec"].get("Mounts"):
volumes = parse_volumes(stack_name, mounts)
volumes.update(settings.get("volumes") or {})
settings["volumes"] = volumes
excluded_volumes, included_volume_paths = parse_excludes_includes(labels)
settings["excluded_volumes"] = excluded_volumes.union(
settings.get("excluded_volumes") or set()
)
settings["included_volume_paths"] = included_volume_paths.union(
settings.get("included_volume_paths") or set()
)
if container := container_by_service.get(s.name):
if command := labels.get(f"backupbot.{hook_type}.pre-hook"):
if not (pre_hooks := settings.get("pre_hooks")):
pre_hooks = settings["pre_hooks"] = {}
pre_hooks[container] = command
if command := labels.get(f"backupbot.{hook_type}.post-hook"):
if not (post_hooks := settings.get("post_hooks")):
post_hooks = settings["post_hooks"] = {}
post_hooks[container] = command
else:
logger.debug(f"Container {s.name} is not running.")
if labels.get(f"backupbot.{hook_type}.pre-hook") or labels.get(
f"backupbot.{hook_type}.post-hook"
):
logger.error(f"Container {s.name} contain hooks but it's not running")
return app_settings
def get_backup_details(app_settings, volumes=[]):
backup_paths = set()
backup_apps_versions = {}
pre_hooks = {}
post_hooks = {}
for app, settings in app_settings.items():
if settings.get("enabled"):
if SERVICE != "ALL" and SERVICE != app:
continue continue
if prehook := labels.get('backupbot.backup.pre-hook'): backup_apps_versions[app] = settings.get("version")
pre_commands[container] = prehook add_backup_paths(backup_paths, settings, app, volumes)
if posthook := labels.get('backupbot.backup.post-hook'): if hooks := settings.get("pre_hooks"):
post_commands[container] = posthook pre_hooks.update(hooks)
return pre_commands, post_commands, list(backup_paths), list(backup_apps) if hooks := settings.get("post_hooks"):
post_hooks.update(hooks)
return pre_hooks, post_hooks, list(backup_paths), backup_apps_versions
def add_backup_paths(backup_paths, settings, app, selected_volumes):
    """Add the paths to back up for one app to `backup_paths` (a set, mutated).

    Args:
        backup_paths: set that collects the resulting paths.
        settings: per-app settings dict; reads the keys "volumes"
            (name -> path), "included_volume_paths" (collection of
            (volume, relative_paths) pairs) and "excluded_volumes".
        app: app name, used only in log messages.
        selected_volumes: if non-empty, only volumes named here are considered.

    For a volume with include paths only those sub-paths are added; every
    other volume is added as a whole unless it is excluded or not selected.
    """
    volumes = settings.get("volumes")
    if not volumes:
        logger.warning(f"{app} does not contain any volumes")
        return

    includes = settings.get("included_volume_paths")
    # Volumes that are backed up partially must not also be added as a whole.
    path_limited = [entry[0] for entry in includes] if includes else []
    for volume, rel_paths in includes or ():
        volume_path = volumes.get(volume)
        if not volume_path:
            logger.error(
                f"Can not find volume with the name {volume} for {app}"
            )
        elif selected_volumes and volume not in selected_volumes:
            logger.debug(
                f"Skipping {volume}:{rel_paths} because the volume is not selected"
            )
        else:
            backup_paths.update(Path(f"{volume_path}/{p}") for p in rel_paths)

    excluded_volumes = settings.get("excluded_volumes") or []
    for name, path in volumes.items():
        if selected_volumes and name not in selected_volumes:
            logger.debug(
                f"Skipping volume: {name} because the volume is not selected"
            )
        elif name in excluded_volumes:
            logger.debug(f"Skipping volume: {name} because the volume is excluded")
        elif name in path_limited:
            logger.debug(f"Skipping volume: {name} because a path is selected")
        else:
            backup_paths.add(path)
def parse_volumes(stack_name, mounts):
    """Map the volume mounts of a service to their data directories.

    Args:
        stack_name: stack namespace; "<stack_name>_" is stripped from the
            mount source to form the short volume name.
        mounts: mount dicts with at least the keys "Type" and "Source".

    Returns:
        Dict of short volume name -> Path("<VOLUME_PATH><source>/_data/").
        Mounts whose "Type" is not "volume" are ignored.
    """
    prefix = stack_name + "_"
    volume_sources = (
        mount["Source"] for mount in mounts if mount["Type"] == "volume"
    )
    return {
        source.removeprefix(prefix): Path(f"{VOLUME_PATH}{source}/_data/")
        for source in volume_sources
    }
def parse_excludes_includes(labels):
    """Extract per-volume backup rules from the service labels.

    Recognised labels:
        backupbot.backup.volumes.<volume>.path = "a,b"
            back up only the comma separated relative paths of <volume>
        backupbot.backup.volumes.<volume> = <value>
            exclude <volume> when <value> is not truthy for str2bool

    Args:
        labels: dict of label name -> label value.

    Returns:
        (excluded_volumes, included_volume_paths): a set of volume names and
        a set of (volume_name, tuple_of_relative_paths) pairs.
    """
    label_prefix = "backupbot.backup.volumes."
    path_suffix = ".path"
    excluded_volumes = set()
    included_volume_paths = set()
    for label, value in labels.items():
        if not label.startswith(label_prefix):
            continue
        volume_name = label.removeprefix(label_prefix).removesuffix(path_suffix)
        # Test for the full ".path" suffix: checking only for "path" would
        # misread an enable/disable label of a volume whose name merely ends
        # in "path" (e.g. "mypath") as an include-path label.
        if label.endswith(path_suffix):
            relative_paths = tuple(value.split(","))
            included_volume_paths.add((volume_name, relative_paths))
        elif not str2bool(value):
            excluded_volumes.add(volume_name)
    return excluded_volumes, included_volume_paths
def copy_secrets(apps): def copy_secrets(apps):
#TODO: check if it is deployed # TODO: check if it is deployed
rmtree(SECRET_PATH, ignore_errors=True) rmtree(SECRET_PATH, ignore_errors=True)
os.mkdir(SECRET_PATH) os.mkdir(SECRET_PATH)
client = docker.from_env() client = docker.from_env()
container_by_service = { container_by_service = {
c.labels['com.docker.swarm.service.name']: c for c in client.containers.list()} c.labels.get("com.docker.swarm.service.name"): c
for c in client.containers.list()
}
services = client.services.list() services = client.services.list()
for s in services: for s in services:
app_name = s.attrs['Spec']['Labels']['com.docker.stack.namespace'] app_name = s.attrs["Spec"]["Labels"]["com.docker.stack.namespace"]
if (app_name in apps and if app_name in apps and (
(app_secs := s.attrs['Spec']['TaskTemplate']['ContainerSpec'].get('Secrets'))): app_secs := s.attrs["Spec"]["TaskTemplate"]["ContainerSpec"].get("Secrets")
):
if not container_by_service.get(s.name): if not container_by_service.get(s.name):
logging.error( logger.warning(
f"Container {s.name} is not running, secrets can not be copied.") f"Container {s.name} is not running, secrets can not be copied."
)
continue continue
container_id = container_by_service[s.name].id container_id = container_by_service[s.name].id
for sec in app_secs: for sec in app_secs:
src = f'/var/lib/docker/containers/{container_id}/mounts/secrets/{sec["SecretID"]}' src = f"/var/lib/docker/containers/{container_id}/mounts/secrets/{sec['SecretID']}"
if not Path(src).exists(): if not Path(src).exists():
logging.error(f"For the secret {sec['SecretName']} the file {src} does not exist for {s.name}") logger.error(
f"For the secret {sec['SecretName']} the file {src} does not exist for {s.name}"
)
continue continue
dst = SECRET_PATH + sec['SecretName'] dst = SECRET_PATH + sec["SecretName"]
logger.debug(f"Copy Secret {sec['SecretName']}")
copyfile(src, dst) copyfile(src, dst)
@ -136,173 +436,195 @@ def run_commands(commands):
if not command: if not command:
continue continue
# Remove bash/sh wrapping # Remove bash/sh wrapping
command = command.removeprefix('bash -c').removeprefix('sh -c') command = (
command.removeprefix("bash -c").removeprefix("sh -c").removeprefix(" ")
)
# Remove quotes surrounding the command # Remove quotes surrounding the command
if (len(command) >= 2 and command[0] == command[-1] and (command[0] == "'" or command[0] == '"')): if (
command[1:-1] len(command) >= 2
and command[0] == command[-1]
and (command[0] == "'" or command[0] == '"')
):
command = command[1:-1]
# Use bash's pipefail to return exit codes inside a pipe to prevent silent failure # Use bash's pipefail to return exit codes inside a pipe to prevent silent failure
command = f"bash -c 'set -o pipefail;{command}'" command = f"bash -c 'set -o pipefail;{command}'"
logging.info(f"run command in {container.name}:") logger.info(f"run command in {container.name}:")
logging.info(command) logger.info(command)
result = container.exec_run(command) result = container.exec_run(command)
if result.exit_code: if result.exit_code:
logging.error( logger.error(
f"Failed to run command {command} in {container.name}: {result.output.decode()}") f"Failed to run command {command} in {container.name}: {result.output.decode()}"
)
else: else:
logging.info(result.output.decode()) logger.debug(result.output.decode())
def backup_volumes(backup_paths, apps, dry_run=False): def backup_volumes(backup_paths, apps_versions, retries, dry_run=False):
try: while True:
result = restic.backup(backup_paths, dry_run=dry_run, tags=apps) try:
print(result) logger.info("Backup these paths:")
logging.info(result) logger.info("\n".join(map(str, backup_paths)))
except ResticFailedError as error: backup_paths = list(filter(path_exists, backup_paths))
logging.error(f"Backup failed for {apps}. Could not Backup these paths: {backup_paths}") cmd = restic.cat.base_command()
logging.error(error) parent = get_snapshots("latest")
exit(1) if parent:
# https://restic.readthedocs.io/en/stable/040_backup.html#file-change-detection
cmd.extend(["--parent", parent[0]["short_id"]])
tags = [f"{app}:{version}" for app, version in apps_versions.items()]
if SERVICE == "ALL":
tags.append(SERVICE)
logger.info("Start volume backup")
result = restic.internal.backup.run(
cmd, backup_paths, dry_run=dry_run, tags=tags
)
logger.summary("backup finished", extra=result)
return
except ResticFailedError as error:
logger.error(f"Backup failed for {SERVICE}.")
logger.error(error, exc_info=True)
if retries > 0:
retries -= 1
else:
exit(1)
@cli.command() def path_exists(path):
@click.option('snapshot', '--snapshot', '-s', envvar='SNAPSHOT', default='latest') if not path.exists():
@click.option('target', '--target', '-t', envvar='TARGET', default='/') logger.error(f"{path} does not exist")
@click.option('noninteractive', '--noninteractive', envvar='NONINTERACTIVE', default=False) return path.exists()
def restore(snapshot, target, noninteractive):
# Todo: recommend to shutdown the container
service_paths = VOLUME_PATH
if SERVICE:
service_paths = service_paths + f'{SERVICE}_*'
snapshots = restic.snapshots(snapshot_id=snapshot)
if not snapshot:
logging.error("No Snapshots with ID {snapshots}")
exit(1)
if not noninteractive:
snapshot_date = datetime.fromisoformat(snapshots[0]['time'])
delta = datetime.now(tz=timezone.utc) - snapshot_date
print(
f"You are going to restore Snapshot {snapshot} of {service_paths} at {target}")
print(f"This snapshot is {delta} old")
print(
f"THIS COMMAND WILL IRREVERSIBLY OVERWRITES {target}{service_paths.removeprefix('/')}")
prompt = input("Type YES (uppercase) to continue: ")
if prompt != 'YES':
logging.error("Restore aborted")
exit(1)
print(f"Restoring Snapshot {snapshot} of {service_paths} at {target}")
result = restic.restore(snapshot_id=snapshot,
include=service_paths, target_dir=target)
logging.debug(result)
@cli.command() @cli.command()
def snapshots(): def snapshots():
snapshots = restic.snapshots() snapshots = get_snapshots()
no_snapshots = True
for snap in snapshots: for snap in snapshots:
if not SERVICE or (tags := snap.get('tags')) and SERVICE in tags: output = [snap["time"].split(".")[0], snap["short_id"]]
print(snap['time'], snap['id']) if tags := snap.get("tags"):
no_snapshots = False app_versions = app_versions_from_tags(tags)
if no_snapshots: if version := app_versions.get(SERVICE):
output.append(version)
print(*output)
if not snapshots:
err_msg = "No Snapshots found" err_msg = "No Snapshots found"
if SERVICE: if SERVICE != "ALL":
err_msg += f' for app {SERVICE}' service_name = SERVICE.replace("_", ".")
logging.warning(err_msg) err_msg += f" for app {service_name}"
logger.warning(err_msg)
@cli.command() @cli.command()
@click.option('snapshot', '--snapshot', '-s', envvar='SNAPSHOT', default='latest') @click.option("snapshot", "--snapshot", "-s", envvar="SNAPSHOT", default="latest")
@click.option('path', '--path', '-p', envvar='INCLUDE_PATH') @click.option("show_all", "--all", "-a", envvar="SHOW_ALL", is_flag=True)
def ls(snapshot, path): @click.option("timestamps", "--timestamps", "-t", envvar="TIMESTAMPS", is_flag=True)
@click.argument(
"path", required=False, default="/var/lib/docker/volumes/", envvar="INCLUDE_PATH"
)
def ls(snapshot, show_all, timestamps, path):
if snapshot == "latest":
latest_snapshot = get_snapshots("latest")
if not latest_snapshot:
logger.error(f"There is no latest snapshot for {SERVICE}")
exit(1)
snapshot = latest_snapshot[0]["short_id"]
if show_all:
path = None
results = list_files(snapshot, path) results = list_files(snapshot, path)
for r in results: for r in results:
if r.get('path'): if r.get("path"):
print(f"{r['ctime']}\t{r['path']}") if timestamps:
print(f"{r['ctime']}\t{r['path']}")
else:
print(f"{r['path']}")
def list_files(snapshot, path): def list_files(snapshot, path):
cmd = restic.cat.base_command() + ['ls'] cmd = restic.cat.base_command() + ["ls"]
if SERVICE:
cmd = cmd + ['--tag', SERVICE]
cmd.append(snapshot) cmd.append(snapshot)
if path: if path:
cmd.append(path) cmd.append(path)
try: try:
output = restic.internal.command_executor.execute(cmd) output = restic.internal.command_executor.execute(cmd)
except ResticFailedError as error: except ResticFailedError as error:
if 'no snapshot found' in str(error): if "no snapshot found" in str(error):
err_msg = f'There is no snapshot {snapshot}' err_msg = f'There is no snapshot "{snapshot}"'
if SERVICE: if SERVICE != "ALL":
err_msg += f'for the app {SERVICE}' err_msg += f' for the app "{SERVICE}"'
logging.error(err_msg) logger.error(err_msg)
exit(1) exit(1)
else: else:
raise error raise error
output = output.replace('}\n{', '}|{') output = output.replace("}\n{", "}|{")
results = list(map(json.loads, output.split('|'))) results = list(map(json.loads, output.split("|")))
return results return results
@cli.command() @cli.command()
@click.option('snapshot', '--snapshot', '-s', envvar='SNAPSHOT', default='latest') @click.option("snapshot", "--snapshot", "-s", envvar="SNAPSHOT", default="latest")
@click.option('path', '--path', '-p', envvar='INCLUDE_PATH') @click.option("path", "--path", "-p", envvar="INCLUDE_PATH")
@click.option('volumes', '--volumes', '-v', is_flag=True) @click.option("volumes", "--volumes", "-v", envvar="VOLUMES")
@click.option('secrets', '--secrets', '-c', is_flag=True) @click.option("secrets", "--secrets", "-c", is_flag=True, envvar="SECRETS")
def download(snapshot, path, volumes, secrets): def download(snapshot, path, volumes, secrets):
file_dumps = [] file_dumps = []
if snapshot == "latest":
latest_snapshot = get_snapshots("latest")
if not latest_snapshot:
logger.error(f"There is no latest snapshot for {SERVICE}")
exit(1)
snapshot = latest_snapshot[0]["short_id"]
if not any([path, volumes, secrets]): if not any([path, volumes, secrets]):
volumes = secrets = True volumes = secrets = True
if path: if path:
path = path.removesuffix('/') path = path.removesuffix("/")
binary_output = dump(snapshot, path) binary_output = dump(snapshot, path)
files = list_files(snapshot, path) files = list_files(snapshot, path)
filetype = [f.get('type') for f in files if f.get('path') == path][0] filetype = [f.get("type") for f in files if f.get("path") == path][0]
filename = Path(path).name filename = Path(path).name
if filetype == 'dir': if filetype == "dir":
filename = filename + ".tar" filename = filename + ".tar"
tarinfo = tarfile.TarInfo(name=filename) tarinfo = tarfile.TarInfo(name=filename)
tarinfo.size = len(binary_output) tarinfo.size = len(binary_output)
file_dumps.append((binary_output, tarinfo)) file_dumps.append((binary_output, tarinfo))
if volumes: if volumes:
if not SERVICE: if SERVICE == "ALL":
logging.error("Please specify '--host' when using '--volumes'") logger.error("Please specify '--host' when using '--volumes'")
exit(1) exit(1)
files = list_files(snapshot, VOLUME_PATH) files = list_files(snapshot, VOLUME_PATH)
for f in files[1:]: for f in files[1:]:
path = f['path'] path = f["path"]
if Path(path).name.startswith(SERVICE) and f['type'] == 'dir': if Path(path).name.startswith(SERVICE) and f["type"] == "dir":
binary_output = dump(snapshot, path) binary_output = dump(snapshot, path)
filename = f"{Path(path).name}.tar" filename = f"{Path(path).name}.tar"
tarinfo = tarfile.TarInfo(name=filename) tarinfo = tarfile.TarInfo(name=filename)
tarinfo.size = len(binary_output) tarinfo.size = len(binary_output)
file_dumps.append((binary_output, tarinfo)) file_dumps.append((binary_output, tarinfo))
if secrets: if secrets:
if not SERVICE: if SERVICE == "ALL":
logging.error("Please specify '--host' when using '--secrets'") logger.error("Please specify '--host' when using '--secrets'")
exit(1) exit(1)
filename = f"{SERVICE}.json" filename = f"{SERVICE}.json"
files = list_files(snapshot, SECRET_PATH) files = list_files(snapshot, SECRET_PATH)
secrets = {} secrets = {}
for f in files[1:]: for f in files[1:]:
path = f['path'] path = f["path"]
if Path(path).name.startswith(SERVICE) and f['type'] == 'file': if Path(path).name.startswith(SERVICE) and f["type"] == "file":
secret = dump(snapshot, path).decode() secret = dump(snapshot, path).decode()
secret_name = path.removeprefix(f'{SECRET_PATH}{SERVICE}_') secret_name = path.removeprefix(f"{SECRET_PATH}{SERVICE}_")
secrets[secret_name] = secret secrets[secret_name] = secret
binary_output = json.dumps(secrets).encode() binary_output = json.dumps(secrets).encode()
tarinfo = tarfile.TarInfo(name=filename) tarinfo = tarfile.TarInfo(name=filename)
tarinfo.size = len(binary_output) tarinfo.size = len(binary_output)
file_dumps.append((binary_output, tarinfo)) file_dumps.append((binary_output, tarinfo))
with tarfile.open('/tmp/backup.tar.gz', "w:gz") as tar: with tarfile.open("/tmp/backup.tar.gz", "w:gz") as tar:
print(f"Writing files to /tmp/backup.tar.gz...") print(f"Writing files to /tmp/backup.tar.gz...")
for binary_output, tarinfo in file_dumps: for binary_output, tarinfo in file_dumps:
tar.addfile(tarinfo, fileobj=io.BytesIO(binary_output)) tar.addfile(tarinfo, fileobj=io.BytesIO(binary_output))
size = get_formatted_size('/tmp/backup.tar.gz') size = get_formatted_size("/tmp/backup.tar.gz")
print(f"Backup has been written to /tmp/backup.tar.gz with a size of {size}") print(f"Backup has been written to /tmp/backup.tar.gz with a size of {size}")
def get_formatted_size(file_path): def get_formatted_size(file_path):
file_size = os.path.getsize(file_path) file_size = os.path.getsize(file_path)
units = ['Bytes', 'KB', 'MB', 'GB', 'TB'] units = ["Bytes", "KB", "MB", "GB", "TB"]
for unit in units: for unit in units:
if file_size < 1024: if file_size < 1024:
return f"{round(file_size, 3)} {unit}" return f"{round(file_size, 3)} {unit}"
@ -311,18 +633,17 @@ def get_formatted_size(file_path):
def dump(snapshot, path): def dump(snapshot, path):
cmd = restic.cat.base_command() + ['dump'] cmd = restic.cat.base_command() + ["dump"]
if SERVICE:
cmd = cmd + ['--tag', SERVICE]
cmd = cmd + [snapshot, path] cmd = cmd + [snapshot, path]
print(f"Dumping {path} from snapshot '{snapshot}'") print(f"Dumping {path} from snapshot '{snapshot}'")
output = subprocess.run(cmd, capture_output=True) output = subprocess.run(cmd, capture_output=True)
if output.returncode: if output.returncode:
logging.error( logger.error(
f"error while dumping {path} from snapshot '{snapshot}': {output.stderr}") f"error while dumping {path} from snapshot '{snapshot}': {output.stderr}"
)
exit(1) exit(1)
return output.stdout return output.stdout
if __name__ == '__main__': if __name__ == "__main__":
cli() cli()

11
compose.pushbasicauth.yml Normal file
View File

@ -0,0 +1,11 @@
---
version: "3.8"
services:
app:
secrets:
- push_basicauth
secrets:
push_basicauth:
external: true
name: ${STACK_NAME}_push_basicauth_${SECRET_PUSH_BASICAUTH}

View File

@ -2,7 +2,7 @@
version: "3.8" version: "3.8"
services: services:
app: app:
image: docker:24.0.7-dind image: git.coopcloud.tech/coop-cloud/backup-bot-two:2.3.0-beta
volumes: volumes:
- "/var/run/docker.sock:/var/run/docker.sock" - "/var/run/docker.sock:/var/run/docker.sock"
- "/var/lib/docker/volumes/:/var/lib/docker/volumes/" - "/var/lib/docker/volumes/:/var/lib/docker/volumes/"
@ -14,19 +14,19 @@ services:
- RESTIC_PASSWORD_FILE=/run/secrets/restic_password - RESTIC_PASSWORD_FILE=/run/secrets/restic_password
secrets: secrets:
- restic_password - restic_password
deploy:
labels:
- coop-cloud.${STACK_NAME}.version=0.1.0+latest
- coop-cloud.${STACK_NAME}.timeout=${TIMEOUT:-300}
- coop-cloud.backupbot.enabled=true
configs: configs:
- source: entrypoint - source: entrypoint
target: /entrypoint.sh target: /entrypoint.sh
mode: 0555 mode: 666
- source: backupbot - source: cronjob
target: /usr/bin/backup target: /cronjob.sh
mode: 0555 mode: 666
entrypoint: ['/entrypoint.sh'] deploy:
labels:
- coop-cloud.${STACK_NAME}.version=2.3.0+2.3.0-beta
- coop-cloud.${STACK_NAME}.timeout=${TIMEOUT:-300}
- coop-cloud.backupbot.enabled=true
#entrypoint: ['tail', '-f','/dev/null']
healthcheck: healthcheck:
test: "pgrep crond" test: "pgrep crond"
interval: 30s interval: 30s
@ -38,14 +38,14 @@ secrets:
restic_password: restic_password:
external: true external: true
name: ${STACK_NAME}_restic_password_${SECRET_RESTIC_PASSWORD_VERSION} name: ${STACK_NAME}_restic_password_${SECRET_RESTIC_PASSWORD_VERSION}
volumes:
backups:
configs: configs:
entrypoint: entrypoint:
name: ${STACK_NAME}_entrypoint_${ENTRYPOINT_VERSION} name: ${STACK_NAME}_entrypoint_${ENTRYPOINT_VERSION}
file: entrypoint.sh file: entrypoint.sh
backupbot: cronjob:
name: ${STACK_NAME}_backupbot_${BACKUPBOT_VERSION} name: ${STACK_NAME}_cronjob_${CRONJOB_VERSION}
file: backupbot.py file: cronjob.sh
volumes:
backups:

40
cronjob.sh Executable file
View File

@ -0,0 +1,40 @@
#!/bin/sh
# Cron entry point: runs `backup --machine-logs create` and reports the
# start, success or failure of the run to an optional monitoring endpoint.
#
# Environment:
#   PUSH_PROMETHEUS_URL  When set, the metric `backup` is pushed to this URL
#                        as 1 (started), 0 (succeeded) or -1 (failed) and the
#                        PUSH_URL_* variables are ignored.
#   PUSH_URL_START, PUSH_URL_SUCCESS, PUSH_URL_FAIL
#                        URLs requested on the respective event.
# Files:
#   /run/secrets/push_basicauth  Optional credentials, passed to curl via -u.
#   /tmp/backup.log              Output of the most recent backup run.

set -e

BASIC_AUTH_FILE=/run/secrets/push_basicauth
BACKUP_LOG=/tmp/backup.log

# The secret file may be absent; only read it when it is there so that no
# error is printed on every run.
basic_auth=""
if [ -r "$BASIC_AUTH_FILE" ]; then
  basic_auth=$(cat "$BASIC_AUTH_FILE")
fi

# Runs curl silently, adding basic auth when credentials are configured.
# Arguments: passed on to curl unchanged.
push_curl() {
  if [ -n "$basic_auth" ]; then
    curl -s -u "$basic_auth" "$@"
  else
    curl -s "$@"
  fi
}

# Sends one notification. A failing notification is reported on stderr but
# never aborts the script: a monitoring outage must not prevent the backup.
# Arguments: $1 - event name (for the warning only)
#            $2 - value of the `backup` metric for the push gateway
#            $3 - plain URL for this event (may be empty)
notify() {
  if [ -n "$PUSH_PROMETHEUS_URL" ]; then
    printf 'backup %s\n' "$2" \
      | push_curl --data-binary @- "$PUSH_PROMETHEUS_URL" \
      || echo "cronjob: could not send '$1' notification" >&2
  elif [ -n "$3" ]; then
    push_curl "$3" \
      || echo "cronjob: could not send '$1' notification" >&2
  fi
}

notify start 1 "$PUSH_URL_START"

# The exit status of the pipeline is tee's, not backup's, so success is
# detected by the 'backup finished' summary line in the log. The output is
# also passed through to stdout so it is not lost.
backup --machine-logs create 2>&1 | tee "$BACKUP_LOG"

if grep -q 'backup finished' "$BACKUP_LOG"; then
  backup_status=success
  notify success 0 "$PUSH_URL_SUCCESS"
else
  backup_status=fail
  notify fail -1 "$PUSH_URL_FAIL"
fi

9
entrypoint.sh Normal file → Executable file
View File

@ -1,11 +1,6 @@
#!/bin/sh #!/bin/sh
set -e -o pipefail set -e
apk add --upgrade --no-cache restic bash python3 py3-pip
# Todo use requirements file with specific versions
pip install click==8.1.7 docker==6.1.3 resticpy==1.0.2
if [ -n "$SSH_HOST_KEY" ] if [ -n "$SSH_HOST_KEY" ]
then then
@ -14,7 +9,7 @@ fi
cron_schedule="${CRON_SCHEDULE:?CRON_SCHEDULE not set}" cron_schedule="${CRON_SCHEDULE:?CRON_SCHEDULE not set}"
echo "$cron_schedule backup create" | crontab - echo "$cron_schedule /cronjob.sh" | crontab -
crontab -l crontab -l
crond -f -d8 -L /dev/stdout crond -f -d8 -L /dev/stdout

34
pg_backup.sh Normal file
View File

@ -0,0 +1,34 @@
#!/bin/bash
# Example backup/restore helper for a PostgreSQL container.
#
# Usage: pg_backup.sh backup|restore
#
# Environment:
#   POSTGRES_USER           role used for all client commands
#   POSTGRES_DB             database to dump / recreate
#   POSTGRES_PASSWORD_FILE  file holding the password (used by backup)
#
# The dump is written to, and restored from, $BACKUP_FILE.

set -e

BACKUP_FILE='/var/lib/postgresql/data/backup.sql'

# Dumps POSTGRES_DB as plain SQL into BACKUP_FILE.
backup() {
  # Assign and export separately so a failing `cat` is not masked by the
  # always-successful `export`.
  PGPASSWORD=$(cat "$POSTGRES_PASSWORD_FILE")
  export PGPASSWORD
  pg_dump -U "${POSTGRES_USER}" "${POSTGRES_DB}" > "$BACKUP_FILE"
}

# Puts the saved pg_hba.conf back and reloads the server configuration.
# Uses relative paths: must run with the data directory as working directory.
restore_config() {
  # Restore allowed connections
  cat pg_hba.conf.bak > pg_hba.conf
  su postgres -c 'pg_ctl reload'
}

# Recreates POSTGRES_DB from BACKUP_FILE while only local connections are
# allowed; the original pg_hba.conf is restored afterwards, also on failure.
restore() {
  cd /var/lib/postgresql/data/

  # Don't allow any other connections than local
  cp pg_hba.conf pg_hba.conf.bak
  echo "local all all trust" > pg_hba.conf
  su postgres -c 'pg_ctl reload'
  trap restore_config EXIT INT TERM

  # Recreate Database
  # dropdb quotes the name exactly like createdb does (a raw DROP DATABASE
  # statement would case-fold it) and --if-exists tolerates a missing
  # database.
  dropdb -U "${POSTGRES_USER}" --if-exists --force "${POSTGRES_DB}"
  createdb -U "${POSTGRES_USER}" "${POSTGRES_DB}"
  # ON_ERROR_STOP makes psql exit non-zero when the dump fails to load;
  # without it a rolled-back single transaction would still exit 0.
  psql -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -v ON_ERROR_STOP=1 -1 -f "$BACKUP_FILE"

  trap - EXIT INT TERM
  restore_config
}

# Dispatch to the function named by the first argument.
"$@"

1
release/1.0.0+2.0.0-beta Normal file
View File

@ -0,0 +1 @@
This is the first beta release of the new backup-bot-two rewrite in Python. Be aware that updating can break your existing setup. Please read the README and update your configuration accordingly.