WIP: Add compression to recipe #61

Draft
notplants wants to merge 7 commits from compress into main
6 changed files with 299 additions and 2 deletions

View File

@ -76,6 +76,10 @@ ENCRYPTED_BY_DEFAULT=all
#TRACK_PUPPETED_USER_IPS=true
## Room complexity limit (prevents joining large remote rooms that cause DB bloat)
## complexity ≈ state_events / 500. Default 100.0 blocks rooms with >50000 state events.
#ROOM_COMPLEXITY_LIMIT=100.0
## Retention
ALLOWED_LIFETIME_MAX=4w
@ -199,6 +203,14 @@ RETENTION_MAX_LIFETIME=4w
#WEB_CLIENT_LOCATION=https://element-web.example.com
## State compression (reduces database bloat from federation)
## Runs synapse_auto_compressor daily, built from source on first start
#COMPOSE_FILE="$COMPOSE_FILE:compose.compress-state.yml"
# See https://github.com/matrix-org/rust-synapse-compress-state#running-options
#STATE_COMPRESS_CHUNK_SIZE=500
#STATE_COMPRESS_CHUNKS=100
#STATE_COMPRESS_SCHEDULE=0 3 * * *
## Admin interface at /admin
#COMPOSE_FILE="$COMPOSE_FILE:compose.admin.yml"
#ADMIN_INTERFACE_ENABLED=1

209
abra.sh
View File

@ -1,6 +1,6 @@
export DISCORD_BRIDGE_YAML_VERSION=v2
export ENTRYPOINT_CONF_VERSION=v3
export HOMESERVER_YAML_VERSION=v35
export HOMESERVER_YAML_VERSION=v36
export LOG_CONFIG_VERSION=v2
export SHARED_SECRET_AUTH_VERSION=v2
export SIGNAL_BRIDGE_YAML_VERSION=v6
@ -10,6 +10,213 @@ export WK_SERVER_VERSION=v1
export WK_CLIENT_VERSION=v1
export PG_BACKUP_VERSION=v2
export ADMIN_CONFIG_VERSION=v1
export COMPRESS_STATE_ENTRYPOINT_VERSION=v5
###############################################################################
# Database maintenance — shrink a bloated Synapse database
#
# See https://levans.fr/shrink-synapse-database.html
#
# Recommended steps to reclaim disk space:
# 1. abra app cmd <domain> compress-state run_compressor 500 10000
# (compress redundant state — safe while Synapse is running)
# 2. abra app cmd <domain> db reindex
# (rebuild indexes — stop Synapse first)
# 3. abra app cmd <domain> db vacuum_full
# (rewrite tables and reclaim disk — stop Synapse first)
#
# Diagnostic commands (safe to run anytime):
# abra app cmd <domain> db db_size
# abra app cmd <domain> db state_bloat
# abra app cmd <domain> db empty_rooms
#
# Purge commands (require an admin token):
# abra app cmd <domain> app register_admin <user> <pass>
# abra app cmd <domain> app get_token <user> <pass>
# abra app cmd <domain> app purge_remote_media <days> <token>
# abra app cmd <domain> app purge_empty_rooms <token>
# abra app cmd <domain> app purge_room <room_id> <token>
# abra app cmd <domain> app purge_history <room_id> <days> <token>
###############################################################################
# --- Diagnostics (db) ---
db_size() {
echo "=== Database size ==="
psql -U synapse -d synapse -c "SELECT pg_size_pretty(pg_database_size('synapse')) AS db_size;"
echo ""
echo "=== Top 10 largest tables ==="
psql -U synapse -d synapse -c "
SELECT nspname || '.' || relname AS table,
pg_size_pretty(pg_total_relation_size(C.oid)) AS total_size
FROM pg_class C
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
WHERE nspname NOT IN ('pg_catalog', 'information_schema')
ORDER BY pg_total_relation_size(C.oid) DESC
LIMIT 10;"
}
state_bloat() {
echo "=== Rooms with most state bloat ==="
psql -U synapse -d synapse -c "
SELECT room_id, count(*) AS state_entries
FROM state_groups_state
GROUP BY room_id
ORDER BY state_entries DESC
LIMIT 20;"
}
empty_rooms() {
echo "=== Rooms with no local members ==="
psql -U synapse -d synapse -c "
SELECT room_id, room_version
FROM rooms
WHERE room_id NOT IN (
SELECT room_id FROM local_current_membership WHERE membership = 'join'
);"
}
# --- Compression (compress-state) ---
run_compressor() {
CHUNK_SIZE="${1:-${STATE_COMPRESS_CHUNK_SIZE:-500}}"
CHUNKS="${2:-${STATE_COMPRESS_CHUNKS:-100}}"
DB_PASS=$(cat /run/secrets/db_password)
echo "Running synapse_auto_compressor (chunk_size=$CHUNK_SIZE, chunks=$CHUNKS)..."
/build/synapse_auto_compressor \
-p "postgresql://synapse:${DB_PASS}@db:5432/synapse" \
-c "$CHUNK_SIZE" -n "$CHUNKS"
}
# --- Maintenance (db) — stop Synapse before running these ---
reindex() {
echo "WARNING: REINDEX locks tables. Synapse should be stopped before running this."
echo "Running REINDEX on synapse database..."
psql -U synapse -d synapse -c "REINDEX (VERBOSE) DATABASE synapse;"
echo "REINDEX complete."
psql -U synapse -d synapse -c "SELECT pg_size_pretty(pg_database_size('synapse')) AS db_size;"
}
vacuum_full() {
echo "WARNING: VACUUM FULL locks tables and requires temporary disk space."
echo "Synapse should be stopped before running this."
echo "Running VACUUM FULL on synapse database..."
psql -U synapse -d synapse -c "VACUUM FULL;"
echo "VACUUM FULL complete."
psql -U synapse -d synapse -c "SELECT pg_size_pretty(pg_database_size('synapse')) AS db_size;"
}
# --- Purge commands (app) — require an admin access token ---
register_admin() {
USER="${1}"
PASS="${2}"
if [ -z "$USER" ] || [ -z "$PASS" ]; then
echo "Usage: register_admin <username> <password>"
return 1
fi
register_new_matrix_user -u "$USER" -p "$PASS" -a -c /data/homeserver.yaml http://localhost:8008
}
get_token() {
USER="${1}"
PASS="${2}"
if [ -z "$USER" ] || [ -z "$PASS" ]; then
echo "Usage: get_token <username> <password>"
echo "Returns an admin access token for use with purge commands."
return 1
fi
curl -s -X POST "http://localhost:8008/_matrix/client/r0/login" \
-H "Content-Type: application/json" \
-d "{\"type\":\"m.login.password\",\"user\":\"$USER\",\"password\":\"$PASS\"}" \
| python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('access_token', d.get('error', 'unknown error')))"
}
purge_remote_media() {
DAYS="${1:-30}"
TOKEN="${2}"
if [ -z "$TOKEN" ]; then
echo "Usage: purge_remote_media <days> <admin_token>"
return 1
fi
BEFORE_TS=$(( $(date +%s) * 1000 - DAYS * 86400000 ))
echo "Purging remote media older than $DAYS days..."
curl -s -X POST "http://localhost:8008/_synapse/admin/v1/purge_media_cache?before_ts=$BEFORE_TS" \
-H "Authorization: Bearer $TOKEN"
echo ""
}
purge_room() {
ROOM_ID="${1}"
TOKEN="${2}"
if [ -z "$ROOM_ID" ] || [ -z "$TOKEN" ]; then
echo "Usage: purge_room <room_id> <admin_token>"
return 1
fi
echo "Purging room $ROOM_ID..."
curl -s -X DELETE "http://localhost:8008/_synapse/admin/v1/rooms/$ROOM_ID" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"purge": true}'
echo ""
}
purge_history() {
ROOM_ID="${1}"
DAYS="${2:-90}"
TOKEN="${3}"
if [ -z "$ROOM_ID" ] || [ -z "$TOKEN" ]; then
echo "Usage: purge_history <room_id> <days> <admin_token>"
return 1
fi
BEFORE_TS=$(( $(date +%s) * 1000 - DAYS * 86400000 ))
echo "Purging history older than $DAYS days from $ROOM_ID..."
curl -s -X POST "http://localhost:8008/_synapse/admin/v1/purge_history/$ROOM_ID" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d "{\"purge_up_to_ts\": $BEFORE_TS}"
echo ""
}
purge_empty_rooms() {
TOKEN="${1}"
if [ -z "$TOKEN" ]; then
echo "Usage: purge_empty_rooms <admin_token>"
return 1
fi
echo "Fetching rooms with no local members..."
ROOMS=$(curl -s "http://localhost:8008/_synapse/admin/v1/rooms?limit=1000" \
-H "Authorization: Bearer $TOKEN" \
| python3 -c "
import sys, json
data = json.load(sys.stdin)
for r in data.get('rooms', []):
if r.get('joined_local_members', 0) == 0:
print(r['room_id'])
")
COUNT=$(echo "$ROOMS" | grep -c '.' || true)
echo "Found $COUNT empty rooms."
if [ "$COUNT" -eq 0 ]; then
echo "Nothing to purge."
return 0
fi
echo "$ROOMS"
echo ""
echo "Purging..."
for ROOM_ID in $ROOMS; do
echo " Purging $ROOM_ID"
curl -s -X DELETE "http://localhost:8008/_synapse/admin/v1/rooms/$ROOM_ID" \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"purge": true}' > /dev/null
done
echo "Done."
}
###############################################################################
# Other commands
###############################################################################
set_admin () {
admin=akadmin

View File

@ -0,0 +1,31 @@
version: "3.8"
services:
compress-state:
image: rust:1-alpine
entrypoint: /compress_state_entrypoint.sh
environment:
- STATE_COMPRESS_CHUNK_SIZE=${STATE_COMPRESS_CHUNK_SIZE:-500}
- STATE_COMPRESS_CHUNKS=${STATE_COMPRESS_CHUNKS:-100}
- STATE_COMPRESS_SCHEDULE=${STATE_COMPRESS_SCHEDULE:-0 3 * * *}
secrets:
- db_password
configs:
- source: compress_entrypoint
target: /compress_state_entrypoint.sh
mode: 0555
volumes:
- compress_state_build:/build
networks:
- internal
deploy:
restart_policy:
condition: on-failure
volumes:
compress_state_build:
configs:
compress_entrypoint:
name: ${STACK_NAME}_compress_ep_${COMPRESS_STATE_ENTRYPOINT_VERSION}
file: compress_state_entrypoint.sh

View File

@ -90,6 +90,7 @@ services:
- LOGIN_LIMIT_IP_BURST=${LOGIN_LIMIT_IP_BURST:-5}
- LOGIN_LIMIT_ACCOUNT_PER_SECOND=${LOGIN_LIMIT_ACCOUNT_PER_SECOND:-0.003}
- LOGIN_LIMIT_ACCOUNT_BURST=${LOGIN_LIMIT_ACCOUNT_BURST:-5}
- ROOM_COMPLEXITY_LIMIT=${ROOM_COMPLEXITY_LIMIT:-100.0}
- WEB_CLIENT_LOCATION
networks:
- internal

View File

@ -0,0 +1,46 @@
#!/bin/sh
set -e
BINARY="/build/synapse_auto_compressor"
REPO_DIR="/build/rust-synapse-compress-state"
DB_PASS=$(cat /run/secrets/db_password)
CONN="postgresql://synapse:${DB_PASS}@db:5432/synapse"
CHUNK_SIZE="${STATE_COMPRESS_CHUNK_SIZE:-500}"
CHUNKS="${STATE_COMPRESS_CHUNKS:-100}"
SCHEDULE="${STATE_COMPRESS_SCHEDULE:-0 3 * * *}"
# Build from source if binary doesn't exist
if [ ! -f "$BINARY" ]; then
echo "[compress-state] Binary not found, building from source..."
apk add --no-cache git openssl-dev openssl-libs-static perl make musl-dev jemalloc-dev
rm -rf "$REPO_DIR"
git clone https://github.com/matrix-org/rust-synapse-compress-state "$REPO_DIR"
cd "$REPO_DIR"
cargo build --release -p synapse_auto_compressor
cp target/release/synapse_auto_compressor "$BINARY"
echo "[compress-state] Build complete"
# Clean up source to save space
rm -rf "$REPO_DIR"
else
echo "[compress-state] Using cached binary"
fi
# Run once at startup
echo "[compress-state] Running initial compression at $(date)"
"$BINARY" -p "$CONN" -c "$CHUNK_SIZE" -n "$CHUNKS" || echo "[compress-state] Error: $?"
# Set up cron job
CRON_SCRIPT="/build/run_compressor.sh"
cat > "$CRON_SCRIPT" <<EOF
#!/bin/sh
echo "[compress-state] Running at \$(date)"
$BINARY -p "$CONN" -c $CHUNK_SIZE -n $CHUNKS || echo "[compress-state] Error: \$?"
echo "[compress-state] Done at \$(date)"
EOF
chmod +x "$CRON_SCRIPT"
echo "$SCHEDULE $CRON_SCRIPT" | crontab -
echo "[compress-state] Cron scheduled: $SCHEDULE"
# Run crond in the foreground
exec crond -f -l 2

View File

@ -69,7 +69,7 @@ admin_contact: 'mailto:{{ env "ADMIN_EMAIL" }}'
# https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#limit_remote_rooms
limit_remote_rooms:
enabled: true
complexity: 200.0
complexity: {{ env "ROOM_COMPLEXITY_LIMIT" }}
# https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#max_avatar_size
max_avatar_size: 10M