The in-place pg_upgrade in the db entrypoint could crash-loop or fail on real clusters. This reworks it:

- Idempotent, crash-safe: replace the fragile migration_in_progress marker with a state-driven guard on the old_data/new_data scratch dirs. An empty leftover means a run was interrupted before any data moved (data still intact at $PGDATA), so it is discarded and retried; a non-empty one means data may live only there, so the script stops for manual recovery. This removes both the "mkdir: File exists" crash-loop and the silent fresh-initdb-over-live-data window.
- Correct install user: pg_upgrade must run as the old cluster's bootstrap superuser (oid 10), and the new cluster must be initialised with that same user. It is not necessarily $POSTGRES_USER (clusters created with the default "postgres" superuser plus a separate app role are common). Detect it from the old cluster (briefly start it and read pg_roles where oid = 10) and use it for both the new cluster's initdb and the pg_upgrade -U argument.
- Bump DB_ENTRYPOINT_VERSION to v3 so swarm reloads the (immutable) config.

Verified on cctest: clean 13->17, interrupted-then-retried, and prod-like clusters whose install user is "postgres" with a separate "discourse" app role.
65 lines
2.8 KiB
Bash
65 lines
2.8 KiB
Bash
#!/bin/bash
#
# Database container entrypoint wrapper.
#
# If $PGDATA holds a cluster whose PG_VERSION differs from $PG_MAJOR, install
# the old server binaries, pg_upgrade the cluster in place (using
# $PGDATA/old_data and $PGDATA/new_data as scratch), then hand over to the
# stock docker-entrypoint.sh.
#
# Environment read: PGDATA (required), PG_MAJOR (required when a cluster
# exists), POSTGRES_USER (role used to query the old cluster).

set -e

# Refuse to continue without PGDATA: the scratch paths below are derived from
# it and are passed to rm -rf.
: "${PGDATA:?PGDATA must be set}"

OLDDATA=$PGDATA/old_data
NEWDATA=$PGDATA/new_data

echo "Running as $(id)"

# The migration uses $OLDDATA/$NEWDATA as scratch and removes them when it
# finishes; a leftover *empty* one means a run was interrupted before any data
# moved (data still intact at $PGDATA) so we clear it and retry, while a
# *non-empty* one means data may live only there, so we stop for manual recovery.
for scratch in "$OLDDATA" "$NEWDATA"; do
  if [[ -d "$scratch" && -n "$(ls -A "$scratch")" ]]; then
    echo "FATAL: $scratch exists and is not empty - a previous migration did not" >&2
    echo "complete and the data may only exist there. manual recovery necessary." >&2
    exit 1
  fi
done
rm -rf -- "$OLDDATA" "$NEWDATA"

if [[ -f "$PGDATA/PG_VERSION" ]]; then
  : "${PG_MAJOR:?PG_MAJOR must be set}"
  DATA_VERSION=$(cat "$PGDATA/PG_VERSION")

  if [[ -n "$DATA_VERSION" && "$PG_MAJOR" != "$DATA_VERSION" ]]; then
    echo "postgres data version $DATA_VERSION found, but need $PG_MAJOR. Starting migration"

    echo "Installing postgres $DATA_VERSION"
    # Append the old major version to every line of the pgdg apt source so
    # the old server package can be resolved.
    sed -i "s/$/ $DATA_VERSION/" /etc/apt/sources.list.d/pgdg.list
    # Separate statements (not an && chain) so that set -e aborts on the first
    # failing step instead of carrying on without the old binaries.
    apt-get update
    apt-get install -y --no-install-recommends "postgresql-$DATA_VERSION"
    rm -rf /var/lib/apt/lists/*

    # pg_upgrade must run as the old cluster's bootstrap superuser (the "install
    # user", oid 10), and the new cluster must be initialised with that same
    # user. It is not necessarily $POSTGRES_USER (e.g. clusters created with the
    # default "postgres" superuser and a separate app role), so read it from the
    # old cluster: briefly start it and ask, connecting as the app role we know.
    PGBIN=/usr/lib/postgresql/$DATA_VERSION/bin
    gosu postgres "$PGBIN/pg_ctl" -D "$PGDATA" -w \
      -o "-c listen_addresses= -c unix_socket_directories=/tmp" start
    # Capture the query status instead of letting set -e abort here, so the
    # old server is always shut down again before we decide how to proceed.
    query_status=0
    INSTALL_USER=$(gosu postgres psql -h /tmp -U "$POSTGRES_USER" -d postgres -tAc \
      "select rolname from pg_roles where oid = 10") || query_status=$?
    gosu postgres "$PGBIN/pg_ctl" -D "$PGDATA" -w stop
    if [[ "$query_status" -ne 0 || -z "$INSTALL_USER" ]]; then
      # Nothing has been moved yet, so the data is still intact at $PGDATA.
      echo "FATAL: could not determine the old cluster's install user (psql exit status $query_status)" >&2
      exit 1
    fi
    echo "old cluster install user: $INSTALL_USER"

    echo "shuffling around"
    gosu postgres mkdir "$OLDDATA" "$NEWDATA"
    chmod 700 "$OLDDATA" "$NEWDATA"
    # Move the old cluster's files into $OLDDATA, skipping the two scratch
    # directories themselves (the glob matches them as well). Any mv failure
    # aborts the script rather than being ignored.
    for entry in "$PGDATA"/*; do
      case "${entry##*/}" in
        old_data | new_data) continue ;;
      esac
      mv -- "$entry" "$OLDDATA/"
    done

    echo "running initdb"
    # abuse entrypoint script for initdb by making server error out; initialise
    # the new cluster with the same superuser as the old one so pg_upgrade matches.
    # The variables are passed through env rather than spliced into a bash -c
    # string, so the role name is never re-parsed as shell code. The non-zero
    # exit caused by the invalid server argument is expected, hence || true.
    gosu postgres env PGDATA="$NEWDATA" POSTGRES_USER="$INSTALL_USER" \
      /usr/local/bin/docker-entrypoint.sh --invalid-arg || true
    # Because the exit status above is deliberately ignored, confirm that a
    # cluster was actually created before handing it to pg_upgrade.
    if [[ ! -s "$NEWDATA/PG_VERSION" ]]; then
      echo "FATAL: initdb did not create a cluster in $NEWDATA; the old data is in $OLDDATA" >&2
      exit 1
    fi

    echo "running pg_upgrade"
    cd /tmp
    gosu postgres pg_upgrade --link -b "$PGBIN" -d "$OLDDATA" -D "$NEWDATA" -U "$INSTALL_USER"
    # Keep the old cluster's client authentication rules.
    cp "$OLDDATA/pg_hba.conf" "$NEWDATA/"
    mv "$NEWDATA"/* "$PGDATA/"
    rm -rf -- "$OLDDATA"
    rmdir "$NEWDATA"
    echo "migration complete"
  fi
fi

# exec so the stock entrypoint replaces this shell and receives the
# container's stop signals directly.
exec /usr/local/bin/docker-entrypoint.sh postgres