Add retry logic for database operations and enhance DBConfig with retry parameters

This commit is contained in:
Christian Galo 2025-06-03 20:20:17 -05:00
parent 6ac0d5e256
commit f557d6141f

View File

@ -4,13 +4,14 @@ import (
"context"
"database/sql"
_ "embed" // Required for go:embed
"errors"
"fmt"
"log/slog"
"os"
"path/filepath"
"time"
_ "github.com/mattn/go-sqlite3" // SQLite driver
"github.com/mattn/go-sqlite3" // SQLite driver
)
//go:embed schema.sql
@ -23,6 +24,8 @@ type DBConfig struct {
MaxIdleConns int // Maximum number of idle connections
ConnMaxLifetime time.Duration // Maximum lifetime of connections
ConnMaxIdleTime time.Duration // Maximum idle time for connections
MaxRetries int // Maximum number of connection retry attempts
RetryDelay time.Duration // Delay between retry attempts
}
// DefaultDBConfig returns a DBConfig with sensible defaults for SQLite.
@ -33,17 +36,86 @@ func DefaultDBConfig(dsn string) *DBConfig {
MaxIdleConns: 10, // Keep some connections idle for reuse
ConnMaxLifetime: 30 * time.Minute, // Rotate connections every 30 minutes
ConnMaxIdleTime: 5 * time.Minute, // Close idle connections after 5 minutes
MaxRetries: 3, // Retry connection attempts up to 3 times
RetryDelay: time.Second, // Wait 1 second between retry attempts
}
}
// NewDB initializes and returns a new database connection pool and runs migrations.
func NewDB(ctx context.Context, logger *slog.Logger, config *DBConfig) (*sql.DB, error) {
// isRetryableError checks if an error is worth retrying.
// This checks SQLite error codes for transient locking/busy conditions.
func isRetryableError(err error) bool {
if err == nil {
return false
}
// Check if it's a sqlite3.Error
var sqliteErr sqlite3.Error
if errors.As(err, &sqliteErr) {
switch sqliteErr.Code {
case sqlite3.ErrBusy, // SQLITE_BUSY (5) - database is locked by another process
sqlite3.ErrLocked: // SQLITE_LOCKED (6) - database is locked within same connection
return true
}
// Check extended error codes for more specific busy conditions
switch sqliteErr.ExtendedCode {
case sqlite3.ErrBusyRecovery, // SQLITE_BUSY_RECOVERY (261) - WAL recovery in progress
sqlite3.ErrBusySnapshot: // SQLITE_BUSY_SNAPSHOT (517) - snapshot conflict
return true
}
}
// No fallback string matching - if it's not a proper SQLite error with
// the right error codes, we don't retry
return false
}
// retryOperation executes an operation with retry logic for transient SQLite errors.
func retryOperation(ctx context.Context, logger *slog.Logger, config *DBConfig, operationName string, operation func() error) error {
for attempt := 0; attempt <= config.MaxRetries; attempt++ {
if attempt > 0 {
logger.Info("retrying operation",
slog.String("operation", operationName),
slog.Int("attempt", attempt),
slog.Int("max_retries", config.MaxRetries))
// Wait before retrying
select {
case <-ctx.Done():
return fmt.Errorf("context canceled during %s retry: %w", operationName, ctx.Err())
case <-time.After(config.RetryDelay):
}
}
err := operation()
if err != nil {
if isRetryableError(err) && attempt < config.MaxRetries {
logger.Warn("retryable error during operation",
slog.String("operation", operationName),
slog.Any("error", err),
slog.Int("attempt", attempt))
continue
}
return fmt.Errorf("failed to %s: %w", operationName, err)
}
// Operation successful
return nil
}
// This should never be reached due to the loop condition, but for safety
return fmt.Errorf("unexpected retry loop exit for %s", operationName)
}
// openAndConfigureDB opens the database connection and configures the connection pool.
func openAndConfigureDB(config *DBConfig) (*sql.DB, error) {
// Ensure the directory for the SQLite file exists
dbDir := filepath.Dir(config.DSN)
if err := os.MkdirAll(dbDir, 0755); err != nil {
return nil, fmt.Errorf("failed to create database directory %s: %w", dbDir, err)
}
// Open database connection (no retry needed - sql.Open rarely fails with transient errors)
db, err := sql.Open("sqlite3", config.DSN+"?_foreign_keys=on") // Enable foreign key constraints
if err != nil {
return nil, fmt.Errorf("failed to open database: %w", err)
@ -55,9 +127,23 @@ func NewDB(ctx context.Context, logger *slog.Logger, config *DBConfig) (*sql.DB,
db.SetConnMaxLifetime(config.ConnMaxLifetime)
db.SetConnMaxIdleTime(config.ConnMaxIdleTime)
if err = db.PingContext(ctx); err != nil {
return db, nil
}
// NewDB initializes and returns a new database connection pool and runs migrations.
func NewDB(ctx context.Context, logger *slog.Logger, config *DBConfig) (*sql.DB, error) {
db, err := openAndConfigureDB(config)
if err != nil {
return nil, err
}
// Test the actual connection with retry logic
err = retryOperation(ctx, logger, config, "database connection", func() error {
return db.PingContext(ctx)
})
if err != nil {
db.Close() // Clean up connection on failure
return nil, fmt.Errorf("failed to ping database: %w", err)
return nil, err
}
logger.Info("database connection established",
@ -65,12 +151,16 @@ func NewDB(ctx context.Context, logger *slog.Logger, config *DBConfig) (*sql.DB,
slog.Int("max_open_conns", config.MaxOpenConns),
slog.Int("max_idle_conns", config.MaxIdleConns))
// Execute schema.
if _, err := db.ExecContext(ctx, ddl); err != nil {
// Execute schema with retry logic
err = retryOperation(ctx, logger, config, "schema execution", func() error {
_, err := db.ExecContext(ctx, ddl)
return err
})
if err != nil {
db.Close() // Clean up connection on failure
return nil, fmt.Errorf("failed to execute DDL: %w", err)
return nil, err
}
logger.Info("database schema applied")
logger.Info("database schema applied")
return db, nil
}