Add retry logic for database operations and enhance DBConfig with retry parameters
This commit is contained in:
parent
6ac0d5e256
commit
f557d6141f
@ -4,13 +4,14 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
_ "embed" // Required for go:embed
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3" // SQLite driver
|
||||
"github.com/mattn/go-sqlite3" // SQLite driver
|
||||
)
|
||||
|
||||
//go:embed schema.sql
|
||||
@ -23,6 +24,8 @@ type DBConfig struct {
|
||||
MaxIdleConns int // Maximum number of idle connections
|
||||
ConnMaxLifetime time.Duration // Maximum lifetime of connections
|
||||
ConnMaxIdleTime time.Duration // Maximum idle time for connections
|
||||
MaxRetries int // Maximum number of connection retry attempts
|
||||
RetryDelay time.Duration // Delay between retry attempts
|
||||
}
|
||||
|
||||
// DefaultDBConfig returns a DBConfig with sensible defaults for SQLite.
|
||||
@ -33,17 +36,86 @@ func DefaultDBConfig(dsn string) *DBConfig {
|
||||
MaxIdleConns: 10, // Keep some connections idle for reuse
|
||||
ConnMaxLifetime: 30 * time.Minute, // Rotate connections every 30 minutes
|
||||
ConnMaxIdleTime: 5 * time.Minute, // Close idle connections after 5 minutes
|
||||
MaxRetries: 3, // Retry connection attempts up to 3 times
|
||||
RetryDelay: time.Second, // Wait 1 second between retry attempts
|
||||
}
|
||||
}
|
||||
|
||||
// NewDB initializes and returns a new database connection pool and runs migrations.
|
||||
func NewDB(ctx context.Context, logger *slog.Logger, config *DBConfig) (*sql.DB, error) {
|
||||
// isRetryableError checks if an error is worth retrying.
|
||||
// This checks SQLite error codes for transient locking/busy conditions.
|
||||
func isRetryableError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check if it's a sqlite3.Error
|
||||
var sqliteErr sqlite3.Error
|
||||
if errors.As(err, &sqliteErr) {
|
||||
switch sqliteErr.Code {
|
||||
case sqlite3.ErrBusy, // SQLITE_BUSY (5) - database is locked by another process
|
||||
sqlite3.ErrLocked: // SQLITE_LOCKED (6) - database is locked within same connection
|
||||
return true
|
||||
}
|
||||
|
||||
// Check extended error codes for more specific busy conditions
|
||||
switch sqliteErr.ExtendedCode {
|
||||
case sqlite3.ErrBusyRecovery, // SQLITE_BUSY_RECOVERY (261) - WAL recovery in progress
|
||||
sqlite3.ErrBusySnapshot: // SQLITE_BUSY_SNAPSHOT (517) - snapshot conflict
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// No fallback string matching - if it's not a proper SQLite error with
|
||||
// the right error codes, we don't retry
|
||||
return false
|
||||
}
|
||||
|
||||
// retryOperation executes an operation with retry logic for transient SQLite errors.
|
||||
func retryOperation(ctx context.Context, logger *slog.Logger, config *DBConfig, operationName string, operation func() error) error {
|
||||
for attempt := 0; attempt <= config.MaxRetries; attempt++ {
|
||||
if attempt > 0 {
|
||||
logger.Info("retrying operation",
|
||||
slog.String("operation", operationName),
|
||||
slog.Int("attempt", attempt),
|
||||
slog.Int("max_retries", config.MaxRetries))
|
||||
|
||||
// Wait before retrying
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return fmt.Errorf("context canceled during %s retry: %w", operationName, ctx.Err())
|
||||
case <-time.After(config.RetryDelay):
|
||||
}
|
||||
}
|
||||
|
||||
err := operation()
|
||||
if err != nil {
|
||||
if isRetryableError(err) && attempt < config.MaxRetries {
|
||||
logger.Warn("retryable error during operation",
|
||||
slog.String("operation", operationName),
|
||||
slog.Any("error", err),
|
||||
slog.Int("attempt", attempt))
|
||||
continue
|
||||
}
|
||||
return fmt.Errorf("failed to %s: %w", operationName, err)
|
||||
}
|
||||
|
||||
// Operation successful
|
||||
return nil
|
||||
}
|
||||
|
||||
// This should never be reached due to the loop condition, but for safety
|
||||
return fmt.Errorf("unexpected retry loop exit for %s", operationName)
|
||||
}
|
||||
|
||||
// openAndConfigureDB opens the database connection and configures the connection pool.
|
||||
func openAndConfigureDB(config *DBConfig) (*sql.DB, error) {
|
||||
// Ensure the directory for the SQLite file exists
|
||||
dbDir := filepath.Dir(config.DSN)
|
||||
if err := os.MkdirAll(dbDir, 0755); err != nil {
|
||||
return nil, fmt.Errorf("failed to create database directory %s: %w", dbDir, err)
|
||||
}
|
||||
|
||||
// Open database connection (no retry needed - sql.Open rarely fails with transient errors)
|
||||
db, err := sql.Open("sqlite3", config.DSN+"?_foreign_keys=on") // Enable foreign key constraints
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open database: %w", err)
|
||||
@ -55,9 +127,23 @@ func NewDB(ctx context.Context, logger *slog.Logger, config *DBConfig) (*sql.DB,
|
||||
db.SetConnMaxLifetime(config.ConnMaxLifetime)
|
||||
db.SetConnMaxIdleTime(config.ConnMaxIdleTime)
|
||||
|
||||
if err = db.PingContext(ctx); err != nil {
|
||||
return db, nil
|
||||
}
|
||||
|
||||
// NewDB initializes and returns a new database connection pool and runs migrations.
|
||||
func NewDB(ctx context.Context, logger *slog.Logger, config *DBConfig) (*sql.DB, error) {
|
||||
db, err := openAndConfigureDB(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Test the actual connection with retry logic
|
||||
err = retryOperation(ctx, logger, config, "database connection", func() error {
|
||||
return db.PingContext(ctx)
|
||||
})
|
||||
if err != nil {
|
||||
db.Close() // Clean up connection on failure
|
||||
return nil, fmt.Errorf("failed to ping database: %w", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
logger.Info("database connection established",
|
||||
@ -65,12 +151,16 @@ func NewDB(ctx context.Context, logger *slog.Logger, config *DBConfig) (*sql.DB,
|
||||
slog.Int("max_open_conns", config.MaxOpenConns),
|
||||
slog.Int("max_idle_conns", config.MaxIdleConns))
|
||||
|
||||
// Execute schema.
|
||||
if _, err := db.ExecContext(ctx, ddl); err != nil {
|
||||
// Execute schema with retry logic
|
||||
err = retryOperation(ctx, logger, config, "schema execution", func() error {
|
||||
_, err := db.ExecContext(ctx, ddl)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
db.Close() // Clean up connection on failure
|
||||
return nil, fmt.Errorf("failed to execute DDL: %w", err)
|
||||
return nil, err
|
||||
}
|
||||
logger.Info("database schema applied")
|
||||
|
||||
logger.Info("database schema applied")
|
||||
return db, nil
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user