Add retry logic for database operations and enhance DBConfig with retry parameters
This commit is contained in:
parent
6ac0d5e256
commit
f557d6141f
@ -4,13 +4,14 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
_ "embed" // Required for go:embed
|
_ "embed" // Required for go:embed
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
_ "github.com/mattn/go-sqlite3" // SQLite driver
|
"github.com/mattn/go-sqlite3" // SQLite driver
|
||||||
)
|
)
|
||||||
|
|
||||||
//go:embed schema.sql
|
//go:embed schema.sql
|
||||||
@ -23,6 +24,8 @@ type DBConfig struct {
|
|||||||
MaxIdleConns int // Maximum number of idle connections
|
MaxIdleConns int // Maximum number of idle connections
|
||||||
ConnMaxLifetime time.Duration // Maximum lifetime of connections
|
ConnMaxLifetime time.Duration // Maximum lifetime of connections
|
||||||
ConnMaxIdleTime time.Duration // Maximum idle time for connections
|
ConnMaxIdleTime time.Duration // Maximum idle time for connections
|
||||||
|
MaxRetries int // Maximum number of connection retry attempts
|
||||||
|
RetryDelay time.Duration // Delay between retry attempts
|
||||||
}
|
}
|
||||||
|
|
||||||
// DefaultDBConfig returns a DBConfig with sensible defaults for SQLite.
|
// DefaultDBConfig returns a DBConfig with sensible defaults for SQLite.
|
||||||
@ -33,17 +36,86 @@ func DefaultDBConfig(dsn string) *DBConfig {
|
|||||||
MaxIdleConns: 10, // Keep some connections idle for reuse
|
MaxIdleConns: 10, // Keep some connections idle for reuse
|
||||||
ConnMaxLifetime: 30 * time.Minute, // Rotate connections every 30 minutes
|
ConnMaxLifetime: 30 * time.Minute, // Rotate connections every 30 minutes
|
||||||
ConnMaxIdleTime: 5 * time.Minute, // Close idle connections after 5 minutes
|
ConnMaxIdleTime: 5 * time.Minute, // Close idle connections after 5 minutes
|
||||||
|
MaxRetries: 3, // Retry connection attempts up to 3 times
|
||||||
|
RetryDelay: time.Second, // Wait 1 second between retry attempts
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewDB initializes and returns a new database connection pool and runs migrations.
|
// isRetryableError checks if an error is worth retrying.
|
||||||
func NewDB(ctx context.Context, logger *slog.Logger, config *DBConfig) (*sql.DB, error) {
|
// This checks SQLite error codes for transient locking/busy conditions.
|
||||||
|
func isRetryableError(err error) bool {
|
||||||
|
if err == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if it's a sqlite3.Error
|
||||||
|
var sqliteErr sqlite3.Error
|
||||||
|
if errors.As(err, &sqliteErr) {
|
||||||
|
switch sqliteErr.Code {
|
||||||
|
case sqlite3.ErrBusy, // SQLITE_BUSY (5) - database is locked by another process
|
||||||
|
sqlite3.ErrLocked: // SQLITE_LOCKED (6) - database is locked within same connection
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check extended error codes for more specific busy conditions
|
||||||
|
switch sqliteErr.ExtendedCode {
|
||||||
|
case sqlite3.ErrBusyRecovery, // SQLITE_BUSY_RECOVERY (261) - WAL recovery in progress
|
||||||
|
sqlite3.ErrBusySnapshot: // SQLITE_BUSY_SNAPSHOT (517) - snapshot conflict
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// No fallback string matching - if it's not a proper SQLite error with
|
||||||
|
// the right error codes, we don't retry
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// retryOperation executes an operation with retry logic for transient SQLite errors.
|
||||||
|
func retryOperation(ctx context.Context, logger *slog.Logger, config *DBConfig, operationName string, operation func() error) error {
|
||||||
|
for attempt := 0; attempt <= config.MaxRetries; attempt++ {
|
||||||
|
if attempt > 0 {
|
||||||
|
logger.Info("retrying operation",
|
||||||
|
slog.String("operation", operationName),
|
||||||
|
slog.Int("attempt", attempt),
|
||||||
|
slog.Int("max_retries", config.MaxRetries))
|
||||||
|
|
||||||
|
// Wait before retrying
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return fmt.Errorf("context canceled during %s retry: %w", operationName, ctx.Err())
|
||||||
|
case <-time.After(config.RetryDelay):
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err := operation()
|
||||||
|
if err != nil {
|
||||||
|
if isRetryableError(err) && attempt < config.MaxRetries {
|
||||||
|
logger.Warn("retryable error during operation",
|
||||||
|
slog.String("operation", operationName),
|
||||||
|
slog.Any("error", err),
|
||||||
|
slog.Int("attempt", attempt))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return fmt.Errorf("failed to %s: %w", operationName, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Operation successful
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// This should never be reached due to the loop condition, but for safety
|
||||||
|
return fmt.Errorf("unexpected retry loop exit for %s", operationName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// openAndConfigureDB opens the database connection and configures the connection pool.
|
||||||
|
func openAndConfigureDB(config *DBConfig) (*sql.DB, error) {
|
||||||
// Ensure the directory for the SQLite file exists
|
// Ensure the directory for the SQLite file exists
|
||||||
dbDir := filepath.Dir(config.DSN)
|
dbDir := filepath.Dir(config.DSN)
|
||||||
if err := os.MkdirAll(dbDir, 0755); err != nil {
|
if err := os.MkdirAll(dbDir, 0755); err != nil {
|
||||||
return nil, fmt.Errorf("failed to create database directory %s: %w", dbDir, err)
|
return nil, fmt.Errorf("failed to create database directory %s: %w", dbDir, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Open database connection (no retry needed - sql.Open rarely fails with transient errors)
|
||||||
db, err := sql.Open("sqlite3", config.DSN+"?_foreign_keys=on") // Enable foreign key constraints
|
db, err := sql.Open("sqlite3", config.DSN+"?_foreign_keys=on") // Enable foreign key constraints
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to open database: %w", err)
|
return nil, fmt.Errorf("failed to open database: %w", err)
|
||||||
@ -55,9 +127,23 @@ func NewDB(ctx context.Context, logger *slog.Logger, config *DBConfig) (*sql.DB,
|
|||||||
db.SetConnMaxLifetime(config.ConnMaxLifetime)
|
db.SetConnMaxLifetime(config.ConnMaxLifetime)
|
||||||
db.SetConnMaxIdleTime(config.ConnMaxIdleTime)
|
db.SetConnMaxIdleTime(config.ConnMaxIdleTime)
|
||||||
|
|
||||||
if err = db.PingContext(ctx); err != nil {
|
return db, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewDB initializes and returns a new database connection pool and runs migrations.
|
||||||
|
func NewDB(ctx context.Context, logger *slog.Logger, config *DBConfig) (*sql.DB, error) {
|
||||||
|
db, err := openAndConfigureDB(config)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test the actual connection with retry logic
|
||||||
|
err = retryOperation(ctx, logger, config, "database connection", func() error {
|
||||||
|
return db.PingContext(ctx)
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
db.Close() // Clean up connection on failure
|
db.Close() // Clean up connection on failure
|
||||||
return nil, fmt.Errorf("failed to ping database: %w", err)
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.Info("database connection established",
|
logger.Info("database connection established",
|
||||||
@ -65,12 +151,16 @@ func NewDB(ctx context.Context, logger *slog.Logger, config *DBConfig) (*sql.DB,
|
|||||||
slog.Int("max_open_conns", config.MaxOpenConns),
|
slog.Int("max_open_conns", config.MaxOpenConns),
|
||||||
slog.Int("max_idle_conns", config.MaxIdleConns))
|
slog.Int("max_idle_conns", config.MaxIdleConns))
|
||||||
|
|
||||||
// Execute schema.
|
// Execute schema with retry logic
|
||||||
if _, err := db.ExecContext(ctx, ddl); err != nil {
|
err = retryOperation(ctx, logger, config, "schema execution", func() error {
|
||||||
|
_, err := db.ExecContext(ctx, ddl)
|
||||||
|
return err
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
db.Close() // Clean up connection on failure
|
db.Close() // Clean up connection on failure
|
||||||
return nil, fmt.Errorf("failed to execute DDL: %w", err)
|
return nil, err
|
||||||
}
|
}
|
||||||
logger.Info("database schema applied")
|
|
||||||
|
|
||||||
|
logger.Info("database schema applied")
|
||||||
return db, nil
|
return db, nil
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user