#!/usr/bin/env bash # # Test suite for pg_scribe chain transfer operations # # This test suite verifies proper behavior when transferring streaming # between chains using --new-chain --start # # Tests cover: # 1. --status correctly identifies which chain is actively streaming # 2. --new-chain --start seals old chain's active.sql before transferring # set -euo pipefail # Colors for test output RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[0;33m' BLUE='\033[0;34m' NC='\033[0m' # No Color # Test configuration SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" PG_SCRIBE="$SCRIPT_DIR/scripts/pg_scribe" TEST_DIR="/tmp/pg_scribe_chain_transfer_test_$$" TEST_DB_PREFIX="pg_scribe_chain_$$" PGUSER="${PGUSER:-postgres}" # Test counters TESTS_RUN=0 TESTS_PASSED=0 TESTS_FAILED=0 # Cleanup tracking DATABASES_TO_CLEANUP=() SLOTS_TO_CLEANUP=() # # Logging functions # log_test() { echo -e "${BLUE}TEST:${NC} $*" } log_pass() { echo -e "${GREEN}PASS:${NC} $*" ((TESTS_PASSED++)) } log_fail() { echo -e "${RED}FAIL:${NC} $*" ((TESTS_FAILED++)) } log_info() { echo -e "${YELLOW}INFO:${NC} $*" } # # Helper functions # run_psql() { local dbname="$1" shift psql -U "$PGUSER" -d "$dbname" -tAq "$@" } query_db() { local dbname="$1" local query="$2" run_psql "$dbname" -c "$query" 2>/dev/null || true } create_test_db() { local dbname="$1" log_info "Creating test database: $dbname" # Drop if exists psql -U "$PGUSER" -d postgres -c "DROP DATABASE IF EXISTS $dbname;" &>/dev/null || true # Create database psql -U "$PGUSER" -d postgres -c "CREATE DATABASE $dbname;" &>/dev/null DATABASES_TO_CLEANUP+=("$dbname") } # shellcheck disable=SC2317 # Function called from cleanup trap handler drop_test_db() { local dbname="$1" log_info "Dropping test database: $dbname" # Terminate connections psql -U "$PGUSER" -d postgres -c " SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '$dbname' AND pid <> pg_backend_pid(); " &>/dev/null || true # Drop database psql -U "$PGUSER" -d postgres -c "DROP DATABASE IF EXISTS $dbname;" &>/dev/null || true } create_table_with_pk() { local dbname="$1" local table="$2" query_db "$dbname" " CREATE TABLE $table ( id SERIAL PRIMARY KEY, name TEXT, created_at TIMESTAMP DEFAULT now() ); " } # Initialize a backup directory (creates replication slot and initial backups) init_backup_system() { local dbname="$1" local backup_dir="$2" local slot="$3" mkdir -p "$backup_dir" "$PG_SCRIBE" --init -d "$dbname" -f "$backup_dir" -S "$slot" -U "$PGUSER" &>/dev/null SLOTS_TO_CLEANUP+=("$dbname:$slot") } # Start streaming in background, return PID start_streaming() { local dbname="$1" local backup_dir="$2" "$PG_SCRIBE" --start -d "$dbname" -f "$backup_dir" -U "$PGUSER" & local pid=$! # Wait for streaming to start sleep 2 echo "$pid" } # Stop streaming process stop_streaming() { local backup_dir="$1" "$PG_SCRIBE" --stop -f "$backup_dir" &>/dev/null || true } # Get the chain ID that pg_scribe --status reports as active get_status_active_chain() { local dbname="$1" local backup_dir="$2" local slot="$3" local status_output status_output=$("$PG_SCRIBE" --status -d "$dbname" -f "$backup_dir" -S "$slot" -U "$PGUSER" 2>&1) # Extract chain ID from line like " chain-20251020T050048Z (ACTIVE - streaming)" echo "$status_output" | grep -oP 'chain-\K[0-9TZ]+(?=.*ACTIVE.*streaming)' || echo "" } # Get the chain ID that the PID is actually writing to get_actual_active_chain() { local pid="$1" # Check which active.sql file the process has open local active_file active_file=$(ls -l /proc/"$pid"/fd/ 2>/dev/null | grep -oP '/tmp/.*?/chain-\K[0-9TZ]+(?=/active\.sql)' || echo "") echo "$active_file" } # # Test cases # test_status_reports_correct_active_chain() { ((TESTS_RUN++)) log_test "--status should report the chain that is actually streaming" local dbname="${TEST_DB_PREFIX}_status" local backup_dir="$TEST_DIR/status_test" local slot="test_slot_status" # Setup: Create database and init backup system create_test_db "$dbname" create_table_with_pk "$dbname" "users" query_db "$dbname" "INSERT INTO users (name) VALUES ('Alice');" init_backup_system "$dbname" "$backup_dir" "$slot" # Start streaming to first chain log_info "Starting streaming to first chain..." start_streaming "$dbname" "$backup_dir" >/dev/null sleep 3 # Generate some data query_db "$dbname" "INSERT INTO users (name) VALUES ('Bob');" sleep 2 # Create a new chain and transfer streaming to it (using --new-chain --start) log_info "Creating new chain and transferring streaming..." "$PG_SCRIBE" --new-chain --start -d "$dbname" -f "$backup_dir" -S "$slot" -U "$PGUSER" &>/dev/null & local new_pid=$! sleep 5 # Get the PID from pidfile local pidfile="$backup_dir/.pg_scribe.pid" local streaming_pid streaming_pid=$(cat "$pidfile") # Get actual active chain (from /proc) local actual_chain actual_chain=$(get_actual_active_chain "$streaming_pid") if [[ -z "$actual_chain" ]]; then log_fail "Could not determine actual active chain from /proc/$streaming_pid/fd/" kill -TERM "$streaming_pid" 2>/dev/null || true return 1 fi log_info "Process $streaming_pid is actually writing to chain-$actual_chain" # Get reported active chain (from --status) local reported_chain reported_chain=$(get_status_active_chain "$dbname" "$backup_dir" "$slot") if [[ -z "$reported_chain" ]]; then log_fail "--status did not report any active chain" kill -TERM "$streaming_pid" 2>/dev/null || true return 1 fi log_info "--status reports chain-$reported_chain as active" # Stop streaming kill -TERM "$streaming_pid" 2>/dev/null || true sleep 2 # Compare if [[ "$actual_chain" != "$reported_chain" ]]; then log_fail "MISMATCH: --status reports chain-$reported_chain but process is writing to chain-$actual_chain" return 1 fi log_pass "--status correctly reports the active chain" return 0 } test_new_chain_seals_old_active_sql() { ((TESTS_RUN++)) log_test "--new-chain --start should seal old chain's active.sql" local dbname="${TEST_DB_PREFIX}_seal" local backup_dir="$TEST_DIR/seal_test" local slot="test_slot_seal" # Setup: Create database and init backup system create_test_db "$dbname" create_table_with_pk "$dbname" "products" query_db "$dbname" "INSERT INTO products (name) VALUES ('Widget');" init_backup_system "$dbname" "$backup_dir" "$slot" # Start streaming to first chain log_info "Starting streaming to first chain..." start_streaming "$dbname" "$backup_dir" >/dev/null sleep 3 # Generate some data to ensure active.sql has content for i in {1..10}; do query_db "$dbname" "INSERT INTO products (name) VALUES ('Product $i');" done sleep 2 # Find the old chain ID local old_chain old_chain=$(find "$backup_dir" -maxdepth 1 -type d -name 'chain-*' 2>/dev/null | sort | tail -1) local old_chain_id old_chain_id=$(basename "$old_chain" | sed 's/^chain-//') log_info "Old chain: $old_chain_id" # Verify old chain has active.sql with data if [[ ! -s "$old_chain/active.sql" ]]; then log_fail "Old chain's active.sql doesn't exist or is empty before transfer" stop_streaming "$backup_dir" return 1 fi local old_active_size old_active_size=$(stat -c %s "$old_chain/active.sql") log_info "Old chain's active.sql size before transfer: $old_active_size bytes" # Create a new chain and transfer streaming to it log_info "Creating new chain and transferring streaming..." "$PG_SCRIBE" --new-chain --start -d "$dbname" -f "$backup_dir" -S "$slot" -U "$PGUSER" &>/dev/null & sleep 5 # Stop streaming stop_streaming "$backup_dir" sleep 2 # Check old chain: should NOT have active.sql anymore if [[ -f "$old_chain/active.sql" ]]; then log_fail "CRITICAL: Old chain still has active.sql after transfer!" log_fail "This is the bug - active.sql should have been sealed into a timestamped differential" # Show what's in the old chain log_info "Files in old chain:" ls -lh "$old_chain/" return 1 fi # Check old chain: should have a sealed differential with the content local sealed_diffs sealed_diffs=$(find "$old_chain" -maxdepth 1 -name 'diff-*.sql' 2>/dev/null | wc -l) if [[ $sealed_diffs -eq 0 ]]; then log_fail "Old chain has no sealed differentials after transfer" log_fail "The active.sql content was lost!" return 1 fi log_info "Old chain has $sealed_diffs sealed differential(s)" # Verify the sealed differential has the data that was in active.sql local total_sealed_size=0 while IFS= read -r diff_file; do local size size=$(stat -c %s "$diff_file") total_sealed_size=$((total_sealed_size + size)) done < <(find "$old_chain" -maxdepth 1 -name 'diff-*.sql' 2>/dev/null) log_info "Total sealed differential size: $total_sealed_size bytes" # The sealed data should be at least as much as the old active.sql if [[ $total_sealed_size -lt $old_active_size ]]; then log_fail "Sealed differential size ($total_sealed_size) is less than old active.sql ($old_active_size)" log_fail "Data may have been lost!" return 1 fi log_pass "Old chain's active.sql was properly sealed into differential(s)" return 0 } # # Cleanup # # shellcheck disable=SC2317 # Function called via trap handler cleanup() { log_info "Cleaning up test resources..." # Stop any lingering streaming processes if [[ -d "$TEST_DIR" ]]; then find "$TEST_DIR" -name '.pg_scribe.pid' 2>/dev/null | while read -r pidfile; do if [[ -f "$pidfile" ]]; then local pid pid=$(cat "$pidfile") kill -TERM "$pid" 2>/dev/null || true fi done fi # Give processes time to stop sleep 2 # Drop replication slots for entry in "${SLOTS_TO_CLEANUP[@]}"; do local dbname="${entry%%:*}" local slot="${entry#*:}" psql -U "$PGUSER" -d "$dbname" -c " SELECT pg_drop_replication_slot('$slot') FROM pg_replication_slots WHERE slot_name = '$slot'; " &>/dev/null || true done # Drop databases for dbname in "${DATABASES_TO_CLEANUP[@]}"; do drop_test_db "$dbname" done # Remove test directory if [[ -d "$TEST_DIR" ]]; then rm -rf "$TEST_DIR" fi log_info "Cleanup complete" } # # Main test runner # main() { echo "========================================" echo "pg_scribe Chain Transfer Tests" echo "========================================" echo "" echo "These tests verify proper behavior when" echo "transferring streaming between chains" echo "" # Verify pg_scribe exists if [[ ! -x "$PG_SCRIBE" ]]; then echo "ERROR: pg_scribe not found or not executable: $PG_SCRIBE" exit 1 fi # Verify PostgreSQL is running if ! psql -U "$PGUSER" -d postgres -c "SELECT 1;" &>/dev/null; then echo "ERROR: Cannot connect to PostgreSQL" exit 1 fi # Verify wal_level is logical local wal_level wal_level=$(psql -U "$PGUSER" -d postgres -tAq -c "SHOW wal_level;") if [[ "$wal_level" != "logical" ]]; then echo "ERROR: wal_level must be 'logical', currently: $wal_level" echo "Update ~/.pgenv/pgsql/data/postgresql.conf and restart PostgreSQL" exit 1 fi # Create test directory mkdir -p "$TEST_DIR" # Set up cleanup trap trap cleanup EXIT INT TERM echo "Running tests..." echo "" # Run all tests (use || true to prevent set -e from exiting) test_status_reports_correct_active_chain || true test_new_chain_seals_old_active_sql || true # Summary echo "" echo "========================================" echo "Test Results" echo "========================================" echo "Tests run: $TESTS_RUN" echo -e "Tests passed: ${GREEN}$TESTS_PASSED${NC}" echo -e "Tests failed: ${RED}$TESTS_FAILED${NC}" echo "" if [[ $TESTS_FAILED -eq 0 ]]; then echo -e "${GREEN}All tests passed!${NC}" exit 0 else echo -e "${RED}Some tests failed!${NC}" echo "" echo "This is EXPECTED before fixing the bugs." echo "These failures demonstrate the chain transfer problems." exit 1 fi } main "$@"