3 # Test suite for pg_scribe validation ordering bugs
5 # This test suite verifies that pg_scribe validates all prerequisites
6 # BEFORE making any state changes, to prevent corrupted/orphaned chains
9 # 1. cmd_new_chain with non-existent replication slot
10 # 2. cmd_new_chain --start with non-existent replication slot
11 # 3. cmd_start with metadata/slot name mismatch
12 # 4. cmd_new_chain with missing compression tool
17 # Colors for test output
22 NC='\033[0m' # No Color
25 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
26 PG_SCRIBE="$SCRIPT_DIR/scripts/pg_scribe"
27 TEST_DIR="/tmp/pg_scribe_validation_test_$$"
28 TEST_DB_PREFIX="pg_scribe_val_$$"
29 PGUSER="${PGUSER:-postgres}"
37 DATABASES_TO_CLEANUP=()
45 echo -e "${BLUE}TEST:${NC} $*"
49 echo -e "${GREEN}PASS:${NC} $*"
54 echo -e "${RED}FAIL:${NC} $*"
59 echo -e "${YELLOW}INFO:${NC} $*"
69 psql -U "$PGUSER" -d "$dbname" -tAq "$@"
75 run_psql "$dbname" -c "$query" 2>/dev/null || true
80 log_info "Creating test database: $dbname"
83 psql -U "$PGUSER" -d postgres -c "DROP DATABASE IF EXISTS $dbname;" &>/dev/null || true
86 psql -U "$PGUSER" -d postgres -c "CREATE DATABASE $dbname;" &>/dev/null
88 DATABASES_TO_CLEANUP+=("$dbname")
91 # shellcheck disable=SC2317 # Function called from cleanup trap handler
94 log_info "Dropping test database: $dbname"
96 # Terminate connections
97 psql -U "$PGUSER" -d postgres -c "
98 SELECT pg_terminate_backend(pid)
100 WHERE datname = '$dbname' AND pid <> pg_backend_pid();
101 " &>/dev/null || true
104 psql -U "$PGUSER" -d postgres -c "DROP DATABASE IF EXISTS $dbname;" &>/dev/null || true
107 create_table_with_pk() {
111 CREATE TABLE $table (
112 id SERIAL PRIMARY KEY,
114 created_at TIMESTAMP DEFAULT now()
# Initialize a backup directory by running `pg_scribe --init`
# (per the original note, this creates the replication slot and the
# initial backups).
#
# Arguments:
#   $1 - database name
#   $2 - backup directory (created if it does not exist)
#   $3 - replication slot name, passed to pg_scribe via -S
# Globals:
#   PG_SCRIBE, PGUSER (read); SLOTS_TO_CLEANUP (appended)
# Returns:
#   0 on success, otherwise the exit status of the failing command.
init_backup_system() {
    local dbname="$1"
    local backup_dir="$2"
    local slot="$3"
    local init_output
    local status=0

    # Register the slot before --init runs: if initialization fails half-way
    # the slot may already exist, and cleanup only drops slots listed here.
    # The drop in cleanup is conditional on the slot existing, so registering
    # a slot that was never created is harmless.
    SLOTS_TO_CLEANUP+=("$dbname:$slot")

    mkdir -p "$backup_dir" || return

    init_output=$("$PG_SCRIBE" --init -d "$dbname" -f "$backup_dir" -S "$slot" -U "$PGUSER" 2>&1) || status=$?
    if [[ $status -ne 0 ]]; then
        # The test functions are invoked as `test_x || true`, which switches
        # errexit off inside them, so a silent failure here would only show up
        # later as a confusing assertion. Report it where it happens.
        echo "init_backup_system: pg_scribe --init failed (exit $status) for database '$dbname'" >&2
        printf '%s\n' "$init_output" >&2
        return "$status"
    fi

    return 0
}
131 # Check if a chain directory was created
133 local backup_dir="$1"
135 chain_count=$(find "$backup_dir" -maxdepth 1 -type d -name 'chain-*' 2>/dev/null | wc -l)
136 [[ $chain_count -gt 0 ]]
# Scenario: `--new-chain` is pointed at a replication slot that does not exist.
# Passes only when pg_scribe exits with status 3 and the number of chain-*
# directories directly under the backup directory is unchanged.
# Returns 0 on pass, 1 on failure.
test_new_chain_validates_slot_exists() {
    local dbname="${TEST_DB_PREFIX}_slot1"
    local backup_dir="$TEST_DIR/slot1"
    local real_slot="test_slot_real"
    local fake_slot="nonexistent_slot"
    local count_before count_after cmd_output
    local rc=0

    log_test "cmd_new_chain should validate slot exists BEFORE creating chain"

    # Fixture: a database with one populated table, initialized with a slot
    # that really exists.
    create_test_db "$dbname"
    create_table_with_pk "$dbname" "users"
    query_db "$dbname" "INSERT INTO users (name) VALUES ('Alice');"
    init_backup_system "$dbname" "$backup_dir" "$real_slot"

    # Baseline chain count (the original note expects 1 after init).
    count_before=$(find "$backup_dir" -maxdepth 1 -type d -name 'chain-*' 2>/dev/null | wc -l)

    # Ask for a new chain against the slot that was never created.
    cmd_output=$("$PG_SCRIBE" --new-chain -d "$dbname" -f "$backup_dir" -S "$fake_slot" -U "$PGUSER" 2>&1) || rc=$?

    # Status 3 is the slot-error code this test expects.
    if (( rc != 3 )); then
        log_fail "Expected exit code 3 (slot error), got $rc"
        echo "Output: $cmd_output"
        return 1
    fi

    # The failed command must not have left a new chain directory behind.
    count_after=$(find "$backup_dir" -maxdepth 1 -type d -name 'chain-*' 2>/dev/null | wc -l)
    if (( count_after != count_before )); then
        log_fail "CRITICAL: Created orphaned chain before validating slot!"
        log_fail "Chains before: $count_before, after: $count_after"
        return 1
    fi

    log_pass "Validates slot exists before creating chain"
    return 0
}
# Scenario: `--new-chain --start` is pointed at a replication slot that does
# not exist. Passes only when pg_scribe exits with status 3 without creating
# a chain-* directory. A timeout (status 124) is treated as a failure because
# it means streaming was started.
# Returns 0 on pass, 1 on failure.
test_new_chain_start_validates_slot_exists() {
    local dbname="${TEST_DB_PREFIX}_slot2"
    local backup_dir="$TEST_DIR/slot2"
    local real_slot="test_slot_real2"
    local fake_slot="nonexistent_slot2"
    local count_before count_after cmd_output
    local rc=0

    log_test "cmd_new_chain --start should validate slot BEFORE creating chain"

    # Fixture: initialize against a slot that really exists.
    create_test_db "$dbname"
    create_table_with_pk "$dbname" "products"
    init_backup_system "$dbname" "$backup_dir" "$real_slot"

    count_before=$(find "$backup_dir" -maxdepth 1 -type d -name 'chain-*' 2>/dev/null | wc -l)

    # `timeout` bounds the run in case pg_scribe goes on to exec
    # pg_recvlogical instead of rejecting the bogus slot.
    cmd_output=$(timeout 10s "$PG_SCRIBE" --new-chain --start -d "$dbname" -f "$backup_dir" -S "$fake_slot" -U "$PGUSER" 2>&1) || rc=$?

    count_after=$(find "$backup_dir" -maxdepth 1 -type d -name 'chain-*' 2>/dev/null | wc -l)

    case "$rc" in
        124)
            # Timed out: streaming was started, so the chain was created
            # before the slot was validated.
            log_fail "CRITICAL: Command hung (tried to start pg_recvlogical)"
            log_fail "This means it created the chain BEFORE validating the slot!"
            log_fail "Chains before: $count_before, after: $count_after"

            # Best-effort removal of whatever the timed-out run left behind.
            pkill -f "pg_recvlogical.*$backup_dir" 2>/dev/null || true
            rm -f "$backup_dir/.pg_scribe.pid" 2>/dev/null || true
            return 1
            ;;
        3)
            # Expected slot-error status; the chain count is checked below.
            ;;
        *)
            log_fail "Expected exit code 3 (slot error), got $rc"
            echo "Output: $cmd_output"

            # Also report an orphaned chain, if one appeared.
            if (( count_after != count_before )); then
                log_fail "Also created orphaned chain!"
            fi
            return 1
            ;;
    esac

    # Correct exit status, but the directory tree must be untouched as well.
    if (( count_after != count_before )); then
        log_fail "CRITICAL: Created orphaned chain before validating slot!"
        log_fail "This is the bug that hit the user!"
        log_fail "Chains before: $count_before, after: $count_after"
        return 1
    fi

    log_pass "Validates slot exists before creating chain (with --start)"
    return 0
}
# Scenario: `--start` is run without -S and is expected to pick the slot up
# from the chain metadata. Passes when the command is still running after the
# timeout (status 124), a pidfile exists, and the captured output mentions the
# slot name used at init time.
# Returns 0 on pass, 1 on failure.
test_start_reads_slot_from_metadata() {
    log_test "cmd_start should read slot from metadata (not accept -S parameter)"

    local dbname="${TEST_DB_PREFIX}_meta"
    local backup_dir="$TEST_DIR/meta"
    local real_slot="test_slot_meta"
    local pidfile="$backup_dir/.pg_scribe.pid"
    local output
    local pid=""
    local exit_code=0

    # Setup: Initialize with real slot
    create_test_db "$dbname"
    create_table_with_pk "$dbname" "orders"
    init_backup_system "$dbname" "$backup_dir" "$real_slot"

    # Start should work without -S flag (reads from metadata).
    # `timeout` ends the run; status 124 therefore means "was still streaming".
    output=$(timeout 5s "$PG_SCRIBE" --start -d "$dbname" -f "$backup_dir" -U "$PGUSER" 2>&1) || exit_code=$?

    if [[ $exit_code -ne 124 ]]; then
        log_fail "Expected streaming to start (timeout), got exit code $exit_code"
        echo "Output: $output"
        return 1
    fi

    # Should have created pidfile
    if [[ ! -f "$pidfile" ]]; then
        log_fail "Pidfile not created"
        return 1
    fi

    # Clean up streaming process. Only signal a strictly positive integer:
    # an empty, zero or negative value handed to kill would either error out
    # or address a whole process group instead of one process.
    IFS=$' \t\n' read -r pid _ < "$pidfile" || true
    if [[ $pid =~ ^[1-9][0-9]*$ ]]; then
        kill -TERM "$pid" 2>/dev/null || true
    fi
    rm -f -- "$pidfile"

    # Verify it logged the correct slot from metadata. A literal substring
    # match avoids treating the slot name as a regex and avoids an
    # `echo | grep -q` pipeline, which can report a false failure via SIGPIPE
    # when pipefail is enabled.
    if [[ "$output" != *"$real_slot"* ]]; then
        log_fail "Output should show slot from metadata: $real_slot"
        echo "Output: $output"
        return 1
    fi

    log_pass "Reads slot from metadata correctly"
    return 0
}
# Scenario: `--new-chain` is given a compression method that does not exist
# (-Z totally_fake_compression). Passes when the command fails with any
# non-zero status and no additional chain-* directory appears.
# Returns 0 on pass, 1 on failure.
test_new_chain_validates_compression_tool() {
    local dbname="${TEST_DB_PREFIX}_compress"
    local backup_dir="$TEST_DIR/compress"
    local slot="test_slot_compress"
    local count_before count_after cmd_output
    local rc=0

    log_test "cmd_new_chain should validate compression tool exists BEFORE backup"

    # Fixture
    create_test_db "$dbname"
    create_table_with_pk "$dbname" "data_table"
    init_backup_system "$dbname" "$backup_dir" "$slot"

    count_before=$(find "$backup_dir" -maxdepth 1 -type d -name 'chain-*' 2>/dev/null | wc -l)

    # Request a compression method that cannot exist.
    cmd_output=$("$PG_SCRIBE" --new-chain -d "$dbname" -f "$backup_dir" -Z totally_fake_compression -U "$PGUSER" 2>&1) || rc=$?

    # The original note expects status 5 (validation) or 4 (backup); the check
    # itself only requires that the command did not succeed.
    if (( rc == 0 )); then
        log_fail "Should have failed with invalid compression method"
        return 1
    fi

    # What matters most: no chain directory may have been created first.
    count_after=$(find "$backup_dir" -maxdepth 1 -type d -name 'chain-*' 2>/dev/null | wc -l)
    if (( count_after != count_before )); then
        log_fail "CRITICAL: Created orphaned chain before validating compression!"
        log_fail "Chains before: $count_before, after: $count_after"
        return 1
    fi

    log_pass "Validates compression method before creating chain"
    return 0
}
# Scenario: a second chain is created with the same slot that was used at
# init time. Passes when the newest chain-* directory has a metadata.json
# whose "replication_slot" value equals that slot name.
# Returns 0 on pass, 1 on failure.
test_new_chain_metadata_slot_consistency() {
    log_test "cmd_new_chain should preserve slot name in metadata"

    local dbname="${TEST_DB_PREFIX}_consistency"
    local backup_dir="$TEST_DIR/consistency"
    local slot="test_slot_consistency"
    local latest_chain metadata_file metadata_slot

    # Setup
    create_test_db "$dbname"
    create_table_with_pk "$dbname" "items"
    init_backup_system "$dbname" "$backup_dir" "$slot"

    # Create a new chain (should succeed)
    sleep 1 # Ensure different timestamp
    if ! "$PG_SCRIBE" --new-chain -d "$dbname" -f "$backup_dir" -S "$slot" -U "$PGUSER" &>/dev/null; then
        log_fail "New chain creation failed"
        return 1
    fi

    # Newest chain = last chain-* directory in sorted order.
    latest_chain=$(find "$backup_dir" -maxdepth 1 -type d -name 'chain-*' 2>/dev/null | sort | tail -1)
    if [[ -z "$latest_chain" ]]; then
        # Without this guard the lookup below would silently target
        # "/metadata.json" and report a misleading slot mismatch.
        log_fail "No chain directory found in $backup_dir"
        return 1
    fi

    metadata_file="$latest_chain/metadata.json"
    if [[ ! -f "$metadata_file" ]]; then
        log_fail "Metadata file missing: $metadata_file"
        return 1
    fi

    # Verify metadata has correct slot name. The value is pulled out with a
    # pattern that tolerates both pretty-printed and single-line JSON; only
    # the first match is used.
    metadata_slot=$(sed -n 's/.*"replication_slot"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' "$metadata_file")
    metadata_slot="${metadata_slot%%$'\n'*}"

    if [[ "$metadata_slot" != "$slot" ]]; then
        log_fail "Metadata slot mismatch: expected '$slot', got '$metadata_slot'"
        return 1
    fi

    log_pass "Metadata preserves slot name correctly"
    return 0
}
# shellcheck disable=SC2317  # Function called via trap handler
# Best-effort teardown of everything the tests created: streaming processes
# recorded in pidfiles under TEST_DIR, replication slots listed in
# SLOTS_TO_CLEANUP ("dbname:slot" entries), databases listed in
# DATABASES_TO_CLEANUP, and TEST_DIR itself. Individual failures are ignored
# so that one stuck resource does not prevent the rest from being removed.
cleanup() {
    log_info "Cleaning up test resources..."

    local pidfile pid entry dbname slot attempt

    # Stop any lingering streaming processes FIRST. PostgreSQL refuses to drop
    # a replication slot that is still in use, and that error is swallowed
    # below, so dropping slots before stopping the streamers can leak both the
    # slot and (because of the slot) the database.
    if [[ -d "$TEST_DIR" ]]; then
        while IFS= read -r pidfile; do
            [[ -f "$pidfile" ]] || continue
            pid=""
            IFS=$' \t\n' read -r pid _ < "$pidfile" || true
            # Never hand kill an empty, zero or negative value.
            [[ $pid =~ ^[1-9][0-9]*$ ]] || continue
            kill -TERM "$pid" 2>/dev/null || continue
            # Give the process a bounded moment (about 2s) to exit so its slot
            # is released before the drop is attempted.
            for ((attempt = 0; attempt < 20; attempt++)); do
                kill -0 "$pid" 2>/dev/null || break
                sleep 0.1
            done
        done < <(find "$TEST_DIR" -name '.pg_scribe.pid' 2>/dev/null)
    fi

    # Drop replication slots. The ${arr[@]+...} form keeps an empty array from
    # tripping `set -u` on bash releases older than 4.4.
    for entry in ${SLOTS_TO_CLEANUP[@]+"${SLOTS_TO_CLEANUP[@]}"}; do
        dbname="${entry%%:*}"
        slot="${entry#*:}"
        psql -U "$PGUSER" -d "$dbname" -c "
            SELECT pg_drop_replication_slot('$slot')
            FROM pg_replication_slots
            WHERE slot_name = '$slot';
        " &>/dev/null || true
    done

    # Drop databases
    for dbname in ${DATABASES_TO_CLEANUP[@]+"${DATABASES_TO_CLEANUP[@]}"}; do
        drop_test_db "$dbname" || true
    done

    # Remove test directory
    if [[ -d "$TEST_DIR" ]]; then
        rm -rf -- "$TEST_DIR"
    fi

    log_info "Cleanup complete"
}
432 echo "========================================"
433 echo "pg_scribe Validation Ordering Tests"
434 echo "========================================"
436 echo "These tests verify that pg_scribe validates"
437 echo "all prerequisites BEFORE making state changes"
440 # Verify pg_scribe exists
441 if [[ ! -x "$PG_SCRIBE" ]]; then
442 echo "ERROR: pg_scribe not found or not executable: $PG_SCRIBE"
446 # Verify PostgreSQL is running
447 if ! psql -U "$PGUSER" -d postgres -c "SELECT 1;" &>/dev/null; then
448 echo "ERROR: Cannot connect to PostgreSQL"
452 # Verify wal_level is logical
454 wal_level=$(psql -U "$PGUSER" -d postgres -tAq -c "SHOW wal_level;")
455 if [[ "$wal_level" != "logical" ]]; then
456 echo "ERROR: wal_level must be 'logical', currently: $wal_level"
457 echo "Update ~/.pgenv/pgsql/data/postgresql.conf and restart PostgreSQL"
461 # Create test directory
464 # Set up cleanup trap
465 trap cleanup EXIT INT TERM
467 echo "Running tests..."
470 # Run all tests (use || true to prevent set -e from exiting)
471 test_new_chain_validates_slot_exists || true
472 test_new_chain_start_validates_slot_exists || true
473 test_start_reads_slot_from_metadata || true
474 test_new_chain_validates_compression_tool || true
475 test_new_chain_metadata_slot_consistency || true
479 echo "========================================"
481 echo "========================================"
482 echo "Tests run: $TESTS_RUN"
483 echo -e "Tests passed: ${GREEN}$TESTS_PASSED${NC}"
484 echo -e "Tests failed: ${RED}$TESTS_FAILED${NC}"
487 if [[ $TESTS_FAILED -eq 0 ]]; then
488 echo -e "${GREEN}All tests passed!${NC}"
491 echo -e "${RED}Some tests failed!${NC}"
493 echo "This is EXPECTED before fixing the bugs."
494 echo "These failures demonstrate the validation ordering problems."