begriffs open source - ai-unix/blob - tests/integration/test-semantic.sh

   1 #!/bin/bash
   2
   3 # Semantic tests using ai-test to validate AI tool outputs
   4 # Uses ai-test itself to check if outputs meet semantic expectations
   5
   6 # Note: Not using set -e to prevent early exit on test failures
   7
   8 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
   9 TOOLS_DIR="$SCRIPT_DIR/../../"
  10 TEST_DATA="$SCRIPT_DIR/../data"
  11
  12 # Source common constants
  13 source "$TOOLS_DIR/ai-common"
  14
  15 # Test utilities
  16 PASSED=0
  17 FAILED=0
  18
  19 execute_test_command() {
  20     local cmd=("$@")
  21     "${cmd[@]}" 2>/dev/null
  22 }
  23
  24 validate_semantic_condition() {
  25     local output="$1"
  26     local condition="$2"
  27     echo "$output" | "$TOOLS_DIR/ai-test" -q "$condition"
  28 }
  29
  30 report_test_success() {
  31     local test_name="$1"
  32     echo "✓ $test_name"
  33     ((PASSED++))
  34 }
  35
  36 report_test_failure() {
  37     local test_name="$1"
  38     local reason="$2"
  39     local output="$3"
  40
  41     echo "✗ $test_name - $reason"
  42     [ -n "$output" ] && echo "Output was: $output"
  43     ((FAILED++))
  44 }
  45
  46 test_semantic_property() {
  47     local test_name="$1"
  48     local condition="$2"
  49     shift 2
  50     local cmd=("$@")
  51
  52     local output
  53     output=$(execute_test_command "${cmd[@]}") || {
  54         report_test_failure "$test_name" "command failed to execute"
  55         return 1
  56     }
  57
  58     # Early return for successful validation
  59     if validate_semantic_condition "$output" "$condition"; then
  60         report_test_success "$test_name"
  61         return 0
  62     fi
  63
  64     # Handle validation failure
  65     report_test_failure "$test_name" "semantic condition not met: $condition" "$output"
  66 }
  67
  68 echo "=== Semantic Property Tests ==="
  69 echo
  70
  71 # Test ai-grep semantic matching
  72 echo "--- ai-grep Semantic Tests ---"
  73 test_semantic_property \
  74     "ai-grep finds error-related content" \
  75     "contains information about errors or problems" \
  76     "$TOOLS_DIR/ai-grep" "problems" "$TEST_DATA/sample-log.txt"
  77
  78 test_semantic_property \
  79     "ai-grep -v excludes error content" \
  80     "does not contain information about errors or problems" \
  81     "$TOOLS_DIR/ai-grep" -v "problems" "$TEST_DATA/sample-log.txt"
  82
  83 # Test ai-cut extraction quality
  84 echo "--- ai-cut Semantic Tests ---"
  85 test_semantic_property \
  86     "ai-cut extracts contact information" \
  87     "contains names and email addresses" \
  88     "$TOOLS_DIR/ai-cut" -f "name,email" "$TEST_DATA/contacts.txt"
  89
  90 test_semantic_property \
  91     "ai-cut JSON contains proper fields" \
  92     "contains JSON objects with name and email fields" \
  93     "$TOOLS_DIR/ai-cut" -j -f "name,email" "$TEST_DATA/contacts.txt"
  94
  95 # Test ai-class categorization accuracy
  96 echo "--- ai-class Semantic Tests ---"
  97 test_semantic_property \
  98     "ai-class categorizes feedback correctly" \
  99     "positive feedback is labeled as positive, negative as negative" \
 100     "$TOOLS_DIR/ai-class" "positive,negative,neutral" "$TEST_DATA/feedback.txt"
 101
 102 # Test ai-tr transformation preservation
 103 echo "--- ai-tr Semantic Tests ---"
 104 test_semantic_property \
 105     "ai-tr preserves meaning in transformation" \
 106     "contains the same essential information as the original but in a different format" \
 107     "$TOOLS_DIR/ai-tr" "make more formal" "$TEST_DATA/feedback.txt"
 108
 109 # Test ai-test validation logic
 110 echo "--- ai-test Semantic Tests ---"
 111 # Create a temporary file with valid content
 112 echo "john.doe@example.com" > /tmp/valid_emails.txt
 113 echo "jane.smith@company.org" >> /tmp/valid_emails.txt
 114
 115 if "$TOOLS_DIR/ai-test" -q "contains valid email addresses" /tmp/valid_emails.txt; then
 116     echo "✓ ai-test correctly validates valid emails"
 117     ((PASSED++))
 118 else
 119     echo "✗ ai-test failed to validate valid emails"
 120     ((FAILED++))
 121 fi
 122
 123 # Test with invalid content
 124 echo "not-an-email" > /tmp/invalid_emails.txt
 125 echo "also-not-valid" >> /tmp/invalid_emails.txt
 126
 127 if ! "$TOOLS_DIR/ai-test" -q "contains valid email addresses" /tmp/invalid_emails.txt; then
 128     echo "✓ ai-test correctly rejects invalid emails"
 129     ((PASSED++))
 130 else
 131     echo "✗ ai-test incorrectly accepted invalid emails"
 132     ((FAILED++))
 133 fi
 134
 135 # Test ai-test invert flag
 136 if "$TOOLS_DIR/ai-test" -v -q "contains valid email addresses" /tmp/invalid_emails.txt; then
 137     echo "✓ ai-test -v correctly inverts condition"
 138     ((PASSED++))
 139 else
 140     echo "✗ ai-test -v failed to invert condition"
 141     ((FAILED++))
 142 fi
 143
 144 # Cleanup
 145 rm -f /tmp/valid_emails.txt /tmp/invalid_emails.txt
 146
 147 # Test ai-fix semantic functionality
 148 echo "--- ai-fix Semantic Tests ---"
 149 test_semantic_property \
 150     "ai-fix review identifies bugs" \
 151     "identifies issues like division by zero, undefined variables, or missing return statements" \
 152     "$TOOLS_DIR/ai-fix" -r "$TEST_DATA/code-review-prompt.txt" "$TEST_DATA/buggy-code.py"
 153
 154 test_semantic_property \
 155     "ai-fix follows prompt format instructions" \
 156     "starts with 'ISSUES FOUND:' and contains 'SEVERITY:' and 'RECOMMENDATION:' sections" \
 157     "$TOOLS_DIR/ai-fix" -r "$TEST_DATA/structured-review-prompt.txt" "$TEST_DATA/buggy-code.py"
 158
 159 test_semantic_property \
 160     "ai-fix fixes preserve functionality" \
 161     "contains Python code that appears to fix the original bugs while maintaining the same functionality" \
 162     "$TOOLS_DIR/ai-fix" "$TEST_DATA/code-review-prompt.txt" "$TEST_DATA/buggy-code.py"
 163
 164 # Test pipeline behavior
 165 echo "--- Pipeline Semantic Tests ---"
 166 pipeline_output=$(echo "I love this product! It's amazing and works perfectly." | "$TOOLS_DIR/ai-class" "positive,negative,neutral" | "$TOOLS_DIR/ai-grep" "positive")
 167
 168 if echo "$pipeline_output" | "$TOOLS_DIR/ai-test" -q "contains positive sentiment"; then
 169     echo "✓ Pipeline preserves semantic meaning through multiple tools"
 170     ((PASSED++))
 171 else
 172     echo "✗ Pipeline lost semantic meaning"
 173     echo "Pipeline output was: $pipeline_output"
 174     ((FAILED++))
 175 fi
 176
 177 echo
 178 echo "=== Test Results ==="
 179 echo "Passed: $PASSED"
 180 echo "Failed: $FAILED"
 181
 182 if [ "$FAILED" -gt 0 ]; then
 183     exit 1
 184 fi
 185
 186 echo "All semantic tests passed!"