# Semantic tests that use ai-test to validate AI tool outputs.
# ai-test itself is used to check whether each output meets its semantic expectation.
# Note: `set -e` is deliberately not used, so a failing test does not end the run early.
# Resolve the directory holding this script so the paths below do not depend
# on the caller's current working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# The tool executables (ai-grep, ai-cut, ...) are invoked from this directory.
TOOLS_DIR="$SCRIPT_DIR/../../"
# Fixture files read by the tests below.
TEST_DATA="$SCRIPT_DIR/../data"

# Source common constants
# shellcheck source=/dev/null
source "$TOOLS_DIR/ai-common"

# Pass/fail tallies printed in the final summary. They must start out numeric:
# an unset FAILED makes the closing `[ "$FAILED" -gt 0 ]` check error out.
PASSED=0
FAILED=0
# Run a tool command line and pass its stdout through to the caller.
# The command's stderr is discarded, so only its stdout reaches the caller.
# Arguments: $@ - the command followed by its arguments, one word each
# Outputs:   the command's stdout
# Returns:   the command's exit status, or 2 when no command was given
execute_test_command() {
  local cmd=("$@")

  # Expanding an empty array runs nothing and reports success, which would
  # let a test with a missing command look like it executed fine.
  if (( ${#cmd[@]} == 0 )); then
    return 2
  fi

  "${cmd[@]}" 2>/dev/null
}
# Ask ai-test whether a piece of tool output satisfies a condition.
# Globals:   TOOLS_DIR (read) - directory containing the ai-test executable
# Arguments: $1 - the output to judge, fed to ai-test on stdin
#            $2 - the condition, passed to ai-test after -q
# Returns:   the exit status of ai-test
validate_semantic_condition() {
  local output=$1
  local condition=$2

  # printf rather than echo: echo would treat an output such as "-n" or "-e"
  # as an option and send nothing (or something altered) down the pipe.
  printf '%s\n' "$output" | "$TOOLS_DIR/ai-test" -q "$condition"
}
# Record one passing test: print a check-mark line and bump the pass tally.
# NOTE(review): only this function's header was visible; the body is
# reconstructed from its call site (one argument, the test name) and from the
# PASSED total printed in the summary - confirm the exact message wording.
# Globals:   PASSED (written)
# Arguments: $1 - test name
# Outputs:   one status line on stdout
report_test_success() {
  local test_name=$1

  echo "✓ $test_name"
  # Plain assignment instead of ((PASSED++)): the post-increment form returns
  # a non-zero status while the counter is still 0, and fails when it is unset.
  PASSED=$(( ${PASSED:-0} + 1 ))
}
# Record one failing test: print a cross-mark line with the reason, show the
# offending output when there is any, and bump the failure tally.
# Globals:   FAILED (written)
# Arguments: $1 - test name
#            $2 - reason for the failure
#            $3 - (optional) the output that failed validation
# Outputs:   one or two lines on stdout
report_test_failure() {
  local test_name=$1
  local reason=$2
  local output=${3:-}

  echo "✗ $test_name - $reason"
  # An if-statement rather than `[ -n ... ] && echo`, so an empty output does
  # not leave a non-zero status behind.
  if [[ -n "$output" ]]; then
    echo "Output was: $output"
  fi
  FAILED=$(( ${FAILED:-0} + 1 ))
}
# Run one tool command and check its output against a semantic condition.
# Arguments: $1  - test name used in the report lines
#            $2  - condition the command's output must satisfy
#            $3+ - the command to run, one word per argument
# Outputs:   the report line(s) produced by report_test_success/_failure
# Returns:   0 when the condition holds, 1 otherwise
test_semantic_property() {
  # Without a command, `shift 2` may fail and leave the test name behind to be
  # executed as if it were the command; report that as a failure instead.
  if (( $# < 3 )); then
    report_test_failure "${1:-<unnamed test>}" "no command given"
    return 1
  fi

  local test_name=$1
  local condition=$2
  shift 2
  local cmd=("$@")
  # Declared separately so the assignment below keeps the command's status.
  local output

  output=$(execute_test_command "${cmd[@]}") || {
    report_test_failure "$test_name" "command failed to execute"
    return 1
  }

  # Early return for successful validation
  if validate_semantic_condition "$output" "$condition"; then
    report_test_success "$test_name"
    return 0
  fi

  # Handle validation failure
  report_test_failure "$test_name" "semantic condition not met: $condition" "$output"
  return 1
}
printf '%s\n' "=== Semantic Property Tests ==="

# ai-grep: a search for "problems" should surface error-related log content,
# and the same search with -v should leave that content out.
printf '%s\n' "--- ai-grep Semantic Tests ---"
test_semantic_property "ai-grep finds error-related content" \
  "contains information about errors or problems" \
  "$TOOLS_DIR/ai-grep" "problems" "$TEST_DATA/sample-log.txt"

test_semantic_property "ai-grep -v excludes error content" \
  "does not contain information about errors or problems" \
  "$TOOLS_DIR/ai-grep" -v "problems" "$TEST_DATA/sample-log.txt"
# ai-cut: extracting the name and email fields from the contacts fixture
# should yield names and addresses, both in the default form and with -j
# (expected to produce JSON objects, per the condition below).
printf '%s\n' "--- ai-cut Semantic Tests ---"
test_semantic_property "ai-cut extracts contact information" \
  "contains names and email addresses" \
  "$TOOLS_DIR/ai-cut" -f "name,email" "$TEST_DATA/contacts.txt"

test_semantic_property "ai-cut JSON contains proper fields" \
  "contains JSON objects with name and email fields" \
  "$TOOLS_DIR/ai-cut" -j -f "name,email" "$TEST_DATA/contacts.txt"
# ai-class: labels given to the feedback fixture should agree with the
# sentiment of each entry.
printf '%s\n' "--- ai-class Semantic Tests ---"
test_semantic_property "ai-class categorizes feedback correctly" \
  "positive feedback is labeled as positive, negative as negative" \
  "$TOOLS_DIR/ai-class" "positive,negative,neutral" "$TEST_DATA/feedback.txt"
# ai-tr: rewriting the feedback fixture in a more formal register should keep
# the information it carries.
printf '%s\n' "--- ai-tr Semantic Tests ---"
test_semantic_property "ai-tr preserves meaning in transformation" \
  "contains the same essential information as the original but in a different format" \
  "$TOOLS_DIR/ai-tr" "make more formal" "$TEST_DATA/feedback.txt"
# Test ai-test validation logic
echo "--- ai-test Semantic Tests ---"
# The fixtures live in a private mktemp directory rather than under fixed
# names in /tmp, so concurrent runs cannot clobber each other's files and the
# paths cannot be pre-created by someone else.
if email_tmp_dir=$(mktemp -d); then
  valid_emails="$email_tmp_dir/valid_emails.txt"
  invalid_emails="$email_tmp_dir/invalid_emails.txt"

  # Create a temporary file with valid content
  printf '%s\n' "john.doe@example.com" "jane.smith@company.org" > "$valid_emails"

  if "$TOOLS_DIR/ai-test" -q "contains valid email addresses" "$valid_emails"; then
    echo "✓ ai-test correctly validates valid emails"
    PASSED=$(( ${PASSED:-0} + 1 ))
  else
    echo "✗ ai-test failed to validate valid emails"
    FAILED=$(( ${FAILED:-0} + 1 ))
  fi

  # Test with invalid content
  printf '%s\n' "not-an-email" "also-not-valid" > "$invalid_emails"

  if ! "$TOOLS_DIR/ai-test" -q "contains valid email addresses" "$invalid_emails"; then
    echo "✓ ai-test correctly rejects invalid emails"
    PASSED=$(( ${PASSED:-0} + 1 ))
  else
    echo "✗ ai-test incorrectly accepted invalid emails"
    FAILED=$(( ${FAILED:-0} + 1 ))
  fi

  # Test ai-test invert flag
  if "$TOOLS_DIR/ai-test" -v -q "contains valid email addresses" "$invalid_emails"; then
    echo "✓ ai-test -v correctly inverts condition"
    PASSED=$(( ${PASSED:-0} + 1 ))
  else
    echo "✗ ai-test -v failed to invert condition"
    FAILED=$(( ${FAILED:-0} + 1 ))
  fi

  # Remove the fixtures; :? guards against ever expanding to a bare "rm -rf".
  rm -rf -- "${email_tmp_dir:?}"
else
  # Without a scratch directory none of the three checks can run.
  echo "✗ ai-test tests skipped - could not create temporary directory"
  FAILED=$(( ${FAILED:-0} + 1 ))
fi
# ai-fix: the first two cases pass -r (presumably review mode, going by the
# test names - confirm against ai-fix) with two different prompt files; the
# third runs without -r and expects corrected code back.
printf '%s\n' "--- ai-fix Semantic Tests ---"
test_semantic_property "ai-fix review identifies bugs" \
  "identifies issues like division by zero, undefined variables, or missing return statements" \
  "$TOOLS_DIR/ai-fix" -r "$TEST_DATA/code-review-prompt.txt" "$TEST_DATA/buggy-code.py"

test_semantic_property "ai-fix follows prompt format instructions" \
  "starts with 'ISSUES FOUND:' and contains 'SEVERITY:' and 'RECOMMENDATION:' sections" \
  "$TOOLS_DIR/ai-fix" -r "$TEST_DATA/structured-review-prompt.txt" "$TEST_DATA/buggy-code.py"

test_semantic_property "ai-fix fixes preserve functionality" \
  "contains Python code that appears to fix the original bugs while maintaining the same functionality" \
  "$TOOLS_DIR/ai-fix" "$TEST_DATA/code-review-prompt.txt" "$TEST_DATA/buggy-code.py"
# Test pipeline behavior
echo "--- Pipeline Semantic Tests ---"
# Classify a clearly positive sentence, then keep only what ai-grep matches
# for "positive"; the result is judged by ai-test below.
pipeline_output=$(
  printf '%s\n' "I love this product! It's amazing and works perfectly." \
    | "$TOOLS_DIR/ai-class" "positive,negative,neutral" \
    | "$TOOLS_DIR/ai-grep" "positive"
)

# printf rather than echo, so tool output that happens to look like an echo
# option (e.g. "-n") is still passed on verbatim.
if printf '%s\n' "$pipeline_output" | "$TOOLS_DIR/ai-test" -q "contains positive sentiment"; then
  echo "✓ Pipeline preserves semantic meaning through multiple tools"
  PASSED=$(( ${PASSED:-0} + 1 ))
else
  echo "✗ Pipeline lost semantic meaning"
  echo "Pipeline output was: $pipeline_output"
  FAILED=$(( ${FAILED:-0} + 1 ))
fi
# Print the pass/fail totals and report whether the whole run succeeded.
# Globals:   PASSED, FAILED (read; an unset or empty counter counts as 0)
# Outputs:   the summary lines on stdout
# Returns:   1 when at least one test failed, 0 otherwise
print_test_summary() {
  # Defaulting here keeps an unset FAILED from breaking the numeric comparison
  # and falling through to the "all passed" message.
  local passed=${PASSED:-0}
  local failed=${FAILED:-0}

  echo "=== Test Results ==="
  echo "Passed: $passed"
  echo "Failed: $failed"

  if (( failed > 0 )); then
    return 1
  fi

  echo "All semantic tests passed!"
}

# A failing run must be visible to the caller through the exit status.
print_test_summary || exit 1