begriffs open source - ai-unix/blob - ai-cut

   1 #!/bin/bash
   2
   3 # AI-Cut: Structured field extraction tool
   4 # Beautiful implementation using composable functions
   5
   6 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
   7 source "$SCRIPT_DIR/ai-common"
   8
   9 show_usage() {
  10     cat << 'EOF'
  11 Usage: ai-cut -f FIELDS [FILE...]
  12 Extract structured fields from unstructured text using natural language field descriptions.
  13
  14 Options:
  15   -f FIELDS   Comma-separated field descriptions (required)
  16   -j          Output as JSON objects (default: TSV)
  17   -h          Show this help message
  18 EOF
  19 }
  20
  21 build_extraction_prompt() {
  22     local fields="$1"
  23     local json_output="$2"
  24
  25     local format_instructions
  26     if [[ "$json_output" == "true" ]]; then
  27         format_instructions='For each line or record in the input, extract the specified fields and output as a JSON object with the field names as keys. Output one JSON object per line. Output ONLY the raw JSON - do not wrap in markdown code blocks or add any other formatting.
  28
  29 Example format:
  30 {"field1": "value1", "field2": "value2", "field3": "value3"}'
  31     else
  32         format_instructions='For each line or record in the input, extract the specified fields and output as tab-separated values. Output the fields in the same order as specified.
  33
  34 Example format:
  35 value1  value2  value3'
  36     fi
  37
  38     cat << EOF
  39 You are a structured data extraction tool. Extract the following fields from the unstructured text: $fields
  40
  41 $format_instructions
  42 EOF
  43 }
  44
  45 process_extraction_response() {
  46     local response="$1"
  47     local json_output="$2"
  48
  49     if [[ "$json_output" == "true" ]]; then
  50         process_json_response "$response"
  51     else
  52         echo "$response"
  53         return 0
  54     fi
  55 }
  56
  57 validate_and_setup() {
  58     local fields="$1"
  59     shift
  60     local files=("$@")
  61
  62     ensure_dependencies
  63     ensure_argument_provided "Field specification (-f)" "$fields" show_usage
  64
  65     [[ ${#files[@]} -gt 0 ]] && ensure_files_exist "${files[@]}"
  66 }
  67
  68 main() {
  69     local fields=""
  70     local json_output="false"
  71     local files=()
  72
  73     # Parse options
  74     while getopts "f:jh" opt; do
  75         case $opt in
  76             f) fields="$OPTARG" ;;
  77             j) json_output="true" ;;
  78             h) handle_help_option show_usage ;;
  79             \?) handle_invalid_option "$OPTARG" ;;
  80         esac
  81     done
  82
  83     shift $((OPTIND-1))
  84     files=("$@")
  85
  86     # Early validation with immediate exit on failure
  87     validate_and_setup "$fields" "${files[@]}"
  88
  89     # Process input with early exit
  90     local input
  91     input=$(process_input_sources "${files[@]}") || exit "$EXIT_NO_MATCH"
  92
  93     # Execute LLM request with early exit
  94     local prompt response
  95     prompt=$(build_extraction_prompt "$fields" "$json_output")
  96     response=$(execute_llm_request "$prompt" "$input") || handle_llm_error $?
  97
  98     # Process response with early exit
  99     local result
 100     result=$(process_extraction_response "$response" "$json_output") || exit $?
 101
 102     echo "$result"
 103 }
 104
 105 main "$@"