begriffs open source - ai-unix/blob - ai-cut

   1 #!/bin/bash
   2
   3 # AI-Cut: Structured field extraction tool
   4 # Beautiful implementation using composable functions
   5
   6 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
   7 source "$SCRIPT_DIR/ai-common"
   8
   9 show_usage() {
  10     cat << 'EOF'
  11 Usage: ai-cut -f FIELDS [FILE...]
  12 Extract structured fields from unstructured text using natural language field descriptions.
  13
  14 Options:
  15   -f FIELDS   Comma-separated field descriptions (required)
  16   -j          Output as JSON objects (default: TSV)
  17   -h          Show this help message
  18 EOF
  19 }
  20
  21 build_extraction_prompt() {
  22     local fields="$1"
  23     local json_output="$2"
  24     local input="$3"
  25
  26     local format_instructions
  27     if [[ "$json_output" == "true" ]]; then
  28         format_instructions='For each line or record in the input, extract the specified fields and output as a JSON object with the field names as keys. Output one JSON object per line. Output ONLY the raw JSON - do not wrap in markdown code blocks or add any other formatting.
  29
  30 Example format:
  31 {"field1": "value1", "field2": "value2", "field3": "value3"}'
  32     else
  33         format_instructions='For each line or record in the input, extract the specified fields and output as tab-separated values. Output the fields in the same order as specified.
  34
  35 Example format:
  36 value1  value2  value3'
  37     fi
  38
  39     cat << EOF
  40 You are a structured data extraction tool. Extract the following fields from the unstructured text: $fields
  41
  42 $format_instructions
  43
  44 Text to analyze:
  45 $input
  46 EOF
  47 }
  48
  49 process_extraction_response() {
  50     local response="$1"
  51     local json_output="$2"
  52
  53     if [[ "$json_output" == "true" ]]; then
  54         process_json_response "$response"
  55     else
  56         echo "$response"
  57         return 0
  58     fi
  59 }
  60
  61 main() {
  62     local fields=""
  63     local json_output="false"
  64     local files=()
  65     local input response result
  66
  67     # Parse options
  68     while getopts "f:jh" opt; do
  69         case $opt in
  70             f) fields="$OPTARG" ;;
  71             j) json_output="true" ;;
  72             h) handle_help_option show_usage ;;
  73             \?) handle_invalid_option "$OPTARG" ;;
  74         esac
  75     done
  76
  77     shift $((OPTIND-1))
  78     files=("$@")
  79
  80     # Validate environment and arguments
  81     ensure_dependencies
  82     ensure_argument_provided "Field specification (-f)" "$fields" show_usage
  83
  84     # Validate files if provided
  85     [[ ${#files[@]} -gt 0 ]] && ensure_files_exist "${files[@]}"
  86
  87     # Process input
  88     if ! input=$(process_input_sources "${files[@]}"); then
  89         exit "$EXIT_NO_MATCH"
  90     fi
  91
  92     # Execute LLM request
  93     local prompt
  94     prompt=$(build_extraction_prompt "$fields" "$json_output" "$input")
  95
  96     if ! response=$(execute_llm_request "$prompt"); then
  97         case $? in
  98             "$EXIT_NO_MATCH") print_error "$(error_llm_no_response)" ;;
  99             "$EXIT_API_ERROR") print_error "$(error_llm_api_error)" ;;
 100             *) print_error "$(error_llm_command_failed)" ;;
 101         esac
 102         exit $?
 103     fi
 104
 105     # Process and validate response
 106     if ! result=$(process_extraction_response "$response" "$json_output"); then
 107         exit $?
 108     fi
 109
 110     echo "$result"
 111     exit "$EXIT_SUCCESS"
 112 }
 113
 114 main "$@"