#!/bin/bash
# receipt — extract structured data from receipt photos
#
# OCRs a receipt image, then asks apfel to extract vendor, date, total,
# and line items. Keeps within apfel's 4096-token window by truncating
# long receipts.
#
# Usage:
#   receipt <image>               # extract receipt data as text
#   receipt -j <image>            # extract as JSON
#   receipt -c <image>            # copy to clipboard
#
# Examples:
#   receipt grocery.jpg
#   receipt -j scan.png | jq .total
#   receipt -j receipt.pdf | jq '.items[] | .name'
#
# Requires: auge, apfel

set -euo pipefail

# Option state, filled in by the argument loop below.
copy=false   # -c/--copy: also place the result on the clipboard
json=false   # -j/--json: request JSON instead of free text
file=""      # the single positional argument: path to the receipt image

# Parse the command line.
#   - Flags may appear before or after the image path.
#   - "--" ends option parsing, so a path that starts with "-" can be given.
#   - Any other dash-prefixed word is rejected as an unknown option instead
#     of being mistaken for the image path.
#   - Exactly one positional argument is accepted; a second one is an error.
# Exit status 2 signals a usage error.
options_done=false
while [[ $# -gt 0 ]]; do
  if ! $options_done; then
    case "$1" in
      -c|--copy) copy=true; shift; continue ;;
      -j|--json) json=true; shift; continue ;;
      -h|--help)
        # Print this script's header comment (line 2 up to the first blank
        # line) with the leading "# " removed.
        sed -n '2,/^$/{ s/^# //; s/^#//; p; }' "$0"
        exit 0 ;;
      --) options_done=true; shift; continue ;;
      -?*)
        echo "error: unknown option: $1" >&2
        exit 2 ;;
    esac
  fi

  # Positional argument: only one image path is allowed.
  if [[ -n "$file" ]]; then
    echo "error: too many arguments" >&2
    exit 2
  fi
  file="$1"
  shift
done

# An image path is mandatory (usage error, status 2) and must name an
# existing regular file (status 1).
if [[ -z "$file" ]]; then
  echo "usage: receipt <image>" >&2
  exit 2
fi
if [[ ! -f "$file" ]]; then
  echo "error: file not found: $file" >&2
  exit 1
fi

# Both external tools must be resolvable before any work starts.
for required_tool in auge apfel; do
  if ! command -v "$required_tool" >/dev/null; then
    echo "error: $required_tool not found" >&2
    exit 1
  fi
done

# OCR the image, then keep only the first lines of the result so the prompt
# stays under the token budget.
#
# The complete OCR output is captured before it is truncated. Piping auge
# straight into `head` lets head exit as soon as it has enough lines; on a
# long receipt auge can then be killed by SIGPIPE, and under
# `set -o pipefail` that non-zero status aborts the script with no message.
max_ocr_lines=25
ocr_status=0
ocr_raw=$(auge --ocr "$file" -q 2>/dev/null) || ocr_status=$?
if (( ocr_status != 0 )); then
  # auge's own stderr is suppressed above, so report the failure here and
  # pass its exit status through.
  echo "error: OCR failed for $file (auge exited with status $ocr_status)" >&2
  exit "$ocr_status"
fi

# sed consumes all of its input, so no writer ever sees a closed pipe.
text=$(sed -n "1,${max_ocr_lines}p" <<<"$ocr_raw")

# An image with no recognizable text is not treated as an error.
if [[ -z "$text" ]]; then
  echo "No text detected in $file" >&2
  exit 0
fi

# Choose the system prompt for the requested output format.
if $json; then
  sys="Extract receipt data as JSON. Output ONLY valid JSON with keys: vendor (string), date (string or null), total (string or null), items (array of {name, price}). No markdown fences."
else
  sys="Extract receipt data: vendor name, date, total, and line items with prices. Be concise."
fi

# Send the OCR text to apfel. The call sits inside the `if` condition so a
# non-zero exit is handled here; as a bare assignment, `set -e` would end
# the script silently before the error message below could be printed.
# Empty output is treated the same as a failed call.
if ! output=$(apfel -q -s "$sys" "Receipt text:
$text" 2>/dev/null) || [[ -z "$output" ]]; then
  echo "Could not parse receipt." >&2
  exit 1
fi

# Write the result to stdout; with -c/--copy also place it on the clipboard.
#
# printf is used instead of echo so that a result that happens to look like
# an echo option (for example a bare "-n") is printed literally. The result
# is printed before the clipboard step so it still reaches stdout when the
# copy fails.
printf '%s\n' "$output"

if [[ "$copy" == true ]]; then
  # The clipboard copy carries no trailing newline.
  if printf '%s' "$output" | pbcopy; then
    # Dimmed notice on stderr so it never mixes into piped stdout.
    printf '\033[2m(copied to clipboard)\033[0m\n' >&2
  else
    echo "error: could not copy to clipboard" >&2
    exit 1
  fi
fi
