#!/bin/sh
# ocr_script - extract the text of a PDF via OCR.
#
# Usage:   ./ocr_script <pdf>
# Output:  combined.txt in the current directory, holding the recognized
#          text of every page, each page followed by a blank line.
# Needs:   pdftoppm and tesseract on PATH.
# Exit:    0 on success, 1 on usage error or when any step fails.
#
# Every page is rendered to a 300 dpi PNG inside a private scratch
# directory, OCRed with tesseract (--psm 4), and the per-page text files
# are concatenated in glob (lexicographic) order.

if [ -z "${1:-}" ]; then
    echo "USAGE: ./ocr_script [pdf name in the same folder]"
    exit 1
fi

# A name starting with '-' would be parsed as an option by the tools
# below; anchoring it with ./ keeps it a plain path.
case $1 in
    -*) pdf=./$1 ;;
    *)  pdf=$1 ;;
esac

if [ ! -f "$pdf" ]; then
    echo "Error: File '$1' not found. Please run just './ocr_script' for instructions." >&2
    exit 1
fi

# Use a freshly created, uniquely named scratch directory rather than a
# fixed ./images, so an unrelated ./images directory is never deleted.
workdir=$(mktemp -d ./ocr_images.XXXXXX) || {
    echo "Error: could not create a temporary directory." >&2
    exit 1
}
# Remove the page images on every exit path, including interruption.
trap 'rm -rf -- "$workdir"' EXIT
trap 'exit 1' HUP INT TERM

if ! pdftoppm -png -r 300 "$pdf" "$workdir/page_image"; then
    echo "Error: pdftoppm failed to render '$1'." >&2
    exit 1
fi

pages=0
for img in "$workdir"/*.png; do
    # With no matches the pattern stays literal; skip it.
    [ -e "$img" ] || continue
    if ! tesseract "$img" "${img%.*}" --psm 4 txt; then
        echo "Error: tesseract failed on '$img'." >&2
        exit 1
    fi
    pages=$((pages + 1))
done

if [ "$pages" -eq 0 ]; then
    echo "Error: no page images were produced from '$1'." >&2
    exit 1
fi

for f in "$workdir"/page_image-*.txt; do
    [ -e "$f" ] || continue
    cat "$f"
    # printf instead of 'echo -e': under /bin/sh implementations such as
    # dash, 'echo -e' prints a literal "-e" into the output.
    printf '\n\n'
done > combined.txt
