Files
gotcha/gotcha_v2
2023-05-14 16:10:50 -05:00

111 lines
2.1 KiB
Bash
Executable File

#!/bin/sh
#
# Breaks simple captchas.
#
# deps: gocr, imagemagick, sed
#
# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
# Settings. CHARSET is handed to gocr through its -c option.
# Alternative for alphanumeric captchas:
# CHARSET="[0-9A-z]"
#### dataset2 only has numbers
CHARSET='[0-9]'
# Handed to gocr through -a.
# 0-100, higher numbers will force strict matches
CERTAINTY='0'
# gocr mode flags (word-split on use, one -m per flag):
#   16: do not divide overlapping chars
#   32: do not context correct
MODE="-m 16 -m 32"
# Command that is run with an image path to display that image.
IMAGE_PROG='mpvimg'
# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
# usage: write the one-line command synopsis to stderr and terminate the
# script with exit status 1. The program name shown is $0 without its
# directory part.
usage() {
  printf '%s\n' "Usage: ${0##*/} [-d] [-v] [file]" >&2
  exit 1
}
# init: create a private scratch directory for this run.
#
# Globals written:
#   dir - the freshly created scratch directory
#   tmp - path inside $dir reserved for the intermediate image; the file
#         itself is not created here
# Exits the script with status 1 if the directory cannot be created.
init() {
  # mktemp -d chooses an unused name and creates the directory in one step
  # with owner-only permissions, so nothing that already exists under the
  # temp location is ever removed or reused.
  dir=$(mktemp -d "${TMPDIR:-/tmp}/gotcha.XXXXXX") || {
    >&2 echo "Cannot create temporary directory"
    exit 1
  }
  # Extension-less on purpose; main renames the finished image to out.png.
  tmp=$dir/image
}
# clean: remove the scratch directory named by $dir (best effort).
#
# Globals read: dir
# Does nothing when dir is unset or empty, so no unrelated path can be
# deleted by accident. Always returns 0; removal errors are ignored on
# purpose because this is only cleanup.
clean() {
  [ -n "${dir-}" ] || return 0
  rm -rf -- "$dir" 2>/dev/null || :
}
# check PATH: succeed quietly when PATH is an existing regular file;
# otherwise report it on stderr and terminate the script with status 1.
check() {
  [ -f "$1" ] && return 0
  echo "No such image: $1" >&2
  exit 1
}
# jpg2png PATH: if `file` describes PATH as a JPEG, write a PNG copy next
# to it and point the global `image` at that copy.
#
# Arguments: $1 - path to the image
# Globals:   image - set to the PNG path, only when convert succeeded
#            png   - scratch variable holding the PNG path
# Returns:   convert's status when a conversion was attempted, else 0
jpg2png() {
  # -b prints the description without the "name:" prefix, so a path that
  # merely contains "jpg" (e.g. a directory called jpgs/) cannot trigger a
  # conversion of a non-JPEG file.
  case $(file -b -- "$1") in
    *jpg*|*JPG*|*jpeg*|*JPEG*)
      # Replace the extension only when the base name really has one;
      # otherwise "./captcha" would turn into ".png".
      case ${1##*/} in
        ?*.*) png=${1%.*}.png ;;
        *) png=$1.png ;;
      esac
      # The source JPEG is left in place. `image` keeps its old value if
      # convert fails, so later steps still have an existing file to read.
      convert "$1" "$png" && image=$png
      ;;
  esac
}
# main IMAGE: preprocess one captcha image and print the text gocr reads.
#
# Arguments: $1 - path to the captcha image
# Globals:   reads MODE, CERTAINTY, CHARSET, IMAGE_PROG, DEBUG and
#            DISPLAY_RESULT; uses dir and tmp as set by init; sets image,
#            trimmed, viewer and rc
# Outputs:   gocr's result on stdout with spaces removed and '?' replaced
#            by '9'
# Returns:   1 when no preprocessed image could be produced, otherwise the
#            status of the gocr | sed pipeline
main() {
  image=$1
  check "$image"
  init

  # convert to png if necessary (may repoint $image)
  jpg2png "$image"

  # Trim into the scratch directory so the caller's file is never
  # rewritten. Only a missing output counts as failure here: in that case
  # the untrimmed image is used instead.
  trimmed=$dir/trimmed
  convert -trim "$image" "$trimmed"
  [ -f "$trimmed" ] || trimmed=$image

  # clean up text and display black on white background
  # text will be evenly spaced but not aligned
  convert \
    -quality 100 \
    -shave 1x1 \
    -resize 500%x500% \
    -threshold 50% \
    -deskew 50 \
    -trim \
    -bordercolor white \
    -border 30 \
    "$trimmed" "$tmp"
  if [ ! -f "$tmp" ]; then
    >&2 echo "Could not preprocess image: $image"
    clean
    return 1
  fi
  mv -f "$tmp" "$dir/out.png"

  # The viewer is started once, on the final file. Starting it on $tmp
  # right before the rename could make it open a path that no longer exists.
  viewer=false
  if [ "$DEBUG" = true ] || [ "$DISPLAY_RESULT" = true ]; then
    $IMAGE_PROG "$dir/out.png" &
    viewer=true
  fi

  # MODE holds several words and must be split; CERTAINTY and CHARSET are
  # quoted so that a value such as [0-9] is not expanded as a file glob.
  # NOTE(review): gocr's manual describes -C as the character filter and -c
  # as verbose-output selection; confirm that -c is what is intended here.
  # shellcheck disable=2086
  gocr -u '?' $MODE \
    -a "${CERTAINTY:-0}" \
    -c "${CHARSET:-[0-9A-z]}" \
    "$dir/out.png" 2>/dev/null |
    sed -e 's/ //g' -e 's/?/9/g' # very commonly '9' is '?'
  rc=$?

  # A background viewer may not have opened out.png yet, so the scratch
  # directory is only removed when no viewer was started.
  [ "$viewer" = true ] || clean
  return "$rc"
}
# Parse leading option flags, then hand the remaining arguments to main.
#   -h  print the usage line and exit
#   -v  set DEBUG=true
#   -d  set DISPLAY_RESULT=true
# Parsing stops at the first argument that is not one of these flags.
while [ $# -gt 0 ]; do
  case $1 in
    -h) usage ;;
    -v) DEBUG=true ;;
    -d) DISPLAY_RESULT=true ;;
    *) break ;;
  esac
  shift
done
# Without a file operand there is nothing to read; show the synopsis
# instead of reporting an empty file name as missing.
[ -n "${1-}" ] || usage
main "$@"