#!/bin/sh
#
# Breaks simple captchas.
#
# usage: <script> [-d] [-v] file
#   -d  display the preprocessed image that is handed to gocr
#   -v  debug mode (currently also just displays the preprocessed image)
#   -h  print usage and exit
#
# The recognised text is written to stdout.
#
# deps: gocr, imagemagick, sed, mktemp, file
#
# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

# Character set handed to gocr.
# CHARSET="[0-9A-z]"
#### dataset2 only has numbers
CHARSET="[0-9]"

# 0-100, higher numbers will force strict matches
CERTAINTY=0

# 16: do not divide overlapping chars
# 32: do not context correct
MODE='-m 16 -m 32'

# Image viewer used by -d / -v.
IMAGE_PROG=mpvimg

# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

# Print the usage line to stderr and exit with status 1.
usage() {
    printf 'Usage: %s [-d] [-v] file\n' "${0##*/}" >&2
    exit 1
}

# Print all arguments as one error message to stderr and exit with status 1.
die() {
    printf '%s\n' "$*" >&2
    exit 1
}

# Create a private scratch directory and store its path in $dir.
# mktemp picks an unpredictable name and creates the directory atomically,
# unlike a name derived from the clock and the PID.
init() {
    dir=$(mktemp -d "${TMPDIR:-/tmp}/captcha.XXXXXX") ||
        die "Cannot create temporary directory"
}

# Remove the scratch directory created by init (best effort).
# Does nothing when $dir was never set, so it cannot delete anything else.
clean() {
    [ -n "${dir:-}" ] || return 0
    rm -rf -- "$dir" 2>/dev/null || :
}

# Exit with an error unless $1 names an existing regular file.
check() {
    if [ ! -f "$1" ]; then
        printf 'No such image: %s\n' "$1" >&2
        exit 1
    fi
}

# If file(1) reports $1 as a JPEG, convert it to a PNG stored next to the
# original and point $image at that PNG. Other inputs are left untouched.
# Note: the output of file(1) starts with the file name, so a name that
# merely contains "jpg"/"jpeg" matches as well.
jpg2png() {
    case $(file "$1") in
        *jpg* | *JPG* | *jpeg* | *JPEG*)
            convert "$1" "${1%.*}.png" ||
                die "Cannot convert to png: $1"
            # rm "$1"
            image=${1%.*}.png
            ;;
    esac
}

# Preprocess the captcha image $1 and print the text gocr recognises in it.
#
# Globals read:    CHARSET, CERTAINTY, MODE, IMAGE_PROG, DEBUG, DISPLAY_RESULT
# Globals written: image, dir
# Side effects:    the input (or its PNG conversion) is trimmed in place; a
#                  scratch directory is created and removed again.
main() {
    [ "$#" -gt 0 ] || usage

    image=$1
    check "$image"

    init
    trap clean EXIT
    # Make signals go through the EXIT trap so the scratch dir is removed.
    trap 'exit 130' INT
    trap 'exit 143' TERM

    # convert to png if necessary
    jpg2png "$image"

    # trim (overwrites the image in place)
    convert "$image" -trim "$image" || die "Cannot trim image: $image"

    # clean up text and display black on white background
    # text will be evenly spaced but not aligned
    convert "$image" \
        -quality 100 \
        -shave 1x1 \
        -resize 500%x500% \
        -threshold 50% \
        -deskew 50 \
        -trim \
        -bordercolor white \
        -border 30 \
        "$dir/out.png" || die "Cannot preprocess image: $image"

    if [ "$DEBUG" = true ] || [ "$DISPLAY_RESULT" = true ]; then
        # The viewer runs in the background and may outlive this script, so
        # ownership of the scratch directory moves to the background job,
        # which removes it once the viewer has been closed.
        trap - EXIT
        # shellcheck disable=SC2086  # IMAGE_PROG may carry its own arguments
        { $IMAGE_PROG "$dir/out.png"; clean; } &
    fi

    # NOTE(review): the gocr man page appears to describe -c as a debugging
    # aid and -C as the actual character filter; confirm which was intended
    # before changing it, since that alters the recognition result.
    #
    # CHARSET is quoted because an unquoted [0-9] is a glob pattern that
    # would expand to a matching single-character file name in the cwd.
    # shellcheck disable=SC2086  # MODE holds several options and must split
    gocr -u '?' $MODE \
        -a "${CERTAINTY:-0}" \
        -c "${CHARSET:-[0-9A-z]}" \
        "$dir/out.png" 2>/dev/null |
        sed -e 's/ //g' -e 's/?/9/g' # very commonly '9' is '?'
}

while getopts ':hvd' opt; do
    case $opt in
        v) DEBUG=true ;;
        d) DISPLAY_RESULT=true ;;
        *) usage ;; # -h and unknown options
    esac
done
shift $((OPTIND - 1))

main "$@"