commit 7215a570533bc6018b07ad489ed30a73ff7d1d40 Author: wvr Date: Sun May 14 16:10:50 2023 -0500 initial diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b560485 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +PREFIX = /usr/local + +all: install + +install: + install -D -m 0755 gotcha ${DESTDIR}${PREFIX}/bin/gotcha + +uninstall: + rm ${DESTDIR}${PREFIX}/bin/gotcha + +test: + sh ./test.sh diff --git a/README.md b/README.md new file mode 100644 index 0000000..a74e9cc --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# gotcha + +playing around with captcha breaking diff --git a/dataset1/2AKY.png b/dataset1/2AKY.png new file mode 100644 index 0000000..67b84f2 Binary files /dev/null and b/dataset1/2AKY.png differ diff --git a/dataset1/2CJT.png b/dataset1/2CJT.png new file mode 100644 index 0000000..90053ba Binary files /dev/null and b/dataset1/2CJT.png differ diff --git a/dataset1/2R4Z.png b/dataset1/2R4Z.png new file mode 100644 index 0000000..ae04513 Binary files /dev/null and b/dataset1/2R4Z.png differ diff --git a/dataset1/32HP.png b/dataset1/32HP.png new file mode 100644 index 0000000..8d49641 Binary files /dev/null and b/dataset1/32HP.png differ diff --git a/dataset1/34TE.png b/dataset1/34TE.png new file mode 100644 index 0000000..fa19b65 Binary files /dev/null and b/dataset1/34TE.png differ diff --git a/dataset1/4HRE.png b/dataset1/4HRE.png new file mode 100644 index 0000000..69d6a11 Binary files /dev/null and b/dataset1/4HRE.png differ diff --git a/dataset1/4RET.png b/dataset1/4RET.png new file mode 100644 index 0000000..8a45358 Binary files /dev/null and b/dataset1/4RET.png differ diff --git a/dataset1/542Y.png b/dataset1/542Y.png new file mode 100644 index 0000000..35aff06 Binary files /dev/null and b/dataset1/542Y.png differ diff --git a/dataset1/5HHL.png b/dataset1/5HHL.png new file mode 100644 index 0000000..c2cdefc Binary files /dev/null and b/dataset1/5HHL.png differ diff --git a/dataset1/AM45.png b/dataset1/AM45.png new file mode 100644 index 0000000..44fd6f0 Binary files /dev/null and b/dataset1/AM45.png differ diff --git a/dataset1/C64J.png b/dataset1/C64J.png new file mode 100644 index 0000000..522a3a9 Binary files /dev/null and b/dataset1/C64J.png differ diff --git a/dataset1/CMAW.png b/dataset1/CMAW.png new file mode 100644 index 0000000..085a31a Binary files /dev/null and b/dataset1/CMAW.png differ diff --git a/dataset1/CWCX.png b/dataset1/CWCX.png new file mode 100644 index 0000000..cdf24a3 Binary files /dev/null and b/dataset1/CWCX.png differ diff --git a/dataset1/CYFV.png b/dataset1/CYFV.png new file mode 100644 index 0000000..0224fb8 Binary files /dev/null and b/dataset1/CYFV.png differ diff --git a/dataset1/E56P.png b/dataset1/E56P.png new file mode 100644 index 0000000..007cffa Binary files /dev/null and b/dataset1/E56P.png differ diff --git a/dataset1/EMM2.png b/dataset1/EMM2.png new file mode 100644 index 0000000..5d7a9cb Binary files /dev/null and b/dataset1/EMM2.png differ diff --git a/dataset1/F2HH.png b/dataset1/F2HH.png new file mode 100644 index 0000000..4e02d35 Binary files /dev/null and b/dataset1/F2HH.png differ diff --git a/dataset1/F46X.png b/dataset1/F46X.png new file mode 100644 index 0000000..66db571 Binary files /dev/null and b/dataset1/F46X.png differ diff --git a/dataset1/FHLF.png b/dataset1/FHLF.png new file mode 100644 index 0000000..04a54ce Binary files /dev/null and b/dataset1/FHLF.png differ diff --git a/dataset1/FMTW.png b/dataset1/FMTW.png new file mode 100644 index 0000000..6be4bae Binary files /dev/null and b/dataset1/FMTW.png differ diff --git a/dataset1/FZRL.png b/dataset1/FZRL.png new file mode 100644 index 0000000..623b11d Binary files /dev/null and b/dataset1/FZRL.png differ diff --git a/dataset1/HCE4.png b/dataset1/HCE4.png new file mode 100644 index 0000000..f85ba81 Binary files /dev/null and b/dataset1/HCE4.png differ diff --git a/dataset1/HJFY.png b/dataset1/HJFY.png new file mode 100644 index 0000000..6eeab07 Binary files /dev/null and b/dataset1/HJFY.png differ diff --git a/dataset1/HXCA.png b/dataset1/HXCA.png new file mode 100644 index 0000000..8df2429 Binary files /dev/null and b/dataset1/HXCA.png differ diff --git a/dataset1/J34E.png b/dataset1/J34E.png new file mode 100644 index 0000000..7efb16c Binary files /dev/null and b/dataset1/J34E.png differ diff --git a/dataset1/JPHL.png b/dataset1/JPHL.png new file mode 100644 index 0000000..cd3fb4a Binary files /dev/null and b/dataset1/JPHL.png differ diff --git a/dataset1/JXCF.png b/dataset1/JXCF.png new file mode 100644 index 0000000..48044a3 Binary files /dev/null and b/dataset1/JXCF.png differ diff --git a/dataset1/JYJC.png b/dataset1/JYJC.png new file mode 100644 index 0000000..1c6f1e1 Binary files /dev/null and b/dataset1/JYJC.png differ diff --git a/dataset1/K6L2.png b/dataset1/K6L2.png new file mode 100644 index 0000000..86c9257 Binary files /dev/null and b/dataset1/K6L2.png differ diff --git a/dataset1/LAYP.png b/dataset1/LAYP.png new file mode 100644 index 0000000..7c7fff4 Binary files /dev/null and b/dataset1/LAYP.png differ diff --git a/dataset1/LTJW.png b/dataset1/LTJW.png new file mode 100644 index 0000000..3c2166d Binary files /dev/null and b/dataset1/LTJW.png differ diff --git a/dataset1/LWYH.png b/dataset1/LWYH.png new file mode 100644 index 0000000..52794bb Binary files /dev/null and b/dataset1/LWYH.png differ diff --git a/dataset1/MLTW.png b/dataset1/MLTW.png new file mode 100644 index 0000000..d85f7b1 Binary files /dev/null and b/dataset1/MLTW.png differ diff --git a/dataset1/PHEZ.png b/dataset1/PHEZ.png new file mode 100644 index 0000000..baa4cc6 Binary files /dev/null and b/dataset1/PHEZ.png differ diff --git a/dataset1/PLVV.png b/dataset1/PLVV.png new file mode 100644 index 0000000..5691486 Binary files /dev/null and b/dataset1/PLVV.png differ diff --git a/dataset1/PRYW.png b/dataset1/PRYW.png new file mode 100644 index 0000000..7b2c628 Binary files /dev/null and b/dataset1/PRYW.png differ diff --git a/dataset1/R4XV.png b/dataset1/R4XV.png new file mode 100644 index 0000000..1363268 Binary files /dev/null and b/dataset1/R4XV.png differ diff --git a/dataset1/REEK.png b/dataset1/REEK.png new file mode 100644 index 0000000..fe86cdd Binary files /dev/null and b/dataset1/REEK.png differ diff --git a/dataset1/RHRR.png b/dataset1/RHRR.png new file mode 100644 index 0000000..372980d Binary files /dev/null and b/dataset1/RHRR.png differ diff --git a/dataset1/RMZ3.png b/dataset1/RMZ3.png new file mode 100644 index 0000000..ed2d58a Binary files /dev/null and b/dataset1/RMZ3.png differ diff --git a/dataset1/T66Y.png b/dataset1/T66Y.png new file mode 100644 index 0000000..a4c3480 Binary files /dev/null and b/dataset1/T66Y.png differ diff --git a/dataset1/TEEC.png b/dataset1/TEEC.png new file mode 100644 index 0000000..4117131 Binary files /dev/null and b/dataset1/TEEC.png differ diff --git a/dataset1/TXAE.png b/dataset1/TXAE.png new file mode 100644 index 0000000..6980158 Binary files /dev/null and b/dataset1/TXAE.png differ diff --git a/dataset1/VJCM.png b/dataset1/VJCM.png new file mode 100644 index 0000000..b13192e Binary files /dev/null and b/dataset1/VJCM.png differ diff --git a/dataset1/VP4X.png b/dataset1/VP4X.png new file mode 100644 index 0000000..a8a0702 Binary files /dev/null and b/dataset1/VP4X.png differ diff --git a/dataset1/W5AE.png b/dataset1/W5AE.png new file mode 100644 index 0000000..3c97e3f Binary files /dev/null and b/dataset1/W5AE.png differ diff --git a/dataset1/W5PP.png b/dataset1/W5PP.png new file mode 100644 index 0000000..6756e90 Binary files /dev/null and b/dataset1/W5PP.png differ diff --git a/dataset1/WYLE.png b/dataset1/WYLE.png new file mode 100644 index 0000000..ffef6dd Binary files /dev/null and b/dataset1/WYLE.png differ diff --git a/dataset1/X6CR.png b/dataset1/X6CR.png new file mode 100644 index 0000000..9cde6bc Binary files /dev/null and b/dataset1/X6CR.png differ diff --git a/dataset1/YWL6.png b/dataset1/YWL6.png new file mode 100644 index 0000000..a749dab Binary files /dev/null and b/dataset1/YWL6.png differ diff --git a/dataset2/a1.png b/dataset2/a1.png new file mode 100644 index 0000000..174bc54 Binary files /dev/null and b/dataset2/a1.png differ diff --git a/dataset2/a10.jpg b/dataset2/a10.jpg new file mode 100644 index 0000000..fab4509 Binary files /dev/null and b/dataset2/a10.jpg differ diff --git a/dataset2/a11.jpg b/dataset2/a11.jpg new file mode 100644 index 0000000..d3007cd Binary files /dev/null and b/dataset2/a11.jpg differ diff --git a/dataset2/a12.jpg b/dataset2/a12.jpg new file mode 100644 index 0000000..abef7d7 Binary files /dev/null and b/dataset2/a12.jpg differ diff --git a/dataset2/a13.jpg b/dataset2/a13.jpg new file mode 100644 index 0000000..49f36f1 Binary files /dev/null and b/dataset2/a13.jpg differ diff --git a/dataset2/a14.jpg b/dataset2/a14.jpg new file mode 100644 index 0000000..07868c0 Binary files /dev/null and b/dataset2/a14.jpg differ diff --git a/dataset2/a15.jpg b/dataset2/a15.jpg new file mode 100644 index 0000000..94e5457 Binary files /dev/null and b/dataset2/a15.jpg differ diff --git a/dataset2/a16.jpg b/dataset2/a16.jpg new file mode 100644 index 0000000..a93757c Binary files /dev/null and b/dataset2/a16.jpg differ diff --git a/dataset2/a17.jpg b/dataset2/a17.jpg new file mode 100644 index 0000000..4b69df2 Binary files /dev/null and b/dataset2/a17.jpg differ diff --git a/dataset2/a18.jpg b/dataset2/a18.jpg new file mode 100644 index 0000000..90de46e Binary files /dev/null and b/dataset2/a18.jpg differ diff --git a/dataset2/a19.jpg b/dataset2/a19.jpg new file mode 100644 index 0000000..552bb24 Binary files /dev/null and b/dataset2/a19.jpg differ diff --git a/dataset2/a2.jpg b/dataset2/a2.jpg new file mode 100644 index 0000000..13f49d8 Binary files /dev/null and b/dataset2/a2.jpg differ diff --git a/dataset2/a2.png b/dataset2/a2.png new file mode 100644 index 0000000..e628ff1 Binary files /dev/null and b/dataset2/a2.png differ diff --git a/dataset2/a20.jpg b/dataset2/a20.jpg new file mode 100644 index 0000000..65b2baa Binary files /dev/null and b/dataset2/a20.jpg differ diff --git a/dataset2/a21.jpg b/dataset2/a21.jpg new file mode 100644 index 0000000..1aa5bdd Binary files /dev/null and b/dataset2/a21.jpg differ diff --git a/dataset2/a22.jpg b/dataset2/a22.jpg new file mode 100644 index 0000000..2944f4a Binary files /dev/null and b/dataset2/a22.jpg differ diff --git a/dataset2/a23.jpg b/dataset2/a23.jpg new file mode 100644 index 0000000..90235c2 Binary files /dev/null and b/dataset2/a23.jpg differ diff --git a/dataset2/a24.jpg b/dataset2/a24.jpg new file mode 100644 index 0000000..cebc086 Binary files /dev/null and b/dataset2/a24.jpg differ diff --git a/dataset2/a25.jpg b/dataset2/a25.jpg new file mode 100644 index 0000000..e0b8bbe Binary files /dev/null and b/dataset2/a25.jpg differ diff --git a/dataset2/a26.jpg b/dataset2/a26.jpg new file mode 100644 index 0000000..c5adacc Binary files /dev/null and b/dataset2/a26.jpg differ diff --git a/dataset2/a27.jpg b/dataset2/a27.jpg new file mode 100644 index 0000000..878b2f6 Binary files /dev/null and b/dataset2/a27.jpg differ diff --git a/dataset2/a28.jpg b/dataset2/a28.jpg new file mode 100644 index 0000000..ad5406f Binary files /dev/null and b/dataset2/a28.jpg differ diff --git a/dataset2/a29.jpg b/dataset2/a29.jpg new file mode 100644 index 0000000..0713376 Binary files /dev/null and b/dataset2/a29.jpg differ diff --git a/dataset2/a3.jpg b/dataset2/a3.jpg new file mode 100644 index 0000000..5c828ba Binary files /dev/null and b/dataset2/a3.jpg differ diff --git a/dataset2/a30.jpg b/dataset2/a30.jpg new file mode 100644 index 0000000..5f10d7b Binary files /dev/null and b/dataset2/a30.jpg differ diff --git a/dataset2/a4.jpg b/dataset2/a4.jpg new file mode 100644 index 0000000..f8253c7 Binary files /dev/null and b/dataset2/a4.jpg differ diff --git a/dataset2/a5.jpg b/dataset2/a5.jpg new file mode 100644 index 0000000..68e55ec Binary files /dev/null and b/dataset2/a5.jpg differ diff --git a/dataset2/a6.jpg b/dataset2/a6.jpg new file mode 100644 index 0000000..14d250e Binary files /dev/null and b/dataset2/a6.jpg differ diff --git a/dataset2/a7.jpg b/dataset2/a7.jpg new file mode 100644 index 0000000..4def35d Binary files /dev/null and b/dataset2/a7.jpg differ diff --git a/dataset2/a8.jpg b/dataset2/a8.jpg new file mode 100644 index 0000000..d2e8124 Binary files /dev/null and b/dataset2/a8.jpg differ diff --git a/dataset2/a9.jpg b/dataset2/a9.jpg new file mode 100644 index 0000000..ac73172 Binary files /dev/null and b/dataset2/a9.jpg differ diff --git a/gotcha_demo.mp4 b/gotcha_demo.mp4 new file mode 100644 index 0000000..6fb92d1 Binary files /dev/null and b/gotcha_demo.mp4 differ diff --git a/gotcha_v1 b/gotcha_v1 new file mode 100755 index 0000000..0d7a672 --- /dev/null +++ b/gotcha_v1 @@ -0,0 +1,143 @@ +#!/bin/sh -e +# +# Breaks simple captchas. +# +# deps: gocr, imagemagick, multicrop, textcleaner +# + +# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* +NUM_CHARS=4 +CHARSET="[0-9A-z]" + +# lower number = more dense to search = more rigorous detection +DENSITY=3 + +# 0-100, higher numbers will force strict matches +CERTAINTY=0 + +# 16: not dot divide overlapping chars +# 32: do not context correct +MODE='-m 16 -m 32' + +IMAGE_PROG=mpvimg +# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* + +usage() { + >&2 printf "Usage: %s [-d] [-v] [file]\n" "${0##*/}" + exit 1 +} + +init() { + random() { r=$(($(date +%s) / $$ - $$)) ; echo ${r#-} ; } + dir=/tmp/$(random) # dir to store tmp files + tmp=$dir/$(random) # image created + unset -f random + + clean + mkdir -p "$dir" +} + +clean() { + rm -r "${dir:-?}" 2>/dev/null ||: +} + +check() { + if [ ! -f "$1" ] ; then + >&2 echo "No such image: $1" + exit 1 + fi +} + +jpg2png() { + case $(file "$1") in + *jpg*|*JPG*|*jpeg*|*JPEG*) + convert "$1" "${1%.*}.png" + rm "$1" + image=${image%.*}.png + ;; + esac +} + +main() { + image=$1 + check "$image" + + init + # trap 'clean' INT TERM EXIT + + # convert to png if necessary + jpg2png "image" + + # clean up text and $IM black on white background + # text will be evenly spaced but not aligned + convert \ + -quality 100 \ + -shave 1x1 \ + -resize 500%x500% \ + -threshold 50% \ + -deskew 50 \ + -trim \ + -bordercolor white \ + -border 20 \ + "$image" "$tmp" + + [ "$DEBUG" = true ] && $IMAGE_PROG "$tmp" & + + + # IMPORTANT: multicrop reads from top-to-bottom, while + # we need to read left-to-right as we are reading text. + # + # Without rotation the resulting letters are mixed at random. + convert -rotate 90 "$tmp" "$tmp" + multicrop -u 1 -f 20 -g "${DENSITY:-5}" \ + -b white "$tmp" "$dir/multicut.png" >/dev/null + + # expand borders a little to help with ocr + for i in "$dir"/multicut-*.png ; do + { + # and rotate 270 to put us back where we were + convert -rotate 270 "$i" "$i" + + textcleaner -g -e stretch -f 25 -o 10 -s 1 "$i" "$i" + + convert \ + -quality 100 \ + -monochrome \ + -trim \ + -bordercolor white \ + -border 100 \ + "$i" "$i" + } & + done + wait + + if [ "$DEBUG" = true ] ; then + for i in "$dir/multicut-"*.png ; do + $IMAGE_PROG "$i" & + done + fi + + montage "$dir/multicut-*.png" -tile "${NUM_CHARS:-4}"x1 "$dir/out.png" + + if [ "$DEBUG" = true ] || [ "$DISPLAY_RESULT" = true ] ; then + $IMAGE_PROG "$dir/out.png" & + fi + + # shellcheck disable=2086 + gocr -u '?' $MODE \ + -a ${CERTAINTY:-0} \ + -c ${CHARSET:-'[0-9A-z]'} \ + "$dir/out.png" 2>/dev/null | \ + sed 's/ //g' | tr '[:lower:]' '[:upper:]' +} + +while [ "$1" ] ; do + case $1 in + -h) usage ;; + -v) DEBUG=true ; shift ;; + -d) DISPLAY_RESULT=true ; shift ;; + *) break + esac +done + +main "$@" diff --git a/gotcha_v2 b/gotcha_v2 new file mode 100755 index 0000000..8ae9662 --- /dev/null +++ b/gotcha_v2 @@ -0,0 +1,110 @@ +#!/bin/sh +# +# Breaks simple captchas. +# +# deps: gocr, imagemagick, sed +# + +# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* +# CHARSET="[0-9A-z]" +#### dataset2 only has numbers +CHARSET="[0-9]" + +# 0-100, higher numbers will force strict matches +CERTAINTY=0 + +# 16: not dot divide overlapping chars +# 32: do not context correct +MODE='-m 16 -m 32' + +IMAGE_PROG=mpvimg +# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* + +usage() { + >&2 printf "Usage: %s [-d] [-v] [file]\n" "${0##*/}" + exit 1 +} + +init() { + random() { r=$(($(date +%s) / $$ - $$)) ; echo ${r#-} ; } + dir=/tmp/$(random) # dir to store tmp files + tmp=$dir/$(random) # image created + unset -f random + + clean + mkdir -p "$dir" +} + +clean() { + rm -r "${dir:-?}" 2>/dev/null ||: +} + +check() { + if [ ! -f "$1" ] ; then + >&2 echo "No such image: $1" + exit 1 + fi +} + +jpg2png() { + case $(file "$1") in + *jpg*|*JPG*|*jpeg*|*JPEG*) + convert "$1" "${1%.*}.png" + # rm "$1" + image=${image%.*}.png + ;; + esac +} + +main() { + image=$1 + check "$image" + + init + # trap 'clean' INT TERM EXIT + + # convert to png if necessary + jpg2png "$image" + + # trim + convert -trim "$image" "$image" + + # clean up text and display black on white background + # text will be evenly spaced but not aligned + convert \ + -quality 100 \ + -shave 1x1 \ + -resize 500%x500% \ + -threshold 50% \ + -deskew 50 \ + -trim \ + -bordercolor white \ + -border 30 \ + "$image" "$tmp" + + [ "$DEBUG" = true ] && $IMAGE_PROG "$tmp" & + + mv -f "$tmp" "$dir"/out.png + + if [ "$DEBUG" = true ] || [ "$DISPLAY_RESULT" = true ] ; then + $IMAGE_PROG "$dir/out.png" & + fi + + # shellcheck disable=2086 + gocr -u '?' $MODE \ + -a ${CERTAINTY:-0} \ + -c ${CHARSET:-'[0-9A-z]'} \ + "$dir/out.png" 2>/dev/null | \ + sed 's/ //g' | sed 's/\?/9/g' # very commonly '9' is '?' +} + +while [ "$1" ] ; do + case $1 in + -h) usage ;; + -v) DEBUG=true ; shift ;; + -d) DISPLAY_RESULT=true ; shift ;; + *) break + esac +done + +main "$@" diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..7d1e8d2 --- /dev/null +++ b/test.sh @@ -0,0 +1,23 @@ +#!/bin/sh -e + +[ -d dataset ] || exit 1 + +correct=0 +count=0 +total=$(printf '%s\n' dataset/* | wc -l) + +for i in dataset/*.png ; do + res=$(bash ./gotcha "$i") + i=${i#dataset/} + if [ "$res" = "${i%.png}" ] ; then + correct=$((correct + 1)) + status=SUCCESS + else + status=FAIL + fi + printf '%s\n' "$status: [count: $count | correct: $correct/$total] - ANS: ${i%.png} RES: $res" + count=$((count + 1)) +done + +echo +echo 'Done!'