This commit is contained in:
wvr
2023-05-14 16:10:50 -05:00
commit 7215a57053
87 changed files with 291 additions and 0 deletions

12
Makefile Normal file
View File

@@ -0,0 +1,12 @@
# Installation prefix. Override on the command line, e.g.
# `make PREFIX=/usr install`; DESTDIR is honoured for staged installs.
PREFIX = /usr/local

# None of these goals names a file this Makefile creates, so a stray file
# called `install` or `test` must not make them look up to date.
.PHONY: all install uninstall test

# NOTE(review): the default goal installs straight away; kept for
# compatibility with existing `make` invocations.
all: install

# Copy the script into place, creating the bin directory if needed (-D).
install:
	install -D -m 0755 gotcha "${DESTDIR}${PREFIX}/bin/gotcha"

# -f keeps uninstall from failing when nothing is installed.
uninstall:
	rm -f "${DESTDIR}${PREFIX}/bin/gotcha"

# Run the accuracy check over the sample dataset.
test:
	sh ./test.sh

3
README.md Normal file
View File

@@ -0,0 +1,3 @@
# gotcha
playing around with captcha breaking

BIN
dataset1/2AKY.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
dataset1/2CJT.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/2R4Z.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
dataset1/32HP.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/34TE.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/4HRE.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/4RET.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/542Y.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/5HHL.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/AM45.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/C64J.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/CMAW.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
dataset1/CWCX.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/CYFV.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/E56P.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/EMM2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/F2HH.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/F46X.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/FHLF.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/FMTW.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/FZRL.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/HCE4.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/HJFY.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
dataset1/HXCA.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/J34E.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/JPHL.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/JXCF.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/JYJC.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/K6L2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
dataset1/LAYP.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/LTJW.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/LWYH.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/MLTW.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/PHEZ.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/PLVV.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/PRYW.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
dataset1/R4XV.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/REEK.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/RHRR.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/RMZ3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/T66Y.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/TEEC.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/TXAE.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
dataset1/VJCM.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/VP4X.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
dataset1/W5AE.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
dataset1/W5PP.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/WYLE.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
dataset1/X6CR.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset1/YWL6.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
dataset2/a1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

BIN
dataset2/a10.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a11.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a12.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a13.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a14.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a15.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a16.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a17.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a18.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a19.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a2.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

BIN
dataset2/a20.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a21.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a22.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a23.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a24.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a25.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a26.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a27.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a28.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a29.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a3.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a30.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a4.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a5.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a6.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a7.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a8.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
dataset2/a9.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
gotcha_demo.mp4 Normal file

Binary file not shown.

143
gotcha_v1 Executable file
View File

@@ -0,0 +1,143 @@
#!/bin/sh -e
#
# Breaks simple captchas.
#
# deps: gocr, imagemagick, multicrop, textcleaner
#
# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
# Number of glyphs expected per captcha; used as the montage tile width.
NUM_CHARS=4

# Character class handed to gocr through its -c option.
CHARSET='[0-9A-z]'

# multicrop search density:
# lower number = more dense to search = more rigorous detection
DENSITY=3

# gocr certainty, 0-100; higher numbers will force strict matches
CERTAINTY=0

# gocr mode flags (expanded unquoted so each word is its own argument):
#   16: do not divide overlapping chars
#   32: do not context correct
MODE="-m 16 -m 32"

# Viewer launched on intermediate images when -v / -d is given.
IMAGE_PROG='mpvimg'
# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
# Print a one-line synopsis (program name taken from $0 with any leading
# directories stripped) to stderr, then terminate with status 1.
usage() {
    prog=${0##*/}
    printf "Usage: %s [-d] [-v] [file]\n" "$prog" >&2
    exit 1
}
# Create a private scratch directory and choose the working-image path.
#
# Sets the globals:
#   dir - freshly created temporary directory for all intermediate files
#   tmp - path (inside $dir) of the pre-processed working image
#
# mktemp -d creates the directory atomically under an unpredictable name,
# so nothing that already exists can be removed or reused by accident.
init() {
    dir=$(mktemp -d "${TMPDIR:-/tmp}/gotcha.XXXXXX")
    # Extension-less on purpose: the working image keeps the input's format.
    tmp=$dir/image
}
# Remove the scratch directory named by $dir, if any.
#
# Best effort: errors from rm are ignored and the function always
# succeeds. When $dir is unset or empty nothing is removed at all.
clean() {
    if [ -n "$dir" ] ; then
        rm -rf -- "$dir" 2>/dev/null || :
    fi
}
# Verify that $1 names an existing regular file.
# Returns 0 when it does; otherwise reports on stderr and exits with 1.
check() {
    [ -f "$1" ] && return 0
    echo "No such image: $1" >&2
    exit 1
}
# Convert the image at $1 to PNG when its contents are JPEG data.
#
# On conversion the PNG is written next to the source (same name with the
# extension replaced, or ".png" appended when the file name has none) and
# the global $image is pointed at it. The source file is left in place.
# For any other file type nothing happens.
jpg2png() {
    # -b drops the file name from the output, so only the detected type is
    # matched and a name that merely contains "jpg" cannot trigger this.
    case $(file -b "$1") in
        *JPEG*|*jpeg*)
            # Only strip a suffix that belongs to the file name itself;
            # a dot in a parent directory must not be mistaken for one.
            case ${1##*/} in
                ?*.*) _png=${1%.*}.png ;;
                *)    _png=$1.png ;;
            esac
            convert "$1" "$_png"
            image=$_png
            ;;
    esac
}
# Recognise the captcha in image file $1 and print the text on stdout.
#
# Pipeline: validate input -> scratch dir -> optional JPEG->PNG ->
# binarise/deskew/enlarge -> split into glyphs with multicrop -> clean
# each glyph -> stitch glyphs into one row -> OCR with gocr.
#
# Reads the globals NUM_CHARS, CHARSET, DENSITY, CERTAINTY, MODE,
# IMAGE_PROG, DEBUG and DISPLAY_RESULT.
main() {
    image=$1
    check "$image"
    init
    # NOTE(review): the scratch directory is never removed because the trap
    # below is disabled -- presumably so background viewers can still read
    # the files; confirm before re-enabling.
    # trap 'clean' INT TERM EXIT

    # Convert to png if necessary (jpg2png repoints $image on conversion).
    jpg2png "$image"

    # Clean up the text and render it black on a white background.
    # Text will be evenly spaced but not aligned.
    convert \
        -quality 100 \
        -shave 1x1 \
        -resize 500%x500% \
        -threshold 50% \
        -deskew 50 \
        -trim \
        -bordercolor white \
        -border 20 \
        "$image" "$tmp"

    [ "$DEBUG" = true ] && $IMAGE_PROG "$tmp" &

    # IMPORTANT: multicrop reads from top-to-bottom, while
    # we need to read left-to-right as we are reading text.
    #
    # Without rotation the resulting letters are mixed at random.
    convert -rotate 90 "$tmp" "$tmp"
    multicrop -u 1 -f 20 -g "${DENSITY:-5}" \
        -b white "$tmp" "$dir/multicut.png" >/dev/null

    # Post-process every glyph in parallel: expand borders a little to
    # help with ocr.
    for i in "$dir"/multicut-*.png ; do
        {
            # and rotate 270 to put us back where we were
            convert -rotate 270 "$i" "$i"
            textcleaner -g -e stretch -f 25 -o 10 -s 1 "$i" "$i"
            convert \
                -quality 100 \
                -monochrome \
                -trim \
                -bordercolor white \
                -border 100 \
                "$i" "$i"
        } &
    done
    # Block until every background job started above has finished.
    wait

    if [ "$DEBUG" = true ] ; then
        for i in "$dir/multicut-"*.png ; do
            $IMAGE_PROG "$i" &
        done
    fi

    # The quoted glob is passed through literally to montage.
    montage "$dir/multicut-*.png" -tile "${NUM_CHARS:-4}"x1 "$dir/out.png"

    if [ "$DEBUG" = true ] || [ "$DISPLAY_RESULT" = true ] ; then
        $IMAGE_PROG "$dir/out.png" &
    fi

    # $MODE must stay unquoted so it splits into separate arguments; the
    # character class is quoted so the shell cannot glob it against a
    # one-character file name in the current directory.
    # NOTE(review): gocr's manual appears to describe -C as the character
    # filter and -c as a debug list -- confirm which one is intended.
    # Spaces are stripped and the result is upper-cased.
    # shellcheck disable=2086
    gocr -u '?' $MODE \
        -a "${CERTAINTY:-0}" \
        -c "${CHARSET:-[0-9A-z]}" \
        "$dir/out.png" 2>/dev/null | \
        sed 's/ //g' | tr '[:lower:]' '[:upper:]'
}
# Consume leading option flags; the first word that is not a known flag
# (or an empty/missing argument) ends option parsing.
while [ -n "$1" ] ; do
    case "$1" in
        -h)
            usage
            ;;
        -v)
            DEBUG=true
            shift
            ;;
        -d)
            DISPLAY_RESULT=true
            shift
            ;;
        *)
            break
            ;;
    esac
done

# Everything left over (normally the image path) goes to main.
main "$@"

110
gotcha_v2 Executable file
View File

@@ -0,0 +1,110 @@
#!/bin/sh
#
# Breaks simple captchas.
#
# deps: gocr, imagemagick, sed
#
# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
# Character class handed to gocr through its -c option.
# The wider class "[0-9A-z]" is not used here: dataset2 only has numbers.
CHARSET='[0-9]'

# gocr certainty, 0-100; higher numbers will force strict matches
CERTAINTY=0

# gocr mode flags (expanded unquoted so each word is its own argument):
#   16: do not divide overlapping chars
#   32: do not context correct
MODE="-m 16 -m 32"

# Viewer launched on intermediate images when -v / -d is given.
IMAGE_PROG='mpvimg'
# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
# Print a one-line synopsis (program name taken from $0 with any leading
# directories stripped) to stderr, then terminate with status 1.
usage() {
    prog=${0##*/}
    printf "Usage: %s [-d] [-v] [file]\n" "$prog" >&2
    exit 1
}
# Create a private scratch directory and choose the working-image path.
#
# Sets the globals:
#   dir - freshly created temporary directory for all intermediate files
#   tmp - path (inside $dir) of the pre-processed working image
#
# mktemp -d creates the directory atomically under an unpredictable name,
# so nothing that already exists can be removed or reused by accident.
# This script does not run under `sh -e`, hence the explicit exit.
init() {
    dir=$(mktemp -d "${TMPDIR:-/tmp}/gotcha.XXXXXX") || exit 1
    # Extension-less on purpose: the working image keeps the input's format.
    tmp=$dir/image
}
# Remove the scratch directory named by $dir, if any.
#
# Best effort: errors from rm are ignored and the function always
# succeeds. When $dir is unset or empty nothing is removed at all.
clean() {
    if [ -n "$dir" ] ; then
        rm -rf -- "$dir" 2>/dev/null || :
    fi
}
# Verify that $1 names an existing regular file.
# Returns 0 when it does; otherwise reports on stderr and exits with 1.
check() {
    [ -f "$1" ] && return 0
    echo "No such image: $1" >&2
    exit 1
}
# Convert the image at $1 to PNG when its contents are JPEG data.
#
# On conversion the PNG is written next to the source (same name with the
# extension replaced, or ".png" appended when the file name has none) and
# the global $image is pointed at it. The source file is left in place.
# For any other file type nothing happens.
jpg2png() {
    # -b drops the file name from the output, so only the detected type is
    # matched and a name that merely contains "jpg" cannot trigger this.
    case $(file -b "$1") in
        *JPEG*|*jpeg*)
            # Only strip a suffix that belongs to the file name itself;
            # a dot in a parent directory must not be mistaken for one.
            case ${1##*/} in
                ?*.*) _png=${1%.*}.png ;;
                *)    _png=$1.png ;;
            esac
            convert "$1" "$_png"
            image=$_png
            ;;
    esac
}
# Recognise the captcha in image file $1 and print the text on stdout.
#
# Pipeline: validate input -> scratch dir -> optional JPEG->PNG ->
# trim -> binarise/deskew/enlarge -> OCR the whole image with gocr.
#
# Reads the globals CHARSET, CERTAINTY, MODE, IMAGE_PROG, DEBUG and
# DISPLAY_RESULT.
main() {
    image=$1
    check "$image"
    init
    # NOTE(review): the scratch directory is never removed because the trap
    # below is disabled -- presumably so background viewers can still read
    # the files; confirm before re-enabling.
    # trap 'clean' INT TERM EXIT

    # Convert to png if necessary (jpg2png repoints $image on conversion).
    jpg2png "$image"

    # Trim into the scratch file so the caller's image is never rewritten.
    convert -trim "$image" "$tmp"

    # Clean up the text and display black on white background.
    # Text will be evenly spaced but not aligned.
    convert \
        -quality 100 \
        -shave 1x1 \
        -resize 500%x500% \
        -threshold 50% \
        -deskew 50 \
        -trim \
        -bordercolor white \
        -border 30 \
        "$tmp" "$tmp"

    # NOTE(review): the viewer starts in the background while the next line
    # renames the file it was given, so it may not find it -- confirm
    # whether this first preview is still wanted.
    [ "$DEBUG" = true ] && $IMAGE_PROG "$tmp" &
    mv -f "$tmp" "$dir"/out.png

    if [ "$DEBUG" = true ] || [ "$DISPLAY_RESULT" = true ] ; then
        $IMAGE_PROG "$dir/out.png" &
    fi

    # $MODE must stay unquoted so it splits into separate arguments; the
    # character class is quoted so the shell cannot glob it against a
    # one-character file name in the current directory.
    # NOTE(review): gocr's manual appears to describe -C as the character
    # filter and -c as a debug list -- confirm which one is intended.
    # Post-processing: drop spaces, then map gocr's "unknown" marker to 9
    # (very commonly '9' is '?'). A bare ? is a literal in a basic regular
    # expression, so it needs no backslash.
    # shellcheck disable=2086
    gocr -u '?' $MODE \
        -a "${CERTAINTY:-0}" \
        -c "${CHARSET:-[0-9A-z]}" \
        "$dir/out.png" 2>/dev/null | \
        sed -e 's/ //g' -e 's/?/9/g'
}
# Consume leading option flags; the first word that is not a known flag
# (or an empty/missing argument) ends option parsing.
while [ -n "$1" ] ; do
    case "$1" in
        -h)
            usage
            ;;
        -v)
            DEBUG=true
            shift
            ;;
        -d)
            DISPLAY_RESULT=true
            shift
            ;;
        *)
            break
            ;;
    esac
done

# Everything left over (normally the image path) goes to main.
main "$@"

23
test.sh Executable file
View File

@@ -0,0 +1,23 @@
#!/bin/sh -e
#
# Accuracy check: run the solver on every PNG in a dataset directory and
# compare its output with the expected answer encoded in the file name
# (ANSWER.png).
#
# usage: test.sh [dataset-dir]      (default: dataset)
# env:   GOTCHA  path of the solver script (default: ./gotcha)
dataset=${1:-dataset}
gotcha=${GOTCHA:-./gotcha}

if [ ! -d "$dataset" ] ; then
    >&2 echo "No such dataset directory: $dataset"
    exit 1
fi

# Count exactly the files the loop below will visit, so the denominator
# is not inflated by non-PNG files living in the same directory.
total=0
for i in "$dataset"/*.png ; do
    if [ -f "$i" ] ; then
        total=$((total + 1))
    fi
done

correct=0
count=0
for i in "$dataset"/*.png ; do
    # An unmatched glob is left as a literal pattern; skip it.
    [ -f "$i" ] || continue
    count=$((count + 1))

    # A failing solver run counts as a wrong answer instead of aborting
    # the whole run under -e; whatever it printed is still kept.
    res=$(bash "$gotcha" "$i") || :

    name=${i##*/}
    answer=${name%.png}
    if [ "$res" = "$answer" ] ; then
        correct=$((correct + 1))
        status=SUCCESS
    else
        status=FAIL
    fi
    printf '%s\n' "$status: [count: $count | correct: $correct/$total] - ANS: $answer RES: $res"
done

echo
echo 'Done!'