diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b319d8f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+**/data
diff --git a/import/api/wpforms-crawler/.gitignore b/import/api/wpforms-crawler/.gitignore
new file mode 100644
index 0000000..7997ddc
--- /dev/null
+++ b/import/api/wpforms-crawler/.gitignore
@@ -0,0 +1,2 @@
+config.sh
+data*
diff --git a/import/api/wpforms-crawler/README.md b/import/api/wpforms-crawler/README.md
new file mode 100644
index 0000000..af7d405
--- /dev/null
+++ b/import/api/wpforms-crawler/README.md
@@ -0,0 +1,3 @@
+[wpforms](https://wpforms.com/) uses a counter for `ENTRY_ID`s and seems to be vulnerable to CSRF :(
+
+Once we have obtained a cookie, crawling is trivial…
diff --git a/import/api/wpforms-crawler/config.sh b/import/api/wpforms-crawler/config.sh
new file mode 100644
index 0000000..792ad9f
--- /dev/null
+++ b/import/api/wpforms-crawler/config.sh
@@ -0,0 +1,34 @@
+## Shared settings sourced by download.sh and merge.sh.
+
+## common
+
+# Directory that holds one downloaded CSV file per entry.
+DATA_DIR="./data"
+
+## download
+
+# Inclusive range of entry ids that download.sh iterates over.
+# NOTE(review): the trailing 57 / 1000 look like the full range -- confirm.
+START=500 #57
+END=500 #1000
+
+WP_ADMIN_URL='https://example.com/wp-admin/admin.php'
+FORM_ID=16993
+NONCE='caffeeeeee'
+AUTHORIZATION_HEADER='authorization: Basic Base64EncodedDataaaaaaaaaa=='
+COOKIE_HEADER='cookie: wordpress_sec_thisCopiedFromTheBrower; wordpress_logged_in_; some_other_cookies'
+
+## HEADERS_THAT_SEEM_TO_BE_NOT_REQUIRED
+#-H 'authority: example.com' \
+#-H 'upgrade-insecure-requests: 1' \
+#-H 'cache-control: max-age=0' \
+
+## merge
+
+# File that merge.sh writes the merged CSV to.
+OUT="/tmp/example.csv"
+
+## setup
+
+# Create the data directory (and any missing parents) if it is not there yet.
+mkdir -p -- "$DATA_DIR"
diff --git a/import/api/wpforms-crawler/download.sh b/import/api/wpforms-crawler/download.sh
new file mode 100755
index 0000000..5d377f6
--- /dev/null
+++ b/import/api/wpforms-crawler/download.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Downloads the wpforms entries START..END (see config.sh), one CSV file per
+# entry, into DATA_DIR.
+
+# pipefail: a failing curl now fails the whole `curl | tee` pipeline, so the
+# `|| exit` in the loop below can actually stop the crawl.
+set -euo pipefail
+
+source ./config.sh
+
+# Exports a single entry as CSV; the response is printed to stdout and saved
+# as ${DATA_DIR}/<entry>.csv.
+# Arguments: $1 - entry id
+# Returns:   the status of the curl | tee pipeline
+function download() {
+  local entry=$1
+  # local, so the OUT setting from config.sh is not overwritten
+  local out_file="${DATA_DIR}/${entry}.csv"
+  printf '%s\n' "$entry"
+  curl "${WP_ADMIN_URL}?page=wpforms-tools&view=export&action=wpforms_tools_single_entry_export_download&form=${FORM_ID}&entry_id=${entry}&export_options%5B0%5D=csv&nonce=${NONCE}" \
+    -H "$COOKIE_HEADER" \
+    -H "$AUTHORIZATION_HEADER" \
+    --compressed | tee "$out_file"
+}
+
+for ((i = START; i <= END; i++)); do
+  download "$i" || exit
+done
diff --git a/import/api/wpforms-crawler/merge.sh b/import/api/wpforms-crawler/merge.sh
new file mode 100755
index 0000000..0e5d480
--- /dev/null
+++ b/import/api/wpforms-crawler/merge.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+#
+# Merges all files in DATA_DIR into OUT (see config.sh): the first line of the
+# first file is kept as header, the first line of every other file is dropped.
+# NOTE(review): assumes every file starts with the same header row -- confirm.
+
+set -euo pipefail
+
+source ./config.sh
+
+# nullglob: an empty DATA_DIR expands to no files instead of a literal '*'.
+shopt -s nullglob
+files=("$DATA_DIR"/*)
+if (( ${#files[@]} == 0 )); then
+  printf 'no files found in %s\n' "$DATA_DIR" >&2
+  exit 1
+fi
+
+# FNR == 1 matches the first line of each file; NR > 1 spares the first line read overall.
+awk 'FNR == 1 && NR > 1 { next } { print }' "${files[@]}" > "$OUT"
+
+wc -l "$OUT"