#!/usr/bin/env sh
#
# Helper functions (plus self-tests) for preparing the CSV file "dhl.csv",
# and an as yet untested conversion of that file from UTF-8 to
# CP1252 / WINDOWS-1252 via LibreOffice.
#
# Running the script checks the required tools, runs the self-tests and then
# exits. Everything below the "exit 0" is deliberately unreachable until it
# has been tested.
#
# The script targets POSIX sh, so there is no "local": helper variables inside
# functions carry a short prefix instead to avoid clobbering each other.

# Tools that have to be available. "pass" is optional and checked in getauth.
dependencies="sed test cp mv rm grep head cut file libreoffice"

# target filenames
tn1="dhl-cp1252-n.csv"
tn2="dhl-cp1252-rn.csv"

# Abort with exit status 2 if any of the given tools is not available.
# Arguments: names of the tools to look up.
assert_tools () {
  while test "$#" -gt 0; do
    if ! command -v "$1" >/dev/null 2>&1; then
      printf 'tool missing: %s\n' "$1" >&2
      exit 2
    fi
    shift
  done
}

# Word splitting is intended here: one argument per tool name.
# shellcheck disable=SC2086
assert_tools ${dependencies}

# Run a whitelisted function and compare its outcome with an expectation.
# usage: testcmp "cmpresult" "cmdfunctionname" [ "argument1", "argument2", […] ]
# Outputs: one "passed …"/"failed …" line per performed check on stdout.
# Returns: 0 if every check passed, 1 if a check failed, was skipped or the
#          function is not whitelisted. Exits with 1 when called without
#          any arguments.
# The function under test always runs inside $( … ), so an "exit" in it only
# ends that subshell and not the test run.
testcmp () {
  if test "$#" -eq 0; then
    echo "arguments missing."
    exit 1
  fi
  tc_expect="$1"
  tc_fn="${2-}"
  tc_status=0
  # whitelisted functions
  case "$tc_fn" in
    # 2 arg functions whose result is a file named by the expectation
    convnl)
      shift 2
      if test -e "$tc_expect"; then
        echo "not testing as result $tc_expect already exists."
        tc_status=1
      else
        tc_result=$("$tc_fn" "$1" "$2")
        if test -e "$tc_expect"; then
          echo "passed $tc_fn for: $1 $2."
        else
          echo "failed $tc_fn for: $1 $2."
          tc_status=1
        fi
      fi
      ;;
    # 2 arg functions, arguments are consumed pairwise
    chklength|stringcrop)
      shift 2
      while test "$#" -ge 2; do
        tc_result=$("$tc_fn" "$1" "$2")
        if test "$tc_result" = "$tc_expect"; then
          echo "passed $tc_fn for: $1 $2."
        else
          echo "failed $tc_fn for: $1 $2."
          tc_status=1
        fi
        shift 2
      done
      ;;
    # 1 arg functions, every remaining argument is one test case
    iso3countrycode)
      shift 2
      for tc_arg in "$@"; do
        tc_result=$("$tc_fn" "$tc_arg")
        if test "$tc_result" = "$tc_expect"; then
          echo "passed $tc_fn for: $tc_arg."
        else
          echo "failed $tc_fn for: $tc_arg."
          tc_status=1
        fi
      done
      ;;
    *)
      echo "no test performed, function or command not allowd."
      tc_status=1
      ;;
  esac
  return "$tc_status"
}

# Check the length of a string.
# Arguments: $1 - expected length, $2 - string to measure
# Outputs:   "true" if the length matches, otherwise the actual length;
#            "-1" when called without arguments.
# Returns:   1 when called without arguments, otherwise 0.
chklength () {
  if test "$#" -eq 0; then
    echo "-1"
    return 1
  fi
  cl_string="${2-}"
  # ${#var} is the POSIX way to get the length; "expr length" is an extension.
  cl_length=${#cl_string}
  if test "$1" -eq "$cl_length"; then
    echo "true"
  else
    echo "$cl_length"
  fi
}

testchklength () {
  testcmp "true" "chklength" "3" "123"
  testcmp "3" "chklength" "2" "1-2"
}

# Print the first $2 characters of the string $1.
stringcrop () {
  printf '%s\n' "$1" | cut -c1-"$2"
}

teststringcrop () {
  testcmp "string" "stringcrop" "string too long" "6"
}

# convert country to countrycode as in ISO3, see https://unstats.un.org/unsd/tradekb/Knowledgebase/Country-Code
# Arguments: $1 - country name (German, English, French or native spelling)
# Outputs:   the three letter code, or a message for a missing argument or
#            an unknown country.
# Returns:   1 if the argument is missing; exits with 1 for an unknown
#            country (which only ends the subshell when called via $( … )).
iso3countrycode () {
  if test "$#" -eq 0; then
    echo "argument missing. ($0)"
    return 1
  fi
  case "$1" in
    Belgien|Belgium|Belgique) echo "BEL";;
    Deutschland|Germany|Allemagne) echo "DEU";;
    England|"Great Britain"|"United Kingdom"|Scotland|Wales|"North Ireland"|"Northern Ireland") echo "GBR";;
    Luxemburg|Luxembourg) echo "LUX";;
    Niederlande|Netherlands) echo "NLD";;
    Österreich|Austria|Autriche) echo "AUT";;
    Schweiz|Switzerland|Suisse) echo "CHE";;
    Ungarn|Hungaria|Hungary) echo "HUN";;
    #
    Tschechien|Czechia|Tchéquie|Česko) echo "CZE";;
    Polen|Poland|Pologne|Polska) echo "POL";;
    Weißrussland|Belarus|Biélorussie|Беларусь) echo "BLR";;
    Russland|Russia|Russie|Россия) echo "RUS";;
    Slowakei|Slovakia|Slovaquie|Slovensko) echo "SVK";;
    Italien|Italy|Italie|Italia) echo "ITA";;
    Norwegen|Norway|Norvège|Norge|Noreg|Norga) echo "NOR";;
    Schweden|Sweden|Suède|Sverige) echo "SWE";;
    # "Dänemarl" is a misspelling that earlier versions matched; kept so that
    # existing input still resolves.
    Dänemark|Dänemarl|Denmark|Danemark|Danmark) echo "DNK";;
    Island|Iceland|Islande|Ísland) echo "ISL";;
    Israel|Israël) echo "ISR";;
    Finnland|Finland|Finlande|Suomi) echo "FIN";;
    Spanien|Spain|Espagne|España) echo "ESP";;
    Estland|Estonia|Estonie|Eesti) echo "EST";;
    Litauen|Lithuania|Lituanie|Lietuva) echo "LTU";;
    Lettland|Latvia|Lettonie|Latvija) echo "LVA";;
    Irland|Ireland|Irlande|Éire) echo "IRL";;
    Indien|India|Inde|Bharat) echo "IND";;
    China|Chine|中国|Zhōngguó) echo "CHN";;
    Japan|Japon|Nippon|Nihon|日本国|日本) echo "JPN";;
    *) echo "unknown country: $1"; exit 1;;
  esac
}

testiso3 () {
  # covering data in https://doku.ccc.de/index.php?title=Attribut:Chaostreff-Country&limit=500&offset=0
  testcmp "BEL" "iso3countrycode" "Belgien" "Belgium" "Belgique"
  testcmp "DEU" "iso3countrycode" "Deutschland" "Germany" "Allemagne"
  testcmp "GBR" "iso3countrycode" "England" "Great Britain" "United Kingdom" "Scotland" "Wales" "North Ireland"
  testcmp "LUX" "iso3countrycode" "Luxembourg" "Luxemburg"
  testcmp "NLD" "iso3countrycode" "Niederlande" "Netherlands"
  testcmp "AUT" "iso3countrycode" "Österreich" "Austria" "Autriche"
  testcmp "HUN" "iso3countrycode" "Ungarn" "Hungaria"
  testcmp "CHE" "iso3countrycode" "Schweiz" "Switzerland" "Suisse"
  # and covering some more
  testcmp "CZE" "iso3countrycode" "Tschechien" "Czechia" "Tchéquie" "Česko"
  testcmp "POL" "iso3countrycode" "Polen" "Poland" "Pologne" "Polska"
  testcmp "BLR" "iso3countrycode" "Weißrussland" "Belarus" "Biélorussie" "Беларусь"
  testcmp "RUS" "iso3countrycode" "Russland" "Russia" "Russie" "Россия"
  testcmp "SVK" "iso3countrycode" "Slowakei" "Slovakia" "Slovaquie" "Slovensko"
  testcmp "ITA" "iso3countrycode" "Italien" "Italy" "Italie" "Italia"
  testcmp "NOR" "iso3countrycode" "Norwegen" "Norway" "Norvège" "Norge" "Noreg" "Norga"
  testcmp "SWE" "iso3countrycode" "Schweden" "Sweden" "Suède" "Sverige"
  testcmp "DNK" "iso3countrycode" "Dänemark" "Dänemarl" "Denmark" "Danemark" "Danmark"
  testcmp "ISL" "iso3countrycode" "Island" "Iceland" "Islande" "Ísland"
  testcmp "ISR" "iso3countrycode" "Israel" "Israel" "Israël"
  testcmp "FIN" "iso3countrycode" "Finnland" "Finland" "Finlande" "Suomi" "Finland"
  testcmp "ESP" "iso3countrycode" "Spanien" "Spain" "Espagne" "España"
  testcmp "EST" "iso3countrycode" "Estland" "Estonia" "Estonie" "Eesti"
  testcmp "LTU" "iso3countrycode" "Litauen" "Lithuania" "Lituanie" "Lietuva"
  testcmp "LVA" "iso3countrycode" "Lettland" "Latvia" "Lettonie" "Latvija"
  testcmp "IRL" "iso3countrycode" "Irland" "Ireland" "Irlande" "Éire"
  testcmp "IND" "iso3countrycode" "Indien" "India" "Inde" "Bharat"
  testcmp "CHN" "iso3countrycode" "China" "China" "Chine" "中国" "Zhōngguó"
  testcmp "JPN" "iso3countrycode" "Japan" "Japan" "Japon" "Nippon" "Nihon" "日本国" "日本"
  testcmp "" "iso3countrycode" "Simbabwe" # should fail
}

# replace newline 0a with 0d 0a for dos version
# Arguments: $1 - input file, $2 - output file (gets overwritten)
# Returns:   0 on success, 1 if an argument is missing or the input file
#            does not exist, otherwise the exit status of sed.
convnl () {
  if test "$#" -lt 2; then
    echo "arguments missing." >&2
    return 1
  fi
  nl_in="$1"
  nl_out="$2"
  if ! test -f "$nl_in"; then
    # Checked up front so that the redirection does not leave an empty
    # output file behind.
    printf 'input file not found: %s\n' "$nl_in" >&2
    return 1
  fi
  # printf yields a carriage return in every POSIX shell, whereas the
  # handling of "\r" by echo differs between shells. Command substitution
  # only strips trailing newlines, so the CR survives.
  nl_cr=$(printf '\r')
  sed -e "s/\$/${nl_cr}/" -- "$nl_in" > "$nl_out"
}

# Run the convnl test and remove its result file again. The file is only
# removed if the test created it: testcmp returns non-zero when the file
# existed beforehand, so a pre-existing file is never deleted.
testconvnl () {
  testcmp "$tn2" "convnl" "$tn1" "$tn2" && rm -f -- "$tn2"
}

# Credentials for the MediaWiki, taken from the configuration file ds.conf
# or, if no user is configured there, from the unix password manager "pass".
# Globals:  user, pass, passfile, passstore (written)
# Outputs:  "user:password" on stdout.
# Exits with 1 if no credentials can be determined.
getauth () {
  user=""
  pass=""
  if test -f ds.conf; then
    # -f2- keeps values that contain a "=" themselves.
    user=$(grep -i "^user=" ds.conf | cut -d= -f2-)
    pass=$(grep -i "^pass=" ds.conf | cut -d= -f2-)
    if test -z "$user"; then
      passfile=$(grep -i "^passfile=" ds.conf | cut -d= -f2-) # ccc/doku-htaccess
      passstore=$(grep -i "^passstore=" ds.conf | cut -d= -f2-) # ~/.password-store/
      if test -n "$passfile"; then
        # "command -v" only looks the tool up; a plain "command pass" would
        # execute it.
        if ! command -v pass >/dev/null 2>&1; then
          >&2 echo "tool missing: pass (unix password manager)"
          exit 1
        fi
        # A "~" read from a file is not expanded by the shell.
        case "$passstore" in
          "~/"*) passstore="${HOME}/${passstore#"~/"}" ;;
        esac
        # pass keeps its entries as "<name>.gpg"; the bare name is accepted
        # as well for compatibility with the former check.
        if ! test -f "${passstore}${passfile}" && ! test -f "${passstore}${passfile}.gpg"; then
          >&2 printf 'path for password manager does not match: %s\n' "$passfile"
          exit 1
        fi
        # last space separated word of the line starting with "login "
        user=$(pass "$passfile" | grep -i "^login " | sed 's/.* //')
        pass=$(pass "$passfile" | head -n 1)
      fi
    fi
  fi
  if test -z "$user"; then
    >&2 echo "no auth data found."
    exit 1
  fi
  printf '%s\n' "${user}:${pass}"
}

runtests () {
  testchklength
  teststringcrop
  testiso3
  testconvnl
}

runtests
exit 0

# untested:

# todo: download file (its UTF-8 encoded)
fn="dhl.csv"

# backup
cp -f -- "$fn" "${fn}.bak"

# replace special spaces and hyphens
# sed knows no \uXXXX escapes and "+" needs extended regular expressions, so
# the characters are generated as UTF-8 byte sequences and combined by
# alternation.
#   U+00A0 no-break space, U+202F narrow no-break space
sp_pat="$(printf '\302\240|\342\200\257')|[[:space:]]"
#   U+2010 … U+2013 hyphen and dash variants, U+002D hyphen-minus
hy_pat=$(printf '\342\200\220|\342\200\221|\342\200\222|\342\200\223|-')
#   U+00AD soft hyphen, U+FEFF byte order mark / zero width no-break space
zw_pat=$(printf '\302\255|\357\273\277')
sed -i -E \
  -e "s/(${sp_pat})+/ /g" \
  -e "s/(${hy_pat})+/-/g" \
  -e "s/(${zw_pat})+//g" \
  "$fn"
# todo: add quote variants
# todo: check for delimiters that may conflict during conversion

# Print the first word of the description that file(1) gives for $1,
# i.e. the first word after the last colon of its output.
fileformat () {
  file -- "$1" | sed 's/.*://' | cut -d' ' -f2
}

# convert encoding from UTF-8 to CP1252 / WINDOWS-1252
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t WINDOWS-1252 "$if"
# Arguments: $1 - CSV file to convert, it is replaced by the converted file
# Returns:   1 if a conversion step fails; exits with 1 if the argument is
#            missing and with 2 if the intermediate .ods file already exists.
convcsv () {
  if test "$#" -eq 0; then
    >&2 echo "argument missing."
    exit 1
  fi
  # doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
  cv_in="$1"
  cv_format=$(fileformat "$cv_in")
  if test "$cv_format" != "UTF-8"; then
    >&2 echo "source file encoding not matching 'UTF-8' as expected."
  fi
  # intermediate file: same name with the last extension replaced by .ods
  cv_ods="${cv_in%.*}.ods"
  if test -e "$cv_ods"; then
    >&2 echo "output file already exists."
    exit 2
  fi
  #~ iconv -o "$of" -f $format -t cp1252//TRANSLIT "$if"
  #~ iconv -o "$of" -f $format -t WINDOWS-1252 "$if"
  # to take care of more options in csv at the same time, convert to ods and back to csv
  if ! libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 "$cv_in"; then
    >&2 echo "conversion to ods failed."
    return 1
  fi
  mv -- "$cv_in" "${cv_in}.bak"
  # NOTE(review): the CSV input filter is passed here although the input is
  # the .ods file; kept as it was, please confirm that this is intended.
  if ! libreoffice --headless --infilter=CSV:59,34,76,1 --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" "$cv_ods"; then
    >&2 echo "conversion back to csv failed, restoring the source file."
    mv -- "${cv_in}.bak" "$cv_in"
    return 1
  fi
  rm -- "$cv_ods" "${cv_in}.bak"
  cv_format=$(fileformat "$cv_in")
  if test "$cv_format" != "Non-ISO"; then
    >&2 echo "output file encoding not matching the expected Non-ISO, its $cv_format."
  fi
}

convcsv "$fn"