#!/usr/bin/env sh
#
# Helper script around a CSV export ("dhl.csv"): normalises special
# whitespace/hyphen characters and converts the file from UTF-8 to CP1252
# via a headless LibreOffice round trip (CSV -> ODS -> CSV).
#
# The script first checks its external tools, then runs small self-tests for
# the string helpers and prints "passed ..."/"failed ..." lines to stdout.
#
# NOTE(review): the script ends with "exit 0" right after the self-tests, so
# the conversion part further down is currently never executed. That early
# exit is kept on purpose; remove it to enable the conversion.

# External tools that have to be available on PATH.
dependencies="sed cp mv rm cut libreoffice"

# assert_tools TOOL...
# Exits the script with status 2 (after printing the tool name) as soon as
# one of the given tools cannot be found.
assert_tools () {
  while test "$#" -gt 0; do
    # command -v is the portable replacement for which(1).
    if ! command -v "$1" >/dev/null 2>&1; then
      echo "tool missing: $1"
      exit 2
    fi
    shift
  done
}
# Word splitting of the list is intended here.
# shellcheck disable=SC2086
assert_tools ${dependencies}

# testcmp EXPECTED FUNCTION [ARG...]
# Calls FUNCTION with the given arguments and compares its stdout with
# EXPECTED, printing one "passed"/"failed" line per call.
# Only whitelisted functions are run: chklength and stringcrop consume the
# arguments in pairs, iso3countrycode consumes them one at a time.
# Exits with status 1 when called without any argument.
testcmp () {
  if test "$#" -eq 0; then
    echo "arguments missing."
    exit 1
  fi

  # Whitelist: map the function name to the number of arguments per call.
  case "${2-}" in
    chklength|stringcrop)
      testcmp_arity=2
      ;;
    iso3countrycode)
      testcmp_arity=1
      ;;
    *)
      echo "no test performed, function or command not allowd."
      return 0
      ;;
  esac

  testcmp_expect=$1
  testcmp_fn=$2
  shift 2

  while test "$#" -ge "$testcmp_arity"; do
    if test "$testcmp_arity" -eq 2; then
      testcmp_result=$("$testcmp_fn" "$1" "$2")
      testcmp_label="$1 $2"
    else
      testcmp_result=$("$testcmp_fn" "$1")
      testcmp_label=$1
    fi

    if test "$testcmp_result" = "$testcmp_expect"; then
      printf '%s\n' "passed $testcmp_fn for: $testcmp_label."
    else
      printf '%s\n' "failed $testcmp_fn for: $testcmp_label."
    fi
    shift "$testcmp_arity"
  done
}

# chklength EXPECTED_LENGTH STRING
# Prints "true" when STRING is EXPECTED_LENGTH characters long, otherwise
# prints the actual length of STRING.
# Prints "-1" and returns 1 when called without arguments.
chklength () {
  if test "$#" -eq 0; then
    echo "-1"
    return 1
  fi

  # ${#2} replaces the non-POSIX "expr length", which also misbehaves for
  # strings that look like expr keywords or operators.
  # A non-numeric EXPECTED_LENGTH makes test fail and falls into the
  # else branch.
  if test "$1" -eq "${#2}"; then
    echo "true"
  else
    echo "${#2}"
  fi
}

testchklength () {
  testcmp "true" "chklength" "3" "123"
  testcmp "3" "chklength" "2" "1-2"
}
testchklength

# stringcrop STRING MAXLEN
# Prints the first MAXLEN characters of STRING (as counted by cut -c).
stringcrop () {
  printf '%s\n' "$1" | cut -c "1-$2"
}

teststringcrop () {
  testcmp "string" "stringcrop" "string too long" "6"
}
teststringcrop

# iso3countrycode COUNTRY
# Converts a (German) country name to its three-letter ISO 3166-1 alpha-3
# code and prints it. For an unknown country or a missing argument a message
# is printed to stdout instead and 1 is returned.
iso3countrycode () {
  if test "$#" -eq 0; then
    # The message intentionally spans two lines.
    printf 'argument missing. \n(%s)\n' "$0"
    return 1
  fi

  case "$1" in
    Belgien) echo "BEL" ;;
    Deutschland) echo "DEU" ;;
    England) echo "GBR" ;;
    Luxemburg|Luxembourg) echo "LUX" ;;
    Niederlande) echo "NLD" ;;
    Österreich) echo "AUT" ;;
    Schweiz) echo "CHE" ;;
    Ungarn) echo "HUN" ;;
    *)
      echo "unknown country: $1"
      # return instead of exit: a direct (non-subshell) call must not
      # terminate the whole script.
      return 1
      ;;
  esac
}

testiso3 () {
  # covering data in https://doku.ccc.de/index.php?title=Attribut:Chaostreff-Country&limit=500&offset=0
  testcmp "BEL" "iso3countrycode" "Belgien"
  testcmp "DEU" "iso3countrycode" "Deutschland"
  testcmp "GBR" "iso3countrycode" "England"
  testcmp "LUX" "iso3countrycode" "Luxembourg" "Luxemburg"
  testcmp "NLD" "iso3countrycode" "Niederlande"
  testcmp "AUT" "iso3countrycode" "Österreich"
  testcmp "HUN" "iso3countrycode" "Ungarn"
  testcmp "CHE" "iso3countrycode" "Schweiz"
  testcmp "" "iso3countrycode" "Simbabwe" # should fail
}
testiso3

# Stop after the self-tests; everything below is not executed (see header).
exit 0

# target filename (currently unused)
tn="dhl-cp1252.csv"

# TODO: download file (it is UTF-8 encoded)
fn="dhl.csv"

# backup
cp -f "${fn}" "${fn}.bak"

# Replace special spaces and hyphens.
# sed has no \uXXXX escapes: inside a bracket expression they would be read
# as the literal characters '\', 'u', digits and letters. The UTF-8 byte
# sequences are therefore built with printf and combined by alternation,
# which also works in a non-UTF-8 locale. -E is needed so that '+' means
# "one or more" instead of a literal plus sign.
u00a0=$(printf '\302\240')     # U+00A0 no-break space
u202f=$(printf '\342\200\257') # U+202F narrow no-break space
u2010=$(printf '\342\200\220') # U+2010 hyphen
u2011=$(printf '\342\200\221') # U+2011 non-breaking hyphen
u2012=$(printf '\342\200\222') # U+2012 figure dash
u2013=$(printf '\342\200\223') # U+2013 en dash
u00ad=$(printf '\302\255')     # U+00AD soft hyphen
ufeff=$(printf '\357\273\277') # U+FEFF zero width no-break space / BOM
# NOTE: "sed -i" without a suffix argument is the GNU form.
sed -i -E \
  -e "s/(${u00a0}|${u202f}|[[:space:]])+/ /g" \
  -e "s/(${u2010}|${u2011}|${u2012}|${u2013}|-)+/-/g" \
  -e "s/(${u00ad}|${ufeff})+//g" \
  "$fn"

# convert encoding from UTF-8 to CP1252 "Windows"
# Another possibility for the conversion (seems not to be reliable):
#   iconv -o "$of" -f UTF-8 -t CP1252 "$if"

# convcsv FILE
# Converts FILE in place by letting LibreOffice import it as CSV, write an
# intermediate .ods file and export that as CSV again.
# Exits with status 1 without an argument and with status 2 when the
# intermediate .ods file already exists; returns the status of the first
# failing step otherwise.
# NOTE(review): LibreOffice writes converted files to the current directory
# unless --outdir is given, so FILE is expected to live in the current
# directory - confirm.
convcsv () {
  if test "$#" -eq 0; then
    echo "argument missing."
    exit 1
  fi

  convcsv_in=$1
  # Intermediate spreadsheet: FILE with its last extension replaced by .ods.
  convcsv_ods="${convcsv_in%.*}.ods"

  # Never overwrite (and later delete) an existing spreadsheet.
  if test -e "$convcsv_ods"; then
    echo "file already exists."
    exit 2
  fi

  # doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
  # Import tokens 59,34,76,1: separator ';', text delimiter '"', presumably
  # character set UTF-8 and first line 1 - see the documentation above.
  libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 \
    "$convcsv_in" || return

  # Move the original out of the way: the export below writes a .csv with
  # the same base name.
  mv -- "$convcsv_in" "${convcsv_in}.bak" || return

  # Export tokens 44,34,1,...: separator ',', text delimiter '"', presumably
  # character set CP1252 - see the documentation above.
  libreoffice --headless --infilter=CSV:59,34,76,1 \
    --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" \
    "$convcsv_ods" || return

  # Clean up the intermediate spreadsheet and the moved original.
  rm -- "$convcsv_ods" "${convcsv_in}.bak"
}
convcsv "$fn"