#!/usr/bin/env sh
#
# Clean up the UTF-8 encoded, semicolon-separated file "dhl.csv" and re-export
# it as a comma-separated CP1252 ("Windows") CSV:
#   1. back up the input as dhl.csv.bak,
#   2. normalise special spaces/hyphens and drop invisible characters,
#   3. convert the encoding through a headless LibreOffice round trip.
#
# Usage: run without arguments from the directory that contains dhl.csv.
# Exit status: 1 on usage, backup, clean-up or conversion errors,
#              2 when a required tool is missing or a file is in the way.
#
# The script sticks to POSIX sh. Functions cannot declare local variables
# there, so every function uses its own variable prefix (tc_, cl_, cv_) to keep
# from clobbering script-level variables such as "fn".

# External commands the script relies on.
dependencies="sed test cp mv rm wc libreoffice"

# assert_tools TOOL...
# Exit with status 2 (message on stderr) if any TOOL cannot be resolved.
assert_tools () {
  while test "$#" -gt 0; do
    # "command -v" is the POSIX replacement for the non-standard "which".
    command -v "$1" >/dev/null 2>&1 || {
      printf 'tool missing: %s\n' "$1" >&2
      exit 2
    }
    shift
  done
}

# Word splitting of the space-separated list is intended here.
# shellcheck disable=SC2086
assert_tools ${dependencies}

# testcmp EXPECTED FUNCTION [ARGUMENT...]
# Run a whitelisted FUNCTION and report on stdout whether its output equals
# EXPECTED.
#   chklength        is called once with the first two ARGUMENTs.
#   iso3countrycode  is called once per ARGUMENT.
# Any other FUNCTION is rejected with a message. Exits 1 when called without
# any arguments.
testcmp () {
  if test "$#" -eq 0; then
    echo "arguments missing."
    exit 1
  fi
  case "$2" in # whitelisted functions
    chklength)
      tc_expect=$1; shift
      tc_fn=$1; shift
      tc_result=$("$tc_fn" "$1" "$2")
      if test "$tc_result" = "$tc_expect"; then
        echo "$tc_fn passed for: $1 $2."
      else
        echo "$tc_fn failed for: $1 $2."
      fi
      ;;
    iso3countrycode)
      tc_expect=$1; shift
      tc_fn=$1; shift
      for tc_arg in "$@"; do
        tc_result=$("$tc_fn" "$tc_arg")
        if test "$tc_result" = "$tc_expect"; then
          echo "$tc_fn passed for: $tc_arg."
        else
          echo "$tc_fn failed for: $tc_arg."
        fi
      done
      ;;
    *)
      echo "no test performed, function or command not allowd."
      ;;
  esac
}

# chklength EXPECTED_LENGTH STRING
# Print "true" if STRING is EXPECTED_LENGTH characters long, otherwise print
# the actual length. Prints "-1" and returns 1 when called without arguments.
chklength () {
  if test "$#" -eq 0; then
    echo "-1"
    return 1
  fi
  # "wc -m" counts characters in the current locale and, unlike "expr length",
  # is specified by POSIX. The arithmetic expansion strips the padding some wc
  # implementations put in front of the number.
  cl_length=$(printf '%s' "$2" | wc -m)
  cl_length=$((cl_length))
  # A non-numeric EXPECTED_LENGTH makes test fail and thus reports the length.
  if test "$1" -eq "$cl_length" 2>/dev/null; then
    echo "true"
  else
    echo "$cl_length"
  fi
}

# Self-test for chklength; prints the results on stdout.
testchklength () {
  chklength "3" "123"
  testcmp "true" "chklength" "3" "123"
  testcmp "3" "chklength" "2" "1-2"
}

# target filename (currently not used by any step below)
tn="dhl-cp1252.csv"

# todo: download file (it is UTF-8 encoded)
fn="dhl.csv"

# backup; do not edit the file in place when no backup could be made
cp -f -- "${fn}" "${fn}.bak" || {
  printf 'cannot back up: %s\n' "${fn}" >&2
  exit 1
}

# replace special spaces and hyphens
#
# The characters are given as UTF-8 byte sequences produced by printf octal
# escapes: POSIX sh has no \u escape, and inside a sed bracket expression
# "\u00A0" is just the literal characters \, u, 0 and A. Alternation is used
# instead of a bracket expression so that the multi-byte sequences also match
# in a single-byte locale.
nbsp=$(printf '\302\240')        # U+00A0 no-break space
nnbsp=$(printf '\342\200\257')   # U+202F narrow no-break space
hy2010=$(printf '\342\200\220')  # U+2010 hyphen
hy2011=$(printf '\342\200\221')  # U+2011 non-breaking hyphen
hy2012=$(printf '\342\200\222')  # U+2012 figure dash
hy2013=$(printf '\342\200\223')  # U+2013 en dash
shy=$(printf '\302\255')         # U+00AD soft hyphen
bom=$(printf '\357\273\277')     # U+FEFF zero width no-break space / BOM

# -E enables extended regular expressions, where "+" means "one or more".
#   1st expression: every run of (special) whitespace becomes one space
#   2nd expression: every run of hyphen-like characters becomes one "-"
#   3rd expression: soft hyphens and byte order marks are removed
sed -i -E \
  -e "s/(${nbsp}|${nnbsp}|[[:space:]])+/ /g" \
  -e "s/(${hy2010}|${hy2011}|${hy2012}|${hy2013}|-)+/-/g" \
  -e "s/(${shy}|${bom})+//g" \
  "${fn}" || {
  printf 'cannot normalise: %s\n' "${fn}" >&2
  exit 1
}

# todo: validate length for entries

# todo: convert country to countrycode as in ISO3

# iso3countrycode COUNTRY
# Print the three-letter country code for a German (or, for Luxembourg, also
# the English) country name. For an unknown COUNTRY or a missing argument a
# message is printed on stdout instead and the function returns 1; it returns
# rather than exits so that a direct call does not end the whole script.
iso3countrycode () {
  if test "$#" -eq 0; then
    printf 'argument missing. \n(%s)\n' "$0"
    return 1
  fi
  case "$1" in
    Belgien) echo "BEL" ;;
    Deutschland) echo "DEU" ;;
    England) echo "GBR" ;;
    Luxemburg|Luxembourg) echo "LUX" ;;
    Niederlande) echo "NLD" ;;
    Österreich) echo "AUT" ;;
    Schweiz) echo "CHE" ;;
    Ungarn) echo "HUN" ;;
    *)
      echo "unknown country: $1"
      return 1
      ;;
  esac
}

# Self-test for iso3countrycode; prints the results on stdout.
testiso3 () {
  # covering data in https://doku.ccc.de/index.php?title=Attribut:Chaostreff-Country&limit=500&offset=0
  testcmp "BEL" "iso3countrycode" "Belgien"
  testcmp "DEU" "iso3countrycode" "Deutschland"
  testcmp "GBR" "iso3countrycode" "England"
  testcmp "LUX" "iso3countrycode" "Luxembourg" "Luxemburg"
  testcmp "NLD" "iso3countrycode" "Niederlande"
  testcmp "AUT" "iso3countrycode" "Österreich"
  testcmp "HUN" "iso3countrycode" "Ungarn"
  testcmp "CHE" "iso3countrycode" "Schweiz"
  testcmp "" "iso3countrycode" "Simbabwe" # should fail
}

#~ testiso3

# convert encoding from UTF-8 to CP1252 "Windows"
# another possibility for conversion? seems not to be reliable:
#   iconv -o "$of" -f UTF-8 -t CP1252 "$if"
#
# convcsv FILE
# Re-encode the CSV file FILE in place. FILE is first converted to an .ods
# spreadsheet next to it, then moved aside so that the second LibreOffice run
# can write a CSV under the original name; the intermediate files are removed
# once that CSV exists.
# Exits 1 when FILE is not given and 2 when FILE is missing or the intermediate
# .ods file already exists. Returns 1 when a conversion step fails; in that
# case FILE is put back under its original name.
# doc for headless conversion options:
#   https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
convcsv () {
  if test "$#" -eq 0; then
    echo "argument missing." >&2
    exit 1
  fi
  cv_src=$1
  if ! test -f "$cv_src"; then
    printf 'file does not exist: %s\n' "$cv_src" >&2
    exit 2
  fi

  # Split FILE into directory and base name without its last extension.
  # LibreOffice writes into the current directory unless --outdir is given,
  # so the directory of FILE is passed explicitly.
  case "$cv_src" in
    */*) cv_dir=${cv_src%/*}; test -n "$cv_dir" || cv_dir=/ ;;
    *) cv_dir=. ;;
  esac
  cv_base=${cv_src##*/}
  cv_stem=${cv_base%.*}
  test -n "$cv_stem" || cv_stem=$cv_base
  cv_ods="${cv_dir}/${cv_stem}.ods"
  # Holding place for FILE during the second conversion. It deliberately
  # differs from "FILE.bak" so that a backup of that name survives.
  cv_hold="${cv_src}.utf8"

  if test -e "$cv_ods"; then
    echo "file already exists." >&2
    exit 2
  fi

  # Import filter options 59,34,76,1: ";" separator, '"' text delimiter,
  # character set 76 (UTF-8), start at line 1.
  # The output file is checked as well instead of trusting the exit status
  # alone.
  if ! libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 \
      --outdir "$cv_dir" "$cv_src" || ! test -f "$cv_ods"; then
    printf 'conversion to ods failed: %s\n' "$cv_src" >&2
    return 1
  fi

  mv -- "$cv_src" "$cv_hold" || {
    printf 'cannot move aside: %s\n' "$cv_src" >&2
    return 1
  }

  # Export filter options 44,34,1,...: "," separator, '"' text delimiter,
  # character set 1 (the CP1252 target named above).
  # NOTE(review): the original command also passed --infilter=CSV:59,34,76,1
  # here, which would apply the CSV import filter to the .ods input; it has
  # been dropped - confirm against a real run.
  if libreoffice --headless \
      --convert-to 'csv:Text - txt - csv (StarCalc):44,34,1,0,1,,0' \
      --outdir "$cv_dir" "$cv_ods" && test -f "$cv_src"; then
    rm -- "$cv_ods" "$cv_hold"
  else
    printf 'conversion to csv failed: %s\n' "$cv_ods" >&2
    # Put the unconverted file back so that nothing is lost.
    mv -- "$cv_hold" "$cv_src"
    return 1
  fi
}

convcsv "$fn"