ds-dhl/dhl.sh

124 lines
3.2 KiB
Bash
Executable File

#!/usr/bin/env sh
# External tools this script runs; the list is handed to assert_tools so a
# missing tool is reported before any file is touched. mv and rm are included
# because convcsv moves and deletes files.
dependencies="sed test cp mv rm libreoffice expr"
# Verify that every named tool can be resolved by the shell.
# Arguments: $@ - command names to look up
# Outputs:   "tool missing: NAME" on stderr for the first tool not found
# Exits:     terminates the script with status 2 if a tool is missing
assert_tools () {
  while test "$#" -gt 0; do
    # command -v is the POSIX lookup and needs no external helper; `which`
    # is an optional program and would itself have to be installed.
    if ! command -v "$1" >/dev/null 2>&1; then
      echo "tool missing: $1" >&2
      exit 2
    fi
    shift
  done
}
# Check the required tools before doing anything else; assert_tools exits
# with status 2 on the first one it cannot find. ${dependencies} is left
# unquoted on purpose so the list splits into one argument per tool.
assert_tools ${dependencies}
# Run a whitelisted function and compare its stdout with an expected value.
# usage: testcmp "expected" "functionname" "argument1" [ "argument2" ... ]
#   chklength        is called once with the first two arguments
#   iso3countrycode  is called once per argument
# Outputs: one "<functionname> passed for: ..." or "... failed for: ..." line
#          per comparison, or a notice if the function is not whitelisted.
# Exits:   terminates the script with status 1 when called without arguments.
testcmp () {
  if test "$#" -eq 0; then
    echo "arguments missing."; exit 1
  fi
  # Plain sh has no `local`, so the working variables carry a function
  # prefix. In particular the script-level `fn` (the input file name) must
  # not be overwritten by running a test.
  _testcmp_expect="$1"
  _testcmp_fn="$2"
  case "$_testcmp_fn" in
    # whitelisted functions
    chklength)
      shift 2
      # command substitution: the function runs in a subshell
      _testcmp_result=$("$_testcmp_fn" "$1" "$2")
      if test "$_testcmp_result" = "$_testcmp_expect"; then
        echo "$_testcmp_fn passed for: $1 $2."
      else
        echo "$_testcmp_fn failed for: $1 $2."
      fi
      ;;
    iso3countrycode)
      shift 2
      for _testcmp_arg in "$@"; do
        # subshell again, so an `exit` inside the function ends only this call
        _testcmp_result=$("$_testcmp_fn" "$_testcmp_arg")
        if test "$_testcmp_result" = "$_testcmp_expect"; then
          echo "$_testcmp_fn passed for: $_testcmp_arg."
        else
          echo "$_testcmp_fn failed for: $_testcmp_arg."
        fi
      done
      ;;
    *)
      echo "no test performed, function or command not allowd."
      ;;
  esac
}
# Compare the length of a string with an expected length.
# Arguments: $1 - expected length (integer)
#            $2 - string to measure (a missing $2 counts as length 0)
# Outputs:   "true" if the lengths match, otherwise the measured length;
#            "-1" when called without any argument
# Returns:   1 when called without arguments, 0 otherwise
chklength () {
  if test "$#" -eq 0; then
    echo "-1"
    # stop here instead of falling through to the comparison
    return 1
  fi
  # ${#2} is the shell's own length operator: no external expr, and strings
  # that happen to be expr keywords (e.g. "length") are measured correctly.
  if test "$1" -eq "${#2}"; then
    echo "true"
  else
    echo "${#2}"
  fi
}
# Smoke test for chklength: one direct call followed by two comparisons
# that go through testcmp.
testchklength () {
  # direct call; chklength's own output is printed as-is
  chklength 3 123
  # equal lengths are expected to produce "true"
  testcmp true chklength 3 123
  # a mismatch is expected to produce the measured length (3 for "1-2")
  testcmp 3 chklength 2 1-2
}
# target filename
tn="dhl-cp1252.csv"
# todo: download file (it's UTF-8 encoded)
fn="dhl.csv"

# Normalize invisible characters, spaces and hyphen look-alikes in a UTF-8
# encoded file, rewriting the file in place.
# Arguments: $1 - file to rewrite
# Returns:   exit status of sed
normalize_typography () {
  # sed has no \uXXXX escape: inside a bracket expression those are just the
  # literal characters '\', 'u', '0', ... and without -E a '+' is literal
  # too. The characters are therefore spelled out as UTF-8 byte sequences and
  # combined by alternation, which matches the same bytes in any locale.
  _nt_nbsp=$(printf '\302\240')       # U+00A0 no-break space
  _nt_nnbsp=$(printf '\342\200\257')  # U+202F narrow no-break space
  _nt_h2010=$(printf '\342\200\220')  # U+2010 hyphen
  _nt_h2011=$(printf '\342\200\221')  # U+2011 non-breaking hyphen
  _nt_h2012=$(printf '\342\200\222')  # U+2012 figure dash
  _nt_h2013=$(printf '\342\200\223')  # U+2013 en dash
  _nt_shy=$(printf '\302\255')        # U+00AD soft hyphen
  _nt_bom=$(printf '\357\273\277')    # U+FEFF zero width no-break space / BOM
  # Invisible characters are dropped first so that the runs on either side
  # of them collapse together in the following two substitutions.
  sed -i -E \
    -e "s/(${_nt_shy}|${_nt_bom})+//g" \
    -e "s/(${_nt_nbsp}|${_nt_nnbsp}|[[:space:]])+/ /g" \
    -e "s/(${_nt_h2010}|${_nt_h2011}|${_nt_h2012}|${_nt_h2013}|-)+/-/g" \
    -- "$1"
}

# backup, then replace special spaces and hyphens; the file is only rewritten
# once the backup copy exists
if cp -f "${fn}" "${fn}.bak"; then
  normalize_typography "${fn}"
else
  echo "backup of ${fn} failed, file left unchanged." >&2
fi
# todo: validate length for entries
# todo: convert country to countrycode as in ISO3
# Map a (German) country name to its three-letter ISO country code.
# Arguments: $1 - country name
# Outputs:   the code on stdout; for a missing argument or an unknown name
#            an error text on stdout instead
# Returns:   1 when called without an argument
# Exits:     an unknown name ends the calling shell with status 1; call the
#            function inside $( ... ) to confine that to the subshell
iso3countrycode () {
  if test "$#" -eq 0; then
    echo "argument missing. ($0)"
    # non-zero, so the error text cannot be mistaken for a country code
    return 1
  fi
  case "$1" in
    Belgien) echo "BEL" ;;
    Deutschland) echo "DEU" ;;
    England) echo "GBR" ;;
    Luxemburg|Luxembourg) echo "LUX" ;;
    Niederlande) echo "NLD" ;;
    Österreich) echo "AUT" ;;
    Schweiz) echo "CHE" ;;
    Ungarn) echo "HUN" ;;
    *) echo "unknown country: $1"; exit 1 ;;
  esac
}
# Exercise iso3countrycode through testcmp, one call per expected code.
testiso3 () {
  # the names cover the data listed at
  # https://doku.ccc.de/index.php?title=Attribut:Chaostreff-Country&limit=500&offset=0
  testcmp BEL iso3countrycode Belgien
  testcmp DEU iso3countrycode Deutschland
  testcmp GBR iso3countrycode England
  # both spellings are checked against the same code
  testcmp LUX iso3countrycode Luxembourg Luxemburg
  testcmp NLD iso3countrycode Niederlande
  testcmp AUT iso3countrycode Österreich
  testcmp HUN iso3countrycode Ungarn
  testcmp CHE iso3countrycode Schweiz
  # should fail: the name is not in the table
  testcmp "" iso3countrycode Simbabwe
}
#~ testiso3
# convert encoding from UTF-8 to CP1252 "Windows"
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t CP1252 "$if"
# Re-encode a CSV file by a round trip through LibreOffice: the input is
# imported with CSV filter options 59,34,76,1 into an .ods file, which is
# then exported as CSV with filter options 44,34,1,0,1,,0. The export is
# written next to the input as <name-without-extension>.csv.
# Arguments: $1 - input CSV file
# Globals:   if, of (written: input path and intermediate .ods path)
# Exits:     1 if no argument is given, 2 if the intermediate .ods exists
# Returns:   1 if the input is missing or a conversion step fails; in the
#            latter case the input file is left (or put back) in place
# Note:      on success "$1.bak" is deleted, including a backup of that name
#            made before the call.
convcsv () {
  if test "$#" -eq 0; then echo "argument missing." >&2; exit 1; fi
  if="$1"
  if test ! -f "${if}"; then echo "file not found: ${if}" >&2; return 1; fi

  # LibreOffice names its output after the input's base name and writes it to
  # --outdir, so directory and stem are split off explicitly.
  case "${if}" in
    */*) _convcsv_dir="${if%/*}" ;;
    *)   _convcsv_dir="." ;;
  esac
  test -n "${_convcsv_dir}" || _convcsv_dir="/"
  _convcsv_stem="${if##*/}"
  _convcsv_stem="${_convcsv_stem%.*}"
  of="${_convcsv_dir}/${_convcsv_stem}.ods"

  # never overwrite (and later delete) somebody else's spreadsheet
  if test -e "${of}"; then echo "file already exists." >&2; exit 2; fi

  # doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
  libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 \
    --outdir "${_convcsv_dir}" "${if}" || return 1
  # do not rely on the exit status alone; make sure the file was written
  if test ! -f "${of}"; then
    echo "conversion to ods failed: ${if}" >&2
    return 1
  fi

  # move the original out of the way: the export may use the same name
  mv -f "${if}" "${if}.bak" || return 1
  # no --infilter here: the input of this step is the .ods, not a CSV
  if libreoffice --headless \
      --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" \
      --outdir "${_convcsv_dir}" "${of}" \
    && test -f "${_convcsv_dir}/${_convcsv_stem}.csv"; then
    rm -f "${of}" "${if}.bak"
  else
    echo "conversion to csv failed, restoring ${if}" >&2
    # drop the intermediate file so that a later run is not blocked by it
    rm -f "${of}"
    mv -f "${if}.bak" "${if}"
    return 1
  fi
}
# run the LibreOffice conversion on the input file named in ${fn}
convcsv "$fn"