ds-dhl/dhl.sh

179 lines
4.2 KiB
Bash
Executable File

#!/usr/bin/env sh
# External commands the script needs; the list is handed to assert_tools once
# at start-up, which aborts the script when one of them cannot be found.
# NOTE(review): keep this list in sync with the commands the script invokes.
dependencies="sed test cp libreoffice expr cut"
# File names used by the convnl self-test: $tn1 is passed as the input file,
# $tn2 as the output file whose existence is checked afterwards.
tn1="dhl-cp1252-n.csv"
tn2="dhl-cp1252-rn.csv"
# Verify that every command named in the arguments can be found.
# Arguments: $@ - command names to look up, one per argument
# Outputs:   "tool missing: NAME" on stdout for the first command not found
# Exits:     terminates the calling shell with status 2 on the first miss
assert_tools () {
  while test "$#" -gt 0; do
    # `command -v` is the POSIX way to look a command up; it is part of the
    # shell itself, so the check no longer depends on an external `which`.
    # The name is quoted so that it is looked up as exactly one word.
    command -v "$1" >/dev/null 2>&1 || {
      echo "tool missing: $1"
      exit 2
    }
    shift
  done
}
# Check all required tools before doing anything else. The expansion is left
# unquoted on purpose: it must word-split into one argument per tool name.
assert_tools ${dependencies}
# Run one whitelisted helper function and compare its outcome with an
# expectation.
# usage: testcmp "cmpresult" "cmdfunctionname" [ "argument1", "argument2", […] ]
# Arguments:
#   $1   - expected stdout of the helper; for convnl the path of the file the
#          call is expected to create
#   $2   - name of the function under test; must be whitelisted below
#   $3.. - arguments for the function under test (pairs for the 2 arg
#          functions, any number of single arguments for the 1 arg functions)
# Globals: expect, fn, result, a, testcmp_rc (written)
# Outputs: one "passed …" / "failed …" line per comparison on stdout
# Returns: 0 when every comparison passed; 1 when a comparison failed, the
#          test was skipped or the function is not whitelisted, so callers
#          can chain follow-up actions with && / ||
# Exits:   status 1 when called without any argument
testcmp () {
  if test "$#" -eq 0 ; then
    echo "arguments missing."; exit 1
  fi
  testcmp_rc=0
  # whitelisted functions
  case $2 in
    # 2 arg functions where file exists
    convnl)
      expect="$1"
      fn="$2"
      shift 2
      if test -e "$expect" ; then
        echo "not testing as result $expect already exists."
        # skipped is not passed: report it so callers leave the file alone
        testcmp_rc=1
      else
        # The helper runs inside the command substitution's subshell, so any
        # variable it assigns cannot overwrite expect/fn of this function.
        result=$("$fn" "$1" "$2")
        if test -e "$expect" ; then
          echo "passed $fn for: $1 $2."
        else
          echo "failed $fn for: $1 $2."
          testcmp_rc=1
        fi
      fi
      ;;
    # 2 arg functions
    chklength|stringcrop)
      expect="$1"
      fn="$2"
      shift 2
      # consume the remaining arguments pairwise
      while test "$#" -ge "2"; do
        result=$("$fn" "$1" "$2")
        if test "$result" = "$expect" ; then
          echo "passed $fn for: $1 $2."
        else
          echo "failed $fn for: $1 $2."
          testcmp_rc=1
        fi
        shift 2
      done
      ;;
    # 1 arg functions
    iso3countrycode)
      expect="$1"
      fn="$2"
      shift 2
      for a in "$@"; do
        result=$("$fn" "$a")
        if test "$result" = "$expect" ; then
          echo "passed $fn for: $a."
        else
          echo "failed $fn for: $a."
          testcmp_rc=1
        fi
      done
      ;;
    *)
      echo "no test performed, function or command not allowd."
      testcmp_rc=1
      ;;
  esac
  return "$testcmp_rc"
}
# Compare the length of a string with an expected length.
# Arguments: $1 - expected length (integer)
#            $2 - string to measure
# Outputs:   "true" on stdout when the length matches, otherwise the actual
#            length; "-1" when called without any argument
# Returns:   1 when called without arguments, 0 otherwise
chklength () {
  if test "$#" -eq 0 ; then
    echo "-1"
    # stop here instead of measuring a non-existent argument
    return 1
  fi
  # ${#2} is the shell's own length expansion: no external expr needed, and
  # strings that look like expr keywords or operators are measured correctly.
  if test "$1" -eq "${#2}" ; then
    echo "true"
  else
    echo "${#2}"
  fi
}
# Exercise chklength through testcmp with one matching and one mismatching
# length.
testchklength () {
  # "123" is 3 characters long, so chklength is expected to print "true"
  testcmp true chklength 3 123
  # "1-2" is not 2 characters long; chklength is expected to print 3
  testcmp 3 chklength 2 1-2
}
# Cut a string down to its leading part.
# Arguments: $1 - string to crop
#            $2 - last position to keep (handed to cut as -c1-N)
# Outputs:   the cropped string on stdout
stringcrop () {
  # printf instead of echo: echo may treat strings such as "-n" or "-e" as
  # options and, depending on the shell, interpret backslash sequences.
  printf '%s\n' "$1" | cut -c1-"$2"
}
# Exercise stringcrop through testcmp: cropping "string too long" to 6
# characters is expected to yield "string".
teststringcrop () {
  testcmp string stringcrop 'string too long' 6
}
# Translate a country name into its three-letter ISO country code.
# Arguments: $1 - country name as spelled in the table below
# Outputs:   the code on stdout; "argument missing. (…)" when called without
#            an argument; "unknown country: NAME" for a name not in the table
# Exits:     terminates the (sub)shell with status 1 for an unknown country
iso3countrycode () {
  # guard: nothing to look up
  if test "$#" -eq 0 ; then
    echo "argument missing. ($0)"
    return
  fi

  case "$1" in
    Belgien)
      echo "BEL" ;;
    Deutschland)
      echo "DEU" ;;
    England)
      echo "GBR" ;;
    Luxemburg | Luxembourg)
      echo "LUX" ;;
    Niederlande)
      echo "NLD" ;;
    Österreich)
      echo "AUT" ;;
    Schweiz)
      echo "CHE" ;;
    Ungarn)
      echo "HUN" ;;
    *)
      echo "unknown country: $1"
      exit 1 ;;
  esac
}
# Exercise iso3countrycode through testcmp for every name in its table, plus
# one unknown name.
testiso3 () {
  # covering data in https://doku.ccc.de/index.php?title=Attribut:Chaostreff-Country&limit=500&offset=0
  testcmp BEL iso3countrycode Belgien
  testcmp DEU iso3countrycode Deutschland
  testcmp GBR iso3countrycode England
  # both spellings have to map to the same code
  testcmp LUX iso3countrycode Luxembourg Luxemburg
  testcmp NLD iso3countrycode Niederlande
  testcmp AUT iso3countrycode Österreich
  testcmp HUN iso3countrycode Ungarn
  testcmp CHE iso3countrycode Schweiz
  # unknown country: this comparison is meant to be reported as failed
  testcmp "" iso3countrycode Simbabwe # should fail
}
# Write a copy of a text file in which every line ends with 0d 0a (CR LF)
# instead of 0a (LF), for the DOS version.
# Arguments: $1 - input file
#            $2 - output file (overwritten)
# Outputs:   "arguments missing." on stdout when fewer than two arguments
#            are given
# Returns:   1 on missing arguments, otherwise the exit status of sed
# test with: printf 'a\n' | sed "s/\$/$(printf '\r')/" | od -c
convnl () {
  if test "$#" -lt 2 ; then
    echo "arguments missing."
    return 1
  fi
  # printf yields a literal carriage return in every shell, and a literal
  # character in the replacement is understood by every sed. The former
  # `echo \\\r` form depended on how the shell's echo and the installed sed
  # treat backslash escapes; it was guarded by a hexdump probe whose expected
  # value is byte-order dependent and which, on mismatch, made the function
  # silently write nothing while still reporting success.
  sed -e "s/\$/$(printf '\r')/" "$1" > "$2"
}
# Exercise convnl through testcmp and remove the file the test produced.
# Globals: tn1 (read) - input file name handed to convnl
#          tn2 (read) - output file name whose creation is checked
# Returns: the status of testcmp when the target already existed, otherwise
#          the status of the testcmp/cleanup chain
testconvnl () {
  if test -e "$tn2" ; then
    # The target was there before the test started. testcmp only reports
    # that it skips the test; the file is not ours, so it must not be removed.
    testcmp "$tn2" "convnl" "$tn1" "$tn2"
    return
  fi
  # Clean up only what this test run created; -f keeps rm quiet when the
  # helper failed to create the file.
  testcmp "$tn2" "convnl" "$tn1" "$tn2" && rm -f "$tn2"
}
# Run every self-test of the helper functions, in a fixed order.
# Returns: the status of the last test function (testconvnl)
runtests () {
  # checks that only compare printed output
  testchklength; teststringcrop; testiso3
  # check that creates a file
  testconvnl
}
# Run the self-tests on every invocation, before any file is touched.
runtests
# TODO: download the file (it is UTF-8 encoded)
fn="dhl.csv"
# Keep a backup copy next to the original; -f replaces an older backup.
cp -f "$fn" "${fn}.bak"
# NOTE(review): the script ends here, so nothing below this line is executed
# at the moment — presumably the steps below are still work in progress.
exit 0;
# Replace special spaces and hyphens in a UTF-8 text file, in place:
#  - runs of no-break spaces (U+00A0, U+202F) and ordinary whitespace become
#    a single space
#  - runs of hyphens/dashes (U+2010, U+2011, U+2012, U+2013, "-") become a
#    single "-"
#  - soft hyphens (U+00AD) and U+FEFF are removed
# sed knows no \uXXXX escape, and "+" is a literal character in basic regular
# expressions, so the characters are built from their UTF-8 bytes with printf
# and the expressions use extended syntax (-E).
# Arguments: $1 - file to edit in place
# Globals:   normchars_* (written)
# Returns:   the exit status of sed
normchars () {
  normchars_nbsp=$(printf '\302\240')       # U+00A0
  normchars_nnbsp=$(printf '\342\200\257')  # U+202F
  normchars_h2010=$(printf '\342\200\220')  # U+2010
  normchars_h2011=$(printf '\342\200\221')  # U+2011
  normchars_h2012=$(printf '\342\200\222')  # U+2012
  normchars_h2013=$(printf '\342\200\223')  # U+2013
  normchars_shy=$(printf '\302\255')        # U+00AD
  normchars_feff=$(printf '\357\273\277')   # U+FEFF
  sed -i -E \
    -e "s/(${normchars_nbsp}|${normchars_nnbsp}|[[:space:]])+/ /g" \
    -e "s/(${normchars_h2010}|${normchars_h2011}|${normchars_h2012}|${normchars_h2013}|-)+/-/g" \
    -e "s/(${normchars_shy}|${normchars_feff})+//g" \
    "$1"
}
normchars "$fn"
# convert encoding from UTF-8 to CP1252 "Windows"
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t CP1252 "$if"
#
# Round-trips a CSV file through LibreOffice: CSV -> ODS -> CSV. The original
# is kept as "<file>.bak" while the second conversion runs; the backup and the
# intermediate ODS file are removed once both conversions succeeded.
# Arguments: $1 - CSV file to convert
# Globals:   if, of (written) - input file and intermediate .ods file
# Outputs:   "argument missing." / "file already exists." on stdout
# Exits:     status 1 without an argument; status 2 when the intermediate
#            .ods file already exists (it would be overwritten and deleted)
# Returns:   0 on success, otherwise the status of the step that failed
# NOTE(review): the .ods name is derived from the full input path; confirm
# where libreoffice places its output when $1 contains a directory part.
convcsv () {
  if test "$#" -eq 0 ; then echo "argument missing."; exit 1; fi
  # doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
  if="${1}"
  # same name with the last extension replaced by .ods
  of="${if%.*}.ods"
  # The input file has to exist to be converted, so the "already exists"
  # guard belongs to the intermediate file this function creates and removes.
  if test -e "$of" ; then echo "file already exists."; exit 2; fi
  libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 "${if}" || return
  # move the original out of the way for the converted CSV
  mv "${if}" "${if}.bak" || return
  libreoffice --headless --infilter=CSV:59,34,76,1 --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" "${of}" || return
  # both conversions succeeded: drop the intermediate file and the backup
  rm "${of}" "${if}.bak"
}
# Run the LibreOffice conversion on the file named by $fn.
convcsv "$fn"