ds-dhl/dhl.sh

212 lines
5.9 KiB
Bash
Raw Normal View History

#!/usr/bin/env sh
# External commands this script calls; every entry is checked for presence
# before anything else runs. grep, head, rev, mv and rm are used further
# down as well, so they are listed here too.
dependencies="sed test cp libreoffice expr cut file grep head rev mv rm"
# target filename
tn1="dhl-cp1252-n.csv"
tn2="dhl-cp1252-rn.csv"
2019-11-17 00:23:10 +01:00
# Verify that every command named in the arguments can be found.
# Arguments: command names, one per argument.
# Output:    "tool missing: NAME" on stderr for the first missing command.
# Exits the whole script with status 2 as soon as one command is missing.
assert_tools () {
  for tool in "$@"; do
    # command -v is the POSIX way to look a command up; `which` is an
    # optional external program and may itself be missing.
    if ! command -v "$tool" >/dev/null 2>&1; then
      >&2 echo "tool missing: $tool"
      exit 2
    fi
  done
}
# intentionally unquoted: the list is split into one argument per tool
assert_tools ${dependencies}
2019-11-16 21:49:06 +01:00
2019-11-17 01:01:22 +01:00
# Tiny test driver: call a whitelisted function and report the outcome.
# usage: testcmp "expected" "functionname" [ "argument1" "argument2" […] ]
#   convnl               - "expected" is the path of the file the call has to
#                          create; the check is skipped when it exists already
#   chklength|stringcrop - arguments are consumed in pairs, one call per pair
#   iso3countrycode      - one call per argument
# Output:  one "passed …" / "failed …" line per check on stdout.
# Returns: 0 when every check passed; 1 when a check failed, was skipped, or
#          the function is not whitelisted.
# Exits 1 when called without any argument.
testcmp () {
  if test "$#" -eq 0 ; then
    echo "arguments missing."; exit 1
  fi
  expect="$1"
  fn="${2-}"
  testcmp_rc=0

  # whitelisted functions
  case "$fn" in
    # 2 arg functions where file exists
    convnl)
      shift 2
      if test -e "$expect" ; then
        echo "not testing as result $expect already exists."
        testcmp_rc=1
      else
        # Run inside a command substitution (a subshell) so that the callee
        # cannot overwrite the variables of this function.
        result=$("$fn" "$1" "$2")
        if test -e "$expect" ; then
          echo "passed $fn for: $1 $2."
        else
          echo "failed $fn for: $1 $2."
          testcmp_rc=1
        fi
      fi
      ;;
    # 2 arg functions
    chklength|stringcrop)
      shift 2
      while test "$#" -ge 2 ; do
        result=$("$fn" "$1" "$2")
        if test "$result" = "$expect" ; then
          echo "passed $fn for: $1 $2."
        else
          echo "failed $fn for: $1 $2."
          testcmp_rc=1
        fi
        shift 2
      done
      ;;
    # 1 arg functions
    iso3countrycode)
      shift 2
      for a in "$@" ; do
        result=$("$fn" "$a")
        if test "$result" = "$expect" ; then
          echo "passed $fn for: $a."
        else
          echo "failed $fn for: $a."
          testcmp_rc=1
        fi
      done
      ;;
    *)
      echo "no test performed, function or command not allowd."
      testcmp_rc=1
      ;;
  esac
  return "$testcmp_rc"
}
# Compare the length of a string with an expected length.
# Arguments: $1 - expected length, $2 - string to measure
# Output:    "true" when the lengths match, otherwise the actual length;
#            "-1" when called without arguments.
chklength () {
  if test "$#" -eq 0 ; then
    echo "-1"
    # stop here; without arguments there is nothing to measure
    return
  fi
  chk_expect="$1"
  # ${#2} instead of `expr length`: that form is not part of POSIX expr and
  # it fails when the string itself is an expr keyword such as "length".
  chk_len=${#2}
  if test "$chk_expect" -eq "$chk_len" ; then
    echo "true"
  else
    echo "$chk_len"
  fi
}
# Self-test for chklength, driven through testcmp.
testchklength () {
  # lengths match: "true" is expected
  testcmp true chklength 3 123
  # expected length 2 against a 3 character string: the actual length is expected
  testcmp 3 chklength 2 1-2
}
2019-11-17 01:12:15 +01:00
# Print the leading part of a string.
# Arguments: $1 - text, $2 - how many characters to keep (as counted by cut -c)
stringcrop () {
  # printf instead of echo: echo would treat text such as "-n" as an option
  # and, depending on the shell, interpret backslash sequences.
  printf '%s\n' "$1" | cut -c1-"$2"
}
2019-11-16 21:49:06 +01:00
2019-11-17 01:12:15 +01:00
# Self-test for stringcrop, driven through testcmp.
teststringcrop () {
  # keeping the first 6 characters of the sample must yield "string"
  testcmp string stringcrop 'string too long' 6
}
2019-11-16 22:04:21 +01:00
2019-11-17 01:12:15 +01:00
# convert country to countrycode as in ISO3
# Arguments: $1 - country name in one of the spellings listed below
# Output:    the three-letter code on stdout; a message on stdout when the
#            argument is missing or the name is not in the table.
# Returns 1 when the argument is missing; exits 1 for an unknown country.
iso3countrycode () {
  if test "$#" -eq 0 ; then
    echo "argument missing. ($0)"
    return 1
  fi
  case "$1" in
    Belgien|Belgium|Belgique) echo "BEL" ;;
    Deutschland|Germany|Allemagne) echo "DEU" ;;
    England|Great\ Britain|United\ Kingdom|Scotland|Wales|North\ Ireland|Northern\ Ireland) echo "GBR" ;;
    Luxemburg|Luxembourg) echo "LUX" ;;
    Niederlande|Netherlands) echo "NLD" ;;
    Österreich|Austria|Autriche) echo "AUT" ;;
    Schweiz|Switzerland|Suisse) echo "CHE" ;;
    # "Hungaria" is kept for existing data; "Hungary"/"Hongrie" are the usual spellings
    Ungarn|Hungary|Hungaria|Hongrie) echo "HUN" ;;
    *) echo "unknown country: $1"; exit 1 ;;
  esac
}
# Self-test for iso3countrycode, driven through testcmp: every spelling in a
# row has to map to the code given as first argument.
testiso3 () {
  # covering data in https://doku.ccc.de/index.php?title=Attribut:Chaostreff-Country&limit=500&offset=0
  testcmp "BEL" "iso3countrycode" "Belgien" "Belgium" "Belgique"
  testcmp "DEU" "iso3countrycode" "Deutschland" "Germany" "Allemagne"
  testcmp "GBR" "iso3countrycode" "England" "Great Britain" "United Kingdom" "Scotland" "Wales" "North Ireland"
  testcmp "LUX" "iso3countrycode" "Luxembourg" "Luxemburg"
  testcmp "NLD" "iso3countrycode" "Niederlande" "Netherlands"
  testcmp "AUT" "iso3countrycode" "Österreich" "Austria" "Autriche"
  testcmp "HUN" "iso3countrycode" "Ungarn" "Hungaria"
  testcmp "CHE" "iso3countrycode" "Schweiz" "Switzerland" "Suisse"
  # a country that is not in the table
  testcmp "" "iso3countrycode" "Simbabwe" # should fail
}
2019-11-17 01:12:15 +01:00
2019-11-17 03:26:28 +01:00
# replace newline 0a with 0d 0a for dos version
# Arguments: $1 - input file, $2 - output file (overwritten)
# Returns:   1, after a message on stderr, when an argument is missing or the
#            input is not a readable file (no output file is created then);
#            otherwise the exit status of sed.
convnl () {
  if test "$#" -lt 2 ; then
    >&2 echo "convnl: input and output file expected."
    return 1
  fi
  if ! test -f "$1" || ! test -r "$1" ; then
    >&2 echo "convnl: cannot read input file: $1"
    return 1
  fi
  # A literal carriage return (command substitution strips only newlines).
  # Handing the byte itself to sed avoids relying on sed understanding "\r"
  # and makes the former hexdump self-check, which depended on the byte
  # order of the machine, unnecessary.
  convnl_cr=$(printf '\r')
  sed -e "s/\$/${convnl_cr}/" "$1" > "$2"
}
# Self-test for convnl: convert $tn1 into $tn2 through testcmp, then clean up.
# Globals: tn1 (read), tn2 (read)
# Only a file produced by this test run is removed; an output file that was
# already present before the test is left untouched.
testconvnl () {
  if test -e "$tn2" ; then
    tn2_preexisting=1
  else
    tn2_preexisting=0
  fi
  testcmp "$tn2" "convnl" "$tn1" "$tn2"
  if test "$tn2_preexisting" -eq 0 && test -e "$tn2" ; then
    rm -- "$tn2"
  fi
}
2019-11-17 12:35:27 +01:00
# Credentials for the MediaWiki, taken from the configuration file ds.conf in
# the current directory or, when it holds no user, from the password manager
# `pass`.
# ds.conf keys (matched case-insensitively, value = everything after the first "="):
#   user=, pass=           - credentials stored directly
#   passfile=, passstore=  - entry name and store directory for `pass`
# Output: "user:password" on stdout.
# Exits 1 with a message on stderr when no credentials can be determined.
getauth () {
  user=""
  pass=""
  if test -f ds.conf ; then
    # -f2- keeps values that contain "=" themselves
    user=$(grep -i "^user=" ds.conf | head -n 1 | cut -d= -f2-)
    pass=$(grep -i "^pass=" ds.conf | head -n 1 | cut -d= -f2-)
    if test -z "$user" ; then
      # Fall back to the password manager. Its settings live in ds.conf,
      # so this branch only makes sense while that file exists.
      # The lookup output is discarded: stdout of this function is the result.
      command -v pass >/dev/null 2>&1 || { >&2 echo "tool missing: pass (unix password manager)"; exit 1; }
      passfile=$(grep -i "^passfile=" ds.conf | head -n 1 | cut -d= -f2-)   # ccc/doku-htaccess
      passstore=$(grep -i "^passstore=" ds.conf | head -n 1 | cut -d= -f2-) # ~/.password-store/
      # a leading "~/" read from a file is not expanded by the shell
      case "$passstore" in
        "~/"*) passstore="${HOME}/${passstore#??}" ;;
      esac
      # accept the entry with or without the ".gpg" suffix used inside the store
      if ! test -f "${passstore}${passfile}" && ! test -f "${passstore}${passfile}.gpg" ; then
        >&2 echo "path for password manager does not match: ${passfile}"
        exit 1
      fi
      # query the entry once: first line is the password, the last word of
      # the "login " line is the user
      getauth_entry=$(pass "$passfile")
      user=$(printf '%s\n' "$getauth_entry" | grep -i "^login " | rev | cut -d" " -f1 | rev)
      pass=$(printf '%s\n' "$getauth_entry" | head -n 1)
      unset getauth_entry
    fi
  fi
  if test -z "$user" ; then
    >&2 echo "no auth data found."
    exit 1
  fi
  printf '%s\n' "${user}:${pass}"
}
2019-11-17 03:26:28 +01:00
# Run all self-tests of this script, one after the other.
runtests () {
  for suite in testchklength teststringcrop testiso3 testconvnl; do
    "$suite"
  done
}
# Run the self-tests, then stop: the code below this point is marked as
# untested and is not meant to be executed yet.
runtests
exit 0
# untested:
# todo: download file (its UTF-8 encoded)
fn="dhl.csv"
# backup
cp -f "$fn" "${fn}.bak"

# replace special spaces and hyphens
# Arguments: $1 - UTF-8 encoded text file, edited in place
#            (needs a sed that supports -i and -E)
#   - runs of NO-BREAK SPACE, NARROW NO-BREAK SPACE and whitespace -> one blank
#   - runs of U+2010..U+2013 and "-"                               -> one "-"
#   - SOFT HYPHEN and the byte order mark U+FEFF                   -> removed
normalize_chars () {
  # sed knows no \uXXXX escape and a backslash inside a bracket expression is
  # a literal character, so the code points are spelled out as their UTF-8
  # byte sequences and combined by alternation, which works in any locale.
  nc_nbsp=$(printf '\302\240')      # U+00A0
  nc_nnbsp=$(printf '\342\200\257') # U+202F
  nc_h2010=$(printf '\342\200\220') # U+2010
  nc_h2011=$(printf '\342\200\221') # U+2011
  nc_h2012=$(printf '\342\200\222') # U+2012
  nc_h2013=$(printf '\342\200\223') # U+2013
  nc_shy=$(printf '\302\255')       # U+00AD
  nc_bom=$(printf '\357\273\277')   # U+FEFF
  # -E: without it an unescaped "+" is an ordinary character
  sed -i -E \
    -e "s/(${nc_nbsp}|${nc_nnbsp}|[[:space:]])+/ /g" \
    -e "s/(${nc_h2010}|${nc_h2011}|${nc_h2012}|${nc_h2013}|-)+/-/g" \
    -e "s/(${nc_shy}|${nc_bom})+//g" \
    "$1"
}
normalize_chars "$fn"
# todo: add quote variants
# todo: check for delimiters that may conflict during conversion
2019-12-20 14:30:21 +01:00
# convert encoding from UTF-8 to CP1252 / WINDOWS-1252
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t WINDOWS-1252 "$if"
# Arguments: $1 - CSV file; it is replaced by the converted version
# The file is converted to .ods and back to .csv with headless LibreOffice.
# Warnings about an unexpected encoding go to stderr and do not abort.
# Exits 1 when the argument is missing, 2 when the intermediate .ods file
# already exists.
convcsv () {
  if test $# -eq 0 ; then >&2 echo "argument missing."; exit 1; fi
  # doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
  if="${1}"
  # -b drops the "name:" prefix, so blanks in the file name cannot shift the fields
  format=$(file -b "${if}" | cut -d' ' -f1)
  if test "$format" != "UTF-8" ; then >&2 echo "source file encoding not matching the expected UTF-8."; fi
  # same name with the last extension replaced by .ods
  of="${if%.*}.ods"
  if test -e "$of" ; then >&2 echo "output file already exists."; exit 2; fi
  #~ iconv -o "$of" -f $format -t WINDOWS-1252 "$if"
  # to take care of more options in csv at the same time, convert to ods and back to csv
  # NOTE(review): the filter tokens presumably mean separator, quote character,
  # character set, first line - confirm against the page linked above.
  if libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 "${if}" ; then
    mv "${if}" "${if}.bak"
    # convert the intermediate .ods back; the backup is only dropped once the
    # new csv really exists, otherwise the original is put back
    if libreoffice --headless --infilter=CSV:59,34,76,1 --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" "${of}" && test -e "${if}" ; then
      rm "${of}" "${if}.bak"
    else
      mv "${if}.bak" "${if}"
    fi
  fi
  format=$(file -b "${if}" | cut -d' ' -f1)
  if test "$format" != "Non-ISO" ; then >&2 echo "output file encoding not matching the expected Non-ISO, its $format."; fi
}
convcsv "$fn"