212 lines
5.9 KiB
Bash
Executable File
212 lines
5.9 KiB
Bash
Executable File
#!/usr/bin/env sh
# Helper functions (with self-tests) for turning a UTF-8 CSV file into a
# CP1252 CSV file with LF or CRLF line endings.

# External commands the script relies on; assert_tools checks them up front
# so a missing tool is reported before any file is touched.
# grep, head, mv and rm are called further down as well, so they are listed too.
dependencies="sed test cp libreoffice expr cut file grep head mv rm"

# target filename
tn1="dhl-cp1252-n.csv"   # variant with LF line endings (input of the convnl test)
tn2="dhl-cp1252-rn.csv"  # variant with CRLF line endings (output of the convnl test)
|
|
|
|
# Verify that every command named in the arguments can be found.
# Arguments: $@ - command names to look up
# Outputs:   "tool missing: <name>" on stderr for the first missing command
# Exits:     with status 2 as soon as a command cannot be found
assert_tools () {
  while test "$#" -gt 0; do
    # command -v is the POSIX way to look a command up; `which` is optional
    # on many systems and its exit status is not standardised.
    command -v "$1" >/dev/null 2>&1 || {
      >&2 echo "tool missing: $1"
      exit 2
    }
    shift
  done
}
# Unquoted on purpose: the space separated list must be split into words.
assert_tools ${dependencies}
|
|
|
|
# Run one whitelisted function against test data and report the outcome.
# usage: testcmp "cmpresult" "cmdfunctionname" [ "argument1", "argument2", […] ]
#   convnl               - cmpresult is the file the call has to create;
#                          argument1 and argument2 are passed to the function
#   chklength|stringcrop - arguments are consumed in pairs; the output of each
#                          call is compared with cmpresult
#   iso3countrycode      - every argument is tested on its own against cmpresult
# Outputs: one "passed …" / "failed …" line per call on stdout
# Returns: 0 if every call passed; 1 if a call failed, the test was skipped
#          or the function is not whitelisted
# Exits:   with status 1 if fewer than the two mandatory arguments are given
testcmp () {
  if test "$#" -lt 2 ; then
    echo "arguments missing."; exit 1
  fi
  # Prefixed names: POSIX sh has no local variables and the functions under
  # test use names like "expect" and "result" themselves.
  _tc_expect="$1"
  _tc_fn="$2"
  _tc_rc=0
  shift 2
  # whitelisted functions
  case "$_tc_fn" in
    # 2 arg functions where file exists
    convnl)
      if test -e "$_tc_expect" ; then
        echo "not testing as result $_tc_expect already exists."
        # Non-zero so callers do not clean up a file this test never created.
        _tc_rc=1
      else
        # The command substitution runs the function in a subshell; its
        # output is not needed, only the file it leaves behind.
        _tc_result=$("$_tc_fn" "$1" "$2")
        if test -e "$_tc_expect" ; then
          echo "passed $_tc_fn for: $1 $2."
        else
          echo "failed $_tc_fn for: $1 $2."
          _tc_rc=1
        fi
      fi
      ;;
    # 2 arg functions
    chklength|stringcrop)
      while test "$#" -ge 2; do
        _tc_result=$("$_tc_fn" "$1" "$2")
        if test "$_tc_result" = "$_tc_expect" ; then
          echo "passed $_tc_fn for: $1 $2."
        else
          echo "failed $_tc_fn for: $1 $2."
          _tc_rc=1
        fi
        shift 2
      done
      ;;
    # 1 arg functions
    iso3countrycode)
      for _tc_arg in "$@"; do
        _tc_result=$("$_tc_fn" "$_tc_arg")
        if test "$_tc_result" = "$_tc_expect" ; then
          echo "passed $_tc_fn for: $_tc_arg."
        else
          echo "failed $_tc_fn for: $_tc_arg."
          _tc_rc=1
        fi
      done
      ;;
    *)
      echo "no test performed, function or command not allowd."
      _tc_rc=1
      ;;
  esac
  return "$_tc_rc"
}
|
|
|
|
# Compare the length of a string with an expected length.
# Arguments: $1 - expected length (integer)
#            $2 - string to measure (a missing string counts as length 0)
# Outputs:   "true" if the length matches, otherwise the actual length;
#            "-1" if called without any argument
# Returns:   1 if called without arguments, otherwise 0
chklength () {
  if test "$#" -eq 0 ; then
    echo "-1"
    # Stop here: there is nothing to shift or measure.
    return 1
  fi
  # ${#2} is the POSIX string length; `expr length` is an extension and
  # trips over strings that look like expr keywords.
  if test "$1" -eq "${#2}" ; then
    echo "true"
  else
    echo "${#2}"
  fi
}
|
|
|
|
# Self-test for chklength: one call whose length matches and one whose
# mismatch has to be reported as the actual length.
testchklength () {
  testcmp true chklength 3 123   # "123" has the expected 3 characters
  testcmp 3 chklength 2 1-2      # expected 2, so the real length 3 is printed
}
|
|
|
|
# Crop every line of a string to its first N characters.
# Arguments: $1 - string to crop
#            $2 - number of characters to keep (passed to cut as -c1-N)
# Outputs:   the cropped string on stdout
# NOTE(review): some cut implementations count bytes for -c; verify with
# multi-byte input such as umlauts.
stringcrop () {
  # printf instead of echo: echo would swallow a string like "-n" and, in
  # some shells, interpret backslash sequences inside the data.
  printf '%s\n' "$1" | cut -c1-"$2"
}
|
|
|
|
teststringcrop () {
|
|
testcmp "string" "stringcrop" "string too long" "6"
|
|
}
|
|
|
|
# convert country to countrycode as in ISO3
# Arguments: $1 - country name in German, English or French
# Outputs:   the three letter code on stdout; for an unknown name the text
#            "unknown country: <name>" (kept on stdout, testiso3 compares it)
# Returns:   0 on success, 1 for a missing argument or an unknown country
iso3countrycode () {
  if test "$#" -eq 0 ; then
    echo "argument missing. ($0)"
    return 1
  fi
  case "$1" in
    Belgien|Belgium|Belgique) echo "BEL";;
    Deutschland|Germany|Allemagne) echo "DEU";;
    England|Great\ Britain|United\ Kingdom|Scotland|Wales|North\ Ireland|Northern\ Ireland) echo "GBR";;
    Luxemburg|Luxembourg) echo "LUX";;
    Niederlande|Netherlands) echo "NLD";;
    Österreich|Austria|Autriche) echo "AUT";;
    Schweiz|Switzerland|Suisse) echo "CHE";;
    # "Hungaria" is kept for existing data; "Hungary" is the English name.
    Ungarn|Hungaria|Hungary) echo "HUN";;
    *)
      echo "unknown country: $1"
      # return, not exit: a failed lookup must not terminate a caller that
      # invokes the function outside of a command substitution.
      return 1
      ;;
  esac
}
|
|
|
|
# Self-test for iso3countrycode: every known spelling has to map to its code.
testiso3 () {
  # covering data in https://doku.ccc.de/index.php?title=Attribut:Chaostreff-Country&limit=500&offset=0
  testcmp BEL iso3countrycode Belgien Belgium Belgique
  testcmp DEU iso3countrycode Deutschland Germany Allemagne
  testcmp GBR iso3countrycode \
    England "Great Britain" "United Kingdom" Scotland Wales "North Ireland"
  testcmp LUX iso3countrycode Luxembourg Luxemburg
  testcmp NLD iso3countrycode Niederlande Netherlands
  testcmp AUT iso3countrycode "Österreich" Austria Autriche
  testcmp HUN iso3countrycode Ungarn Hungaria
  testcmp CHE iso3countrycode Schweiz Switzerland Suisse
  # should fail
  testcmp "" iso3countrycode Simbabwe
}
|
|
|
|
# replace newline 0a with 0d 0a for dos version
# Arguments: $1 - input file
#            $2 - output file (created or overwritten)
# Outputs:   diagnostics on stderr
# Returns:   1 if an argument is missing or the input is not a regular file,
#            otherwise the exit status of sed
convnl () {
  if test "$#" -lt 2 ; then
    >&2 echo "arguments missing."
    return 1
  fi
  # Checked before redirecting, so a missing input does not leave an empty
  # output file behind that looks like a successful conversion.
  if test ! -f "$1" ; then
    >&2 echo "input file not found: $1"
    return 1
  fi
  # printf produces a real carriage return in every POSIX shell; command
  # substitution only strips trailing newlines, so the CR survives.
  _convnl_cr=$(printf '\r')
  # "CR*$" also swallows a CR that is already there, so converting a file
  # that has CRLF line endings does not produce CR CR LF.
  sed -e "s/${_convnl_cr}*\$/${_convnl_cr}/" "$1" > "$2"
}
|
|
# Self-test for convnl: converting $tn1 has to create $tn2.
# The result file is removed afterwards, but only if this run created it.
testconvnl () {
  if test -e "$tn2" ; then
    # testcmp refuses to run in this case and only prints a notice; the
    # existing file belongs to somebody else and must stay.
    testcmp "$tn2" "convnl" "$tn1" "$tn2"
  else
    testcmp "$tn2" "convnl" "$tn1" "$tn2"
    # -f: nothing to complain about if the conversion did not create it.
    rm -f -- "$tn2"
  fi
}
|
|
|
|
# Print the value of key $1 from ds.conf (the key is matched without regard
# to case; everything after the first "=" is the value).
getauth_conf () {
  grep -i "^$1=" ds.conf | cut -d= -f2-
}

# Credentials for the MediaWiki, taken from the configuration file ds.conf
# or, if that holds no user, from the password manager `pass`.
# ds.conf keys: user=, pass=, passfile= (entry name, e.g. ccc/doku-htaccess),
#               passstore= (store directory, e.g. ~/.password-store/)
# Outputs: "user:password" on stdout, diagnostics on stderr
# Exits:   with status 1 if no credentials can be determined
# NOTE(review): the original comment is cut off after "or, if applicable";
# the fallback order implemented here is an interpretation - confirm.
getauth () {
  user=""
  pass=""
  passfile=""
  passstore=""
  if test -f ds.conf ; then
    user=$(getauth_conf user)
    pass=$(getauth_conf pass)
    passfile=$(getauth_conf passfile)
    passstore=$(getauth_conf passstore)
  fi
  if test -z "$user" && test -n "$passfile" ; then
    # Silenced completely: stdout of this function is the credential string.
    command -v pass >/dev/null 2>&1 || { >&2 echo "tool missing: pass (unix password manager)"; exit 1; }
    # A "~/" read from a file is not expanded by the shell.
    case "$passstore" in
      "~/"*) passstore=${HOME}/${passstore#"~/"} ;;
    esac
    # pass keeps its entries as <name>.gpg, so accept that spelling too.
    if test ! -f "${passstore}${passfile}" && test ! -f "${passstore}${passfile}.gpg" ; then
      >&2 echo "path for password manager does not match: ${passfile}"; exit 1
    fi
    # Decrypt once and pick both values from the same output.
    _getauth_entry=$(pass "$passfile") || { >&2 echo "password manager could not read: ${passfile}"; exit 1; }
    # last word of the "login …" line
    user=$(printf '%s\n' "$_getauth_entry" | grep -i "^login " | sed 's/.* //')
    # the password is the first line of the entry
    pass=$(printf '%s\n' "$_getauth_entry" | head -n 1)
  fi
  if test -z "$user" ; then
    >&2 echo "no auth data found."; exit 1
  fi
  echo "${user}:${pass}"
}
|
|
|
|
|
|
# Run all self-tests, one after the other; each prints its own
# passed/failed lines.
runtests () {
  testchklength    # string length check
  teststringcrop   # cropping
  testiso3         # country name lookup
  testconvnl       # line ending conversion
}

runtests

# Stop here: nothing below this line is executed.
exit 0
|
|
|
|
# untested:

# todo: download file (its UTF-8 encoded)

fn="dhl.csv"

# Print file $1 (UTF-8) to stdout with special characters normalised:
#   - runs of whitespace, U+00A0 and U+202F     -> one space
#   - runs of U+2010..U+2013 and "-"            -> one "-"
#   - soft hyphen U+00AD and BOM/ZWNBSP U+FEFF  -> removed
# sed has no \uXXXX escapes and "+" is a literal in basic regular
# expressions, so the characters are produced as UTF-8 byte sequences with
# printf (octal escapes) and matched with extended regular expressions.
normalizechars () {
  _nc_space=$(printf '(\302\240|\342\200\257|[[:space:]])+')
  _nc_hyphen=$(printf '(\342\200\220|\342\200\221|\342\200\222|\342\200\223|-)+')
  _nc_hidden=$(printf '(\302\255|\357\273\277)+')
  sed -E \
    -e "s/${_nc_space}/ /g" \
    -e "s/${_nc_hyphen}/-/g" \
    -e "s/${_nc_hidden}//g" \
    "$1"
}

# backup
# replace special spaces and hyphens
# The file is rewritten from the backup, and only if the backup succeeded.
if cp -f "$fn" "${fn}.bak" ; then
  normalizechars "${fn}.bak" > "$fn"
else
  >&2 echo "backup of $fn failed, file left untouched."
fi
# todo: add quote variants
# todo: check for delimiters that may conflict during conversion
|
|
|
|
# convert encoding from UTF-8 to CP1252 / WINDOWS-1252
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t WINDOWS-1252 "$if"
#
# Arguments: $1 - CSV file, replaced by the converted version on success
# Outputs:   diagnostics on stderr
# Returns:   1 missing argument or source file, 2 intermediate .ods file
#            already exists, 3 a LibreOffice conversion failed, otherwise 0
# NOTE(review): LibreOffice writes its results to the current directory
# unless --outdir is given; this presumably only works for a source file in
# the current directory - confirm.
convcsv () {
  if test "$#" -eq 0 || test -z "$1" ; then >&2 echo "argument missing."; return 1; fi
  # doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
  _cc_src="$1"
  if test ! -f "$_cc_src" ; then >&2 echo "source file not found: $_cc_src"; return 1; fi
  # -b drops the file name, so a name containing spaces cannot shift the field.
  # NOTE(review): the wording of `file` differs between versions; verify
  # that the first word is "UTF-8" on the target system.
  format=$(file -b "$_cc_src" | cut -d' ' -f1)
  if test "$format" != "UTF-8" ; then >&2 echo "source file encoding not matching the expected UTF-8."; fi
  # same name with the last extension replaced by .ods
  _cc_ods="${_cc_src%.*}.ods"
  if test -e "$_cc_ods" ; then >&2 echo "output file already exists."; return 2; fi
  #~ iconv -o "$of" -f $format -t WINDOWS-1252 "$if"
  # to take care of more options in csv at the same time, convert to ods and back to csv
  if ! libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 "$_cc_src" || test ! -f "$_cc_ods" ; then
    >&2 echo "conversion to ods failed."
    return 3
  fi
  # Move the source out of the way so the second conversion can recreate it.
  mv "$_cc_src" "${_cc_src}.bak"
  # The input is the .ods file now, so no CSV input filter is forced here.
  if libreoffice --headless --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" "$_cc_ods" && test -f "$_cc_src" ; then
    rm "$_cc_ods" "${_cc_src}.bak"
  else
    # Put the original back instead of leaving only the .bak behind.
    mv "${_cc_src}.bak" "$_cc_src"
    >&2 echo "conversion back to csv failed, source file restored."
    return 3
  fi
  format=$(file -b "$_cc_src" | cut -d' ' -f1)
  if test "$format" != "Non-ISO" ; then >&2 echo "output file encoding not matching the expected Non-ISO, its $format."; fi
}
convcsv "$fn"
|