#!/usr/bin/env sh
# External tools this script calls. The list is handed to assert_tools, which
# stops the script when one of them cannot be found.
dependencies="sed test cp libreoffice expr cut file grep head mv rm"

# target filename
# tn1 is the input and tn2 the output (and expected result) of the convnl test
tn1="dhl-cp1252-n.csv"
tn2="dhl-cp1252-rn.csv"

# Verify that every command named in the arguments can be found.
# Arguments: $@ - command names, looked up one by one with `command -v`
# Outputs:   "tool missing: <name>" on stderr for the first command not found
# Exits:     2 as soon as a command is missing (ends the calling shell)
assert_tools () {
  while test "$#" -gt 0; do
    if ! command -v "$1" >/dev/null 2>&1; then
      # printf with a quoted argument keeps the name exactly as given
      printf 'tool missing: %s\n' "$1" >&2
      exit 2
    fi
    shift
  done
}

# Check the required tools before anything else runs.
# $dependencies stays unquoted on purpose: word splitting turns the
# blank-separated list into one argument per tool.
# shellcheck disable=SC2086
assert_tools ${dependencies}

# Run one whitelisted function on test input and report pass/fail on stdout.
# usage: testcmp "cmpresult" "cmdfunctionname" [ "argument1", "argument2", […] ]
#   cmpresult        expected stdout of the function; for convnl the path of
#                    the file the call is expected to create
#   cmdfunctionname  convnl, chklength, stringcrop or iso3countrycode
#   arguments        convnl: one input/output pair
#                    chklength, stringcrop: consumed two at a time
#                    iso3countrycode: one call per argument
# Globals:  overwrites expect, fn, result, a and testcmp_rc
# Returns:  0 if every check passed, 1 if at least one check failed,
#           2 if nothing was tested (result file already there, or the
#           function is not whitelisted)
# Exits:    1 when called without any argument
testcmp () {
  if test "$#" -eq 0 ; then
    echo "arguments missing."; exit 1
  fi
  expect="$1"
  fn="${2-}"
  testcmp_rc=0
  # whitelisted functions
  case "$fn" in
    # 2 arg functions where file exists
    convnl)
      shift 2
      if test -e "$expect" ; then
        echo "not testing as result $expect already exists."
        testcmp_rc=2
      else
        # the function runs in a subshell; its stdout is captured, not shown
        result=$("$fn" "$1" "$2")
        if test -e "$expect" ; then
          echo "passed $fn for: $1 $2."
        else
          echo "failed $fn for: $1 $2."
          testcmp_rc=1
        fi
      fi
      ;;
    # 2 arg functions
    chklength|stringcrop)
      shift 2
      while test "$#" -ge 2 ; do
        result=$("$fn" "$1" "$2")
        if test "$result" = "$expect" ; then
          echo "passed $fn for: $1 $2."
        else
          echo "failed $fn for: $1 $2."
          testcmp_rc=1
        fi
        shift 2
      done
      ;;
    # 1 arg functions
    iso3countrycode)
      shift 2
      for a in "$@" ; do
        result=$("$fn" "$a")
        if test "$result" = "$expect" ; then
          echo "passed $fn for: $a."
        else
          echo "failed $fn for: $a."
          testcmp_rc=1
        fi
      done
      ;;
    *)
      echo "no test performed, function or command not allowd."
      testcmp_rc=2
      ;;
  esac
  return "$testcmp_rc"
}

# Compare the length of a string with an expected length.
# Arguments: $1 - expected length (integer)
#            $2 - string to measure
# Outputs:   "true" when the lengths agree, otherwise the actual length;
#            "-1" when called without any argument
chklength () {
  if test "$#" -eq 0 ; then
    echo "-1"
    return
  fi
  # ${#2} is the POSIX parameter expansion for the length; `expr length` is a
  # GNU extension. Using the positional parameters directly also avoids
  # overwriting global variables.
  if test "$1" -eq "${#2}" ; then
    echo "true"
  else
    echo "${#2}"
  fi
}

# Exercise chklength through testcmp: one matching and one non-matching length.
testchklength () {
  # "123" has the requested three characters, so chklength answers "true"
  set -- "true" "chklength" "3" "123"
  testcmp "$@"
  # "1-2" is three characters long, not two, so chklength answers "3"
  set -- "3" "chklength" "2" "1-2"
  testcmp "$@"
}

# Crop a string to its first N positions.
# Arguments: $1 - text to crop
#            $2 - number of positions to keep (passed to cut as -c1-N)
# Outputs:   the cropped text on stdout
stringcrop () {
  # printf instead of echo: echo treats values such as "-n" or "-e" as options
  # and, depending on the shell, expands backslash sequences in the data
  printf '%s\n' "$1" | cut -c1-"$2"
}

# Exercise stringcrop through testcmp.
teststringcrop () {
  # keeping six positions of "string too long" must leave exactly "string"
  set -- "string" "stringcrop" "string too long" "6"
  testcmp "$@"
}

# convert country to countrycode as in ISO3, see https://unstats.un.org/unsd/tradekb/Knowledgebase/Country-Code
# Arguments: $1 - country name (German, English, French or local spelling)
# Outputs:   the three-letter code on stdout; "unknown country: <name>" on
#            stdout when the name is not in the list; "argument missing. (...)"
#            when called without an argument
# Exits:     1 for an unknown country. Call it in a command substitution
#            ($(iso3countrycode ...)) so only that subshell is ended.
iso3countrycode () {
  if test "$#" -eq 0 ; then
    echo "argument missing. ($0)"
  else
    case "$1" in
      Belgien|Belgium|Belgique) echo "BEL";;
      Deutschland|Germany|Allemagne) echo "DEU";;
      England|Great\ Britain|United\ Kingdom|Scotland|Wales|North\ Ireland|Northern\ Ireland) echo "GBR";;
      Luxemburg|Luxembourg) echo "LUX";;
      Niederlande|Netherlands) echo "NLD";;
      Österreich|Austria|Autriche) echo "AUT";;
      Schweiz|Switzerland|Suisse) echo "CHE";;
      # "Hungaria" is kept because it was accepted before
      Ungarn|Hungary|Hungaria|Hongrie) echo "HUN";;
      #
      Tschechien|Czechia|Tchéquie|Česko) echo "CZE";;
      Polen|Poland|Pologne|Polska) echo "POL";;
      Weißrussland|Belarus|Biélorussie|Беларусь) echo "BLR";;
      Russland|Russia|Russie|Россия) echo "RUS";;
      Slowakei|Slovakia|Slovaquie|Slovensko) echo "SVK";;
      Italien|Italy|Italie|Italia) echo "ITA";;
      Norwegen|Norway|Norvège|Norge|Noreg|Norga) echo "NOR";;
      Schweden|Sweden|Suède|Sverige) echo "SWE";;
      # the misspelling "Dänemarl" is kept because it was accepted before
      Dänemark|Dänemarl|Denmark|Danemark|Danmark) echo "DNK";;
      Island|Iceland|Islande|Ísland) echo "ISL";;
      Israel|Israël) echo "ISR";;
      Finnland|Finland|Finlande|Suomi) echo "FIN";;
      Spanien|Spain|Espagne|España) echo "ESP";;
      Estland|Estonia|Estonie|Eesti) echo "EST";;
      Litauen|Lithuania|Lituanie|Lietuva) echo "LTU";;
      Lettland|Latvia|Lettonie|Latvija) echo "LVA";;
      Irland|Ireland|Irlande|Éire) echo "IRL";;
      Indien|India|Inde|Bharat) echo "IND";;
      China|Chine|中国|Zhōngguó) echo "CHN";;
      Japan|Japon|Nippon|Nihon|日本国|日本) echo "JPN";;
      *) echo "unknown country: $1";exit 1;;
    esac
  fi
}

# Exercise iso3countrycode through testcmp: each call names the expected code
# followed by the spellings that must map to it.
testiso3 () {
  # covering data in https://doku.ccc.de/index.php?title=Attribut:Chaostreff-Country&limit=500&offset=0
  testcmp "BEL" "iso3countrycode" "Belgien" "Belgium" "Belgique"
  testcmp "DEU" "iso3countrycode" "Deutschland" "Germany" "Allemagne"
  testcmp "GBR" "iso3countrycode" "England" "Great Britain" "United Kingdom" "Scotland" "Wales" "North Ireland"
  testcmp "LUX" "iso3countrycode" "Luxembourg" "Luxemburg"
  testcmp "NLD" "iso3countrycode" "Niederlande" "Netherlands"
  testcmp "AUT" "iso3countrycode" "Österreich" "Austria" "Autriche"
  testcmp "HUN" "iso3countrycode" "Ungarn" "Hungaria"
  testcmp "CHE" "iso3countrycode" "Schweiz" "Switzerland" "Suisse"
  # and covering some more
  testcmp "CZE" "iso3countrycode" "Tschechien" "Czechia" "Tchéquie" "Česko"
  testcmp "POL" "iso3countrycode" "Polen" "Poland" "Pologne" "Polska"
  testcmp "BLR" "iso3countrycode" "Weißrussland" "Belarus" "Biélorussie" "Беларусь"
  testcmp "RUS" "iso3countrycode" "Russland" "Russia" "Russie" "Россия"
  testcmp "SVK" "iso3countrycode" "Slowakei" "Slovakia" "Slovaquie" "Slovensko"
  testcmp "ITA" "iso3countrycode" "Italien" "Italy" "Italie" "Italia"
  testcmp "NOR" "iso3countrycode" "Norwegen" "Norway" "Norvège" "Norge" "Noreg" "Norga"
  testcmp "SWE" "iso3countrycode" "Schweden" "Sweden" "Suède" "Sverige"
  # German spelling is "Dänemark"
  testcmp "DNK" "iso3countrycode" "Dänemark" "Denmark" "Danemark" "Danmark"
  testcmp "ISL" "iso3countrycode" "Island" "Iceland" "Islande" "Ísland"
  # names that are spelled the same in two languages are listed only once
  testcmp "ISR" "iso3countrycode" "Israel" "Israël"
  testcmp "FIN" "iso3countrycode" "Finnland" "Finland" "Finlande" "Suomi"
  testcmp "ESP" "iso3countrycode" "Spanien" "Spain" "Espagne" "España"
  testcmp "EST" "iso3countrycode" "Estland" "Estonia" "Estonie" "Eesti"
  testcmp "LTU" "iso3countrycode" "Litauen" "Lithuania" "Lituanie" "Lietuva"
  testcmp "LVA" "iso3countrycode" "Lettland" "Latvia" "Lettonie" "Latvija"
  testcmp "IRL" "iso3countrycode" "Irland" "Ireland" "Irlande" "Éire"
  testcmp "IND" "iso3countrycode" "Indien" "India" "Inde" "Bharat"
  testcmp "CHN" "iso3countrycode" "China" "Chine" "中国" "Zhōngguó"
  testcmp "JPN" "iso3countrycode" "Japan" "Japon" "Nippon" "Nihon" "日本国" "日本"
  testcmp "" "iso3countrycode" "Simbabwe" # should fail
}

# replace newline 0a with 0d 0a for dos version
# check a result with: od -c outfile | head
# Arguments: $1 - input file
#            $2 - output file (created or overwritten)
# Returns:   1 if an argument is missing or the input is not a readable
#            regular file (no output file is created then); otherwise the
#            exit status of sed
convnl () {
  if test "$#" -lt 2 ; then
    >&2 echo "convnl: input and output file required."
    return 1
  fi
  # check first: the redirection below would create the output file even when
  # sed cannot open the input
  if ! test -f "$1" || ! test -r "$1" ; then
    >&2 echo "convnl: cannot read input file: $1"
    return 1
  fi
  # A literal carriage return from printf; command substitution strips only
  # trailing newlines. This does not rely on echo or sed understanding "\r".
  convnl_cr=$(printf '\r')
  # End every line with exactly one CR: carriage returns already present at the
  # line end are replaced, so CRLF input is not turned into CR CR LF.
  sed -e "s/${convnl_cr}*\$/${convnl_cr}/" "$1" > "$2"
}

# Exercise convnl through testcmp: converting $tn1 must create $tn2.
# Globals: reads tn1 and tn2; overwrites testconvnl_created and testconvnl_rc
# Returns: the status of testcmp
testconvnl () {
  # Remember whether the result file was there before the test. testcmp skips
  # the test in that case, and a file this test did not create must not be
  # deleted afterwards.
  if test -e "$tn2" ; then
    testconvnl_created="no"
  else
    testconvnl_created="yes"
  fi
  testcmp "$tn2" "convnl" "$tn1" "$tn2"
  testconvnl_rc=$?
  if test "$testconvnl_created" = "yes" && test -e "$tn2" ; then
    rm -- "$tn2"
  fi
  return "$testconvnl_rc"
}

# Print the credentials as "user:password" on stdout.
# They are read from ds.conf in the current directory:
#   user=... and pass=...         are used directly;
#   passfile=... (entry name, e.g. ccc/doku-htaccess) and
#   passstore=... (store directory, e.g. ~/.password-store/)
#                                 are used with the `pass` password manager
#                                 when ds.conf gives no user.
# The manager entry is expected to hold the password on its first line and a
# line starting with "login " whose last word is the user name.
# Globals: overwrites user, pass, passfile and passstore
# Exits:   1 (message on stderr) if `pass` is needed but missing, if the entry
#          is not found in the store, or if no user could be determined
getauth () {
  user=""
  pass=""
  passfile=""
  passstore=""
  if test -f ds.conf ; then
    # -f2- keeps values that themselves contain "="
    user=$(grep -i "^user=" ds.conf | cut -d= -f2-)
    pass=$(grep -i "^pass=" ds.conf | cut -d= -f2-)
    passfile=$(grep -i "^passfile=" ds.conf | cut -d= -f2-)
    passstore=$(grep -i "^passstore=" ds.conf | cut -d= -f2-)
  fi
  if test -z "$user" && test -n "$passfile" ; then
    # command -v only looks the tool up; it does not run it
    command -v pass >/dev/null 2>&1 || { >&2 echo "tool missing: pass (unix password manager)"; exit 1; }
    # a value read from a file is not tilde-expanded by the shell
    case "$passstore" in
      "~/"*) passstore="${HOME}/${passstore#??}" ;;
    esac
    # accept the entry name as configured or with the .gpg suffix
    # NOTE(review): pass usually stores entries as <name>.gpg - confirm
    test -f "${passstore}${passfile}" || test -f "${passstore}${passfile}.gpg" || {
      >&2 echo "path for password manager does not match: ${passfile}"; exit 1
    }
    # last blank-separated word of the "login " line
    user=$(pass "${passfile}" | grep -i "^login " | sed 's/.* //')
    pass=$(pass "${passfile}" | head -n 1)
  fi
  test "$user" = "" && { >&2 echo "no auth data found."; exit 1; }
  printf '%s\n' "${user}:${pass}"
}

# Run the test functions one after the other, in this fixed order.
runtests () {
  for suite in testchklength teststringcrop testiso3 testconvnl; do
    "$suite"
  done
}

# Run the self-tests, then stop with success: nothing after this line runs.
runtests

exit 0

# untested:

# todo: download file (its UTF-8 encoded)

fn="dhl.csv"
# backup
cp -f "$fn" "${fn}.bak"

# replace special spaces and hyphens
# sed has no \uXXXX escapes, so the characters are produced as UTF-8 byte
# sequences with printf. They are matched by alternation rather than inside a
# bracket expression, so a multi-byte character is always matched as a whole.
# -E selects extended regular expressions, where + means "one or more".
nbsp=$(printf '\302\240')       # U+00A0 no-break space
nnbsp=$(printf '\342\200\257')  # U+202F narrow no-break space
hy2010=$(printf '\342\200\220') # U+2010 hyphen
hy2011=$(printf '\342\200\221') # U+2011 non-breaking hyphen
hy2012=$(printf '\342\200\222') # U+2012 figure dash
hy2013=$(printf '\342\200\223') # U+2013 en dash
shy=$(printf '\302\255')        # U+00AD soft hyphen
bom=$(printf '\357\273\277')    # U+FEFF zero width no-break space (BOM)
sed -i -E \
  -e "s/(${nbsp}|${nnbsp}|[[:space:]])+/ /g" \
  -e "s/(${hy2010}|${hy2011}|${hy2012}|${hy2013}|-)+/-/g" \
  -e "s/(${shy}|${bom})+//g" \
  "$fn"
# todo: add quote variants
# todo: check for delimiters that may conflict during conversion

# convert encoding from UTF-8 to CP1252 / WINDOWS-1252
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t WINDOWS-1252 "$if"
# The CSV is converted to .ods and back to CSV with headless LibreOffice, and
# the input file is replaced by the result.
# Arguments: $1 - CSV file to convert
# Globals:   overwrites if, of, format and convcsv_dir
# Outputs:   warnings on stderr when `file` does not report the expected
#            encoding before ("UTF-8") or after ("Non-ISO") the conversion
# Exits:     1 without an argument, 2 if the intermediate .ods already exists
convcsv () {
  if test $# -eq 0 ; then >&2 echo "argument missing."; exit 1; fi
  # doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
  if="${1}"
  # `file -b` prints the description without the file name, so a name with
  # blanks or colons cannot shift the word that is compared
  format=$(file -b "${if}" | cut -d' ' -f1)
  if test "$format" != "UTF-8" ; then >&2 echo "source file encoding not matching 'UTF-8' as expected."; fi
  # same name with the last extension replaced by .ods
  of="${if%.*}.ods"
  if test -e "$of" ; then >&2 echo "output file already exists."; exit 2; fi
  # write converted files next to the input instead of the current directory
  case "${if}" in
    */*) convcsv_dir="${if%/*}" ;;
    *) convcsv_dir="." ;;
  esac
  test -n "$convcsv_dir" || convcsv_dir="/"
  #~ iconv -o "$of" -f $format -t cp1252//TRANSLIT "$if"
  #~ iconv -o "$of" -f $format -t WINDOWS-1252 "$if"
  # to take care of more options in csv at the same time, convert to ods and back to csv
  # The existence checks guard against a conversion that reports success
  # without having written its output.
  if libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 --outdir "$convcsv_dir" "${if}" && test -f "$of" ; then
    mv "${if}" "${if}.bak"
    # NOTE(review): --infilter=CSV is also passed while reading the .ods;
    # confirm it does not force the CSV import filter onto the spreadsheet.
    if libreoffice --headless --infilter=CSV:59,34,76,1 --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" --outdir "$convcsv_dir" "${of}" && test -f "${if}" ; then
      rm "${of}" "${if}.bak"
    else
      # put the original back so the input is never lost
      mv "${if}.bak" "${if}"
    fi
  fi
  format=$(file -b "${if}" | cut -d' ' -f1)
  if test "$format" != "Non-ISO" ; then >&2 echo "output file encoding not matching the expected Non-ISO, its $format."; fi
}

# Convert the file named in $fn.
convcsv "${fn}"