#!/usr/bin/env sh
# External tools this script shells out to (space separated); the list is
# handed to assert_tools, which aborts early if one of them is unavailable.
# mv, rm and rev are used by convcsv and were missing from the list.
dependencies="sed test cp mv rm rev libreoffice expr cut"
# assert_tools TOOL...
# Check that every TOOL can be resolved by the shell.
# On the first TOOL that cannot be found, print "tool missing: TOOL" to
# stderr and terminate the script with exit status 2.
assert_tools () {
	while test "$#" -gt 0; do
		# `command -v` is the POSIX way to look a command up; `which` is an
		# optional external program whose exit status is not standardized.
		command -v "$1" >/dev/null 2>&1 || {
			echo "tool missing: $1" >&2
			exit 2
		}
		shift
	done
}
# Deliberately unquoted: the list must be split into one argument per tool.
# shellcheck disable=SC2086
assert_tools $dependencies

# testcmp EXPECTED FUNCTION [ARG...]
# Tiny test driver: call FUNCTION with the given ARGs and compare what it
# prints with EXPECTED.
#   - chklength, stringcrop: ARGs are consumed in pairs, one call per pair
#     (a trailing unpaired ARG is ignored).
#   - iso3countrycode: one call per ARG.
# For every call a line "passed FUNCTION for: ARGS." or
# "failed FUNCTION for: ARGS." is printed.
# Returns 0 if every comparison passed, 1 if at least one failed or if
# FUNCTION is not whitelisted. Calling it without any argument terminates
# the script with exit status 1.
# All working variables carry a testcmp_ prefix: POSIX sh has no local
# variables, and the script uses `fn` for the CSV file name.
testcmp () {
	if test "$#" -eq 0 ; then
		echo "arguments missing."; exit 1
	fi
	# whitelisted functions and the number of arguments each call takes
	case "${2-}" in
		chklength|stringcrop) testcmp_arity=2 ;;
		iso3countrycode) testcmp_arity=1 ;;
		*)
			echo "no test performed, function or command not allowed."
			return 1
			;;
	esac
	testcmp_expect="$1"
	testcmp_fn="$2"
	shift 2
	testcmp_status=0
	while test "$#" -ge "$testcmp_arity"; do
		# the function runs in a command substitution (subshell), so an
		# `exit` inside it cannot terminate the test run
		if test "$testcmp_arity" -eq 2; then
			testcmp_result=$("$testcmp_fn" "$1" "$2")
			testcmp_args="$1 $2"
		else
			testcmp_result=$("$testcmp_fn" "$1")
			testcmp_args="$1"
		fi
		if test "$testcmp_result" = "$testcmp_expect" ; then
			printf '%s\n' "passed $testcmp_fn for: $testcmp_args."
		else
			printf '%s\n' "failed $testcmp_fn for: $testcmp_args."
			testcmp_status=1
		fi
		shift "$testcmp_arity"
	done
	return "$testcmp_status"
}

# chklength EXPECTED STRING
# Compare the length of STRING with the number EXPECTED.
# Prints "true" when they are equal, otherwise prints the actual length.
# Called without any argument it prints "-1" and returns 1.
# A missing STRING counts as the empty string (length 0).
chklength () {
	if test "$#" -eq 0; then
		echo "-1"
		# stop here; falling through would print a second, bogus value
		return 1
	fi
	# ${#2} is the POSIX string length; `expr length` is a GNU extension
	# and misparses operands that happen to be expr keywords.
	if test "$1" -eq "${#2}"; then
		echo "true"
	else
		echo "${#2}"
	fi
}

# Self-test for chklength, run immediately after its definition.
testchklength () {
	# length matches the expectation -> chklength prints "true"
	testcmp 'true' 'chklength' '3' '123'
	# length differs from the expectation -> chklength prints the real length
	testcmp '3' 'chklength' '2' '1-2'
}
testchklength

# stringcrop STRING LENGTH
# Print STRING cut down to its first LENGTH characters (as counted by
# `cut -c`). Without LENGTH the range becomes "1-" and the whole string is
# printed.
stringcrop () {
	# printf instead of echo: echo would swallow a STRING such as "-n" and,
	# depending on the shell, interpret backslash sequences.
	printf '%s\n' "$1" | cut -c "1-$2"
}

# Self-test for stringcrop, run immediately after its definition.
teststringcrop () {
	# cropping to 6 characters must leave just the first word
	testcmp 'string' 'stringcrop' 'string too long' '6'
}
teststringcrop

# iso3countrycode COUNTRY
# Convert a country name to its three-letter ISO country code and print it.
# Only the names listed in the case statement below are known.
# Returns 0 on success. For an unknown COUNTRY or a missing argument a
# message is printed on stdout (so callers that capture the output see a
# non-code value) and 1 is returned.
# The function uses `return`, not `exit`, so calling it outside a command
# substitution does not terminate the calling script.
iso3countrycode () {
	if test "$#" -eq 0 ; then
		echo "argument missing. ($0)"
		return 1
	fi
	case "$1" in
		Belgien) echo "BEL";;
		Deutschland) echo "DEU";;
		England) echo "GBR";;
		Luxemburg|Luxembourg) echo "LUX";;
		Niederlande) echo "NLD";;
		Österreich) echo "AUT";;
		Schweiz) echo "CHE";;
		Ungarn) echo "HUN";;
		*)
			printf '%s\n' "unknown country: $1"
			return 1
			;;
	esac
}

# Self-test for iso3countrycode: one testcmp call per expected code.
testiso3 () {
	# covering data in https://doku.ccc.de/index.php?title=Attribut:Chaostreff-Country&limit=500&offset=0
	testcmp 'BEL' 'iso3countrycode' 'Belgien'
	testcmp 'DEU' 'iso3countrycode' 'Deutschland'
	testcmp 'GBR' 'iso3countrycode' 'England'
	# both spellings have to map to the same code
	testcmp 'LUX' 'iso3countrycode' 'Luxembourg' 'Luxemburg'
	testcmp 'NLD' 'iso3countrycode' 'Niederlande'
	testcmp 'AUT' 'iso3countrycode' 'Österreich'
	testcmp 'HUN' 'iso3countrycode' 'Ungarn'
	testcmp 'CHE' 'iso3countrycode' 'Schweiz'
	# negative case: an unlisted country never yields an empty result,
	# so this comparison is expected to be reported as failed
	testcmp '' 'iso3countrycode' 'Simbabwe'
}
# Run the country-code self-test, then stop with success.
# NOTE(review): because of this exit none of the statements after this line
# are executed - presumably a work-in-progress guard; confirm before removing.
testiso3
exit 0

# target filename
tn="dhl-cp1252.csv"

# todo: download file (it is UTF-8 encoded)

fn="dhl.csv"

# normalize_csv FILE
# Clean up a UTF-8 encoded FILE in place:
#   - runs of whitespace, U+00A0 and U+202F become one plain space
#   - runs of U+2010..U+2013 and "-" become one plain "-"
#   - U+00AD (soft hyphen) and U+FEFF (BOM) are removed
# sed has no \uXXXX escapes (inside a bracket expression the characters
# \ u 0 A ... are taken literally) and without -E a "+" is a literal plus.
# The characters are therefore written as UTF-8 byte sequences, matched as
# alternatives under LC_ALL=C so the result does not depend on the locale.
# Returns the exit status of sed. Uses `sed -i` without a suffix argument,
# as the script did before.
normalize_csv () {
	nc_nbsp=$(printf '\302\240')         # U+00A0 no-break space
	nc_nnbsp=$(printf '\342\200\257')    # U+202F narrow no-break space
	nc_h2010=$(printf '\342\200\220')    # U+2010 hyphen
	nc_h2011=$(printf '\342\200\221')    # U+2011 non-breaking hyphen
	nc_h2012=$(printf '\342\200\222')    # U+2012 figure dash
	nc_h2013=$(printf '\342\200\223')    # U+2013 en dash
	nc_shy=$(printf '\302\255')          # U+00AD soft hyphen
	nc_bom=$(printf '\357\273\277')      # U+FEFF byte order mark
	LC_ALL=C sed -E -i \
		-e "s/(${nc_nbsp}|${nc_nnbsp}|[[:space:]])+/ /g" \
		-e "s/(${nc_h2010}|${nc_h2011}|${nc_h2012}|${nc_h2013}|-)+/-/g" \
		-e "s/(${nc_shy}|${nc_bom})+//g" \
		"$1"
}

# backup first; the in-place clean-up only runs if the backup was written
# replace special spaces and hyphens
cp -f "${fn}" "${fn}.bak" && normalize_csv "${fn}"

# convert encoding from UTF-8 to CP1252 "Windows"
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t CP1252 "$if"
#
# convcsv FILE
# Re-encode the CSV FILE with headless LibreOffice in two steps:
#   1. FILE -> intermediate .ods (same name, last extension replaced)
#   2. FILE is moved to FILE.bak, then the .ods is converted back to csv
# When both steps succeed the .ods and FILE.bak are removed; when a step
# fails the files produced so far are left in place and the function
# returns non-zero.
# Terminates the script with status 1 if FILE is not given or does not
# exist, and with status 2 if the intermediate .ods already exists (it
# would be overwritten and deleted).
# Sets the global variables `if` (input file) and `of` (intermediate file).
# NOTE(review): LibreOffice writes converted files to the current directory
# unless --outdir is given, so FILE is expected to be in the current
# directory - confirm before passing paths with a directory part.
convcsv () {
	if test "$#" -eq 0 ; then echo "argument missing." >&2; exit 1; fi
	if="${1}"
	# strip the last extension and append .ods
	of="${if%.*}.ods"
	if test ! -e "${if}" ; then echo "file not found: ${if}" >&2; exit 1; fi
	if test -e "${of}" ; then echo "file already exists." >&2; exit 2; fi
# doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
	libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 "${if}" && {
		# NOTE(review): the second call still passes the CSV input filter
		# although its input is the .ods file - verify that this is wanted.
		mv "${if}" "${if}.bak" &&
		libreoffice --headless --infilter=CSV:59,34,76,1 --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" "${of}" && {
			rm "${of}" "${if}.bak"
		}
	}
}
# re-encode the CSV file named by fn
convcsv "${fn}"