stringcrop

This commit is contained in:
vv01f 2019-11-17 01:12:15 +01:00
parent d99d3ffc62
commit ed1772e76d
Signed by untrusted user who does not match committer: vv01f
GPG Key ID: 02625A16AC1D1FF6
1 changed files with 39 additions and 26 deletions

65
dhl.sh
View File

@ -1,5 +1,5 @@
#!/usr/bin/env sh
dependencies="sed test cp libreoffice expr"
dependencies="sed test cp libreoffice expr cut"
assert_tools () {
while test $# -gt 0; do
which $1 >/dev/null 2>/dev/null || {
@ -16,27 +16,32 @@ testcmp () {
if test "$#" -eq 0 ; then
echo "arguments missing."; exit 1
fi
# whitelisted functions
case $2 in
#whitelisted fn
chklength)
# 2 arg functions
chklength|stringcrop)
expect="$1"; shift
fn="$1"; shift
result=$("$fn" "$1" "$2")
if test "$result" = "$expect" ; then
echo "$fn passed for: $1 $2."
else
echo "$fn failed for: $1 $2."
fi
while test "$#" -ge "2"; do
result=$("$fn" "$1" "$2")
if test "$result" = "$expect" ; then
echo "passed $fn for: $1 $2."
else
echo "failed $fn for: $1 $2."
fi
shift 2
done
;;
# 1 arg functions
iso3countrycode)
expect="$1"; shift
fn="$1"; shift
for a in "$@"; do
result=$("$fn" "$a")
if test "$result" = "$expect" ; then
echo "$fn passed for: $a."
echo "passed $fn for: $a."
else
echo "$fn failed for: $a."
echo "failed $fn for: $a."
fi
done
;;
@ -53,26 +58,21 @@ chklength () {
}
# Self-test for chklength: one direct call followed by two testcmp checks.
# testcmp receives the expected output first, then the name of the function
# under test, then the arguments handed to that function.
testchklength () {
# Direct call; its output is not captured or compared against anything here.
chklength "3" "123"
# Expects `chklength "3" "123"` to print "true".
testcmp "true" "chklength" "3" "123"
# Expects `chklength "2" "1-2"` to print "3".
# NOTE(review): chklength is not visible in this excerpt -- confirm that "3"
# really is the intended output for these arguments.
testcmp "3" "chklength" "2" "1-2"
}
testchklength
# target filename
tn="dhl-cp1252.csv"
# Crop every line of a string to a maximum length.
# Arguments:
#   $1 - text to crop
#   $2 - number of leading characters to keep per line (upper bound of the
#        range "1-$2" handed to cut -c)
# Outputs:
#   the cropped text on stdout, terminated by a newline
# NOTE(review): some cut implementations count bytes rather than characters
# for -c, so multi-byte UTF-8 characters may be split -- verify on the target
# system before relying on this for non-ASCII data.
stringcrop () {
  # printf instead of echo: echo treats operands such as "-n" or "-e" as
  # options and, depending on the shell, expands backslash sequences in the
  # data, which would silently alter the text being cropped.
  printf '%s\n' "$1" | cut -c1-"$2"
}
# todo: download file (it's UTF-8 encoded)
# Self-test for stringcrop: asks testcmp to verify that cropping
# "string too long" to 6 characters produces "string".
teststringcrop () {
  # Argument order for testcmp: expected output, function name, then the
  # arguments passed to that function.
  set -- "string" "stringcrop" "string too long" "6"
  testcmp "$@"
}
teststringcrop
fn="dhl.csv"
# backup
cp -f "${fn}" "${fn}.bak"
# Replace special spaces and hyphens.
# sed has no \uXXXX escape (inside a bracket expression the backslash, "u" and
# the hex digits are matched as literal characters) and "+" is only a
# repetition operator in extended regular expressions. The characters are
# therefore built as UTF-8 byte sequences with printf octal escapes and
# matched through ERE alternation, which works in both C and UTF-8 locales.
u_nbsp=$(printf '\302\240')         # U+00A0 no-break space
u_nnbsp=$(printf '\342\200\257')    # U+202F narrow no-break space
u_hyphen=$(printf '\342\200\220')   # U+2010 hyphen
u_nbhyphen=$(printf '\342\200\221') # U+2011 non-breaking hyphen
u_figdash=$(printf '\342\200\222')  # U+2012 figure dash
u_endash=$(printf '\342\200\223')   # U+2013 en dash
u_shy=$(printf '\302\255')          # U+00AD soft hyphen
u_bom=$(printf '\357\273\277')      # U+FEFF zero width no-break space / BOM
# 1st expression: collapse runs of (special) whitespace into a single space
# 2nd expression: collapse runs of hyphen-like characters into one ASCII hyphen
# 3rd expression: drop soft hyphens and byte order marks entirely
sed -E -i \
  -e "s/(${u_nbsp}|${u_nnbsp}|[[:space:]])+/ /g" \
  -e "s/(${u_hyphen}|${u_nbhyphen}|${u_figdash}|${u_endash}|-)+/-/g" \
  -e "s/(${u_shy}|${u_bom})+//g" \
  "$fn"
# todo: validate length for entries
# todo: convert country to countrycode as in ISO3
# convert country to countrycode as in ISO3
iso3countrycode () {
if test "$#" -eq 0 ; then
echo "argument missing. ($0)"
@ -103,7 +103,20 @@ testiso3 () {
testcmp "CHE" "iso3countrycode" "Schweiz"
testcmp "" "iso3countrycode" "Simbabwe" # should fail
}
#~ testiso3
testiso3
exit 0;
# target filename
tn="dhl-cp1252.csv"
# todo: download file (it's UTF-8 encoded)
fn="dhl.csv"
# backup
cp -f "${fn}" "${fn}.bak"
# Replace special spaces and hyphens.
# sed has no \uXXXX escape (inside a bracket expression the backslash, "u" and
# the hex digits are matched as literal characters) and "+" is only a
# repetition operator in extended regular expressions. The characters are
# therefore built as UTF-8 byte sequences with printf octal escapes and
# matched through ERE alternation, which works in both C and UTF-8 locales.
u_nbsp=$(printf '\302\240')         # U+00A0 no-break space
u_nnbsp=$(printf '\342\200\257')    # U+202F narrow no-break space
u_hyphen=$(printf '\342\200\220')   # U+2010 hyphen
u_nbhyphen=$(printf '\342\200\221') # U+2011 non-breaking hyphen
u_figdash=$(printf '\342\200\222')  # U+2012 figure dash
u_endash=$(printf '\342\200\223')   # U+2013 en dash
u_shy=$(printf '\302\255')          # U+00AD soft hyphen
u_bom=$(printf '\357\273\277')      # U+FEFF zero width no-break space / BOM
# 1st expression: collapse runs of (special) whitespace into a single space
# 2nd expression: collapse runs of hyphen-like characters into one ASCII hyphen
# 3rd expression: drop soft hyphens and byte order marks entirely
sed -E -i \
  -e "s/(${u_nbsp}|${u_nnbsp}|[[:space:]])+/ /g" \
  -e "s/(${u_hyphen}|${u_nbhyphen}|${u_figdash}|${u_endash}|-)+/-/g" \
  -e "s/(${u_shy}|${u_bom})+//g" \
  "$fn"
# convert encoding from UTF-8 to CP1252 "Windows"
# another possibility for the conversion (seems to be unreliable, though): iconv -o "$of" -f UTF-8 -t CP1252 "$if"