stringcrop

This commit is contained in:
vv01f 2019-11-17 01:12:15 +01:00
parent d99d3ffc62
commit ed1772e76d
Signed by untrusted user who does not match committer: vv01f
GPG Key ID: 02625A16AC1D1FF6
1 changed files with 39 additions and 26 deletions

65
dhl.sh
View File

@ -1,5 +1,5 @@
#!/usr/bin/env sh #!/usr/bin/env sh
dependencies="sed test cp libreoffice expr" dependencies="sed test cp libreoffice expr cut"
assert_tools () { assert_tools () {
while test $# -gt 0; do while test $# -gt 0; do
which $1 >/dev/null 2>/dev/null || { which $1 >/dev/null 2>/dev/null || {
@ -16,27 +16,32 @@ testcmp () {
if test "$#" -eq 0 ; then if test "$#" -eq 0 ; then
echo "arguments missing."; exit 1 echo "arguments missing."; exit 1
fi fi
# whitelisted functions
case $2 in case $2 in
#whitelisted fn # 2 arg functions
chklength) chklength|stringcrop)
expect="$1"; shift expect="$1"; shift
fn="$1"; shift fn="$1"; shift
result=$("$fn" "$1" "$2") while test "$#" -ge "2"; do
if test "$result" = "$expect" ; then result=$("$fn" "$1" "$2")
echo "$fn passed for: $1 $2." if test "$result" = "$expect" ; then
else echo "passed $fn for: $1 $2."
echo "$fn failed for: $1 $2." else
fi echo "failed $fn for: $1 $2."
fi
shift 2
done
;; ;;
# 1 arg functions
iso3countrycode) iso3countrycode)
expect="$1"; shift expect="$1"; shift
fn="$1"; shift fn="$1"; shift
for a in "$@"; do for a in "$@"; do
result=$("$fn" "$a") result=$("$fn" "$a")
if test "$result" = "$expect" ; then if test "$result" = "$expect" ; then
echo "$fn passed for: $a." echo "passed $fn for: $a."
else else
echo "$fn failed for: $a." echo "failed $fn for: $a."
fi fi
done done
;; ;;
@ -53,26 +58,21 @@ chklength () {
} }
testchklength () { testchklength () {
chklength "3" "123"
testcmp "true" "chklength" "3" "123" testcmp "true" "chklength" "3" "123"
testcmp "3" "chklength" "2" "1-2" testcmp "3" "chklength" "2" "1-2"
} }
testchklength
# stringcrop STRING LENGTH
#   Writes STRING to stdout with every line cut down to its first LENGTH
#   characters (positions 1..LENGTH as counted by `cut -c`).
#   $1 - text to shorten
#   $2 - maximum number of characters to keep
#   Exit status is that of `cut`.
#   NOTE(review): some cut implementations count bytes rather than characters
#   for -c, which would split multi-byte UTF-8 text - confirm for the target
#   platform.
stringcrop () {
	# printf instead of echo: echo treats values such as "-n" as options and,
	# depending on the shell, expands backslash escapes inside the data.
	printf '%s\n' "$1" | cut -c1-"$2"
}
# todo: download file (its UTF-8 encoded) teststringcrop () {
testcmp "string" "stringcrop" "string too long" "6"
}
teststringcrop
fn="dhl.csv" # convert country to countrycode as in ISO3
# backup
cp -f "${fn}" "${fn}.bak"
# replace special spaces and hyphens
sed -i -e 's/[\u00A0\u202F[:space:]]+/ /g' -e 's/[\u2010\u2011\u2012\u2013\u002D]+/-/g' -e 's/[\u00AD\uFEFF]+//g' "$fn"
# todo: validate length for entries
# todo: convert country to countrycode as in ISO3
iso3countrycode () { iso3countrycode () {
if test "$#" -eq 0 ; then if test "$#" -eq 0 ; then
echo "argument missing. ($0)" echo "argument missing. ($0)"
@ -103,7 +103,20 @@ testiso3 () {
testcmp "CHE" "iso3countrycode" "Schweiz" testcmp "CHE" "iso3countrycode" "Schweiz"
testcmp "" "iso3countrycode" "Simbabwe" # should fail testcmp "" "iso3countrycode" "Simbabwe" # should fail
} }
#~ testiso3 testiso3
# NOTE(review): this unconditional exit makes everything below unreachable;
# it looks like a temporary switch so that only the self-tests above run -
# confirm before relying on the conversion steps.
exit 0;
# target filename
tn="dhl-cp1252.csv"
# todo: download file (it is UTF-8 encoded)
fn="dhl.csv"
# backup; stop when it fails, because sed below rewrites the file in place
cp -f "${fn}" "${fn}.bak" || { echo "backup of ${fn} failed." >&2; exit 1; }
# sed has no \uXXXX escape (inside [...] the backslash, "u" and the hex digits
# are taken literally), so the code points are spelled out as UTF-8 byte
# sequences with printf octal escapes and matched by alternation.
ch_nbsp=$(printf '\302\240')       # U+00A0 no-break space
ch_nnbsp=$(printf '\342\200\257')  # U+202F narrow no-break space
ch_hy2010=$(printf '\342\200\220') # U+2010 hyphen
ch_hy2011=$(printf '\342\200\221') # U+2011 non-breaking hyphen
ch_hy2012=$(printf '\342\200\222') # U+2012 figure dash
ch_hy2013=$(printf '\342\200\223') # U+2013 en dash
ch_shy=$(printf '\302\255')        # U+00AD soft hyphen
ch_bom=$(printf '\357\273\277')    # U+FEFF zero width no-break space / BOM
# replace special spaces and hyphens
# -E: "+" is only a repetition operator in extended regular expressions
sed -i -E \
	-e "s/(${ch_nbsp}|${ch_nnbsp}|[[:space:]])+/ /g" \
	-e "s/(${ch_hy2010}|${ch_hy2011}|${ch_hy2012}|${ch_hy2013}|-)+/-/g" \
	-e "s/(${ch_shy}|${ch_bom})+//g" \
	"$fn"
# convert encoding from UTF-8 to CP1252 "Windows" # convert encoding from UTF-8 to CP1252 "Windows"
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t CP1252 "$if" # another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t CP1252 "$if"