stringcrop
This commit is contained in:
parent
d99d3ffc62
commit
ed1772e76d
65
dhl.sh
65
dhl.sh
|
@ -1,5 +1,5 @@
|
|||
#!/usr/bin/env sh
|
||||
dependencies="sed test cp libreoffice expr"
|
||||
dependencies="sed test cp libreoffice expr cut"
|
||||
assert_tools () {
|
||||
while test $# -gt 0; do
|
||||
which $1 >/dev/null 2>/dev/null || {
|
||||
|
@ -16,27 +16,32 @@ testcmp () {
|
|||
if test "$#" -eq 0 ; then
|
||||
echo "arguments missing."; exit 1
|
||||
fi
|
||||
# whitelisted functions
|
||||
case $2 in
|
||||
#whitelisted fn
|
||||
chklength)
|
||||
# 2 arg functions
|
||||
chklength|stringcrop)
|
||||
expect="$1"; shift
|
||||
fn="$1"; shift
|
||||
result=$("$fn" "$1" "$2")
|
||||
if test "$result" = "$expect" ; then
|
||||
echo "$fn passed for: $1 $2."
|
||||
else
|
||||
echo "$fn failed for: $1 $2."
|
||||
fi
|
||||
while test "$#" -ge "2"; do
|
||||
result=$("$fn" "$1" "$2")
|
||||
if test "$result" = "$expect" ; then
|
||||
echo "passed $fn for: $1 $2."
|
||||
else
|
||||
echo "failed $fn for: $1 $2."
|
||||
fi
|
||||
shift 2
|
||||
done
|
||||
;;
|
||||
# 1 arg functions
|
||||
iso3countrycode)
|
||||
expect="$1"; shift
|
||||
fn="$1"; shift
|
||||
for a in "$@"; do
|
||||
result=$("$fn" "$a")
|
||||
if test "$result" = "$expect" ; then
|
||||
echo "$fn passed for: $a."
|
||||
echo "passed $fn for: $a."
|
||||
else
|
||||
echo "$fn failed for: $a."
|
||||
echo "failed $fn for: $a."
|
||||
fi
|
||||
done
|
||||
;;
|
||||
|
@ -53,26 +58,21 @@ chklength () {
|
|||
}
|
||||
|
||||
testchklength () {
|
||||
chklength "3" "123"
|
||||
testcmp "true" "chklength" "3" "123"
|
||||
testcmp "3" "chklength" "2" "1-2"
|
||||
}
|
||||
testchklength
|
||||
|
||||
# target filename
|
||||
tn="dhl-cp1252.csv"
|
||||
# stringcrop STRING LENGTH
# Prints the leading part of STRING, positions 1 through LENGTH as counted
# by `cut -c`, followed by a newline.
# printf is used instead of echo so that a STRING such as "-n", or one
# containing backslashes, reaches cut unchanged instead of being treated as
# an echo option or escape sequence.
stringcrop () {
	printf '%s\n' "$1" | cut -c1-"$2"
}
|
||||
|
||||
# todo: download file (it's UTF-8 encoded)
|
||||
# Self-test for stringcrop: passes testcmp the expected value "string",
# the function name, and the two arguments "string too long" and "6".
teststringcrop () {
	set -- "string" "stringcrop" "string too long" "6"
	testcmp "$@"
}
|
||||
teststringcrop
|
||||
|
||||
fn="dhl.csv"
|
||||
# backup
|
||||
cp -f "${fn}" "${fn}.bak"
|
||||
|
||||
# replace special spaces and hyphens
|
||||
sed -i -e 's/[\u00A0\u202F[:space:]]+/ /g' -e 's/[\u2010\u2011\u2012\u2013\u002D]+/-/g' -e 's/[\u00AD\uFEFF]+//g' "$fn"
|
||||
|
||||
# todo: validate length for entries
|
||||
|
||||
# todo: convert country to countrycode as in ISO3
|
||||
# convert country to countrycode as in ISO3
|
||||
iso3countrycode () {
|
||||
if test "$#" -eq 0 ; then
|
||||
echo "argument missing. ($0)"
|
||||
|
@ -103,7 +103,20 @@ testiso3 () {
|
|||
testcmp "CHE" "iso3countrycode" "Schweiz"
|
||||
testcmp "" "iso3countrycode" "Simbabwe" # should fail
|
||||
}
|
||||
#~ testiso3
|
||||
testiso3
|
||||
exit 0;
|
||||
|
||||
# target filename
|
||||
tn="dhl-cp1252.csv"
|
||||
|
||||
# todo: download file (it's UTF-8 encoded)
|
||||
|
||||
fn="dhl.csv"
|
||||
# backup
|
||||
cp -f "${fn}" "${fn}.bak"
|
||||
|
||||
# replace special spaces and hyphens
|
||||
sed -i -e 's/[\u00A0\u202F[:space:]]+/ /g' -e 's/[\u2010\u2011\u2012\u2013\u002D]+/-/g' -e 's/[\u00AD\uFEFF]+//g' "$fn"
|
||||
|
||||
# convert encoding from UTF-8 to CP1252 "Windows"
|
||||
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t CP1252 "$if"
|
||||
|
|
Loading…
Reference in New Issue