stringcrop
This commit is contained in:
parent
d99d3ffc62
commit
ed1772e76d
65
dhl.sh
65
dhl.sh
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env sh
|
#!/usr/bin/env sh
|
||||||
dependencies="sed test cp libreoffice expr"
|
dependencies="sed test cp libreoffice expr cut"
|
||||||
assert_tools () {
|
assert_tools () {
|
||||||
while test $# -gt 0; do
|
while test $# -gt 0; do
|
||||||
which $1 >/dev/null 2>/dev/null || {
|
which $1 >/dev/null 2>/dev/null || {
|
||||||
|
@ -16,27 +16,32 @@ testcmp () {
|
||||||
if test "$#" -eq 0 ; then
|
if test "$#" -eq 0 ; then
|
||||||
echo "arguments missing."; exit 1
|
echo "arguments missing."; exit 1
|
||||||
fi
|
fi
|
||||||
|
# whitelisted functions
|
||||||
case $2 in
|
case $2 in
|
||||||
#whitelisted fn
|
# 2 arg functions
|
||||||
chklength)
|
chklength|stringcrop)
|
||||||
expect="$1"; shift
|
expect="$1"; shift
|
||||||
fn="$1"; shift
|
fn="$1"; shift
|
||||||
result=$("$fn" "$1" "$2")
|
while test "$#" -ge "2"; do
|
||||||
if test "$result" = "$expect" ; then
|
result=$("$fn" "$1" "$2")
|
||||||
echo "$fn passed for: $1 $2."
|
if test "$result" = "$expect" ; then
|
||||||
else
|
echo "passed $fn for: $1 $2."
|
||||||
echo "$fn failed for: $1 $2."
|
else
|
||||||
fi
|
echo "failed $fn for: $1 $2."
|
||||||
|
fi
|
||||||
|
shift 2
|
||||||
|
done
|
||||||
;;
|
;;
|
||||||
|
# 1 arg functions
|
||||||
iso3countrycode)
|
iso3countrycode)
|
||||||
expect="$1"; shift
|
expect="$1"; shift
|
||||||
fn="$1"; shift
|
fn="$1"; shift
|
||||||
for a in "$@"; do
|
for a in "$@"; do
|
||||||
result=$("$fn" "$a")
|
result=$("$fn" "$a")
|
||||||
if test "$result" = "$expect" ; then
|
if test "$result" = "$expect" ; then
|
||||||
echo "$fn passed for: $a."
|
echo "passed $fn for: $a."
|
||||||
else
|
else
|
||||||
echo "$fn failed for: $a."
|
echo "failed $fn for: $a."
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
;;
|
;;
|
||||||
|
@ -53,26 +58,21 @@ chklength () {
|
||||||
}
|
}
|
||||||
|
|
||||||
testchklength () {
|
testchklength () {
|
||||||
chklength "3" "123"
|
|
||||||
testcmp "true" "chklength" "3" "123"
|
testcmp "true" "chklength" "3" "123"
|
||||||
testcmp "3" "chklength" "2" "1-2"
|
testcmp "3" "chklength" "2" "1-2"
|
||||||
}
|
}
|
||||||
|
testchklength
|
||||||
|
|
||||||
# target filename
|
stringcrop () {
|
||||||
tn="dhl-cp1252.csv"
|
echo "$1" | cut -c1-"$2"
|
||||||
|
}
|
||||||
|
|
||||||
# todo: download file (it's UTF-8 encoded)
|
teststringcrop () {
|
||||||
|
testcmp "string" "stringcrop" "string too long" "6"
|
||||||
|
}
|
||||||
|
teststringcrop
|
||||||
|
|
||||||
fn="dhl.csv"
|
# convert country to countrycode as in ISO3
|
||||||
# backup
|
|
||||||
cp -f "${fn}" "${fn}.bak"
|
|
||||||
|
|
||||||
# replace special spaces and hyphens
|
|
||||||
sed -i -e 's/[\u00A0\u202F[:space:]]+/ /g' -e 's/[\u2010\u2011\u2012\u2013\u002D]+/-/g' -e 's/[\u00AD\uFEFF]+//g' "$fn"
|
|
||||||
|
|
||||||
# todo: validate length for entries
|
|
||||||
|
|
||||||
# todo: convert country to countrycode as in ISO3
|
|
||||||
iso3countrycode () {
|
iso3countrycode () {
|
||||||
if test "$#" -eq 0 ; then
|
if test "$#" -eq 0 ; then
|
||||||
echo "argument missing. ($0)"
|
echo "argument missing. ($0)"
|
||||||
|
@ -103,7 +103,20 @@ testiso3 () {
|
||||||
testcmp "CHE" "iso3countrycode" "Schweiz"
|
testcmp "CHE" "iso3countrycode" "Schweiz"
|
||||||
testcmp "" "iso3countrycode" "Simbabwe" # should fail
|
testcmp "" "iso3countrycode" "Simbabwe" # should fail
|
||||||
}
|
}
|
||||||
#~ testiso3
|
testiso3
|
||||||
|
exit 0;
|
||||||
|
|
||||||
|
# target filename
|
||||||
|
tn="dhl-cp1252.csv"
|
||||||
|
|
||||||
|
# todo: download file (it's UTF-8 encoded)
|
||||||
|
|
||||||
|
fn="dhl.csv"
|
||||||
|
# backup
|
||||||
|
cp -f "${fn}" "${fn}.bak"
|
||||||
|
|
||||||
|
# replace special spaces and hyphens
|
||||||
|
sed -i -e 's/[\u00A0\u202F[:space:]]+/ /g' -e 's/[\u2010\u2011\u2012\u2013\u002D]+/-/g' -e 's/[\u00AD\uFEFF]+//g' "$fn"
|
||||||
|
|
||||||
# convert encoding from UTF-8 to CP1252 "Windows"
|
# convert encoding from UTF-8 to CP1252 "Windows"
|
||||||
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t CP1252 "$if"
|
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t CP1252 "$if"
|
||||||
|
|
Loading…
Reference in New Issue