diff --git a/dhl.sh b/dhl.sh index c7b0aed..67abd8f 100755 --- a/dhl.sh +++ b/dhl.sh @@ -1,5 +1,5 @@ #!/usr/bin/env sh -dependencies="sed test cp libreoffice expr cut" +dependencies="sed test cp libreoffice expr cut file" # target filename tn1="dhl-cp1252-n.csv" @@ -170,30 +170,42 @@ runtests () { } runtests +exit 0 + +# untested: + # todo: download file (its UTF-8 encoded) fn="dhl.csv" # backup cp -f "$fn" "${fn}.bak" -exit 0; + # replace special spaces and hyphens sed -i -e 's/[\u00A0\u202F[:space:]]+/ /g' -e 's/[\u2010\u2011\u2012\u2013\u002D]+/-/g' -e 's/[\u00AD\uFEFF]+//g' "$fn" +# todo: add quote variants +# todo: check for delimiters that may conflict during conversion -# convert encoding from UTF-8 to CP1252 "Windows" -# another possibility for conversion? seems not to be relyable: iconv -o "$of" -f UTF-8 -t CP1252 "$if" +# convert encoding from UTF-8 to CP1252 / WINDOWS-1252 +# another possibility for conversion? seems not to be relyable: iconv -o "$of" -f UTF-8 -t WINDOWS-1252 "$if" convcsv () { - if test $# -eq 0 ; then echo "argument missing."; exit 1; fi - if test -e $1 ; then echo "file already exists."; exit 2; fi + if test $# -eq 0 ; then >&2 echo "argument missing."; exit 1; fi # doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options if="${1}" + format=$(file ${if}|cut -d' ' -f2) + if test "$format" != "UTF-8" ; then >&2 echo "source file encoding not matching the expected UTF-8."; fi of=$(echo "${if}"|rev|cut -d. -f2-|rev)".ods" + if test -e $of ; then >&2 echo "output file already exists."; exit 2; fi + #~ iconv -o "$of" -f $format -t WINDOWS-1252 "$if" + # to take care of more options in csv at the same time, convert to ods and back to csv libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 "${if}" && { mv "${if}" "${if}.bak" libreoffice --headless --infilter=CSV:59,34,76,1 --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" "${tf}" && { rm "${tf}" "${if}.bak" } } + format=$(file ${if}|cut -d' ' -f2) + if test "$format" != "Non-ISO" ; then >&2 echo "output file encoding not matching the expected Non-ISO, its $format."; fi } convcsv "$fn"