check for file format added
This commit is contained in:
parent
9b95fa13e7
commit
03322bd398
24
dhl.sh
24
dhl.sh
|
@ -1,5 +1,5 @@
|
|||
#!/usr/bin/env sh
|
||||
dependencies="sed test cp libreoffice expr cut"
|
||||
dependencies="sed test cp libreoffice expr cut file"
|
||||
|
||||
# target filename
|
||||
tn1="dhl-cp1252-n.csv"
|
||||
|
@ -170,30 +170,42 @@ runtests () {
|
|||
}
|
||||
runtests
|
||||
|
||||
exit 0
|
||||
|
||||
# untested:
|
||||
|
||||
# todo: download file (it's UTF-8 encoded)
|
||||
|
||||
fn="dhl.csv"
|
||||
# backup
|
||||
cp -f "$fn" "${fn}.bak"
|
||||
|
||||
exit 0;
|
||||
|
||||
|
||||
# replace special spaces and hyphens
|
||||
# sed does not interpret \uXXXX escapes, and `+` is a literal character unless
# extended regular expressions (-E) are enabled. The characters are therefore
# built as UTF-8 byte sequences with printf octal escapes and matched through
# ERE alternation instead of a bracket expression.
nbsp=$(printf '\302\240')          # U+00A0 no-break space
nnbsp=$(printf '\342\200\257')     # U+202F narrow no-break space
hyphen=$(printf '\342\200\220')    # U+2010 hyphen
nbhyphen=$(printf '\342\200\221')  # U+2011 non-breaking hyphen
figdash=$(printf '\342\200\222')   # U+2012 figure dash
endash=$(printf '\342\200\223')    # U+2013 en dash
shy=$(printf '\302\255')           # U+00AD soft hyphen
bom=$(printf '\357\273\277')       # U+FEFF zero width no-break space / BOM
sed -i -E \
  -e "s/(${nbsp}|${nnbsp}|[[:space:]])+/ /g" \
  -e "s/(${hyphen}|${nbhyphen}|${figdash}|${endash}|-)+/-/g" \
  -e "s/(${shy}|${bom})+//g" \
  "$fn"
|
||||
# todo: add quote variants
|
||||
# todo: check for delimiters that may conflict during conversion
|
||||
|
||||
# convert encoding from UTF-8 to CP1252 "Windows"
|
||||
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t CP1252 "$if"
|
||||
# convert encoding from UTF-8 to CP1252 / WINDOWS-1252
|
||||
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t WINDOWS-1252 "$if"
|
||||
# convcsv FILE
#
# Re-encode the CSV file FILE in place by round-tripping it through
# LibreOffice: FILE -> <stem>.ods -> <stem>.csv. The second step writes
# <stem>.csv, so FILE is expected to carry the .csv extension.
#
# Arguments: $1 - path of the CSV file to convert
# Globals:   if, of, format, outdir (written; names kept from the original)
# Outputs:   diagnostics on stderr
# Exits:     1 - no argument given
#            2 - the intermediate .ods file already exists
#            3 - a conversion step or the temporary backup failed; when the
#                source had already been moved aside it is moved back first
#
# NOTE: the temporary backup is "${if}.bak"; an already existing file of that
# name is overwritten by the mv below and removed again on success.
convcsv () {
  if test $# -eq 0 ; then >&2 echo "argument missing."; exit 1; fi
  # doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
  if="${1}"

  # -b omits the "name: " prefix, so blanks in the filename cannot shift the
  # field that cut extracts.
  format=$(file -b -- "${if}" | cut -d' ' -f1)
  if test "$format" != "UTF-8" ; then >&2 echo "source file encoding not matching the expected UTF-8."; fi

  # Intermediate spreadsheet: FILE with its last extension replaced by .ods.
  of="${if%.*}.ods"
  if test -e "${of}" ; then >&2 echo "output file already exists."; exit 2; fi

  # LibreOffice writes converted files to the current directory unless
  # --outdir is given, so point it at the directory FILE lives in.
  case "${if}" in
    */*) outdir="${if%/*}" ;;
    *)   outdir="." ;;
  esac
  # FILE directly below / leaves an empty directory part.
  test -n "${outdir}" || outdir="/"

  #~ iconv -o "$of" -f $format -t WINDOWS-1252 "$if"
  # to take care of more options in csv at the same time, convert to ods and back to csv
  # Import filter tokens: 59 = ';' separator, 34 = '"' text delimiter,
  # 76 = UTF-8 (character set number per the filter options page linked above).
  if ! libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 --outdir "${outdir}" "${if}" \
    || ! test -e "${of}" ; then
    >&2 echo "conversion to ods failed."
    exit 3
  fi

  # Move the source aside: the csv export below is written under its name.
  if ! mv -- "${if}" "${if}.bak" ; then
    rm -f -- "${of}"
    >&2 echo "backup of source file failed."
    exit 3
  fi

  # Export filter tokens: 44 = ',' separator, 34 = '"' text delimiter,
  # 1 = Windows-1252 (per the filter options page linked above).
  # NOTE(review): the CSV import filter is still passed although the input is
  # now an .ods file - confirm that LibreOffice ignores it here.
  if libreoffice --headless --infilter=CSV:59,34,76,1 --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" --outdir "${outdir}" "${of}" \
    && test -e "${if}" ; then
    rm -- "${of}" "${if}.bak"
  else
    # Put the source back so a failed run does not leave FILE missing, and
    # drop the intermediate file so a retry is not rejected with exit 2.
    mv -f -- "${if}.bak" "${if}"
    rm -f -- "${of}"
    >&2 echo "conversion to csv failed, source file restored."
    exit 3
  fi

  format=$(file -b -- "${if}" | cut -d' ' -f1)
  if test "$format" != "Non-ISO" ; then >&2 echo "output file encoding not matching the expected Non-ISO, its $format."; fi
}
|
||||
convcsv "$fn"
|
||||
|
|
Loading…
Reference in New Issue