check for file format added
This commit is contained in:
parent
9b95fa13e7
commit
03322bd398
24
dhl.sh
24
dhl.sh
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env sh
|
#!/usr/bin/env sh
|
||||||
dependencies="sed test cp libreoffice expr cut"
|
dependencies="sed test cp libreoffice expr cut file"
|
||||||
|
|
||||||
# target filename
|
# target filename
|
||||||
tn1="dhl-cp1252-n.csv"
|
tn1="dhl-cp1252-n.csv"
|
||||||
|
@ -170,30 +170,42 @@ runtests () {
|
||||||
}
|
}
|
||||||
runtests
|
runtests
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
# untested:
|
||||||
|
|
||||||
# todo: download file (it's UTF-8 encoded)
|
# todo: download file (it's UTF-8 encoded)
|
||||||
|
|
||||||
fn="dhl.csv"
|
fn="dhl.csv"
|
||||||
# backup
|
# backup
|
||||||
cp -f "$fn" "${fn}.bak"
|
cp -f "$fn" "${fn}.bak"
|
||||||
|
|
||||||
exit 0;
|
|
||||||
|
|
||||||
# replace special spaces and hyphens
|
# replace special spaces and hyphens
|
||||||
sed -i -e 's/[\u00A0\u202F[:space:]]+/ /g' -e 's/[\u2010\u2011\u2012\u2013\u002D]+/-/g' -e 's/[\u00AD\uFEFF]+//g' "$fn"
|
sed -i -e 's/[\u00A0\u202F[:space:]]+/ /g' -e 's/[\u2010\u2011\u2012\u2013\u002D]+/-/g' -e 's/[\u00AD\uFEFF]+//g' "$fn"
|
||||||
|
# todo: add quote variants
|
||||||
|
# todo: check for delimiters that may conflict during conversion
|
||||||
|
|
||||||
# convert encoding from UTF-8 to CP1252 "Windows"
|
# convert encoding from UTF-8 to CP1252 / WINDOWS-1252
|
||||||
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t CP1252 "$if"
|
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t WINDOWS-1252 "$if"
|
||||||
convcsv () {
|
convcsv () {
|
||||||
if test $# -eq 0 ; then echo "argument missing."; exit 1; fi
|
if test $# -eq 0 ; then >&2 echo "argument missing."; exit 1; fi
|
||||||
if test -e $1 ; then echo "file already exists."; exit 2; fi
|
|
||||||
# doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
|
# doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
|
||||||
if="${1}"
|
if="${1}"
|
||||||
|
format=$(file ${if}|cut -d' ' -f2)
|
||||||
|
if test "$format" != "UTF-8" ; then >&2 echo "source file encoding not matching the expected UTF-8."; fi
|
||||||
of=$(echo "${if}"|rev|cut -d. -f2-|rev)".ods"
|
of=$(echo "${if}"|rev|cut -d. -f2-|rev)".ods"
|
||||||
|
if test -e $of ; then >&2 echo "output file already exists."; exit 2; fi
|
||||||
|
#~ iconv -o "$of" -f $format -t WINDOWS-1252 "$if"
|
||||||
|
# to take care of more options in csv at the same time, convert to ods and back to csv
|
||||||
libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 "${if}" && {
|
libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 "${if}" && {
|
||||||
mv "${if}" "${if}.bak"
|
mv "${if}" "${if}.bak"
|
||||||
libreoffice --headless --infilter=CSV:59,34,76,1 --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" "${tf}" && {
|
libreoffice --headless --infilter=CSV:59,34,76,1 --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" "${tf}" && {
|
||||||
rm "${tf}" "${if}.bak"
|
rm "${tf}" "${if}.bak"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
format=$(file ${if}|cut -d' ' -f2)
|
||||||
|
if test "$format" != "Non-ISO" ; then >&2 echo "output file encoding not matching the expected Non-ISO, its $format."; fi
|
||||||
}
|
}
|
||||||
convcsv "$fn"
|
convcsv "$fn"
|
||||||
|
|
Loading…
Reference in New Issue