check for file format added

This commit is contained in:
vv01f 2019-12-20 14:30:21 +01:00
parent 9b95fa13e7
commit 03322bd398
Signed by untrusted user who does not match committer: vv01f
GPG Key ID: 02625A16AC1D1FF6
1 changed files with 18 additions and 6 deletions

24
dhl.sh
View File

@ -1,5 +1,5 @@
#!/usr/bin/env sh
# Names of the external commands used by this script (presumably checked
# elsewhere in the script -- not visible in this excerpt).
# The variable is assigned twice here; the later assignment, which also
# lists `file`, is the one that takes effect.
dependencies="sed test cp libreoffice expr cut"
dependencies="sed test cp libreoffice expr cut file"
# target filename
tn1="dhl-cp1252-n.csv"
@ -170,30 +170,42 @@ runtests () {
}
# run the test suite and stop; nothing below this `exit 0` is executed as
# long as it is in place
runtests
exit 0
# untested:
# todo: download file (it is UTF-8 encoded)
# name of the file to be processed
fn="dhl.csv"
# backup: keep a copy of the input as "<name>.bak", overwriting an older copy
cp -f "$fn" "${fn}.bak"
# second stop: the clean-up and conversion below stay disabled
exit 0;
# replace special spaces and hyphens:
# collapse runs of (no-break / narrow no-break) spaces into one space, map the
# Unicode hyphen and dash variants to "-", and drop soft hyphens and the BOM
# NOTE(review): `\uXXXX` escapes and an unescaped `+` are not portable sed
# basic-regex syntax -- verify this expression against the sed in use.
sed -i -e 's/[\u00A0\u202F[:space:]]+/ /g' -e 's/[\u2010\u2011\u2012\u2013\u002D]+/-/g' -e 's/[\u00AD\uFEFF]+//g' "$fn"
# todo: add quote variants
# todo: check for delimiters that may conflict during conversion
# convert encoding from UTF-8 to CP1252 "Windows"
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t CP1252 "$if"
# convert encoding from UTF-8 to CP1252 / WINDOWS-1252
# another possibility for conversion? seems not to be reliable: iconv -o "$of" -f UTF-8 -t WINDOWS-1252 "$if"
# convcsv FILE
#
# Convert the CSV file FILE in place by a round trip through LibreOffice:
# FILE is imported with the filter options 59,34,76,1 (field separator ";",
# text delimiter '"', character set 76, start at line 1), saved as an ODS
# spreadsheet, and exported again as CSV with the options 44,34,1,0,1,,0
# (field separator ",", text delimiter '"', character set 1).
# Per the filter options documentation linked below, character set 76 is
# UTF-8 and 1 is Windows-1252.
# doc for headless conversion options https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
#
# Arguments: $1 - path of the CSV file to convert
# Globals:   sets if (input name), of (intermediate ODS name) and format
# Outputs:   diagnostics on stderr
# Exits:     1 when the argument is missing or empty,
#            2 when the intermediate ODS file already exists
# Returns:   0 on success, 3 when a conversion step failed (FILE is put back)
#
# NOTE(review): libreoffice is called without --outdir, so the function
# presumably has to be run from the directory that holds FILE, and FILE
# presumably needs a ".csv" suffix for the re-exported file to get the same
# name -- confirm.
convcsv () {
  if test $# -eq 0 || test -z "${1}" ; then >&2 echo "argument missing."; exit 1; fi
  if="${1}"

  # `file -b` prints only the description, so names containing spaces cannot
  # shift the field that holds the detected encoding
  format=$(file -b -- "${if}" | cut -d' ' -f1)
  if test "$format" != "UTF-8" ; then >&2 echo "source file encoding not matching the expected UTF-8."; fi

  # intermediate spreadsheet: input name with its last suffix replaced by .ods
  of="${if%.*}.ods"
  if test -e "${of}" ; then >&2 echo "output file already exists."; exit 2; fi

  #~ iconv -o "$of" -f $format -t WINDOWS-1252 "$if"
  # to take care of more options in csv at the same time, convert to ods and back to csv
  # the result file is checked as well, in case libreoffice reports success
  # without having written anything
  if ! libreoffice --headless --convert-to ods --infilter=CSV:59,34,76,1 "${if}" || ! test -e "${of}" ; then
    >&2 echo "conversion to ods failed."
    return 3
  fi

  # move the original out of the way so the export can reuse its name
  mv -- "${if}" "${if}.bak" || return 3

  # NOTE(review): --infilter=CSV is also passed while reading the ODS file;
  # kept as in the original, confirm that libreoffice ignores it here.
  if libreoffice --headless --infilter=CSV:59,34,76,1 --convert-to csv:"Text - txt - csv (StarCalc)":"44,34,1,0,1,,0" "${of}" && test -e "${if}" ; then
    rm -- "${of}" "${if}.bak"
  else
    >&2 echo "conversion back to csv failed, restoring the original file."
    rm -f -- "${of}"
    mv -- "${if}.bak" "${if}"
    return 3
  fi

  format=$(file -b -- "${if}" | cut -d' ' -f1)
  if test "$format" != "Non-ISO" ; then >&2 echo "output file encoding not matching the expected Non-ISO, its $format."; fi
}
# run the conversion on the file named in $fn
convcsv "$fn"