#!/usr/bin/env sh
#
# Cleans up typographic characters in the CSV file named by $fn (after
# writing a backup copy to dhl.csv) and provides iso3countrycode, a lookup
# from German country names to ISO 3166-1 alpha-3 codes.
#
# Work in progress: download, CP1252 conversion and length validation are
# still open (see the TODO notes below). The script currently ends by running
# the lookup self-test, whose last case fails on purpose, so the exit status
# of the script is 1.

# target filename (only referenced by the disabled iconv call below)
tn="dhl-ds-versendung.csv"

# TODO: download file (it is UTF-8 encoded)
fn="dhl-ds-versendung-3.csv"

#######################################
# Normalise typographic characters of a UTF-8 text file in place.
#   1. removes soft hyphens (U+00AD) and BOM / zero-width no-break
#      spaces (U+FEFF)
#   2. collapses runs of no-break space (U+00A0), narrow no-break space
#      (U+202F) and ordinary whitespace into one ASCII space
#   3. collapses runs of U+2010..U+2013 and ASCII '-' into one ASCII '-'
# Arguments: $1 - path of the file to rewrite
# Returns:   0 on success, 1 if the temp file, sed or the write-back failed
#
# The body is a subshell "( ... )" so the helper variables stay private
# without relying on the non-POSIX 'local'.
#######################################
normalize_typography() (
  file=$1

  # sed has no \uXXXX escape, so the UTF-8 byte sequences are produced with
  # printf octal escapes and matched through alternation. Alternation (rather
  # than a bracket expression) keeps the match correct in the C locale too,
  # where a bracket expression would match the individual bytes.
  nbsp=$(printf '\302\240')        # U+00A0 no-break space
  nnbsp=$(printf '\342\200\257')   # U+202F narrow no-break space
  h2010=$(printf '\342\200\220')   # U+2010 hyphen
  h2011=$(printf '\342\200\221')   # U+2011 non-breaking hyphen
  h2012=$(printf '\342\200\222')   # U+2012 figure dash
  h2013=$(printf '\342\200\223')   # U+2013 en dash
  shy=$(printf '\302\255')         # U+00AD soft hyphen
  bom=$(printf '\357\273\277')     # U+FEFF byte order mark

  tmp=$(mktemp) || {
    printf 'cannot create temporary file\n' >&2
    exit 1
  }

  # Invisible characters are deleted first so they cannot split a run of
  # spaces or hyphens. The result goes through a temp file because
  # 'sed -i' is not portable between GNU and BSD sed.
  if sed -E \
      -e "s/(${shy}|${bom})+//g" \
      -e "s/(${nbsp}|${nnbsp}|[[:space:]])+/ /g" \
      -e "s/(${h2010}|${h2011}|${h2012}|${h2013}|-)+/-/g" \
      -- "$file" >"$tmp" \
    && cat -- "$tmp" >"$file"; then
    status=0
  else
    status=1
  fi

  rm -f -- "$tmp"
  exit "$status"
)

# backup, then replace special spaces and hyphens
# (the file is only modified once the backup copy exists)
if [ -f "$fn" ]; then
  if cp -f -- "$fn" "dhl.csv"; then
    normalize_typography "$fn" \
      || printf 'cleaning up %s failed\n' "$fn" >&2
  else
    printf 'backup of %s failed; file left untouched\n' "$fn" >&2
  fi
else
  printf 'input file not found: %s\n' "$fn" >&2
fi

# TODO: convert encoding from UTF-8 to CP1252 "Windows"
# a possibility for conversion? does not seem to be reliable
#~ iconv -o "$tn" -f UTF-8 -t CP1252 "$fn"

# TODO: validate length for entries

# TODO: convert country to country code as in ISO3

#######################################
# Map a German country name to its ISO 3166-1 alpha-3 code.
# Arguments: $1 - country name (e.g. "Deutschland")
# Outputs:   the three-letter code on stdout; diagnostics on stderr
# Returns:   0 on success, 1 for an unknown country, 2 if $1 is missing
#
# Uses 'return' rather than 'exit' so a failed lookup does not terminate
# the calling shell, and keeps stdout free of error text so that
# code=$(iso3countrycode "$name") never captures a message as a code.
#######################################
iso3countrycode() {
  if test "$#" -eq 0; then
    printf '%s\n' "argument missing. ($0)" >&2
    return 2
  fi

  case "$1" in
    Belgien) echo "BEL" ;;
    Deutschland) echo "DEU" ;;
    England) echo "GBR" ;;
    Luxemburg | Luxembourg) echo "LUX" ;;
    Niederlande) echo "NLD" ;;
    Österreich) echo "AUT" ;;
    Schweiz) echo "CHE" ;;
    Ungarn) echo "HUN" ;;
    *)
      printf '%s\n' "unknown country: $1" >&2
      return 1
      ;;
  esac
}

#######################################
# Smoke test for iso3countrycode: prints the code for every supported name.
# The final lookup is expected to fail, so this function returns 1.
#######################################
testiso3() {
  # covering data in https://doku.ccc.de/index.php?title=Attribut:Chaostreff-Country&limit=500&offset=0
  iso3countrycode "Belgien"
  iso3countrycode "Deutschland"
  iso3countrycode "England"
  iso3countrycode "Luxembourg"
  iso3countrycode "Luxemburg"
  iso3countrycode "Niederlande"
  iso3countrycode "Österreich"
  iso3countrycode "Ungarn"
  iso3countrycode "Schweiz"
  iso3countrycode "Simbabwe" # should fail
}

testiso3