ds-rss/gen.sh

256 lines
8.1 KiB
Bash
Raw Normal View History

2019-05-07 09:17:09 +02:00
#!/usr/bin/env sh
#~ set -x
# Verify that every named tool can be located with `command -v`.
# Arguments: $@ - names of the required tools
# Outputs:   one "tool missing: NAME" line on stderr per missing tool
# Globals:   err (written) - number of missing tools
# Exits:     terminates the shell with the number of missing tools as status
#            when at least one is missing; returns 0 otherwise.
assert_tools () {
  err=0
  while test $# -gt 0; do
    command -v "$1" >/dev/null 2>&1 || {
      # The tool name is data: never let it act as the printf format string.
      >&2 printf 'tool missing: %s\n' "$1"
      err=$(( err + 1 ))
    }
    shift
  done
  test "$err" -eq 0 || exit "$err"
}
# Resolve the XML data file to work on and print its name on stdout.
# Meant to be called inside a command substitution, because it leaves via
# `exit` (which then only ends the substitution's subshell).
# Arguments: $1 - optional path of the data file; "data.xml" is assumed when
#                 no argument is given
# Outputs:   stdout - the resolved file name (without trailing newline)
#            stderr - progress and error messages
# Globals:   datafn, err (written)
# Exits:     0 when a regular file was found; 2 when neither the requested
#            file nor the fallback "data.xml" exists.
assert_source () {
  assert_tools test printf
  if test $# -gt 0; then
    datafn="$1"
  else
    >&2 printf 'option for data source missing.\n'
    datafn="data.xml"
    >&2 printf "assuming fallback '%s'.\n" "$datafn"
  fi
  err=0
  # First pass checks the requested file, second pass the fallback.
  while test "$err" -lt 2; do
    if test -f "$datafn"; then
      >&2 printf 'from %s …\n' "$datafn"
      err=0
      break
    fi
    >&2 printf "failed.\nfile '%s' does not exist.\n" "$datafn"
    err=$(( err + 1 ))
    datafn="data.xml"
  done
  test "$err" -eq 0 || exit "$err"
  # '%s' keeps names containing '%' or '\' intact.
  printf '%s' "$datafn"
  exit 0
}
# Render ds-feed.xml, download.html and index.html from the data file with
# xsltproc and the stylesheets rss.xsl, download.xsl and index.xsl.
# Arguments: $1 - path of the XML data file
# Outputs:   stderr - progress messages; the three files are written to the
#            current directory
# Globals:   datafn (written)
# Exits:     0 when all three transformations succeed, 1 on the first failure.
build () {
  datafn="$1"
  assert_tools xsltproc date

  >&2 printf 'building ds-feed.xml … '
  # The stylesheet can read the timestamp via
  # <xsl:value-of select="$current-date"/>.
  # `date -R` is the short form of `--rfc-email`.
  if xsltproc --stringparam current-date "$(date -R)" --encoding utf-8 \
      -o ds-feed.xml rss.xsl "$datafn"; then
    >&2 printf 'done.\n'
  else
    >&2 printf 'failed on rss.xsl.\n'
    exit 1
  fi

  >&2 printf 'building download.html … '
  if xsltproc --encoding iso-8859-1 -o download.html download.xsl "$datafn"; then
    >&2 printf 'done.\n'
  else
    >&2 printf 'failed on download.xsl.\n'
    exit 1
  fi

  >&2 printf 'building index.html … '
  if xsltproc --encoding utf-8 -o index.html index.xsl "$datafn"; then
    >&2 printf 'done.\n'
  else
    >&2 printf 'failed on index.xsl.\n'
    exit 1
  fi
  exit 0
}
# Sort the data file with sort.xsl and move the result over the original.
# Arguments: $1 - path of the XML data file (default: data.xml)
# Outputs:   sorted.xml is written to the current directory and then moved
#            over the data file; `mv -i` asks before overwriting
# Returns:   1 when xsltproc fails (the data file is left untouched),
#            otherwise the status of mv.
sortxml () {
  assert_tools xsltproc
  # Do not replace the data file with a stale or partial sorted.xml.
  xsltproc --encoding utf-8 -o sorted.xml sort.xsl "${1:-data.xml}" || {
    >&2 printf 'failed on sort.xsl.\n'
    return 1
  }
  mv -i sorted.xml "${1:-data.xml}"
}
2019-09-17 16:34:04 +02:00
# Check every double-quoted href found in the local download.html (when it
# exists) and in the copy fetched from https://ds.ccc.de/download.html.
# Arguments: none are evaluated
# Outputs:   stderr - one "URL STATUS OK|FAILED" line per link, where STATUS
#            is the HTTP code of a HEAD request (OK only for 200)
# Globals:   tmpfile, f, e, status (written)
# Returns:   1 when no temporary file could be created, otherwise the status
#            of removing the temporary file.
linkcheck () {
  assert_tools curl mktemp grep cut
  # mktemp gives an unpredictable name; it is removed again at the end.
  tmpfile=$(mktemp) || {
    >&2 printf 'cannot create temporary file.\n'
    return 1
  }
  curl -k https://ds.ccc.de/download.html > "$tmpfile"
  for f in download.html "$tmpfile"; do
    test -e "$f" || continue
    # Take the value of the href attribute itself, wherever it appears in
    # the tag.
    grep -io ' href="[^"]*"' "$f" | cut -d'"' -f2 |
      while IFS= read -r e; do
        # </dev/null keeps curl away from the list of links on stdin.
        status=$(curl -k --max-time 1 --head --silent --output /dev/null \
          --write-out '%{http_code}' "$e" </dev/null)
        if test "$status" = "200"; then
          >&2 printf '%s %s OK\n' "$e" "$status"
        else
          >&2 printf '%s %s FAILED\n' "$e" "$status"
        fi
      done
  done
  rm -f -- "$tmpfile"
}
# Mirror a website recursively with wget, restricted to the host of the URL.
# Arguments: $1 - URL to mirror; its host part limits the recursion
#            $@ - all arguments are handed to wget as URLs
# Returns:   1 when no URL is given, otherwise the status of wget.
mirror_website () {
  assert_tools wget
  test $# -gt 0 || {
    >&2 printf 'mirror: missing url.\n'
    return 1
  }
  local url domain
  url=$1
  # Host part: drop the scheme, then everything from the first '/' or '?'.
  domain=${url#*://}
  domain=${domain%%[/?]*}
  wget \
    --no-check-certificate \
    --recursive \
    --no-clobber \
    --page-requisites \
    --html-extension \
    --convert-links \
    --restrict-file-names=windows \
    --domains "$domain" \
    --no-parent \
    "$@"
}
2019-09-17 16:34:04 +02:00
# Succeeds when $1 consists solely of decimal digits.
_ds_is_uint () {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  return 0
}

# Prints $1 without leading zeros so it is never read as an octal number.
_ds_decimal () {
  set -- "${1#"${1%%[!0]*}"}"
  printf '%s' "${1:-0}"
}

# Print the XML snippet for a new release entry on stdout.
# Arguments (all optional, in this order):
#   data file  - consumed when it is the file assert_source resolves to
#   directory  - base directory holding covers/, pdfs/ and epubs/
#                (default: current directory)
#   help       - print the usage text and exit 1
#   number     - issue number (default: id of the first <schleuder> entry in
#                the data file plus one)
#   timestamp  - unix time of the release (default: now)
#   teaser...  - all remaining arguments, joined by blanks
# Outputs:   stdout - the <schleuder> snippet only
#            stderr - progress messages and warnings about missing files
# Globals:   datafn, err, mfl, customdir, lastentry, newentry, datetime,
#            stamp, fn, cover, ffn, fs, teaser (written)
# Returns:   0 once the snippet is printed; missing files are warnings only.
# Exits:     1 when the data file cannot be resolved or after the help text.
# TODO: named switches (customdir, issue, datetime, teaser, pdffs, epubfs,
#       help) instead of positional arguments.
newdataentry () {
  assert_tools grep head cut date printf wc
  # ${1:+"$1"} passes the first argument only when there is a non-empty one.
  if datafn=$(assert_source ${1:+"$1"}); then
    if test "$datafn" = "$1"; then
      shift
    fi
  else
    >&2 printf "file '%s' not found." "$datafn"
    err=1
    exit 1
  fi

  err=0
  mfl=""
  customdir=$(pwd)

  >&2 printf "determining issue number based on '%s'.\n" "$datafn"
  lastentry=$(grep -i '<schleuder id=' "$datafn" | head -1 | cut -d'"' -f2)
  if _ds_is_uint "$lastentry"; then
    newentry=$(( $(_ds_decimal "$lastentry") + 1 ))
  else
    newentry=1
  fi

  if test $# -gt 0 && test -d "$1"; then
    customdir=$1
    >&2 printf 'custom base directory for files such as jpg, pdf, epub: %s.\n\n' "$customdir"
    shift
  fi

  if test $# -gt 0 && test "$1" = "help"; then
    printf '\n\033[1mdatenschleuder release xml snippet generation\033[0m\nprints a new data entry for a release of datenschleuder\n\n'
    printf 'release [options]\n\n'
    printf '\033[1moption\t\tdescription\033[0m\n\n'
    printf 'directory\tcustom data directory for availability test and filesize, default pwd\n\n'
    printf 'number\t\tprint entry for release with custom number, default is the increment of the top entry in %s\n\n' "$datafn"
    printf 'date\t\tuse a unix timestamp for a custom date\n\n'
    printf 'teaser\t\tinclude teaser xml-message as last argument(s)\n\n'
    printf "\033[1mfull example:\033[0m '\$./gen.sh release 99 1568505600 Die Ausgabe 99 ist fertig.'\n\n"
    exit 1
  fi

  # Diagnostics go to stderr so that stdout carries nothing but the snippet.
  if test $# -gt 0 && _ds_is_uint "$1"; then
    newentry=$(_ds_decimal "$1")
    shift
    >&2 printf "custom issue number '%s' for '%s'.\n" "$newentry" "$datafn"
  else
    >&2 printf 'next argument not a number. assuming issue based on top entry.\n'
  fi

  if test $# -gt 0 && _ds_is_uint "$1"; then
    >&2 printf 'custom datetime.\n'
    datetime=$(_ds_decimal "$1")
    shift
  else
    >&2 printf 'next argument not a number. using actual system time.\n'
    datetime=""
  fi
  # Whatever is left in "$@" is the teaser xml-message.

  printf '\n<schleuder id="%s">\n' "$newentry"

  # RFC 5322 date in UTC, cut after the minutes and tagged " UT".
  # TODO: accept custom date input earlier, or switch input and XSLT to ISO
  #       dates only.
  if test -n "$datetime"; then
    stamp=$(TZ=UTC LC_ALL=C date -u -R -d "@${datetime}")
  else
    stamp=$(TZ=UTC LC_ALL=C date -u -R)
  fi
  # Dropping the last ':'-separated field removes seconds and zone offset.
  printf '\t<date>%s UT</date>\n' "${stamp%:*}"

  fn=$(printf 'ds%03d' "$newentry")
  cover="covers/${fn}.jpg"
  for ffn in "$cover" "pdfs/${fn}.pdf" "epubs/${fn}.epub"; do
    if test -e "${customdir}/${ffn}"; then
      if test "$ffn" = "$cover"; then
        printf '\t<image>%s</image>\n' "$cover"
      else
        fs=$(wc -c < "${customdir}/${ffn}")
        # $(( fs )) strips any padding some wc implementations add.
        printf '\t<link filesize="%d">%s</link>\n' "$(( fs ))" "$ffn"
      fi
    else
      # Collect missing files, one per line, for the report below.
      mfl="${mfl} ${ffn}
"
      err=3
    fi
  done

  if test $# -eq 0; then
    teaser=""
  else
    teaser="$*"
    # The teaser is inserted as given: it may carry XML markup.
    printf '\t<teaser>%s</teaser>\n' "$teaser"
  fi
  printf '</schleuder>\n'

  test -n "$teaser" || >&2 printf '\nmissing teaser message.\n'
  test -e "${customdir}/${cover}" || >&2 printf '\nmissing cover file: %s\n' "$cover"
  test "$err" -eq 0 || >&2 printf '\nmissing files:\n%s\n' "$mfl"
  return 0
}
# Placeholder for the "info" sub-command.
# Outputs:   stderr - a notice that the command is not implemented yet
# Returns:   the status of printf.
newinfoentry () {
  >&2 printf "not implemented yet.\n\nwill be similar to 'release'.\n"
}
#~ echo $# $@
# Entry point: the first argument selects the sub-command and the remaining
# arguments are handed to it unchanged ("$@" keeps arguments with blanks
# intact). Usage errors are reported on stderr with a non-zero exit status.
if test $# -lt 1; then
  >&2 printf "missing parameter.\n"
  exit 1
fi
case "$1" in
  build)
    shift
    f=$(assert_source "$@") || exit 1
    build "${f}"
    ;;
  sortxml)
    shift
    f=$(assert_source "$@") || exit 1
    sortxml "${f}"
    ;;
  mirror)
    # a mirror is useful for local testing with files
    >&2 printf "downloading the website may take quite some time.\n"
    mirror_website http://ds.ccc.de/
    ;;
  linkcheck)
    linkcheck http://ds.ccc.de/
    ;;
  release)
    shift
    newdataentry "$@"
    ;;
  info)
    shift
    newinfoentry "$@"
    ;;
  *)
    >&2 printf "this is not implemented yet.\n"
    exit 1
    ;;
esac
# generate small size version of pdf, might help e.g. for email attachments
#gs -sDEVICE=pdfwrite -dPDFSETTINGS=/ebook -q -o ds100-smaller.pdf ds100.pdf