ds-rss/gen.sh

#!/usr/bin/env sh
#~ set -x
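# assert_tools: check that every tool named in the arguments is available on PATH
# and exit with the number of missing tools otherwise.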
assert_tools () {
err=0
while test $# -gt 0; do
which $1 >/dev/null 2>/dev/null || {
>&2 printf "tool missing: $1\n"
err=$(( $err + 1 ))
}
shift
done
test $err -eq 0 || exit $err
}
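# assert_source: resolve the XML data source, either the first argument or the
# fallback data.xml; the resolved file name is printed on stdout so callers can
# capture it, diagnostics go to stderr.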
assert_source () {
dependencies="test printf"
assert_tools ${dependencies}
test $# -gt 0 && {
datafn="$1";
} || {
>&2 printf "option for data source missing.\n";
datafn="data.xml";
>&2 printf "assuming fallback '${datafn}'.\n";
}
test -f "$datafn" && { >&2 printf "from ${datafn} …\n"; exit 0; } || { >&2 printf "failed.\nfile '${datafn}' does not exist.\n"; exit 1; }
printf "$datafn"
}
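# build: render ds-feed.xml, download.html and index.html from the data source
# with the matching XSL stylesheets.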
build () {
datafn="$1"
dependencies="xsltproc"
assert_tools ${dependencies}
>&2 printf "building ds-feed.xml … "
xsltproc --encoding utf-8 -o ds-feed.xml rss.xsl "$datafn" && { >&2 printf "done.\n"; } || { >&2 printf "failed on rss.xsl.\n"; exit 1; }
>&2 printf "building download.html … "
xsltproc --encoding iso-8859-1 -o download.html download.xsl "$datafn" && { >&2 printf "done.\n"; } || { >&2 printf "failed on download.xsl.\n"; exit 1; }
>&2 printf "building index.html … "
xsltproc --encoding utf-8 -o index.html index.xsl "$datafn" && { >&2 printf "done.\n"; } || { >&2 printf "failed on index.xsl.\n"; exit 1; }
exit 0
}
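# sortxml: rewrite data.xml in place via sort.xsl.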
sortxml () {
#~ test "$1" = "sortxml" && {
dependencies="xsltproc"
assert_tools ${dependencies}
xsltproc --encoding utf-8 -o sorted.xml sort.xsl data.xml
mv -i sorted.xml data.xml
}
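# linkcheck: collect href targets from the local download.html and from the copy
# served at ds.ccc.de, probe each with a HEAD request and report OK/FAILED.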
linkcheck () {
dependencies="curl uuid"
assert_tools ${dependencies}
tmpfile="/tmp/"$(uuid); curl -k https://ds.ccc.de/download.html > $tmpfile
for f in download.html ${tmpfile} ; do
test -e $f || continue
for e in $(cat ${f}|sed 's/<a /\n<a /g'|grep -i " href="|cut -d"=" -f2|cut -d'"' -f2); do
status=$(curl -k --max-time 1 --head --silent --output /dev/null --write-out "%{http_code}" "$e" )
test $status -eq 200 && >&2 printf "%s %s OK\n" "${e}" "${status}" || >&2 printf "%s %s FAILED\n" "${e}" "${status}"
done
done
}
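# mirror_website: pull a local copy of the given site with wget, restricted to
# its own domain; handy for local testing (see the 'mirror' command below).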
mirror_website () {
dependencies="wget"
assert_tools ${dependencies}
local url="$*"
local domain=$(expr "$url" : '^http[s]*://\([^/?]*\)')
wget \
--no-check-certificate \
--recursive \
--no-clobber \
--page-requisites \
--html-extension \
--convert-links \
--restrict-file-names=windows \
--domains $domain \
--no-parent \
$url
}
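# newdataentry: print a new <schleuder> entry for data.xml on stdout; diagnostics
# go to stderr so the generated entry can be redirected or pasted into data.xml.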
newdataentry () {
dependencies="cat grep head cut rev date printf"
2019-09-17 16:34:04 +02:00
assert_tools ${dependencies}
#~ datafn=$(assert_source $1) && { shift; } || { >&2 printf "file '${datafn}' not found."; err=1; exit 1; }
datafn="data.xml"
err=0
customdir=$(pwd)
fl=""
>&2 printf "determining issue number based on '${datafn}'.\n"
lastentry=$(grep -i "<schleuder id=" ${datafn} | head -1 | cut -d'"' -f2)
#~ lastentry=$(grep "<schleuder id=" ${datafn} | cut -d'"' -f2 | sort | tail -1)
newentry=$(( $lastentry + 1 ))
# todo: switches
#~ for o in "$@"; do
#~ on=$(printf $o|cut -d"=" -f1)
#~ ov=$(printf $o|cut -d"=" -f2-)
#~ case $on in
# custom data directory for availability test and filesize, default pwd
#~ c)
#~ customdir) ;;
# custom issue, default next after top entry in ${datafn}
#~ i)
#~ issue) ;;
# custom timestamp, default: current time
#~ d)
#~ datetime) ;;
# custom teaser xml-text, default: none
#~ t)
#~ teaser) ;;
# custom fs for files
#~ p)
#~ pdffs) ;;
#~ e)
#~ epubfs) ;;
# help text
#~ h)
#~ help)
#~ *) ;;
#~ esac
#~ done
test -d $1 && {
customdir=$1; shift
>&2 printf "custom base directory for files such as jpg, pdf, epub: "${customdir}".\n\n"
}
test "$1" = "help" && {
printf "prints a new data entry for a release of datenschleuder\n\n"
printf "release [options]\n\n"
printf "option\tdescription\n\n"
printf "directory\tcustom data directory for availability test and filesize, default pwd\n\n"
printf "number\tprint entry for release with custom number, default is the increment of the top entry in ${datafn}\n\n"
printf "date\tuse a unix timestamp for a custom date\n\n"
printf "teaser\tinclude teaser xml-message as last argument(s)\n\n"
printf "full example: './gen.sh release 99 1568505600 Die Ausgabe 99 ist fertig.'\n"
exit 1
}
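# the next two blocks consume an optional issue number and an optional unix
# timestamp; whatever remains is treated as the teaser text.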
test -n "$1" && test "$1" -eq "$1" 2>/dev/null && {
newentry=$1; shift;
>&2 printf "custom issue number '${newentry}' for '${datafn}'.\n"
} || {
>&2 printf "next argument not a number. assuming issue based on top entry.\n"
}
test -n "$1" && test "$1" -eq "$1" 2>/dev/null && {
>&2 printf "custom datetime.\n\n"
datetime="$1"; shift;
} || { >&2 printf "next argument not a number. using actual system time.\n"; datetime=0; } # remaining arguments should be teaser xml-message.
printf "\nschleuder id=\"$newentry\">\n"
# todo: switch for dtg, integrate custom datetime input early
# or simply implement xslt for handling ISO and nothing else for input
# RFC-5322-Format without seconds and TZ
dtg="TZ=:UTC LANG=en_US.utf8 date -u -R |rev|cut -d\":\" -f2-|rev"; tz=" UT"
# ISO dtg as issues may provide datetime-data on months but mostly not on days …
#~ dtg="TZ=:UTC LANG=en_US.utf8 date +%Y-%m-%d %H:%M %Z"; tz=""
test $datetime -eq 0 && {
datetime=$(TZ=:UTC LANG=en_US.utf8 date -u -R |rev|cut -d":" -f2-|rev) #
} || {
#~ dtg="TZ=:UTC LANG=en_US.utf8 date -d @${datetime} +\"%Y-%m-%d %H:%M %Z\""; tz=""
dtg="TZ=:UTC LANG=en_US.utf8 date -d @${datetime} -u -R |rev|cut -d\":\" -f2-|rev"; tz=" UT"
#~ datetime="$(TZ=:UTC LANG=en_US.utf8 date -d @${datetime} -u -R|rev|cut -d":" -f2-|rev)"
# TZ=:UTC LANG=en_US.utf8 date -d @${datetime} +"%Y-%m-%d %H:%M %Z"
}
datetime="$(sh -c "${dtg}")"
printf "\t<date>${datetime}${tz}</date>\n" # custom format universal time
fn="ds"$(printf "%03d" $newentry)
for d in covers pdfs epubs ; do
case "$d" in
covers) ffn=${d}"/"${fn}".jpg"; cover=$ffn ;;
pdfs) ffn=${d}"/"${fn}".pdf" ;;
epubs) ffn=${d}"/"${fn}".epub" ;;
#~ *) printf "file "${ffn}" not found, in "${d}".\n" ;;
esac
#~ printf "$ffn\n"
test -e "${customdir}/${ffn}" && {
test "$d" = "covers" && {
printf "\t<image>%s</image>\n" "${cover}"
} || {
fs=$(stat -c %s "${customdir}/${ffn}")
printf "\t<link filesize=\"%s\">%s</link>\n" "${fs}" "${ffn}"
}
} || {
mfl=${mfl}" "${ffn}"\n"
#~ printf "file "${ffn}" not found.\n"
err=3
}
done
#~ printf "\t<preface></preface>\n" # not used on server yet
test $# -eq 0 && { teaser="" ; } || {
teaser="$@";
printf "\t<teaser>%s</teaser>\n" "${teaser}"
}
printf "</schleuder>\n"
test "$teaser" = "" && { >&2 printf "\nmissing teaser message.\n"; }
test -e ${customdir}"/"$cover || { >&2 printf "\nmissing cover file: "${cover}"\n"; }
test $err -gt 0 && { >&2 printf "\nmissing files:\n${mfl}\n"; }
}
newinfoentry () {
>&2 printf "not implemented yet.\n\nwill be similar to 'release'.\n"
}
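# command dispatch. illustrative calls (the release example is taken from the
# help text above):
#   ./gen.sh build data.xml
#   ./gen.sh linkcheck
#   ./gen.sh release 99 1568505600 Die Ausgabe 99 ist fertig.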
>&2 echo "$# $@"
if test $# -lt 1 ; then
printf "missing parameter.\n"
exit 1
else
# a mirror is useful for local testing with files
case "$1" in
build)
shift
f=$(assert_source "$@") || { exit 1; }
build "${f}"
;;
#~ sortxml)
#~ f=$(assert_source $@) && { shift; } || { exit 1; }
#~ sortxml "${f}"
#~ ;;
mirror)
>&2 printf "downloading the website may take quite some time.\n"
mirror_website http://ds.ccc.de/
;;
linkcheck)
linkcheck http://ds.ccc.de/
;;
release)
shift
newdataentry $@
;;
info)
shift
newinfoentry $@
;;
*)
printf "this is not implemented yet.\n"
;;
esac
fi
# generate small size version of pdf, might help e.g. for email attachments
#gs -sDEVICE=pdfwrite -dPDFSETTINGS=/ebook -q -o ds100-smaller.pdf ds100.pdf