ds-rss/gen.sh
2022-10-09 17:23:54 +02:00

338 lines
11 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env sh
#~ set -x
# assert_tools TOOL...
# Checks that every named tool can be resolved with `command -v`.
# For each tool that cannot be resolved, "tool missing: NAME" is written to
# stderr. If at least one tool is missing, the current (sub)shell exits with
# the number of missing tools as its status; otherwise the function returns.
# Side effect: the global `err` holds the number of missing tools.
assert_tools () {
  err=0
  while test $# -gt 0; do
    if ! command -v "$1" >/dev/null 2>&1; then
      # The name is passed as an argument, never as the format string, so a
      # '%' or '\' inside it is reported verbatim.
      >&2 printf 'tool missing: %s\n' "$1"
      err=$(( err + 1 ))
    fi
    shift
  done
  test "${err}" -eq 0 || exit "${err}"
}
# assert_source [FILE]
# Resolves the XML data file to work on and prints its name on stdout.
#   FILE  optional data file; when omitted, "data.xml" is assumed.
# If FILE is not a regular file, "data.xml" is tried once as a fallback.
# Progress and error messages go to stderr so that stdout carries only the
# resolved file name.
# This function always terminates the shell it runs in (status 0 on success,
# 2 when neither FILE nor the fallback exists), so call it inside a command
# substitution: f=$(assert_source "$@") || exit 1
assert_source () {
  dependencies="test printf"
  # Intentionally unquoted: the list is split into one word per tool.
  assert_tools ${dependencies}
  if test $# -gt 0; then
    datafn="$1"
  else
    >&2 printf 'option for data source missing.\n'
    datafn="data.xml"
    >&2 printf "assuming fallback '%s'.\n" "${datafn}"
  fi
  err=0
  # At most two attempts: the requested file, then the fallback.
  while test "${err}" -lt 2; do
    if test -f "${datafn}"; then
      >&2 printf 'from %s …\n' "${datafn}"
      err=0
      break
    fi
    >&2 printf "failed.\nfile '%s' does not exist.\n" "${datafn}"
    err=$(( err + 1 ))
    datafn="data.xml"
  done
  test "${err}" -eq 0 || exit "${err}"
  # '%s' keeps file names containing '%' or '\' intact; callers compare this
  # output with their own argument.
  printf '%s' "${datafn}"
  exit 0
}
# _build_transform OUTPUT STYLESHEET DATAFILE [XSLTPROC-OPTION...]
# Runs one xsltproc transformation and reports progress on stderr.
# Exits the shell with status 1 when xsltproc fails.
_build_transform () {
  _bt_out=$1
  _bt_xsl=$2
  _bt_data=$3
  shift 3
  >&2 printf 'building %s … ' "${_bt_out}"
  if xsltproc "$@" --encoding utf-8 -o "${_bt_out}" "${_bt_xsl}" "${_bt_data}"; then
    >&2 printf 'done.\n'
  else
    >&2 printf 'failed on %s.\n' "${_bt_xsl}"
    exit 1
  fi
}

# build DATAFILE
# Generates ds-feed.xml, download.html and index.html from DATAFILE using
# rss.xsl, download.xsl and index.xsl in the current directory.
# Exits the shell: status 0 when all three transformations succeed, status 1
# as soon as one of them fails.
build () {
  datafn="$1"
  dependencies="xsltproc date"
  # Intentionally unquoted: the list is split into one word per tool.
  assert_tools ${dependencies}
  # generate references.html based on directory list
  # todo: make it a transform (xsl) or drop it with the redesign of the website
  # Run in a subshell so the working directory is unchanged afterwards, even
  # if references/gen.sh fails; the transformations below use relative paths.
  ( cd references && ./gen.sh ) \
    || >&2 printf 'warning: references/gen.sh failed; references.html was not regenerated.\n'
  # todo: create covers
  #~ pdftoppm -jpeg -f 1 -l 1 covers/ds102.pdf covers/ds102.jpg
  #~ mv covers/ds102.jpg* covers/ds102.jpg
  # The current date is handed to the stylesheet as the XSLT parameter
  # "current-date" (<xsl:value-of select="$current-date"/>), in RFC 5322
  # format. `date -R` is the short form of `date --rfc-email`.
  _build_transform ds-feed.xml rss.xsl "${datafn}" \
    --stringparam current-date "$(date -R)"
  #~ xsltproc --encoding iso-8859-1 -o download.html download.xsl $datafn
  _build_transform download.html download.xsl "${datafn}"
  _build_transform index.html index.xsl "${datafn}"
  exit 0
}
# sortxml [DATAFILE]
# Sorts the entries of DATAFILE (default: data.xml) with sort.xsl and moves
# the result over DATAFILE. `mv -i` asks before overwriting.
# Returns 1 without touching DATAFILE when the transformation fails;
# otherwise returns the status of mv.
sortxml () {
  #~ test "$1" = "sortxml" && {
  sort_target="${1:-data.xml}"
  dependencies="xsltproc mv"
  # Intentionally unquoted: the list is split into one word per tool.
  assert_tools ${dependencies}
  if xsltproc --encoding utf-8 -o sorted.xml sort.xsl "${sort_target}"; then
    mv -i -- sorted.xml "${sort_target}"
  else
    # Never move a missing or stale sorted.xml over the data file.
    >&2 printf 'failed on sort.xsl.\n'
    return 1
  fi
}
# linkcheck
# Checks the links of download.html for availability.
# Two documents are scanned: ./download.html (skipped when it does not exist)
# and a fresh copy of https://ds.ccc.de/download.html. For every href found,
# a HEAD request is sent and "URL STATUS OK" (status 200) or
# "URL STATUS FAILED" (anything else) is written to stderr.
# Arguments are ignored. Returns 1 if no temporary file could be created.
# Limitations of the href extraction (unchanged): only the first attribute
# of an <a> tag is considered and the value is cut at the first '='.
linkcheck () {
  dependencies="curl mktemp sed grep cut rm"
  # Intentionally unquoted: the list is split into one word per tool.
  assert_tools ${dependencies}
  # mktemp creates the file atomically with an unpredictable name.
  tmpfile=$(mktemp) || {
    >&2 printf 'could not create a temporary file.\n'
    return 1
  }
  curl -k https://ds.ccc.de/download.html > "${tmpfile}"
  for f in download.html "${tmpfile}"; do
    test -e "${f}" || continue
    # One <a> tag per line, then take the quoted value following the first '='.
    sed 's/<a /\n<a /g' < "${f}" \
      | grep -i " href=" \
      | cut -d"=" -f2 \
      | cut -d'"' -f2 \
      | while IFS= read -r e; do
          test -n "${e}" || continue
          status=$(curl -k --max-time 1 --head --silent --output /dev/null --write-out "%{http_code}" "${e}")
          # URLs are passed as arguments, never as the format string:
          # percent-encoded links such as "%20" must be printed verbatim.
          if test "${status}" = "200"; then
            >&2 printf '%s %s OK\n' "${e}" "${status}"
          else
            >&2 printf '%s %s FAILED\n' "${e}" "${status}"
          fi
        done
  done
  rm -f -- "${tmpfile}"
}
# mirror_website URL
# Downloads a recursive local mirror of URL with wget, including page
# requisites and with links converted for local browsing.
# For http:// and https:// URLs the crawl is restricted to the host part of
# URL via --domains; for any other URL no --domains option is passed.
# Returns the exit status of wget.
mirror_website () {
  dependencies="wget"
  # Intentionally unquoted: the list is split into one word per tool.
  assert_tools ${dependencies}
  local url domain rest
  url="$*"
  domain=""
  case "${url}" in
    http://*|https://*)
      rest=${url#*://}        # drop the scheme
      domain=${rest%%[/?]*}   # keep everything before the first '/' or '?'
      ;;
  esac
  set -- \
    --no-check-certificate \
    --recursive \
    --no-clobber \
    --page-requisites \
    --html-extension \
    --convert-links \
    --restrict-file-names=windows
  # An empty --domains value would make wget consume the next option as the
  # domain list, so the option is only added when a host was found.
  if test -n "${domain}"; then
    set -- "$@" --domains "${domain}"
  fi
  wget "$@" --no-parent "${url}"
}
# _is_plain_integer VALUE
# Succeeds when VALUE is a decimal integer without leading zeros, optionally
# preceded by one minus sign. Pure pattern matching: VALUE is never evaluated
# as an arithmetic expression.
_is_plain_integer () {
  case "${1#-}" in
    0) return 0 ;;
    ''|0*|*[!0-9]*) return 1 ;;
  esac
  return 0
}

# newdataentry [DATAFILE] [DIRECTORY] [help] [NUMBER] [TIMESTAMP] [TEASER...]
# Prints an XML <schleuder> snippet for a newly released issue on stdout.
# All arguments are optional and recognised in this order:
#   DATAFILE   existing data file (resolved by assert_source; default data.xml)
#   DIRECTORY  base directory holding covers/, pdfs/ and epubs/ (default: pwd)
#   help       print the usage text and exit with status 1
#   NUMBER     issue number (default: id of the first <schleuder> entry in
#              DATAFILE plus one)
#   TIMESTAMP  unix timestamp for the <date> element (default, also for 0:
#              current time)
#   TEASER...  remaining words, joined with spaces into the <teaser> element
# Files are looked up as covers/dsNNN.jpg, pdfs/dsNNN.pdf and epubs/dsNNN.epub
# with NNN being the zero-padded issue number. Missing files, a missing cover
# and a missing teaser are reported on stderr. When neither pdf nor epub
# exists, the entry gets the attribute printonly="printonly".
# NOTE(review): the three informational lines ("next argument not a number …",
# "using UTC …") are written to stdout as before, so they precede the snippet
# when stdout is redirected.
# Returns 0 after printing the snippet; exits with status 1 when no data
# source can be resolved or when help was requested.
newdataentry () {
  dependencies="grep head cut date printf wc"
  # Intentionally unquoted: the list is split into one word per tool.
  assert_tools ${dependencies}
  # ${1:+"$1"} passes the first argument only if it is set and non-empty.
  if datafn=$(assert_source ${1:+"$1"}); then
    # assert_source echoes the file it accepted; consume the argument only
    # if it really was the data file (it may fall back to data.xml).
    if test "${datafn}" = "$1"; then
      shift
    fi
  else
    >&2 printf "file '%s' not found." "${datafn}"; err=1; exit 1
  fi
  err=0
  mfl=""
  customdir=$(pwd)
  >&2 printf "determining issue number based on '%s'.\n" "${datafn}"
  lastentry=$(grep -i "<schleuder id=" "${datafn}" | head -n 1 | cut -d'"' -f2)
  #~ lastentry=$(grep "<schleuder id=" ${datafn} | cut -d'"' -f2 | sort | tail -1)
  case "${lastentry}" in
    # no usable id found: start counting at 1
    ''|*[!0-9]*) lastentry=0 ;;
    # strip leading zeros, they would be read as octal in $(( ))
    *) lastentry=${lastentry#"${lastentry%%[!0]*}"} ;;
  esac
  newentry=$(( ${lastentry:-0} + 1 ))
  # todo: switches
  #~ for o in "$@"; do
  # assigned parameters with equal sign
  #~ on=$(printf $o|cut -d"=" -f1)
  #~ ov=$(printf $o|cut -d"=" -f2-)
  #~ case $on in
  # custom data directory for availability test and filesize, default pwd
  #~ c|customdir) ;;
  # custom issue, default next after top entry in ${datafn}
  #~ i|issue) ;;
  # custom timestamp, default: current time
  #~ d|datetime) ;;
  # custom teaser xml-text, default: none
  #~ t|teaser) ;;
  # custom fs for files
  #~ p|pdffs) ;;
  #~ e|epubfs) ;;
  # help text
  #~ h|help)
  #~ *) ;;
  #~ esac
  # or without equal sign …
  #~ case $o in
  #~ c|customdir) ;;
  #~ i|issue) ;;
  #~ esac
  #~ done
  if test $# -gt 0 && test -d "$1"; then
    customdir=$1
    >&2 printf 'custom base directory for files such as jpg, pdf, epub: %s.\n\n' "${customdir}"
    shift
  fi
  if test $# -gt 0 && test "$1" = "help"; then
    bold="\033[1m"
    nobold="\033[0m"
    printf "\n${bold}datenschleuder release xml snippet generation${nobold}\nprints a new data entry for a release of datenschleuder\n\n"
    printf "release [options]\n\n"
    printf "${bold}option\t\tdescription${nobold}\n\n"
    printf "directory\tcustom data directory for availability test and filesize, default pwd\n\n"
    printf "number\t\tprint entry for release with custom number, default is the increment of the top entry in %s\n\n" "${datafn}"
    printf "date\t\tuse a unix timestamp for a custom date\n\n"
    printf "teaser\t\tinclude teaser xml-message as last argument(s)\n\n"
    printf "${bold}full example:${nobold} '\$./gen.sh release 99 1568505600 Die Ausgabe 99 ist fertig.'\n\n"
    exit 1
  fi
  # Optional issue number.
  if test $# -gt 0 && _is_plain_integer "$1"; then
    newentry=$1; shift
    >&2 printf "custom issue number '%s' for '%s'.\n" "${newentry}" "${datafn}"
  else
    printf 'next argument not a number. assuming issue based on top entry.\n'
  fi
  # Optional unix timestamp; the remaining arguments are the teaser message.
  if test $# -gt 0 && _is_plain_integer "$1"; then
    >&2 printf 'custom datetime.\n'
    datetime="$1"; shift
  else
    printf 'next argument not a number. using actual system time.\n'
    datetime=0
  fi
  printf 'using UTC (+0000) for expressing the timezone.\n'
  # The file stem must be known before the files are looked up.
  fn="ds$(printf '%03d' "${newentry}")"
  tab=$(printf '\t')
  nl=$(printf '\nx'); nl=${nl%x}   # a lone newline survives $( ) this way
  imagelist=""
  filelist=""
  for d in covers pdfs epubs; do
    case "$d" in
      covers) ffn="${d}/${fn}.jpg"; cover=${ffn} ;;
      pdfs) ffn="${d}/${fn}.pdf" ;;
      epubs) ffn="${d}/${fn}.epub" ;;
    esac
    if test -e "${customdir}/${ffn}"; then
      if test "$d" = "covers"; then
        imagelist="${imagelist}${tab}<image>${cover}</image>${nl}"
      else
        fs=$(wc -c < "${customdir}/${ffn}")
        fs=${fs##*[!0-9]}   # some wc implementations pad the count with blanks
        filelist="${filelist}${tab}<link filesize=\"${fs}\">${ffn}</link>${nl}"
      fi
    else
      mfl="${mfl} ${ffn}${nl}"
      err=3
    fi
  done
  if test -z "${filelist}"; then
    printonly=' printonly="printonly"'
  else
    printonly=""
  fi
  printf '\n<schleuder id="%s"%s>\n' "${newentry}" "${printonly}"
  # todo: switch for dtg, integrate custom datetime input early
  # or simply implement xslt for handling ISO and nothing else for input
  # RFC 5322 format in UTC, e.g. "Sun, 15 Sep 2019 00:00:00 +0000".
  # `date -d @N` is needed for custom timestamps.
  # NOTE(review): `-d @N` is GNU date syntax — confirm on the target system.
  if test "${datetime}" -eq 0; then
    stamp=$(TZ=:UTC LANG=en_US.utf8 date -u -R)
  else
    stamp=$(TZ=:UTC LANG=en_US.utf8 date -d "@${datetime}" -u -R)
  fi
  tz=${stamp##* }        # trailing zone offset
  datetime=${stamp%:*}   # everything before the last ':' drops the seconds
  printf '\t<date>%s %s</date>\n' "${datetime}" "${tz}" # custom format universal time
  printf '%s' "${imagelist}"
  printf '%s' "${filelist}"
  #~ printf "\t<preface></preface>\n" # not used on server yet
  if test $# -eq 0; then
    teaser=""
  else
    teaser="$*"
    printf '\t<teaser>%s</teaser>\n' "${teaser}"
  fi
  printf '</schleuder>\n'
  if test -z "${teaser}"; then
    >&2 printf '\nmissing teaser message.\n'
  fi
  if ! test -e "${customdir}/${cover}"; then
    >&2 printf '\nmissing cover file: %s\n' "${cover}"
  fi
  if test "${err}" -gt 0; then
    >&2 printf '\nmissing files:\n%s\n' "${mfl}"
  fi
  return 0
}
# upload
# Placeholder for publishing the build results. For now it only reports on
# stderr that the command is not implemented. The notes below sketch the
# intended steps; lines starting with "#~" are disabled example commands.
upload () {
  printf '%s\n' "not implemented yet." >&2
  # step: check user ownership in www
  #~ find /usr/local/www/ds.ccc.de/references -not -user www
  # step: check rights for directories and files, so they can be updated
  # via `scp` etc.
  #   newly uploaded files will be owned by the uploading user,
  #   that might cause problems for the next one to update
  #   775: pdfs epubs references editorial
  #   664: pdfs/*.pdf epubs/*.epub references/*.html editorial/*.html
  # step: scp build files
  #   source credentials/from password manager/…
  #~ . ~/ds-web.sh
  #~ scp -i $key pdfs/ds102.pdf $user@$host:$webdir/pdfs/
  #~ scp -i $key covers/ds102.* $user@$host:$webdir/covers/
  #~ scp -i $key epubs/ds102.epub $user@$host:${webdir}/epubs/
  #~ scp -i $key editorial/ds102.html $user@$host:${webdir}/editorial/
  #~ scp -i $key references/ds102.html $user@$host:${webdir}/references/
  #~ scp -i $key references.html $user@$host:$webdir/
  #~ scp -i $key download.html $user@$host:$webdir/
  #~ scp -i $key index.html $user@$host:$webdir/
  #~ scp -i $key ds-feed.xml $user@$host:$webdir/
  # step: change to local copy
  #~ set pwd=$(pwd)
  #~ cd ../ds.ccc.de/
  # step: copy files
  #~ cp ../ds-web/data.xml ./
  #~ cp ../ds-web/ds-feed.xml ./
  #~ cp ../ds-web/index.html ./
  #~ cp ../ds-web/download.html ./
  #~ cp ../ds-web/references.html ./
  #~ cp ../ds-web/references/*.html ./references/
  #~ cp ../ds-web/editorial/*.html ./editorial/
  #~ cp ../ds-web/epubs/*.epub ./epubs/
  #~ cp ../ds-web/pdfs/*.pdf ./pdfs/
  #~ cp ../ds-web/vorab/*.pdf ./vorab/
  # step: commit build files to git
  #~ git commit $filelist -m "upload "$(date )
  #~ cd "${pwd}"
}
# newinfoentry
# Placeholder for creating an informational XML entry. Only reports on
# stderr that the command is not implemented yet; arguments are ignored.
newinfoentry () {
  printf '%s\n' \
    "not implemented yet." \
    "" \
    "will be similar to 'release'." >&2
}
# help
# Prints the list of available commands on stdout, one per line and each
# preceded by a tab, followed by an empty line.
help () {
  printf 'commands:\n'
  # The format is reused for every argument: one tab-indented line each.
  printf '\t%s\n' \
    "linkcheck check links for availability" \
    "build create HTML from XML" \
    "release create XML for newly released issue" \
    "info to be: create XML for informational entry" \
    "upload to be: upload issue" \
    "sortxml sort items of the XML file" \
    "mirror create local mirror of the website"
  printf '\n'
}
#~ echo $# $@
# Command dispatcher: the first argument selects the command, all remaining
# arguments are handed to it unchanged ("$@" keeps words with blanks or glob
# characters, e.g. a teaser text, intact).
if test $# -lt 1 ; then
  printf "missing parameter.\n"
  exit 1
fi
command_name=$1
shift
# a mirror is useful for local testing with files
case "${command_name}" in
  upload)
    upload
    ;;
  build)
    # assert_source prints the resolved data file and fails if none exists.
    f=$(assert_source "$@") || { exit 1; }
    build "${f}"
    ;;
  sortxml)
    f=$(assert_source "$@") || { exit 1; }
    sortxml "${f}"
    ;;
  mirror)
    >&2 printf "downloading the website may take quite some time.\n"
    mirror_website http://ds.ccc.de/
    ;;
  linkcheck)
    linkcheck http://ds.ccc.de/
    ;;
  release)
    newdataentry "$@"
    ;;
  info)
    newinfoentry "$@"
    ;;
  help)
    printf "to be extended, see README for examples.\n"
    help
    ;;
  *)
    printf "this command does not exist.\n"
    help
    # An unknown command is an error for calling scripts as well.
    exit 1
    ;;
esac
# generate small size version of pdf, might help e.g. for email attachments
#gs -sDEVICE=pdfwrite -dPDFSETTINGS=/ebook -q -o ds100-smaller.pdf ds100.pdf