ds-rss/gen.sh

#!/usr/bin/env sh
#~ set -x
assert_tools () {
err=0
while test $# -gt 0; do
command -v "$1" >/dev/null 2>/dev/null || {
>&2 printf "tool missing: $1\n"
2019-09-17 16:34:04 +02:00
err=$(( $err + 1 ))
}
shift
done
test $err -eq 0 || exit $err
}
assert_source () {
dependencies="test printf"
assert_tools ${dependencies}
test $# -gt 0 && {
datafn="$1";
} || {
>&2 printf "option for data source missing.\n";
datafn="data.xml";
>&2 printf "assuming fallback '${datafn}'.\n";
}
err=0
while test "${err}" -lt 2; do
test -f "$datafn" 2>/dev/null && { >&2 printf "from ${datafn} …\n"; err=0; break; } || {
>&2 printf "failed.\nfile '${datafn}' does not exist.\n"
err=$(( $err + 1 ));
datafn="data.xml";
}
done
test $err -eq 0 && { printf "$datafn"; exit 0; } || exit $err
}
build () {
datafn="$1"
dependencies="xsltproc date"
assert_tools ${dependencies}
# generate references.html based on directory list
# todo: make it a transform (xsl) or drop it with the redesign of the website
pwd=$(pwd) && cd references && ./gen.sh && cd "${pwd}"
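# hedged sketch for the todo above: a hypothetical references.xsl (not part of
# this repository) applied to an assumed data source could replace references/gen.sh:
#~ xsltproc --encoding utf-8 -o references.html references.xsl $datafn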
# todo: create covers
#~ pdftoppm -jpeg -f 1 -l 1 covers/ds102.pdf covers/ds102.jpg
#~ mv covers/ds102.jpg* covers/ds102.jpg
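# hedged sketch for the cover todo, based on the commented pdftoppm lines above;
# a hypothetical helper, not wired into the build yet:
#~ make_cover () {
#~ issue="$1" # e.g. ds102
#~ pdftoppm -jpeg -f 1 -l 1 "covers/${issue}.pdf" "covers/${issue}.jpg"
#~ mv "covers/${issue}.jpg"* "covers/${issue}.jpg"
#~ }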
>&2 printf "building ds-feed.xml … "
# --stringparam current-date `date +%Y-%m-%d` # possibility for current date-time-group via XSLT <xsl:value-of select="$current-date"/>
xsltproc --stringparam current-date "$(date --rfc-email)" --encoding utf-8 -o ds-feed.xml rss.xsl $datafn && { >&2 printf "done.\n"; } || { >&2 printf "failed on rss.xsl.\n"; exit 1; }
>&2 printf "building download.html … "
#~ xsltproc --encoding iso-8859-1 -o download.html download.xsl $datafn && { >&2 printf "done.\n"; } || { >&2 printf "failed on download.xsl.\n"; exit 1; }
xsltproc --encoding utf-8 -o download.html download.xsl $datafn && { >&2 printf "done.\n"; } || { >&2 printf "failed on download.xsl.\n"; exit 1; }
>&2 printf "building index.html … "
xsltproc --encoding utf-8 -o index.html index.xsl $datafn && { >&2 printf "done.\n"; } || { >&2 printf "failed on index.xsl.\n"; exit 1; }
exit 0
}
sortxml () {
#~ test "$1" = "sortxml" && {
dependencies="xsltproc"
assert_tools ${dependencies}
datafn="${1:-data.xml}"
xsltproc --encoding utf-8 -o sorted.xml sort.xsl "${datafn}"
mv -i sorted.xml "${datafn}"
}
linkcheck () {
dependencies="curl uuid"
assert_tools ${dependencies}
tmpfile="/tmp/"$(uuid); curl -k https://ds.ccc.de/download.html > $tmpfile
for f in download.html ${tmpfile} ; do
test -e $f || continue
for e in $(cat ${f}|sed 's/<a /\n<a /g'|grep -i " href="|cut -d"=" -f2|cut -d'"' -f2); do
status=$(curl -k --max-time 1 --head --silent --output /dev/null --write-out "%{http_code}" $e )
test $status -eq 200 && >&2 printf "%s %s OK\n" "${e}" "${status}" || >&2 printf "%s %s FAILED\n" "${e}" "${status}"
done
done
}
mirror_website () {
dependencies="wget"
assert_tools ${dependencies}
local url="$*"
local domain="$(expr "$url" : '^http[s]*://\([^/?]*\)')"
wget \
--no-check-certificate \
--recursive \
--no-clobber \
--page-requisites \
--html-extension \
--convert-links \
--restrict-file-names=windows \
--domains $domain \
--no-parent \
$url
}
newdataentry () {
dependencies="cat grep head cut rev date printf"
assert_tools ${dependencies}
datafn=$(assert_source $1) && {
if test "$datafn" = "$1" ; then
shift
fi
} || {
>&2 printf "file '${datafn}' not found."; err=1; exit 1;
}
err=0
customdir=$(pwd)
fl=""
>&2 printf "determining issue number based on '${datafn}'.\n"
lastentry=$(grep -i "<schleuder id=" ${datafn} | head -1 | cut -d'"' -f2)
#~ lastentry=$(grep "<schleuder id=" ${datafn} | cut -d'"' -f2 | sort | tail -1)
newentry=$(( $lastentry + 1 ))
# todo: switches (a hedged parsing sketch follows the commented notes below)
#~ for o in "$@"; do
# assigned parameters with equal sign
#~ on=$(printf $o|cut -d"=" -f1)
#~ ov=$(printf $o|cut -d"=" -f2-)
#~ case $on in
# custom data directory for availability test and filesize, default pwd
#~ c|customdir) ;;
# custom issue, default next after top entry in ${datafn}
#~ i|issue) ;;
# custom timestamp, default: current time
#~ d|datetime) ;;
# custom teaser xml-text, default: none
#~ t|teaser) ;;
# custom fs for files
#~ p|pdffs) ;;
#~ e|epubfs) ;;
# help text
#~ h|help)
#~ *) ;;
#~ esac
# or without equal sign …
#~ case $o in
#~ c|customdir) ;;
#~ i|issue) ;;
#~ esac
#~ done
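# hedged sketch of the "with equal sign" variant from the notes above; the option
# names mirror the commented list, the target variables are placeholders, and the
# function is not called anywhere yet:
#~ parse_options () {
#~ for o in "$@"; do
#~ on=$(printf "%s" "$o" | cut -d"=" -f1)
#~ ov=$(printf "%s" "$o" | cut -d"=" -f2-)
#~ case $on in
#~ c|customdir) customdir="$ov" ;;
#~ i|issue) newentry="$ov" ;;
#~ d|datetime) datetime="$ov" ;;
#~ t|teaser) teaser="$ov" ;;
#~ p|pdffs) pdffs="$ov" ;;
#~ e|epubfs) epubfs="$ov" ;;
#~ h|help) ;;
#~ *) >&2 printf "unknown option: %s\n" "$o" ;;
#~ esac
#~ done
#~ }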
test $# -gt 0 && test -d "$1" && {
customdir=$1
>&2 printf "custom base directory for files such as jpg, pdf, epub: %s.\n\n" "${customdir}"
shift
}
test $# -gt 0 && test "$1" = "help" && {
bold="\033[1m"
nobold="\033[0m"
printf "\n${bold}datenschleuder release xml snippet generation${nobold}\nprints a new data entry for a release of datenschleuder\n\n"
printf "release [options]\n\n"
printf "${bold}option\t\tdescription${nobold}\n\n"
printf "directory\tcustom data directory for availability test and filesize, default pwd\n\n"
printf "number\t\tprint entry for release with custom number, default is the increment of the top entry in ${datafn}\n\n"
printf "date\t\tuse a unix timestamp for a custom date\n\n"
printf "teaser\t\tinclude teaser xml-message as last argument(s)\n\n"
printf "${bold}full example:${nobold} '\$./gen.sh release 99 1568505600 Die Ausgabe 99 ist fertig.'\n\n"
exit 1
}
test $# -gt 0 && test "$1" -eq "$1" 2>/dev/null && {
newentry=$1; shift;
>&2 printf "custom issue number '${newentry}' for '${datafn}'.\n"
} || {
>&2 printf "next argument not a number. assuming issue based on top entry.\n"
}
test $# -gt 0 && test "$1" -eq "$1" 2>/dev/null && {
>&2 printf "custom datetime.\n"
datetime="$1"; shift;
} || { >&2 printf "next argument not a number. using actual system time.\n"; datetime=0; } # remaining arguments should be teaser xml-message.
>&2 printf "using UTC (+0000) for expressing the timezone.\n"
imagelist=""
filelist=""
for d in covers pdfs epubs ; do
case "$d" in
covers) ffn=${d}"/"${fn}".jpg"; cover=$ffn ;;
pdfs) ffn=${d}"/"${fn}".pdf" ;;
epubs) ffn=${d}"/"${fn}".epub" ;;
#~ *) printf "file "${ffn}" not found, in "${d}".\n" ;;
esac
test -e ${customdir}"/"${ffn} && {
test "$d" = "covers" && {
imagelist="${imagelist}\t<image>"${cover}"</image>\n"
} || {
fs=$(wc -c <${customdir}"/"${ffn})
filelist="${filelist}\t<link filesize=\"%d\">%s</link>\n" "${fs}" "${ffn}"
}
} || {
mfl=${mfl}" "${ffn}"\n"
#~ printf "file "${ffn}" not found.\n"
err=3
}
done
test -z "${filelist}" && printonly=" printonly=\"printonly\"" || printonly=""
printf "\n<schleuder id=\"$newentry\"${printonly}>\n"
# todo: switch for dtg, integrate custom datetime input early
# or simply implement xslt for handling ISO and nothing else for input
# RFC-5322 format without seconds, in UTC
dtg="TZ=:UTC LANG=en_US.utf8 date -u -R |rev|cut -d\":\" -f2-|rev"
tzc="TZ=:UTC LANG=en_US.utf8 date +\"%z\"" # timezone: `date +"%z"`
# ISO dtg as issues may provide datetime-data on months but mostly not on days …
#~ dtg="TZ=:UTC LANG=en_US.utf8 date +%Y-%m-%d %H:%M %Z"; tz=""
test $datetime -eq 0 && {
datetime=$(TZ=:UTC LANG=en_US.utf8 date -u -R |rev|cut -d":" -f2-|rev) #
tz=$(TZ=:UTC LANG=en_US.utf8 date +"%z")
} || {
#~ dtg="TZ=:UTC LANG=en_US.utf8 date -d @${datetime} +\"%Y-%m-%d %H:%M %Z\""; tz=""
dtg="TZ=:UTC LANG=en_US.utf8 date -d @${datetime} -u -R |rev|cut -d\":\" -f2-|rev"; tz=" UT"
tzc="TZ=:UTC LANG=en_US.utf8 date +\"%z\""
#~ datetime="$(TZ=:UTC LANG=en_US.utf8 date -d @${datetime} -u -R|rev|cut -d":" -f2-|rev)"
# TZ=:UTC LANG=en_US.utf8 date -d @${datetime} +"%Y-%m-%d %H:%M %Z"
datetime="$(sh -c "${dtg}")"
tz="$(sh -c "${tzc}")"
}
printf "\t<date>%s %s</date>\n" "${datetime}" "${tz}" # custom format universal time
printf "${imagelist}"
printf "${filelist}"
#~ printf "\t<preface></preface>\n" # not used on server yet
test $# -eq 0 && { teaser="" ; } || {
teaser="$*";
printf "\t<teaser>%s</teaser>\n" "${teaser}"
}
printf "</schleuder>\n"
test "$teaser" = "" && { >&2 printf "\nmissing teaser message.\n"; }
test -e ${customdir}"/"$cover || { >&2 printf "\nmissing cover file: ${cover}\n"; }
test $err -gt 0 && { >&2 printf "\nmissing files:\n${mfl}\n"; }
}
upload () {
>&2 printf "not implemented yet.\n"
# check user ownership in www
#~ find /usr/local/www/ds.ccc.de/references -not -user www
# check rights for directories and files, so they can be updated via `scp` etc.
# newly uploaded files will be owned by the uploading user
# that might cause problems for the next one to update
# 775: pdfs epubs references editorial
# 664: pdfs/*.pdf epubs/*.epub references/*.html editorial/*.html
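# hedged sketch of the permission fix described above, assuming $webdir is
# already set (e.g. sourced from ~/ds-web.sh) and that this runs on the web host:
#~ chmod 775 $webdir/pdfs $webdir/epubs $webdir/references $webdir/editorial
#~ chmod 664 $webdir/pdfs/*.pdf $webdir/epubs/*.epub $webdir/references/*.html $webdir/editorial/*.html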
# scp build files
# source credentials/from password manager/…
#~ . ~/ds-web.sh
#~ scp -i $key pdfs/ds102.pdf $user@$host:$webdir/pdfs/
#~ scp -i $key covers/ds102.* $user@$host:$webdir/covers/
#~ scp -i $key epubs/ds102.epub $user@$host:${webdir}/epubs/
#~ scp -i $key editorial/ds102.html $user@$host:${webdir}/editorial/
#~ scp -i $key references/ds102.html $user@$host:${webdir}/references/
#~ scp -i $key references.html $user@$host:$webdir/
#~ scp -i $key download.html $user@$host:$webdir/
#~ scp -i $key index.html $user@$host:$webdir/
#~ scp -i $key ds-feed.xml $user@$host:$webdir/
# change to local copy
#~ set pwd=$(pwd)
#~ cd ../ds.ccc.de/
# copy files
#~ cp ../ds-web/data.xml ./
#~ cp ../ds-web/ds-feed.xml ./
#~ cp ../ds-web/index.html ./
#~ cp ../ds-web/download.html ./
#~ cp ../ds-web/references.html ./
#~ cp ../ds-web/references/*.html ./references/
#~ cp ../ds-web/editorial/*.html ./editorial/
#~ cp ../ds-web/epubs/*.epub ./epubs/
#~ cp ../ds-web/pdfs/*.pdf ./pdfs/
#~ cp ../ds-web/vorab/*.pdf ./vorab/
# commit build files to git
#~ git commit $filelist -m "upload "$(date )
#~ cd "${pwd}"
}
newinfoentry () {
>&2 printf "not implemented yet.\n\nwill be similar to 'release'.\n"
}
help () {
printf "commands:\n"
printf "\tlinkcheck check links for availability\n"
printf "\tbuild create HTML from XML\n"
printf "\trelease create XML for newly released issue\n"
printf "\tinfo to be: create XML for informational entry\n"
printf "\tupload to be: upload issue\n"
printf "\tsortxml sort items of the XML file\n"
printf "\tmirror create local mirror of the website\n"
printf "\n"
}
#~ echo $# $@
if test $# -lt 1 ; then
printf "missing parameter.\n"
exit 1
else
# a mirror is useful for local testing with files
case "$1" in
upload)
shift
upload
;;
build)
shift
f=$(assert_source $@) || { exit 1; }
build "${f}"
;;
sortxml)
shift
f=$(assert_source $@) || { exit 1; }
sortxml "${f}"
;;
mirror)
>&2 printf "downloading the website may take quite some time.\n"
mirror_website http://ds.ccc.de/
;;
linkcheck)
linkcheck http://ds.ccc.de/
;;
release)
shift
newdataentry $@
;;
info)
shift
newinfoentry $@
;;
help)
printf "to be extended, see README for exaples."
help
;;
*)
printf "this command does not exist.\n"
help
;;
esac
fi
# generate small size version of pdf, might help e.g. for email attachments
#gs -sDEVICE=pdfwrite -dPDFSETTINGS=/ebook -q -o ds100-smaller.pdf ds100.pdf