pentaradio-tools/scan.go

134 lines
3.6 KiB
Go

/*******************************************************************************
* Copyright 2022 Stefan Majewsky <majewsky@gmx.net>
* SPDX-License-Identifier: GPL-3.0-only
* Refer to the file "LICENSE" for details.
*******************************************************************************/
package main
import (
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"time"
)
// ScanResult collects everything that Scan and the Parse* methods find out
// about a single episode.
type ScanResult struct {
	//episode date (taken from the name of the shownotes file)
	Day, Month, Year int

	//files discovered: one entry per file, holding its extension without the leading dot
	AudioFormats []string

	//parts extracted from the shownotes
	Title, Description string

	//parts extracted from the chaptermarks, in the order they appear in the file
	ChapterMarks []ChapterMark
}
// ChapterMark is one entry of the chapter marks file, as produced by
// ScanResult.ParseChapterMarks.
type ChapterMark struct {
	//timestamp exactly as written in the file (e.g. "00:12:34.567"); it is not converted to a number
	StartTime string
	//Title is the chapter name; URL is the optional link given in angle brackets (empty if there was none)
	Title, URL string
}
// Scan gathers all information about the episode whose files are located in
// the current working directory.
//
// The episode date is taken from the name of the one and only file matching
// "shownotes-pentaradio-YYYY-MM-DD.txt". The date must be a real calendar date
// with a year between 2000 and 2999. Afterwards, the shownotes file and the
// chapter marks file "chapter-pentaradio-YYYY-MM-DD.dat" are parsed, and the
// extension of every file matching "pentaradio-YYYY-MM-DD.*" is recorded in
// AudioFormats (at least one such file must exist).
//
// Problems are reported through must() and fail(), which are defined elsewhere.
// This function relies on fail() not returning to its caller (presumably it
// terminates the program; confirm against its definition).
func Scan() (result ScanResult) {
	//determine episode date by finding shownotes file
	names, err := filepath.Glob("shownotes-pentaradio-*.txt")
	must(err)
	if len(names) == 0 {
		fail("no files found: shownotes-pentaradio-*.txt")
	}
	if len(names) > 1 {
		fail("multiple files found: shownotes-pentaradio-*.txt")
	}
	shownotesPath := names[0]
	match := regexp.MustCompile(`^shownotes-pentaradio-(\d{4})-(\d{2})-(\d{2})\.txt$`).FindStringSubmatch(shownotesPath)
	if match == nil {
		fail("cannot extract date from file name: %s", shownotesPath)
	}

	//validate episode date (the Atoi errors can be ignored because the regex only lets decimal digits through)
	result.Year, _ = strconv.Atoi(match[1])
	if result.Year < 2000 || result.Year > 2999 {
		fail("invalid year in filename: %s", shownotesPath)
	}
	result.Month, _ = strconv.Atoi(match[2])
	if result.Month < 1 || result.Month > 12 {
		fail("invalid month in filename: %s", shownotesPath)
	}
	result.Day, _ = strconv.Atoi(match[3])
	//time.Date normalizes "day 0 of the next month" to the last day of this month,
	//which gives the month length including leap years (so that e.g. Feb 30 is rejected)
	lastDay := time.Date(result.Year, time.Month(result.Month)+1, 0, 0, 0, 0, 0, time.UTC).Day()
	if result.Day < 1 || result.Day > lastDay {
		fail("invalid day in filename: %s", shownotesPath)
	}

	//parse input files
	result.ParseShownotes(shownotesPath)
	chapterMarksPath := fmt.Sprintf("chapter-pentaradio-%04d-%02d-%02d.dat", result.Year, result.Month, result.Day)
	result.ParseChapterMarks(chapterMarksPath)

	//collect audio files
	pattern := fmt.Sprintf("pentaradio-%04d-%02d-%02d.*", result.Year, result.Month, result.Day)
	names, err = filepath.Glob(pattern)
	must(err)
	if len(names) == 0 {
		fail("no files found: %s", pattern)
	}
	for _, name := range names {
		result.AudioFormats = append(result.AudioFormats, strings.TrimPrefix(filepath.Ext(name), "."))
	}

	return result
}
//monthWords translates a month number into the German month name that is
//expected in the shownotes header. Index 0 is only a placeholder so that
//month numbers 1..12 can be used as indexes directly.
var monthWords = []string{
	0:  "Null",
	1:  "Januar",
	2:  "Februar",
	3:  "März",
	4:  "April",
	5:  "Mai",
	6:  "Juni",
	7:  "Juli",
	8:  "August",
	9:  "September",
	10: "Oktober",
	11: "November",
	12: "Dezember",
}
// ParseShownotes reads the shownotes file at the given path and stores the
// episode title and short description in scan.Title and scan.Description.
//
// scan.Day, scan.Month and scan.Year must already be set (and scan.Month must
// be a valid index into monthWords), because the file is required to start
// with a header naming exactly that date:
//
//	# Pentaradio vom <day>. <German month name> <year>
//	# Titel: "<episode title>"
//
//	<short description, possibly spanning several lines>
//
//	## <first section>
//
// Read errors are passed to must(); a file that does not have this shape is
// reported through fail().
func (scan *ScanResult) ParseShownotes(path string) {
	buf, err := os.ReadFile(path)
	must(err)

	expectedTitle := fmt.Sprintf("Pentaradio vom %d. %s %d", scan.Day, monthWords[scan.Month], scan.Year)
	shownotesRx := regexp.MustCompile(
		//header line; the title is quoted so that its "." only matches a literal dot
		`^# ` + regexp.QuoteMeta(expectedTitle) + `\n` +
			`# Titel: "(.*)"\n\n` + //episode title
			`((?s:.*?))\n\n##\s`, //short description (everything up until the first <h2>)
	)
	match := shownotesRx.FindStringSubmatch(string(buf))
	if match == nil {
		fail("shownotes do not match expected format: /%s/", shownotesRx.String())
	}

	scan.Title = match[1]
	scan.Description = match[2]
}
// ParseChapterMarks reads the chapter marks file at the given path and appends
// one ChapterMark per non-blank line to scan.ChapterMarks.
//
// Each line must have the form
//
//	HH:MM:SS.mmm Title
//	HH:MM:SS.mmm Title <https://example.org/link>
//
// where the link in angle brackets is optional and must start with "http://"
// or "https://". Read errors are passed to must(); a line that does not have
// this form is reported through fail().
func (scan *ScanResult) ParseChapterMarks(path string) {
	buf, err := os.ReadFile(path)
	must(err)

	lineRx := regexp.MustCompile(
		`^(\d\d:\d\d:\d\d\.\d\d\d) ` + //timestamp (with a literal dot before the milliseconds)
			//title: starts and ends with a non-space character; the tail is lazily optional
			//so that one-character titles work and a trailing link is not swallowed
			`(\S(?:.*?\S)??)` +
			`(?: <(https?://.*)>)?$`, //optional link
	)
	for _, line := range strings.Split(string(buf), "\n") {
		if strings.TrimSpace(line) == "" {
			continue
		}
		match := lineRx.FindStringSubmatch(line)
		if match == nil {
			fail("invalid format for chapter mark: %s", line)
		}
		scan.ChapterMarks = append(scan.ChapterMarks, ChapterMark{
			StartTime: match[1],
			Title:     match[2],
			URL:       match[3],
		})
	}
}