From 804bc1c12a79e1d54916816d991d4209c63419c4 Mon Sep 17 00:00:00 2001
From: Stefan Majewsky
Date: Sun, 17 Apr 2022 01:57:02 +0200
Subject: [PATCH] scan inputs

---
Note: this patch was edited by hand after export. The diffstat and the hunk
line counts match the content below, but the blob ids on the "index" lines
are still those of the original export.

 main.go |  26 +++++++++
 scan.go | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 189 insertions(+)
 create mode 100644 scan.go

diff --git a/main.go b/main.go
index 86aaafb..75c8b40 100644
--- a/main.go
+++ b/main.go
@@ -6,5 +6,31 @@
 
 package main
 
+import (
+	"fmt"
+	"os"
+)
+
 func main() {
+	scanResult := Scan()
+	fmt.Printf("%#v\n", scanResult)
+}
+
+//must reports a non-nil err through fail(), which terminates the program.
+func must(err error) {
+	if err != nil {
+		fail(err.Error())
+	}
+}
+
+//fail prints msg with an "ERROR:" prefix on stderr and exits with status 1.
+//msg is only treated as a fmt.Sprintf() format string if args are given, so
+//messages without args may contain literal "%" characters.
+func fail(msg string, args ...interface{}) {
+	if len(args) > 0 {
+		msg = fmt.Sprintf(msg, args...)
+	}
+	//Fprintln() already puts a space between its operands
+	fmt.Fprintln(os.Stderr, "ERROR:", msg)
+	os.Exit(1)
 }
diff --git a/scan.go b/scan.go
new file mode 100644
index 0000000..40421ce
--- /dev/null
+++ b/scan.go
@@ -0,0 +1,163 @@
+/*******************************************************************************
+* Copyright 2022 Stefan Majewsky
+* SPDX-License-Identifier: GPL-3.0-only
+* Refer to the file "LICENSE" for details.
+*******************************************************************************/
+
+package main
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+)
+
+//ScanResult contains everything that Scan() collects from the input files of
+//one episode.
+type ScanResult struct {
+	//episode date
+	Day   int
+	Month int
+	Year  int
+	//files discovered (file extensions without the leading dot)
+	AudioFormats []string
+	//parts extracted from the shownotes
+	Title       string
+	Description string
+	//parts extracted from the chaptermarks
+	ChapterMarks []ChapterMark
+}
+
+//ChapterMark is the parsed form of a single line in the chapter marks file.
+type ChapterMark struct {
+	StartTime string //in the form "00:00:00.000"
+	Title     string
+	URL       string //empty if the line has no URL
+}
+
+var (
+	//shownotesNameRx matches the shownotes file name and captures year, month and day.
+	shownotesNameRx = regexp.MustCompile(`^shownotes-pentaradio-(\d{4})-(\d{2})-(\d{2})\.txt$`)
+	//chapterMarkRx matches one chapter mark: start time, title (two or more
+	//characters) and an optional http(s) URL in angle brackets.
+	chapterMarkRx = regexp.MustCompile(`^(\d\d:\d\d:\d\d\.\d\d\d) (\S.*?\S)(?: <(https?://.*)>)?$`)
+)
+
+//Scan looks in the current working directory for the input files of a single
+//episode (shownotes, chapter marks, audio files) and returns what it found.
+//Every problem is reported through fail(), which terminates the program.
+func Scan() (result ScanResult) {
+	//determine episode date by finding shownotes file
+	names, err := filepath.Glob("shownotes-pentaradio-*.txt")
+	must(err)
+	if len(names) == 0 {
+		fail("no files found: shownotes-pentaradio-*.txt")
+	}
+	if len(names) > 1 {
+		fail("multiple files found: shownotes-pentaradio-*.txt")
+	}
+	match := shownotesNameRx.FindStringSubmatch(names[0])
+	if match == nil {
+		fail("cannot extract date from file name: %s", names[0])
+	}
+
+	//validate episode date (Atoi cannot fail: the regex only admits ASCII digits)
+	result.Year, _ = strconv.Atoi(match[1])
+	if result.Year < 2000 || result.Year > 2999 {
+		fail("invalid year in filename: %s", names[0])
+	}
+	result.Month, _ = strconv.Atoi(match[2])
+	if result.Month < 1 || result.Month > 12 {
+		fail("invalid month in filename: %s", names[0])
+	}
+	result.Day, _ = strconv.Atoi(match[3])
+	if result.Day < 1 || result.Day > 31 {
+		fail("invalid day in filename: %s", names[0])
+	}
+	//reject days like February 31st: time.Date() normalizes those into the
+	//following month, which changes the day of month
+	date := time.Date(result.Year, time.Month(result.Month), result.Day, 0, 0, 0, 0, time.UTC)
+	if date.Day() != result.Day {
+		fail("invalid day in filename: %s", names[0])
+	}
+
+	//parse input files
+	result.ParseShownotes(names[0])
+	chapterMarksPath := fmt.Sprintf("chapter-pentaradio-%04d-%02d-%02d.dat", result.Year, result.Month, result.Day)
+	result.ParseChapterMarks(chapterMarksPath)
+
+	//collect audio files
+	pattern := fmt.Sprintf("pentaradio-%04d-%02d-%02d.*", result.Year, result.Month, result.Day)
+	names, err = filepath.Glob(pattern)
+	must(err)
+	if len(names) == 0 {
+		fail("no files found: %s", pattern)
+	}
+	for _, name := range names {
+		result.AudioFormats = append(result.AudioFormats, strings.TrimPrefix(filepath.Ext(name), "."))
+	}
+
+	return result
+}
+
+//monthWords maps a month number to the month name that the shownotes header
+//uses. Index 0 is a placeholder so that January sits at index 1.
+var monthWords = []string{
+	"Null",
+	"Januar", "Februar", "März", "April",
+	"Mai", "Juni", "Juli", "August",
+	"September", "Oktober", "November", "Dezember",
+}
+
+//ParseShownotes reads the shownotes file at path and fills scan.Title and
+//scan.Description. The header line of the file is checked against the episode
+//date, so scan.Day, scan.Month (1 to 12) and scan.Year must already be set.
+//Calls fail() if the file cannot be read or does not have the expected format.
+func (scan *ScanResult) ParseShownotes(path string) {
+	buf, err := os.ReadFile(path)
+	must(err)
+
+	expectedTitle := fmt.Sprintf("Pentaradio vom %d. %s %d", scan.Day, monthWords[scan.Month], scan.Year)
+	shownotesRx := regexp.MustCompile(
+		//the title contains a literal "." that must not act as a wildcard
+		fmt.Sprintf(`^# %s\n`, regexp.QuoteMeta(expectedTitle)) + //header line
+			`# Titel: "(.*)"\n\n` + //episode title
+			`((?s:.*?))\n\n##\s`, //short description (everything up until the first "##" heading)
+	)
+	match := shownotesRx.FindStringSubmatch(string(buf))
+	if match == nil {
+		fail("shownotes do not match expected format: /%s/", shownotesRx.String())
+	}
+
+	scan.Title = match[1]
+	scan.Description = match[2]
+}
+
+//ParseChapterMarks reads the chapter marks file at path and appends one entry
+//to scan.ChapterMarks for each non-blank line. Calls fail() if the file cannot
+//be read or if a line does not match chapterMarkRx.
+func (scan *ScanResult) ParseChapterMarks(path string) {
+	buf, err := os.ReadFile(path)
+	must(err)
+
+	for _, line := range strings.Split(string(buf), "\n") {
+		//trimming also removes the "\r" that CRLF line endings leave behind
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+		match := chapterMarkRx.FindStringSubmatch(line)
+		if match == nil {
+			fail("invalid format for chapter mark: %s", line)
+		}
+		scan.ChapterMarks = append(scan.ChapterMarks, ChapterMark{
+			StartTime: match[1],
+			Title:     match[2],
+			URL:       match[3],
+		})
+	}
+}