From 50aee61a424c827892a2c45c1c2f0b2867c70e9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20L=C3=B6tzsch?=
Date: Sat, 5 Mar 2022 20:59:29 +0100
Subject: [PATCH] Generate xlsx sheet from all mails received from wpforms

---
Review notes (ignored by git when the patch is applied):

* Line breaks and indentation were missing from this copy of the patch and
  have been restored from the hunk line counts. The indentation of context
  lines is reconstructed; check it against the target tree before applying
  (or apply with --ignore-whitespace).
* save-spreadsheet! in core.clj is amended relative to the original commit:
  the header row is now built from every key seen in the data and each row is
  looked up by key, instead of pairing (keys (first data)) with (vals m).
  The old form only lined up when every map had the same keys in the same
  order. Because of this amendment the second core.clj hunk, the diffstat and
  the post-image blob id on the core.clj "index" line no longer match the
  original commit.
* Still open in -main: `rest` drops the first parsed entry (see the TODO), and
  message->html returns nil for any message whose content type is not
  "text/html; charset=utf-8"; that nil is still passed to wpforms_html->edn.
* filter_expr->filter_fn has no fallback branch: it returns nil for anything
  that is neither a keyword nor a fn.

 import/api/wpforms-mails/.gitignore          | 12 ++++
 import/api/wpforms-mails/project.clj         |  5 +-
 .../wpforms-mails/src/wpforms_mails/core.clj | 30 ++++++++-
 .../src/wpforms_mails/parse_hickup.clj       | 63 +++++++++++++++++++
 4 files changed, 105 insertions(+), 5 deletions(-)
 create mode 100644 import/api/wpforms-mails/.gitignore
 create mode 100644 import/api/wpforms-mails/src/wpforms_mails/parse_hickup.clj

diff --git a/import/api/wpforms-mails/.gitignore b/import/api/wpforms-mails/.gitignore
new file mode 100644
index 0000000..aa5f3a9
--- /dev/null
+++ b/import/api/wpforms-mails/.gitignore
@@ -0,0 +1,12 @@
+/target
+/classes
+/checkouts
+profiles.clj
+pom.xml
+pom.xml.asc
+*.jar
+*.class
+/.lein-*
+/.nrepl-port
+/.prepl-port
+*.xlsx
diff --git a/import/api/wpforms-mails/project.clj b/import/api/wpforms-mails/project.clj
index b7bff55..7678b36 100644
--- a/import/api/wpforms-mails/project.clj
+++ b/import/api/wpforms-mails/project.clj
@@ -5,7 +5,10 @@
             :url "https://www.eclipse.org/legal/epl-2.0/"}
   :dependencies [[org.clojure/clojure "1.10.3"]
                  [yogthos/config "1.2.0"]
-                 [io.forward/clojure-mail "1.0.8"]]
+                 [io.forward/clojure-mail "1.0.8"]
+                 [clj-tagsoup "0.3.0" :exclusions [org.clojure/clojure org.clojure/data.xml]]
+                 [org.clojure/data.xml "0.0.8"]
+                 [dk.ative/docjure "1.14.0"]]
   :main ^:skip-aot wpforms-mails.core
   :target-path "target/%s"
   :profiles {:uberjar {:aot :all
diff --git a/import/api/wpforms-mails/src/wpforms_mails/core.clj b/import/api/wpforms-mails/src/wpforms_mails/core.clj
index cb47172..0a52949 100644
--- a/import/api/wpforms-mails/src/wpforms_mails/core.clj
+++ b/import/api/wpforms-mails/src/wpforms_mails/core.clj
@@ -3,7 +3,10 @@
             [clojure.java.io :as io]
             [mbox-parser.core :as mbox]
             [clojure.string :refer [join]]
-            [clojure-mail.message :as cmm])
+            [clojure-mail.message :as cmm]
+            [pl.danieljanus.tagsoup :refer [parse-string]]
+            [wpforms-mails.parse-hickup :refer [wpforms_html->edn]]
+            [dk.ative.docjure.spreadsheet :refer [create-workbook save-workbook!]])
   (:import [java.util Properties]
            [javax.mail Session]
            [javax.mail.internet MimeMessage])
@@ -33,12 +36,31 @@
   [message]
   (let [msg:edn (cmm/read-message message)]
     (when (= (:content-type msg:edn) "text/html; charset=utf-8")
-      (-> msg:edn :body :body))))
+      (->> msg:edn :body :body
+           parse-string))))
+
+(defn save-spreadsheet!
+  "Write `data` (a seq of maps) to `filename` as a workbook with one sheet
+  named `sheet`. The first row holds every key seen in `data`, in order of
+  first appearance; each following row holds one map's values under those
+  columns, with nil where a map lacks the key."
+  [filename sheet data]
+  (let [header (distinct (mapcat keys data))
+        ;; Look values up by header key instead of using `vals`, so entries
+        ;; with missing or differently ordered fields stay column-aligned.
+        rows   (map (fn [entry] (map #(get entry %) header)) data)
+        wb     (create-workbook sheet (cons header rows))]
+    (save-workbook! filename wb)))
 
 (defn -main
   [& _args]
-  (map message->html
-       (file->messages (:wpforms-mails-file env))))
+  (->> (file->messages (:wpforms-mails-file env))
+       (map (fn [message]
+              (-> message
+                  message->html
+                  wpforms_html->edn)))
+       rest ;; TODO filter valid entries
+       (save-spreadsheet! "host-offers.xlsx" "Host Offers")))
 
 (comment
   (count (mbox->emls (:wpforms-mails-file env)))
diff --git a/import/api/wpforms-mails/src/wpforms_mails/parse_hickup.clj b/import/api/wpforms-mails/src/wpforms_mails/parse_hickup.clj
new file mode 100644
index 0000000..e1b148e
--- /dev/null
+++ b/import/api/wpforms-mails/src/wpforms_mails/parse_hickup.clj
@@ -0,0 +1,63 @@
+(ns wpforms-mails.parse-hickup)
+
+(defn filter_expr->filter_fn [filter_expr]
+  (cond
+    (keyword? filter_expr)
+    #(= filter_expr (first %))
+    (fn? filter_expr)
+    filter_expr))
+
+(defn children
+  ([h]
+   (filter #(not (map? %)) (rest h)))
+  ([filter_expr h]
+   (let [filter_fn (filter_expr->filter_fn filter_expr)]
+     (filter filter_fn (children h)))))
+
+(defn child [& args]
+  (first (apply children args)))
+
+(defn node? [hh]
+  (keyword? (first hh)))
+
+(defn pp
+  "pretty print"
+  [hh]
+  (if (node? hh)
+    [(first hh) (->> (map first (children hh))
+                     (into []))]
+    ;; multiple nodes
+    (map first hh)))
+
+(defn wpforms_input->map [input_table]
+  (let [rows (->> input_table
+                  (child :tbody)
+                  (children :tr)
+                  (map #(child :td %))
+                  (map child))
+        [k_strong v] rows
+        k (child k_strong)]
+    {k v}))
+
+(defn wpforms_html->edn [html]
+  (-> (->> html
+           (child :body)
+           (child :center)
+           (child :table)
+           (child :tr)
+           (child :td)
+           (child :table)
+           (children :tr) first
+           (child :td)
+           (child :table)
+           (child :tbody)
+           (child :tr)
+           (child :td)
+           (child :table)
+           (child :tbody)
+           (child :tr)
+           (child :td)
+           children
+           (map wpforms_input->map)
+           (apply merge))
+      (update "E-Mail" #(child string? %))))