427 lines
14 KiB
Awk
427 lines
14 KiB
Awk
|
# awk script for converting an iCal formatted file to a sequence of org-mode headings.
|
||
|
# this may not work in general but seems to work for day and timed events from Google's
|
||
|
# calendar, which is really all I need right now...
|
||
|
#
|
||
|
# usage:
|
||
|
# awk -f THISFILE < icalinputfile.ics > orgmodeentries.org --assign NAME=category
|
||
|
#
|
||
|
# where the category is used to define a CATEGORY for all entries in
|
||
|
# the file and also assign that label as a tag to each entry
|
||
|
#
|
||
|
# Note: change org meta information generated below for author and
|
||
|
# email entries!
|
||
|
#
|
||
|
# Known bugs:
|
||
|
# - not so much a bug as a possible assumption: date entries with no time
|
||
|
# specified are assumed to be independent of the time zone.
|
||
|
#
|
||
|
# Eric S Fraga
|
||
|
# 20100629 - initial version
|
||
|
# 20100708 - added end times to timed events
|
||
|
# - adjust times according to time zone information
|
||
|
# - fixed incorrect transfer for entries with ":" embedded within the text
|
||
|
# - added support for multi-line summary entries (which become headlines)
|
||
|
# 20100709 - incorporated time zone identification
|
||
|
# - fixed processing of continuation lines as Google seems to
|
||
|
# have changed, in the last day, the number of spaces at
|
||
|
# the start of the line for each continuation...
|
||
|
# - remove backslashes used to protect commas in iCal text entries
|
||
|
# no further revision log after this as the file was moved into a git
|
||
|
# repository...
|
||
|
#
|
||
|
# Last change: 2016.05.26 08:47:12
|
||
|
#----------------------------------------------------------------------------------
|
||
|
|
||
|
# a function to take the iCal formatted date+time, convert it into an
|
||
|
# internal form (seconds since time 0), and adjust according to the
|
||
|
# local time zone (specified by +-seconds calculated in the BEGIN
|
||
|
# section)
|
||
|
|
||
|
function datetimestamp(input)
|
||
|
{
|
||
|
# convert the iCal Date+Time entry to a format that mktime can understand
|
||
|
datespec = gensub("([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])T([0-9][0-9])([0-9][0-9])([0-9][0-9]).*[\r]*", "\\1 \\2 \\3 \\4 \\5 \\6", "g", input);
|
||
|
# print "date spec : " datespec; convert this date+time into
|
||
|
# seconds from the beginning of time and include adjustment for
|
||
|
# time zone, as determined in the BEGIN section below. The
|
||
|
# adjustment is only included if the time stamp has a Z at the
|
||
|
# end. Of course, we should actually incorporate the time zone
|
||
|
# information in the time stamp line but ...
|
||
|
if (0 < index(input,"Z")) {
|
||
|
# For time
|
||
|
# zone adjustment, I have not tested edge effects, specifically
|
||
|
# what happens when UTC time is a different day to local time and
|
||
|
# especially when an event with a duration crosses midnight in UTC
|
||
|
# time. It should work but...
|
||
|
timestamp = mktime(datespec) + seconds;
|
||
|
}
|
||
|
else {
|
||
|
timestamp = mktime(datespec);
|
||
|
}
|
||
|
# print "date spec: " datespec;
|
||
|
#timestamp = mktime(datespec);
|
||
|
# print "adjusted : " timestamp
|
||
|
# print "Time stamp : " strftime("%Y-%m-%d %H:%M", timestamp);
|
||
|
return timestamp;
|
||
|
}
|
||
|
|
||
|
# version of above but for dates only
|
||
|
function datestamp(input)
|
||
|
{
|
||
|
# create a date using midnight as the time
|
||
|
datespec = gensub( "([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9]).*[\r]*", "\\1 \\2 \\3 0 0 0", "g", input );
|
||
|
# convert to internal representation
|
||
|
timestamp = mktime(datespec);
|
||
|
# and finally convert to something org understands
|
||
|
datestring = strftime("%Y-%m-%d %a", timestamp);
|
||
|
#print "In datestamp: datespec=" datespec " timestamp=" timestamp " datestring=" datestring;
|
||
|
return datestring;
|
||
|
}
|
||
|
|
||
|
# start of the output file now
|
||
|
BEGIN {
|
||
|
# use a colon to separate the type of data line from the actual contents
|
||
|
FS = ":";
|
||
|
|
||
|
# determine the number of seconds to use for adjusting for time
|
||
|
# zone difference from UTC. This is used in the function
|
||
|
# datetimestamp above. The time zone information returned by
|
||
|
# strftime() is in hours * 100 so we multiply by 36 to get
|
||
|
# seconds. This does not work for time zones that are not an
|
||
|
# integral multiple of hours (e.g. Newfoundland)
|
||
|
seconds = gensub("([+-])0", "\\1", 1, strftime("%z")) * 36;
|
||
|
|
||
|
date1 = ""; # for start of an event
|
||
|
date2 = ""; # for end of an event, if specified
|
||
|
entry = ""
|
||
|
first = 1; # true until an event has been found
|
||
|
headline = ""
|
||
|
icalentry = "" # the full entry for inspection
|
||
|
id = ""
|
||
|
indescription = 0;
|
||
|
inevent = 0; # we have VEVENTS but also other items which we do not process
|
||
|
location = ""; # outlook entries, at least, often include a location
|
||
|
repeat = ""; # is item repeated? if so, how often
|
||
|
time1 = ""; # for start of an event, if specified
|
||
|
time2 = ""; # for end of an event, if specified
|
||
|
todotype = ""; # type of TODO
|
||
|
|
||
|
if (NAME == "")
|
||
|
NAME = "ical2org";
|
||
|
|
||
|
print "# -*- mode: auto-revert; mode: org; -*-" # suggested by Henrik Holmboe
|
||
|
print "#+TITLE: Main Google calendar entries"
|
||
|
print "#+AUTHOR: Eric S Fraga"
|
||
|
print "#+EMAIL: e.fraga@ucl.ac.uk"
|
||
|
print "#+DESCRIPTION: converted using the ical2org awk script"
|
||
|
print "#+CATEGORY: " NAME
|
||
|
print " "
|
||
|
}
|
||
|
|
||
|
# continuation lines (at least from Google) start with two spaces
|
||
|
# if the continuation is after a description or a summary, append the entry
|
||
|
# to the respective variable
|
||
|
|
||
|
/^[ ]+/ {
|
||
|
if (indescription) {
|
||
|
entry = entry gensub("\r", "", "g", gensub("^[ ]+", "", 1, $0));
|
||
|
} else if (insummary) {
|
||
|
summary = summary gensub("\r", "", "g", gensub("^[ ]+", "", 1, $0))
|
||
|
} else if (inuid) {
|
||
|
id = id gensub("\r", "", "g", gensub("^[ ]+", "", 1, $0))
|
||
|
}
|
||
|
icalentry = icalentry "\n" $0
|
||
|
}
|
||
|
|
||
|
/^BEGIN:VEVENT/ {
|
||
|
# start of an event. if this is the first, output the preamble from the iCal file
|
||
|
if (first) {
|
||
|
print "* COMMENT original iCal preamble"
|
||
|
print gensub("\r", "", "g", icalentry)
|
||
|
icalentry = ""
|
||
|
}
|
||
|
havesummary = 0;
|
||
|
inevent = 1;
|
||
|
first = false;
|
||
|
repeat = "";
|
||
|
}
|
||
|
|
||
|
/^BEGIN:VTODO/ {
|
||
|
if (first){
|
||
|
print "* COMMENT original iCal preamble";
|
||
|
print gensub("\r", "", "g", icalentry);
|
||
|
icalentry = "";
|
||
|
first = false;
|
||
|
}
|
||
|
havesummary = 0;
|
||
|
intodo = 1;
|
||
|
repeat = "";
|
||
|
todotype = "";
|
||
|
}
|
||
|
# any line that starts at the left with a non-space character is a new data field
|
||
|
|
||
|
/^[A-Z]/ {
|
||
|
# we ignore DTSTAMP lines as they change every time you download
|
||
|
# the iCal format file which leads to a change in the converted
|
||
|
# org file as I output the original input. This change, which is
|
||
|
# really content free, makes a revision control system update the
|
||
|
# repository and confuses.
|
||
|
if (! index("DTSTAMP", $1)) icalentry = icalentry "\n" $0
|
||
|
# this line terminates the collection of description and summary entries
|
||
|
indescription = 0;
|
||
|
if (insummary) {
|
||
|
havesummary = 1;
|
||
|
}
|
||
|
insummary = 0;
|
||
|
}
|
||
|
|
||
|
# this type of entry represents a day entry, not timed, with date
|
||
|
# stamp YYYYMMDD. For a todo item, this indicates a scheduled item.
|
||
|
|
||
|
/^DTSTART;VALUE=DATE/ {
|
||
|
# print "DTSTART date only entry: " $0;
|
||
|
# date1 = gensub("([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9]).*[\r]*", "\\1-\\2-\\3", "g", $2)
|
||
|
date1 = datestamp($2);
|
||
|
time1 = ""
|
||
|
}
|
||
|
|
||
|
# this represents a timed entry with date and time stamp
|
||
|
# YYYYMMDDTHHMMSS we ignore the seconds. This entry may have a time
|
||
|
# zone specification which is currently ignored although it should be
|
||
|
# possible, not easy, to incorporate. We assume that this information
|
||
|
# is only relevant for appointments and not TODO items. We expect
|
||
|
# TODO items to have only a date for the START field and that date
|
||
|
# will be the scheduled date. See above.
|
||
|
|
||
|
/^DTSTART(;TZID.*)?:/ {
|
||
|
if (inevent) {
|
||
|
# print "DTSTART line: " $0;
|
||
|
# print "checking start time: " $2;
|
||
|
date1 = strftime("%Y-%m-%d %a", datetimestamp($2));
|
||
|
time1 = strftime(" %H:%M", datetimestamp($2));
|
||
|
# print "====> time: " time1;
|
||
|
# print date;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
# and the same for the end date; here we extract only the time and append this to the
|
||
|
# date+time found by the DTSTART entry. We assume that entry was there, of course.
|
||
|
# should probably add some error checking here! In time...
|
||
|
|
||
|
/^DTEND;VALUE=DATE/ {
|
||
|
if (inevent) {
|
||
|
# date2 = gensub("([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9]).*[\r]", "\\1-\\2-\\3", "g", $2)
|
||
|
date2 = datestamp($2);
|
||
|
time2 = ""
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/^DTEND(;TZID=[^:]*)?:/ {
|
||
|
if (inevent) {
|
||
|
# print $0
|
||
|
date2 = strftime("%Y-%m-%d %a", datetimestamp($2));
|
||
|
time2 = strftime("%H:%M", datetimestamp($2));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
# TODO items may (should?) have a DUE date/time.
|
||
|
/^DUE(;TZID=[^:]*)?:/ {
|
||
|
if (intodo){
|
||
|
date2 = strftime("%Y-%m-%d %a", datetimestamp($2));
|
||
|
time2 = strftime("%H:%M", datetimestamp($2));
|
||
|
}
|
||
|
}
|
||
|
# deadline with only a date
|
||
|
/^DUE;VALUE=DATE/ {
|
||
|
# print "DUE;VALUE=DATE entry:" $0
|
||
|
# print "... date part is >" $2 "<"
|
||
|
# print "... date2 before " date2
|
||
|
if (intodo) {
|
||
|
#date2 = gensub("([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9]).*[\r]*", "\\1-\\2-\\3", "g", $2)
|
||
|
date2 = datestamp($2);
|
||
|
time2 = ""
|
||
|
}
|
||
|
# print "... date2 after " date2
|
||
|
}
|
||
|
# The description will the contents of the entry in org-mode.
|
||
|
# this line may be continued.
|
||
|
|
||
|
/^DESCRIPTION/ {
|
||
|
$1 = "";
|
||
|
entry = entry "\n" gensub("\r", "", "g", $0);
|
||
|
indescription = 1;
|
||
|
}
|
||
|
|
||
|
/^LOCATION/ {
|
||
|
$1 = "";
|
||
|
location = gensub("\r", "", "g", $0);
|
||
|
}
|
||
|
|
||
|
# the status of a TODO item: we know about NEEDS-ACTION and
|
||
|
# COMPLETED. There may be others...
|
||
|
/^STATUS/ {
|
||
|
if ($2 == "NEEDS-ACTION")
|
||
|
todotype = "TODO";
|
||
|
else if ($2 == "COMPLETED")
|
||
|
todotype = "DONE";
|
||
|
else
|
||
|
todotype = "UNKNOWN";
|
||
|
}
|
||
|
# is there a repetition rule. I don't know how general this is but
|
||
|
# Microsoft's Outlook calendar uses this for repeats
|
||
|
|
||
|
/^RRULE/ {
|
||
|
# print ">>> Checking rule with string: " $2;
|
||
|
i = match($2,"FREQ=[A-Z]+;");
|
||
|
# printf(">>> Index=%d start=%d length=%d\n\n", i, RSTART, RLENGTH);
|
||
|
frequency = substr($2, RSTART+5, RLENGTH-6);
|
||
|
# print ">>> Frequency is " frequency "\n\n";
|
||
|
i = match($2,"INTERVAL=[0-9]+;");
|
||
|
interval = 1; # default interval if none is found
|
||
|
if (i>0) {
|
||
|
interval = substr($2, RSTART+9, RLENGTH-10);
|
||
|
}
|
||
|
period = "";
|
||
|
if (frequency == "DAILY") {
|
||
|
period = "d";
|
||
|
}
|
||
|
else if (frequency == "WEEKLY") {
|
||
|
period = "w";
|
||
|
}
|
||
|
else if(frequency == "MONTHLY") {
|
||
|
period = "m";
|
||
|
}
|
||
|
else if(frequency == "YEARLY") {
|
||
|
period = "y";
|
||
|
}
|
||
|
if (period != "") {
|
||
|
repeat = sprintf(" +%d%s", interval, period);
|
||
|
}
|
||
|
# print ">>> Repeat is " repeat;
|
||
|
}
|
||
|
|
||
|
# the summary will be the org heading
|
||
|
|
||
|
/^SUMMARY/ {
|
||
|
$1 = "";
|
||
|
if (!havesummary) {
|
||
|
summary = gensub("\r", "", "g", $0);
|
||
|
insummary = 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
# the unique ID will be stored as a property of the entry
|
||
|
|
||
|
/^UID/ {
|
||
|
$1 = "";
|
||
|
id = gensub("\r", "", "g", $0);
|
||
|
inuid = 1;
|
||
|
}
|
||
|
|
||
|
# when we reach the end of the event line, we output everything we
|
||
|
# have collected so far, creating a top level org headline with the
|
||
|
# date/time stamp, unique ID property and the contents, if any
|
||
|
|
||
|
/^END:VEVENT/ {
|
||
|
# translate \n sequences to actual newlines and unprotect commas (,)
|
||
|
print "* " gensub("\\\\,", ",", "g", gensub("\\\\n", " ", "g", summary)) " :" NAME ":"
|
||
|
print ":PROPERTIES:"
|
||
|
print ":ID: " id
|
||
|
if (location != "") {
|
||
|
print ":LOCATION: " gensub("\\\\,", ",", "g", location);
|
||
|
}
|
||
|
print ":END:"
|
||
|
if (date1 == date2) {
|
||
|
if (time2 == "")
|
||
|
print " <" date1 time1 repeat ">"
|
||
|
else
|
||
|
print " <" date1 time1 "-" time2 repeat ">"
|
||
|
}
|
||
|
else {
|
||
|
if (time1 == "")
|
||
|
print "<" date1 ">--<" date2 ">"
|
||
|
else
|
||
|
print " <" date1 time1 ">--<" date2 " " time2 ">"
|
||
|
}
|
||
|
# for the entry, convert all embedded "\n" strings to actual newlines
|
||
|
print ""
|
||
|
# translate \n sequences to actual newlines and unprotect commas (,)
|
||
|
print gensub("\\\\,", ",", "g", gensub("\\\\n", "\n", "g", entry));
|
||
|
print "** COMMENT original iCal entry"
|
||
|
print gensub("\r", "", "g", icalentry)
|
||
|
summary = ""
|
||
|
date = ""
|
||
|
date1 = ""
|
||
|
date2 = ""
|
||
|
time1 = ""
|
||
|
time2 = ""
|
||
|
entry = ""
|
||
|
icalentry = ""
|
||
|
indescription = 0
|
||
|
inevent = 0
|
||
|
insummary = 0
|
||
|
period = "";
|
||
|
repeat = "";
|
||
|
}
|
||
|
|
||
|
# the end of a TODO item is similar to an event except that the dates
|
||
|
# are used for scheduling and deadline information
|
||
|
|
||
|
/^END:VTODO/ {
|
||
|
# translate \n sequences to actual newlines and unprotect commas (,)
|
||
|
print "* " todotype " " gensub("\\\\,", ",", "g", gensub("\\\\n", " ", "g", summary)) " :" NAME ":"
|
||
|
# scheduling and deadline information come immediately after the
|
||
|
# headline, before properties
|
||
|
if (date1 != "") {
|
||
|
if (date2 != "")
|
||
|
if (time2 != "")
|
||
|
print "SCHEDULED: <" date1 time1 "> DEADLINE: <" date2 " " time2 "> "
|
||
|
else
|
||
|
print "SCHEDULED: <" date1 time1 "> DEADLINE: <" date2 "> "
|
||
|
else
|
||
|
print "SCHEDULED: <" date1 time1 "> "
|
||
|
} else if (date2 != "") {
|
||
|
if (time2 != "")
|
||
|
print "DEADLINE: <" date2 " " time2 "> "
|
||
|
else
|
||
|
print "DEADLINE: <" date2 "> "
|
||
|
}
|
||
|
# now come the properties which include the ID always and possibly
|
||
|
# a location
|
||
|
print ":PROPERTIES:"
|
||
|
print ":ID: " id
|
||
|
if (location != "") {
|
||
|
print ":LOCATION: " gensub("\\\\,", ",", "g", location);
|
||
|
}
|
||
|
print ":END:"
|
||
|
# now the entry; we put in a blank line just because that's the
|
||
|
# way I like it, ah ha ah ha... ;-)
|
||
|
print ""
|
||
|
# translate \n sequences to actual newlines and unprotect commas (,)
|
||
|
print gensub("\\\\,", ",", "g", gensub("\\\\n", "\n", "g", entry));
|
||
|
print "** COMMENT original iCal entry"
|
||
|
print gensub("\r", "", "g", icalentry)
|
||
|
summary = "";
|
||
|
date = "";
|
||
|
date1 = "";
|
||
|
date2 = "";
|
||
|
time1 = "";
|
||
|
time2 = "";
|
||
|
entry = "";
|
||
|
icalentry = "";
|
||
|
indescription = 0;
|
||
|
inevent = 0;
|
||
|
insummary = 0;
|
||
|
intodo = 0;
|
||
|
period = "";
|
||
|
repeat = "";
|
||
|
}
|
||
|
|
||
|
# Local Variables:
|
||
|
# time-stamp-line-limit: 1000
|
||
|
# time-stamp-format: "%04y.%02m.%02d %02H:%02M:%02S"
|
||
|
# time-stamp-active: t
|
||
|
# time-stamp-start: "Last change:[ \t]+"
|
||
|
# time-stamp-end: "$"
|
||
|
# End:
|