From 1a831d181341e7fb52fd109baaef620ef7e42e0f Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 17 Nov 2019 18:45:51 +0100 Subject: [PATCH] Preserve leading whitespace in multi-line strings (#1550) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … by moving any leading whitespace on the _last_ line into a string interpolation. This ensures that the parser can find the correct indentation level, no matter what the other lines contain. The additional null-check in prettyChunks is necessary to preserve formatting idempotence. Otherwise "\n " is first formatted as '' ${" "}'' but turns into "\n${" "}" on re-formatting. Fixes #1545. --- dhall/src/Dhall/Pretty/Internal.hs | 76 ++++++++++++++++++++---- dhall/tests/format/interpolation0A.dhall | 1 + dhall/tests/format/interpolation0B.dhall | 3 + dhall/tests/format/issue1545-1A.dhall | 1 + dhall/tests/format/issue1545-1B.dhall | 3 + dhall/tests/format/issue1545-2A.dhall | 1 + dhall/tests/format/issue1545-2B.dhall | 3 + dhall/tests/format/stringWithNullB.dhall | 2 +- 8 files changed, 79 insertions(+), 11 deletions(-) create mode 100644 dhall/tests/format/interpolation0A.dhall create mode 100644 dhall/tests/format/interpolation0B.dhall create mode 100644 dhall/tests/format/issue1545-1A.dhall create mode 100644 dhall/tests/format/issue1545-1B.dhall create mode 100644 dhall/tests/format/issue1545-2A.dhall create mode 100644 dhall/tests/format/issue1545-2B.dhall diff --git a/dhall/src/Dhall/Pretty/Internal.hs b/dhall/src/Dhall/Pretty/Internal.hs index d6006cf..8ae5347 100644 --- a/dhall/src/Dhall/Pretty/Internal.hs +++ b/dhall/src/Dhall/Pretty/Internal.hs @@ -1089,7 +1089,7 @@ prettyCharacterSet characterSet expression = prettyChunks :: Pretty a => Chunks Src a -> Doc Ann prettyChunks chunks@(Chunks a b) | anyText (== '\n') = - if anyText (/= '\n') + if not (null a) || anyText (/= '\n') then long else Pretty.flatAlt long short | otherwise = @@ -1139,24 +1139,80 @@ prettyCharacterSet characterSet expression = prettyText t = literal (Pretty.pretty (escapeText_ t)) --- | Prepare 'Chunks' for multi-line formatting by interpolating characters that --- may not appear in multi-line strings directly. +-- | Prepare 'Chunks' for multi-line formatting by escaping problematic +-- character sequences via string interpolations -- +-- >>> multilineChunks (Chunks [] "\n \tx") +-- Chunks [("\n",TextLit (Chunks [] " \t"))] "x" -- >>> multilineChunks (Chunks [] "\n\NUL\b\f\t") -- Chunks [("\n",TextLit (Chunks [] "\NUL\b\f"))] "\t" -multilineChunks :: Chunks Src a -> Chunks Src a -multilineChunks (Chunks as0 b0) = Chunks as1 b1 +multilineChunks :: Chunks s a -> Chunks s a +multilineChunks = escapeControlCharacters . escapeLastLineLeadingWhitespace + +-- | Escape leading whitespace on the last line by moving it into a string +-- string interpolation +-- +-- Unescaped leading whitespace on the last line would otherwise be removed +-- by the parser's dedentation logic. +-- +-- >>> escapeLastLineLeadingWhitespace (Chunks [] "\n \tx") +-- Chunks [("\n",TextLit (Chunks [] " \t"))] "x" +-- >>> escapeLastLineLeadingWhitespace (Chunks [("\n",Var (V "x" 0))] " ") +-- Chunks [("\n",Var (V "x" 0))] " " +-- >>> escapeLastLineLeadingWhitespace (Chunks [("\n ",Var (V "x" 0))] "") +-- Chunks [("\n",TextLit (Chunks [] " ")),("",Var (V "x" 0))] "" +-- >>> escapeLastLineLeadingWhitespace (Chunks [("\n ",Var (V "x" 0))] "\n") +-- Chunks [("\n ",Var (V "x" 0))] "\n" +-- +-- We assume that there's at least one newline and may therefore ignore leading +-- whitespace on the first line: +-- +-- >>> escapeLastLineLeadingWhitespace (Chunks [] " ") +-- Chunks [] " " +escapeLastLineLeadingWhitespace :: Chunks s a -> Chunks s a +escapeLastLineLeadingWhitespace (Chunks as0 b0) = + case escape1 b0 of + Nothing -> Chunks (escapeChunks as0) b0 + Just (Chunks cs b1) -> Chunks (as0 ++ cs) b1 where - as1 = foldr f (map toPair bs) as0 + -- Nothing: No newline found + -- Just chunks: Newline was found! + escape1 :: Text -> Maybe (Chunks s a) + escape1 t = case Text.breakOnEnd "\n" t of + ("", _) -> Nothing + (a , b) -> Just $ case Text.span predicate b of + ("", _) -> Chunks [] t + (c , d) -> Chunks [(a, TextLit (Chunks [] c))] d + + predicate c = c == ' ' || c == '\t' + + escapeChunks = snd . foldr f (NotDone, []) + + f chunk (Done , chunks) = (Done, chunk : chunks) + f (t, e) (NotDone, chunks) = case escape1 t of + Nothing -> (NotDone, (t, e) : chunks) + Just (Chunks as b) -> (Done, as ++ (b, e) : chunks) + +data Done = NotDone | Done + +-- | Escape control characters by moving them into string interpolations +-- +-- >>> escapeControlCharacters (Chunks [] "\n\NUL\b\f\t") +-- Chunks [("\n",TextLit (Chunks [] "\NUL\b\f"))] "\t" +escapeControlCharacters :: Chunks s a -> Chunks s a +escapeControlCharacters (Chunks as0 b0) = Chunks as1 b1 + where + as1 = foldr f (map toChunk bs) as0 (bs, b1) = splitOnPredicate predicate b0 + f (t0, e) chunks = map toChunk ts1 ++ (t1, e) : chunks + where + (ts1, t1) = splitOnPredicate predicate t0 + predicate c = Data.Char.isControl c && c /= ' ' && c /= '\t' && c /= '\n' - f (t0, e) pairs = case splitOnPredicate predicate t0 of - (ts1, t1) -> map toPair ts1 ++ (t1, e) : pairs - - toPair (t0, t1) = (t0, TextLit (Chunks [] t1)) + toChunk (t0, t1) = (t0, TextLit (Chunks [] t1)) -- | Split `Text` on a predicate, preserving all parts of the original string. -- diff --git a/dhall/tests/format/interpolation0A.dhall b/dhall/tests/format/interpolation0A.dhall new file mode 100644 index 0000000..8aeec3c --- /dev/null +++ b/dhall/tests/format/interpolation0A.dhall @@ -0,0 +1 @@ +"foo\n${a} bar" diff --git a/dhall/tests/format/interpolation0B.dhall b/dhall/tests/format/interpolation0B.dhall new file mode 100644 index 0000000..e51a31e --- /dev/null +++ b/dhall/tests/format/interpolation0B.dhall @@ -0,0 +1,3 @@ +'' +foo +${a} bar'' diff --git a/dhall/tests/format/issue1545-1A.dhall b/dhall/tests/format/issue1545-1A.dhall new file mode 100644 index 0000000..f07b21e --- /dev/null +++ b/dhall/tests/format/issue1545-1A.dhall @@ -0,0 +1 @@ +"\n\tx" diff --git a/dhall/tests/format/issue1545-1B.dhall b/dhall/tests/format/issue1545-1B.dhall new file mode 100644 index 0000000..1dfe872 --- /dev/null +++ b/dhall/tests/format/issue1545-1B.dhall @@ -0,0 +1,3 @@ +'' + +${"\t"}x'' diff --git a/dhall/tests/format/issue1545-2A.dhall b/dhall/tests/format/issue1545-2A.dhall new file mode 100644 index 0000000..ec31ed0 --- /dev/null +++ b/dhall/tests/format/issue1545-2A.dhall @@ -0,0 +1 @@ +"\n x" diff --git a/dhall/tests/format/issue1545-2B.dhall b/dhall/tests/format/issue1545-2B.dhall new file mode 100644 index 0000000..8d6cf0a --- /dev/null +++ b/dhall/tests/format/issue1545-2B.dhall @@ -0,0 +1,3 @@ +'' + +${" "}x'' diff --git a/dhall/tests/format/stringWithNullB.dhall b/dhall/tests/format/stringWithNullB.dhall index 1c9d134..b9b2d47 100644 --- a/dhall/tests/format/stringWithNullB.dhall +++ b/dhall/tests/format/stringWithNullB.dhall @@ -1,3 +1,3 @@ '' ${"\u0000"} $ \ - ☺'' +${" "}☺''