dhall-haskell/dhall/src/Dhall/Parser/Expression.hs
Gabriel Gonzalez b3b6bb4e1d Partially fix whitespace parsing performance regression (#1512)
* Partially fix whitespace parsing performance regression

This undoes some of the performance regression introduced
in https://github.com/dhall-lang/dhall-haskell/pull/1483

Before #1483:

```
benchmarked Line comment
time                 11.86 ms   (11.69 ms .. 11.98 ms)
                     0.999 R²   (0.999 R² .. 1.000 R²)
mean                 11.84 ms   (11.79 ms .. 11.89 ms)
std dev              129.4 μs   (107.2 μs .. 164.1 μs)

benchmarked Block comment
time                 13.20 ms   (13.00 ms .. 13.41 ms)
                     0.999 R²   (0.998 R² .. 1.000 R²)
mean                 13.59 ms   (13.41 ms .. 13.94 ms)
std dev              600.0 μs   (142.2 μs .. 953.7 μs)
```

After #1483:

```
benchmarked Line comment
time                 288.7 ms   (282.8 ms .. 294.7 ms)
                     1.000 R²   (0.999 R² .. 1.000 R²)
mean                 292.3 ms   (290.8 ms .. 294.6 ms)
std dev              3.156 ms   (2.216 ms .. 4.546 ms)

benchmarked Block comment
time                 286.2 ms   (280.9 ms .. 292.6 ms)
                     0.999 R²   (0.998 R² .. 1.000 R²)
mean                 290.6 ms   (288.3 ms .. 292.9 ms)
std dev              3.875 ms   (2.866 ms .. 5.500 ms)
```

After this change:

```
benchmarked Line comment
time                 61.44 ms   (60.37 ms .. 63.03 ms)
                     0.999 R²   (0.997 R² .. 1.000 R²)
mean                 61.41 ms   (60.74 ms .. 62.25 ms)
std dev              1.341 ms   (945.0 μs .. 1.901 ms)

benchmarked Block comment
time                 61.83 ms   (60.97 ms .. 63.14 ms)
                     0.999 R²   (0.998 R² .. 1.000 R²)
mean                 61.16 ms   (60.33 ms .. 61.85 ms)
std dev              1.396 ms   (1.011 ms .. 1.907 ms)
```

* Correctly parse `https://example.com usingBla`

... as caught by @sjakobi
2019-11-04 02:12:02 +00:00

1037 lines
31 KiB
Haskell

{-# LANGUAGE CPP #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | Parsing Dhall expressions.
module Dhall.Parser.Expression where
import Control.Applicative (Alternative(..), optional)
import Data.ByteArray.Encoding (Base(..))
import Data.Functor (void)
import Data.List.NonEmpty (NonEmpty(..))
import Data.Semigroup (Semigroup(..))
import Data.Text (Text)
import Dhall.Syntax
import Dhall.Src (Src(..))
import Prelude hiding (const, pi)
import Text.Parser.Combinators (choice, try, (<?>))
import qualified Control.Monad
import qualified Data.ByteArray.Encoding
import qualified Data.ByteString
import qualified Data.Char as Char
import qualified Data.Foldable
import qualified Data.List
import qualified Data.List.NonEmpty
import qualified Data.Sequence
import qualified Data.Text
import qualified Data.Text.Encoding
import qualified Dhall.Crypto
import qualified Text.Megaparsec
#if !MIN_VERSION_megaparsec(7, 0, 0)
import qualified Text.Megaparsec.Char as Text.Megaparsec
#endif
import Dhall.Parser.Combinators
import Dhall.Parser.Token
-- | Get the current source position
getSourcePos :: Text.Megaparsec.MonadParsec e s m =>
m Text.Megaparsec.SourcePos
getSourcePos =
#if MIN_VERSION_megaparsec(7, 0, 0)
Text.Megaparsec.getSourcePos
#else
Text.Megaparsec.getPosition
#endif
{-# INLINE getSourcePos #-}
-- | Get the current source offset (in tokens)
getOffset :: Text.Megaparsec.MonadParsec e s m => m Int
#if MIN_VERSION_megaparsec(7, 0, 0)
getOffset = Text.Megaparsec.stateOffset <$> Text.Megaparsec.getParserState
#else
getOffset = Text.Megaparsec.stateTokensProcessed <$> Text.Megaparsec.getParserState
#endif
{-# INLINE getOffset #-}
-- | Set the current source offset
setOffset :: Text.Megaparsec.MonadParsec e s m => Int -> m ()
#if MIN_VERSION_megaparsec(7, 0, 0)
setOffset o = Text.Megaparsec.updateParserState $ \(Text.Megaparsec.State s _ pst) ->
Text.Megaparsec.State s o pst
#else
setOffset o = Text.Megaparsec.updateParserState $ \(Text.Megaparsec.State s p _ stw) ->
Text.Megaparsec.State s p o stw
#endif
{-# INLINE setOffset #-}
{-| Wrap a `Parser` to still match the same text but return only the `Src`
span
-}
src :: Parser a -> Parser Src
src parser = do
before <- getSourcePos
(tokens, _) <- Text.Megaparsec.match parser
after <- getSourcePos
return (Src before after tokens)
{-| Wrap a `Parser` to still match the same text, but to wrap the resulting
`Expr` in a `Note` constructor containing the `Src` span
-}
noted :: Parser (Expr Src a) -> Parser (Expr Src a)
noted parser = do
before <- getSourcePos
(tokens, e) <- Text.Megaparsec.match parser
after <- getSourcePos
let src = Src before after tokens
case e of
Note src _ | laxSrcEq src src -> return e
_ -> return (Note src e)
{-| Parse a complete expression (with leading and trailing whitespace)
This corresponds to the @complete-expression@ rule from the official
grammar
-}
completeExpression :: Parser a -> Parser (Expr Src a)
completeExpression embedded = completeExpression_
where
Parsers {..} = parsers embedded
{-| Parse an \"import expression\"
This is not the same thing as @`fmap` `Embed`@. This parses any
expression of the same or higher precedence as an import expression (such
as a selector expression). For example, this parses @(1)@
This corresponds to the @import-expression@ rule from the official grammar
-}
importExpression :: Parser a -> Parser (Expr Src a)
importExpression embedded = importExpression_
where
Parsers {..} = parsers embedded
{-| For efficiency (and simplicity) we only expose two parsers from the
result of the `parsers` function, since these are the only parsers needed
outside of this module
-}
data Parsers a = Parsers
{ completeExpression_ :: Parser (Expr Src a)
, importExpression_ :: Parser (Expr Src a)
}
-- | Given a parser for imports,
parsers :: Parser a -> Parsers a
parsers embedded = Parsers {..}
where
completeExpression_ = whitespace *> expression <* whitespace
expression =
noted
( choice
[ alternative0
, alternative1
, alternative2
, alternative3
, alternative4
, alternative5
]
) <?> "expression"
where
alternative0 = do
_lambda
whitespace
_openParens
whitespace
a <- label
whitespace
_colon
nonemptyWhitespace
b <- expression
whitespace
_closeParens
whitespace
_arrow
whitespace
c <- expression
return (Lam a b c)
alternative1 = do
_if
nonemptyWhitespace
a <- expression
whitespace
_then
nonemptyWhitespace
b <- expression
whitespace
_else
nonemptyWhitespace
c <- expression
return (BoolIf a b c)
alternative2 = do
let binding = do
_let
src0 <- src nonemptyWhitespace
c <- label
src1 <- src whitespace
d <- optional (do
_colon
src2 <- src nonemptyWhitespace
e <- expression
whitespace
return (Just src2, e) )
_equal
src3 <- src whitespace
f <- expression
whitespace
return (Binding (Just src0) c (Just src1) d (Just src3) f)
as <- Data.List.NonEmpty.some1 binding
_in
nonemptyWhitespace
b <- expression
-- 'Note's in let-in-let:
--
-- Subsequent @let@s that are not separated by an @in@ only get a
-- single surrounding 'Note'. For example:
--
-- let x = a
-- let y = b
-- in let z = c
-- in x
--
-- is parsed as
--
-- (Note …
-- (Let x …
-- (Let y …
-- (Note …
-- (Let z …
return (Dhall.Syntax.wrapInLets as b)
alternative3 = do
_forall
whitespace
_openParens
whitespace
a <- label
whitespace
_colon
nonemptyWhitespace
b <- expression
whitespace
_closeParens
whitespace
_arrow
whitespace
c <- expression
return (Pi a b c)
alternative4 = do
_assert
whitespace
_colon
nonemptyWhitespace
a <- expression
return (Assert a)
alternative5 = do
a <- operatorExpression
let alternative4A = do
_arrow
whitespace
b <- expression
whitespace
return (Pi "_" a b)
let alternative4B = do
_colon
nonemptyWhitespace
b <- expression
case shallowDenote a of
ListLit _ [] ->
return (ListLit (Just b) [])
Merge c d _ ->
return (Merge c d (Just b))
ToMap c _ ->
return (ToMap c (Just b))
_ -> return (Annot a b)
alternative4A <|> alternative4B <|> pure a
operatorExpression =
foldr makeOperatorExpression applicationExpression operatorParsers
makeOperatorExpression operatorParser subExpression =
noted (do
a <- subExpression
whitespace
b <- Text.Megaparsec.many $ do
op <- operatorParser
r <- subExpression
whitespace
return (\l -> l `op` r)
return (foldl (\x f -> f x) a b) )
operatorParsers :: [Parser (Expr s a -> Expr s a -> Expr s a)]
operatorParsers =
[ ImportAlt <$ _importAlt <* nonemptyWhitespace
, BoolOr <$ _or <* whitespace
, NaturalPlus <$ _plus <* nonemptyWhitespace
, TextAppend <$ _textAppend <* whitespace
, ListAppend <$ _listAppend <* whitespace
, BoolAnd <$ _and <* whitespace
, Combine <$ _combine <* whitespace
, Prefer <$ _prefer <* whitespace
, CombineTypes <$ _combineTypes <* whitespace
, NaturalTimes <$ _times <* whitespace
, BoolEQ <$ _doubleEqual <* whitespace
, BoolNE <$ _notEqual <* whitespace
, Equivalent <$ _equivalent <* whitespace
]
applicationExpression = do
f <- (Some <$ _Some <* nonemptyWhitespace)
<|> return id
a <- noted importExpression_
b <- Text.Megaparsec.many (try (nonemptyWhitespace *> noted importExpression_))
return (foldl app (f a) b)
where
app nL@(Note (Src before _ bytesL) _) nR@(Note (Src _ after bytesR) _) =
Note (Src before after (bytesL <> bytesR)) (App nL nR)
app nL nR =
App nL nR
importExpression_ = noted (choice [ alternative0, alternative1 ])
where
alternative0 = do
a <- embedded
return (Embed a)
alternative1 = completionExpression
completionExpression = noted (do
a <- selectorExpression
mb <- optional (do
_doubleColon
selectorExpression )
case mb of
Nothing -> return a
Just b -> return (RecordCompletion a b) )
selectorExpression = noted (do
a <- primitiveExpression
let recordType = _openParens *> whitespace *> expression <* whitespace <* _closeParens
let field x e = Field e x
let projectBySet xs e = Project e (Left xs)
let projectByExpression xs e = Project e (Right xs)
let alternatives =
fmap field anyLabel
<|> fmap projectBySet labels
<|> fmap projectByExpression recordType
b <- Text.Megaparsec.many (try (whitespace *> _dot *> whitespace *> alternatives))
return (foldl (\e k -> k e) a b) )
primitiveExpression =
noted
( choice
[ alternative00
, alternative01
, alternative02
, alternative03
, alternative04
, alternative05
, alternative06
, alternative07
, alternative08
, alternative37
, alternative09
, builtin
]
)
<|> alternative38
where
alternative00 = do
n <- getOffset
a <- try doubleLiteral
b <- if isInfinite a
then setOffset n *> fail "double out of bounds"
else return a
return (DoubleLit (DhallDouble b))
alternative01 = do
a <- try naturalLiteral
return (NaturalLit a)
alternative02 = do
a <- try integerLiteral
return (IntegerLit a)
alternative03 = textLiteral
alternative04 = (do
_openBrace
whitespace
_ <- optional (_comma *> whitespace)
a <- recordTypeOrLiteral
whitespace
_closeBrace
return a ) <?> "literal"
alternative05 = unionType
alternative06 = listLiteral
alternative07 = do
_merge
nonemptyWhitespace
a <- importExpression_
nonemptyWhitespace
b <- importExpression_ <?> "second argument to ❰merge❱"
return (Merge a b Nothing)
alternative08 = do
_toMap
nonemptyWhitespace
a <- importExpression_
return (ToMap a Nothing)
alternative09 = do
a <- try doubleInfinity
return (DoubleLit (DhallDouble a))
builtin = do
let predicate c =
c == 'N'
|| c == 'I'
|| c == 'D'
|| c == 'L'
|| c == 'O'
|| c == 'B'
|| c == 'S'
|| c == 'T'
|| c == 'F'
|| c == 'K'
let nan = DhallDouble (0.0/0.0)
c <- Text.Megaparsec.lookAhead (Text.Megaparsec.satisfy predicate)
case c of
'N' ->
choice
[ NaturalFold <$ _NaturalFold
, NaturalBuild <$ _NaturalBuild
, NaturalIsZero <$ _NaturalIsZero
, NaturalEven <$ _NaturalEven
, NaturalOdd <$ _NaturalOdd
, NaturalSubtract <$ _NaturalSubtract
, NaturalToInteger <$ _NaturalToInteger
, NaturalShow <$ _NaturalShow
, Natural <$ _Natural
, None <$ _None
, DoubleLit nan <$ _NaN
]
'I' ->
choice
[ IntegerClamp <$ _IntegerClamp
, IntegerNegate <$ _IntegerNegate
, IntegerShow <$ _IntegerShow
, IntegerToDouble <$ _IntegerToDouble
, Integer <$ _Integer
]
'D' ->
choice
[ DoubleShow <$ _DoubleShow
, Double <$ _Double
]
'L' ->
choice
[ ListBuild <$ _ListBuild
, ListFold <$ _ListFold
, ListLength <$ _ListLength
, ListHead <$ _ListHead
, ListLast <$ _ListLast
, ListIndexed <$ _ListIndexed
, ListReverse <$ _ListReverse
, List <$ _List
]
'O' ->
choice
[ OptionalFold <$ _OptionalFold
, OptionalBuild <$ _OptionalBuild
, Optional <$ _Optional
]
'B' -> Bool <$ _Bool
'S' -> Const Sort <$ _Sort
'T' ->
choice
[ TextShow <$ _TextShow
, Text <$ _Text
, BoolLit True <$ _True
, Const Type <$ _Type
]
'F' -> BoolLit False <$ _False
'K' -> Const Kind <$ _Kind
_ -> empty
alternative37 = do
a <- identifier
return (Var a)
alternative38 = do
_openParens
whitespace
a <- expression
whitespace
_closeParens
return a
doubleQuotedChunk =
choice
[ interpolation
, unescapedCharacterFast
, unescapedCharacterSlow
, escapedCharacter
]
where
interpolation = do
_ <- text "${"
e <- completeExpression_
_ <- char '}'
return (Chunks [(mempty, e)] mempty)
unescapedCharacterFast = do
t <- Text.Megaparsec.takeWhile1P Nothing predicate
return (Chunks [] t)
where
predicate c =
( ('\x20' <= c && c <= '\x21' )
|| ('\x23' <= c && c <= '\x5B' )
|| ('\x5D' <= c && c <= '\x10FFFF')
) && c /= '$'
unescapedCharacterSlow = do
_ <- char '$'
return (Chunks [] "$")
escapedCharacter = do
_ <- char '\\'
c <- choice
[ quotationMark
, dollarSign
, backSlash
, forwardSlash
, backSpace
, formFeed
, lineFeed
, carriageReturn
, tab
, unicode
]
return (Chunks [] (Data.Text.singleton c))
where
quotationMark = char '"'
dollarSign = char '$'
backSlash = char '\\'
forwardSlash = char '/'
backSpace = do _ <- char 'b'; return '\b'
formFeed = do _ <- char 'f'; return '\f'
lineFeed = do _ <- char 'n'; return '\n'
carriageReturn = do _ <- char 'r'; return '\r'
tab = do _ <- char 't'; return '\t'
unicode = do
_ <- char 'u';
let toNumber = Data.List.foldl' (\x y -> x * 16 + y) 0
let fourCharacterEscapeSequence =
fmap toNumber (Control.Monad.replicateM 4 hexNumber)
let bracedEscapeSequence = do
_ <- char '{'
ns <- some hexNumber
let number = toNumber ns
Control.Monad.guard (number <= 0x10FFFF && validCodepoint (Char.chr number))
<|> fail "Invalid Unicode code point"
_ <- char '}'
return (toNumber ns)
n <- bracedEscapeSequence <|> fourCharacterEscapeSequence
return (Char.chr n)
doubleQuotedLiteral = do
_ <- char '"'
chunks <- Text.Megaparsec.many doubleQuotedChunk
_ <- char '"'
return (mconcat chunks)
singleQuoteContinue =
choice
[ escapeSingleQuotes
, interpolation
, escapeInterpolation
, endLiteral
, unescapedCharacterFast
, unescapedCharacterSlow
, tab
, endOfLine
]
where
escapeSingleQuotes = do
_ <- "'''" :: Parser Text
b <- singleQuoteContinue
return ("''" <> b)
interpolation = do
_ <- text "${"
a <- completeExpression_
_ <- char '}'
b <- singleQuoteContinue
return (Chunks [(mempty, a)] mempty <> b)
escapeInterpolation = do
_ <- text "''${"
b <- singleQuoteContinue
return ("${" <> b)
endLiteral = do
_ <- text "''"
return mempty
unescapedCharacterFast = do
a <- Text.Megaparsec.takeWhile1P Nothing predicate
b <- singleQuoteContinue
return (Chunks [] a <> b)
where
predicate c =
('\x20' <= c && c <= '\x10FFFF') && c /= '$' && c /= '\''
unescapedCharacterSlow = do
a <- satisfy predicate
b <- singleQuoteContinue
return (Chunks [] a <> b)
where
predicate c = c == '$' || c == '\''
endOfLine = do
a <- "\n" <|> "\r\n"
b <- singleQuoteContinue
return (Chunks [] a <> b)
tab = do
_ <- char '\t' <?> "tab"
b <- singleQuoteContinue
return ("\t" <> b)
singleQuoteLiteral = do
_ <- text "''"
_ <- endOfLine
a <- singleQuoteContinue
return (toDoubleQuoted a)
where
endOfLine = (void (char '\n') <|> void (text "\r\n")) <?> "newline"
textLiteral = (do
literal <- doubleQuotedLiteral <|> singleQuoteLiteral
return (TextLit literal) ) <?> "literal"
recordTypeOrLiteral =
choice
[ alternative0
, alternative1
, alternative2
]
where
alternative0 = do
_equal
return (RecordLit mempty)
alternative1 = nonEmptyRecordTypeOrLiteral
alternative2 = return (Record mempty)
nonEmptyRecordTypeOrLiteral = do
a <- anyLabel
whitespace
let nonEmptyRecordType = do
_colon
nonemptyWhitespace
b <- expression
whitespace
e <- Text.Megaparsec.many (do
_comma
whitespace
c <- anyLabel
whitespace
_colon
nonemptyWhitespace
d <- expression
whitespace
return (c, d) )
m <- toMap ((a, b) : e)
return (Record m)
let nonEmptyRecordLiteral = do
_equal
whitespace
b <- expression
whitespace
e <- Text.Megaparsec.many (do
_comma
whitespace
c <- anyLabel
whitespace
_equal
whitespace
d <- expression
whitespace
return (c, d) )
m <- toMap ((a, b) : e)
return (RecordLit m)
nonEmptyRecordType <|> nonEmptyRecordLiteral
unionType = (do
_openAngle
whitespace
_ <- optional (_bar *> whitespace)
let unionTypeEntry = do
a <- anyLabel
whitespace
b <- optional (_colon *> nonemptyWhitespace *> expression <* whitespace)
return (a, b)
kvs <- Text.Megaparsec.sepBy unionTypeEntry (_bar *> whitespace)
m <- toMap kvs
_closeAngle
return (Union m) ) <?> "literal"
listLiteral = (do
_openBracket
whitespace
_ <- optional (_comma *> whitespace)
a <- Text.Megaparsec.sepBy (expression <* whitespace) (_comma *> whitespace)
_closeBracket
return (ListLit Nothing (Data.Sequence.fromList a)) ) <?> "literal"
{-| Parse an environment variable import
This corresponds to the @env@ rule from the official grammar
-}
env :: Parser ImportType
env = do
_ <- text "env:"
a <- (alternative0 <|> alternative1)
return (Env a)
where
alternative0 = bashEnvironmentVariable
alternative1 = do
_ <- char '"'
a <- posixEnvironmentVariable
_ <- char '"'
return a
-- | Parse a local import without trailing whitespace
localOnly :: Parser ImportType
localOnly =
choice
[ parentPath
, herePath
, homePath
, try absolutePath
]
where
parentPath = do
_ <- ".." :: Parser Text
file <- file_ FileComponent
return (Local Parent file)
herePath = do
_ <- "." :: Parser Text
file <- file_ FileComponent
return (Local Here file)
homePath = do
_ <- "~" :: Parser Text
file <- file_ FileComponent
return (Local Home file)
absolutePath = do
file <- file_ FileComponent
return (Local Absolute file)
{-| Parse a local import
This corresponds to the @local@ rule from the official grammar
-}
local :: Parser ImportType
local = do
a <- localOnly
return a
{-| Parse an HTTP(S) import
This corresponds to the @http@ rule from the official grammar
-}
http :: Parser ImportType
http = do
url <- httpRaw
headers <- optional (do
try (whitespace *> _using *> nonemptyWhitespace)
importExpression import_ )
return (Remote (url { headers }))
{-| Parse a `Missing` import
This corresponds to the @missing@ rule from the official grammar
-}
missing :: Parser ImportType
missing = do
_missing
return Missing
{-| Parse an `ImportType`
This corresponds to the @import-type@ rule from the official grammar
-}
importType_ :: Parser ImportType
importType_ = do
let predicate c =
c == '~' || c == '.' || c == '/' || c == 'h' || c == 'e' || c == 'm'
_ <- Text.Megaparsec.lookAhead (Text.Megaparsec.satisfy predicate)
choice [ local, http, env, missing ]
{-| Parse a `Dhall.Crypto.SHA256Digest`
This corresponds to the @hash@ rule from the official grammar
-}
importHash_ :: Parser Dhall.Crypto.SHA256Digest
importHash_ = do
_ <- text "sha256:"
t <- count 64 (satisfy hexdig <?> "hex digit")
let strictBytes16 = Data.Text.Encoding.encodeUtf8 t
strictBytes <- case Data.ByteArray.Encoding.convertFromBase Base16 strictBytes16 of
Left string -> fail string
Right strictBytes -> return (strictBytes :: Data.ByteString.ByteString)
case Dhall.Crypto.sha256DigestFromByteString strictBytes of
Nothing -> fail "Invalid sha256 hash"
Just h -> pure h
{-| Parse an `ImportHashed`
This corresponds to the @import-hashed@ rule from the official grammar
-}
importHashed_ :: Parser ImportHashed
importHashed_ = do
importType <- importType_
hash <- optional (try (nonemptyWhitespace *> importHash_))
return (ImportHashed {..})
{-| Parse an `Import`
This corresponds to the @import@ rule from the official grammar
-}
import_ :: Parser Import
import_ = (do
importHashed <- importHashed_
importMode <- alternative <|> pure Code
return (Import {..}) ) <?> "import"
where
alternative = do
try (whitespace *> _as *> nonemptyWhitespace)
(_Text >> pure RawText) <|> (_Location >> pure Location)
-- | Same as @Data.Text.splitOn@, except always returning a `NonEmpty` result
splitOn :: Text -> Text -> NonEmpty Text
splitOn needle haystack =
case Data.Text.splitOn needle haystack of
[] -> "" :| []
t : ts -> t :| ts
-- | Split `Chunks` by lines
linesLiteral :: Chunks s a -> NonEmpty (Chunks s a)
linesLiteral (Chunks [] suffix) =
fmap (Chunks []) (splitOn "\n" suffix)
linesLiteral (Chunks ((prefix, interpolation) : pairs) suffix) =
foldr
Data.List.NonEmpty.cons
(Chunks ((lastLine, interpolation) : pairs) suffix :| chunks)
(fmap (Chunks []) initLines)
where
splitLines = splitOn "\n" prefix
initLines = Data.List.NonEmpty.init splitLines
lastLine = Data.List.NonEmpty.last splitLines
Chunks pairs suffix :| chunks = linesLiteral (Chunks pairs suffix)
-- | Flatten several `Chunks` back into a single `Chunks` by inserting newlines
unlinesLiteral :: NonEmpty (Chunks s a) -> Chunks s a
unlinesLiteral chunks =
Data.Foldable.fold (Data.List.NonEmpty.intersperse "\n" chunks)
-- | Returns `True` if the `Chunks` represents a blank line
emptyLine :: Chunks s a -> Bool
emptyLine (Chunks [] "" ) = True
emptyLine (Chunks [] "\r") = True -- So that `\r\n` is treated as a blank line
emptyLine _ = False
-- | Return the leading whitespace for a `Chunks` literal
leadingSpaces :: Chunks s a -> Text
leadingSpaces chunks = Data.Text.takeWhile isSpace firstText
where
isSpace c = c == '\x20' || c == '\x09'
firstText =
case chunks of
Chunks [] suffix -> suffix
Chunks ((prefix, _) : _ ) _ -> prefix
-- | Drop the first @n@ characters for a `Chunks` literal
dropLiteral :: Int -> Chunks s a -> Chunks s a
dropLiteral n (Chunks [] suffix) =
Chunks [] (Data.Text.drop n suffix)
dropLiteral n (Chunks ((prefix, interpolation) : rest) suffix) =
Chunks ((Data.Text.drop n prefix, interpolation) : rest) suffix
{-| Convert a single-quoted `Chunks` literal to the equivalent double-quoted
`Chunks` literal
-}
toDoubleQuoted :: Chunks Src a -> Chunks Src a
toDoubleQuoted literal =
unlinesLiteral (fmap (dropLiteral indent) literals)
where
literals = linesLiteral literal
sharedPrefix ab ac =
case Data.Text.commonPrefixes ab ac of
Just (a, _b, _c) -> a
Nothing -> ""
-- The standard specifies to filter out blank lines for all lines *except*
-- for the last line
filteredLines = newInit <> pure oldLast
where
oldInit = Data.List.NonEmpty.init literals
oldLast = Data.List.NonEmpty.last literals
newInit = filter (not . emptyLine) oldInit
longestSharedPrefix =
case filteredLines of
l : ls ->
Data.Foldable.foldl' sharedPrefix (leadingSpaces l) (fmap leadingSpaces ls)
[] ->
""
indent = Data.Text.length longestSharedPrefix