dhall-haskell/dhall/src/Dhall/Parser/Expression.hs

1024 lines
31 KiB
Haskell

{-# LANGUAGE CPP #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | Parsing Dhall expressions.
module Dhall.Parser.Expression where
import Control.Applicative (Alternative(..), optional)
import Data.ByteArray.Encoding (Base(..))
import Data.Foldable (foldl')
import Data.Functor (void)
import Data.List.NonEmpty (NonEmpty(..))
import Data.Semigroup (Semigroup(..))
import Data.Text (Text)
import Dhall.Syntax
import Dhall.Src (Src(..))
import Prelude hiding (const, pi)
import Text.Parser.Combinators (choice, try, (<?>))
import qualified Control.Monad
import qualified Data.ByteArray.Encoding
import qualified Data.ByteString
import qualified Data.Char as Char
import qualified Data.Foldable
import qualified Data.List
import qualified Data.List.NonEmpty
import qualified Data.Sequence
import qualified Data.Text
import qualified Data.Text.Encoding
import qualified Dhall.Crypto
import qualified Text.Megaparsec
import Dhall.Parser.Combinators
import Dhall.Parser.Token
-- | Get the current source position
getSourcePos :: Text.Megaparsec.MonadParsec e s m =>
m Text.Megaparsec.SourcePos
getSourcePos =
Text.Megaparsec.getSourcePos
{-# INLINE getSourcePos #-}
-- | Get the current source offset (in tokens)
getOffset :: Text.Megaparsec.MonadParsec e s m => m Int
getOffset = Text.Megaparsec.stateOffset <$> Text.Megaparsec.getParserState
{-# INLINE getOffset #-}
-- | Set the current source offset
setOffset :: Text.Megaparsec.MonadParsec e s m => Int -> m ()
setOffset o = Text.Megaparsec.updateParserState $ \state ->
state
{ Text.Megaparsec.stateOffset = o }
{-# INLINE setOffset #-}
{-| Wrap a `Parser` to still match the same text but return only the `Src`
span
-}
src :: Parser a -> Parser Src
src parser = do
before <- getSourcePos
(tokens, _) <- Text.Megaparsec.match parser
after <- getSourcePos
return (Src before after tokens)
{-| Wrap a `Parser` to still match the same text, but to wrap the resulting
`Expr` in a `Note` constructor containing the `Src` span
-}
noted :: Parser (Expr Src a) -> Parser (Expr Src a)
noted parser = do
before <- getSourcePos
(tokens, e) <- Text.Megaparsec.match parser
after <- getSourcePos
let src = Src before after tokens
case e of
Note src _ | laxSrcEq src src -> return e
_ -> return (Note src e)
{-| Parse a complete expression (with leading and trailing whitespace)
This corresponds to the @complete-expression@ rule from the official
grammar
-}
completeExpression :: Parser a -> Parser (Expr Src a)
completeExpression embedded = completeExpression_
where
Parsers {..} = parsers embedded
{-| Parse an \"import expression\"
This is not the same thing as @`fmap` `Embed`@. This parses any
expression of the same or higher precedence as an import expression (such
as a selector expression). For example, this parses @(1)@
This corresponds to the @import-expression@ rule from the official grammar
-}
importExpression :: Parser a -> Parser (Expr Src a)
importExpression embedded = importExpression_
where
Parsers {..} = parsers embedded
{-| For efficiency (and simplicity) we only expose two parsers from the
result of the `parsers` function, since these are the only parsers needed
outside of this module
-}
data Parsers a = Parsers
{ completeExpression_ :: Parser (Expr Src a)
, importExpression_ :: Parser (Expr Src a)
}
-- | Given a parser for imports,
parsers :: Parser a -> Parsers a
parsers embedded = Parsers {..}
where
completeExpression_ = whitespace *> expression <* whitespace
expression =
noted
( choice
[ alternative0
, alternative1
, alternative2
, alternative3
, alternative4
, alternative5
]
) <?> "expression"
where
alternative0 = do
_lambda
whitespace
_openParens
whitespace
a <- label
whitespace
_colon
nonemptyWhitespace
b <- expression
whitespace
_closeParens
whitespace
_arrow
whitespace
c <- expression
return (Lam a b c)
alternative1 = do
try (_if *> nonemptyWhitespace)
a <- expression
whitespace
try (_then *> nonemptyWhitespace)
b <- expression
whitespace
try (_else *> nonemptyWhitespace)
c <- expression
return (BoolIf a b c)
alternative2 = do
let binding = do
src0 <- try (_let *> src nonemptyWhitespace)
c <- label
src1 <- src whitespace
d <- optional (do
_colon
src2 <- src nonemptyWhitespace
e <- expression
whitespace
return (Just src2, e) )
_equal
src3 <- src whitespace
f <- expression
whitespace
return (Binding (Just src0) c (Just src1) d (Just src3) f)
as <- Data.List.NonEmpty.some1 binding
try (_in *> nonemptyWhitespace)
b <- expression
-- 'Note's in let-in-let:
--
-- Subsequent @let@s that are not separated by an @in@ only get a
-- single surrounding 'Note'. For example:
--
-- let x = a
-- let y = b
-- in let z = c
-- in x
--
-- is parsed as
--
-- (Note …
-- (Let x …
-- (Let y …
-- (Note …
-- (Let z …
return (Dhall.Syntax.wrapInLets as b)
alternative3 = do
try (_forall *> whitespace *> _openParens)
whitespace
a <- label
whitespace
_colon
nonemptyWhitespace
b <- expression
whitespace
_closeParens
whitespace
_arrow
whitespace
c <- expression
return (Pi a b c)
alternative4 = do
try (_assert *> whitespace *> _colon)
nonemptyWhitespace
a <- expression
return (Assert a)
alternative5 = do
a <- operatorExpression
let alternative4A = do
_arrow
whitespace
b <- expression
whitespace
return (Pi "_" a b)
let alternative4B = do
_colon
nonemptyWhitespace
b <- expression
case shallowDenote a of
ListLit Nothing [] ->
return (ListLit (Just b) [])
Merge c d Nothing ->
return (Merge c d (Just b))
ToMap c Nothing ->
return (ToMap c (Just b))
_ -> return (Annot a b)
alternative4A <|> alternative4B <|> pure a
operatorExpression =
foldr makeOperatorExpression applicationExpression operatorParsers
makeOperatorExpression operatorParser subExpression =
noted (do
a <- subExpression
whitespace
b <- Text.Megaparsec.many $ do
op <- operatorParser
r <- subExpression
whitespace
return (\l -> l `op` r)
return (foldl' (\x f -> f x) a b))
operatorParsers :: [Parser (Expr s a -> Expr s a -> Expr s a)]
operatorParsers =
[ ImportAlt <$ _importAlt <* nonemptyWhitespace
, BoolOr <$ _or <* whitespace
, NaturalPlus <$ _plus <* nonemptyWhitespace
, TextAppend <$ _textAppend <* whitespace
, ListAppend <$ _listAppend <* whitespace
, BoolAnd <$ _and <* whitespace
, Combine <$ _combine <* whitespace
, Prefer <$ _prefer <* whitespace
, CombineTypes <$ _combineTypes <* whitespace
, NaturalTimes <$ _times <* whitespace
, BoolEQ <$ _doubleEqual <* whitespace
, BoolNE <$ _notEqual <* whitespace
, Equivalent <$ _equivalent <* whitespace
]
applicationExpression = do
f <- (Some <$ try (_Some <* nonemptyWhitespace))
<|> return id
a <- noted importExpression_
bs <- Text.Megaparsec.many . try $ do
(sep, _) <- Text.Megaparsec.match nonemptyWhitespace
b <- importExpression_
return (sep, b)
return (foldl' app (f a) bs)
where
app a (sep, b)
| Note (Src left _ bytesL) _ <- a
, Note (Src _ right bytesR) _ <- b
= Note (Src left right (bytesL <> sep <> bytesR)) (App a b)
app a (_, b) =
App a b
importExpression_ = noted (choice [ alternative0, alternative1 ])
where
alternative0 = do
a <- embedded
return (Embed a)
alternative1 = completionExpression
completionExpression = noted (do
a <- selectorExpression
mb <- optional (do
try (whitespace *> _doubleColon)
whitespace
selectorExpression )
case mb of
Nothing -> return a
Just b -> return (RecordCompletion a b) )
selectorExpression = noted (do
a <- primitiveExpression
let recordType = _openParens *> whitespace *> expression <* whitespace <* _closeParens
let field x e = Field e x
let projectBySet xs e = Project e (Left xs)
let projectByExpression xs e = Project e (Right xs)
let alternatives =
fmap field anyLabel
<|> fmap projectBySet labels
<|> fmap projectByExpression recordType
b <- Text.Megaparsec.many (try (whitespace *> _dot *> whitespace *> alternatives))
return (foldl' (\e k -> k e) a b) )
primitiveExpression =
noted
( choice
[ alternative00
, alternative01
, alternative02
, alternative03
, alternative04
, alternative05
, alternative06
, alternative07
, alternative08
, alternative37
, alternative09
, builtin
]
)
<|> alternative38
where
alternative00 = do
n <- getOffset
a <- try doubleLiteral
b <- if isInfinite a
then setOffset n *> fail "double out of bounds"
else return a
return (DoubleLit (DhallDouble b))
alternative01 = do
a <- try naturalLiteral
return (NaturalLit a)
alternative02 = do
a <- try integerLiteral
return (IntegerLit a)
alternative03 = textLiteral
alternative04 = (do
_openBrace
whitespace
_ <- optional (_comma *> whitespace)
a <- recordTypeOrLiteral
whitespace
_closeBrace
return a ) <?> "literal"
alternative05 = unionType
alternative06 = listLiteral
alternative07 = do
try (_merge *> nonemptyWhitespace)
a <- importExpression_
nonemptyWhitespace
b <- importExpression_ <?> "second argument to ❰merge❱"
return (Merge a b Nothing)
alternative08 = do
try (_toMap *> nonemptyWhitespace)
a <- importExpression_
return (ToMap a Nothing)
alternative09 = do
a <- try doubleInfinity
return (DoubleLit (DhallDouble a))
builtin = do
let predicate c =
c == 'N'
|| c == 'I'
|| c == 'D'
|| c == 'L'
|| c == 'O'
|| c == 'B'
|| c == 'S'
|| c == 'T'
|| c == 'F'
|| c == 'K'
let nan = DhallDouble (0.0/0.0)
c <- Text.Megaparsec.lookAhead (Text.Megaparsec.satisfy predicate)
case c of
'N' ->
choice
[ NaturalFold <$ _NaturalFold
, NaturalBuild <$ _NaturalBuild
, NaturalIsZero <$ _NaturalIsZero
, NaturalEven <$ _NaturalEven
, NaturalOdd <$ _NaturalOdd
, NaturalSubtract <$ _NaturalSubtract
, NaturalToInteger <$ _NaturalToInteger
, NaturalShow <$ _NaturalShow
, Natural <$ _Natural
, None <$ _None
, DoubleLit nan <$ _NaN
]
'I' ->
choice
[ IntegerClamp <$ _IntegerClamp
, IntegerNegate <$ _IntegerNegate
, IntegerShow <$ _IntegerShow
, IntegerToDouble <$ _IntegerToDouble
, Integer <$ _Integer
]
'D' ->
choice
[ DoubleShow <$ _DoubleShow
, Double <$ _Double
]
'L' ->
choice
[ ListBuild <$ _ListBuild
, ListFold <$ _ListFold
, ListLength <$ _ListLength
, ListHead <$ _ListHead
, ListLast <$ _ListLast
, ListIndexed <$ _ListIndexed
, ListReverse <$ _ListReverse
, List <$ _List
]
'O' ->
choice
[ OptionalFold <$ _OptionalFold
, OptionalBuild <$ _OptionalBuild
, Optional <$ _Optional
]
'B' -> Bool <$ _Bool
'S' -> Const Sort <$ _Sort
'T' ->
choice
[ TextShow <$ _TextShow
, Text <$ _Text
, BoolLit True <$ _True
, Const Type <$ _Type
]
'F' -> BoolLit False <$ _False
'K' -> Const Kind <$ _Kind
_ -> empty
alternative37 = do
a <- identifier
return (Var a)
alternative38 = do
_openParens
whitespace
a <- expression
whitespace
_closeParens
return a
doubleQuotedChunk =
choice
[ interpolation
, unescapedCharacterFast
, unescapedCharacterSlow
, escapedCharacter
]
where
interpolation = do
_ <- text "${"
e <- completeExpression_
_ <- char '}'
return (Chunks [(mempty, e)] mempty)
unescapedCharacterFast = do
t <- Text.Megaparsec.takeWhile1P Nothing predicate
return (Chunks [] t)
where
predicate c =
( ('\x20' <= c && c <= '\x21' )
|| ('\x23' <= c && c <= '\x5B' )
|| ('\x5D' <= c && c <= '\x10FFFF')
) && c /= '$'
unescapedCharacterSlow = do
_ <- char '$'
return (Chunks [] "$")
escapedCharacter = do
_ <- char '\\'
c <- choice
[ quotationMark
, dollarSign
, backSlash
, forwardSlash
, backSpace
, formFeed
, lineFeed
, carriageReturn
, tab
, unicode
]
return (Chunks [] (Data.Text.singleton c))
where
quotationMark = char '"'
dollarSign = char '$'
backSlash = char '\\'
forwardSlash = char '/'
backSpace = do _ <- char 'b'; return '\b'
formFeed = do _ <- char 'f'; return '\f'
lineFeed = do _ <- char 'n'; return '\n'
carriageReturn = do _ <- char 'r'; return '\r'
tab = do _ <- char 't'; return '\t'
unicode = do
_ <- char 'u';
let toNumber = Data.List.foldl' (\x y -> x * 16 + y) 0
let fourCharacterEscapeSequence = do
ns <- Control.Monad.replicateM 4 hexNumber
let number = toNumber ns
Control.Monad.guard (validCodepoint number)
<|> fail "Invalid Unicode code point"
return number
let bracedEscapeSequence = do
_ <- char '{'
ns <- some hexNumber
let number = toNumber ns
Control.Monad.guard (number <= 0x10FFFD && validCodepoint number)
<|> fail "Invalid Unicode code point"
_ <- char '}'
return number
n <- bracedEscapeSequence <|> fourCharacterEscapeSequence
return (Char.chr n)
doubleQuotedLiteral = do
_ <- char '"'
chunks <- Text.Megaparsec.many doubleQuotedChunk
_ <- char '"'
return (mconcat chunks)
singleQuoteContinue =
choice
[ escapeSingleQuotes
, interpolation
, escapeInterpolation
, endLiteral
, unescapedCharacterFast
, unescapedCharacterSlow
, tab
, endOfLine
]
where
escapeSingleQuotes = do
_ <- "'''" :: Parser Text
b <- singleQuoteContinue
return ("''" <> b)
interpolation = do
_ <- text "${"
a <- completeExpression_
_ <- char '}'
b <- singleQuoteContinue
return (Chunks [(mempty, a)] mempty <> b)
escapeInterpolation = do
_ <- text "''${"
b <- singleQuoteContinue
return ("${" <> b)
endLiteral = do
_ <- text "''"
return mempty
unescapedCharacterFast = do
a <- Text.Megaparsec.takeWhile1P Nothing predicate
b <- singleQuoteContinue
return (Chunks [] a <> b)
where
predicate c =
('\x20' <= c && c <= '\x10FFFF') && c /= '$' && c /= '\''
unescapedCharacterSlow = do
a <- satisfy predicate
b <- singleQuoteContinue
return (Chunks [] a <> b)
where
predicate c = c == '$' || c == '\''
endOfLine = do
a <- "\n" <|> "\r\n"
b <- singleQuoteContinue
return (Chunks [] a <> b)
tab = do
_ <- char '\t' <?> "tab"
b <- singleQuoteContinue
return ("\t" <> b)
singleQuoteLiteral = do
_ <- text "''"
_ <- endOfLine
a <- singleQuoteContinue
return (toDoubleQuoted a)
where
endOfLine = (void (char '\n') <|> void (text "\r\n")) <?> "newline"
textLiteral = (do
literal <- doubleQuotedLiteral <|> singleQuoteLiteral
return (TextLit literal) ) <?> "literal"
recordTypeOrLiteral =
choice
[ alternative0
, alternative1
, alternative2
]
where
alternative0 = do
_equal
return (RecordLit mempty)
alternative1 = nonEmptyRecordTypeOrLiteral
alternative2 = return (Record mempty)
nonEmptyRecordTypeOrLiteral = do
a <- anyLabel
whitespace
let nonEmptyRecordType = do
_colon
nonemptyWhitespace
b <- expression
whitespace
e <- Text.Megaparsec.many (do
_comma
whitespace
c <- anyLabel
whitespace
_colon
nonemptyWhitespace
d <- expression
whitespace
return (c, d) )
m <- toMap ((a, b) : e)
return (Record m)
let nonEmptyRecordLiteral = do
_equal
whitespace
b <- expression
whitespace
e <- Text.Megaparsec.many (do
_comma
whitespace
c <- anyLabel
whitespace
_equal
whitespace
d <- expression
whitespace
return (c, d) )
m <- toMap ((a, b) : e)
return (RecordLit m)
nonEmptyRecordType <|> nonEmptyRecordLiteral
unionType = (do
_openAngle
whitespace
_ <- optional (_bar *> whitespace)
let unionTypeEntry = do
a <- anyLabel
whitespace
b <- optional (_colon *> nonemptyWhitespace *> expression <* whitespace)
return (a, b)
kvs <- Text.Megaparsec.sepBy unionTypeEntry (_bar *> whitespace)
m <- toMap kvs
_closeAngle
return (Union m) ) <?> "literal"
listLiteral = (do
_openBracket
whitespace
_ <- optional (_comma *> whitespace)
a <- Text.Megaparsec.sepBy (expression <* whitespace) (_comma *> whitespace)
_closeBracket
return (ListLit Nothing (Data.Sequence.fromList a)) ) <?> "literal"
{-| Parse an environment variable import
This corresponds to the @env@ rule from the official grammar
-}
env :: Parser ImportType
env = do
_ <- text "env:"
a <- (alternative0 <|> alternative1)
return (Env a)
where
alternative0 = bashEnvironmentVariable
alternative1 = do
_ <- char '"'
a <- posixEnvironmentVariable
_ <- char '"'
return a
-- | Parse a local import without trailing whitespace
localOnly :: Parser ImportType
localOnly =
choice
[ parentPath
, herePath
, homePath
, try absolutePath
]
where
parentPath = do
_ <- ".." :: Parser Text
file <- file_ FileComponent
return (Local Parent file)
herePath = do
_ <- "." :: Parser Text
file <- file_ FileComponent
return (Local Here file)
homePath = do
_ <- "~" :: Parser Text
file <- file_ FileComponent
return (Local Home file)
absolutePath = do
file <- file_ FileComponent
return (Local Absolute file)
{-| Parse a local import
This corresponds to the @local@ rule from the official grammar
-}
local :: Parser ImportType
local = do
a <- localOnly
return a
{-| Parse an HTTP(S) import
This corresponds to the @http@ rule from the official grammar
-}
http :: Parser ImportType
http = do
url <- httpRaw
headers <- optional (do
try (whitespace *> _using *> nonemptyWhitespace)
importExpression import_ )
return (Remote (url { headers }))
{-| Parse a `Missing` import
This corresponds to the @missing@ rule from the official grammar
-}
missing :: Parser ImportType
missing = do
_missing
return Missing
{-| Parse an `ImportType`
This corresponds to the @import-type@ rule from the official grammar
-}
importType_ :: Parser ImportType
importType_ = do
let predicate c =
c == '~' || c == '.' || c == '/' || c == 'h' || c == 'e' || c == 'm'
_ <- Text.Megaparsec.lookAhead (Text.Megaparsec.satisfy predicate)
choice [ local, http, env, missing ]
{-| Parse a `Dhall.Crypto.SHA256Digest`
This corresponds to the @hash@ rule from the official grammar
-}
importHash_ :: Parser Dhall.Crypto.SHA256Digest
importHash_ = do
_ <- text "sha256:"
t <- count 64 (satisfy hexdig <?> "hex digit")
let strictBytes16 = Data.Text.Encoding.encodeUtf8 t
strictBytes <- case Data.ByteArray.Encoding.convertFromBase Base16 strictBytes16 of
Left string -> fail string
Right strictBytes -> return (strictBytes :: Data.ByteString.ByteString)
case Dhall.Crypto.sha256DigestFromByteString strictBytes of
Nothing -> fail "Invalid sha256 hash"
Just h -> pure h
{-| Parse an `ImportHashed`
This corresponds to the @import-hashed@ rule from the official grammar
-}
importHashed_ :: Parser ImportHashed
importHashed_ = do
importType <- importType_
hash <- optional (try (nonemptyWhitespace *> importHash_))
return (ImportHashed {..})
{-| Parse an `Import`
This corresponds to the @import@ rule from the official grammar
-}
import_ :: Parser Import
import_ = (do
importHashed <- importHashed_
importMode <- alternative <|> pure Code
return (Import {..}) ) <?> "import"
where
alternative = do
try (whitespace *> _as *> nonemptyWhitespace)
(_Text >> pure RawText) <|> (_Location >> pure Location)
-- | Same as @Data.Text.splitOn@, except always returning a `NonEmpty` result
splitOn :: Text -> Text -> NonEmpty Text
splitOn needle haystack =
case Data.Text.splitOn needle haystack of
[] -> "" :| []
t : ts -> t :| ts
-- | Split `Chunks` by lines
linesLiteral :: Chunks s a -> NonEmpty (Chunks s a)
linesLiteral (Chunks [] suffix) =
fmap (Chunks []) (splitOn "\n" suffix)
linesLiteral (Chunks ((prefix, interpolation) : pairs) suffix) =
foldr
Data.List.NonEmpty.cons
(Chunks ((lastLine, interpolation) : pairs) suffix :| chunks)
(fmap (Chunks []) initLines)
where
splitLines = splitOn "\n" prefix
initLines = Data.List.NonEmpty.init splitLines
lastLine = Data.List.NonEmpty.last splitLines
Chunks pairs suffix :| chunks = linesLiteral (Chunks pairs suffix)
-- | Flatten several `Chunks` back into a single `Chunks` by inserting newlines
unlinesLiteral :: NonEmpty (Chunks s a) -> Chunks s a
unlinesLiteral chunks =
Data.Foldable.fold (Data.List.NonEmpty.intersperse "\n" chunks)
-- | Returns `True` if the `Chunks` represents a blank line
emptyLine :: Chunks s a -> Bool
emptyLine (Chunks [] "" ) = True
emptyLine (Chunks [] "\r") = True -- So that `\r\n` is treated as a blank line
emptyLine _ = False
-- | Return the leading whitespace for a `Chunks` literal
leadingSpaces :: Chunks s a -> Text
leadingSpaces chunks = Data.Text.takeWhile isSpace firstText
where
isSpace c = c == '\x20' || c == '\x09'
firstText =
case chunks of
Chunks [] suffix -> suffix
Chunks ((prefix, _) : _ ) _ -> prefix
-- | Drop the first @n@ characters for a `Chunks` literal
dropLiteral :: Int -> Chunks s a -> Chunks s a
dropLiteral n (Chunks [] suffix) =
Chunks [] (Data.Text.drop n suffix)
dropLiteral n (Chunks ((prefix, interpolation) : rest) suffix) =
Chunks ((Data.Text.drop n prefix, interpolation) : rest) suffix
{-| Convert a single-quoted `Chunks` literal to the equivalent double-quoted
`Chunks` literal
-}
toDoubleQuoted :: Chunks Src a -> Chunks Src a
toDoubleQuoted literal =
unlinesLiteral (fmap (dropLiteral indent) literals)
where
literals = linesLiteral literal
sharedPrefix ab ac =
case Data.Text.commonPrefixes ab ac of
Just (a, _b, _c) -> a
Nothing -> ""
-- The standard specifies to filter out blank lines for all lines *except*
-- for the last line
filteredLines = newInit <> pure oldLast
where
oldInit = Data.List.NonEmpty.init literals
oldLast = Data.List.NonEmpty.last literals
newInit = filter (not . emptyLine) oldInit
longestSharedPrefix =
case filteredLines of
l : ls ->
Data.Foldable.foldl' sharedPrefix (leadingSpaces l) (fmap leadingSpaces ls)
[] ->
""
indent = Data.Text.length longestSharedPrefix