Parsing performance improvements (#591)
This commit is contained in:
parent
d8e076c6da
commit
b60f8f671e
|
@ -208,6 +208,7 @@ Library
|
|||
if !impl(ghc >= 8.0)
|
||||
Build-Depends: semigroups == 0.18.*
|
||||
Build-Depends: transformers == 0.4.2.*
|
||||
Build-Depends: fail == 4.9.*
|
||||
|
||||
Exposed-Modules:
|
||||
Dhall,
|
||||
|
@ -245,7 +246,7 @@ Executable dhall
|
|||
Hs-Source-Dirs: dhall
|
||||
Main-Is: Main.hs
|
||||
Build-Depends: base, dhall
|
||||
GHC-Options: -Wall
|
||||
GHC-Options: -Wall -rtsopts
|
||||
Default-Language: Haskell2010
|
||||
|
||||
Test-Suite tasty
|
||||
|
@ -308,6 +309,7 @@ Benchmark dhall-parser
|
|||
serialise ,
|
||||
text >= 0.11.1.0 && < 1.3
|
||||
Default-Language: Haskell2010
|
||||
ghc-options: -rtsopts
|
||||
|
||||
Benchmark deep-nested-large-record
|
||||
Type: exitcode-stdio-1.0
|
||||
|
|
|
@ -1,13 +1,12 @@
|
|||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE DeriveDataTypeable #-}
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RecordWildCards #-}
|
||||
module Dhall.Parser.Combinators where
|
||||
|
||||
|
||||
import Control.Applicative (Alternative (..), liftA2)
|
||||
import Control.Monad (MonadPlus)
|
||||
import Control.Monad (MonadPlus (..))
|
||||
import Data.Data (Data)
|
||||
import Data.HashMap.Strict.InsOrd (InsOrdHashMap)
|
||||
import Data.Semigroup (Semigroup (..))
|
||||
|
@ -27,6 +26,7 @@ import qualified Data.List
|
|||
import qualified Data.Sequence
|
||||
import qualified Data.Set
|
||||
import qualified Data.Text
|
||||
import qualified Control.Monad.Fail
|
||||
import qualified Text.Megaparsec
|
||||
import qualified Text.Megaparsec.Char
|
||||
import qualified Text.Parser.Char
|
||||
|
@ -34,9 +34,20 @@ import qualified Text.Parser.Combinators
|
|||
import qualified Text.Parser.Token.Style
|
||||
|
||||
-- | Source code extract
|
||||
data Src = Src Text.Megaparsec.SourcePos Text.Megaparsec.SourcePos Text
|
||||
data Src = Src !Text.Megaparsec.SourcePos !Text.Megaparsec.SourcePos Text
|
||||
-- Text field is intentionally lazy
|
||||
deriving (Data, Eq, Show)
|
||||
|
||||
-- | Doesn't force the 'Text' part
|
||||
laxSrcEq :: Src -> Src -> Bool
|
||||
laxSrcEq (Src p q _) (Src p' q' _) = eq p p' && eq q q'
|
||||
where
|
||||
-- Don't compare filename (which is FilePath = String)
|
||||
eq :: Text.Megaparsec.SourcePos -> Text.Megaparsec.SourcePos -> Bool
|
||||
eq (Text.Megaparsec.SourcePos _ a b) (Text.Megaparsec.SourcePos _ a' b') =
|
||||
a == a' && b == b'
|
||||
{-# INLINE laxSrcEq #-}
|
||||
|
||||
instance Pretty Src where
|
||||
pretty (Src begin _ text) =
|
||||
pretty text <> "\n"
|
||||
|
@ -48,14 +59,99 @@ instance Pretty Src where
|
|||
comments as whitespace
|
||||
-}
|
||||
newtype Parser a = Parser { unParser :: Text.Megaparsec.Parsec Void Text a }
|
||||
deriving
|
||||
( Functor
|
||||
, Applicative
|
||||
, Monad
|
||||
, Alternative
|
||||
, MonadPlus
|
||||
, Text.Megaparsec.MonadParsec Void Text
|
||||
)
|
||||
|
||||
instance Functor Parser where
|
||||
fmap f (Parser x) = Parser (fmap f x)
|
||||
{-# INLINE fmap #-}
|
||||
|
||||
f <$ Parser x = Parser (f <$ x)
|
||||
{-# INLINE (<$) #-}
|
||||
|
||||
instance Applicative Parser where
|
||||
pure = Parser . pure
|
||||
{-# INLINE pure #-}
|
||||
|
||||
Parser f <*> Parser x = Parser (f <*> x)
|
||||
{-# INLINE (<*>) #-}
|
||||
|
||||
Parser a <* Parser b = Parser (a <* b)
|
||||
{-# INLINE (<*) #-}
|
||||
|
||||
Parser a *> Parser b = Parser (a *> b)
|
||||
{-# INLINE (*>) #-}
|
||||
|
||||
instance Monad Parser where
|
||||
return = pure
|
||||
{-# INLINE return #-}
|
||||
|
||||
(>>) = (*>)
|
||||
{-# INLINE (>>) #-}
|
||||
|
||||
Parser n >>= k = Parser (n >>= unParser . k)
|
||||
{-# INLINE (>>=) #-}
|
||||
|
||||
fail = Control.Monad.Fail.fail
|
||||
{-# INLINE fail #-}
|
||||
|
||||
instance Control.Monad.Fail.MonadFail Parser where
|
||||
fail = Parser . Control.Monad.Fail.fail
|
||||
{-# INLINE fail #-}
|
||||
|
||||
instance Alternative Parser where
|
||||
empty = Parser empty
|
||||
-- {-# INLINE empty #-}
|
||||
|
||||
Parser a <|> Parser b = Parser (a <|> b)
|
||||
-- {-# INLINE (<|>) #-}
|
||||
|
||||
some (Parser a) = Parser (some a)
|
||||
-- {-# INLINE some #-}
|
||||
|
||||
many (Parser a) = Parser (many a)
|
||||
-- {-# INLINE many #-}
|
||||
|
||||
instance MonadPlus Parser where
|
||||
mzero = empty
|
||||
-- {-# INLINE mzero #-}
|
||||
|
||||
mplus = (<|>)
|
||||
-- {-# INLINE mplus #-}
|
||||
|
||||
instance Text.Megaparsec.MonadParsec Void Text Parser where
|
||||
failure u e = Parser (Text.Megaparsec.failure u e)
|
||||
|
||||
fancyFailure e = Parser (Text.Megaparsec.fancyFailure e)
|
||||
|
||||
label l (Parser p) = Parser (Text.Megaparsec.label l p)
|
||||
|
||||
hidden (Parser p) = Parser (Text.Megaparsec.hidden p)
|
||||
|
||||
try (Parser p) = Parser (Text.Megaparsec.try p)
|
||||
|
||||
lookAhead (Parser p) = Parser (Text.Megaparsec.lookAhead p)
|
||||
|
||||
notFollowedBy (Parser p) = Parser (Text.Megaparsec.notFollowedBy p)
|
||||
|
||||
withRecovery e (Parser p) = Parser (Text.Megaparsec.withRecovery (unParser . e) p)
|
||||
|
||||
observing (Parser p) = Parser (Text.Megaparsec.observing p)
|
||||
|
||||
eof = Parser Text.Megaparsec.eof
|
||||
|
||||
token f e = Parser (Text.Megaparsec.token f e)
|
||||
|
||||
tokens f ts = Parser (Text.Megaparsec.tokens f ts)
|
||||
|
||||
takeWhileP s f = Parser (Text.Megaparsec.takeWhileP s f)
|
||||
|
||||
takeWhile1P s f = Parser (Text.Megaparsec.takeWhile1P s f)
|
||||
|
||||
takeP s n = Parser (Text.Megaparsec.takeP s n)
|
||||
|
||||
getParserState = Parser Text.Megaparsec.getParserState
|
||||
{-# INLINE getParserState #-}
|
||||
|
||||
updateParserState f = Parser (Text.Megaparsec.updateParserState f)
|
||||
|
||||
instance Data.Semigroup.Semigroup a => Data.Semigroup.Semigroup (Parser a) where
|
||||
(<>) = liftA2 (<>)
|
||||
|
|
|
@ -34,8 +34,8 @@ noted parser = do
|
|||
after <- Text.Megaparsec.getSourcePos
|
||||
let src₀ = Src before after tokens
|
||||
case e of
|
||||
Note src₁ _ | src₀ == src₁ -> return e
|
||||
_ -> return (Note src₀ e)
|
||||
Note src₁ _ | laxSrcEq src₀ src₁ -> return e
|
||||
_ -> return (Note src₀ e)
|
||||
|
||||
expression :: Parser a -> Parser (Expr Src a)
|
||||
expression embedded =
|
||||
|
|
|
@ -2,7 +2,92 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
-- | Parse Dhall tokens. Even though we don't have a tokenizer per-se this
|
||||
--- module is useful for keeping some small parsing utilities.
|
||||
module Dhall.Parser.Token where
|
||||
module Dhall.Parser.Token (
|
||||
whitespace,
|
||||
bashEnvironmentVariable,
|
||||
posixEnvironmentVariable,
|
||||
file_,
|
||||
label,
|
||||
labels,
|
||||
httpRaw,
|
||||
hexdig,
|
||||
identifier,
|
||||
hexNumber,
|
||||
doubleLiteral,
|
||||
naturalLiteral,
|
||||
integerLiteral,
|
||||
_Optional,
|
||||
_if,
|
||||
_then,
|
||||
_else,
|
||||
_let,
|
||||
_in,
|
||||
_as,
|
||||
_using,
|
||||
_merge,
|
||||
_constructors,
|
||||
_Some,
|
||||
_None,
|
||||
_NaturalFold,
|
||||
_NaturalBuild,
|
||||
_NaturalIsZero,
|
||||
_NaturalEven,
|
||||
_NaturalOdd,
|
||||
_NaturalToInteger,
|
||||
_NaturalShow,
|
||||
_IntegerShow,
|
||||
_IntegerToDouble,
|
||||
_DoubleShow,
|
||||
_ListBuild,
|
||||
_ListFold,
|
||||
_ListLength,
|
||||
_ListHead,
|
||||
_ListLast,
|
||||
_ListIndexed,
|
||||
_ListReverse,
|
||||
_OptionalFold,
|
||||
_OptionalBuild,
|
||||
_Bool,
|
||||
_Natural,
|
||||
_Integer,
|
||||
_Double,
|
||||
_Text,
|
||||
_List,
|
||||
_True,
|
||||
_False,
|
||||
_Type,
|
||||
_Kind,
|
||||
_equal,
|
||||
_or,
|
||||
_plus,
|
||||
_textAppend,
|
||||
_listAppend,
|
||||
_and,
|
||||
_times,
|
||||
_doubleEqual,
|
||||
_notEqual,
|
||||
_dot,
|
||||
_openBrace,
|
||||
_closeBrace,
|
||||
_openBracket,
|
||||
_closeBracket,
|
||||
_openAngle,
|
||||
_closeAngle,
|
||||
_bar,
|
||||
_comma,
|
||||
_openParens,
|
||||
_closeParens,
|
||||
_colon,
|
||||
_at,
|
||||
_missing,
|
||||
_importAlt,
|
||||
_combine,
|
||||
_combineTypes,
|
||||
_prefer,
|
||||
_lambda,
|
||||
_forall,
|
||||
_arrow,
|
||||
) where
|
||||
|
||||
import Dhall.Parser.Combinators
|
||||
|
||||
|
@ -401,6 +486,9 @@ unreserved c =
|
|||
reserved :: Data.Text.Text -> Parser ()
|
||||
reserved x = do _ <- Text.Parser.Char.text x; whitespace
|
||||
|
||||
reservedChar :: Char -> Parser ()
|
||||
reservedChar c = do _ <- Text.Parser.Char.char c; whitespace
|
||||
|
||||
keyword :: Data.Text.Text -> Parser ()
|
||||
keyword x = try (do _ <- Text.Parser.Char.text x; nonemptyWhitespace)
|
||||
|
||||
|
@ -528,25 +616,25 @@ _Kind :: Parser ()
|
|||
_Kind = reserved "Kind"
|
||||
|
||||
_equal :: Parser ()
|
||||
_equal = reserved "="
|
||||
_equal = reservedChar '='
|
||||
|
||||
_or :: Parser ()
|
||||
_or = reserved "||"
|
||||
|
||||
_plus :: Parser ()
|
||||
_plus = reserved "+"
|
||||
_plus = reservedChar '+'
|
||||
|
||||
_textAppend :: Parser ()
|
||||
_textAppend = reserved "++"
|
||||
|
||||
_listAppend :: Parser ()
|
||||
_listAppend = reserved "#"
|
||||
_listAppend = reservedChar '#'
|
||||
|
||||
_and :: Parser ()
|
||||
_and = reserved "&&"
|
||||
|
||||
_times :: Parser ()
|
||||
_times = reserved "*"
|
||||
_times = reservedChar '*'
|
||||
|
||||
_doubleEqual :: Parser ()
|
||||
_doubleEqual = reserved "=="
|
||||
|
@ -555,49 +643,49 @@ _notEqual :: Parser ()
|
|||
_notEqual = reserved "!="
|
||||
|
||||
_dot :: Parser ()
|
||||
_dot = reserved "."
|
||||
_dot = reservedChar '.'
|
||||
|
||||
_openBrace :: Parser ()
|
||||
_openBrace = reserved "{"
|
||||
_openBrace = reservedChar '{'
|
||||
|
||||
_closeBrace :: Parser ()
|
||||
_closeBrace = reserved "}"
|
||||
_closeBrace = reservedChar '}'
|
||||
|
||||
_openBracket :: Parser ()
|
||||
_openBracket = reserved "["
|
||||
_openBracket = reservedChar '['
|
||||
|
||||
_closeBracket :: Parser ()
|
||||
_closeBracket = reserved "]"
|
||||
_closeBracket = reservedChar ']'
|
||||
|
||||
_openAngle :: Parser ()
|
||||
_openAngle = reserved "<"
|
||||
_openAngle = reservedChar '<'
|
||||
|
||||
_closeAngle :: Parser ()
|
||||
_closeAngle = reserved ">"
|
||||
_closeAngle = reservedChar '>'
|
||||
|
||||
_bar :: Parser ()
|
||||
_bar = reserved "|"
|
||||
_bar = reservedChar '|'
|
||||
|
||||
_comma :: Parser ()
|
||||
_comma = reserved ","
|
||||
_comma = reservedChar ','
|
||||
|
||||
_openParens :: Parser ()
|
||||
_openParens = reserved "("
|
||||
_openParens = reservedChar '('
|
||||
|
||||
_closeParens :: Parser ()
|
||||
_closeParens = reserved ")"
|
||||
_closeParens = reservedChar ')'
|
||||
|
||||
_colon :: Parser ()
|
||||
_colon = reserved ":"
|
||||
_colon = reservedChar ':'
|
||||
|
||||
_at :: Parser ()
|
||||
_at = reserved "@"
|
||||
_at = reservedChar '@'
|
||||
|
||||
_missing :: Parser ()
|
||||
_missing = reserved "missing"
|
||||
|
||||
_importAlt :: Parser ()
|
||||
_importAlt = reserved "?"
|
||||
_importAlt = reservedChar '?'
|
||||
|
||||
_combine :: Parser ()
|
||||
_combine = do
|
||||
|
|
Loading…
Reference in New Issue
Block a user