Parsing performance improvements (#591)

This commit is contained in:
Oleg Grenrus 2018-09-19 04:27:17 +03:00 committed by Gabriel Gonzalez
parent d8e076c6da
commit b60f8f671e
4 changed files with 220 additions and 34 deletions

View File

@ -208,6 +208,7 @@ Library
if !impl(ghc >= 8.0)
Build-Depends: semigroups == 0.18.*
Build-Depends: transformers == 0.4.2.*
Build-Depends: fail == 4.9.*
Exposed-Modules:
Dhall,
@ -245,7 +246,7 @@ Executable dhall
Hs-Source-Dirs: dhall
Main-Is: Main.hs
Build-Depends: base, dhall
GHC-Options: -Wall
GHC-Options: -Wall -rtsopts
Default-Language: Haskell2010
Test-Suite tasty
@ -308,6 +309,7 @@ Benchmark dhall-parser
serialise ,
text >= 0.11.1.0 && < 1.3
Default-Language: Haskell2010
ghc-options: -rtsopts
Benchmark deep-nested-large-record
Type: exitcode-stdio-1.0

View File

@ -1,13 +1,12 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Dhall.Parser.Combinators where
import Control.Applicative (Alternative (..), liftA2)
import Control.Monad (MonadPlus)
import Control.Monad (MonadPlus (..))
import Data.Data (Data)
import Data.HashMap.Strict.InsOrd (InsOrdHashMap)
import Data.Semigroup (Semigroup (..))
@ -27,6 +26,7 @@ import qualified Data.List
import qualified Data.Sequence
import qualified Data.Set
import qualified Data.Text
import qualified Control.Monad.Fail
import qualified Text.Megaparsec
import qualified Text.Megaparsec.Char
import qualified Text.Parser.Char
@ -34,9 +34,20 @@ import qualified Text.Parser.Combinators
import qualified Text.Parser.Token.Style
-- | Source code extract
data Src = Src Text.Megaparsec.SourcePos Text.Megaparsec.SourcePos Text
data Src = Src !Text.Megaparsec.SourcePos !Text.Megaparsec.SourcePos Text
-- Text field is intentionally lazy
deriving (Data, Eq, Show)
-- | Doesn't force the 'Text' part
laxSrcEq :: Src -> Src -> Bool
laxSrcEq (Src p q _) (Src p' q' _) = eq p p' && eq q q'
where
-- Don't compare filename (which is FilePath = String)
eq :: Text.Megaparsec.SourcePos -> Text.Megaparsec.SourcePos -> Bool
eq (Text.Megaparsec.SourcePos _ a b) (Text.Megaparsec.SourcePos _ a' b') =
a == a' && b == b'
{-# INLINE laxSrcEq #-}
instance Pretty Src where
pretty (Src begin _ text) =
pretty text <> "\n"
@ -48,14 +59,99 @@ instance Pretty Src where
comments as whitespace
-}
newtype Parser a = Parser { unParser :: Text.Megaparsec.Parsec Void Text a }
deriving
( Functor
, Applicative
, Monad
, Alternative
, MonadPlus
, Text.Megaparsec.MonadParsec Void Text
)
instance Functor Parser where
fmap f (Parser x) = Parser (fmap f x)
{-# INLINE fmap #-}
f <$ Parser x = Parser (f <$ x)
{-# INLINE (<$) #-}
instance Applicative Parser where
pure = Parser . pure
{-# INLINE pure #-}
Parser f <*> Parser x = Parser (f <*> x)
{-# INLINE (<*>) #-}
Parser a <* Parser b = Parser (a <* b)
{-# INLINE (<*) #-}
Parser a *> Parser b = Parser (a *> b)
{-# INLINE (*>) #-}
instance Monad Parser where
return = pure
{-# INLINE return #-}
(>>) = (*>)
{-# INLINE (>>) #-}
Parser n >>= k = Parser (n >>= unParser . k)
{-# INLINE (>>=) #-}
fail = Control.Monad.Fail.fail
{-# INLINE fail #-}
instance Control.Monad.Fail.MonadFail Parser where
fail = Parser . Control.Monad.Fail.fail
{-# INLINE fail #-}
instance Alternative Parser where
empty = Parser empty
-- {-# INLINE empty #-}
Parser a <|> Parser b = Parser (a <|> b)
-- {-# INLINE (<|>) #-}
some (Parser a) = Parser (some a)
-- {-# INLINE some #-}
many (Parser a) = Parser (many a)
-- {-# INLINE many #-}
instance MonadPlus Parser where
mzero = empty
-- {-# INLINE mzero #-}
mplus = (<|>)
-- {-# INLINE mplus #-}
instance Text.Megaparsec.MonadParsec Void Text Parser where
failure u e = Parser (Text.Megaparsec.failure u e)
fancyFailure e = Parser (Text.Megaparsec.fancyFailure e)
label l (Parser p) = Parser (Text.Megaparsec.label l p)
hidden (Parser p) = Parser (Text.Megaparsec.hidden p)
try (Parser p) = Parser (Text.Megaparsec.try p)
lookAhead (Parser p) = Parser (Text.Megaparsec.lookAhead p)
notFollowedBy (Parser p) = Parser (Text.Megaparsec.notFollowedBy p)
withRecovery e (Parser p) = Parser (Text.Megaparsec.withRecovery (unParser . e) p)
observing (Parser p) = Parser (Text.Megaparsec.observing p)
eof = Parser Text.Megaparsec.eof
token f e = Parser (Text.Megaparsec.token f e)
tokens f ts = Parser (Text.Megaparsec.tokens f ts)
takeWhileP s f = Parser (Text.Megaparsec.takeWhileP s f)
takeWhile1P s f = Parser (Text.Megaparsec.takeWhile1P s f)
takeP s n = Parser (Text.Megaparsec.takeP s n)
getParserState = Parser Text.Megaparsec.getParserState
{-# INLINE getParserState #-}
updateParserState f = Parser (Text.Megaparsec.updateParserState f)
instance Data.Semigroup.Semigroup a => Data.Semigroup.Semigroup (Parser a) where
(<>) = liftA2 (<>)

View File

@ -34,8 +34,8 @@ noted parser = do
after <- Text.Megaparsec.getSourcePos
let src = Src before after tokens
case e of
Note src _ | src == src -> return e
_ -> return (Note src e)
Note src _ | laxSrcEq src src -> return e
_ -> return (Note src e)
expression :: Parser a -> Parser (Expr Src a)
expression embedded =

View File

@ -2,7 +2,92 @@
{-# LANGUAGE OverloadedStrings #-}
-- | Parse Dhall tokens. Even though we don't have a tokenizer per-se this
--- module is useful for keeping some small parsing utilities.
module Dhall.Parser.Token where
module Dhall.Parser.Token (
whitespace,
bashEnvironmentVariable,
posixEnvironmentVariable,
file_,
label,
labels,
httpRaw,
hexdig,
identifier,
hexNumber,
doubleLiteral,
naturalLiteral,
integerLiteral,
_Optional,
_if,
_then,
_else,
_let,
_in,
_as,
_using,
_merge,
_constructors,
_Some,
_None,
_NaturalFold,
_NaturalBuild,
_NaturalIsZero,
_NaturalEven,
_NaturalOdd,
_NaturalToInteger,
_NaturalShow,
_IntegerShow,
_IntegerToDouble,
_DoubleShow,
_ListBuild,
_ListFold,
_ListLength,
_ListHead,
_ListLast,
_ListIndexed,
_ListReverse,
_OptionalFold,
_OptionalBuild,
_Bool,
_Natural,
_Integer,
_Double,
_Text,
_List,
_True,
_False,
_Type,
_Kind,
_equal,
_or,
_plus,
_textAppend,
_listAppend,
_and,
_times,
_doubleEqual,
_notEqual,
_dot,
_openBrace,
_closeBrace,
_openBracket,
_closeBracket,
_openAngle,
_closeAngle,
_bar,
_comma,
_openParens,
_closeParens,
_colon,
_at,
_missing,
_importAlt,
_combine,
_combineTypes,
_prefer,
_lambda,
_forall,
_arrow,
) where
import Dhall.Parser.Combinators
@ -401,6 +486,9 @@ unreserved c =
reserved :: Data.Text.Text -> Parser ()
reserved x = do _ <- Text.Parser.Char.text x; whitespace
reservedChar :: Char -> Parser ()
reservedChar c = do _ <- Text.Parser.Char.char c; whitespace
keyword :: Data.Text.Text -> Parser ()
keyword x = try (do _ <- Text.Parser.Char.text x; nonemptyWhitespace)
@ -528,25 +616,25 @@ _Kind :: Parser ()
_Kind = reserved "Kind"
_equal :: Parser ()
_equal = reserved "="
_equal = reservedChar '='
_or :: Parser ()
_or = reserved "||"
_plus :: Parser ()
_plus = reserved "+"
_plus = reservedChar '+'
_textAppend :: Parser ()
_textAppend = reserved "++"
_listAppend :: Parser ()
_listAppend = reserved "#"
_listAppend = reservedChar '#'
_and :: Parser ()
_and = reserved "&&"
_times :: Parser ()
_times = reserved "*"
_times = reservedChar '*'
_doubleEqual :: Parser ()
_doubleEqual = reserved "=="
@ -555,49 +643,49 @@ _notEqual :: Parser ()
_notEqual = reserved "!="
_dot :: Parser ()
_dot = reserved "."
_dot = reservedChar '.'
_openBrace :: Parser ()
_openBrace = reserved "{"
_openBrace = reservedChar '{'
_closeBrace :: Parser ()
_closeBrace = reserved "}"
_closeBrace = reservedChar '}'
_openBracket :: Parser ()
_openBracket = reserved "["
_openBracket = reservedChar '['
_closeBracket :: Parser ()
_closeBracket = reserved "]"
_closeBracket = reservedChar ']'
_openAngle :: Parser ()
_openAngle = reserved "<"
_openAngle = reservedChar '<'
_closeAngle :: Parser ()
_closeAngle = reserved ">"
_closeAngle = reservedChar '>'
_bar :: Parser ()
_bar = reserved "|"
_bar = reservedChar '|'
_comma :: Parser ()
_comma = reserved ","
_comma = reservedChar ','
_openParens :: Parser ()
_openParens = reserved "("
_openParens = reservedChar '('
_closeParens :: Parser ()
_closeParens = reserved ")"
_closeParens = reservedChar ')'
_colon :: Parser ()
_colon = reserved ":"
_colon = reservedChar ':'
_at :: Parser ()
_at = reserved "@"
_at = reservedChar '@'
_missing :: Parser ()
_missing = reserved "missing"
_importAlt :: Parser ()
_importAlt = reserved "?"
_importAlt = reservedChar '?'
_combine :: Parser ()
_combine = do