Fix import resolution performance regression (#1522)

* Fix import resolution performance regression

Related to https://github.com/dhall-lang/dhall-haskell/issues/1511

This fixes a performance regression introduced in #1159 where `newManager`
was being called on every remote import.  This fixes that by going back to
caching the `Manager` created by the first request.

This leads to *dramatic* performance improvements for import-rich packages
(like the Prelude or `dhall-kubernetes`) on the first import.  For example,
here are the performance numbers for importing the Prelude for a cold cache
before and after this change:

Before:

```
$ XDG_CACHE_HOME=.cache time dhall hash <<< 'https://prelude.dhall-lang.org/package.dhall'
sha256:99462c205117931c0919f155a6046aec140c70fb8876d208c7c77027ab19c2fa
       64.10 real        10.83 user         2.73 sys
```

After:

```
$ XDG_CACHE_HOME=.cache2 time dhall hash <<< 'https://prelude.dhall-lang.org/package.dhall'
sha256:99462c205117931c0919f155a6046aec140c70fb8876d208c7c77027ab19c2fa
        4.39 real         0.49 user         0.15 sys
```

That's ~16x faster!

The improvement for `dhall-kubernetes` is smaller, but still significant:

Before:

```
$ XDG_CACHE_HOME=.cache3 time dhall hash <<< ~/proj/dhall-kubernetes-charts/stable/jenkins/index.dhall
sha256:04ebd960f6af331c49c3ccaedb353ac8269032b54fe0a29bd167febcd7104d4f
      833.59 real       145.36 user        36.16 sys

After:

```
$ XDG_CACHE_HOME=.cache4 time dhall hash <<< ~/proj/dhall-kubernetes-charts/stable/jenkins/index.dhall
sha256:04ebd960f6af331c49c3ccaedb353ac8269032b54fe0a29bd167febcd7104d4f
      381.41 real         8.41 user         1.91 sys
```

... or ~2-3x improvement.

* Fix `-f-with-http` build

* Remove unnecessary `CPP`

... as caught by @sjakobi
This commit is contained in:
Gabriel Gonzalez 2019-11-08 10:58:03 -08:00 committed by mergify[bot]
parent 6a160dbae9
commit a0c3be9ca1
7 changed files with 78 additions and 8 deletions

View File

@ -535,6 +535,7 @@ Library
if flag(with-http)
Other-Modules:
Dhall.Import.HTTP
Dhall.Import.Manager
GHC-Options: -Wall -fwarn-incomplete-uni-patterns
Default-Language: Haskell2010

View File

@ -2,7 +2,9 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Dhall.Import.HTTP where
module Dhall.Import.HTTP
( fetchFromHttpUrl
) where
import Control.Exception (Exception)
import Control.Monad.IO.Class (MonadIO(..))
@ -169,17 +171,27 @@ renderPrettyHttpException url e = case e of
<> show e' <> "\n"
#endif
newManager :: IO Manager
newManager :: StateT Status IO Manager
newManager = do
let settings = HTTP.tlsManagerSettings
#ifdef MIN_VERSION_http_client
#if MIN_VERSION_http_client(0,5,0)
{ HTTP.managerResponseTimeout = HTTP.responseTimeoutMicro (30 * 1000 * 1000) } -- 30 seconds
#else
{ HTTP.managerResponseTimeout = Just (30 * 1000 * 1000) } -- 30 seconds
#endif
#endif
HTTP.newManager settings
Status { _manager = oldManager, ..} <- State.get
case oldManager of
Nothing -> do
manager <- liftIO (HTTP.newManager settings)
State.put (Status { _manager = Just manager , ..})
return manager
Just manager -> do
return manager
data NotCORSCompliant = NotCORSCompliant
{ expectedOrigins :: [ByteString]
@ -260,7 +272,7 @@ type HTTPHeader = Network.HTTP.Types.Header
fetchFromHttpUrl :: URL -> Maybe [HTTPHeader] -> StateT Status IO Text.Text
fetchFromHttpUrl childURL mheaders = do
manager <- liftIO $ newManager
manager <- newManager
let childURLString = Text.unpack (renderURL childURL)

View File

@ -0,0 +1,14 @@
{-| Both the GHC and GHCJS implementations of "Dhall.Import.Manager" export a
`Manager` type suitable for use within the "Dhall.Import" module
For the GHC implementation the `Manager` type is a real `Manager` from the
@http-client@ package. For the GHCJS implementation the `Manager` type is
a synonym for @`Data.Void.Void`@ since GHCJS does not use a `Manager` for
HTTP requests.
-}
module Dhall.Import.Manager
( -- * Manager
Manager
) where
import Network.HTTP.Client (Manager)

View File

@ -1,7 +1,10 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Dhall.Import.HTTP where
module Dhall.Import.HTTP
( fetchFromHttpUrl
, Manager
) where
import Control.Monad.IO.Class (MonadIO(..))
import Control.Monad.Trans.State.Strict (StateT)
@ -12,10 +15,19 @@ import Data.Semigroup ((<>))
import qualified Data.Text as Text
import qualified JavaScript.XHR
import Data.Void (Void)
import Dhall.Core (URL(..))
import Dhall.URL (renderURL)
import Dhall.Import.Types
{-| The GHCJS implementation does not require a `Manager`
The purpose of this synonym is so that "Dhall.Import.Types" can import a
`Manager` type from "Dhall.Import.HTTP" that does the correct thing for
both the GHC and GHCJS implementations
-}
type Manager = Void
fetchFromHttpUrl
:: URL
-> Maybe [(CI ByteString, ByteString)]

View File

@ -0,0 +1,17 @@
{-| Both the GHC and GHCJS implementations of "Dhall.Import.Manager" export a
`Manager` type suitable for use within the "Dhall.Import" module
For the GHC implementation the `Manager` type is a real `Manager` from the
@http-client@ package. For the GHCJS implementation the `Manager` type is
a synonym for @`Data.Void.Void`@ since GHCJS does not use a `Manager` for
HTTP requests.
-}
module Dhall.Import.Manager
( -- * Manager
Manager
) where
import Data.Void (Void)
-- | GHCJS does not use a `Manager`
type Manager = Void

View File

@ -172,7 +172,7 @@ import Dhall.Syntax
, chunkExprs
)
#ifdef WITH_HTTP
import Dhall.Import.HTTP hiding (HTTPHeader)
import Dhall.Import.HTTP
#endif
import Dhall.Import.Types

View File

@ -1,5 +1,7 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# OPTIONS_GHC -Wall #-}
module Dhall.Import.Types where
@ -25,6 +27,9 @@ import Dhall.Core
, ReifiedNormalizer(..)
, URL
)
#ifdef WITH_HTTP
import Dhall.Import.Manager (Manager)
#endif
import Dhall.Parser (Src)
import Lens.Family (LensLike')
import System.FilePath (isRelative, splitDirectories)
@ -75,6 +80,13 @@ data Status = Status
-- ^ Cache of imported expressions with their node id in order to avoid
-- importing the same expression twice with different values
#ifdef WITH_HTTP
, _manager :: Maybe Manager
#else
, _manager :: Maybe Void
#endif
-- ^ Used to cache the `Manager` when making multiple requests
, _remote :: URL -> StateT Status IO Data.Text.Text
-- ^ The remote resolver, fetches the content at the given URL.
@ -96,6 +108,8 @@ emptyStatusWith _remote rootDirectory = Status {..}
_cache = Map.empty
_manager = Nothing
_normalizer = Nothing
_startingContext = Dhall.Context.empty