Implement semi-semantic caching (#1154)

* Tag ImportSemantics with their semantic hashes

This is in preparation for semi-semantic caching.

* Collect the list of imports during import resolution

The final step needed in preparation for semi-semantic caching!

* Implement semi-semantic caching

This completes the implementation of the "semi-semantic caching"
proposal (issue #1098).

We compute the semi-semantic hash of a dhall import/file/expression as
follows:

- Parse the input;
- compute the semantic hashes of all imports referenced in the AST, i.e.
the hashes of their normal forms;
- compute the syntactic hash of the input (hashing the parsed AST);
- concatenate the syntactic hash of the input with the semantic hashes
of its imports and hash the result.

The "semi-semantic" cache (normal forms, indexed by semi-semantic
hashes) has the following properties:

- For a given input we can quickly find out if it is in the cache: we
only need to parse the input – we don't need to typecheck or normalise
it!
- The cache stays consistent, that is, we don't need to ‘invalidate’ old
cache entries if their dependencies change!

* Simplify semi-semantic hash

As suggested by @Gabriel439.

* Simplify code

We don't actually need to carry the list of imports around when loading.

* Restore `load`
This commit is contained in:
Frederik Ramcke 2019-07-26 13:45:18 +00:00 committed by mergify[bot]
parent d93c01dfbf
commit 76a0d4159b

View File

@ -140,6 +140,7 @@ import Control.Exception (Exception, SomeException, toException)
import Control.Monad (guard)
import Control.Monad.Catch (throwM, MonadCatch(catch), handle)
import Control.Monad.IO.Class (MonadIO(..))
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.State.Strict (StateT)
import Crypto.Hash (SHA256)
import Data.List.NonEmpty (NonEmpty(..))
@ -498,27 +499,27 @@ loadImport import_ = do
zoom cache (State.modify (Map.insert import_ importSemantics))
return importSemantics
-- | Load an import from the 'semantic cache'. Defers to `loadImportFresh` for
-- imports that aren't frozen (and therefore not cached semantically), as well
-- as those that aren't cached yet.
-- | Load an import from the 'semantic cache'. Defers to
-- `loadImportWithSemisemanticCache` for imports that aren't frozen (and
-- therefore not cached semantically), as well as those that aren't cached yet.
loadImportWithSemanticCache :: Chained -> StateT Status IO ImportSemantics
loadImportWithSemanticCache
import_@(Chained (Import (ImportHashed Nothing _) _)) = do
loadImportFresh import_
loadImportWithSemisemanticCache import_
loadImportWithSemanticCache
import_@(Chained (Import (ImportHashed (Just hash) _) _)) = do
import_@(Chained (Import (ImportHashed (Just semanticHash) _) _)) = do
Status { .. } <- State.get
mCached <- liftIO $ fetchFromSemanticCache hash
mCached <- liftIO $ fetchFromSemanticCache semanticHash
case mCached of
Just bytesStrict -> do
let actualHash = Crypto.Hash.hash bytesStrict
if hash == actualHash
if semanticHash == actualHash
then return ()
else do
Status { _stack } <- State.get
throwMissingImport (Imported _stack (HashMismatch {expectedHash = hash, ..}))
throwMissingImport (Imported _stack (HashMismatch {expectedHash = semanticHash, ..}))
let bytesLazy = Data.ByteString.Lazy.fromStrict bytesStrict
term <- case Codec.Serialise.deserialiseOrFail bytesLazy of
@ -527,17 +528,18 @@ loadImportWithSemanticCache
importSemantics <- case Dhall.Binary.decodeExpression term of
Left err -> throwMissingImport (Imported _stack err)
Right sem -> return sem
return (ImportSemantics {..})
Nothing -> do
ImportSemantics {..} <- loadImportFresh import_
ImportSemantics { importSemantics } <- loadImportWithSemisemanticCache import_
let variants = map (\version -> encodeExpression version importSemantics)
[ minBound .. maxBound ]
case Data.Foldable.find ((== hash). Crypto.Hash.hash) variants of
Just bytes -> liftIO $ writeToSemanticCache hash bytes
case Data.Foldable.find ((== semanticHash). Crypto.Hash.hash) variants of
Just bytes -> liftIO $ writeToSemanticCache semanticHash bytes
Nothing -> do
let expectedHash = hash
let expectedHash = semanticHash
Status { _standardVersion, _stack } <- State.get
let actualHash = hashExpression _standardVersion importSemantics
throwMissingImport (Imported _stack (HashMismatch {..}))
@ -547,7 +549,7 @@ loadImportWithSemanticCache
-- Fetch encoded normal form from "semantic cache"
fetchFromSemanticCache :: Crypto.Hash.Digest SHA256 -> IO (Maybe Data.ByteString.ByteString)
fetchFromSemanticCache expectedHash = Maybe.runMaybeT $ do
cacheFile <- getCacheFile expectedHash
cacheFile <- getCacheFile "dhall" expectedHash
True <- liftIO (Directory.doesFileExist cacheFile)
liftIO (Data.ByteString.readFile cacheFile)
@ -562,14 +564,17 @@ writeExpressionToSemanticCache expression = writeToSemanticCache hash bytes
writeToSemanticCache :: Crypto.Hash.Digest SHA256 -> Data.ByteString.ByteString -> IO ()
writeToSemanticCache hash bytes = do
_ <- Maybe.runMaybeT $ do
cacheFile <- getCacheFile hash
cacheFile <- getCacheFile "dhall" hash
liftIO (Data.ByteString.writeFile cacheFile bytes)
return ()
-- | Load, typecheck and normalise an import from scratch.
loadImportFresh :: Chained -> StateT Status IO ImportSemantics
loadImportFresh (Chained (Import (ImportHashed _ importType) Code)) = do
-- Check the "semi-semantic" disk cache, otherwise typecheck and normalise from
-- scratch.
loadImportWithSemisemanticCache
:: Chained -> StateT Status IO ImportSemantics
loadImportWithSemisemanticCache (Chained (Import (ImportHashed _ importType) Code)) = do
text <- fetchFresh importType
Status {..} <- State.get
path <- case importType of
Local prefix file -> liftIO $ do
@ -588,24 +593,57 @@ loadImportFresh (Chained (Import (ImportHashed _ importType) Code)) = do
parsedImport <- case Text.Megaparsec.parse parser path text of
Left errInfo -> do
Status { _stack } <- State.get
throwMissingImport (Imported _stack (ParseError errInfo text))
Right expr -> return expr
loadedExpr <- loadWith parsedImport -- we load imports recursively here
resolvedExpr <- loadWith parsedImport -- we load imports recursively here
Status {..} <- State.get
-- Check the semi-semantic cache. See
-- https://github.com/dhall-lang/dhall-haskell/issues/1098 for the reasoning
-- behind semi-semantic caching.
let semisemanticHash = computeSemisemanticHash resolvedExpr
mCached <- lift $ fetchFromSemisemanticCache semisemanticHash
importSemantics <- case mCached of
Just bytesStrict -> do
let bytesLazy = Data.ByteString.Lazy.fromStrict bytesStrict
term <- case Codec.Serialise.deserialiseOrFail bytesLazy of
Left err -> throwMissingImport (Imported _stack err)
Right t -> return t
importSemantics <- case Dhall.Binary.decodeExpression term of
Left err -> throwMissingImport (Imported _stack err)
Right sem -> return sem
return importSemantics
Nothing -> do
betaNormal <- case Dhall.TypeCheck.typeWith _startingContext resolvedExpr of
Left err -> throwMissingImport (Imported _stack err)
Right _ -> return (Dhall.Core.normalizeWith _normalizer resolvedExpr)
let alphaBetaNormal = Dhall.Core.alphaNormalize betaNormal
let bytes = encodeExpression _standardVersion alphaBetaNormal
lift $ writeToSemisemanticCache semisemanticHash bytes
importSemantics <- case Dhall.TypeCheck.typeWith _startingContext loadedExpr of
Left err -> throwMissingImport (Imported _stack err)
Right _ -> do
let betaNormal = Dhall.Core.normalizeWith _normalizer loadedExpr
alphaBetaNormal = Dhall.Core.alphaNormalize betaNormal
return alphaBetaNormal
return (ImportSemantics {..})
loadImportFresh (Chained (Import (ImportHashed _ importType) Location)) = do
-- `as Text` imports aren't cached since they are well-typed and normal by
-- construction
loadImportWithSemisemanticCache (Chained (Import (ImportHashed _ importType) RawText)) = do
text <- fetchFresh importType
-- importSemantics is alpha-beta-normal by construction!
let importSemantics = TextLit (Chunks [] text)
return (ImportSemantics {..})
-- `as Location` imports aren't cached since they are well-typed and normal by
-- construction
loadImportWithSemisemanticCache (Chained (Import (ImportHashed _ importType) Location)) = do
let locationType = Union $ Dhall.Map.fromList
[ ("Environment", Just Text)
, ("Remote", Just Text)
@ -628,12 +666,27 @@ loadImportFresh (Chained (Import (ImportHashed _ importType) Location)) = do
return (ImportSemantics {..})
loadImportFresh (Chained (Import (ImportHashed _ importType) RawText)) = do
text <- fetchFresh importType
-- The semi-semantic hash of an expression is computed from the fully resolved
-- AST (without normalising or type-checking it first). See
-- https://github.com/dhall-lang/dhall-haskell/issues/1098 for further
-- discussion.
computeSemisemanticHash :: Expr Src X -> Crypto.Hash.Digest Crypto.Hash.SHA256
computeSemisemanticHash resolvedExpr =
hashExpression Dhall.Binary.defaultStandardVersion resolvedExpr
-- importSemantics is alpha-beta-normal by construction!
let importSemantics = TextLit (Chunks [] text)
return (ImportSemantics {..})
-- Fetch encoded normal form from "semi-semantic cache"
fetchFromSemisemanticCache :: Crypto.Hash.Digest SHA256 -> IO (Maybe Data.ByteString.ByteString)
fetchFromSemisemanticCache semisemanticHash = Maybe.runMaybeT $ do
cacheFile <- getCacheFile "dhall-haskell" semisemanticHash
True <- liftIO (Directory.doesFileExist cacheFile)
liftIO (Data.ByteString.readFile cacheFile)
writeToSemisemanticCache :: Crypto.Hash.Digest SHA256 -> Data.ByteString.ByteString -> IO ()
writeToSemisemanticCache semisemanticHash bytes = do
_ <- Maybe.runMaybeT $ do
cacheFile <- getCacheFile "dhall-haskell" semisemanticHash
liftIO (Data.ByteString.writeFile cacheFile bytes)
return ()
-- Fetch source code directly from disk/network
fetchFresh :: ImportType -> StateT Status IO Text
@ -667,8 +720,8 @@ fetchFresh (Env env) = do
fetchFresh Missing = throwM (MissingImports [])
getCacheFile
:: (Alternative m, MonadIO m) => Crypto.Hash.Digest SHA256 -> m FilePath
getCacheFile hash = do
:: (Alternative m, MonadIO m) => FilePath -> Crypto.Hash.Digest SHA256 -> m FilePath
getCacheFile cacheName hash = do
let assertDirectory directory = do
let private = transform Directory.emptyPermissions
where
@ -699,11 +752,9 @@ getCacheFile hash = do
cacheDirectory <- getCacheDirectory
let dhallDirectory = cacheDirectory </> "dhall"
assertDirectory (cacheDirectory </> cacheName)
assertDirectory dhallDirectory
let cacheFile = dhallDirectory </> ("1220" <> show hash)
let cacheFile = (cacheDirectory </> cacheName) </> ("1220" <> show hash)
return cacheFile