From 6c70d27dac7335fcc56ade271ab84ed5a57c16a1 Mon Sep 17 00:00:00 2001 From: Ben Sima Date: Fri, 14 Nov 2025 19:30:36 -0500 Subject: Add persistent ghc-pkg cache to speed up analysis Implements optimization #3 from Phase 2 of the performance plan. Changes: - Cache stored in _/var/ghc-pkg-cache-.json - Hash based on GHC version + GHC_PACKAGE_PATH for automatic invalidation - Loads cache at startup, saves on successful completion - Uses atomic write (tmp + rename) to prevent corruption - Gracefully handles missing/corrupt cache files - Accumulates cache entries across builds - Works with parallel builds (in-memory IORef + disk persistence) Performance impact: - Eliminates redundant ghc-pkg invocations across runs - Near-zero ghc-pkg overhead once cache is populated - No impact on single-run performance (still uses in-memory IORef) --- Omni/Bild.hs | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 4 deletions(-) (limited to 'Omni') diff --git a/Omni/Bild.hs b/Omni/Bild.hs index 96ea9e9..02e411b 100755 --- a/Omni/Bild.hs +++ b/Omni/Bild.hs @@ -130,7 +130,7 @@ import qualified Data.Char as Char import Data.Conduit ((.|)) import qualified Data.Conduit.Combinators as Conduit import qualified Data.Conduit.Process as Conduit -import Data.IORef (IORef, modifyIORef', newIORef, readIORef) +import Data.IORef (IORef, modifyIORef', newIORef, readIORef, writeIORef) import qualified Data.List as List import qualified Data.Map as Map import qualified Data.Set as Set @@ -139,6 +139,7 @@ import qualified Data.Text as Text import qualified Data.Text.IO as Text.IO import qualified GHC.Conc as GHC import qualified Network.HostName as HostName +import qualified Numeric import qualified Omni.Bild.Meta as Meta import qualified Omni.Cli as Cli import qualified Omni.Log as Log @@ -150,7 +151,7 @@ import qualified Omni.Test as Test import qualified System.Directory as Dir import qualified System.Environment as Env import qualified System.Exit as Exit -import System.FilePath (replaceExtension, ()) +import System.FilePath (replaceExtension, takeDirectory, ()) import qualified System.IO as IO import System.IO.Unsafe (unsafePerformIO) import qualified System.Process as Process @@ -214,6 +215,7 @@ move :: Cli.Arguments -> IO () move args = do IO.hSetBuffering stdout IO.NoBuffering root <- Env.getEnv "CODEROOT" + loadGhcPkgCache namespaces <- Cli.getAllArgs args (Cli.argument "target") |> filterM Dir.doesFileExist @@ -231,7 +233,9 @@ move args = do >> Log.fail ["bild", "timeout after " <> tshow minutes <> " minutes"] >> Log.br >> exitWith (ExitFailure 124) - Just s -> exitSummary s + Just s -> do + when (all isSuccess s) saveGhcPkgCache + exitSummary s where minutes = Cli.getArgWithDefault args "10" (Cli.longOption "time") @@ -891,10 +895,76 @@ test_detectPythonImports = Set.fromList ["Omni/Log.py"] @=? set ] +type GhcPkgCacheMem = Map String (Set String) + +type GhcPkgCacheDisk = Map String [String] + {-# NOINLINE ghcPkgCache #-} -ghcPkgCache :: IORef (Map String (Set String)) +ghcPkgCache :: IORef GhcPkgCacheMem ghcPkgCache = unsafePerformIO (newIORef Map.empty) +cacheToDisk :: GhcPkgCacheMem -> GhcPkgCacheDisk +cacheToDisk = Map.map Set.toList + +cacheFromDisk :: GhcPkgCacheDisk -> GhcPkgCacheMem +cacheFromDisk = Map.map Set.fromList + +ghcPkgCacheHash :: IO (Maybe String) +ghcPkgCacheHash = do + mdb <- Env.lookupEnv "GHC_PACKAGE_PATH" + case mdb of + Nothing -> pure Nothing + Just db -> do + v <- + Exception.catch + ( Process.readProcess "ghc" ["--numeric-version"] "" + /> takeWhile (/= '\n') + ) + (\(_ :: Exception.SomeException) -> pure "") + if null v then pure Nothing else pure (Just (hashString (v <> "|" <> db))) + where + hashString :: String -> String + hashString s = + List.foldl' (\h c -> h * 131 + fromEnum c) (7 :: Int) s + |> abs + |> toInteger + |> \n -> Numeric.showHex n "" + +ghcPkgCachePath :: IO (Maybe FilePath) +ghcPkgCachePath = do + root <- Env.getEnv "CODEROOT" + fmap (\h -> root vardir ("ghc-pkg-cache-" <> h <> ".json")) pure () + Just path -> do + exists <- Dir.doesFileExist path + if not exists + then pure () + else do + eres <- Exception.try (ByteString.Lazy.readFile path) :: IO (Either Exception.IOException ByteString.Lazy.ByteString) + case eres of + Left _ -> pure () + Right bs -> + case Aeson.eitherDecode bs :: Either String GhcPkgCacheDisk of + Left _ -> pure () + Right disk -> writeIORef ghcPkgCache (cacheFromDisk disk) + +saveGhcPkgCache :: IO () +saveGhcPkgCache = do + mpath <- ghcPkgCachePath + case mpath of + Nothing -> pure () + Just path -> do + cache <- readIORef ghcPkgCache + let tmp = path <> ".tmp" + Dir.createDirectoryIfMissing True (takeDirectory path) + ByteString.Lazy.writeFile tmp (Aeson.encode (cacheToDisk cache)) + Dir.renameFile tmp path + ghcPkgFindModuleCached :: String -> IO (Set String) ghcPkgFindModuleCached m = do cache <- readIORef ghcPkgCache -- cgit v1.2.3