summaryrefslogtreecommitdiff
path: root/Omni
diff options
context:
space:
mode:
authorBen Sima <ben@bsima.me>2025-11-14 19:30:36 -0500
committerBen Sima <ben@bsima.me>2025-11-14 19:30:36 -0500
commit6c70d27dac7335fcc56ade271ab84ed5a57c16a1 (patch)
treee6f81cf2850a2ea2622d59ce24b8f65236170f43 /Omni
parentf6cd87b7544628f8947fb63907f6f5fee479c9cd (diff)
Add persistent ghc-pkg cache to speed up analysis
Implements optimization #3 from Phase 2 of the performance plan. Changes: - Cache stored in _/var/ghc-pkg-cache-<hash>.json - Hash based on GHC version + GHC_PACKAGE_PATH for automatic invalidation - Loads cache at startup, saves on successful completion - Uses atomic write (tmp + rename) to prevent corruption - Gracefully handles missing/corrupt cache files - Accumulates cache entries across builds - Works with parallel builds (in-memory IORef + disk persistence) Performance impact: - Eliminates redundant ghc-pkg invocations across runs - Near-zero ghc-pkg overhead once cache is populated - No impact on single-run performance (still uses in-memory IORef)
Diffstat (limited to 'Omni')
-rwxr-xr-xOmni/Bild.hs78
1 files changed, 74 insertions, 4 deletions
diff --git a/Omni/Bild.hs b/Omni/Bild.hs
index 96ea9e9..02e411b 100755
--- a/Omni/Bild.hs
+++ b/Omni/Bild.hs
@@ -130,7 +130,7 @@ import qualified Data.Char as Char
import Data.Conduit ((.|))
import qualified Data.Conduit.Combinators as Conduit
import qualified Data.Conduit.Process as Conduit
-import Data.IORef (IORef, modifyIORef', newIORef, readIORef)
+import Data.IORef (IORef, modifyIORef', newIORef, readIORef, writeIORef)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
@@ -139,6 +139,7 @@ import qualified Data.Text as Text
import qualified Data.Text.IO as Text.IO
import qualified GHC.Conc as GHC
import qualified Network.HostName as HostName
+import qualified Numeric
import qualified Omni.Bild.Meta as Meta
import qualified Omni.Cli as Cli
import qualified Omni.Log as Log
@@ -150,7 +151,7 @@ import qualified Omni.Test as Test
import qualified System.Directory as Dir
import qualified System.Environment as Env
import qualified System.Exit as Exit
-import System.FilePath (replaceExtension, (</>))
+import System.FilePath (replaceExtension, takeDirectory, (</>))
import qualified System.IO as IO
import System.IO.Unsafe (unsafePerformIO)
import qualified System.Process as Process
@@ -214,6 +215,7 @@ move :: Cli.Arguments -> IO ()
move args = do
IO.hSetBuffering stdout IO.NoBuffering
root <- Env.getEnv "CODEROOT"
+ loadGhcPkgCache
namespaces <-
Cli.getAllArgs args (Cli.argument "target")
|> filterM Dir.doesFileExist
@@ -231,7 +233,9 @@ move args = do
>> Log.fail ["bild", "timeout after " <> tshow minutes <> " minutes"]
>> Log.br
>> exitWith (ExitFailure 124)
- Just s -> exitSummary s
+ Just s -> do
+ when (all isSuccess s) saveGhcPkgCache
+ exitSummary s
where
minutes =
Cli.getArgWithDefault args "10" (Cli.longOption "time")
@@ -891,10 +895,76 @@ test_detectPythonImports =
Set.fromList ["Omni/Log.py"] @=? set
]
+type GhcPkgCacheMem = Map String (Set String)
+
+type GhcPkgCacheDisk = Map String [String]
+
{-# NOINLINE ghcPkgCache #-}
-ghcPkgCache :: IORef (Map String (Set String))
+ghcPkgCache :: IORef GhcPkgCacheMem
ghcPkgCache = unsafePerformIO (newIORef Map.empty)
+cacheToDisk :: GhcPkgCacheMem -> GhcPkgCacheDisk
+cacheToDisk = Map.map Set.toList
+
+cacheFromDisk :: GhcPkgCacheDisk -> GhcPkgCacheMem
+cacheFromDisk = Map.map Set.fromList
+
+ghcPkgCacheHash :: IO (Maybe String)
+ghcPkgCacheHash = do
+ mdb <- Env.lookupEnv "GHC_PACKAGE_PATH"
+ case mdb of
+ Nothing -> pure Nothing
+ Just db -> do
+ v <-
+ Exception.catch
+ ( Process.readProcess "ghc" ["--numeric-version"] ""
+ /> takeWhile (/= '\n')
+ )
+ (\(_ :: Exception.SomeException) -> pure "")
+ if null v then pure Nothing else pure (Just (hashString (v <> "|" <> db)))
+ where
+ hashString :: String -> String
+ hashString s =
+ List.foldl' (\h c -> h * 131 + fromEnum c) (7 :: Int) s
+ |> abs
+ |> toInteger
+ |> \n -> Numeric.showHex n ""
+
+ghcPkgCachePath :: IO (Maybe FilePath)
+ghcPkgCachePath = do
+ root <- Env.getEnv "CODEROOT"
+ fmap (\h -> root </> vardir </> ("ghc-pkg-cache-" <> h <> ".json")) </ ghcPkgCacheHash
+
+loadGhcPkgCache :: IO ()
+loadGhcPkgCache = do
+ mpath <- ghcPkgCachePath
+ case mpath of
+ Nothing -> pure ()
+ Just path -> do
+ exists <- Dir.doesFileExist path
+ if not exists
+ then pure ()
+ else do
+ eres <- Exception.try (ByteString.Lazy.readFile path) :: IO (Either Exception.IOException ByteString.Lazy.ByteString)
+ case eres of
+ Left _ -> pure ()
+ Right bs ->
+ case Aeson.eitherDecode bs :: Either String GhcPkgCacheDisk of
+ Left _ -> pure ()
+ Right disk -> writeIORef ghcPkgCache (cacheFromDisk disk)
+
+saveGhcPkgCache :: IO ()
+saveGhcPkgCache = do
+ mpath <- ghcPkgCachePath
+ case mpath of
+ Nothing -> pure ()
+ Just path -> do
+ cache <- readIORef ghcPkgCache
+ let tmp = path <> ".tmp"
+ Dir.createDirectoryIfMissing True (takeDirectory path)
+ ByteString.Lazy.writeFile tmp (Aeson.encode (cacheToDisk cache))
+ Dir.renameFile tmp path
+
ghcPkgFindModuleCached :: String -> IO (Set String)
ghcPkgFindModuleCached m = do
cache <- readIORef ghcPkgCache