summaryrefslogtreecommitdiff
path: root/Omni
diff options
context:
space:
mode:
authorBen Sima <ben@bsima.me>2025-11-14 12:54:20 -0500
committerBen Sima <ben@bsima.me>2025-11-14 12:54:20 -0500
commit538f976d17b88877aecc93566d1012543fb5fdf8 (patch)
tree26ca82abaf32a93ab54493882833215f16d3500a /Omni
parentf26fb86c6a9ea3323a6b2e53c879b4672e81b6c5 (diff)
Bild: breadth-first search and ghc-pkg cachingHEADlive
Replaced the old slow depth-first search with a breadth-first search for detecting imports. This should be way faster when building a single namespace because it doesn't have to visit the same file multiple times. The ghc-pkg caching means we only have to run ghc-pkg once per bild invocation.
Diffstat (limited to 'Omni')
-rwxr-xr-xOmni/Bild.hs203
1 files changed, 110 insertions, 93 deletions
diff --git a/Omni/Bild.hs b/Omni/Bild.hs
index c1c4210..dbae550 100755
--- a/Omni/Bild.hs
+++ b/Omni/Bild.hs
@@ -128,6 +128,7 @@ import qualified Data.Char as Char
import Data.Conduit ((.|))
import qualified Data.Conduit.Combinators as Conduit
import qualified Data.Conduit.Process as Conduit
+import Data.IORef (IORef, modifyIORef', newIORef, readIORef)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
@@ -198,27 +199,26 @@ test_bildExamples =
xs -> all (== Exit.ExitSuccess) xs @=? True
move :: Cli.Arguments -> IO ()
-move args =
+move args = do
IO.hSetBuffering stdout IO.NoBuffering
- >> Env.getEnv "CODEROOT"
- +> \root ->
- Cli.getAllArgs args (Cli.argument "target")
- |> filterM Dir.doesFileExist
- +> filterM (\x -> isGitIgnored x /> don't)
- /> filter (\x -> isGitHook x |> don't)
- +> traverse Dir.makeAbsolute
- +> traverse (namespaceFromPathOrDie root)
- /> filter isBuildableNs
- +> foldM analyze mempty
- +> printOrBuild
- |> Timeout.timeout (toMillis minutes)
- +> \case
- Nothing ->
- Log.br
- >> Log.fail ["bild", "timeout after " <> tshow minutes <> " minutes"]
- >> Log.br
- >> exitWith (ExitFailure 124)
- Just s -> exitSummary s
+ root <- Env.getEnv "CODEROOT"
+ Cli.getAllArgs args (Cli.argument "target")
+ |> filterM Dir.doesFileExist
+ +> filterGitIgnored
+ /> filter (\x -> isGitHook x |> don't)
+ +> traverse Dir.makeAbsolute
+ +> traverse (namespaceFromPathOrDie root)
+ /> filter isBuildableNs
+ +> foldM analyze mempty
+ +> printOrBuild root
+ |> Timeout.timeout (toMillis minutes)
+ +> \case
+ Nothing ->
+ Log.br
+ >> Log.fail ["bild", "timeout after " <> tshow minutes <> " minutes"]
+ >> Log.br
+ >> exitWith (ExitFailure 124)
+ Just s -> exitSummary s
where
minutes =
Cli.getArgWithDefault args "10" (Cli.longOption "time")
@@ -226,8 +226,8 @@ move args =
|> \case
Nothing -> panic "could not read --time argument"
Just n -> (n == 0) ?: (-1, n)
- printOrBuild :: Analysis -> IO [ExitCode]
- printOrBuild targets
+ printOrBuild :: FilePath -> Analysis -> IO [ExitCode]
+ printOrBuild root targets
| Map.null targets =
Log.wipe
>> Log.fail ["bild", "nothing to build"]
@@ -236,7 +236,6 @@ move args =
| args `Cli.has` Cli.longOption "plan" =
Log.wipe >> putJSON targets >> pure [Exit.ExitSuccess]
| otherwise = do
- root <- Env.getEnv "CODEROOT"
nproc <- GHC.getNumProcessors
createHier root
build isTest isLoud jobs (cpus nproc) targets
@@ -268,6 +267,20 @@ isGitIgnored path =
(ExitSuccess, _, _) -> pure True
(ExitFailure _, _, _) -> pure False
+filterGitIgnored :: [FilePath] -> IO [FilePath]
+filterGitIgnored [] = pure []
+filterGitIgnored paths = do
+ (exitCode, out, _) <-
+ Process.readProcessWithExitCode
+ "git"
+ ["check-ignore", "--stdin"]
+ (List.intercalate "\n" paths)
+ case exitCode of
+ ExitSuccess ->
+ let ignoredPaths = Set.fromList (String.lines out)
+ in pure [p | p <- paths, don't (Set.member p ignoredPaths)]
+ ExitFailure _ -> pure paths
+
test_isGitIgnored :: Test.Tree
test_isGitIgnored =
Test.group
@@ -766,45 +779,41 @@ analyze hmap ns = case Map.lookup ns hmap of
|> pure
detectHaskellImports :: Analysis -> [Text] -> IO (Set Meta.Dep, Set FilePath)
-detectHaskellImports hmap contentLines =
- Env.getEnv "CODEROOT" +> \root ->
- contentLines
- /> Text.unpack
- /> Regex.match haskellImports
- |> catMaybes
- |> \imports ->
- foldM ghcPkgFindModule Set.empty imports
- +> \pkgs ->
- filepaths imports
- +> \files ->
- findDeps root files
- +> \deps ->
- (pkgs <> deps, map (stripRoot root) files |> Set.fromList)
- |> pure
+detectHaskellImports _ contentLines = do
+ root <- Env.getEnv "CODEROOT"
+ let initialMods = catMaybes (Regex.match haskellImports </ (Text.unpack </ contentLines))
+ initialLocals <- toLocalFiles root initialMods
+ let initialLocalsSet = Set.fromList initialLocals
+ let localMods = [m | m <- initialMods, (Namespace.fromHaskellModule m |> Namespace.toPath) `elem` initialLocals]
+ let initialExternals = filter (`notElem` localMods) initialMods
+ (srcs, transitiveExtMods) <- bfs root initialLocalsSet Set.empty Set.empty
+ let allExtMods = Set.fromList initialExternals <> transitiveExtMods
+ pkgSets <- Async.mapConcurrently ghcPkgFindModuleCached (Set.toList allExtMods)
+ let pkgs = mconcat pkgSets
+ pure (pkgs, srcs)
where
- filepaths :: [String] -> IO [FilePath]
- filepaths imports =
- imports
- |> map Namespace.fromHaskellModule
- |> map Namespace.toPath
- |> traverse Dir.makeAbsolute
- +> filterM Dir.doesFileExist
- findDeps :: String -> [FilePath] -> IO (Set Meta.Dep)
- findDeps root fps =
- fps
- |> traverse (pure <. Namespace.fromPath root)
- /> catMaybes
- -- this is still an inefficiency, because this recurses before the
- -- hmap is updated by the fold, transitive imports will be
- -- re-visited. you can see this with `TERM=dumb bild`. to fix this i
- -- need shared state instead of a fold, or figure out how to do a
- -- breadth-first search instead of depth-first.
- +> foldM analyze (onlyHaskell hmap)
- /> Map.elems
- /> map langdeps
- /> mconcat
- onlyHaskell :: Analysis -> Analysis
- onlyHaskell = Map.filterWithKey (\ns _ -> ext ns == Namespace.Hs)
+ bfs :: FilePath -> Set FilePath -> Set FilePath -> Set String -> IO (Set FilePath, Set String)
+ bfs root queue visited extMods
+ | Set.null queue = pure (visited, extMods)
+ | otherwise = do
+ let (rel, queue') = Set.deleteFindMin queue
+ fileLines <-
+ withFile (root </> rel) ReadMode <| \h ->
+ IO.hSetEncoding h IO.utf8_bom
+ >> Text.IO.hGetContents h
+ /> Text.lines
+ let mods = catMaybes (Regex.match haskellImports </ (Text.unpack </ fileLines))
+ locals <- toLocalFiles root mods
+ let localsSet = Set.fromList locals
+ let localModsFromPaths = Set.fromList [m | m <- mods, (Namespace.fromHaskellModule m |> Namespace.toPath) `elem` locals]
+ let newExternals = Set.fromList mods Set.\\ localModsFromPaths
+ let newLocals = localsSet Set.\\ visited
+ bfs root (queue' <> newLocals) (Set.insert rel visited) (extMods <> newExternals)
+
+ toLocalFiles :: FilePath -> [String] -> IO [FilePath]
+ toLocalFiles root mods = do
+ let rels = map (Namespace.fromHaskellModule .> Namespace.toPath) mods
+ filterM (\rel -> Dir.doesFileExist (root </> rel)) rels
stripRoot :: FilePath -> FilePath -> FilePath
stripRoot root f = fromMaybe f (List.stripPrefix (root <> "/") f)
@@ -821,20 +830,11 @@ detectLispImports contentLines =
-- | Finds local imports and recursively finds transitive imports, similar to
-- 'detectHaskellImports'.
detectPythonImports :: Analysis -> [Text] -> IO (Set FilePath)
-detectPythonImports pmap contentLines =
- Env.getEnv "CODEROOT" +> \root ->
- contentLines
- /> Text.unpack
- /> Regex.match pythonImport
- |> catMaybes
- |> \imports ->
- filepaths imports
- +> \files ->
- findDeps root files
- +> \deps ->
- (map (stripRoot root) files <> Set.toList deps)
- |> Set.fromList
- |> pure
+detectPythonImports _ contentLines = do
+ root <- Env.getEnv "CODEROOT"
+ let initialMods = catMaybes (Regex.match pythonImport </ (Text.unpack </ contentLines))
+ initialLocals <- toLocalFiles root initialMods
+ bfs root (Set.fromList initialLocals) Set.empty
where
-- only detects 'import x' because I don't like 'from'
pythonImport :: Regex.RE Char String
@@ -843,24 +843,27 @@ detectPythonImports pmap contentLines =
*> Regex.some (Regex.psym Char.isSpace)
*> Regex.many (Regex.psym isModuleChar)
<* Regex.many Regex.anySym
- filepaths :: [String] -> IO [FilePath]
- filepaths imports =
- imports
- |> map Namespace.fromPythonModule
- |> map Namespace.toPath
- |> traverse Dir.makeAbsolute
- +> filterM Dir.doesFileExist
- findDeps :: String -> [FilePath] -> IO (Set FilePath)
- findDeps root fps =
- fps
- |> traverse (pure <. Namespace.fromPath root)
- /> catMaybes
- +> foldM analyze (onlyPython pmap)
- /> Map.elems
- /> map srcs
- /> mconcat
- onlyPython :: Analysis -> Analysis
- onlyPython = Map.filterWithKey (\ns _ -> ext ns == Namespace.Py)
+
+ bfs :: FilePath -> Set FilePath -> Set FilePath -> IO (Set FilePath)
+ bfs root queue visited
+ | Set.null queue = pure visited
+ | otherwise = do
+ let (rel, queue') = Set.deleteFindMin queue
+ fileLines <-
+ withFile (root </> rel) ReadMode <| \h ->
+ IO.hSetEncoding h IO.utf8_bom
+ >> Text.IO.hGetContents h
+ /> Text.lines
+ let mods = catMaybes (Regex.match pythonImport </ (Text.unpack </ fileLines))
+ locals <- toLocalFiles root mods
+ let localsSet = Set.fromList locals
+ let newLocals = localsSet Set.\\ visited
+ bfs root (queue' <> newLocals) (Set.insert rel visited)
+
+ toLocalFiles :: FilePath -> [String] -> IO [FilePath]
+ toLocalFiles root mods = do
+ let rels = map (Namespace.fromPythonModule .> Namespace.toPath) mods
+ filterM (\rel -> Dir.doesFileExist (root </> rel)) rels
test_detectPythonImports :: Test.Tree
test_detectPythonImports =
@@ -874,6 +877,20 @@ test_detectPythonImports =
Set.fromList ["Omni/Log.py"] @=? set
]
+{-# NOINLINE ghcPkgCache #-}
+ghcPkgCache :: IORef (Map String (Set String))
+ghcPkgCache = unsafePerformIO (newIORef Map.empty)
+
+ghcPkgFindModuleCached :: String -> IO (Set String)
+ghcPkgFindModuleCached m = do
+ cache <- readIORef ghcPkgCache
+ case Map.lookup m cache of
+ Just pkgs -> pure pkgs
+ Nothing -> do
+ pkgs <- ghcPkgFindModule Set.empty m
+ modifyIORef' ghcPkgCache (Map.insert m pkgs)
+ pure pkgs
+
ghcPkgFindModule :: Set String -> String -> IO (Set String)
ghcPkgFindModule acc m =
Env.getEnv "GHC_PACKAGE_PATH" +> \packageDb ->