diff options
| author | Ben Sima <ben@bensima.com> | 2025-12-17 13:29:40 -0500 |
|---|---|---|
| committer | Ben Sima <ben@bensima.com> | 2025-12-17 13:29:40 -0500 |
| commit | ab01b34bf563990e0f491ada646472aaade97610 (patch) | |
| tree | 5e46a1a157bb846b0c3a090a83153c788da2b977 /Omni/Deploy | |
| parent | e112d3ce07fa24f31a281e521a554cc881a76c7b (diff) | |
| parent | 337648981cc5a55935116141341521f4fce83214 (diff) | |
Merge Ava deployment changes
Diffstat (limited to 'Omni/Deploy')
| -rw-r--r-- | Omni/Deploy/Caddy.hs | 241 | ||||
| -rw-r--r-- | Omni/Deploy/Deployer.hs | 317 | ||||
| -rw-r--r-- | Omni/Deploy/Deployer.nix | 104 | ||||
| -rw-r--r-- | Omni/Deploy/Manifest.hs | 673 | ||||
| -rw-r--r-- | Omni/Deploy/PLAN.md | 299 | ||||
| -rw-r--r-- | Omni/Deploy/Packages.nix | 11 | ||||
| -rw-r--r-- | Omni/Deploy/README.md | 211 | ||||
| -rw-r--r-- | Omni/Deploy/Systemd.hs | 269 |
8 files changed, 2125 insertions, 0 deletions
diff --git a/Omni/Deploy/Caddy.hs b/Omni/Deploy/Caddy.hs new file mode 100644 index 0000000..6cedf92 --- /dev/null +++ b/Omni/Deploy/Caddy.hs @@ -0,0 +1,241 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | Caddy admin API integration for the mini-PaaS deployment system. +-- +-- : out deploy-caddy +-- : dep aeson +-- : dep http-conduit +-- : dep http-types +module Omni.Deploy.Caddy + ( buildRoute, + getCurrentRoutes, + upsertRoute, + deleteRoute, + syncRoutes, + getRouteById, + caddyAdmin, + main, + test, + ) +where + +import Alpha +import Data.Aeson ((.=)) +import qualified Data.Aeson as Aeson +import qualified Data.Map as Map +import qualified Data.Text as Text +import qualified Network.HTTP.Simple as HTTP +import qualified Network.HTTP.Types.Status as Status +import Omni.Deploy.Manifest (Artifact (..), Exec (..), Hardening (..), Http (..), Service (..), Systemd (..)) +import qualified Omni.Test as Test + +caddyAdmin :: Text +caddyAdmin = "http://localhost:2019" + +data Route = Route + { routeId :: Text, + routeMatch :: [RouteMatch], + routeHandle :: [RouteHandler], + routeTerminal :: Bool + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON Route where + toJSON Route {..} = + Aeson.object + [ "@id" .= routeId, + "match" .= routeMatch, + "handle" .= routeHandle, + "terminal" .= routeTerminal + ] + +newtype RouteMatch = RouteMatch + { matchHost :: [Text] + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON RouteMatch where + toJSON RouteMatch {..} = + Aeson.object ["host" .= matchHost] + +data RouteHandler = RouteHandler + { handlerType :: Text, + handlerUpstreams :: [Upstream] + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON RouteHandler where + toJSON RouteHandler {..} = + Aeson.object + [ "handler" .= handlerType, + "upstreams" .= handlerUpstreams + ] + +newtype Upstream = Upstream + { upstreamDial :: Text + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON Upstream where + toJSON Upstream {..} = + Aeson.object ["dial" .= upstreamDial] + +buildRoute :: Service -> Maybe Route +buildRoute Service {..} = case serviceHttp of + Nothing -> Nothing + Just Http {..} -> + Just + <| Route + { routeId = "biz-" <> serviceName, + routeMatch = [RouteMatch [httpDomain]], + routeHandle = + [ RouteHandler + "reverse_proxy" + [Upstream <| "localhost:" <> tshow httpInternalPort] + ], + routeTerminal = True + } + +getCurrentRoutes :: Text -> IO [Aeson.Value] +getCurrentRoutes adminUrl = do + let url = Text.unpack adminUrl <> "/config/apps/http/servers/srv0/routes" + request <- HTTP.parseRequest url + result <- try @SomeException <| HTTP.httpLBS request + case result of + Left _ -> pure [] + Right response -> + if Status.statusIsSuccessful (HTTP.getResponseStatus response) + then case Aeson.decode (HTTP.getResponseBody response) of + Just routes -> pure routes + Nothing -> pure [] + else pure [] + +upsertRoute :: Text -> Service -> IO Bool +upsertRoute adminUrl svc = case buildRoute svc of + Nothing -> pure False + Just route -> do + let routeId' = "biz-" <> serviceName svc + patchUrl = Text.unpack adminUrl <> "/id/" <> Text.unpack routeId' + postUrl = Text.unpack adminUrl <> "/config/apps/http/servers/srv0/routes" + body = Aeson.encode route + + patchRequest <- + HTTP.parseRequest patchUrl + /> HTTP.setRequestMethod "PATCH" + /> HTTP.setRequestBodyLBS body + /> HTTP.setRequestHeader "Content-Type" ["application/json"] + patchResult <- try @SomeException <| HTTP.httpLBS patchRequest + + case patchResult of + Right resp + | Status.statusIsSuccessful (HTTP.getResponseStatus resp) -> + pure True + _ -> do + postRequest <- + HTTP.parseRequest postUrl + /> HTTP.setRequestMethod "POST" + /> HTTP.setRequestBodyLBS body + /> HTTP.setRequestHeader "Content-Type" ["application/json"] + postResult <- try @SomeException <| HTTP.httpLBS postRequest + case postResult of + Right resp -> pure <| Status.statusIsSuccessful (HTTP.getResponseStatus resp) + Left _ -> pure False + +deleteRoute :: Text -> Text -> IO Bool +deleteRoute adminUrl serviceName' = do + let routeId' = "biz-" <> serviceName' + url = Text.unpack adminUrl <> "/id/" <> Text.unpack routeId' + request <- + HTTP.parseRequest url + /> HTTP.setRequestMethod "DELETE" + result <- try @SomeException <| HTTP.httpLBS request + case result of + Right resp -> pure <| Status.statusIsSuccessful (HTTP.getResponseStatus resp) + Left _ -> pure False + +syncRoutes :: Text -> [Service] -> IO (Map Text Bool) +syncRoutes adminUrl services = do + results <- + forM services <| \svc -> + case serviceHttp svc of + Nothing -> pure Nothing + Just _ -> do + success <- upsertRoute adminUrl svc + pure <| Just (serviceName svc, success) + pure <| Map.fromList <| catMaybes results + +getRouteById :: Text -> Text -> IO (Maybe Aeson.Value) +getRouteById adminUrl routeId' = do + let url = Text.unpack adminUrl <> "/id/" <> Text.unpack routeId' + request <- HTTP.parseRequest url + result <- try @SomeException <| HTTP.httpLBS request + case result of + Right resp + | Status.statusIsSuccessful (HTTP.getResponseStatus resp) -> + pure <| Aeson.decode (HTTP.getResponseBody resp) + _ -> pure Nothing + +test :: Test.Tree +test = + Test.group + "Omni.Deploy.Caddy" + [ test_buildRouteWithHttp, + test_buildRouteWithoutHttp, + test_buildRouteWithPath + ] + +mkTestService :: Text -> Text -> Maybe Http -> Service +mkTestService name path http = + Service + { serviceName = name, + serviceArtifact = Artifact "nix-closure" path, + serviceHosts = ["biz"], + serviceExec = Exec Nothing "root" "root", + serviceEnv = mempty, + serviceEnvFile = Nothing, + serviceHttp = http, + serviceSystemd = Systemd ["network-online.target"] [] "on-failure" 5, + serviceHardening = Hardening False True "strict" True, + serviceRevision = Nothing + } + +test_buildRouteWithHttp :: Test.Tree +test_buildRouteWithHttp = + Test.unit "builds route for service with HTTP" <| do + let svc = mkTestService "test-svc" "/nix/store/abc" (Just <| Http "example.com" "/" 8000) + case buildRoute svc of + Nothing -> Test.assertFailure "expected route" + Just route -> do + routeId route Test.@=? "biz-test-svc" + case (head <| routeMatch route, head <| routeHandle route) of + (Just m, Just h) -> do + matchHost m Test.@=? ["example.com"] + case head <| handlerUpstreams h of + Just u -> upstreamDial u Test.@=? "localhost:8000" + Nothing -> Test.assertFailure "no upstreams" + _ -> Test.assertFailure "no match/handle" + +test_buildRouteWithoutHttp :: Test.Tree +test_buildRouteWithoutHttp = + Test.unit "returns Nothing for service without HTTP" <| do + let svc = mkTestService "worker" "/nix/store/xyz" Nothing + case buildRoute svc of + Nothing -> pure () + Just _ -> Test.assertFailure "expected Nothing" + +test_buildRouteWithPath :: Test.Tree +test_buildRouteWithPath = + Test.unit "builds route with custom path" <| do + let svc = mkTestService "api" "/nix/store/abc" (Just <| Http "api.example.com" "/v1" 8080) + case buildRoute svc of + Nothing -> Test.assertFailure "expected route" + Just route -> case head <| routeMatch route of + Nothing -> Test.assertFailure "no match" + Just m -> matchHost m Test.@=? ["api.example.com"] + +main :: IO () +main = Test.run test diff --git a/Omni/Deploy/Deployer.hs b/Omni/Deploy/Deployer.hs new file mode 100644 index 0000000..7e57b34 --- /dev/null +++ b/Omni/Deploy/Deployer.hs @@ -0,0 +1,317 @@ +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | Mini-PaaS deployer service. +-- +-- Polls manifest from S3, compares to local state, pulls changed closures, +-- generates systemd units, updates Caddy routes, and manages GC roots. +-- +-- : out biz-deployer +-- : dep aeson +-- : dep amazonka +-- : dep amazonka-core +-- : dep amazonka-s3 +-- : dep directory +-- : dep http-conduit +-- : dep http-types +-- : dep time +module Omni.Deploy.Deployer + ( DeployerState (..), + loadState, + saveState, + pullClosure, + createGcRoot, + removeGcRoot, + deployService, + removeService, + reconcile, + runOnce, + runDaemon, + stateDir, + stateFile, + gcrootsDir, + main, + test, + ) +where + +import Alpha +import qualified Control.Concurrent as Concurrent +import qualified Data.Aeson as Aeson +import qualified Data.ByteString.Lazy as BL +import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.Text as Text +import qualified Network.HostName as HostName +import qualified Omni.Cli as Cli +import qualified Omni.Deploy.Caddy as Caddy +import qualified Omni.Deploy.Manifest as Manifest +import qualified Omni.Deploy.Systemd as Systemd +import qualified Omni.Log as Log +import qualified Omni.Test as Test +import qualified System.Directory as Dir +import qualified System.Exit as Exit +import System.FilePath ((</>)) +import qualified System.Process as Process + +stateDir :: FilePath +stateDir = "/var/lib/biz-deployer" + +stateFile :: FilePath +stateFile = stateDir </> "state.json" + +gcrootsDir :: FilePath +gcrootsDir = "/nix/var/nix/gcroots/biz" + +s3Url :: String +s3Url = "s3://omni-nix-cache?profile=digitalocean&scheme=https&endpoint=nyc3.digitaloceanspaces.com" + +newtype DeployerState = DeployerState + { stateServices :: Map Text Text + } + deriving (Show, Eq, Generic) + deriving anyclass (Aeson.FromJSON, Aeson.ToJSON) + +emptyState :: DeployerState +emptyState = DeployerState mempty + +loadState :: IO DeployerState +loadState = do + exists <- Dir.doesFileExist stateFile + if exists + then do + contents <- BL.readFile stateFile + case Aeson.eitherDecode contents of + Left _ -> pure emptyState + Right s -> pure s + else pure emptyState + +saveState :: DeployerState -> IO () +saveState st = do + Dir.createDirectoryIfMissing True stateDir + BL.writeFile stateFile (Aeson.encode st) + +getHostname :: IO Text +getHostname = HostName.getHostName /> Text.pack + +pullClosure :: Text -> IO Bool +pullClosure storePath = do + -- First check if the path already exists locally + exists <- Dir.doesDirectoryExist (Text.unpack storePath) + if exists + then do + Log.info ["deployer", "path already exists locally", storePath] + pure True + else do + (exitCode, _, stderr') <- + Process.readProcessWithExitCode + "nix" + [ "copy", + "--extra-experimental-features", + "nix-command", + "--from", + s3Url, + Text.unpack storePath + ] + "" + case exitCode of + Exit.ExitSuccess -> pure True + Exit.ExitFailure _ -> do + Log.fail ["deployer", "pull failed", storePath, Text.pack stderr'] + pure False + +createGcRoot :: Text -> Text -> IO FilePath +createGcRoot serviceName storePath = do + Dir.createDirectoryIfMissing True gcrootsDir + let rootPath = gcrootsDir </> Text.unpack serviceName + exists <- Dir.doesPathExist rootPath + when exists <| Dir.removeFile rootPath + Dir.createFileLink (Text.unpack storePath) rootPath + pure rootPath + +removeGcRoot :: Text -> IO () +removeGcRoot serviceName = do + let rootPath = gcrootsDir </> Text.unpack serviceName + exists <- Dir.doesPathExist rootPath + when exists <| Dir.removeFile rootPath + +deployService :: Manifest.Service -> DeployerState -> IO (Bool, DeployerState) +deployService svc st = do + let name = Manifest.serviceName svc + path = Manifest.storePath (Manifest.serviceArtifact svc) + + -- Check what's actually running in systemd instead of in-memory state + runningPath <- Systemd.getRunningStorePath name + + if runningPath == Just path + then do + Log.info ["deployer", name, "already at", path] + pure (True, st) + else do + Log.info ["deployer", "deploying", name, fromMaybe "new" runningPath, "->", path] + + pulled <- pullClosure path + if don't pulled + then do + Log.fail ["deployer", "failed to pull", name] + pure (False, st) + else do + _ <- createGcRoot name path + + _ <- Systemd.writeUnit Systemd.servicesDir svc + _ <- Systemd.createSymlink Systemd.servicesDir "/run/systemd/system" svc + Systemd.reloadAndRestart name + + case Manifest.serviceHttp svc of + Just _ -> void <| Caddy.upsertRoute Caddy.caddyAdmin svc + Nothing -> pure () + + let newSt = st {stateServices = Map.insert name path (stateServices st)} + Log.good ["deployer", "deployed", name] + pure (True, newSt) + +removeService :: Text -> DeployerState -> IO DeployerState +removeService svcName st = do + Log.info ["deployer", "removing", svcName] + + Systemd.stopAndDisable svcName + Systemd.removeUnit Systemd.servicesDir "/run/systemd/system" svcName + _ <- Caddy.deleteRoute Caddy.caddyAdmin svcName + removeGcRoot svcName + + pure <| st {stateServices = Map.delete svcName (stateServices st)} + +reconcile :: Manifest.Manifest -> DeployerState -> IO DeployerState +reconcile manifest st = do + hostname <- getHostname + + let mfstServices = + Set.fromList + [ Manifest.serviceName svc + | svc <- Manifest.manifestServices manifest, + hostname `elem` Manifest.serviceHosts svc + ] + localServices = Set.fromList <| Map.keys (stateServices st) + toRemove = localServices Set.\\ mfstServices + + st' <- foldM (flip removeService) st (Set.toList toRemove) + + foldM + ( \s svc -> + if hostname `elem` Manifest.serviceHosts svc + then do + (_, newSt) <- deployService svc s + pure newSt + else pure s + ) + st' + (Manifest.manifestServices manifest) + +runOnce :: IO Bool +runOnce = do + Log.info ["deployer", "starting reconciliation"] + + manifest <- Manifest.loadManifestFromS3 + case manifest of + Nothing -> do + Log.warn ["deployer", "no manifest found in S3"] + pure False + Just m -> do + st <- loadState + st' <- reconcile m st + saveState st' + Log.good ["deployer", "reconciliation complete"] + pure True + +runDaemon :: Int -> IO () +runDaemon intervalSeconds = do + Log.info ["deployer", "starting daemon", "interval=" <> tshow intervalSeconds <> "s"] + forever <| do + result <- try runOnce + case result of + Left (e :: SomeException) -> + Log.fail ["deployer", "error in reconciliation", tshow e] + Right _ -> pure () + Concurrent.threadDelay (intervalSeconds * 1_000_000) + +help :: Cli.Docopt +help = + [Cli.docopt| +biz-deployer - Mini-PaaS deployment agent + +Usage: + biz-deployer test + biz-deployer once + biz-deployer daemon [<interval>] + biz-deployer status + biz-deployer (-h | --help) + +Commands: + test Run tests + once Run a single reconciliation cycle + daemon Run as daemon with interval in seconds (default: 300) + status Show current deployer state + +Options: + -h --help Show this help +|] + +move :: Cli.Arguments -> IO () +move args + | args `Cli.has` Cli.command "once" = do + success <- runOnce + if success + then Exit.exitSuccess + else Exit.exitWith (Exit.ExitFailure 1) + | args `Cli.has` Cli.command "daemon" = do + let interval = + Cli.getArg args (Cli.argument "interval") + +> readMaybe + |> fromMaybe 300 + runDaemon interval + | args `Cli.has` Cli.command "status" = do + st <- loadState + BL.putStr <| Aeson.encode st + putStrLn ("" :: String) + | otherwise = do + Log.fail ["deployer", "unknown command"] + Exit.exitWith (Exit.ExitFailure 1) + +test :: Test.Tree +test = + Test.group + "Omni.Deploy.Deployer" + [ test_emptyState, + test_stateJsonRoundtrip + ] + +test_emptyState :: Test.Tree +test_emptyState = + Test.unit "empty state has no services" <| do + let st = emptyState + Map.null (stateServices st) Test.@=? True + +test_stateJsonRoundtrip :: Test.Tree +test_stateJsonRoundtrip = + Test.unit "state JSON roundtrip" <| do + let testState = + DeployerState + { stateServices = + Map.fromList + [ ("svc-a", "/nix/store/abc"), + ("svc-b", "/nix/store/xyz") + ] + } + let encoded = Aeson.encode testState + case Aeson.eitherDecode encoded of + Left err -> Test.assertFailure err + Right decoded -> stateServices decoded Test.@=? stateServices testState + +main :: IO () +main = Cli.main <| Cli.Plan help move test pure diff --git a/Omni/Deploy/Deployer.nix b/Omni/Deploy/Deployer.nix new file mode 100644 index 0000000..091b43b --- /dev/null +++ b/Omni/Deploy/Deployer.nix @@ -0,0 +1,104 @@ +{ + options, + lib, + config, + pkgs, + ... +}: let + cfg = config.services.biz-deployer; +in { + options.services.biz-deployer = { + enable = lib.mkEnableOption "Enable the biz-deployer mini-PaaS service"; + + package = lib.mkOption { + type = lib.types.package; + description = "The biz-deployer package to use"; + }; + + manifestPackage = lib.mkOption { + type = lib.types.package; + description = "The deploy-manifest package for CLI operations"; + }; + + interval = lib.mkOption { + type = lib.types.int; + default = 300; + description = "Interval in seconds between reconciliation cycles"; + }; + + stateDir = lib.mkOption { + type = lib.types.path; + default = "/var/lib/biz-deployer"; + description = "Directory for deployer state and generated unit files"; + }; + + secretsDir = lib.mkOption { + type = lib.types.path; + default = "/var/lib/biz-secrets"; + description = "Directory containing service secret .env files"; + }; + + gcrootsDir = lib.mkOption { + type = lib.types.path; + default = "/nix/var/nix/gcroots/biz"; + description = "Directory for GC roots to prevent closure garbage collection"; + }; + }; + + config = lib.mkIf cfg.enable { + # Create required directories + systemd.tmpfiles.rules = [ + "d ${cfg.stateDir} 0755 root root -" + "d ${cfg.stateDir}/services 0755 root root -" + "d ${cfg.secretsDir} 0700 root root -" + "d ${cfg.gcrootsDir} 0755 root root -" + ]; + + # The deployer service runs as a timer-triggered oneshot + systemd.services.biz-deployer = { + description = "Mini-PaaS deployment agent"; + after = ["network-online.target"]; + wants = ["network-online.target"]; + path = [cfg.package cfg.manifestPackage pkgs.nix pkgs.awscli2]; + + serviceConfig = { + Type = "oneshot"; + ExecStart = "${cfg.package}/bin/biz-deployer once"; + Environment = [ + "HOME=/root" + "AWS_SHARED_CREDENTIALS_FILE=/root/.aws/credentials" + ]; + + # Note: Hardening disabled because deployer needs write access to + # /etc/systemd/system, /nix/store, /nix/var, /root/.cache/nix + PrivateTmp = true; + }; + }; + + # Timer to run deployer every N seconds + systemd.timers.biz-deployer = { + description = "Timer for biz-deployer reconciliation"; + wantedBy = ["timers.target"]; + timerConfig = { + OnBootSec = "1min"; + OnUnitActiveSec = "${toString cfg.interval}s"; + Unit = "biz-deployer.service"; + }; + }; + + # Caddy reverse proxy for deployed services + # TODO: Generate this dynamically from manifest in the future + services.caddy = { + enable = true; + globalConfig = '' + admin localhost:2019 + ''; + virtualHosts."podcastitlater.bensima.com".extraConfig = '' + reverse_proxy localhost:8000 + ''; + }; + + # Open firewall for HTTP/HTTPS + networking.firewall.allowedTCPPorts = [80 443]; + }; +} diff --git a/Omni/Deploy/Manifest.hs b/Omni/Deploy/Manifest.hs new file mode 100644 index 0000000..e0d0b78 --- /dev/null +++ b/Omni/Deploy/Manifest.hs @@ -0,0 +1,673 @@ +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | Manifest schema and S3 operations for the mini-PaaS deployment system. +-- +-- Uses aws CLI for S3 operations (simpler than amazonka, already available). +-- +-- : out deploy-manifest +-- : dep aeson +-- : dep time +-- : dep directory +-- : dep temporary +-- : run awscli2 +module Omni.Deploy.Manifest + ( Artifact (..), + Exec (..), + Http (..), + Systemd (..), + Hardening (..), + Service (..), + Manifest (..), + findService, + updateService, + createEmptyManifest, + loadManifestFromS3, + saveManifestToS3, + archiveManifest, + listArchivedManifests, + rollbackToManifest, + s3Bucket, + s3Endpoint, + main, + test, + ) +where + +import Alpha +import Data.Aeson ((.!=), (.:), (.:?), (.=)) +import qualified Data.Aeson as Aeson +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as Text +import qualified Data.Text.Encoding as TE +import Data.Time (UTCTime, getCurrentTime) +import Data.Time.Format.ISO8601 (iso8601Show) +import qualified Omni.Cli as Cli +import qualified Omni.Log as Log +import qualified Omni.Test as Test +import qualified System.Exit as Exit +import qualified System.IO as IO +import qualified System.IO.Temp as Temp +import qualified System.Process as Process + +s3Bucket :: Text +s3Bucket = "omni-nix-cache" + +s3Endpoint :: Text +s3Endpoint = "https://nyc3.digitaloceanspaces.com" + +data Artifact = Artifact + { artifactType :: Text, + storePath :: Text + } + deriving (Show, Eq, Generic) + +instance Aeson.FromJSON Artifact where + parseJSON = + Aeson.withObject "Artifact" <| \o -> + Artifact + </ (o .:? "type" .!= "nix-closure") + <*> o + .: "storePath" + +instance Aeson.ToJSON Artifact where + toJSON Artifact {..} = + Aeson.object + [ "type" .= artifactType, + "storePath" .= storePath + ] + +data Exec = Exec + { execCommand :: Maybe Text, + execUser :: Text, + execGroup :: Text + } + deriving (Show, Eq, Generic) + +instance Aeson.FromJSON Exec where + parseJSON = + Aeson.withObject "Exec" <| \o -> + Exec + </ (o .:? "command") + <*> o + .:? "user" + .!= "root" + <*> o + .:? "group" + .!= "root" + +instance Aeson.ToJSON Exec where + toJSON Exec {..} = + Aeson.object + [ "command" .= execCommand, + "user" .= execUser, + "group" .= execGroup + ] + +defaultExec :: Exec +defaultExec = Exec Nothing "root" "root" + +data Http = Http + { httpDomain :: Text, + httpPath :: Text, + httpInternalPort :: Int + } + deriving (Show, Eq, Generic) + +instance Aeson.FromJSON Http where + parseJSON = + Aeson.withObject "Http" <| \o -> + Http + </ (o .: "domain") + <*> o + .:? "path" + .!= "/" + <*> o + .: "internalPort" + +instance Aeson.ToJSON Http where + toJSON Http {..} = + Aeson.object + [ "domain" .= httpDomain, + "path" .= httpPath, + "internalPort" .= httpInternalPort + ] + +data Systemd = Systemd + { systemdAfter :: [Text], + systemdRequires :: [Text], + systemdRestart :: Text, + systemdRestartSec :: Int + } + deriving (Show, Eq, Generic) + +instance Aeson.FromJSON Systemd where + parseJSON = + Aeson.withObject "Systemd" <| \o -> + Systemd + </ (o .:? "after" .!= ["network-online.target"]) + <*> o + .:? "requires" + .!= [] + <*> o + .:? "restart" + .!= "on-failure" + <*> o + .:? "restartSec" + .!= 5 + +instance Aeson.ToJSON Systemd where + toJSON Systemd {..} = + Aeson.object + [ "after" .= systemdAfter, + "requires" .= systemdRequires, + "restart" .= systemdRestart, + "restartSec" .= systemdRestartSec + ] + +defaultSystemd :: Systemd +defaultSystemd = Systemd ["network-online.target"] [] "on-failure" 5 + +data Hardening = Hardening + { hardeningDynamicUser :: Bool, + hardeningPrivateTmp :: Bool, + hardeningProtectSystem :: Text, + hardeningProtectHome :: Bool + } + deriving (Show, Eq, Generic) + +instance Aeson.FromJSON Hardening where + parseJSON = + Aeson.withObject "Hardening" <| \o -> + Hardening + </ (o .:? "dynamicUser" .!= False) + <*> o + .:? "privateTmp" + .!= True + <*> o + .:? "protectSystem" + .!= "strict" + <*> o + .:? "protectHome" + .!= True + +instance Aeson.ToJSON Hardening where + toJSON Hardening {..} = + Aeson.object + [ "dynamicUser" .= hardeningDynamicUser, + "privateTmp" .= hardeningPrivateTmp, + "protectSystem" .= hardeningProtectSystem, + "protectHome" .= hardeningProtectHome + ] + +defaultHardening :: Hardening +defaultHardening = Hardening False True "strict" True + +data Service = Service + { serviceName :: Text, + serviceArtifact :: Artifact, + serviceHosts :: [Text], + serviceExec :: Exec, + serviceEnv :: Map Text Text, + serviceEnvFile :: Maybe Text, + serviceHttp :: Maybe Http, + serviceSystemd :: Systemd, + serviceHardening :: Hardening, + serviceRevision :: Maybe Text + } + deriving (Show, Eq, Generic) + +instance Aeson.FromJSON Service where + parseJSON = + Aeson.withObject "Service" <| \o -> + Service + </ (o .: "name") + <*> o + .: "artifact" + <*> o + .:? "hosts" + .!= ["biz"] + <*> o + .:? "exec" + .!= defaultExec + <*> o + .:? "env" + .!= mempty + <*> o + .:? "envFile" + <*> o + .:? "http" + <*> o + .:? "systemd" + .!= defaultSystemd + <*> o + .:? "hardening" + .!= defaultHardening + <*> o + .:? "revision" + +instance Aeson.ToJSON Service where + toJSON Service {..} = + Aeson.object + [ "name" .= serviceName, + "artifact" .= serviceArtifact, + "hosts" .= serviceHosts, + "exec" .= serviceExec, + "env" .= serviceEnv, + "envFile" .= serviceEnvFile, + "http" .= serviceHttp, + "systemd" .= serviceSystemd, + "hardening" .= serviceHardening, + "revision" .= serviceRevision + ] + +data Manifest = Manifest + { manifestVersion :: Int, + manifestGeneration :: UTCTime, + manifestServices :: [Service] + } + deriving (Show, Eq, Generic) + +instance Aeson.FromJSON Manifest where + parseJSON = + Aeson.withObject "Manifest" <| \o -> + Manifest + </ (o .:? "version" .!= 1) + <*> o + .: "generation" + <*> o + .:? "services" + .!= [] + +instance Aeson.ToJSON Manifest where + toJSON Manifest {..} = + Aeson.object + [ "version" .= manifestVersion, + "generation" .= manifestGeneration, + "services" .= manifestServices + ] + +findService :: Text -> Manifest -> Maybe Service +findService name manifest = + find (\s -> serviceName s == name) (manifestServices manifest) + +updateService :: Text -> Text -> Maybe Text -> Manifest -> Either Text Manifest +updateService name newStorePath revision manifest = + case findService name manifest of + Nothing -> Left <| "Service '" <> name <> "' not found in manifest" + Just _ -> Right <| manifest {manifestServices = updatedServices} + where + updatedServices = map updateIfMatch (manifestServices manifest) + updateIfMatch svc + | serviceName svc == name = + svc + { serviceArtifact = (serviceArtifact svc) {storePath = newStorePath}, + serviceRevision = revision + } + | otherwise = svc + +createEmptyManifest :: IO Manifest +createEmptyManifest = do + now <- getCurrentTime + pure <| Manifest 1 now [] + +awsS3Args :: [String] +awsS3Args = + [ "--endpoint-url", + Text.unpack s3Endpoint, + "--profile", + "digitalocean" + ] + +s3Get :: Text -> FilePath -> IO Bool +s3Get key destPath = do + let url = "s3://" <> Text.unpack s3Bucket <> "/" <> Text.unpack key + args = ["s3", "cp"] ++ awsS3Args ++ [url, destPath] + (exitCode, _, _) <- Process.readProcessWithExitCode "aws" args "" + pure <| exitCode == Exit.ExitSuccess + +s3Put :: FilePath -> Text -> IO Bool +s3Put srcPath key = do + let url = "s3://" <> Text.unpack s3Bucket <> "/" <> Text.unpack key + args = ["s3", "cp"] ++ awsS3Args ++ [srcPath, url] + (exitCode, _, _) <- Process.readProcessWithExitCode "aws" args "" + pure <| exitCode == Exit.ExitSuccess + +s3List :: Text -> IO [Text] +s3List prefix = do + let url = "s3://" <> Text.unpack s3Bucket <> "/" <> Text.unpack prefix + args = ["s3", "ls"] ++ awsS3Args ++ [url] + (exitCode, stdout', _) <- Process.readProcessWithExitCode "aws" args "" + case exitCode of + Exit.ExitSuccess -> + pure <| parseS3ListOutput (Text.pack stdout') + Exit.ExitFailure _ -> pure [] + +parseS3ListOutput :: Text -> [Text] +parseS3ListOutput output = + output + |> Text.lines + |> map extractFilename + |> filter (not <. Text.null) + where + extractFilename line = + case Text.words line of + [_, _, _, filename] -> filename + _ -> "" + +loadManifestFromS3 :: IO (Maybe Manifest) +loadManifestFromS3 = loadManifestFromS3' "manifest.json" + +loadManifestFromS3' :: Text -> IO (Maybe Manifest) +loadManifestFromS3' key = do + Temp.withSystemTempFile "manifest.json" <| \tmpPath tmpHandle -> do + IO.hClose tmpHandle + success <- s3Get key tmpPath + if success + then do + contents <- BL.readFile tmpPath + case Aeson.eitherDecode contents of + Left _ -> pure Nothing + Right manifest -> pure <| Just manifest + else pure Nothing + +archiveManifest :: Manifest -> IO Text +archiveManifest manifest = do + let timestamp = + iso8601Show (manifestGeneration manifest) + |> filter (\c -> c /= ':' && c /= '-') + |> Text.pack + archiveKey = "manifests/manifest-" <> timestamp <> ".json" + Temp.withSystemTempFile "manifest.json" <| \tmpPath tmpHandle -> do + BL.hPut tmpHandle (Aeson.encode manifest) + IO.hClose tmpHandle + _ <- s3Put tmpPath archiveKey + pure archiveKey + +listArchivedManifests :: IO [Text] +listArchivedManifests = do + files <- s3List "manifests/" + pure <| filter (Text.isSuffixOf ".json") files + +rollbackToManifest :: Text -> IO Bool +rollbackToManifest archiveKey = do + let fullKey = + if "manifests/" `Text.isPrefixOf` archiveKey + then archiveKey + else "manifests/" <> archiveKey + archived <- loadManifestFromS3' fullKey + case archived of + Nothing -> pure False + Just manifest -> do + saveManifestToS3 manifest + pure True + +saveManifestToS3 :: Manifest -> IO () +saveManifestToS3 = saveManifestToS3' "manifest.json" + +saveManifestToS3' :: Text -> Manifest -> IO () +saveManifestToS3' key manifest = do + existing <- loadManifestFromS3' key + case existing of + Just old -> void <| archiveManifest old + Nothing -> pure () + + now <- getCurrentTime + let updatedManifest = manifest {manifestGeneration = now} + Temp.withSystemTempFile "manifest.json" <| \tmpPath tmpHandle -> do + BL.hPut tmpHandle (Aeson.encode updatedManifest) + IO.hClose tmpHandle + _ <- s3Put tmpPath key + pure () + +help :: Cli.Docopt +help = + [Cli.docopt| +deploy-manifest - Manage deployment manifest in S3 + +Usage: + deploy-manifest test + deploy-manifest init + deploy-manifest show + deploy-manifest update <name> <store-path> [<revision>] + deploy-manifest add-service <json> + deploy-manifest list + deploy-manifest rollback <archive> + deploy-manifest (-h | --help) + +Commands: + test Run tests + init Initialize empty manifest in S3 + show Show current manifest + update Update service store path in manifest + add-service Add a new service from JSON + list List archived manifest generations + rollback Restore a previous manifest version + +Options: + -h --help Show this help +|] + +move :: Cli.Arguments -> IO () +move args + | args `Cli.has` Cli.command "init" = do + existing <- loadManifestFromS3 + case existing of + Just _ -> do + Log.fail ["manifest", "already exists"] + Exit.exitWith (Exit.ExitFailure 1) + Nothing -> do + manifest <- createEmptyManifest + saveManifestToS3 manifest + Log.good ["manifest", "initialized empty manifest"] + | args `Cli.has` Cli.command "show" = do + manifest <- loadManifestFromS3 + case manifest of + Nothing -> putStrLn ("no manifest found" :: String) + Just m -> BL.putStr <| Aeson.encode m + | args `Cli.has` Cli.command "update" = do + let name = + Cli.getArg args (Cli.argument "name") + |> fromMaybe "" + |> Text.pack + storePath' = + Cli.getArg args (Cli.argument "store-path") + |> fromMaybe "" + |> Text.pack + revision = + Cli.getArg args (Cli.argument "revision") + /> Text.pack + manifest <- loadManifestFromS3 + case manifest of + Nothing -> do + Log.fail ["manifest", "no manifest found in S3"] + Exit.exitWith (Exit.ExitFailure 1) + Just m -> case updateService name storePath' revision m of + Left err -> do + Log.fail ["manifest", err] + Exit.exitWith (Exit.ExitFailure 1) + Right updated -> do + saveManifestToS3 updated + Log.good ["manifest", "updated", name, "->", storePath'] + | args `Cli.has` Cli.command "add-service" = do + let jsonStr = + Cli.getArg args (Cli.argument "json") + |> fromMaybe "" + case Aeson.eitherDecode (BL.fromStrict <| TE.encodeUtf8 <| Text.pack jsonStr) of + Left err -> do + Log.fail ["manifest", "invalid JSON:", Text.pack err] + Exit.exitWith (Exit.ExitFailure 1) + Right svc -> do + manifest <- loadManifestFromS3 + m <- maybe createEmptyManifest pure manifest + case findService (serviceName svc) m of + Just _ -> do + Log.fail ["manifest", "service already exists:", serviceName svc] + Exit.exitWith (Exit.ExitFailure 1) + Nothing -> do + let newManifest = m {manifestServices = manifestServices m ++ [svc]} + saveManifestToS3 newManifest + Log.good ["manifest", "added service", serviceName svc] + | args `Cli.has` Cli.command "list" = do + archives <- listArchivedManifests + if null archives + then putStrLn ("no archived manifests found" :: String) + else + forM_ archives <| \archive -> do + putStrLn <| Text.unpack archive + | args `Cli.has` Cli.command "rollback" = do + let archive = + Cli.getArg args (Cli.argument "archive") + |> fromMaybe "" + |> Text.pack + success <- rollbackToManifest archive + if success + then Log.good ["manifest", "rolled back to", archive] + else do + Log.fail ["manifest", "failed to rollback to", archive] + Exit.exitWith (Exit.ExitFailure 1) + | otherwise = do + Log.fail ["manifest", "unknown command"] + Exit.exitWith (Exit.ExitFailure 1) + +test :: Test.Tree +test = + Test.group + "Omni.Deploy.Manifest" + [ test_artifactDefaults, + test_serviceDefaults, + test_manifestJsonRoundtrip, + test_updateService, + test_findService + ] + +test_artifactDefaults :: Test.Tree +test_artifactDefaults = + Test.unit "artifact defaults type to nix-closure" <| do + let json = "{\"storePath\": \"/nix/store/abc123\"}" + case Aeson.eitherDecode json of + Left err -> Test.assertFailure err + Right (artifact :: Artifact) -> + artifactType artifact Test.@=? "nix-closure" + +test_serviceDefaults :: Test.Tree +test_serviceDefaults = + Test.unit "service has correct defaults" <| do + let json = "{\"name\": \"test-svc\", \"artifact\": {\"storePath\": \"/nix/store/xyz\"}}" + case Aeson.eitherDecode json of + Left err -> Test.assertFailure err + Right (svc :: Service) -> do + serviceHosts svc Test.@=? ["biz"] + execUser (serviceExec svc) Test.@=? "root" + systemdRestart (serviceSystemd svc) Test.@=? "on-failure" + hardeningPrivateTmp (serviceHardening svc) Test.@=? True + +test_manifestJsonRoundtrip :: Test.Tree +test_manifestJsonRoundtrip = + Test.unit "manifest JSON roundtrip" <| do + now <- getCurrentTime + let manifest = + Manifest + { manifestVersion = 1, + manifestGeneration = now, + manifestServices = + [ Service + { serviceName = "test-svc", + serviceArtifact = Artifact "nix-closure" "/nix/store/abc123", + serviceHosts = ["biz"], + serviceExec = defaultExec, + serviceEnv = mempty, + serviceEnvFile = Nothing, + serviceHttp = Just (Http "example.com" "/" 8000), + serviceSystemd = defaultSystemd, + serviceHardening = defaultHardening, + serviceRevision = Nothing + } + ] + } + encoded = Aeson.encode manifest + case Aeson.eitherDecode encoded of + Left err -> Test.assertFailure err + Right decoded -> do + length (manifestServices decoded) Test.@=? 1 + case head <| manifestServices decoded of + Nothing -> Test.assertFailure "no services" + Just svc -> serviceName svc Test.@=? "test-svc" + +test_updateService :: Test.Tree +test_updateService = + Test.unit "updateService updates store path" <| do + now <- getCurrentTime + let manifest = + Manifest + { manifestVersion = 1, + manifestGeneration = now, + manifestServices = + [ Service + "svc-a" + (Artifact "nix-closure" "/nix/store/old") + ["biz"] + defaultExec + mempty + Nothing + Nothing + defaultSystemd + defaultHardening + Nothing, + Service + "svc-b" + (Artifact "nix-closure" "/nix/store/other") + ["biz"] + defaultExec + mempty + Nothing + Nothing + defaultSystemd + defaultHardening + Nothing + ] + } + case updateService "svc-a" "/nix/store/new" (Just "abc123") manifest of + Left err -> Test.assertFailure (Text.unpack err) + Right updated -> case head <| manifestServices updated of + Nothing -> Test.assertFailure "no services" + Just svcA -> do + storePath (serviceArtifact svcA) Test.@=? "/nix/store/new" + serviceRevision svcA Test.@=? Just "abc123" + +test_findService :: Test.Tree +test_findService = + Test.unit "findService finds service by name" <| do + now <- getCurrentTime + let manifest = + Manifest + { manifestVersion = 1, + manifestGeneration = now, + manifestServices = + [ Service + "svc-a" + (Artifact "nix-closure" "/nix/store/a") + ["biz"] + defaultExec + mempty + Nothing + Nothing + defaultSystemd + defaultHardening + Nothing + ] + } + case findService "svc-a" manifest of + Nothing -> Test.assertFailure "service not found" + Just svc -> serviceName svc Test.@=? "svc-a" + case findService "nonexistent" manifest of + Nothing -> pure () + Just _ -> Test.assertFailure "found nonexistent service" + +main :: IO () +main = Cli.main <| Cli.Plan help move test pure diff --git a/Omni/Deploy/PLAN.md b/Omni/Deploy/PLAN.md new file mode 100644 index 0000000..1870ebd --- /dev/null +++ b/Omni/Deploy/PLAN.md @@ -0,0 +1,299 @@ +# Mini-PaaS Deployment System + +## Overview + +A pull-based deployment system that allows deploying Nix-built services without full NixOS rebuilds. Services are defined in a manifest, pulled from an S3 binary cache, and managed as systemd units with Caddy for reverse proxying. + +## Problem Statement + +Current deployment (`push.sh` + full NixOS rebuild) is slow and heavyweight: +- Every service change requires rebuilding the entire NixOS configuration +- Adding a new service requires modifying Biz.nix and doing a full rebuild +- Deploy time from "code ready" to "running in prod" is too long + +## Goals + +1. **Fast deploys**: Update a single service in <5 minutes without touching others +2. **Independent services**: Deploy services without NixOS rebuild +3. **Add services dynamically**: New services via manifest, no NixOS changes needed +4. **Maintain NixOS for base OS**: Keep NixOS for infra (Postgres, SSH, firewall) +5. **Clear scale-up path**: Single host now, easy migration to Nomad later + +## Key Design Decisions + +1. **Nix closures, not Docker**: Deploy Nix store paths directly, not containers. Simpler, no Docker daemon needed. Use systemd hardening for isolation. + +2. **Pull-based, not push-based**: Target host polls S3 for manifest changes every 5 min. No SSH needed for deploys, just update manifest. + +3. **Caddy, not nginx**: Caddy has admin API for dynamic route updates and automatic HTTPS. No config file regeneration needed. + +4. **Separation of concerns**: + - `bild`: Build tool, adds `--cache` flag to sign+push closures + - `push.sh`: Deploy orchestrator, handles both NixOS and service deploys + - `deployer`: Runs on target, polls manifest, manages services + +5. **Out-of-band secrets**: Secrets stored in `/var/lib/biz-secrets/*.env`, manifest only references paths. No secrets in S3. + +6. **Nix profiles for rollback**: Each service gets a Nix profile, enabling `nix-env --rollback`. + +## Relevant Existing Files + +- `Omni/Bild.hs` - Build tool, modify to add `--cache` flag +- `Omni/Bild.nix` - Nix build library, has `bild.run` for building packages +- `Omni/Ide/push.sh` - Current deploy script, enhance for service deploys +- `Biz.nix` - Current NixOS config for biz host +- `Biz/Packages.nix` - Builds all Biz packages +- `Biz/PodcastItLater/Web.nix` - Example NixOS service module (to be replaced) +- `Biz/PodcastItLater/Web.py` - Example Python service (deploy target) +- `Omni/Os/Base.nix` - Base NixOS config, add S3 substituter here + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ DEV MACHINE │ +│ │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ push.sh <target> │ │ +│ │ │ │ +│ │ if target.nix: (NixOS deploy - existing behavior) │ │ +│ │ bild <target> │ │ +│ │ nix copy --to ssh://host │ │ +│ │ ssh host switch-to-configuration │ │ +│ │ │ │ +│ │ else: (Service deploy - new behavior) │ │ +│ │ bild <target> --cache ──▶ sign + push closure to S3 │ │ +│ │ update manifest.json in S3 with new storePath │ │ +│ │ (deployer on target will pick up changes) │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +│ │ +│ Separation of concerns: │ +│ - bild: Build + sign + push to S3 cache (--cache flag) │ +│ - push.sh: Orchestrates deploy, updates manifest, handles both modes │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ DO SPACES (S3 BINARY CACHE) - PRIVATE │ +│ │ +│ /nar/*.nar.xz ← Compressed Nix store paths │ +│ /*.narinfo ← Metadata + signatures │ +│ /nix-cache-info ← Cache metadata │ +│ /manifest.json ← Current deployment state │ +│ /manifests/ ← Historical manifests for rollback │ +│ manifest-<ts>.json │ +│ │ +│ Authentication: AWS credentials (Spaces access key) │ +│ - Dev machine: write access for pushing │ +│ - Target host: read access for pulling │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + poll every 5 min + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ TARGET HOST (biz) │ +│ │ +│ ┌──────────────────────────────────────────────────────────────────────┐ │ +│ │ biz-deployer │ │ +│ │ (Python systemd service, runs every 5 min via timer) │ │ +│ │ │ │ +│ │ 1. Fetch manifest.json from S3 │ │ +│ │ 2. Compare to local state │ │ +│ │ 3. For changed services: │ │ +│ │ - nix copy --from s3://... <storePath> │ │ +│ │ - Generate systemd unit file │ │ +│ │ - Create GC root │ │ +│ │ - systemctl daemon-reload && restart │ │ +│ │ 4. Update Caddy routes via API │ │ +│ │ 5. Save local state │ │ +│ └──────────────────────────────────────────────────────────────────────┘ │ +│ │ +│ Directories: │ +│ - /var/lib/biz-deployer/services/*.service (generated units) │ +│ - /var/lib/biz-deployer/state.json (local state) │ +│ - /var/lib/biz-secrets/*.env (secret env files) │ +│ - /nix/var/nix/gcroots/biz/* (GC roots) │ +│ │ +│ NixOS manages: │ +│ - Base OS, SSH, firewall │ +│ - Caddy with admin API enabled │ +│ - PostgreSQL, Redis (infra services) │ +│ - biz-deployer service itself │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +## Components + +### 1. S3 Binary Cache (DO Spaces) + +**Bucket**: `omni-nix-cache` (private) +**Region**: `nyc3` (or nearest) + +**Credentials**: +- Dev machine: `~/.aws/credentials` with `[digitalocean]` profile +- Target host: `/root/.aws/credentials` with same profile + +**Signing key**: +- Generate: `nix-store --generate-binary-cache-key omni-cache cache-priv-key.pem cache-pub-key.pem` +- Private key: `~/.config/nix/cache-priv-key.pem` (dev machine only) +- Public key: Added to target's `nix.settings.trusted-public-keys` + +**S3 URL format**: +``` +s3://omni-nix-cache?profile=digitalocean&scheme=https&endpoint=nyc3.digitaloceanspaces.com +``` + +### 2. Manifest Schema (v1) + +```json +{ + "version": 1, + "generation": "2025-01-15T12:34:56Z", + "services": [ + { + "name": "podcastitlater-web", + "artifact": { + "type": "nix-closure", + "storePath": "/nix/store/abc123-podcastitlater-web-1.2.3" + }, + "hosts": ["biz"], + "exec": { + "command": "podcastitlater-web", + "user": "pil-web", + "group": "pil" + }, + "env": { + "PORT": "8000", + "AREA": "Live", + "DATA_DIR": "/var/podcastitlater", + "BASE_URL": "https://podcastitlater.com" + }, + "envFile": "/var/lib/biz-secrets/podcastitlater-web.env", + "http": { + "domain": "podcastitlater.com", + "path": "/", + "internalPort": 8000 + }, + "systemd": { + "after": ["network-online.target", "postgresql.service"], + "requires": [], + "restart": "on-failure", + "restartSec": 5 + }, + "hardening": { + "dynamicUser": false, + "privateTmp": true, + "protectSystem": "strict", + "protectHome": true + }, + "revision": "abc123def" + } + ] +} +``` + +### 3. Deployer Service (Omni/Deploy/Deployer.py) + +Python service that: +- Polls manifest from S3 +- Pulls Nix closures +- Generates systemd units +- Updates Caddy via API +- Manages GC roots +- Tracks local state + +### 4. NixOS Module (Omni/Deploy/Deployer.nix) + +Configures: +- biz-deployer systemd service + timer +- Caddy with admin API +- S3 substituter configuration +- Required directories and permissions + +### 5. Bild Integration (Omni/Bild.hs) + +New `--cache` flag that: +1. Builds the target +2. Signs the closure with cache key (using NIX_CACHE_KEY env var) +3. Pushes to S3 cache +4. Outputs the store path for push.sh to use + +Does NOT update manifest - that's push.sh's responsibility. + +### 6. Push.sh Enhancement (Omni/Ide/push.sh) + +Detect deploy mode from target extension: +- `.nix` → NixOS deploy (existing behavior) +- `.py`, `.hs`, etc. → Service deploy (new behavior) + +For service deploys: +1. Call `bild <target> --cache` +2. Capture store path from bild output +3. Fetch current manifest.json from S3 +4. Archive current manifest to manifests/manifest-<timestamp>.json +5. Update manifest with new storePath for this service +6. Upload new manifest.json to S3 +7. Deployer on target picks up change within 5 minutes + +## Migration Path + +### Phase 1: Infrastructure Setup +1. Create DO Spaces bucket +2. Generate signing keys +3. Configure S3 substituter on target +4. Deploy base deployer service (empty manifest) + +### Phase 2: Migrate First Service +1. Choose non-critical service (e.g., podcastitlater-worker) +2. Add to manifest with different port +3. Verify via staging route +4. Flip Caddy to new service +5. Disable old NixOS-managed service + +### Phase 3: Migrate Remaining Services +- Repeat Phase 2 for each service +- Order: worker → web → storybook + +### Phase 4: Cleanup +- Remove service-specific NixOS modules +- Simplify Biz.nix to base OS only + +## Rollback Strategy + +1. Each deploy archives current manifest to `/manifests/manifest-<ts>.json` +2. Rollback = copy old manifest back to `manifest.json` +3. Deployer sees new generation, converges to old state +4. GC roots keep old closures alive (last 5 versions per service) + +## Scale-up Path + +| Stage | Hosts | Changes | +|-------|-------|---------| +| Current | 1 | Full architecture as described | +| 2-3 hosts | 2-3 | Add `hosts` filtering, each host runs deployer | +| 4+ hosts | 4+ | Consider Nomad with nix-nomad for job definitions | + +## Security Considerations + +- S3 bucket is private (authenticated reads/writes) +- Signing key never leaves dev machine +- Secrets stored out-of-band in `/var/lib/biz-secrets/` +- systemd hardening for service isolation +- Deployer validates manifest schema before applying + +## File Locations + +``` +Omni/ + Deploy/ + PLAN.md # This document + Deployer.py # Main deployer service + Deployer.nix # NixOS module + Manifest.py # Manifest schema/validation + Systemd.py # Unit file generation + Caddy.py # Caddy API integration + S3.py # S3 operations (for deployer) + Bild.hs # Add --cache flag for sign+push + Ide/ + push.sh # Enhanced: NixOS deploy OR service deploy + manifest update +``` diff --git a/Omni/Deploy/Packages.nix b/Omni/Deploy/Packages.nix new file mode 100644 index 0000000..4cc42e9 --- /dev/null +++ b/Omni/Deploy/Packages.nix @@ -0,0 +1,11 @@ +# Build all deployer packages independently, outside NixOS context. +# +# Usage: +# nix-build Omni/Deploy/Packages.nix # builds all packages +# nix-build Omni/Deploy/Packages.nix -A biz-deployer # builds one package +{bild ? import ../Bild.nix {}}: { + biz-deployer = bild.run ./Deployer.hs; + deploy-manifest = bild.run ./Manifest.hs; + deploy-systemd = bild.run ./Systemd.hs; + deploy-caddy = bild.run ./Caddy.hs; +} diff --git a/Omni/Deploy/README.md b/Omni/Deploy/README.md new file mode 100644 index 0000000..cabad43 --- /dev/null +++ b/Omni/Deploy/README.md @@ -0,0 +1,211 @@ +# Mini-PaaS Deployment System + +A pull-based deployment system for deploying Nix-built services without full NixOS rebuilds. + +## Quick Start + +### Deploy a Service + +```bash +# Build, cache to S3, and update manifest +Omni/Ide/push.sh Biz/PodcastItLater/Web.py + +# The deployer on the target host polls every 5 minutes +# To force immediate deployment, SSH to host and run: +ssh biz sudo systemctl start biz-deployer +``` + +### View Current State + +```bash +# Show current manifest +deploy-manifest show + +# List archived manifests (for rollback) +deploy-manifest list + +# Check deployer status on target +ssh biz sudo systemctl status biz-deployer +ssh biz cat /var/lib/biz-deployer/state.json +``` + +## Deployment Workflow + +``` +Developer Machine S3 Cache Target Host (biz) + │ │ │ + │ push.sh Biz/App.py │ │ + ├───────────────────────────────►│ │ + │ 1. bild builds + caches │ │ + │ 2. deploy-manifest update │ │ + │ │ poll every 5 min │ + │ │◄─────────────────────────────┤ + │ │ │ + │ │ manifest changed? │ + │ │ - pull closure │ + │ │ - generate systemd unit │ + │ │ - update Caddy route │ + │ │ - restart service │ + │ │─────────────────────────────►│ + │ │ │ +``` + +## Adding a New Service + +### 1. Create the Service Definition + +```bash +deploy-manifest add-service '{ + "name": "my-new-service", + "artifact": {"storePath": "/nix/store/placeholder"}, + "hosts": ["biz"], + "exec": {"command": null, "user": "root", "group": "root"}, + "env": {"PORT": "8080", "AREA": "Live"}, + "envFile": "/var/lib/biz-secrets/my-new-service.env", + "http": {"domain": "myservice.bensima.com", "path": "/", "internalPort": 8080} +}' +``` + +### 2. Create Secrets File on Target + +```bash +ssh biz +sudo mkdir -p /var/lib/biz-secrets +sudo tee /var/lib/biz-secrets/my-new-service.env << 'EOF' +SECRET_KEY=your-secret-here +DATABASE_URL=postgres://... +EOF +sudo chmod 600 /var/lib/biz-secrets/my-new-service.env +``` + +### 3. Deploy the Service + +```bash +Omni/Ide/push.sh Biz/MyService.py +``` + +## Secrets Management + +Secrets are stored out-of-band on the target host, never in S3 or the manifest. + +**Location**: `/var/lib/biz-secrets/<service-name>.env` + +**Format**: Standard environment file +``` +SECRET_KEY=abc123 +DATABASE_URL=postgres://user:pass@localhost/db +STRIPE_API_KEY=sk_live_... +``` + +**Permissions**: `chmod 600`, owned by root + +**Referencing in manifest**: Set `envFile` field to the path + +## Rollback + +### List Available Versions + +```bash +deploy-manifest list +# Output: +# manifest-20251216T033000Z.json +# manifest-20251216T045211.json +# manifest-20251215T120000Z.json +``` + +### Rollback to Previous Version + +```bash +# Restore a specific archived manifest +deploy-manifest rollback manifest-20251215T120000Z.json + +# Force immediate deployment +ssh biz sudo systemctl start biz-deployer +``` + +The rollback archives the current manifest before restoring, so you can always rollback the rollback. + +## Troubleshooting + +### Service Not Starting + +```bash +# Check deployer logs +ssh biz sudo journalctl -u biz-deployer -f + +# Check service logs +ssh biz sudo journalctl -u <service-name> -f + +# Check deployer state +ssh biz cat /var/lib/biz-deployer/state.json +``` + +### Manifest Update Failed + +```bash +# Verify AWS credentials +aws s3 ls s3://omni-nix-cache/ --endpoint-url https://nyc3.digitaloceanspaces.com --profile digitalocean + +# Check manifest exists +deploy-manifest show +``` + +### Closure Not Pulling + +```bash +# Check if store path exists in cache +aws s3 ls s3://omni-nix-cache/<hash>.narinfo --endpoint-url https://nyc3.digitaloceanspaces.com --profile digitalocean + +# Check target can access cache +ssh biz nix copy --from 's3://omni-nix-cache?profile=digitalocean&scheme=https&endpoint=nyc3.digitaloceanspaces.com' /nix/store/<path> +``` + +### Caddy Route Issues + +```bash +# Check Caddy config +ssh biz curl -s localhost:2019/config/ | jq . + +# Check Caddy logs +ssh biz sudo journalctl -u caddy -f +``` + +## Architecture + +| Component | Location | Purpose | +|-----------|----------|---------| +| `bild` | Dev machine | Build tool, caches to S3 | +| `push.sh` | Dev machine | Orchestrates deploys | +| `deploy-manifest` | Dev machine | Manage manifest in S3 | +| `biz-deployer` | Target host | Polls manifest, deploys services | +| Caddy | Target host | Reverse proxy with auto-HTTPS | + +### File Locations on Target + +| Path | Purpose | +|------|---------| +| `/var/lib/biz-deployer/state.json` | Local deployer state | +| `/var/lib/biz-deployer/services/` | Generated systemd units | +| `/var/lib/biz-secrets/` | Service secret env files | +| `/nix/var/nix/gcroots/biz/` | GC roots for deployed closures | +| `/root/.aws/credentials` | S3 credentials | + +## Scale-Up Path + +| Stage | Hosts | Changes Needed | +|-------|-------|----------------| +| Current | 1 | Full architecture as described | +| 2-3 hosts | 2-3 | Add `hosts` filtering (already supported) | +| 4+ hosts | 4+ | Consider migrating to Nomad + nix-nomad | + +The manifest already supports multi-host deployments via the `hosts` array. Each host runs its own deployer and only deploys services where its hostname appears in the `hosts` list. + +## Related Files + +- [Omni/Deploy/Manifest.hs](Manifest.hs) - Manifest CLI and schema +- [Omni/Deploy/Deployer.hs](Deployer.hs) - Deployer service +- [Omni/Deploy/Deployer.nix](Deployer.nix) - NixOS module +- [Omni/Deploy/Systemd.hs](Systemd.hs) - Systemd unit generation +- [Omni/Deploy/Caddy.hs](Caddy.hs) - Caddy API integration +- [Omni/Ide/push.sh](../Ide/push.sh) - Deploy script +- [Omni/Deploy/PLAN.md](PLAN.md) - Original design document diff --git a/Omni/Deploy/Systemd.hs b/Omni/Deploy/Systemd.hs new file mode 100644 index 0000000..7b64d1f --- /dev/null +++ b/Omni/Deploy/Systemd.hs @@ -0,0 +1,269 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | Systemd unit file generator for the mini-PaaS deployment system. +-- +-- : out deploy-systemd +-- : dep directory +module Omni.Deploy.Systemd + ( generateUnit, + writeUnit, + createSymlink, + reloadAndRestart, + stopAndDisable, + removeUnit, + getRunningStorePath, + servicesDir, + main, + test, + ) +where + +import Alpha +import qualified Data.Map as Map +import qualified Data.Text as Text +import qualified Data.Text.IO as Text.IO +import Omni.Deploy.Manifest (Artifact (..), Exec (..), Hardening (..), Service (..), Systemd (..)) +import qualified Omni.Test as Test +import qualified System.Directory as Dir +import System.FilePath ((</>)) +import qualified System.Process as Process + +servicesDir :: FilePath +servicesDir = "/var/lib/biz-deployer/services" + +generateUnit :: Service -> Text +generateUnit Service {..} = + Text.unlines <| unitSection ++ serviceSection ++ hardeningSection ++ installSection + where + binary = fromMaybe serviceName (execCommand serviceExec) + execStart = storePath serviceArtifact <> "/bin/" <> binary + + unitSection = + [ "[Unit]", + "Description=" <> serviceName, + "After=" <> Text.intercalate " " (systemdAfter serviceSystemd) + ] + ++ requiresLine + + requiresLine = + ["Requires=" <> Text.intercalate " " (systemdRequires serviceSystemd) | not (null (systemdRequires serviceSystemd))] + + serviceSection = + [ "", + "[Service]", + "Type=simple", + "ExecStart=" <> execStart, + "User=" <> execUser serviceExec, + "Group=" <> execGroup serviceExec, + "Restart=" <> systemdRestart serviceSystemd, + "RestartSec=" <> tshow (systemdRestartSec serviceSystemd) + ] + ++ envLines + ++ envFileLine + + envLines = + Map.toList serviceEnv + |> map (\(k, v) -> "Environment=\"" <> k <> "=" <> v <> "\"") + + envFileLine = case serviceEnvFile of + Nothing -> [] + Just path -> ["EnvironmentFile=" <> path] + + hardeningSection = + [ "", + "# Hardening", + "PrivateTmp=" <> boolToYesNo (hardeningPrivateTmp serviceHardening), + "ProtectSystem=" <> hardeningProtectSystem serviceHardening, + "ProtectHome=" <> boolToYesNo (hardeningProtectHome serviceHardening), + "NoNewPrivileges=yes" + ] + ++ readWritePathsLine + + readWritePathsLine = + case Map.lookup "DATA_DIR" serviceEnv of + Just dataDir -> ["ReadWritePaths=" <> dataDir] + Nothing -> [] + + installSection = + [ "", + "[Install]", + "WantedBy=multi-user.target" + ] + + boolToYesNo True = "yes" + boolToYesNo False = "no" + +writeUnit :: FilePath -> Service -> IO FilePath +writeUnit baseDir svc = do + Dir.createDirectoryIfMissing True baseDir + let path = baseDir </> Text.unpack (serviceName svc) <> ".service" + content = generateUnit svc + Text.IO.writeFile path content + pure path + +createSymlink :: FilePath -> FilePath -> Service -> IO FilePath +createSymlink baseDir sysDir svc = do + let unitPath = baseDir </> Text.unpack (serviceName svc) <> ".service" + linkPath = sysDir </> Text.unpack (serviceName svc) <> ".service" + exists <- Dir.doesPathExist linkPath + when exists <| Dir.removeFile linkPath + Dir.createFileLink unitPath linkPath + pure linkPath + +reloadAndRestart :: Text -> IO () +reloadAndRestart serviceName' = do + _ <- Process.readProcessWithExitCode "systemctl" ["daemon-reload"] "" + _ <- + Process.readProcessWithExitCode + "systemctl" + ["enable", "--now", Text.unpack serviceName' <> ".service"] + "" + pure () + +stopAndDisable :: Text -> IO () +stopAndDisable serviceName' = do + _ <- + Process.readProcessWithExitCode + "systemctl" + ["disable", "--now", Text.unpack serviceName' <> ".service"] + "" + pure () + +removeUnit :: FilePath -> FilePath -> Text -> IO () +removeUnit baseDir sysDir serviceName' = do + let unitPath = baseDir </> Text.unpack serviceName' <> ".service" + linkPath = sysDir </> Text.unpack serviceName' <> ".service" + linkExists <- Dir.doesPathExist linkPath + when linkExists <| Dir.removeFile linkPath + unitExists <- Dir.doesPathExist unitPath + when unitExists <| Dir.removeFile unitPath + _ <- Process.readProcessWithExitCode "systemctl" ["daemon-reload"] "" + pure () + +-- | Get the store path of the currently running service by reading its unit file. +getRunningStorePath :: Text -> IO (Maybe Text) +getRunningStorePath serviceName' = do + let unitPath = servicesDir </> Text.unpack serviceName' <> ".service" + exists <- Dir.doesFileExist unitPath + if not exists + then pure Nothing + else do + content <- Text.IO.readFile unitPath + pure <| extractStorePath content + where + -- Extract /nix/store/...-service-name from ExecStart=/nix/store/.../bin/... + extractStorePath content = + content + |> Text.lines + |> find (Text.isPrefixOf "ExecStart=") + |> fmap (Text.drop (Text.length "ExecStart=")) + |> fmap (Text.dropWhile (/= '/')) + |> fmap (Text.drop 1) + |> fmap (Text.takeWhile (/= '/')) + |> fmap ("/nix/store/" <>) + +test :: Test.Tree +test = + Test.group + "Omni.Deploy.Systemd" + [ test_generateBasicUnit, + test_generateUnitWithEnv, + test_generateUnitWithCustomExec, + test_generateUnitWithEnvFile, + test_generateUnitWithDependencies, + test_generateUnitWithHardening + ] + +mkTestService :: Text -> Text -> Service +mkTestService name path = + Service + { serviceName = name, + serviceArtifact = Artifact "nix-closure" path, + serviceHosts = ["biz"], + serviceExec = Exec Nothing "root" "root", + serviceEnv = mempty, + serviceEnvFile = Nothing, + serviceHttp = Nothing, + serviceSystemd = Systemd ["network-online.target"] [] "on-failure" 5, + serviceHardening = Hardening False True "strict" True, + serviceRevision = Nothing + } + +test_generateBasicUnit :: Test.Tree +test_generateBasicUnit = + Test.unit "generates basic unit file" <| do + let svc = mkTestService "test-service" "/nix/store/abc123-test" + unit = generateUnit svc + Text.isInfixOf "[Unit]" unit Test.@=? True + Text.isInfixOf "Description=test-service" unit Test.@=? True + Text.isInfixOf "[Service]" unit Test.@=? True + Text.isInfixOf "ExecStart=/nix/store/abc123-test/bin/test-service" unit Test.@=? True + Text.isInfixOf "[Install]" unit Test.@=? True + Text.isInfixOf "WantedBy=multi-user.target" unit Test.@=? True + +test_generateUnitWithEnv :: Test.Tree +test_generateUnitWithEnv = + Test.unit "generates unit with environment" <| do + let svc = + (mkTestService "env-test" "/nix/store/xyz") + { serviceEnv = Map.fromList [("PORT", "8000"), ("DEBUG", "true")] + } + unit = generateUnit svc + Text.isInfixOf "Environment=\"PORT=8000\"" unit Test.@=? True + Text.isInfixOf "Environment=\"DEBUG=true\"" unit Test.@=? True + +test_generateUnitWithCustomExec :: Test.Tree +test_generateUnitWithCustomExec = + Test.unit "generates unit with custom exec" <| do + let svc = + (mkTestService "custom-exec" "/nix/store/abc") + { serviceExec = Exec (Just "my-binary") "www-data" "www-data" + } + unit = generateUnit svc + Text.isInfixOf "ExecStart=/nix/store/abc/bin/my-binary" unit Test.@=? True + Text.isInfixOf "User=www-data" unit Test.@=? True + Text.isInfixOf "Group=www-data" unit Test.@=? True + +test_generateUnitWithEnvFile :: Test.Tree +test_generateUnitWithEnvFile = + Test.unit "generates unit with env file" <| do + let svc = + (mkTestService "env-file-test" "/nix/store/xyz") + { serviceEnvFile = Just "/var/lib/biz-secrets/test.env" + } + unit = generateUnit svc + Text.isInfixOf "EnvironmentFile=/var/lib/biz-secrets/test.env" unit Test.@=? True + +test_generateUnitWithDependencies :: Test.Tree +test_generateUnitWithDependencies = + Test.unit "generates unit with dependencies" <| do + let svc = + (mkTestService "dep-test" "/nix/store/abc") + { serviceSystemd = + Systemd + ["network-online.target", "postgresql.service"] + ["postgresql.service"] + "on-failure" + 5 + } + unit = generateUnit svc + Text.isInfixOf "After=network-online.target postgresql.service" unit Test.@=? True + Text.isInfixOf "Requires=postgresql.service" unit Test.@=? True + +test_generateUnitWithHardening :: Test.Tree +test_generateUnitWithHardening = + Test.unit "generates unit with hardening" <| do + let svc = + (mkTestService "hardened" "/nix/store/abc") + { serviceHardening = Hardening False True "full" True + } + unit = generateUnit svc + Text.isInfixOf "PrivateTmp=yes" unit Test.@=? True + Text.isInfixOf "ProtectSystem=full" unit Test.@=? True + Text.isInfixOf "ProtectHome=yes" unit Test.@=? True + Text.isInfixOf "NoNewPrivileges=yes" unit Test.@=? True + +main :: IO () +main = Test.run test |
