{ stdenv
, sources
, python3
, cmake
, pkg-config
, openmpi
, cudaPackages
}:
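# CUDA- and MPI-enabled build of llama.cpp, pinned through `sources`
# (e.g. a niv-style nix/sources.nix); assumed to be instantiated with `callPackage`.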
let
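  # Python environment for the upstream conversion scripts, which import
  # numpy and sentencepiece; used below to rewrite their shebangs.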
  llama-python = python3.withPackages (ps: with ps; [ numpy sentencepiece ]);
in stdenv.mkDerivation {
  name = "llama.cpp";
  version = sources.llama-cpp.rev;

  src = sources.llama-cpp;

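  # Rewrite the hard-coded Metal shader lookup (only compiled on Darwin) to an
  # absolute path under $out, and point the Python scripts at the pinned interpreter.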
  postPatch = ''
    substituteInPlace ./ggml-metal.m \
      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python'
  '';

  nativeBuildInputs = [ cmake pkg-config ];
  buildInputs = [ openmpi cudaPackages.cudatoolkit ];

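  # Enable the HTTP server target plus the MPI and cuBLAS (CUDA) backends, build
  # shared libraries, and skip rpaths that point into the build tree.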
  cmakeFlags = [
    "-DLLAMA_BUILD_SERVER=ON"
    "-DLLAMA_MPI=ON"
    "-DBUILD_SHARED_LIBS=ON"
    "-DCMAKE_SKIP_BUILD_RPATH=ON"
    "-DLLAMA_CUBLAS=ON"
  ];

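  # Rename the generically named upstream binaries (`main`, `server`) so they
  # are unambiguous on PATH.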
  postInstall = ''
    mv $out/bin/main $out/bin/llama
    mv $out/bin/server $out/bin/llama-server
  '';

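  # Lets `nix run` and `lib.getExe` resolve the `llama` binary.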
  meta.mainProgram = "llama";
}