Files
Dark Steveneq 646b892680
Some checks failed
Periodic Merges (6h) / master → staging-nixos (push) Failing after 12m50s
Periodic Merges (6h) / master → staging-next (push) Failing after 12m54s
Periodic Merges (24h) / merge-base(master,staging) → haskell-updates (push) Failing after 11m54s
Periodic Merges (6h) / staging-next → staging (push) Failing after 12m13s
Periodic Merges (24h) / staging-next-25.05 → staging-25.05 (push) Failing after 13m24s
Periodic Merges (24h) / release-25.05 → staging-next-25.05 (push) Failing after 14m28s
push sheeet
2025-10-09 14:15:47 +02:00

102 lines
2.8 KiB
Nix

# Arguments of the tesseract wrapper function.
# The function returns either `tesseractBase` itself or a derivation that
# wraps it together with the selected language data (see the body below).
{
# Nixpkgs library; used here for `optionalAttrs` and `warn`.
lib,
# Provides the `makeWrapper` shell function used to wrap bin/tesseract.
makeWrapper,
# The derivation being wrapped. The build script reads its bin/tesseract,
# include, share and lib directories.
tesseractBase,
# Attribute set of language data: `languages.all` plus one attribute per
# language code (looked up as `languages.${lang}`).
languages,
# `runCommand` and `imagemagick` are only used by the OCR smoke test.
runCommand,
imagemagick,
# A list of languages like [ "eng" "spa" … ] or `null` for all available languages
# An empty list selects `tesseractBase` without any wrapping.
enableLanguages ? null,
# A list of files or a directory containing files
# Defaults to the data selected by `enableLanguages`.
tessdata ? (
if enableLanguages == null then languages.all else map (lang: languages.${lang}) enableLanguages
),
# This argument is obsolete
# Its value is never used; passing anything other than `null` only triggers
# an evaluation warning.
enableLanguagesHash ? null,
}:
let
# Derivation that layers the selected `tessdata` on top of `tesseractBase`.
#
# The output contains:
#   * bin/tesseract   - a wrapper around $tesseractBase/bin/tesseract that sets
#                       TESSDATA_PREFIX to this output's share/tessdata unless
#                       the caller already set it (`--set-default`)
#   * include, share  - symlink trees pointing into `tesseractBase`
#   * lib             - a real copy with every reference to the base store path
#                       rewritten to this output's store path
#   * share/tessdata  - additionally populated with links to the language data
#
# The build fails if eng.traineddata is not part of the resulting tessdata.
tesseractWithData = tesseractBase.overrideAttrs (_: {
  inherit tesseractBase tessdata;

  nativeBuildInputs = [ makeWrapper ];

  # This must be `buildCommand`, not `buildPhase`: the script assembles the
  # complete output on its own. With `buildCommand` set, stdenv runs only this
  # script; as a `buildPhase` override the unpack/configure/install phases
  # inherited from `tesseractBase` would still run around it.
  buildCommand = ''
    makeWrapper {$tesseractBase,$out}/bin/tesseract --set-default TESSDATA_PREFIX $out/share/tessdata

    # Recursively link include, share
    cp -rs --no-preserve=mode $tesseractBase/{include,share} $out

    cp -r --no-preserve=mode $tesseractBase/lib $out
    # Fixup the store paths in lib so that the tessdata from this derivation is used.
    # The replacement is done in place on the copied files, so both paths
    # must have exactly the same length.
    if (( ''${#tesseractBase} != ''${#out} )); then
      echo "Can't replace store paths due to differing lengths"
      exit 1
    fi
    find $out/lib -type f -exec sed -i "s|$tesseractBase|$out|g" {} \;

    # `tessdata` is either a single directory or a list of files.
    if [[ -d "$tessdata" ]]; then
      ln -s $tessdata/* $out/share/tessdata
    else
      for lang in $tessdata; do
        # Link each file under its name with the store-path prefix removed.
        ln -s $lang $out/share/tessdata/''${lang#/nix/store*-}
      done
    fi

    if [[ ! -e $out/share/tessdata/eng.traineddata ]]; then
      # This is a bug in Tesseract's internal tessdata discovery mechanism
      echo "eng.traineddata must be present in tessdata for Tesseract to work"
      exit 1
    fi
  '';
});
# Attributes re-exported on the resulting package so that callers can get at
# the unwrapped derivation, the language set and the selected data.
passthru = {
  inherit
    tesseractBase
    languages
    tessdata
    ;
};
# OCR smoke test: render a short text to an image with ImageMagick, run the
# wrapped tesseract on it and require the recognized text to match exactly.
# Only run test when all languages are available (the `tesseract` binding
# attaches it to `passthru.tests` solely for `enableLanguages == null`).
test =
  runCommand "tesseract-test-ocr"
    {
      buildInputs = [
        tesseractWithData
        imagemagick
      ];
    }
    ''
      text="hello nix"
      # Keep the scratch image inside the per-build temporary directory
      # instead of a fixed /tmp path, which is shared host state whenever the
      # build is not sandboxed.
      img="$TMPDIR/test-img.png"

      convert -size 400x40 xc:white -font 'DejaVu-Sans' -pointsize 20 \
        -fill black -annotate +5+20 "$text" "$img" 2>/dev/null
      # tesseract ends its output with a form feed; drop it before comparing.
      ocrResult=$(tesseract "$img" - | tr -d "\f")

      # Both sides are quoted so that the comparison is literal; an unquoted
      # right-hand side would be treated as a glob pattern by [[ ]].
      if [[ "$ocrResult" != "$text" ]]; then
        echo "OCR test failed"
        echo "expected: '$text'"
        echo "actual: '$ocrResult'"
        exit 1
      fi
      touch $out
    '';
# The package handed back to the caller: the plain base when no language is
# requested, the data-carrying wrapper otherwise. Either way the passthru
# attributes defined above are merged in, and the OCR test is registered as
# `tests.default` when all languages are enabled.
tesseract =
  let
    selected = if enableLanguages == [ ] then tesseractBase else tesseractWithData;

    # Extra passthru carrying the OCR test; empty unless every language is enabled.
    ocrTests =
      prev:
      lib.optionalAttrs (enableLanguages == null) {
        tests = (prev.passthru.tests or { }) // {
          default = test;
        };
      };
  in
  selected.overrideAttrs (prev: {
    passthru = (prev.passthru or { }) // passthru // ocrTests prev;
  });
in
# Result of the function: the package itself, preceded by an evaluation
# warning when the obsolete `enableLanguagesHash` argument was supplied.
if enableLanguagesHash != null then
  lib.warn "Argument `enableLanguagesHash` is obsolete and can be removed." tesseract
else
  tesseract