{ lib, makeWrapper, tesseractBase, languages, runCommand, imagemagick, # A list of languages like [ "eng" "spa" … ] or `null` for all available languages enableLanguages ? null, # A list of files or a directory containing files tessdata ? ( if enableLanguages == null then languages.all else map (lang: languages.${lang}) enableLanguages ), # This argument is obsolete enableLanguagesHash ? null, }: let tesseractWithData = tesseractBase.overrideAttrs (_: { inherit tesseractBase tessdata; nativeBuildInputs = [ makeWrapper ]; buildPhase = '' makeWrapper {$tesseractBase,$out}/bin/tesseract --set-default TESSDATA_PREFIX $out/share/tessdata # Recursively link include, share cp -rs --no-preserve=mode $tesseractBase/{include,share} $out cp -r --no-preserve=mode $tesseractBase/lib $out # Fixup the store paths in lib so that the tessdata from this derivation is used. if (( ''${#tesseractBase} != ''${#out} )); then echo "Can't replace store paths due to differing lengths" exit 1 fi find $out/lib -type f -exec sed -i "s|$tesseractBase|$out|g" {} \; if [[ -d "$tessdata" ]]; then ln -s $tessdata/* $out/share/tessdata else for lang in $tessdata; do ln -s $lang $out/share/tessdata/''${lang#/nix/store*-} done fi if [[ ! -e $out/share/tessdata/eng.traineddata ]]; then # This is a bug in Tesseract's internal tessdata discovery mechanism echo "eng.traineddata must be present in tessdata for Tesseract to work" exit 1 fi ''; }); passthru = { inherit tesseractBase languages tessdata; }; # Only run test when all languages are available test = runCommand "tesseract-test-ocr" { buildInputs = [ tesseractWithData imagemagick ]; } '' text="hello nix" convert -size 400x40 xc:white -font 'DejaVu-Sans' -pointsize 20 \ -fill black -annotate +5+20 "$text" /tmp/test-img.png 2>/dev/null ocrResult=$(tesseract /tmp/test-img.png - | tr -d "\f") if [[ $ocrResult != $text ]]; then echo "OCR test failed" echo "expected: '$text'" echo "actual: '$ocrResult'" exit 1 fi touch $out ''; tesseract = (if enableLanguages == [ ] then tesseractBase else tesseractWithData).overrideAttrs (old: { passthru = (old.passthru or { }) // passthru // lib.optionalAttrs (enableLanguages == null) { tests = (old.passthru.tests or { }) // { default = test; }; }; }); in if enableLanguagesHash == null then tesseract else lib.warn "Argument `enableLanguagesHash` is obsolete and can be removed." tesseract