push sheeet
Some checks failed
Periodic Merges (6h) / master → staging-nixos (push) Failing after 12m50s
Periodic Merges (6h) / master → staging-next (push) Failing after 12m54s
Periodic Merges (24h) / merge-base(master,staging) → haskell-updates (push) Failing after 11m54s
Periodic Merges (6h) / staging-next → staging (push) Failing after 12m13s
Periodic Merges (24h) / staging-next-25.05 → staging-25.05 (push) Failing after 13m24s
Periodic Merges (24h) / release-25.05 → staging-next-25.05 (push) Failing after 14m28s

This commit is contained in:
Dark Steveneq
2025-10-09 14:15:47 +02:00
commit 646b892680
49168 changed files with 5897842 additions and 0 deletions

View File

@@ -0,0 +1,115 @@
# Package expression for OCRmyPDF, built with buildPythonPackage.
# All inputs are supplied by the caller as function arguments.
{
  lib,
  buildPythonPackage,
  deprecation,
  fetchFromGitHub,
  ghostscript_headless,
  hatch-vcs,
  hatchling,
  hypothesis,
  img2pdf,
  installShellFiles,
  jbig2enc,
  packaging,
  pdfminer-six,
  pikepdf,
  pillow,
  pillow-heif,
  pluggy,
  pngquant,
  pytest-xdist,
  pytestCheckHook,
  replaceVars,
  reportlab,
  rich,
  tesseract,
  unpaper,
}:

buildPythonPackage rec {
  pname = "ocrmypdf";
  version = "16.11.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "ocrmypdf";
    repo = "OCRmyPDF";
    rev = "v${version}";
    # .git_archival.txt is filled in when the tarball is generated, so its
    # content is not stable once master moves past the tag. Deleting it
    # keeps the fetched source (and therefore `hash`) deterministic.
    # See https://github.com/ocrmypdf/OCRmyPDF/issues/841
    postFetch = ''
      rm "$out/.git_archival.txt"
    '';
    hash = "sha256-seylNBl29+QxN+3SbgRUdtTo1JwvW1sODpsz7Gwer3E=";
  };

  patches = [
    # Use pillow-heif in place of the pi-heif dependency.
    ./use-pillow-heif.patch
    # Substitute the @gs@, @jbig2@, @pngquant@, @tesseract@ and @unpaper@
    # placeholders in paths.patch with the executables of the packages below.
    (replaceVars ./paths.patch {
      gs = lib.getExe ghostscript_headless;
      jbig2 = lib.getExe jbig2enc;
      pngquant = lib.getExe pngquant;
      tesseract = lib.getExe tesseract;
      unpaper = lib.getExe unpaper;
    })
  ];

  build-system = [
    hatch-vcs
    hatchling
  ];

  # Provides the installShellCompletion command used in postInstall.
  nativeBuildInputs = [ installShellFiles ];

  dependencies = [
    deprecation
    img2pdf
    packaging
    pdfminer-six
    pillow-heif
    pikepdf
    pillow
    pluggy
    rich
  ];

  nativeCheckInputs = [
    hypothesis
    pytest-xdist
    pytestCheckHook
    reportlab
  ];

  pythonImportsCheck = [ "ocrmypdf" ];

  disabledTests = [
    # The tests below are broken by a change in Python 3.13.4:
    # https://github.com/python/cpython/commit/8e923f36596370aedfdfb12251447bface41317a
    # https://github.com/ocrmypdf/OCRmyPDF/blob/9f6e5a48ada5df7006a8c68b84e2aeae61943d8b/src/ocrmypdf/_exec/ghostscript.py#L66
    "TestDuplicateFilter"
    "test_masks"
    "test_content_preservation"
  ];

  # Install the bash and fish completion scripts from the source tree.
  postInstall = ''
    installShellCompletion --cmd ocrmypdf \
    --bash misc/completion/ocrmypdf.bash \
    --fish misc/completion/ocrmypdf.fish
  '';

  meta = {
    homepage = "https://github.com/ocrmypdf/OCRmyPDF";
    description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
    license = [
      lib.licenses.mpl20
      lib.licenses.mit
    ];
    maintainers = [
      lib.maintainers.dotlambda
    ];
    changelog = "https://github.com/ocrmypdf/OCRmyPDF/blob/${src.rev}/docs/release_notes.md";
    mainProgram = "ocrmypdf";
  };
}

View File

@@ -0,0 +1,119 @@
diff --git a/src/ocrmypdf/_exec/ghostscript.py b/src/ocrmypdf/_exec/ghostscript.py
index eaa48117..30201d97 100644
--- a/src/ocrmypdf/_exec/ghostscript.py
+++ b/src/ocrmypdf/_exec/ghostscript.py
@@ -31,7 +31,7 @@ COLOR_CONVERSION_STRATEGIES = frozenset(
]
)
# Ghostscript executable - gswin32c is not supported
-GS = 'gswin64c' if os.name == 'nt' else 'gs'
+GS = '@gs@'
log = logging.getLogger(__name__)
diff --git a/src/ocrmypdf/_exec/jbig2enc.py b/src/ocrmypdf/_exec/jbig2enc.py
index 1c6dd5fe..b689a091 100644
--- a/src/ocrmypdf/_exec/jbig2enc.py
+++ b/src/ocrmypdf/_exec/jbig2enc.py
@@ -15,7 +15,7 @@ from ocrmypdf.subprocess import get_version, run
def version() -> Version:
try:
- version = get_version('jbig2', regex=r'jbig2enc (\d+(\.\d+)*).*')
+ version = get_version('@jbig2@', regex=r'jbig2enc (\d+(\.\d+)*).*')
except CalledProcessError as e:
# TeX Live for Windows provides an incompatible jbig2.EXE which may
# be on the PATH.
@@ -33,7 +33,7 @@ def available():
def convert_group(cwd, infiles, out_prefix, threshold):
args = [
- 'jbig2',
+ '@jbig2@',
'-b',
out_prefix,
'--symbol-mode', # symbol mode (lossy)
@@ -50,7 +50,7 @@ def convert_group(cwd, infiles, out_prefix, threshold):
def convert_single(cwd, infile, outfile, threshold):
- args = ['jbig2', '--pdf', '-t', str(threshold), infile]
+ args = ['@jbig2@', '--pdf', '-t', str(threshold), infile]
with open(outfile, 'wb') as fstdout:
proc = run(args, cwd=cwd, stdout=fstdout, stderr=PIPE)
proc.check_returncode()
diff --git a/src/ocrmypdf/_exec/pngquant.py b/src/ocrmypdf/_exec/pngquant.py
index 5b8600d0..fcad771b 100644
--- a/src/ocrmypdf/_exec/pngquant.py
+++ b/src/ocrmypdf/_exec/pngquant.py
@@ -15,7 +15,7 @@ from ocrmypdf.subprocess import get_version, run
def version() -> Version:
- return Version(get_version('pngquant', regex=r'(\d+(\.\d+)*).*'))
+ return Version(get_version('@pngquant@', regex=r'(\d+(\.\d+)*).*'))
def available():
@@ -37,7 +37,7 @@ def quantize(input_file: Path, output_file: Path, quality_min: int, quality_max:
"""
with open(input_file, 'rb') as input_stream:
args = [
- 'pngquant',
+ '@pngquant@',
'--force',
'--skip-if-larger',
'--quality',
diff --git a/src/ocrmypdf/_exec/tesseract.py b/src/ocrmypdf/_exec/tesseract.py
index 102bdab8..bfef4400 100644
--- a/src/ocrmypdf/_exec/tesseract.py
+++ b/src/ocrmypdf/_exec/tesseract.py
@@ -95,7 +95,7 @@ class TesseractVersion(Version):
def version() -> Version:
- return TesseractVersion(get_version('tesseract', regex=r'tesseract\s(.+)'))
+ return TesseractVersion(get_version('@tesseract@', regex=r'tesseract\s(.+)'))
def has_thresholding() -> bool:
@@ -113,7 +113,7 @@ def get_languages() -> set[str]:
msg += output
return msg
- args_tess = ['tesseract', '--list-langs']
+ args_tess = ['@tesseract@', '--list-langs']
try:
proc = run(
args_tess,
@@ -135,7 +135,7 @@ def get_languages() -> set[str]:
def tess_base_args(langs: list[str], engine_mode: int | None) -> list[str]:
- args = ['tesseract']
+ args = ['@tesseract@']
if langs:
args.extend(['-l', '+'.join(langs)])
if engine_mode is not None:
diff --git a/src/ocrmypdf/_exec/unpaper.py b/src/ocrmypdf/_exec/unpaper.py
index a5a92f4c..6cf56eb3 100644
--- a/src/ocrmypdf/_exec/unpaper.py
+++ b/src/ocrmypdf/_exec/unpaper.py
@@ -48,7 +48,7 @@ class UnpaperImageTooLargeError(Exception):
def version() -> Version:
- return Version(get_version('unpaper', regex=r'(?m).*?(\d+(\.\d+)(\.\d+)?)'))
+ return Version(get_version('@unpaper@', regex=r'(?m).*?(\d+(\.\d+)(\.\d+)?)'))
@contextmanager
@@ -70,7 +70,7 @@ def _setup_unpaper_io(input_file: Path) -> Iterator[tuple[Path, Path, Path]]:
def run_unpaper(
input_file: Path, output_file: Path, *, dpi: DecFloat, mode_args: list[str]
) -> None:
- args_unpaper = ['unpaper', '-v', '--dpi', str(round(dpi, 6))] + mode_args
+ args_unpaper = ['@unpaper@', '-v', '--dpi', str(round(dpi, 6))] + mode_args
with _setup_unpaper_io(input_file) as (input_png, output_pnm, tmpdir):
# To prevent any shenanigans from accepting arbitrary parameters in

View File

@@ -0,0 +1,26 @@
diff --git a/pyproject.toml b/pyproject.toml
index 7d665b6a..55f2f210 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,7 @@ dependencies = [
"img2pdf>=0.5",
"packaging>=20",
"pdfminer.six>=20220319",
- "pi-heif", # Heif image format - maintainers: if this is removed, it will NOT break
+ "pillow-heif", # Heif image format - maintainers: if this is removed, it will NOT break
"pikepdf>=8.10.1,!=9.8.0",
"Pillow>=10.0.1",
"pluggy>=1",
diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py
index 90524d58..0be5a0f8 100644
--- a/src/ocrmypdf/_pipeline.py
+++ b/src/ocrmypdf/_pipeline.py
@@ -42,7 +42,7 @@ from ocrmypdf.pdfinfo import Colorspace, Encoding, PageInfo, PdfInfo
from ocrmypdf.pluginspec import OrientationConfidence
try:
- from pi_heif import register_heif_opener
+ from pillow_heif import register_heif_opener
except ImportError:
def register_heif_opener():