better pdf ux

2026-05-21 12:34:18 +02:00
parent 9142590c79
commit cb48735178
4 changed files with 81 additions and 81 deletions
--- a/pytorrent/services/rtorrent/files.py
+++ b/pytorrent/services/rtorrent/files.py
@@ -166,7 +166,7 @@ def _image_preview_supported(path: str) -> bool:


 def _pdf_preview_supported(path: str) -> bool:
-    # Note: PDF previews use pypdf for bounded text extraction and do not require system tools such as poppler.
+    # Note: PDF previews are rendered inline by the browser so image-heavy books keep their page layout.
    return _file_extension(path) in _PDF_PREVIEW_EXTENSIONS


@@ -330,81 +330,32 @@ def _pdf_file_preview(
    max_bytes: int = _PDF_TEXT_BYTES,
    max_pages: int = _PDF_TEXT_PAGES,
 ) -> dict:
-    # Note: PDF text extraction reads only bounded, reasonably sized PDF files and extracts a limited number of pages for modal UX.
+    # Note: This returns only a metadata payload for the modal; the frontend streams the actual PDF through the existing file download route in inline mode, so max_bytes and max_pages are no longer used here.
    size = int(selected.get("size") or 0)
-    result = {
+    return {
        **selected,
        "kind": "pdf",
-        "parser": "pypdf",
+        "parser": "browser-pdf-viewer",
        "supported": True,
        "sample_bytes": 0,
-        "sample_limit": int(max_bytes),
-        "page_limit": int(max_pages),
+        "sample_limit": 0,
+        "page_limit": 0,
        "partial": False,
-        "summary": {},
+        "summary": {
+            "duration": None,
+            "bit_rate": human_size(size) if size else None,
+            "compression": "PDF",
+            "producer": "Browser inline preview",
+            "creation_date": None,
+        },
        "fields": [
-            {"key": "Type", "value": "PDF text preview"},
-            {"key": "Read limit", "value": human_size(max_bytes)},
-            {"key": "Page limit", "value": str(max_pages)},
+            {"key": "Type", "value": "PDF inline preview"},
+            {"key": "PDF size", "value": human_size(size)},
+            {"key": "Preview mode", "value": "Browser PDF renderer"},
        ],
        "raw": [],
        "text": "",
    }
-    if size > max_bytes:
-        result.update({
-            "too_large": True,
-            "error": f"PDF text extraction is limited to {human_size(max_bytes)}. Download the file to read the full PDF.",
-        })
-        return result
-
-    PdfReader = _pdf_imports()
-    data = _read_file_prefix(profile, remote_path, max_bytes)
-    result["sample_bytes"] = len(data)
-    try:
-        from io import BytesIO
-
-        reader = PdfReader(BytesIO(data))
-        if getattr(reader, "is_encrypted", False):
-            try:
-                reader.decrypt("")
-            except Exception:
-                result.update({"error": "This PDF is encrypted and cannot be read without a password."})
-                return result
-        pages = list(reader.pages)
-        page_count = len(pages)
-        extracted = []
-        for page_number, page in enumerate(pages[:max_pages], start=1):
-            try:
-                page_text = page.extract_text() or ""
-            except Exception as exc:
-                page_text = f"[Page {page_number}: text extraction failed: {exc}]"
-            if page_text.strip():
-                extracted.append(f"--- Page {page_number} ---\n{page_text.strip()}")
-        text = "\n\n".join(extracted).strip()
-        result.update({
-            "text": text,
-            "page_count": page_count,
-            "extracted_pages": min(page_count, max_pages),
-            "partial": page_count > max_pages,
-            "summary": {
-                "duration": None,
-                "bit_rate": human_size(size) if size else None,
-                "compression": "PDF",
-                "producer": f"{min(page_count, max_pages)} / {page_count} page(s)",
-                "creation_date": None,
-            },
-            "fields": result["fields"] + [
-                {"key": "PDF size", "value": human_size(size)},
-                {"key": "Pages", "value": str(page_count)},
-                {"key": "Extracted pages", "value": str(min(page_count, max_pages))},
-            ],
-        })
-        if not text:
-            result["error"] = "No readable text was found in the selected PDF pages. The file may be scanned or image-based."
-        return result
-    except Exception as exc:
-        result.update({"error": f"Unable to read PDF text: {exc}"})
-        return result


 def _media_info_temp_sample(profile: dict, source_path: str, max_bytes: int) -> tuple[str, int]: