media info - read txt and images

2026-05-21 10:18:24 +02:00
parent d0026ab7f9
commit c2948ea277
3 changed files with 236 additions and 10 deletions
--- a/pytorrent/services/rtorrent/files.py
+++ b/pytorrent/services/rtorrent/files.py
@@ -130,14 +130,36 @@ _MEDIA_INFO_EXTENSIONS = {
    ".flv", ".m4a", ".m4v", ".mka", ".mkv", ".mov", ".mp3", ".mp4",
    ".mpeg", ".mpg", ".ogg", ".opus", ".ts", ".wav", ".webm", ".wma", ".wmv",
 }
+_TEXT_PREVIEW_EXTENSIONS = {  # Note: Lowercase suffixes (with leading dot) accepted by _text_preview_supported.
+    ".ass", ".cue", ".csv", ".ini", ".json", ".log", ".m3u", ".m3u8",
+    ".md", ".nfo", ".srt", ".ssa", ".sub", ".sfv", ".txt", ".url",
+    ".xml", ".yaml", ".yml",
+}
+_IMAGE_PREVIEW_EXTENSIONS = {".avif", ".bmp", ".gif", ".jpeg", ".jpg", ".png", ".webp"}  # Note: Keep in sync with the MIME table in _image_preview_mime.
 _MEDIA_INFO_SAMPLE_BYTES = 32 * 1024 * 1024
 _MEDIA_INFO_CHUNK_BYTES = 1024 * 1024
+_TEXT_PREVIEW_BYTES = 512 * 1024  # Note: 512 KiB default prefix size read by _text_file_preview.
+_IMAGE_PREVIEW_BYTES = 8 * 1024 * 1024  # Note: 8 MiB default cap; _image_file_preview refuses files whose reported size is larger.
 _MEDIA_INFO_TMP_DIR = BASE_DIR / "data" / "media-info-samples"


+def _file_extension(path: str) -> str:  # Note: Lowercased suffix of path (empty/None is treated as ""); assumes LocalPath.suffix follows pathlib semantics - confirm at the import.
+    return LocalPath(str(path or "")).suffix.lower()
+
+
 def _media_info_supported(path: str) -> bool:
    # Note: Extension filtering avoids trying binary metadata parsers on every torrent payload file.
-    return LocalPath(str(path or "")).suffix.lower() in _MEDIA_INFO_EXTENSIONS
+    return _file_extension(path) in _MEDIA_INFO_EXTENSIONS  # Note: Same suffix normalisation as the text and image preview checks.
+
+
+def _text_preview_supported(path: str) -> bool:
+    # Note: True when the lowercased suffix is in _TEXT_PREVIEW_EXTENSIONS; NFO and subtitle suffixes are included on purpose so the existing info button is useful for release notes too.
+    return _file_extension(path) in _TEXT_PREVIEW_EXTENSIONS
+
+
+def _image_preview_supported(path: str) -> bool:
+    # Note: True when the lowercased suffix is in _IMAGE_PREVIEW_EXTENSIONS; the set is limited to browser-safe raster formats and deliberately omits SVG to prevent inline script-like payloads.
+    return _file_extension(path) in _IMAGE_PREVIEW_EXTENSIONS


 def _media_info_sample_suffix(source_path: str) -> str:
@@ -147,6 +169,125 @@ def _media_info_sample_suffix(source_path: str) -> str:
    return ".bin"


+def _read_file_prefix(profile: dict, source_path: str, max_bytes: int) -> bytes:
+    # Note: Returns at most max_bytes from the start of source_path. Remote profiles stream through iter_remote_file_chunks, local ones read the file directly, so text and image previews never load an entire large file into RAM.
+    limit = max(0, int(max_bytes or 0))  # Note: None and negative limits are treated as 0.
+    chunks: list[bytes] = []
+    collected = 0
+    if int(profile.get("is_remote") or 0):
+        for chunk in iter_remote_file_chunks(profile, source_path, size=limit, chunk_size=_MEDIA_INFO_CHUNK_BYTES):
+            if collected >= limit:
+                break
+            data = bytes(chunk[: max(0, limit - collected)])  # Note: Trim the last chunk so the total never exceeds limit.
+            chunks.append(data)
+            collected += len(data)
+    else:
+        with open(source_path, "rb") as src:
+            while collected < limit:
+                data = src.read(min(_MEDIA_INFO_CHUNK_BYTES, limit - collected))
+                if not data:  # Note: End of file reached before the limit.
+                    break
+                chunks.append(data)
+                collected += len(data)
+    return b"".join(chunks)
+
+
+def _decode_text_preview(data: bytes) -> tuple[str, str]:
+    # Note: Returns (encoding label, text). UTF-8 is preferred; anything else is read as CP437, which is common for NFO files and keeps ASCII art readable.
+    if not data:
+        return "utf-8", ""
+    # Note: "utf-8-sig" is reported only when a BOM is actually present; the BOM itself never appears in the returned text.
+    has_bom = data.startswith(b"\xef\xbb\xbf")
+    encoding, body = ("utf-8-sig", data[3:]) if has_bom else ("utf-8", data)
+    try:
+        return encoding, body.decode("utf-8")
+    except UnicodeDecodeError as exc:
+        # Note: A bounded prefix read can stop mid-character; drop only that incomplete tail instead of misreading a UTF-8 file as CP437.
+        if exc.reason == "unexpected end of data" and exc.end == len(body):
+            return encoding, body[: exc.start].decode("utf-8")
+    # Note: CP437 maps all 256 byte values, so this cannot fail and no further fallback (cp1250, latin-1) would ever be reached.
+    return "cp437", data.decode("cp437")
+
+
+def _image_preview_mime(path: str) -> str:
+    # Note: Maps the lowercased suffix to a MIME type. It is extension-based because preview input is already restricted to known image suffixes; any other suffix falls back to application/octet-stream.
+    ext = _file_extension(path)
+    return {
+        ".avif": "image/avif",
+        ".bmp": "image/bmp",
+        ".gif": "image/gif",
+        ".jpeg": "image/jpeg",
+        ".jpg": "image/jpeg",
+        ".png": "image/png",
+        ".webp": "image/webp",
+    }.get(ext, "application/octet-stream")
+
+
+def _text_file_preview(profile: dict, selected: dict, remote_path: str, max_bytes: int = _TEXT_PREVIEW_BYTES) -> dict:
+    # Note: Builds the "text" preview payload from at most max_bytes of the file. The text is not escaped here (the frontend is expected to escape it) and "partial" flags large NFO/log/subtitle files that were truncated.
+    size = int(selected.get("size") or 0)
+    data = _read_file_prefix(profile, remote_path, max_bytes)
+    encoding, text = _decode_text_preview(data)
+    return {
+        **selected,
+        "kind": "text",
+        "parser": "text-preview",
+        "supported": True,
+        "sample_bytes": len(data),
+        "sample_limit": int(max_bytes),
+        "partial": bool(size and len(data) < size),  # Note: Stays False when the reported size is 0 or missing, even if the read hit the limit.
+        "encoding": encoding,
+        "text": text,
+        "line_count": text.count("\n") + (1 if text else 0),  # Note: Newline count plus one for non-empty text, so a trailing newline counts as one extra (empty) line.
+        "summary": {},
+        "fields": [
+            {"key": "Type", "value": "Text preview"},
+            {"key": "Encoding", "value": encoding},
+            {"key": "Preview bytes", "value": human_size(len(data))},
+        ],
+        "raw": [],
+    }
+
+
+def _image_file_preview(profile: dict, selected: dict, remote_path: str, max_bytes: int = _IMAGE_PREVIEW_BYTES) -> dict:
+    # Note: Builds the "image" preview payload as a base64 data URL. Files whose reported size exceeds max_bytes get an error payload instead; images are size capped and CSS-constrained in the modal rather than decoded/resized server-side.
+    size = int(selected.get("size") or 0)
+    result = {
+        **selected,
+        "kind": "image",
+        "parser": "image-preview",
+        "supported": True,
+        "sample_bytes": 0,
+        "sample_limit": int(max_bytes),
+        "partial": False,
+        "mime_type": _image_preview_mime(str(selected.get("path") or remote_path)),
+        "summary": {},
+        "fields": [
+            {"key": "Type", "value": "Image preview"},
+            {"key": "Preview limit", "value": human_size(max_bytes)},
+        ],
+        "raw": [],
+    }
+    if size > max_bytes:  # Note: Checked before any read, so an oversized file costs no I/O.
+        result.update({
+            "too_large": True,
+            "error": f"Image preview is limited to {human_size(max_bytes)}. Download the file to view the full image.",
+        })
+        return result
+    data = _read_file_prefix(profile, remote_path, max_bytes)
+    import base64  # Note: Local import, only needed on the success path.

+    result.update({
+        "sample_bytes": len(data),
+        "data_url": f"data:{result['mime_type']};base64,{base64.b64encode(data).decode('ascii')}",
+        "fields": result["fields"] + [
+            {"key": "Image bytes", "value": human_size(len(data))},
+            {"key": "MIME type", "value": result["mime_type"]},
+        ],
+    })
+    return result
+
+
 def _media_info_temp_sample(profile: dict, source_path: str, max_bytes: int) -> tuple[str, int]:
    # Note: hachoir needs a seekable file, so this writes a bounded sample into the app data directory instead of loading whole media into RAM.
    import tempfile
@@ -268,13 +409,24 @@ def _media_info_hachoir_imports():


 def torrent_file_media_info(profile: dict, torrent_hash: str, index: int, max_bytes: int = _MEDIA_INFO_SAMPLE_BYTES) -> dict:
-    # Note: This endpoint is MediaInfo-like and intentionally avoids external binaries such as mediainfo, ffprobe or ffmpeg.
+    # Note: This additive endpoint now acts as a smart file preview: media metadata, text/NFO reader, or image preview depending on file type.
    selected, remote_path = _torrent_file_remote_path(profile, torrent_hash, index)
    name = str(selected.get("path") or remote_path)
    size = int(selected.get("size") or 0)
+
+    err = remote_file_readability_error(profile, remote_path) if int(profile.get("is_remote") or 0) else None
+    if err:
+        raise RuntimeError(err)
+
+    if _text_preview_supported(name):
+        return _text_file_preview(profile, selected, remote_path)
+    if _image_preview_supported(name):
+        return _image_file_preview(profile, selected, remote_path)
+
    supported = _media_info_supported(name)
    result = {
        **selected,
+        "kind": "media",
        "supported": supported,
        "sample_bytes": 0,
        "sample_limit": int(max_bytes),
@@ -285,15 +437,14 @@ def torrent_file_media_info(profile: dict, torrent_hash: str, index: int, max_by
        "parser": "hachoir",
    }
    if not supported:
-        result["error"] = "This file extension is not supported by the built-in media info parser."
+        result.update({
+            "kind": "unsupported",
+            "error": "This file extension is not supported by the built-in preview or media info parser.",
+        })
        return result

    createParser, extractMetadata = _media_info_hachoir_imports()

-    err = remote_file_readability_error(profile, remote_path) if int(profile.get("is_remote") or 0) else None
-    if err:
-        raise RuntimeError(err)
-
    tmp_path = None
    try:
        tmp_path, written = _media_info_temp_sample(profile, remote_path, max(1024 * 1024, int(max_bytes)))