chages in logs

This commit is contained in:
Mateusz Gruszczyński
2026-06-03 09:07:58 +02:00
parent f29710b24f
commit 4628ea653d
5 changed files with 210 additions and 24 deletions
+18
View File
@@ -200,6 +200,8 @@ def enqueue(action_name: str, profile_id: int, payload: dict, user_id: int | Non
"INSERT INTO jobs(id,user_id,profile_id,action,payload_json,status,attempts,max_attempts,progress_total,created_at,updated_at) VALUES(?,?,?,?,?,?,?,?,?,?,?)",
(job_id, user_id, profile_id, action_name, json.dumps(payload), "pending", 0, max_attempts, progress_total, now, now),
)
# Note: Queued jobs are now written to operation logs so work is visible before a worker starts it.
operation_logs.record_job_event(profile_id, action_name, "queued", payload, job_id=job_id, user_id=user_id)
_emit("job_update", {"id": job_id, "action": action_name, "profile_id": profile_id, "status": "pending"})
_submit_job(job_id, action_name)
return job_id
@@ -315,6 +317,8 @@ def _run(job_id: str):
profile = get_profile(int(job["profile_id"]), int(job["user_id"]))
if not profile:
_set_job(job_id, "failed", "rTorrent profile does not exist", finished=True)
# Note: Profile lookup failures used to appear only in the job queue; they are now persisted in operation logs too.
operation_logs.record_worker_event(int(job.get("profile_id") or 0), str(job.get("action") or ""), "failed", "Job failed: rTorrent profile does not exist", job_id=job_id, user_id=int(job.get("user_id") or 0), error="profile not found")
_emit("job_update", {"id": job_id, "profile_id": job.get("profile_id"), "status": "failed", "error": "profile not found"})
return
profile_id = int(profile["id"])
@@ -362,6 +366,9 @@ def _run(job_id: str):
_set_job(job_id, status, str(exc), finished=(status == "failed"))
if status == "failed":
operation_logs.record_job_event(int(job.get("profile_id") or 0), job.get("action"), "failed", payload, error=str(exc), job_id=job_id, user_id=int(job.get("user_id") or 0))
else:
# Note: Retried attempts are logged explicitly so transient failures are not lost between final states.
operation_logs.record_job_event(int(job.get("profile_id") or 0), job.get("action"), "retry", payload, error=str(exc), job_id=job_id, user_id=int(job.get("user_id") or 0))
_emit("operation_failed", {"job_id": job_id, "action": job.get("action"), "profile_id": job.get("profile_id"), "hashes": payload.get("hashes") or [], "error": str(exc), **_job_event_meta(payload)})
_emit("job_update", {"id": job_id, "profile_id": job.get("profile_id"), "status": status, "error": str(exc), "attempts": attempts})
if status == "pending":
@@ -408,6 +415,8 @@ def _timeout_running_jobs() -> None:
continue
message = f"Watchdog timeout after {_job_timeout_seconds(profile, row)} seconds"
_set_job(row["id"], "failed", message, finished=True)
# Note: Watchdog timeouts are stored in operation logs because no normal worker exception may be raised.
operation_logs.record_worker_event(int(row.get("profile_id") or 0), str(row.get("action") or ""), "timeout", message, job_id=row["id"], user_id=int(row.get("user_id") or 0), error=message)
_emit("operation_failed", {"job_id": row["id"], "action": row.get("action"), "profile_id": row.get("profile_id"), "hashes": [], "error": message, "source": "watchdog"})
_emit("job_update", {"id": row["id"], "profile_id": row.get("profile_id"), "status": "failed", "error": message})
@@ -435,6 +444,8 @@ def _resubmit_interrupted_running_jobs() -> None:
("Resuming interrupted job from last checkpoint", utcnow(), row["id"]),
)
if int(cur.rowcount or 0):
# Note: Interrupted jobs returned to the queue are logged so restart recovery is auditable.
operation_logs.record_worker_event(int(row.get("profile_id") or 0), str(row.get("action") or ""), "resubmitted", "Interrupted job resubmitted from checkpoint", job_id=row["id"], user_id=int(row.get("user_id") or 0))
_emit("job_update", {"id": row["id"], "profile_id": row.get("profile_id"), "status": "pending", "resumed": True})
_submit_job(row["id"], row.get("action"))
@@ -456,6 +467,8 @@ def _resubmit_stale_pending_jobs() -> None:
continue
with connect() as conn:
conn.execute("UPDATE jobs SET error=?, updated_at=? WHERE id=? AND status='pending'", ("Watchdog resubmitted stale pending job", utcnow(), row["id"]))
# Note: Stale pending resubmits are logged to explain duplicated queue attempts after watchdog recovery.
operation_logs.record_worker_event(int(row.get("profile_id") or 0), str(row.get("action") or ""), "resubmitted", "Stale pending job resubmitted by watchdog", job_id=row["id"], user_id=int(row.get("user_id") or 0))
_emit("job_update", {"id": row["id"], "profile_id": row.get("profile_id"), "status": "pending", "watchdog": True})
_submit_job(row["id"], row.get("action"))
@@ -561,6 +574,8 @@ def cancel_job(job_id: str) -> bool:
return False
# Note: Emergency cancel is useful only for unfinished jobs; failed/done entries stay available for retry or log cleanup.
_set_job(job_id, "cancelled", finished=True)
payload = _job_payload(row)
operation_logs.record_job_event(int(row.get("profile_id") or 0), row.get("action"), "cancelled", payload, error="Cancelled by user", job_id=job_id, user_id=int(row.get("user_id") or 0))
_emit("job_update", {"id": job_id, "profile_id": row.get("profile_id"), "status": "cancelled"})
return True
@@ -597,6 +612,7 @@ def force_job(job_id: str) -> bool:
payload['priority_job'] = True
with connect() as conn:
conn.execute("UPDATE jobs SET payload_json=?, updated_at=? WHERE id=?", (json.dumps(payload), utcnow(), job_id))
operation_logs.record_job_event(int(row.get('profile_id') or 0), row.get('action'), 'forced', payload, job_id=job_id, user_id=int(row.get('user_id') or 0))
_emit('job_update', {'id': job_id, 'profile_id': row.get('profile_id'), 'status': 'pending', 'forced': True})
_submit_job(job_id, row.get('action'))
return True
@@ -607,6 +623,8 @@ def retry_job(job_id: str) -> bool:
return False
with connect() as conn:
conn.execute("UPDATE jobs SET status='pending', error='', finished_at=NULL, state_json=NULL, progress_current=0, heartbeat_at=NULL, updated_at=? WHERE id=?", (utcnow(), job_id))
payload = _job_payload(row)
operation_logs.record_job_event(int(row.get("profile_id") or 0), row.get("action"), "retry", payload, job_id=job_id, user_id=int(row.get("user_id") or 0))
_emit("job_update", {"id": job_id, "profile_id": row.get("profile_id"), "status": "pending"})
_submit_job(job_id, row.get("action"))
return True