From a4f7427540840d31195ad37d7c68955ec7a36584 Mon Sep 17 00:00:00 2001 From: Ben Sima Date: Sat, 15 Nov 2025 23:13:56 -0500 Subject: Implement episode deduplication in submission flow - Check for existing episodes by URL hash before processing - Reuse existing episodes when user submits duplicate URL - Add episode to user's feed if they don't have it - Track 'added' metrics when episode added to feed - Worker now creates user_episodes link and tracks metrics - Show appropriate messages for already-in-feed vs newly-added episodes --- Biz/PodcastItLater/Web.py | 38 ++++++++++++++++++++++++++++++++++++-- Biz/PodcastItLater/Worker.py | 12 ++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) (limited to 'Biz') diff --git a/Biz/PodcastItLater/Web.py b/Biz/PodcastItLater/Web.py index 1a94007..8caaf8c 100644 --- a/Biz/PodcastItLater/Web.py +++ b/Biz/PodcastItLater/Web.py @@ -1149,7 +1149,10 @@ def logout(request: Request) -> Response: @app.post("/submit") -def submit_article(request: Request, data: FormData) -> html.div: # noqa: PLR0911 +def submit_article( # noqa: PLR0911, PLR0914 + request: Request, + data: FormData, +) -> html.div: """Handle manual form submission.""" try: # Check if user is logged in @@ -1208,7 +1211,38 @@ def submit_article(request: Request, data: FormData) -> html.div: # noqa: PLR09 classes=["alert", "alert-warning"], ) - # Extract Open Graph metadata + # Check if episode already exists for this URL + url_hash = Core.hash_url(url) + existing_episode = Core.Database.get_episode_by_url_hash(url_hash) + + if existing_episode: + # Episode already processed - check if user has it + episode_id = existing_episode["id"] + if Core.Database.user_has_episode(user_id, episode_id): + return html.div( + html.i(classes=["bi", "bi-info-circle", "me-2"]), + "This episode is already in your feed.", + classes=["alert", "alert-info"], + ) + # Add existing episode to user's feed + Core.Database.add_episode_to_user(user_id, episode_id) + Core.Database.track_episode_event( + episode_id, + "added", + user_id, + ) + return html.div( + html.i(classes=["bi", "bi-check-circle", "me-2"]), + "✓ Episode added to your feed! ", + html.a( + "View episode", + href=f"/episode/{encode_episode_id(episode_id)}", + classes=["alert-link"], + ), + classes=["alert", "alert-success"], + ) + + # Episode doesn't exist yet - extract metadata and queue for processing title, author = extract_og_metadata(url) job_id = Core.Database.add_to_queue( diff --git a/Biz/PodcastItLater/Worker.py b/Biz/PodcastItLater/Worker.py index 202e512..75a111c 100644 --- a/Biz/PodcastItLater/Worker.py +++ b/Biz/PodcastItLater/Worker.py @@ -519,6 +519,7 @@ class ArticleProcessor: duration = ArticleProcessor.estimate_duration(audio_data) # Step 5: Create episode record + url_hash = Core.hash_url(url) episode_id = Core.Database.create_episode( title=title, audio_url=audio_url, @@ -527,8 +528,19 @@ class ArticleProcessor: user_id=job.get("user_id"), author=job.get("author"), # Pass author from job original_url=url, # Pass the original article URL + original_url_hash=url_hash, ) + # Add episode to user's feed + user_id = job.get("user_id") + if user_id: + Core.Database.add_episode_to_user(user_id, episode_id) + Core.Database.track_episode_event( + episode_id, + "added", + user_id, + ) + # Step 6: Mark job as complete Core.Database.update_job_status( job_id, -- cgit v1.2.3