summary | refs | log | tree | commit | diff
path: root/Biz/PodcastItLater
diff options
context:
space:
mode:
author: Ben Sima <ben@bsima.me> 2025-11-15 23:13:56 -0500
committer: Ben Sima <ben@bsima.me> 2025-11-15 23:13:56 -0500
commit: a4f7427540840d31195ad37d7c68955ec7a36584 (patch)
tree: 948a022b0feb7859cf48ccf222821aef83c6fd14 /Biz/PodcastItLater
parent: 1e616e0ab8e93fc0be4caf64e4d1584883eca0d3 (diff)
Implement episode deduplication in submission flow
- Check for existing episodes by URL hash before processing
- Reuse existing episodes when user submits duplicate URL
- Add episode to user's feed if they don't have it
- Track 'added' metrics when episode added to feed
- Worker now creates user_episodes link and tracks metrics
- Show appropriate messages for already-in-feed vs newly-added episodes
Diffstat (limited to 'Biz/PodcastItLater')
-rw-r--r-- Biz/PodcastItLater/Web.py | 38
-rw-r--r-- Biz/PodcastItLater/Worker.py | 12
2 files changed, 48 insertions, 2 deletions
diff --git a/Biz/PodcastItLater/Web.py b/Biz/PodcastItLater/Web.py
index 1a94007..8caaf8c 100644
--- a/Biz/PodcastItLater/Web.py
+++ b/Biz/PodcastItLater/Web.py
@@ -1149,7 +1149,10 @@ def logout(request: Request) -> Response:
@app.post("/submit")
-def submit_article(request: Request, data: FormData) -> html.div: # noqa: PLR0911
+def submit_article( # noqa: PLR0911, PLR0914
+ request: Request,
+ data: FormData,
+) -> html.div:
"""Handle manual form submission."""
try:
# Check if user is logged in
@@ -1208,7 +1211,38 @@ def submit_article(request: Request, data: FormData) -> html.div: # noqa: PLR09
classes=["alert", "alert-warning"],
)
- # Extract Open Graph metadata
+ # Check if episode already exists for this URL
+ url_hash = Core.hash_url(url)
+ existing_episode = Core.Database.get_episode_by_url_hash(url_hash)
+
+ if existing_episode:
+ # Episode already processed - check if user has it
+ episode_id = existing_episode["id"]
+ if Core.Database.user_has_episode(user_id, episode_id):
+ return html.div(
+ html.i(classes=["bi", "bi-info-circle", "me-2"]),
+ "This episode is already in your feed.",
+ classes=["alert", "alert-info"],
+ )
+ # Add existing episode to user's feed
+ Core.Database.add_episode_to_user(user_id, episode_id)
+ Core.Database.track_episode_event(
+ episode_id,
+ "added",
+ user_id,
+ )
+ return html.div(
+ html.i(classes=["bi", "bi-check-circle", "me-2"]),
+ "✓ Episode added to your feed! ",
+ html.a(
+ "View episode",
+ href=f"/episode/{encode_episode_id(episode_id)}",
+ classes=["alert-link"],
+ ),
+ classes=["alert", "alert-success"],
+ )
+
+ # Episode doesn't exist yet - extract metadata and queue for processing
title, author = extract_og_metadata(url)
job_id = Core.Database.add_to_queue(
diff --git a/Biz/PodcastItLater/Worker.py b/Biz/PodcastItLater/Worker.py
index 202e512..75a111c 100644
--- a/Biz/PodcastItLater/Worker.py
+++ b/Biz/PodcastItLater/Worker.py
@@ -519,6 +519,7 @@ class ArticleProcessor:
duration = ArticleProcessor.estimate_duration(audio_data)
# Step 5: Create episode record
+ url_hash = Core.hash_url(url)
episode_id = Core.Database.create_episode(
title=title,
audio_url=audio_url,
@@ -527,8 +528,19 @@ class ArticleProcessor:
user_id=job.get("user_id"),
author=job.get("author"), # Pass author from job
original_url=url, # Pass the original article URL
+ original_url_hash=url_hash,
)
+ # Add episode to user's feed
+ user_id = job.get("user_id")
+ if user_id:
+ Core.Database.add_episode_to_user(user_id, episode_id)
+ Core.Database.track_episode_event(
+ episode_id,
+ "added",
+ user_id,
+ )
+
# Step 6: Mark job as complete
Core.Database.update_job_status(
job_id,