summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Biz/PodcastItLater.md 14
-rw-r--r-- Biz/PodcastItLater/Web.nix 1
-rw-r--r-- Biz/PodcastItLater/Web.py 248
-rw-r--r-- Biz/PodcastItLater/Worker.py 1
4 files changed, 4 insertions, 260 deletions
diff --git a/Biz/PodcastItLater.md b/Biz/PodcastItLater.md
index 89fc9b5..1e22e05 100644
--- a/Biz/PodcastItLater.md
+++ b/Biz/PodcastItLater.md
@@ -13,13 +13,11 @@ A service that converts web articles to podcast episodes via email submission or
#### User Management
- Email-based registration/login (no passwords)
-- Auto-create users on first email submission
- Session-based authentication
- Personal RSS feed tokens
- User-specific data isolation
#### Article Processing
-- Email submission via Mailgun webhook
- Manual URL submission via web form
- Content extraction with trafilatura
- LLM-powered text preparation for natural speech
@@ -150,7 +148,6 @@ S3_ENDPOINT=
S3_BUCKET=
S3_ACCESS_KEY=
S3_SECRET_KEY=
-MAILGUN_WEBHOOK_KEY=
BASE_URL=
DATABASE_PATH= # Used by both Web and Worker services
SESSION_SECRET=
@@ -185,7 +182,7 @@ The test suite will ensure reliability and correctness of all components before
Tests will be placed in the same file as the code they test, following the pattern established in the codebase. Each module will contain its test classes nearby the functionality that class is testing:
- `Biz/PodcastItLater/Core.py` - Contains database logic and TestDatabase, TestUserManagement, TestQueueOperations, TestEpisodeManagement classes
-- `Biz/PodcastItLater/Web.py` - Contains web interface and TestAuthentication, TestArticleSubmission, TestRSSFeed, TestWebhook, TestAdminInterface classes
+- `Biz/PodcastItLater/Web.py` - Contains web interface and TestAuthentication, TestArticleSubmission, TestRSSFeed, TestAdminInterface classes
- `Biz/PodcastItLater/Worker.py` - Contains background worker and TestArticleExtraction, TestTextToSpeech, TestJobProcessing classes
Each file will follow this pattern:
@@ -261,13 +258,6 @@ This keeps tests close to the code they test, making it easier to maintain and u
- `test_feed_episode_order` - Ensure reverse chronological order
- `test_feed_enclosures` - Verify audio URLs and metadata
-#### TestWebhook
-- `test_mailgun_signature_valid` - Accept valid signatures
-- `test_mailgun_signature_invalid` - Reject invalid signatures
-- `test_webhook_url_extraction` - Extract URLs from email body
-- `test_webhook_auto_create_user` - Create user on first email
-- `test_webhook_multiple_urls` - Handle emails with multiple URLs
-- `test_webhook_no_urls` - Handle emails without URLs gracefully
#### TestAdminInterface
- `test_queue_status_view` - Verify queue display
@@ -307,7 +297,6 @@ This keeps tests close to the code they test, making it easier to maintain and u
### Integration Tests
#### TestEndToEnd
-- `test_email_to_podcast` - Full pipeline from email to RSS
- `test_web_to_podcast` - Full pipeline from web submission
- `test_multiple_users` - Concurrent multi-user scenarios
- `test_error_recovery` - System recovery from failures
@@ -317,7 +306,6 @@ This keeps tests close to the code they test, making it easier to maintain and u
#### Fixtures and Mocks
- Mock OpenAI API responses
- Mock S3/Digital Ocean Spaces
-- Mock Mailgun webhooks
- In-memory SQLite for fast tests
- Test data generators for articles
diff --git a/Biz/PodcastItLater/Web.nix b/Biz/PodcastItLater/Web.nix
index 692d39e..dfd26eb 100644
--- a/Biz/PodcastItLater/Web.nix
+++ b/Biz/PodcastItLater/Web.nix
@@ -37,7 +37,6 @@ in {
mkdir -p ${cfg.dataDir}
# Manual step: create this file with secrets
- # MAILGUN_WEBHOOK_KEY=your-mailgun-webhook-key
# SECRET_KEY=your-secret-key-for-sessions
# SESSION_SECRET=your-session-secret
# EMAIL_FROM=noreply@podcastitlater.bensima.com
diff --git a/Biz/PodcastItLater/Web.py b/Biz/PodcastItLater/Web.py
index 792803c..f37fd86 100644
--- a/Biz/PodcastItLater/Web.py
+++ b/Biz/PodcastItLater/Web.py
@@ -1,8 +1,8 @@
"""
PodcastItLater Web Service.
-Web frontend for converting articles to podcast episodes via email submission.
-Provides ludic + htmx interface, mailgun webhook, and RSS feed generation.
+Web frontend for converting articles to podcast episodes.
+Provides ludic + htmx interface and RSS feed generation.
"""
# : out podcastitlater-web
@@ -17,8 +17,6 @@ Provides ludic + htmx interface, mailgun webhook, and RSS feed generation.
# : dep starlette
import Biz.EmailAgent
import Biz.PodcastItLater.Core as Core
-import hashlib
-import hmac
import ludic.catalog.layouts as layouts
import ludic.catalog.pages as pages
import ludic.html as html
@@ -30,7 +28,6 @@ import pathlib
import re
import sys
import tempfile
-import time
import typing
import urllib.parse
import uvicorn
@@ -54,7 +51,6 @@ logger = Log.setup()
# Configuration
DATABASE_PATH = os.getenv("DATABASE_PATH", "podcast.db")
-MAILGUN_WEBHOOK_KEY = os.getenv("MAILGUN_WEBHOOK_KEY", "")
BASE_URL = os.getenv("BASE_URL", "http://localhost:8000")
PORT = int(os.getenv("PORT", "8000"))
@@ -944,30 +940,6 @@ app.add_middleware(
)
-def extract_urls_from_text(text: str) -> list[str]:
- """Extract HTTP/HTTPS URLs from text."""
- url_pattern = r'https?://[^\s<>"\']+[^\s<>"\'.,;!?]'
- return re.findall(url_pattern, text)
-
-
-def verify_mailgun_signature(
- token: str,
- timestamp: str,
- signature: str,
-) -> bool:
- """Verify Mailgun webhook signature."""
- if not MAILGUN_WEBHOOK_KEY:
- return True # Skip verification if no key configured
-
- value = f"{timestamp}{token}"
- expected = hmac.new(
- MAILGUN_WEBHOOK_KEY.encode(),
- value.encode(),
- hashlib.sha256,
- ).hexdigest()
- return hmac.compare_digest(signature, expected)
-
-
@app.get("/")
def index(request: Request) -> HomePage:
"""Display main page with form and status."""
@@ -1165,61 +1137,6 @@ def submit_article(request: Request, data: FormData) -> html.div:
return html.div(f"Error: {e!s}", style={"color": "#dc3545"})
-@app.post("/webhook/mailgun")
-def mailgun_webhook(request: Request, data: FormData) -> Response: # noqa: ARG001
- """Process email submissions."""
- try:
- # Verify signature
- token_raw = data.get("token", "")
- timestamp_raw = data.get("timestamp", "")
- signature_raw = data.get("signature", "")
-
- token = token_raw if isinstance(token_raw, str) else ""
- timestamp = timestamp_raw if isinstance(timestamp_raw, str) else ""
- signature = signature_raw if isinstance(signature_raw, str) else ""
-
- if not verify_mailgun_signature(token, timestamp, signature):
- return Response("Unauthorized", status_code=401)
-
- # Extract email data
- sender_raw = data.get("sender", "")
- body_plain_raw = data.get("body-plain", "")
-
- sender = sender_raw if isinstance(sender_raw, str) else ""
- body_plain = body_plain_raw if isinstance(body_plain_raw, str) else ""
-
- # Auto-create user if doesn't exist
- user = Core.Database.get_user_by_email(sender, get_database_path())
- if not user:
- user_id, token = Core.Database.create_user(
- sender,
- get_database_path(),
- )
- logger.info("Auto-created user %s for email %s", user_id, sender)
- else:
- user_id = user["id"]
-
- # Look for URLs in email body
- urls = extract_urls_from_text(body_plain)
-
- if urls:
- # Use first URL found
- url = urls[0]
- Core.Database.add_to_queue(
- url,
- sender,
- user_id,
- get_database_path(),
- )
- return Response("OK - URL queued")
- # No URL found, treat body as content
- # For MVP, we'll skip this case
- return Response("OK - No URL found")
-
- except Exception: # noqa: BLE001
- return Response("Error", status_code=500)
-
-
@app.get("/feed/{token}.xml")
def rss_feed(request: Request, token: str) -> Response: # noqa: ARG001
"""Generate user-specific RSS podcast feed."""
@@ -1645,166 +1562,6 @@ class TestRSSFeed(BaseWebTest):
self.assertIn("https://example.com/ep2.mp3", response.text)
-class TestWebhook(BaseWebTest):
- """Test Mailgun webhook functionality."""
-
- def test_mailgun_signature_valid(self) -> None:
- """Accept valid signatures."""
- # Save original key
- original_key = globals()["MAILGUN_WEBHOOK_KEY"]
- globals()["MAILGUN_WEBHOOK_KEY"] = "test-key"
-
- try:
- # Generate valid signature
- timestamp = str(int(time.time()))
- token = "test-token" # noqa: S105
-
- value = f"{timestamp}{token}"
- signature = hmac.new(
- b"test-key",
- value.encode(),
- hashlib.sha256,
- ).hexdigest()
-
- response = self.client.post(
- "/webhook/mailgun",
- data={
- "token": token,
- "timestamp": timestamp,
- "signature": signature,
- "sender": "test@example.com",
- "body-plain": "Check out https://example.com/article",
- },
- )
-
- self.assertEqual(response.status_code, 200)
- finally:
- globals()["MAILGUN_WEBHOOK_KEY"] = original_key
-
- def test_mailgun_signature_invalid(self) -> None:
- """Reject invalid signatures."""
- # Save original key
- original_key = globals()["MAILGUN_WEBHOOK_KEY"]
- globals()["MAILGUN_WEBHOOK_KEY"] = "test-key"
-
- try:
- response = self.client.post(
- "/webhook/mailgun",
- data={
- "token": "test-token",
- "timestamp": "12345",
- "signature": "invalid",
- "sender": "test@example.com",
- "body-plain": "https://example.com",
- },
- )
-
- self.assertEqual(response.status_code, 401)
- finally:
- globals()["MAILGUN_WEBHOOK_KEY"] = original_key
-
- def test_webhook_url_extraction(self) -> None:
- """Extract URLs from email body."""
- # Save original key
- original_key = globals()["MAILGUN_WEBHOOK_KEY"]
- globals()["MAILGUN_WEBHOOK_KEY"] = ""
-
- try:
- response = self.client.post(
- "/webhook/mailgun",
- data={
- "sender": "test@example.com",
- "body-plain": (
- "Hey, check this out: "
- "https://example.com/article and also "
- "https://example.com/other"
- ),
- },
- )
-
- self.assertEqual(response.status_code, 200)
-
- # Should queue first URL
- jobs = Core.Database.get_pending_jobs(db_path=get_database_path())
- self.assertEqual(len(jobs), 1)
- self.assertEqual(jobs[0]["url"], "https://example.com/article")
- finally:
- globals()["MAILGUN_WEBHOOK_KEY"] = original_key
-
- def test_webhook_auto_create_user(self) -> None:
- """Create user on first email."""
- # Save original key
- original_key = globals()["MAILGUN_WEBHOOK_KEY"]
- globals()["MAILGUN_WEBHOOK_KEY"] = ""
-
- try:
- response = self.client.post(
- "/webhook/mailgun",
- data={
- "sender": "newuser@example.com",
- "body-plain": "https://example.com/article",
- },
- )
-
- self.assertEqual(response.status_code, 200)
-
- # User should be created
- user = Core.Database.get_user_by_email(
- "newuser@example.com",
- get_database_path(),
- )
- self.assertIsNotNone(user)
- finally:
- globals()["MAILGUN_WEBHOOK_KEY"] = original_key
-
- def test_webhook_multiple_urls(self) -> None:
- """Handle emails with multiple URLs."""
- # Save original key
- original_key = globals()["MAILGUN_WEBHOOK_KEY"]
- globals()["MAILGUN_WEBHOOK_KEY"] = ""
-
- try:
- response = self.client.post(
- "/webhook/mailgun",
- data={
- "sender": "test@example.com",
- "body-plain": (
- "URLs: https://example.com/1 "
- "https://example.com/2 https://example.com/3"
- ),
- },
- )
-
- self.assertEqual(response.status_code, 200)
-
- # Should only queue first URL
- jobs = Core.Database.get_pending_jobs(db_path=get_database_path())
- self.assertEqual(len(jobs), 1)
- self.assertEqual(jobs[0]["url"], "https://example.com/1")
- finally:
- globals()["MAILGUN_WEBHOOK_KEY"] = original_key
-
- def test_webhook_no_urls(self) -> None:
- """Handle emails without URLs gracefully."""
- # Save original key
- original_key = globals()["MAILGUN_WEBHOOK_KEY"]
- globals()["MAILGUN_WEBHOOK_KEY"] = ""
-
- try:
- response = self.client.post(
- "/webhook/mailgun",
- data={
- "sender": "test@example.com",
- "body-plain": "This email has no URLs",
- },
- )
-
- self.assertEqual(response.status_code, 200)
- self.assertIn("No URL found", response.text)
- finally:
- globals()["MAILGUN_WEBHOOK_KEY"] = original_key
-
-
class TestAdminInterface(BaseWebTest):
"""Test admin interface functionality."""
@@ -1925,7 +1682,6 @@ def test() -> None:
TestAuthentication,
TestArticleSubmission,
TestRSSFeed,
- TestWebhook,
TestAdminInterface,
],
)
diff --git a/Biz/PodcastItLater/Worker.py b/Biz/PodcastItLater/Worker.py
index af51260..834d44b 100644
--- a/Biz/PodcastItLater/Worker.py
+++ b/Biz/PodcastItLater/Worker.py
@@ -544,6 +544,7 @@ def main_loop() -> None:
while True:
try:
+ # Process pending jobs
process_pending_jobs(processor)
process_retryable_jobs()