summary refs log tree commit diff
path: root/Biz
diff options
context:
space:
mode:
Diffstat (limited to 'Biz')
-rw-r--r-- Biz/PodcastItLater/Core.py 50
-rw-r--r-- Biz/PodcastItLater/Web.py 121
2 files changed, 162 insertions, 9 deletions
diff --git a/Biz/PodcastItLater/Core.py b/Biz/PodcastItLater/Core.py
index 6c04db8..86676b5 100644
--- a/Biz/PodcastItLater/Core.py
+++ b/Biz/PodcastItLater/Core.py
@@ -82,7 +82,9 @@ class Database: # noqa: PLR0904
status TEXT DEFAULT 'pending',
retry_count INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
- error_message TEXT
+ error_message TEXT,
+ title TEXT,
+ author TEXT
)
""")
@@ -117,14 +119,19 @@ class Database: # noqa: PLR0904
# Run migration to add user support
Database.migrate_to_multi_user(db_path)
+ # Run migration to add metadata fields
+ Database.migrate_add_metadata_fields(db_path)
+
@staticmethod
- def add_to_queue(
+ def add_to_queue( # noqa: PLR0913, PLR0917
url: str,
email: str,
user_id: int,
db_path: str | None = None,
+ title: str | None = None,
+ author: str | None = None,
) -> int:
- """Insert new job into queue, return job ID.
+ """Insert new job into queue with metadata, return job ID.
Raises:
ValueError: If job ID cannot be retrieved after insert.
@@ -134,8 +141,9 @@ class Database: # noqa: PLR0904
with Database.get_connection(db_path) as conn:
cursor = conn.cursor()
cursor.execute(
- "INSERT INTO queue (url, email, user_id) VALUES (?, ?, ?)",
- (url, email, user_id),
+ "INSERT INTO queue (url, email, user_id, title, author) "
+ "VALUES (?, ?, ?, ?, ?)",
+ (url, email, user_id, title, author),
)
conn.commit()
job_id = cursor.lastrowid
@@ -284,7 +292,8 @@ class Database: # noqa: PLR0904
with Database.get_connection(db_path) as conn:
cursor = conn.cursor()
cursor.execute("""
- SELECT id, url, email, status, created_at, error_message
+ SELECT id, url, email, status, created_at, error_message,
+ title, author
FROM queue
WHERE status IN ('pending', 'processing', 'error')
ORDER BY created_at DESC
@@ -382,7 +391,7 @@ class Database: # noqa: PLR0904
cursor.execute(
"""
SELECT id, url, email, status, retry_count, created_at,
- error_message
+ error_message, title, author
FROM queue
WHERE user_id = ?
ORDER BY created_at DESC
@@ -392,7 +401,7 @@ class Database: # noqa: PLR0904
else:
cursor.execute("""
SELECT id, url, email, status, retry_count, created_at,
- error_message
+ error_message, title, author
FROM queue
ORDER BY created_at DESC
""")
@@ -490,6 +499,28 @@ class Database: # noqa: PLR0904
logger.info("Database migrated to support multiple users")
@staticmethod
+    def migrate_add_metadata_fields(db_path: str | None = None) -> None:
+        """Ensure the queue table has ``title`` and ``author`` TEXT columns.
+
+        Columns already present are skipped; a None db_path uses the default.
+        """
+        path = Database.get_default_db_path() if db_path is None else db_path
+        additions = (
+            ("title", "ALTER TABLE queue ADD COLUMN title TEXT"),
+            ("author", "ALTER TABLE queue ADD COLUMN author TEXT"),
+        )
+        with Database.get_connection(path) as conn:
+            cursor = conn.cursor()
+            # Each PRAGMA table_info row describes a column; slot 1 is its name.
+            cursor.execute("PRAGMA table_info(queue)")
+            present = {row[1] for row in cursor.fetchall()}
+            for column, statement in additions:
+                if column not in present:
+                    cursor.execute(statement)
+            conn.commit()
+            logger.info("Database migrated to support metadata fields")
+
+ @staticmethod
def create_user(email: str, db_path: str | None = None) -> tuple[int, str]:
"""Create a new user and return (user_id, token).
@@ -583,7 +614,8 @@ class Database: # noqa: PLR0904
cursor = conn.cursor()
cursor.execute(
"""
- SELECT id, url, email, status, created_at, error_message
+ SELECT id, url, email, status, created_at, error_message,
+ title, author
FROM queue
WHERE user_id = ? AND
status IN ('pending', 'processing', 'error')
diff --git a/Biz/PodcastItLater/Web.py b/Biz/PodcastItLater/Web.py
index 86b2099..036dd45 100644
--- a/Biz/PodcastItLater/Web.py
+++ b/Biz/PodcastItLater/Web.py
@@ -17,6 +17,8 @@ Provides ludic + htmx interface and RSS feed generation.
# : dep starlette
import Biz.EmailAgent
import Biz.PodcastItLater.Core as Core
+import html as html_module
+import httpx
import ludic.catalog.layouts as layouts
import ludic.catalog.pages as pages
import ludic.html as html
@@ -94,6 +96,55 @@ RSS_CONFIG = {
}
+def _find_meta_content(html_content: str, key: str) -> str | None:
+    """Return the unescaped content of the first non-empty <meta> for key.
+
+    Attributes are parsed per tag, so their order, extra attributes and
+    apostrophes inside double-quoted values (content="Don't") all work.
+    """
+    for tag in re.finditer(
+        r"""<meta\b(?:[^>"']|"[^"]*"|'[^']*')*>""", html_content, re.IGNORECASE
+    ):
+        attrs: dict[str, str] = {}
+        for name, _quote, value in re.findall(
+            r"""([\w:-]+)\s*=\s*(["'])(.*?)\2""", tag.group(0), re.DOTALL
+        ):
+            attrs.setdefault(name.lower(), value)
+        keys = {attrs.get("property", "").lower(), attrs.get("name", "").lower()}
+        content = html_module.unescape(attrs.get("content", "")).strip()
+        if key.lower() in keys and content:
+            return content
+    return None
+
+
+def extract_og_metadata(url: str) -> tuple[str | None, str | None]:
+    """Extract Open Graph title and author from URL.
+
+    The page is fetched with httpx (10 s timeout, redirects followed). The
+    title is og:title; the author is article:author, falling back to
+    og:site_name. Any failure is logged as a warning, never raised.
+
+    Returns:
+        tuple: (title, author) - each None if missing or extraction fails
+    """
+    try:
+        # NOTE(review): url appears to be user-submitted and is fetched
+        # server-side as-is; confirm callers restrict scheme/host (SSRF).
+        response = httpx.get(url, timeout=10.0, follow_redirects=True)
+        response.raise_for_status()
+        html_content = response.text
+        title = _find_meta_content(html_content, "og:title")
+        # Prefer the article's author; fall back to the publishing site name.
+        author = _find_meta_content(html_content, "article:author")
+        if author is None:
+            author = _find_meta_content(html_content, "og:site_name")
+    except Exception as e:  # noqa: BLE001
+        logger.warning("Failed to extract metadata from %s: %s", url, e)
+        return None, None
+    else:
+        return title, author
+
+
def send_magic_link(email: str, token: str) -> None:
"""Send magic link email to user."""
subject = "Login to PodcastItLater"
@@ -271,6 +322,21 @@ class QueueStatus(Component[AnyChildren, QueueStatusAttrs]):
style={"color": status_color, "font-weight": "bold"},
),
html.br(),
+ # Add title and author if available
+ *(
+ [
+ html.div(
+ html.strong(item["title"]),
+ html.br() if item.get("author") else "",
+ html.small(f"by {item['author']}")
+ if item.get("author")
+ else "",
+ style={"margin": "5px 0"},
+ ),
+ ]
+ if item.get("title")
+ else []
+ ),
html.small(
item["url"][:URL_TRUNCATE_LENGTH]
+ (
@@ -445,6 +511,13 @@ class AdminView(Component[AnyChildren, AdminViewAttrs]):
},
),
html.th(
+ "Title",
+ style={
+ "padding": "10px",
+ "text-align": "left",
+ },
+ ),
+ html.th(
"Email",
style={
"padding": "10px",
@@ -532,6 +605,49 @@ class AdminView(Component[AnyChildren, AdminViewAttrs]):
},
),
html.td(
+ html.div(
+ item.get(
+ "title",
+ "-",
+ )[
+ :TITLE_TRUNCATE_LENGTH
+ ]
+ + (
+ "..."
+ if item.get(
+ "title",
+ )
+ and len(
+ item[
+ "title"
+ ],
+ )
+ > (
+ TITLE_TRUNCATE_LENGTH
+ )
+ else ""
+ ),
+ title=item.get(
+ "title",
+ "",
+ ),
+ style={
+ "max-width": (
+ "200px"
+ ),
+ "overflow": (
+ "hidden"
+ ),
+ "text-overflow": ( # noqa: E501
+ "ellipsis"
+ ),
+ },
+ ),
+ style={
+ "padding": "10px",
+ },
+ ),
+ html.td(
item["email"] or "-",
style={
"padding": "10px",
@@ -1183,11 +1299,16 @@ def submit_article(request: Request, data: FormData) -> html.div:
style={"color": "#dc3545"},
)
+ # Extract Open Graph metadata
+ title, author = extract_og_metadata(url)
+
job_id = Core.Database.add_to_queue(
url,
user["email"],
user_id,
get_database_path(),
+ title=title,
+ author=author,
)
return html.div(
f"✓ Article submitted successfully! Job ID: {job_id}",