diff options
Diffstat (limited to 'Biz')
| -rw-r--r-- | Biz/PodcastItLater/Core.py | 50 | ||||
| -rw-r--r-- | Biz/PodcastItLater/Web.py | 121 |
2 files changed, 162 insertions, 9 deletions
diff --git a/Biz/PodcastItLater/Core.py b/Biz/PodcastItLater/Core.py index 6c04db8..86676b5 100644 --- a/Biz/PodcastItLater/Core.py +++ b/Biz/PodcastItLater/Core.py @@ -82,7 +82,9 @@ class Database: # noqa: PLR0904 status TEXT DEFAULT 'pending', retry_count INTEGER DEFAULT 0, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - error_message TEXT + error_message TEXT, + title TEXT, + author TEXT ) """) @@ -117,14 +119,19 @@ class Database: # noqa: PLR0904 # Run migration to add user support Database.migrate_to_multi_user(db_path) + # Run migration to add metadata fields + Database.migrate_add_metadata_fields(db_path) + @staticmethod - def add_to_queue( + def add_to_queue( # noqa: PLR0913, PLR0917 url: str, email: str, user_id: int, db_path: str | None = None, + title: str | None = None, + author: str | None = None, ) -> int: - """Insert new job into queue, return job ID. + """Insert new job into queue with metadata, return job ID. Raises: ValueError: If job ID cannot be retrieved after insert. @@ -134,8 +141,9 @@ class Database: # noqa: PLR0904 with Database.get_connection(db_path) as conn: cursor = conn.cursor() cursor.execute( - "INSERT INTO queue (url, email, user_id) VALUES (?, ?, ?)", - (url, email, user_id), + "INSERT INTO queue (url, email, user_id, title, author) " + "VALUES (?, ?, ?, ?, ?)", + (url, email, user_id, title, author), ) conn.commit() job_id = cursor.lastrowid @@ -284,7 +292,8 @@ class Database: # noqa: PLR0904 with Database.get_connection(db_path) as conn: cursor = conn.cursor() cursor.execute(""" - SELECT id, url, email, status, created_at, error_message + SELECT id, url, email, status, created_at, error_message, + title, author FROM queue WHERE status IN ('pending', 'processing', 'error') ORDER BY created_at DESC @@ -382,7 +391,7 @@ class Database: # noqa: PLR0904 cursor.execute( """ SELECT id, url, email, status, retry_count, created_at, - error_message + error_message, title, author FROM queue WHERE user_id = ? 
@staticmethod
def migrate_add_metadata_fields(db_path: str | None = None) -> None:
    """Ensure the queue table has ``title`` and ``author`` columns.

    Reads the current column list with ``PRAGMA table_info(queue)`` and
    issues an ``ALTER TABLE`` only for columns that are not present yet,
    so running the migration repeatedly is harmless.

    Args:
        db_path: Database file to migrate. When None, the path returned
            by ``Database.get_default_db_path()`` is used.
    """
    path = db_path if db_path is not None else Database.get_default_db_path()

    # Column name paired with the statement that creates it.
    additions = (
        ("title", "ALTER TABLE queue ADD COLUMN title TEXT"),
        ("author", "ALTER TABLE queue ADD COLUMN author TEXT"),
    )

    with Database.get_connection(path) as conn:
        cur = conn.cursor()

        # Index 1 of each PRAGMA table_info row is the column name.
        cur.execute("PRAGMA table_info(queue)")
        present = [row[1] for row in cur.fetchall()]

        for column, statement in additions:
            if column in present:
                continue
            cur.execute(statement)

        conn.commit()
        logger.info("Database migrated to support metadata fields")
# : dep starlette import Biz.EmailAgent import Biz.PodcastItLater.Core as Core +import html as html_module +import httpx import ludic.catalog.layouts as layouts import ludic.catalog.pages as pages import ludic.html as html @@ -94,6 +96,55 @@ RSS_CONFIG = { } +def extract_og_metadata(url: str) -> tuple[str | None, str | None]: + """Extract Open Graph title and author from URL. + + Returns: + tuple: (title, author) - both may be None if extraction fails + """ + try: + # Use httpx to fetch the page with a timeout + response = httpx.get(url, timeout=10.0, follow_redirects=True) + response.raise_for_status() + + # Simple regex-based extraction to avoid heavy dependencies + html_content = response.text + + # Extract og:title + title_match = re.search( + r'<meta\s+(?:property|name)=["\']og:title["\']\s+content=["\'](.*?)["\']', + html_content, + re.IGNORECASE, + ) + title = title_match.group(1) if title_match else None + + # Extract author - try article:author first, then og:site_name + author_match = re.search( + r'<meta\s+(?:property|name)=["\']article:author["\']\s+content=["\'](.*?)["\']', + html_content, + re.IGNORECASE, + ) + if not author_match: + author_match = re.search( + r'<meta\s+(?:property|name)=["\']og:site_name["\']\s+content=["\'](.*?)["\']', + html_content, + re.IGNORECASE, + ) + author = author_match.group(1) if author_match else None + + # Clean up HTML entities + if title: + title = html_module.unescape(title) + if author: + author = html_module.unescape(author) + + except Exception as e: # noqa: BLE001 + logger.warning("Failed to extract metadata from %s: %s", url, e) + return None, None + else: + return title, author + + def send_magic_link(email: str, token: str) -> None: """Send magic link email to user.""" subject = "Login to PodcastItLater" @@ -271,6 +322,21 @@ class QueueStatus(Component[AnyChildren, QueueStatusAttrs]): style={"color": status_color, "font-weight": "bold"}, ), html.br(), + # Add title and author if available + *( + [ + html.div( 
+ html.strong(item["title"]), + html.br() if item.get("author") else "", + html.small(f"by {item['author']}") + if item.get("author") + else "", + style={"margin": "5px 0"}, + ), + ] + if item.get("title") + else [] + ), html.small( item["url"][:URL_TRUNCATE_LENGTH] + ( @@ -445,6 +511,13 @@ class AdminView(Component[AnyChildren, AdminViewAttrs]): }, ), html.th( + "Title", + style={ + "padding": "10px", + "text-align": "left", + }, + ), + html.th( "Email", style={ "padding": "10px", @@ -532,6 +605,49 @@ class AdminView(Component[AnyChildren, AdminViewAttrs]): }, ), html.td( + html.div( + item.get( + "title", + "-", + )[ + :TITLE_TRUNCATE_LENGTH + ] + + ( + "..." + if item.get( + "title", + ) + and len( + item[ + "title" + ], + ) + > ( + TITLE_TRUNCATE_LENGTH + ) + else "" + ), + title=item.get( + "title", + "", + ), + style={ + "max-width": ( + "200px" + ), + "overflow": ( + "hidden" + ), + "text-overflow": ( # noqa: E501 + "ellipsis" + ), + }, + ), + style={ + "padding": "10px", + }, + ), + html.td( item["email"] or "-", style={ "padding": "10px", @@ -1183,11 +1299,16 @@ def submit_article(request: Request, data: FormData) -> html.div: style={"color": "#dc3545"}, ) + # Extract Open Graph metadata + title, author = extract_og_metadata(url) + job_id = Core.Database.add_to_queue( url, user["email"], user_id, get_database_path(), + title=title, + author=author, ) return html.div( f"✓ Article submitted successfully! Job ID: {job_id}", |
