1 files changed, 121 insertions, 0 deletions
diff --git a/Biz/PodcastItLater/Web.py b/Biz/PodcastItLater/Web.py
index 86b2099..036dd45 100644
--- a/Biz/PodcastItLater/Web.py
+++ b/Biz/PodcastItLater/Web.py
@@ -17,6 +17,8 @@ Provides ludic + htmx interface and RSS feed generation.
 # : dep starlette
 import Biz.EmailAgent
 import Biz.PodcastItLater.Core as Core
+import html as html_module
+import httpx
 import ludic.catalog.layouts as layouts
 import ludic.catalog.pages as pages
 import ludic.html as html
@@ -94,6 +96,55 @@ RSS_CONFIG = {
 }
 
 
+def extract_og_metadata(url: str) -> tuple[str | None, str | None]:
+    """Extract Open Graph title and author from URL.
+
+    Fetches the page and scans every <meta> tag with regexes (no HTML parser
+    dependency), so attribute order, extra attributes and quote style do not
+    matter. The author is article:author, falling back to og:site_name.
+    Values are HTML-unescaped and stripped; empty values count as missing.
+
+    NOTE(review): url is user-supplied and fetched server-side without any
+    host allow/deny check (SSRF risk) - confirm this is acceptable.
+
+    Returns:
+        tuple: (title, author) - both may be None if extraction fails
+    """
+    # Attribute values may contain ">" or the other quote character, so
+    # quoted strings are consumed whole instead of ending at any quote.
+    tag_re = r"""<meta\b((?:[^>"']|"[^"]*"|'[^']*')*)>"""
+    attr_re = r"""([\w:.-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')"""
+
+    try:
+        # Use httpx to fetch the page with a timeout
+        response = httpx.get(url, timeout=10.0, follow_redirects=True)
+        response.raise_for_status()
+
+        # Map each meta key (property/name) to its first non-empty content
+        found: dict[str, str] = {}
+        for raw_attrs in re.findall(tag_re, response.text, re.IGNORECASE):
+            attrs = {
+                name.lower(): dq or sq
+                for name, dq, sq in re.findall(attr_re, raw_attrs)
+            }
+            key = (attrs.get("property") or attrs.get("name") or "").lower()
+            value = html_module.unescape(attrs.get("content", "")).strip()
+            if key and value:
+                found.setdefault(key, value)
+
+        # Extract og:title
+        title = found.get("og:title")
+        # Extract author - try article:author first, then og:site_name
+        author = found.get("article:author") or found.get("og:site_name")
+
+    except Exception as e:  # noqa: BLE001
+        # Best-effort: any failure is logged and reported as (None, None)
+        logger.warning("Failed to extract metadata from %s: %s", url, e)
+        return None, None
+    else:
+        return title, author
+
+
 def send_magic_link(email: str, token: str) -> None:
     """Send magic link email to user."""
     subject = "Login to PodcastItLater"
@@ -271,6 +322,21 @@ class QueueStatus(Component[AnyChildren, QueueStatusAttrs]):
                         style={"color": status_color, "font-weight": "bold"},
                     ),
                     html.br(),
+                    # Add title and author if available
+                    *(
+                        [
+                            html.div(
+                                html.strong(item["title"]),
+                                html.br() if item.get("author") else "",
+                                html.small(f"by {item['author']}")
+                                if item.get("author")
+                                else "",
+                                style={"margin": "5px 0"},
+                            ),
+                        ]
+                        if item.get("title")
+                        else []
+                    ),
                     html.small(
                         item["url"][:URL_TRUNCATE_LENGTH]
                         + (
@@ -445,6 +511,13 @@ class AdminView(Component[AnyChildren, AdminViewAttrs]):
                                                     },
                                                 ),
                                                 html.th(
+                                                    "Title",
+                                                    style={
+                                                        "padding": "10px",
+                                                        "text-align": "left",
+                                                    },
+                                                ),
+                                                html.th(
                                                     "Email",
                                                     style={
                                                         "padding": "10px",
@@ -532,6 +605,49 @@ class AdminView(Component[AnyChildren, AdminViewAttrs]):
                                                         },
                                                     ),
                                                     html.td(
+                                                        html.div(
+                                                            item.get(
+                                                                "title",
+                                                                "-",
+                                                            )[
+                                                                :TITLE_TRUNCATE_LENGTH
+                                                            ]
+                                                            + (
+                                                                "..."
+                                                                if item.get(
+                                                                    "title",
+                                                                )
+                                                                and len(
+                                                                    item[
+                                                                        "title"
+                                                                    ],
+                                                                )
+                                                                > (
+                                                                    TITLE_TRUNCATE_LENGTH
+                                                                )
+                                                                else ""
+                                                            ),
+                                                            title=item.get(
+                                                                "title",
+                                                                "",
+                                                            ),
+                                                            style={
+                                                                "max-width": (
+                                                                    "200px"
+                                                                ),
+                                                                "overflow": (
+                                                                    "hidden"
+                                                                ),
+                                                                "text-overflow": (  # noqa: E501
+                                                                    "ellipsis"
+                                                                ),
+                                                            },
+                                                        ),
+                                                        style={
+                                                            "padding": "10px",
+                                                        },
+                                                    ),
+                                                    html.td(
                                                         item["email"] or "-",
                                                         style={
                                                             "padding": "10px",
@@ -1183,11 +1299,16 @@ def submit_article(request: Request, data: FormData) -> html.div:
                 style={"color": "#dc3545"},
             )
 
+        # Extract Open Graph metadata
+        title, author = extract_og_metadata(url)
+
         job_id = Core.Database.add_to_queue(
             url,
             user["email"],
             user_id,
             get_database_path(),
+            title=title,
+            author=author,
         )
         return html.div(
             f"✓ Article submitted successfully! Job ID: {job_id}",