"""Text processing utilities for TTS conversion."""

# : dep openai
# : dep pytest
import logging
import Omni.App as App
import Omni.Log as Log
import Omni.Test as Test
import openai
import os
import sys

# Module-level logger; Log.setup is the project's Omni.Log hook applied to it.
logger = logging.getLogger(__name__)
Log.setup(logger)

# Configuration from environment variables
# OPENAI_API_KEY is None when the variable is unset (os.getenv's default).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


def prepare_text_for_tts(text: str, title: str) -> list[str]:
    """Use LLM to prepare text for TTS, returning chunks ready for speech.

    The text is first split mechanically into chunks of at most roughly
    3000 characters, then each chunk is passed to the LLM editor. The first
    chunk is edited with the title so an intro can be added; the last chunk
    is flagged so a closing can be added.

    If editing a chunk fails for any reason, the failure is logged and the
    raw chunk is used instead, with a canned intro and/or closing so the
    audio still starts and ends sensibly.

    Args:
        text: Full article text.
        title: Article title, used for the spoken intro.

    Returns:
        One speech-ready string per chunk, in document order. Empty when
        ``text`` contains no non-blank content.
    """
    raw_chunks = split_text_into_chunks(text, max_chars=3000)

    logger.info("Split article into %d raw chunks", len(raw_chunks))

    edited_chunks: list[str] = []
    last_index = len(raw_chunks) - 1

    for i, chunk in enumerate(raw_chunks):
        is_first = i == 0
        is_last = i == last_index

        try:
            edited_chunk = edit_chunk_for_speech(
                chunk,
                title=title if is_first else None,
                is_first=is_first,
                is_last=is_last,
            )
        except Exception:
            # Best effort: one failed LLM call must not lose the chunk.
            logger.exception("Failed to edit chunk %d", i + 1)
            edited_chunk = _fallback_chunk(
                chunk,
                title,
                is_first=is_first,
                is_last=is_last,
            )
        edited_chunks.append(edited_chunk)

    return edited_chunks


def _fallback_chunk(
    chunk: str,
    title: str,
    *,
    is_first: bool,
    is_last: bool,
) -> str:
    """Wrap a raw chunk with the canned intro and/or closing.

    The two flags are handled independently so that a single-chunk article
    (both first and last) receives both the intro and the closing.
    """
    fallback = chunk
    if is_first:
        fallback = f"This is an audio version of {title}. {fallback}"
    if is_last:
        fallback = f"{fallback} This concludes the article."
    return fallback


def split_text_into_chunks(text: str, max_chars: int = 3000) -> list[str]:
    """Split text into chunks at paragraph and sentence boundaries.

    Paragraphs (separated by blank lines) are packed into chunks joined by
    single spaces. A paragraph longer than ``max_chars`` is broken further
    on ``". "`` sentence boundaries. A single sentence (or word) that is
    itself longer than ``max_chars`` is never split, so a chunk may exceed
    the limit in that case.

    Args:
        text: Text to split.
        max_chars: Target maximum number of characters per chunk.

    Returns:
        The non-empty chunks in document order, each stripped of
        surrounding whitespace. Empty when ``text`` has no content.
    """
    chunks: list[str] = []
    current_chunk = ""

    for para in text.split("\n\n"):
        para_stripped = para.strip()
        if not para_stripped:
            continue

        # If paragraph itself is too long, split by sentences
        if len(para_stripped) > max_chars:
            sentences = para_stripped.split(". ")
            last_index = len(sentences) - 1
            for index, sentence in enumerate(sentences):
                # split() removed ". " only *between* fragments; the final
                # fragment keeps whatever ending it had, so it gets just a
                # joining space rather than an invented period.
                separator = " " if index == last_index else ". "
                piece = sentence + separator
                if len(current_chunk) + len(piece) < max_chars:
                    current_chunk += piece
                else:
                    if current_chunk:
                        chunks.append(current_chunk.strip())
                    current_chunk = piece
        # If adding this paragraph would exceed limit, start new chunk
        elif len(current_chunk) + len(para_stripped) + 2 > max_chars:
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = para_stripped + " "
        else:
            current_chunk += para_stripped + " "

    # Don't forget the last chunk
    if current_chunk:
        chunks.append(current_chunk.strip())

    return chunks


def edit_chunk_for_speech(
    chunk: str,
    title: str | None = None,
    *,
    is_first: bool = False,
    is_last: bool = False,
) -> str:
    """Use LLM to lightly edit a single chunk for speech.

    Sends the chunk to the ``gpt-4o-mini`` chat model with a system prompt
    describing the editing guidelines. When the chunk is the first one and a
    title is given, the model is asked to add an intro; when it is the last
    one, the model is asked to add a closing. A chunk that is both gets both
    requests. Replies longer than 4000 characters are shortened.

    Args:
        chunk: Raw text to edit.
        title: Article title for the intro; ignored unless ``is_first``.
        is_first: Whether this is the first chunk of the article.
        is_last: Whether this is the last chunk of the article.

    Returns:
        The edited text, at most 4000 characters long.

    Raises:
        ValueError: If no content is returned from LLM.
        Exception: Any error raised by the OpenAI client is logged and
            re-raised unchanged.
    """
    system_prompt = (
        "You are a podcast script editor. Your job is to lightly edit text "
        "to make it sound natural when spoken aloud.\n\n"
        "Guidelines:\n"
    )
    system_prompt += """
- Remove URLs and email addresses, replacing with descriptive phrases
- Convert bullet points and lists into flowing sentences
- Fix any awkward phrasing for speech
- Remove references like "click here" or "see below"
- Keep edits minimal - preserve the original content and style
- Do NOT add commentary or explanations
- Return ONLY the edited text, no JSON or formatting
"""

    user_prompt = _build_user_prompt(
        chunk,
        title,
        is_first=is_first,
        is_last=is_last,
    )

    try:
        client: openai.OpenAI = openai.OpenAI(api_key=OPENAI_API_KEY)
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.3,  # Lower temperature for more consistent edits
            max_tokens=4000,
        )

        content = response.choices[0].message.content
        if not content:
            msg = "No content returned from LLM"
            raise ValueError(msg)  # noqa: TRY301

        # Ensure the chunk isn't too long
        content = _truncate_at_sentence(content, max_length=4000)

    except Exception:
        logger.exception("LLM chunk editing failed")
        raise
    else:
        return content


def _build_user_prompt(
    chunk: str,
    title: str | None,
    *,
    is_first: bool,
    is_last: bool,
) -> str:
    """Build the user message, requesting an intro and/or closing as needed."""
    wants_intro = is_first and bool(title)
    if wants_intro and is_last:
        # Single-chunk article: it needs both the intro and the closing.
        return (
            f"Add a brief intro mentioning this is an audio version of "
            f"'{title}', then edit this text and add a brief closing:"
            f"\n\n{chunk}"
        )
    if wants_intro:
        return (
            f"Add a brief intro mentioning this is an audio version of "
            f"'{title}', then edit this text:\n\n{chunk}"
        )
    if is_last:
        return f"Edit this text and add a brief closing:\n\n{chunk}"
    return chunk


def _truncate_at_sentence(content: str, max_length: int) -> str:
    """Shorten content to at most max_length, preferring a sentence boundary."""
    if len(content) <= max_length:
        return content

    truncated = ""
    for sentence in content.split(". "):
        if len(truncated) + len(sentence) + 2 >= max_length:
            break
        truncated += sentence + ". "
    truncated = truncated.strip()
    if truncated:
        return truncated

    # Not even the first sentence fits. Returning "" here would silently
    # drop the whole chunk, so cut at the last space before the limit
    # (or hard-cut when there is no space at all).
    head = content[:max_length]
    cut = head.rfind(" ")
    return (head[:cut] if cut > 0 else head).rstrip()


class TestTextChunking(Test.TestCase):
    """Edge-case coverage for split_text_into_chunks."""

    def test_split_text_single_long_word(self) -> None:
        """A lone word longer than the limit stays whole in one chunk."""
        limit = 3000
        word_length = 4000
        result = split_text_into_chunks("a" * word_length, max_chars=limit)

        # The splitter only breaks on blank lines and ". ", never inside a
        # word, so the oversized word is expected back unchanged.
        self.assertEqual(len(result), 1)
        self.assertEqual(len(result[0]), word_length)

    def test_split_text_no_sentence_boundaries(self) -> None:
        """Long text without any ". " separator is kept as one chunk."""
        limit = 3000
        unbroken = "word " * 1000  # 5000 chars
        result = split_text_into_chunks(unbroken, max_chars=limit)

        # With no ". " to split on, the single chunk exceeds the limit.
        self.assertEqual(len(result), 1)
        self.assertGreater(len(result[0]), limit)


def test() -> None:
    """Run this module's test cases via Test.run in the Test area."""
    suites = [
        TestTextChunking,
    ]
    Test.run(App.Area.Test, suites)


def main() -> None:
    """Run the tests when "test" is among the CLI arguments, else just log."""
    if "test" not in sys.argv:
        logger.info("TextProcessing module loaded")
        return
    test()