from __future__ import annotations

import html
import re
from html.parser import HTMLParser

# Tags that open a line of visible text: the parser only collects character
# data while at least one of these is open.
VISIBLE_TEXT_TAGS = {
    "a",
    "button",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "label",
    "li",
    "p",
}
# Tags whose whole subtree is skipped when collecting text.
IGNORED_TAGS = {
    "noscript",
    "script",
    "style",
    "template",
}


def html_unescape(value: str) -> str:
    """Return *value* with HTML character references decoded via ``html.unescape``."""
    decoded = html.unescape(value)
    return decoded


def normalize_text(value: str) -> str:
    """Decode HTML character references in *value* and tidy its whitespace.

    Every run of whitespace is replaced by a single space, and leading and
    trailing whitespace is removed.
    """
    decoded = html_unescape(value)
    single_spaced = re.sub(r"\s+", " ", decoded)
    return single_spaced.strip()


class VisibleTextExtractor(HTMLParser):
    """Collect the text of visible text-bearing elements as a list of lines.

    Character data is gathered while at least one ``VISIBLE_TEXT_TAGS``
    element is open, and skipped inside ``IGNORED_TAGS`` elements and inside
    any element hidden through the ``hidden`` attribute,
    ``aria-hidden="true"`` or an inline ``display:none`` /
    ``visibility:hidden`` style.  Whenever a visible text element closes, the
    text gathered so far is appended to ``lines`` as one entry with
    whitespace runs collapsed to single spaces.

    Typical use: ``feed()`` the markup, call ``close()``, then read ``lines``.
    """

    # Void elements have no content and no end tag.  HTMLParser only reports
    # an end tag for them when they are written as ``<br/>``, so they must
    # never occupy a slot on the open-element stacks.
    _VOID_TAGS = frozenset(
        {
            "area",
            "base",
            "br",
            "col",
            "embed",
            "frame",
            "hr",
            "img",
            "input",
            "keygen",
            "link",
            "meta",
            "param",
            "source",
            "track",
            "wbr",
        }
    )
    _WHITESPACE_RUN = re.compile(r"\s+")

    def __init__(self) -> None:
        # convert_charrefs=True: handle_data receives text whose character
        # references are already decoded, so it must not be unescaped again.
        super().__init__(convert_charrefs=True)
        # Number of currently open IGNORED_TAGS elements.
        self.ignored_depth = 0
        # One entry per open non-void, non-ignored element: is it hidden?
        self.hidden_stack: list[bool] = []
        # Names of the open elements that started a visible text line.
        self.visible_tag_stack: list[str] = []
        # Text fragments gathered for the line currently being built.
        self.current_chunks: list[str] = []
        # Finished lines, in document order.
        self.lines: list[str] = []
        # Parallel to hidden_stack: (tag name, was it pushed onto
        # visible_tag_stack?).  Lets an end tag close exactly the element it
        # belongs to instead of whatever happens to be on top.
        self._open_elements: list[tuple[str, bool]] = []

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Record a newly opened element and whether it starts a visible line."""
        lowered = tag.lower()
        if lowered in IGNORED_TAGS:
            self.ignored_depth += 1
            return
        if lowered in self._VOID_TAGS:
            # No content and no matching end tag: nothing to track.
            return
        # Valueless attributes (e.g. a bare ``hidden``) arrive as None.
        attrs_dict = {key.lower(): (value or "") for key, value in attrs}
        self.hidden_stack.append(self._is_hidden(attrs_dict))
        opens_line = (
            lowered in VISIBLE_TEXT_TAGS
            and not self.ignored_depth
            and not any(self.hidden_stack)
        )
        if opens_line:
            self.visible_tag_stack.append(lowered)
        self._open_elements.append((lowered, opens_line))

    def handle_endtag(self, tag: str) -> None:
        """Close the innermost open element named *tag*.

        Elements still open inside it (their end tags were omitted) are
        closed along with it.  End tags that match no open element are
        ignored so they cannot unbalance the hidden/visible bookkeeping.
        """
        lowered = tag.lower()
        if lowered in IGNORED_TAGS:
            if self.ignored_depth:
                self.ignored_depth -= 1
            return
        if lowered in self._VOID_TAGS:
            # Second half of ``<br/>`` or a stray ``</br>``: never pushed.
            return
        open_elements = self._open_elements
        for index in range(len(open_elements) - 1, -1, -1):
            if open_elements[index][0] == lowered:
                break
        else:
            return  # stray end tag
        while len(open_elements) > index:
            _, opened_line = open_elements.pop()
            if self.hidden_stack:
                self.hidden_stack.pop()
            if opened_line and self.visible_tag_stack:
                self.visible_tag_stack.pop()
                self._flush_line()

    def handle_data(self, data: str) -> None:
        """Collect *data* when it sits inside a visible, non-ignored element."""
        if self.ignored_depth or any(self.hidden_stack) or not self.visible_tag_stack:
            return
        # Only whitespace is normalized here: the parser has already decoded
        # character references, and decoding again would turn literal text
        # such as "&lt;" into "<".
        collapsed = self._collapse_whitespace(data)
        if collapsed:
            self.current_chunks.append(collapsed)

    def handle_comment(self, data: str) -> None:
        """Ignore comments; they contribute no visible text."""
        return

    def close(self) -> None:
        """Finish parsing and emit any text still pending as a final line."""
        super().close()
        self._flush_line()

    def _flush_line(self) -> None:
        """Join the pending chunks with spaces into one line and reset them."""
        if not self.current_chunks:
            return
        line = self._collapse_whitespace(" ".join(self.current_chunks))
        if line:
            self.lines.append(line)
        self.current_chunks = []

    @classmethod
    def _collapse_whitespace(cls, text: str) -> str:
        """Replace whitespace runs with one space and strip both ends."""
        return cls._WHITESPACE_RUN.sub(" ", text).strip()

    @staticmethod
    def _is_hidden(attrs: dict[str, str]) -> bool:
        """Return True if *attrs* (lower-cased names) mark an element as hidden."""
        if "hidden" in attrs:
            return True
        if attrs.get("aria-hidden", "").lower() == "true":
            return True
        # Drop all whitespace (spaces, tabs, newlines) so that
        # "display : none" and "display:\tnone" are both recognized.
        style = "".join(attrs.get("style", "").split()).lower()
        return "display:none" in style or "visibility:hidden" in style


def extract_visible_rendered_text(body: str) -> str:
    """Run *body* through ``VisibleTextExtractor`` and return its lines.

    The collected lines are joined with newline characters; the result is an
    empty string when the extractor produced no lines.
    """
    extractor = VisibleTextExtractor()
    extractor.feed(body)
    # close() makes the parser process any buffered input and emit a
    # trailing, still-open line.
    extractor.close()
    collected_lines = extractor.lines
    return "\n".join(collected_lines)