from __future__ import annotations

import json
import os
import re
from dataclasses import dataclass
from typing import Optional, Tuple
from urllib.parse import urlparse

import httpx
from bs4 import BeautifulSoup

# Known shop hostnames mapped to the short shop identifier returned by
# detect_shop(). The lookup there is an exact match on the lower-cased
# hostname, which is why both the bare and the "www." variant are listed.
SHOP_MAP = {
    "alza.cz": "alza",
    "www.alza.cz": "alza",
    "czc.cz": "czc",
    "www.czc.cz": "czc",
    "mall.cz": "mall",
    "www.mall.cz": "mall",
    "datart.cz": "datart",
    "www.datart.cz": "datart",
    "rohlik.cz": "rohlik",
    "www.rohlik.cz": "rohlik",
}

@dataclass
class ScrapedProduct:
    """Data extracted from a single product page by ``scrape_product``."""

    # Product name (JSON-LD name, else the page title); None when nothing was found.
    name: Optional[str]
    # Price exactly as parsed from the page; None when no price was found.
    # NOTE: scrape_product stores the amount without any currency conversion,
    # so despite the field name it is only in CZK when `currency` is "CZK".
    price_czk: Optional[float]
    # Upper-cased currency code reported by the page; defaults to "CZK".
    currency: str = "CZK"
    # URL of the product image (taken from the og:image meta tag), if any.
    image_url: Optional[str] = None

def detect_shop(url: str) -> str:
    """Return a short shop identifier for a product URL.

    Hostnames listed in ``SHOP_MAP`` resolve to their mapped identifier.
    Any other hostname falls back to its second-to-last dot-separated label
    (``shop.example.com`` -> ``example``); a single-label hostname is returned
    unchanged, and ``"unknown"`` is returned when the URL has no hostname.
    """
    hostname = urlparse(url).hostname
    host = hostname.lower() if hostname else ""

    try:
        return SHOP_MAP[host]
    except KeyError:
        pass

    # Rough approximation of the registrable domain: the label before the TLD.
    labels = host.split(".")
    if len(labels) < 2:
        return host if host else "unknown"
    return labels[-2]

def _to_float_price(text: str) -> Optional[float]:
    if not text:
        return None
    # Keep digits, commas, dots
    t = re.sub(r"[^0-9,\.]", "", text)
    if not t:
        return None
    # Czech format: 12 990,00 -> 12990.00; sometimes 12990
    t = t.replace(" ", "")
    if t.count(",") == 1 and t.count(".") == 0:
        t = t.replace(",", ".")
    # Remove thousands separators if any (e.g., 12.990,00)
    # If both dot and comma exist, assume dot thousands, comma decimals
    if t.count(".") >= 1 and t.count(",") == 1:
        t = t.replace(".", "")
        t = t.replace(",", ".")
    try:
        return float(t)
    except ValueError:
        return None

def _flatten_jsonld_nodes(data: object) -> list:
    """Return the dict nodes of a parsed JSON-LD document in discovery order.

    A node with a list-valued ``@graph`` is treated as a container: its dict
    children are queued after the nodes already pending and the container
    itself is not returned.
    """
    # Work on a private list so the parsed document is never mutated.
    pending = list(data) if isinstance(data, list) else [data]
    nodes = []
    position = 0
    while position < len(pending):
        node = pending[position]
        position += 1
        if not isinstance(node, dict):
            continue
        graph = node.get("@graph")
        if isinstance(graph, list):
            pending.extend(child for child in graph if isinstance(child, dict))
            continue
        nodes.append(node)
    return nodes


def _is_jsonld_product(type_value: object) -> bool:
    """Tell whether a JSON-LD ``@type`` value (string or list) names ``Product``."""
    if isinstance(type_value, str):
        return type_value.lower() == "product"
    if isinstance(type_value, list):
        return any(
            isinstance(entry, str) and entry.lower() == "product"
            for entry in type_value
        )
    return False


def _jsonld_offer_price(offer: dict) -> Optional[float]:
    """Return the first usable price of an offer.

    Tries ``price``, then ``lowPrice``, then ``highPrice`` and returns the
    first value that is a number or a parseable string. A numeric ``0`` is a
    valid price; an unparseable string does not block the remaining keys.
    """
    for key in ("price", "lowPrice", "highPrice"):
        value = offer.get(key)
        if isinstance(value, bool):
            # bool is an int subclass, but true/false is never a price.
            continue
        if isinstance(value, (int, float)):
            return float(value)
        if isinstance(value, str):
            parsed = _to_float_price(value)
            if parsed is not None:
                return parsed
    return None


def _extract_jsonld_product(soup: BeautifulSoup) -> Tuple[Optional[str], Optional[float], Optional[str]]:
    """Read name, price and currency from schema.org Product JSON-LD.

    Scans every ``<script type="application/ld+json">`` element in document
    order and returns the data of the first ``Product`` node that carries a
    price. Scripts with invalid JSON and products without a price are skipped.

    Args:
        soup: Parsed HTML document.

    Returns:
        ``(name, price, currency)``. ``price`` is returned exactly as
        published, with no currency conversion; ``currency`` is the offer's
        ``priceCurrency`` or ``None``. All three are ``None`` when no priced
        product was found.
    """
    for script in soup.find_all("script", attrs={"type": "application/ld+json"}):
        try:
            data = json.loads(script.get_text(strip=True) or "{}")
        except (ValueError, RecursionError):
            # Malformed or absurdly nested JSON-LD: ignore this script only.
            continue

        for node in _flatten_jsonld_nodes(data):
            if not _is_jsonld_product(node.get("@type")):
                continue

            offers = node.get("offers")
            if isinstance(offers, dict):
                offers = [offers]
            elif not isinstance(offers, list):
                offers = []

            price = None
            currency = None
            for offer in offers:
                if not isinstance(offer, dict):
                    continue
                # Price and currency are each taken from the first offer
                # that provides a usable value.
                if price is None:
                    price = _jsonld_offer_price(offer)
                if currency is None:
                    code = offer.get("priceCurrency")
                    if isinstance(code, str):
                        currency = code

            if price is not None:
                name = node.get("name")
                return (name if isinstance(name, str) else None, price, currency or None)

    return (None, None, None)

def _extract_og(soup: BeautifulSoup) -> Tuple[Optional[str], Optional[str]]:
    """Return ``(title, image_url)`` taken from Open Graph meta tags.

    The title comes from ``og:title`` and falls back to the text of the
    document's ``<title>`` element when the meta tag is missing or blank.
    The image URL comes from ``og:image``. Either value may be ``None``.
    """

    def meta_content(prop: str) -> Optional[str]:
        # Stripped "content" attribute of <meta property=prop>, if present.
        tag = soup.find("meta", property=prop)
        if tag and tag.get("content"):
            return tag["content"].strip()
        return None

    title = meta_content("og:title")
    img = meta_content("og:image")

    if not title and soup.title:
        page_title = soup.title.get_text(strip=True)
        if page_title:
            title = page_title

    return title, img

def _parse_product_html(html: str) -> ScrapedProduct:
    """Extract product data from the HTML of a product page.

    Price sources, in order of preference:
      1. schema.org Product JSON-LD,
      2. ``<meta itemprop="price">`` microdata (the currency is read from
         ``<meta itemprop="priceCurrency">`` when present),
      3. the first number followed by "Kč" in the visible page text.
    """
    soup = BeautifulSoup(html, "lxml")

    name_ld, price, currency_ld = _extract_jsonld_product(soup)
    title_og, image_url = _extract_og(soup)
    currency = (currency_ld or "CZK").upper()

    # Fallback: microdata meta tags.
    if price is None:
        price_meta = soup.find("meta", attrs={"itemprop": "price"})
        if price_meta and price_meta.get("content"):
            price = _to_float_price(price_meta["content"])
        if price is not None:
            # Use the currency the page declares instead of assuming CZK.
            currency_meta = soup.find("meta", attrs={"itemprop": "priceCurrency"})
            if currency_meta and currency_meta.get("content"):
                currency = currency_meta["content"].strip().upper() or currency

    # Last resort: a number immediately followed by "Kč" in the page text.
    if price is None:
        text = soup.get_text(" ", strip=True)
        m = re.search(r"(\d[\d\s\.,]{1,12})\s*Kč", text)
        if m:
            price = _to_float_price(m.group(1))
            currency = "CZK"

    return ScrapedProduct(
        name=name_ld or title_og,
        price_czk=price,
        currency=currency,
        image_url=image_url,
    )


async def scrape_product(url: str, shop: Optional[str] = None) -> ScrapedProduct:
    """Download a product page and extract its name, price, currency and image.

    Args:
        url: Address of the product page.
        shop: Shop identifier. Accepted for interface compatibility; the
            extraction is currently shop-independent, so it is not used.

    Returns:
        A ``ScrapedProduct``. Fields that could not be determined are ``None``.
        The price is stored as published, without currency conversion.

    Raises:
        httpx.HTTPStatusError: The server answered with a 4xx/5xx status.
        httpx.HTTPError: The request failed (connection error, timeout, ...).
    """
    headers = {"User-Agent": "Mozilla/5.0 (compatible; HlidacCen/1.0)"}
    async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=20) as client:
        r = await client.get(url)
        r.raise_for_status()
        html = r.text

    return _parse_product_html(html)

async def download_image(image_url: str, dest_dir: str, filename_hint: str = "product") -> Optional[str]:
    """Download an image into ``<dest_dir>/images`` and return its relative path.

    Args:
        image_url: Address of the image; nothing is done when it is empty.
        dest_dir: Base directory. It and its ``images`` subdirectory are
            created when missing.
        filename_hint: Used to build the file name. Characters other than
            ASCII letters, digits, ``_`` and ``-`` are replaced by ``_`` and
            the result is cut to 80 characters.

    Returns:
        The path relative to ``dest_dir`` (``images/<name><ext>``), or
        ``None`` when ``image_url`` is empty.

    Raises:
        httpx.HTTPStatusError: The server answered with a 4xx/5xx status.
        httpx.HTTPError: The request failed (connection error, timeout, ...).
        OSError: The directory or the file could not be written.
    """
    if not image_url:
        return None

    safe = re.sub(r"[^a-zA-Z0-9_-]", "_", filename_hint)[:80] or "product"
    ext = os.path.splitext(urlparse(image_url).path)[1].lower()
    if ext not in (".jpg", ".jpeg", ".png", ".webp"):
        # Unknown or missing extension: fall back to the most common one.
        ext = ".jpg"
    rel_path = f"images/{safe}{ext}"
    abs_path = os.path.join(dest_dir, rel_path)

    # Create the directory the file is actually written to. Creating only
    # dest_dir leaves "images" missing and makes open() fail.
    os.makedirs(os.path.dirname(abs_path), exist_ok=True)

    headers = {"User-Agent": "Mozilla/5.0 (compatible; HlidacCen/1.0)"}
    async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=20) as client:
        r = await client.get(image_url)
        r.raise_for_status()
        payload = r.content

    with open(abs_path, "wb") as f:
        f.write(payload)
    return rel_path
