import argparse
import asyncio
import json
import random
import re
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Set, Tuple
from urllib.parse import urljoin, urlparse

import aiofiles
import aiohttp
import pandas as pd
import uvicorn
from aiohttp import ClientError, ClientTimeout, TCPConnector
from bs4 import BeautifulSoup, Tag
from fastapi import BackgroundTasks, FastAPI, Header, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel

# Desktop-Chrome User-Agent sent with every request so the scraper looks
# like an ordinary browser to trivial bot filters.
DEFAULT_USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
)

# Directory holding one JSON status file per background job.
# NOTE: created eagerly as an import-time side effect, relative to the
# process working directory (not the script directory).
JOBS_DIR = Path("jobs")
JOBS_DIR.mkdir(exist_ok=True)


class SyncRequest(BaseModel):
    """Request body accepted by POST /sync."""

    # Optional event label from the caller; not read anywhere in this module.
    event: Optional[str] = None
    # Requested sync behavior; only echoed into the job status record here.
    sync_mode: Optional[str] = "status_only"
    # Optional client-side timestamp; not read anywhere in this module.
    timestamp: Optional[str] = None


def load_config(config_path: Path) -> Dict[str, Any]:
    """Read and parse the JSON configuration file.

    Raises:
        FileNotFoundError: if *config_path* does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    if not config_path.exists():
        raise FileNotFoundError(f"Config file not found: {config_path}")
    return json.loads(config_path.read_text(encoding="utf-8"))


def clean_text(value: Optional[str]) -> str:
    """Normalize whitespace in *value*.

    Collapses every internal run of whitespace to a single space and strips
    the ends. None and empty strings normalize to "".
    """
    if not value:
        return ""
    collapsed = re.sub(r"\s+", " ", value)
    return collapsed.strip()


def inner_html(element: Optional[Tag]) -> str:
    """Serialize the children of *element* back into an HTML string.

    Returns "" for None so callers can pass ``select_one()`` results
    through without a guard.
    """
    if element is None:
        return ""
    pieces = [str(child) for child in element.contents]
    return "".join(pieces).strip()


def parse_numeric_price(raw_price: str) -> Optional[float]:
    """Parse a human-formatted price string into a float.

    Handles non-breaking/regular spaces as thousands separators and the
    Hungarian decimal comma, and strips currency symbols. Returns None
    when no numeric value can be extracted.
    """
    if not raw_price:
        return None
    # Drop (non-breaking) spaces used as thousands separators.
    normalized = raw_price.replace("\xa0", " ").replace(" ", "")
    # Hungarian decimal comma -> dot.
    normalized = normalized.replace(",", ".")
    # Strip currency symbols and any other non-numeric characters.
    normalized = re.sub(r"[^0-9.]", "", normalized)
    # BUGFIX: formats like "1.234,56" leave multiple dots after the comma
    # replacement ("1.234.56"), which float() rejects. Treat every dot
    # except the last as a thousands separator so the value still parses.
    if normalized.count(".") > 1:
        integer_part, _, decimal_part = normalized.rpartition(".")
        normalized = integer_part.replace(".", "") + "." + decimal_part
    if normalized == "":
        return None
    try:
        return float(normalized)
    except ValueError:
        return None


def save_job_status(job_id: str, status: Dict) -> None:
    """Persist *status* as pretty-printed JSON under JOBS_DIR/<job_id>.json."""
    target = JOBS_DIR / f"{job_id}.json"
    with target.open("w", encoding="utf-8") as handle:
        json.dump(status, handle, ensure_ascii=False, indent=2)


def get_job_status(job_id: str) -> Optional[Dict]:
    """Load the persisted status dict for *job_id*.

    Returns None when the status file is missing, unreadable or contains
    invalid JSON (FileNotFoundError is a subclass of IOError/OSError, so a
    single except clause covers the missing-file case too).
    """
    job_file = JOBS_DIR / f"{job_id}.json"
    try:
        with job_file.open("r", encoding="utf-8") as handle:
            return json.load(handle)
    except (json.JSONDecodeError, IOError):
        return None


def hash_equals(a: str, b: str) -> bool:
    """Compare two strings without short-circuiting on the first mismatch.

    XOR-folds character codes so equal-length comparisons take constant
    time; differing lengths return False immediately.
    """
    if len(a) != len(b):
        return False
    mismatch = 0
    for left, right in zip(a, b):
        mismatch |= ord(left) ^ ord(right)
    return not mismatch


class OngyujtoScraper:
    """Async scraper using aiohttp for high-performance concurrent requests.

    Crawls the configured shop homepage, discovers category links, paginates
    through each category to collect product URLs, then fetches and parses
    product pages concurrently. The CSS selectors (``itemprop`` microdata,
    ``.ajax_block_product`` thumbnails) look like a PrestaShop-style
    storefront -- confirm against the live markup before changing them.

    Use as an async context manager (``async with OngyujtoScraper(...)``)
    so the HTTP session and connector are created and torn down properly.
    """

    def __init__(
        self,
        config: Dict[str, Any],
        test_mode: bool = False,
        test_product_limit: int = 3,
    ):
        """Read scraper settings from *config*.

        Recognized config keys (all optional, with defaults):
        ``target_url``, ``request_timeout_seconds``, ``max_retries``,
        ``max_concurrent`` and ``scraper_delay_seconds`` (a two-element
        ``[min, max]`` list of per-request delays in seconds).

        In *test_mode* only the first category is crawled and at most
        *test_product_limit* products are processed.
        """
        self.config = config
        self.test_mode = test_mode
        self.test_product_limit = test_product_limit

        self.target_url = str(config.get("target_url", "https://ongyujto.net/")).strip()
        # Separate connect timeout keeps a dead host from consuming the
        # whole total budget.
        self.request_timeout = ClientTimeout(
            total=int(config.get("request_timeout_seconds", 10)), connect=5
        )
        self.max_retries = int(config.get("max_retries", 3))
        self.max_concurrent = int(config.get("max_concurrent", 20))

        # Random inter-request delay range; falls back to the defaults when
        # the config value is not a [min, max] pair.
        delay = config.get("scraper_delay_seconds", [0.05, 0.1])
        if isinstance(delay, list) and len(delay) == 2:
            self.delay_min, self.delay_max = float(delay[0]), float(delay[1])
        else:
            self.delay_min, self.delay_max = 0.05, 0.1

        # force_close disables keep-alive (fresh connection per request);
        # DNS answers are cached for 5 minutes.
        self.connector = TCPConnector(
            limit=30,
            limit_per_host=10,
            enable_cleanup_closed=True,
            force_close=True,
            ttl_dns_cache=300,
            use_dns_cache=True,
        )

        # Created in __aenter__; None until the async context is entered.
        self.session: Optional[aiohttp.ClientSession] = None
        self.semaphore: Optional[asyncio.Semaphore] = None

    async def __aenter__(self):
        """Async context manager entry: build the shared HTTP session."""
        self.session = aiohttp.ClientSession(
            connector=self.connector,
            timeout=self.request_timeout,
            # Browser-like headers to pass trivial bot filtering.
            headers={
                "User-Agent": DEFAULT_USER_AGENT,
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "hu-HU,hu;q=0.9,en-US;q=0.8,en;q=0.7",
                "Accept-Encoding": "gzip, deflate",
            },
            # Status codes are handled manually in fetch_url.
            raise_for_status=False,
        )
        self.semaphore = asyncio.Semaphore(self.max_concurrent)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit: release HTTP resources."""
        if self.session:
            await self.session.close()
        # NOTE(review): the session owns the connector by default, so
        # session.close() should already close it; this second close looks
        # redundant but harmless -- confirm against the aiohttp version used.
        if self.connector:
            await self.connector.close()

    async def fetch_url(self, url: str) -> Optional[BeautifulSoup]:
        """Fetch *url* (absolute, or relative to target_url) with retries.

        Returns the parsed BeautifulSoup document on success, or None after
        exhausting retries, on any 4xx response, or on an unexpected error.
        5xx responses and transport errors are retried with a linear backoff
        capped at 8 seconds; 4xx responses are treated as permanent.
        """
        if not self.session:
            raise RuntimeError("Session not initialized. Use 'async with scraper:' context!")

        absolute_url = urljoin(self.target_url, url)

        for attempt in range(1, self.max_retries + 1):
            try:
                # The semaphore caps concurrent requests; the random delay
                # adds jitter so requests are not perfectly synchronized.
                async with self.semaphore:
                    await asyncio.sleep(random.uniform(self.delay_min, self.delay_max))

                    async with self.session.get(absolute_url, allow_redirects=True) as response:
                        if 500 <= response.status < 600:
                            print(f"[WARN] Server error {response.status} on {absolute_url} attempt {attempt}")
                            if attempt < self.max_retries:
                                # NOTE(review): this backoff sleep runs while
                                # still holding the semaphore slot, slightly
                                # reducing effective concurrency during retries.
                                await asyncio.sleep(min(8, attempt * 2))
                                continue
                            return None

                        if 400 <= response.status < 500:
                            # Client errors are not retried.
                            print(f"[ERROR] Client error {response.status} on {absolute_url}")
                            return None

                        html = await response.text()
                        return BeautifulSoup(html, "html.parser")

            except (ClientError, asyncio.TimeoutError) as exc:
                print(f"[WARN] Request exception ({absolute_url}) attempt {attempt}/{self.max_retries}: {exc}")
                if attempt < self.max_retries:
                    await asyncio.sleep(min(8, attempt * 2))
                    continue
                return None
            except Exception as exc:
                # Catch-all so a single bad URL cannot abort a whole batch.
                print(f"[ERROR] Unexpected error fetching {absolute_url}: {exc}")
                return None

        return None

    def extract_categories(self, home_soup: BeautifulSoup) -> List[Dict[str, str]]:
        """Extract category links from homepage (sync, CPU bound).

        Returns dicts with "name", "hierarchy" and absolute "url" keys,
        de-duplicated by URL in first-seen order.
        """
        categories: List[Dict[str, str]] = []
        seen_urls: Set[str] = set()

        for a_tag in home_soup.select(".category-sub-link[href]"):
            href = clean_text(a_tag.get("href"))
            if not href:
                continue

            category_url = urljoin(self.target_url, href)
            if category_url in seen_urls:
                continue

            category_name = clean_text(a_tag.get_text(" ", strip=True))
            category_hierarchy = self._build_category_hierarchy(a_tag)

            categories.append({
                "name": category_name,
                # Fall back to the flat name when no ancestry was found.
                "hierarchy": category_hierarchy or category_name,
                "url": category_url,
            })
            seen_urls.add(category_url)

        return categories

    def _build_category_hierarchy(self, category_link: Tag) -> str:
        """Build a "root > ... > leaf" hierarchy string for a category link.

        Walks up the enclosing <li> ancestors, taking each ancestor's own
        category link text, then reverses so the root category comes first.
        """
        current_li = category_link.find_parent("li")
        hierarchy_parts: List[str] = []

        own_name = clean_text(category_link.get_text(" ", strip=True))
        if own_name:
            hierarchy_parts.append(own_name)

        while current_li is not None:
            parent_li = current_li.find_parent("li")
            if parent_li is None:
                break

            parent_link = parent_li.find("a", class_="category-sub-link")
            if parent_link:
                parent_name = clean_text(parent_link.get_text(" ", strip=True))
                # Skip duplicate names so repeated menu markup does not
                # repeat levels in the hierarchy string.
                if parent_name and parent_name not in hierarchy_parts:
                    hierarchy_parts.append(parent_name)

            current_li = parent_li

        # Collected leaf-first; present root-first.
        hierarchy_parts.reverse()
        return " > ".join(hierarchy_parts)

    async def collect_product_links_for_category(
        self,
        category: Dict[str, str],
        max_products: Optional[int] = None,
    ) -> List[str]:
        """Collect product links from category pages with pagination.

        Follows "next page" links starting at ``category["url"]`` and
        returns normalized, de-duplicated product URLs in discovery order.
        Stops early once *max_products* links are collected, when a page
        fails to load, or when pagination revisits a page (loop guard).
        """
        found_products: List[str] = []
        found_set: Set[str] = set()
        next_page_url: Optional[str] = category["url"]
        visited_pages: Set[str] = set()

        while next_page_url:
            # Guard against pagination cycles (e.g. a "next" link that
            # points back to an already-seen page).
            normalized_page = self._normalize_url(next_page_url)
            if normalized_page in visited_pages:
                break
            visited_pages.add(normalized_page)

            soup = await self.fetch_url(next_page_url)
            if soup is None:
                break

            for a_tag in soup.select(".ajax_block_product a.thumbnail[href]"):
                href = clean_text(a_tag.get("href"))
                if not href:
                    continue

                product_url = self._normalize_url(urljoin(self.target_url, href))
                if product_url in found_set:
                    continue

                found_set.add(product_url)
                found_products.append(product_url)

                if max_products and len(found_products) >= max_products:
                    return found_products

            next_page_url = self._find_next_page_url(soup)

        return found_products

    def _find_next_page_url(self, soup: BeautifulSoup) -> Optional[str]:
        """Find next page URL in pagination.

        Tries explicit rel/class selectors first, then falls back to
        scanning anchor text ("következő" is Hungarian for "next").
        Returns a normalized absolute URL, or None when no next page exists.
        """
        selectors = [
            "a[rel='next'][href]",
            ".pagination .next a[href]",
            ".pagination-next a[href]",
            "a.js-search-link[rel='next'][href]",
        ]

        for selector in selectors:
            next_link = soup.select_one(selector)
            if next_link:
                href = clean_text(next_link.get("href"))
                if href:
                    return self._normalize_url(urljoin(self.target_url, href))

        # Fallback: match by the anchor's visible text.
        for anchor in soup.select("a[href]"):
            anchor_text = clean_text(anchor.get_text(" ", strip=True)).lower()
            if "következő" in anchor_text or anchor_text == "next":
                href = clean_text(anchor.get("href"))
                if href:
                    return self._normalize_url(urljoin(self.target_url, href))

        return None

    async def parse_products_parallel(
        self,
        product_urls: List[str],
        category_hierarchy: str,
        progress_callback: Optional[Callable[[int, int], Any]] = None,
    ) -> List[Dict[str, Any]]:
        """Parse products in parallel using asyncio.gather with Semaphore.

        Fetches and parses every URL concurrently (fetch_url enforces the
        concurrency cap). Failed fetches/parses are dropped; exceptions
        escaping a task are logged and skipped. *progress_callback* is only
        awaited every 10th completed product, alongside the progress print.
        """
        total = len(product_urls)
        print(f"[PARALLEL] Starting async processing of {total} products with max {self.max_concurrent} concurrent")

        processed_count = 0

        async def fetch_single(url: str) -> Optional[Dict]:
            nonlocal processed_count

            try:
                soup = await self.fetch_url(url)
                if not soup:
                    return None

                result = self._parse_product_from_soup(soup, category_hierarchy, url)

                processed_count += 1
                if processed_count % 10 == 0:
                    print(f"[PROGRESS] {processed_count}/{total} termék feldolgozva ({processed_count * 100 // total}%)")
                    if progress_callback:
                        await progress_callback(processed_count, total)

                return result

            except Exception as e:
                print(f"[ERROR] Failed {url}: {e}")
                return None

        tasks = [fetch_single(url) for url in product_urls]
        # return_exceptions=True so one failing task cannot cancel the rest.
        results = await asyncio.gather(*tasks, return_exceptions=True)

        valid_results = []
        for i, result in enumerate(results):
            if isinstance(result, Exception):
                print(f"[ERROR] Task {i} raised exception: {result}")
            elif result is not None:
                valid_results.append(result)

        print(f"[PARALLEL] Completed: {len(valid_results)}/{total} products successfully parsed")
        return valid_results

    def _parse_product_from_soup(
        self,
        soup: BeautifulSoup,
        category_hierarchy: str,
        product_url: str,
    ) -> Optional[Dict[str, Any]]:
        """Synchronous parsing logic (CPU bound, no I/O).

        Extracts name, price, descriptions, images and SKU from a product
        page and returns them as a flat dict (plus the category hierarchy,
        source URL and a UTC scrape timestamp). Returns None when parsing
        raises.
        """
        try:
            name_tag = soup.select_one("h1.h1.product-detail-name[itemprop='name']") or soup.select_one("h1[itemprop='name']")
            name = clean_text(name_tag.get_text(" ", strip=True)) if name_tag else ""

            # Prefer the machine-readable 'content' attribute over the
            # visible (locale-formatted) price text.
            price = None
            price_tag = soup.select_one("span[itemprop='price'][content]")
            if price_tag and price_tag.get("content"):
                price = parse_numeric_price(clean_text(price_tag.get("content")))
            elif price_tag:
                price = parse_numeric_price(clean_text(price_tag.get_text(" ", strip=True)))

            short_desc_tag = soup.select_one(".description-short") or soup.select_one("div[id^='product-description-short']")
            short_description = inner_html(short_desc_tag)

            long_desc_tag = (
                soup.select_one("#product-details .product-description")
                or soup.select_one("#product-details")
                or soup.select_one(".product-description")
            )
            long_description = inner_html(long_desc_tag)

            main_image_url, gallery_image_urls = self._extract_images(soup)
            sku = self._extract_sku(soup, name, product_url)

            return {
                "sku": sku,
                "name": name,
                "price": price,
                "short_description": short_description,
                "long_description": long_description,
                "main_image_url": main_image_url,
                "gallery_image_urls": gallery_image_urls,
                "category": category_hierarchy,
                "product_url": product_url,
                "scraped_at": datetime.now(timezone.utc).isoformat(),
            }

        except Exception as exc:
            print(f"[WARN] Parsing failed on {product_url}: {exc}")
            return None

    def _extract_images(self, soup: BeautifulSoup) -> Tuple[str, List[str]]:
        """Extract main and gallery images.

        Returns ``(main_image_url, gallery_urls)`` with the main image
        removed from the gallery list to avoid duplication. All URLs are
        absolute and fragment-stripped.
        """
        def pick_image_url(tag: Tag) -> str:
            # Prefer the high-resolution data-* attributes over src/href.
            for attr in [
                "data-image-large-src",
                "data-zoom-image",
                "data-full-size-image-url",
                "src",
                "href",
            ]:
                value = clean_text(tag.get(attr))
                if value:
                    return self._normalize_url(urljoin(self.target_url, value))
            return ""

        main_image_url = ""
        for selector in ["#zoom_product", "img.js-qv-product-cover", "img#zoom_product"]:
            main_tag = soup.select_one(selector)
            if main_tag:
                main_image_url = pick_image_url(main_tag)
                if main_image_url:
                    break

        gallery_urls: List[str] = []
        seen_gallery: Set[str] = set()

        gallery_tags = soup.select(
            "#thumb-gallery [data-image-large-src],"
            "#thumb-gallery [data-zoom-image],"
            "#thumb-gallery img[src],"
            "#thumb-gallery a[href]"
        )

        for tag in gallery_tags:
            image_url = pick_image_url(tag)
            if image_url and image_url not in seen_gallery:
                seen_gallery.add(image_url)
                gallery_urls.append(image_url)

        # Keep the gallery free of the cover image.
        if main_image_url and main_image_url in gallery_urls:
            gallery_urls = [url for url in gallery_urls if url != main_image_url]

        return main_image_url, gallery_urls

    def _extract_sku(self, soup: BeautifulSoup, name: str, product_url: str) -> Optional[str]:
        """Extract SKU from product page.

        Tries, in order: the itemprop='sku' element, a leading number (3+
        digits) in the product name, then numeric tokens in the URL slug.
        NOTE(review): when the slug contains several numeric tokens the
        SECOND one is assumed to be the product id -- presumably the first
        is a category id in this shop's URL scheme; verify on the live site.
        Returns None when nothing matches.
        """
        sku_tag = soup.select_one("span[itemprop='sku']")
        if sku_tag:
            sku_text = clean_text(sku_tag.get_text(" ", strip=True))
            if sku_text:
                return sku_text

        name_match = re.match(r"^\s*(\d{3,})\b", name)
        if name_match:
            return name_match.group(1)

        # Fall back to the URL slug (last path segment without extension).
        parsed = urlparse(product_url)
        slug = parsed.path.strip("/").split("/")[-1].split(".")[0]

        parts = slug.split("-")
        numeric_parts = [p for p in parts if p.isdigit()]

        if len(numeric_parts) >= 2:
            return numeric_parts[1]
        if len(numeric_parts) == 1:
            return numeric_parts[0]

        # Last resort: any 3+ digit run embedded in the slug.
        all_numbers = re.findall(r"\d{3,}", slug)
        if len(all_numbers) >= 2:
            return all_numbers[1]
        if len(all_numbers) == 1:
            return all_numbers[0]

        return None

    @staticmethod
    def _normalize_url(url: str) -> str:
        """Strip the fragment (#...) and surrounding whitespace from *url*."""
        parsed = urlparse(url)
        return parsed._replace(fragment="").geturl().strip()

    async def run_scrape_async(self) -> Tuple[bool, List[Dict[str, Any]], str]:
        """Fully async main scrape method.

        Pipeline: homepage -> categories -> product links per category ->
        parallel product parsing -> de-duplication/merge by SKU (or URL).

        Returns:
            (success, products, error_message) -- on failure *products* is
            empty and *error_message* describes the cause.
        """
        try:
            print("[SCRAPE] Loading homepage and extracting categories...")
            home_soup = await self.fetch_url(self.target_url)
            if home_soup is None:
                return False, [], "Could not load target homepage. Scrape aborted."

            categories = self.extract_categories(home_soup)
            if not categories:
                return False, [], "No categories found."

            if self.test_mode:
                categories = categories[:1]
                print(f"[INFO] TEST MODE - category limit: {len(categories)}")

            print("[SCRAPE] Collecting product links from categories...")
            all_product_urls = []

            for category in categories:
                category_name = category.get("hierarchy") or category.get("name") or ""
                limit = self.test_product_limit if self.test_mode else None

                urls = await self.collect_product_links_for_category(category, max_products=limit)
                print(f"[INFO] Category: {category_name} | Collected: {len(urls)} links")
                all_product_urls.extend(urls)

                if self.test_mode and len(all_product_urls) >= self.test_product_limit:
                    all_product_urls = all_product_urls[:self.test_product_limit]
                    break

            # dict.fromkeys de-duplicates while preserving discovery order.
            all_product_urls = list(dict.fromkeys(all_product_urls))
            total = len(all_product_urls)
            print(f"[SCRAPE] Total unique products to process: {total}")

            if total == 0:
                return True, [], ""

            print(f"[SCRAPE] Starting async parsing with max {self.max_concurrent} concurrent...")
            # NOTE(review): the category hierarchy is passed as "" here, so
            # every parsed product's "category" field ends up empty and the
            # category-merge logic below only ever joins empty strings --
            # the per-category info collected above is never propagated.
            results = await self.parse_products_parallel(all_product_urls, "")

            # Merge duplicates (same SKU, or same URL when SKU is missing):
            # union their categories, keep the first non-empty main image,
            # and concatenate gallery lists order-preservingly.
            products_by_key: Dict[str, Dict[str, Any]] = {}
            for product_data in results:
                if not product_data:
                    continue

                product_key = clean_text(str(product_data.get("sku") or ""))
                if product_key == "":
                    product_key = f"url::{product_data.get('product_url', '')}"

                existing = products_by_key.get(product_key)
                if existing is None:
                    products_by_key[product_key] = product_data
                else:
                    existing_categories = set([
                        clean_text(str(existing.get("category", ""))),
                        clean_text(str(product_data.get("category", ""))),
                    ])
                    existing_categories = {c for c in existing_categories if c}
                    existing["category"] = " | ".join(sorted(existing_categories))

                    if not existing.get("main_image_url") and product_data.get("main_image_url"):
                        existing["main_image_url"] = product_data["main_image_url"]

                    gallery_existing = existing.get("gallery_image_urls") or []
                    gallery_new = product_data.get("gallery_image_urls") or []
                    existing["gallery_image_urls"] = list(
                        dict.fromkeys(list(gallery_existing) + list(gallery_new))
                    )

            products_list = list(products_by_key.values())
            print(f"[SCRAPE] Finished: {len(products_list)} unique products after merging")
            return True, products_list, ""

        except Exception as exc:
            # Top-level guard: report the failure instead of crashing the
            # caller (the server's background task relies on this).
            error_msg = str(exc)
            print(f"[ERROR] Scrape failed: {error_msg}")
            import traceback
            traceback.print_exc()
            return False, [], error_msg


def save_state(products: Dict[str, Dict[str, Any]], state_path: Path) -> Dict[str, Any]:
    """Write the keyed product map to *state_path* as JSON.

    The payload wraps the products with a UTC generation timestamp and a
    total count; the same payload dict is also returned to the caller.
    """
    snapshot = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "total_products": len(products),
        "products": products,
    }
    with state_path.open("w", encoding="utf-8") as handle:
        json.dump(snapshot, handle, ensure_ascii=False, indent=2)
    return snapshot


def export_to_csv(state_path: Path, csv_path: Path) -> None:
    """Flatten the persisted product state into a CSV file.

    Reads the JSON payload written by save_state, joins each product's
    gallery into a comma-separated string column, adds a combined
    ``all_image_urls`` column (main image first, de-duplicated) and a
    ``state_key`` column, then writes the result with pandas.

    Raises:
        FileNotFoundError: if *state_path* does not exist.
    """
    if not state_path.exists():
        raise FileNotFoundError(f"state.json not found: {state_path}")

    payload = json.loads(state_path.read_text(encoding="utf-8"))
    product_map = payload.get("products", {})

    flattened: List[Dict[str, Any]] = []
    for state_key, product in product_map.items():
        record = dict(product)
        record["state_key"] = state_key

        gallery = record.get("gallery_image_urls", [])
        record["gallery_image_urls"] = ",".join(gallery) if isinstance(gallery, list) else ""

        combined: List[str] = []
        main_image = record.get("main_image_url", "")
        if main_image:
            combined.append(main_image)
        if isinstance(gallery, list):
            combined.extend(gallery)
        # dict.fromkeys de-duplicates while keeping first-seen order.
        record["all_image_urls"] = ",".join(dict.fromkeys(combined))

        flattened.append(record)

    pd.DataFrame(flattened).to_csv(csv_path, index=False)


# ==================== Background Task Handler ====================

async def update_job_progress(job_id: str, processed: int, total: int) -> None:
    """Refresh the persisted progress counters for a running job.

    Silently does nothing when the job's status file is missing or
    unreadable.
    """
    status = get_job_status(job_id)
    if not status:
        return
    status.update({
        "processed": processed,
        "total": total,
        "status": "running",
        "updated_at": datetime.now(timezone.utc).isoformat(),
    })
    save_job_status(job_id, status)


async def run_background_scrape(job_id: str, config: Dict[str, Any], sync_mode: str):
    """Background task for FastAPI (async version).

    Runs a full scrape, writes state.json next to this script, and keeps the
    job's status file updated: "running" at the start, then "completed" with
    the full product list, or "failed" with the error message.
    """
    print(f"[SYNC_STARTED] Job {job_id}")

    try:
        # Mark the job as running before any long work starts.
        save_job_status(job_id, {
            "job_id": job_id,
            "status": "running",
            "sync_mode": sync_mode,
            "total": 0,
            "processed": 0,
            "products": None,
            "error": None,
            "created_at": datetime.now(timezone.utc).isoformat(),
        })

        async with OngyujtoScraper(config=config, test_mode=False) as scraper:
            # NOTE(review): this callback is never passed to the scraper --
            # run_scrape_async() takes no progress hook, so the job's
            # progress counters are only updated at start and completion.
            async def progress_callback(processed: int, total: int):
                await update_job_progress(job_id, processed, total)

            success, products, error_msg = await scraper.run_scrape_async()

            if not success:
                raise RuntimeError(error_msg)

            # Re-key products by SKU (falling back to the product URL) and
            # persist the snapshot next to this script.
            script_dir = Path(__file__).resolve().parent
            products_by_key: Dict[str, Dict[str, Any]] = {}
            for product in products:
                key = clean_text(str(product.get("sku") or ""))
                if key == "":
                    key = f"url::{product.get('product_url', '')}"
                products_by_key[key] = product

            save_state(products_by_key, script_dir / "state.json")

            # Full result set is embedded in the status file for the caller.
            save_job_status(job_id, {
                "job_id": job_id,
                "status": "completed",
                "sync_mode": sync_mode,
                "total": len(products),
                "processed": len(products),
                "products": products,
                "error": None,
                "created_at": datetime.now(timezone.utc).isoformat(),
                "updated_at": datetime.now(timezone.utc).isoformat(),
            })
            print(f"[SYNC_COMPLETED] Job {job_id}: {len(products)} products")

    except Exception as exc:
        error_msg = str(exc)
        print(f"[SYNC_FAILED] Job {job_id}: {error_msg}")
        # NOTE: the failure record replaces the whole status file, so the
        # total/processed/products fields from earlier writes are dropped.
        save_job_status(job_id, {
            "job_id": job_id,
            "status": "failed",
            "error": error_msg,
            "updated_at": datetime.now(timezone.utc).isoformat(),
        })


# ==================== FastAPI Server ====================

app = FastAPI(title="Ongyujto Scraper API (Async)")
# Mutable module globals populated by run_server_mode(): the loaded config
# and the path it was loaded from (re-read by get_scraper_config()).
server_config: Dict[str, Any] = {}
config_path: Path = Path("config.json")


def get_scraper_config() -> Dict[str, Any]:
    """Re-read the scraper configuration from config_path on every call."""
    current = load_config(config_path)
    return current


@app.post("/sync")
async def sync_endpoint(
    request: SyncRequest,
    background_tasks: BackgroundTasks,
    x_api_key: Optional[str] = Header(None),
):
    """Validate the API key and schedule a background scrape job.

    Responds immediately with a job id; callers poll /status/{job_id}
    for progress and results.
    """
    config = get_scraper_config()
    expected = clean_text(str(config.get("api_key", "")))
    supplied = clean_text(x_api_key or "")

    # Reject both a missing config key and a missing header outright.
    if not expected or not supplied:
        raise HTTPException(status_code=401, detail="API key not configured or not provided")

    # Constant-time comparison to avoid leaking key contents via timing.
    if not hash_equals(expected, supplied):
        raise HTTPException(status_code=401, detail="Invalid API key")

    job_id = str(uuid.uuid4())
    sync_mode = request.sync_mode or "status_only"
    now = datetime.now(timezone.utc).isoformat()

    print(f"[SYNC_REQUEST] Job {job_id} started. Mode: {sync_mode}")

    save_job_status(job_id, {
        "job_id": job_id,
        "status": "accepted",
        "sync_mode": sync_mode,
        "total": 0,
        "processed": 0,
        "products": None,
        "error": None,
        "created_at": now,
        "updated_at": now,
    })

    background_tasks.add_task(run_background_scrape, job_id, config, sync_mode)

    return {
        "status": "accepted",
        "job_id": job_id,
        "message": "Async scrape elindult a háttérben",
        "estimated_time": "20-30 mp",
        "check_status_url": f"/status/{job_id}",
    }


@app.get("/status/{job_id}")
async def get_status_endpoint(job_id: str):
    """Return the persisted status record for *job_id*.

    The literal id "test" acts as a health check and never touches disk.
    """
    if job_id == "test":
        return {
            "status": "ok",
            "message": "aiohttp backend running",
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

    job_state = get_job_status(job_id)
    if not job_state:
        raise HTTPException(status_code=404, detail="Job not found")
    return job_state


# ==================== CLI Mode ====================

def run_cli_mode(
    config_path_arg: Path,
    state_path: Path,
    csv_path: Path,
    test_mode: bool,
    test_product_limit: int,
) -> None:
    """Run a one-shot scrape, persist state.json and export.csv, then exit.

    Raises RuntimeError when the scrape reports failure.
    """

    async def _run() -> None:
        config = load_config(config_path_arg)
        scraper = OngyujtoScraper(
            config=config,
            test_mode=test_mode,
            test_product_limit=test_product_limit,
        )
        async with scraper:
            success, products, error_msg = await scraper.run_scrape_async()
            if not success:
                raise RuntimeError(error_msg)

            # Key products by SKU, falling back to the product URL.
            keyed: Dict[str, Dict[str, Any]] = {}
            for product in products:
                key = clean_text(str(product.get("sku") or ""))
                if key == "":
                    key = f"url::{product.get('product_url', '')}"
                keyed[key] = product

            save_state(keyed, state_path)
            export_to_csv(state_path, csv_path)

            print(f"[INFO] Scrape finished. Products saved: {len(products)}")
            print(f"[INFO] state.json path: {state_path}")
            print(f"[INFO] export.csv path: {csv_path}")

    asyncio.run(_run())


def run_server_mode(config_path_arg: Path) -> None:
    """Start the FastAPI HTTP server using settings from the config file.

    Publishes the loaded config and its path into the module globals the
    endpoints read, then blocks inside uvicorn until shutdown.
    """
    global config_path, server_config
    config_path = config_path_arg

    loaded = load_config(config_path)
    server_config = loaded

    host = str(loaded.get("server_host", "0.0.0.0"))
    port = int(loaded.get("server_port", 5000))
    concurrency = int(loaded.get("max_concurrent", 20))

    print(f"[IDLE] Server listening on :{port}")
    print(f"[CONFIG] Max concurrent: {concurrency}")

    uvicorn.run(app, host=host, port=port, log_level="warning")


def parse_args():
    """Define the command-line interface and parse sys.argv."""
    parser = argparse.ArgumentParser(description="Ongyujto async scraper + parser")

    # Plain string options: (flag, default, help).
    for flag, default, help_text in (
        ("--config", "config.json", "Path to config.json"),
        ("--state-file", "state.json", "Output state JSON file"),
        ("--csv-file", "export.csv", "Output CSV file"),
    ):
        parser.add_argument(flag, default=default, help=help_text)

    parser.add_argument("--test", action="store_true", help="Test mode")
    parser.add_argument("--test-product-limit", type=int, default=3, help="Max products in test mode")
    parser.add_argument("--server", action="store_true", help="Run in HTTP server mode")
    return parser.parse_args()


def main():
    """CLI entry point: resolve file paths and dispatch to server or CLI mode."""
    args = parse_args()
    base_dir = Path(__file__).resolve().parent

    def resolve(raw: str) -> Path:
        # Absolute paths are used verbatim; relative ones are anchored to
        # the script directory and resolved.
        candidate = Path(raw)
        return candidate if candidate.is_absolute() else (base_dir / raw).resolve()

    cfg = resolve(args.config)
    state = resolve(args.state_file)
    csv_out = resolve(args.csv_file)

    if args.server:
        run_server_mode(cfg)
        return

    run_cli_mode(
        config_path_arg=cfg,
        state_path=state,
        csv_path=csv_out,
        test_mode=bool(args.test),
        test_product_limit=max(1, int(args.test_product_limit)),
    )


# Script entry point: parse CLI args and run in CLI or server mode.
if __name__ == "__main__":
    main()
