"""
test_phase3.py — tests for tagger.py and indexer.py (run.py is not exercised by any test in this file)
"""

import json
import os
import shutil
import sys
import tempfile
from datetime import datetime, timezone
from pathlib import Path

# 加入 src 到路徑
sys.path.insert(0, str(Path(__file__).parent.parent))

from src.capturers.base import RawNote
from src.organizer.tagger import Tagger, KeywordTagger, TagSuggestion
from src.organizer.indexer import NoteIndexer
from src.utils.config import ConfigLoader


def make_temp_data_dir(vocabulary: dict = None, config: dict = None) -> Path:
    """Create a temporary data directory containing vocabulary.yaml and config.yaml.

    Args:
        vocabulary: Mapping dumped to ``vocabulary.yaml``. A falsy value
            (``None`` or an empty dict) selects a small built-in vocabulary.
        config: Mapping dumped to ``config.yaml``. A falsy value selects a
            default config whose vault root is ``<tmp>/vault``.

    Returns:
        Path of the freshly created directory. It is created with
        ``tempfile.mkdtemp()``, so the caller is responsible for deleting it.
    """
    import yaml

    # mkdtemp() already creates the directory, so no extra mkdir is needed.
    tmp = Path(tempfile.mkdtemp())

    default_vocab = {
        "技術": {
            "AI/ML": {"keywords": ["llm", "gpt", "pytorch"]},
            "程式語言": {"keywords": ["python", "javascript"]},
        }
    }
    default_config = {
        "vault": {"root": str(tmp / "vault"), "output_dir": "0.raw"},
        "sources": {},
    }

    vocab_data = vocabulary or default_vocab
    cfg_data = config or default_config

    # allow_unicode=True writes the non-ASCII keys verbatim, so the files must
    # be opened as UTF-8 explicitly; the platform default encoding (e.g. cp1252
    # on Windows) may be unable to encode them.
    with open(tmp / "vocabulary.yaml", "w", encoding="utf-8") as f:
        yaml.dump(vocab_data, f, allow_unicode=True)
    with open(tmp / "config.yaml", "w", encoding="utf-8") as f:
        yaml.dump(cfg_data, f, allow_unicode=True)

    return tmp


def make_note(content: str, tags_raw=None, source="test") -> RawNote:
    """Build a RawNote fixture with a fixed source id and creation timestamp.

    Args:
        content: Body text of the note.
        tags_raw: Raw tags to attach; any falsy value becomes an empty list.
        source: Value for the note's ``source`` field.

    Returns:
        A RawNote populated with the given content, tags and source.
    """
    raw_tags = tags_raw if tags_raw else []
    fields = {
        "source": source,
        "source_id": "abc12345",
        "content": content,
        "created_at": "2026-04-30T10:00:00+00:00",
        "tags_raw": raw_tags,
    }
    return RawNote(**fields)


# ── Test 1: KeywordTagger — 命中關鍵字 ──
def test_keyword_tagger_hit():
    """KeywordTagger should suggest an AI-related tag for a note mentioning LLM/GPT.

    The default vocabulary written by make_temp_data_dir lists "llm" and "gpt"
    as keywords under the AI/ML category; the note text contains both (in
    upper case).
    """
    data_dir = make_temp_data_dir()
    try:
        config_loader = ConfigLoader(data_dir=data_dir)
        tagger = KeywordTagger(config_loader)
        note = make_note("今天研究 LLM 和 GPT-4 的 fine-tuning 方法")
        suggestions = tagger.tag(note)
        tags = [s.tag for s in suggestions]
        assert any("AI/ML" in t or "llm" in t.lower() for t in tags), f"Expected AI tag, got {tags}"
        print(f"  ✅ KeywordTagger hit: {tags}")
    finally:
        # mkdtemp() directories are never removed automatically; clean up so
        # repeated test runs do not accumulate temp dirs.
        shutil.rmtree(data_dir, ignore_errors=True)


# ── Test 2: KeywordTagger — 無命中 ──
def test_keyword_tagger_miss():
    """KeywordTagger should not emit confident keyword tags for unrelated text.

    The note contains none of the default vocabulary keywords, so no
    suggestion with method "keyword" may have confidence above 0.5.
    """
    data_dir = make_temp_data_dir()
    try:
        config_loader = ConfigLoader(data_dir=data_dir)
        tagger = KeywordTagger(config_loader)
        note = make_note("今天天氣很好，去公園散步")
        suggestions = tagger.tag(note)
        # There must be no high-confidence tag produced by keyword matching.
        high_conf = [s for s in suggestions if s.confidence > 0.5 and s.method == "keyword"]
        assert len(high_conf) == 0, f"Unexpected high-conf tags: {high_conf}"
        print("  ✅ KeywordTagger no hit for non-tech content")
    finally:
        # mkdtemp() directories are never removed automatically; clean up so
        # repeated test runs do not accumulate temp dirs.
        shutil.rmtree(data_dir, ignore_errors=True)


# ── Test 3: Tagger.top_tags confidence 過濾 ──
def test_tagger_top_tags():
    """Tagger.top_tags should return at least one tag for a programming note.

    Tagging runs with ``use_llm=False``; the suggestions are then filtered via
    ``top_tags(max_tags=5, min_confidence=0.3)``. The note mentions "python"
    and "javascript" and carries the raw tag "#programming".
    """
    data_dir = make_temp_data_dir()
    try:
        config_loader = ConfigLoader(data_dir=data_dir)
        tagger = Tagger(config_loader)
        note = make_note("python 和 javascript 開發技巧", tags_raw=["#programming"])
        suggestions = tagger.tag(note, use_llm=False)
        top = tagger.top_tags(suggestions, max_tags=5, min_confidence=0.3)
        assert len(top) >= 1, f"Expected at least 1 tag, got {top}"
        print(f"  ✅ Tagger top_tags: {top}")
    finally:
        # mkdtemp() directories are never removed automatically; clean up so
        # repeated test runs do not accumulate temp dirs.
        shutil.rmtree(data_dir, ignore_errors=True)


# ── Test 4: existing tags 優先（高 confidence）──
def test_existing_tags_high_confidence():
    """Tags already present on the note should come back with high confidence.

    The note carries two raw tags; exactly two suggestions with method
    "existing" are expected, each with confidence of at least 0.9.
    """
    data_dir = make_temp_data_dir()
    try:
        config_loader = ConfigLoader(data_dir=data_dir)
        tagger = KeywordTagger(config_loader)
        note = make_note("隨便寫的內容", tags_raw=["#myProject", "#2026"])
        suggestions = tagger.tag(note)
        existing = [s for s in suggestions if s.method == "existing"]
        assert len(existing) == 2, f"Expected 2 existing tags, got {existing}"
        assert all(s.confidence >= 0.9 for s in existing), "Existing tags should have high confidence"
        print(f"  ✅ Existing tags confidence: {[s.confidence for s in existing]}")
    finally:
        # mkdtemp() directories are never removed automatically; clean up so
        # repeated test runs do not accumulate temp dirs.
        shutil.rmtree(data_dir, ignore_errors=True)


# ── Test 5: NoteIndexer — 寫入 vault ──
def test_indexer_write():
    """NoteIndexer.index_note should write a file containing id, tags and body.

    The returned path must exist, and the file text must include the
    ``note_id`` line, one of the supplied tags and the note's heading text.
    """
    data_dir = make_temp_data_dir()
    try:
        vault_root = data_dir / "vault"
        config_loader = ConfigLoader(data_dir=data_dir)
        indexer = NoteIndexer(config_loader, vault_root=vault_root)
        note = make_note("# 測試筆記\n\n今天學了 python 的 dataclass 用法")
        tags = ["技術/程式語言", "學習"]
        note_id = "test-20260430-abc12345"

        filepath = indexer.index_note(note, tags, note_id)
        assert filepath is not None, "Should have written a file"
        assert filepath.exists(), f"File not found: {filepath}"

        content = filepath.read_text(encoding="utf-8")
        assert "note_id: test-20260430-abc12345" in content
        assert "技術/程式語言" in content
        assert "測試筆記" in content
        print(f"  ✅ Indexer wrote to {filepath.name}")
    finally:
        # The vault lives inside data_dir, so removing data_dir also removes
        # the written note; mkdtemp() never cleans up on its own.
        shutil.rmtree(data_dir, ignore_errors=True)


# ── Test 6: NoteIndexer — 重複寫入不覆寫 ──
def test_indexer_no_overwrite():
    """A second index_note call with overwrite=False must not write again.

    The first call is expected to return a path; the second call for the same
    note id with ``overwrite=False`` is expected to return ``None``.
    """
    data_dir = make_temp_data_dir()
    try:
        vault_root = data_dir / "vault"
        config_loader = ConfigLoader(data_dir=data_dir)
        indexer = NoteIndexer(config_loader, vault_root=vault_root)
        note = make_note("# 原始內容")
        note_id = "test-20260430-abc12345"

        p1 = indexer.index_note(note, [], note_id)
        # Check the first write immediately so a failure here is not
        # misreported as a problem with the no-overwrite path.
        assert p1 is not None
        p2 = indexer.index_note(note, [], note_id, overwrite=False)
        assert p2 is None, "Second write without overwrite should return None"
        print("  ✅ Indexer no-overwrite works")
    finally:
        # mkdtemp() directories are never removed automatically; clean up so
        # repeated test runs do not accumulate temp dirs.
        shutil.rmtree(data_dir, ignore_errors=True)


# ── Test 7: NoteIndexer — content hash 計算 ──
def test_indexer_content_hash():
    """get_note_content_hash should be stable and sensitive to the tag list.

    Hashing the same note with the same tags twice must give equal values;
    hashing it with a different tag list must give a different value.
    """
    data_dir = make_temp_data_dir()
    try:
        config_loader = ConfigLoader(data_dir=data_dir)
        indexer = NoteIndexer(config_loader, vault_root=data_dir / "vault")
        note = make_note("hello world")
        h1 = indexer.get_note_content_hash(note, ["tag1"])
        h2 = indexer.get_note_content_hash(note, ["tag1"])
        h3 = indexer.get_note_content_hash(note, ["tag2"])
        assert h1 == h2
        assert h1 != h3
        print(f"  ✅ Content hash stable: {h1}")
    finally:
        # mkdtemp() directories are never removed automatically; clean up so
        # repeated test runs do not accumulate temp dirs.
        shutil.rmtree(data_dir, ignore_errors=True)


if __name__ == "__main__":
    import traceback

    # Minimal stand-alone runner: execute every test in order, report each
    # failure with its traceback, and exit non-zero if anything failed.
    suite = (
        test_keyword_tagger_hit,
        test_keyword_tagger_miss,
        test_tagger_top_tags,
        test_existing_tags_high_confidence,
        test_indexer_write,
        test_indexer_no_overwrite,
        test_indexer_content_hash,
    )
    ok_count = 0
    fail_count = 0
    for case in suite:
        try:
            print(f"\n🧪 {case.__name__}")
            case()
        except Exception as e:
            print(f"  ❌ FAIL: {e}")
            traceback.print_exc()
            fail_count += 1
        else:
            ok_count += 1

    print(f"\n{'='*40}")
    print(f"結果：{ok_count} passed, {fail_count} failed")
    sys.exit(1 if fail_count else 0)
