"""
阶段7验证脚本 - 统计分析服务验证

验证所有统计分析和导出功能的正确实现。
"""

import sys
import os
from pathlib import Path

# Add the project root (the directory containing this script) to the front of
# the Python import path. This must stay above the `src.*` imports below:
# presumably they only resolve when the script is launched directly because of
# this insertion — confirm against the project layout before moving it.
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

from src.core.config import get_config
from src.core.logger import setup_logger
from src.database.database_manager import DatabaseManager
from src.database.sqlite_dao import SQLiteDAO
from src.database.chroma_dao import ChromaDAO
from src.services.embedding_service import EmbeddingService
from src.services.management_service import ManagementService
from src.services.analytics_service import AnalyticsService
from src.services.export_service import ExportService
from src.database.models import QuestionCreateDTO


def print_header(title):
    """Print *title* as a banner framed by two 70-character '=' rules.

    A blank line is emitted first, and the title is indented by two spaces.
    """
    rule = "=" * 70
    # The leading empty string yields the blank line before the top rule.
    print("", rule, f"  {title}", rule, sep="\n")


def print_section(section):
    """Print a sub-section heading of the form ``--- section ---``.

    The heading is preceded by one blank line.
    """
    heading = "\n--- {} ---".format(section)
    print(heading)


def print_success(msg):
    """Print *msg* on one line, prefixed with a check mark and a space."""
    line = "✓ " + format(msg)
    print(line)


def print_error(msg):
    """Print *msg* on one line, prefixed with a cross mark and a space."""
    print("✗ {text}".format(text=msg))


def verify_stage7():
    """Run the stage-7 verification of the analytics and export services.

    Steps, in order: load the configuration and logger, build the DAOs, the
    database manager and the services, create three sample questions, call
    each analytics query and print selected fields from its result, then
    exercise the JSON, CSV, batch and file-listing export calls.

    Returns:
        bool: True only when every step ran without raising AND every export
        check reported ``status == 'success'``. False when any export check
        failed or was skipped, or when any step raised an exception.

    Notes:
        * Exceptions are not propagated: they are printed together with their
          traceback and turned into a ``False`` return value.
        * NOTE(review): the sample questions created in step 6 are not removed
          afterwards — confirm that repeated runs against the same database
          are acceptable.
        * NOTE(review): the closing banner mentions registered MCP tools; this
          function does not check them.
    """
    print_header("第7阶段：统计分析服务验证")

    # Labels of export checks that did not succeed. Without this record a
    # failed export would only print an error while the run was still
    # reported (and exited) as a success.
    failed_checks = []

    try:
        # 1. Load configuration and logging
        print_section("1. 系统初始化")
        config = get_config()
        logger = setup_logger(config)
        print_success("配置和日志加载成功")

        # 2. Build the database access objects
        print_section("2. 数据库服务初始化")
        sqlite_dao = SQLiteDAO(config.sqlite_db_path, logger)
        chroma_dao = ChromaDAO(
            persist_dir=config.chromadb_persist_dir,
            collection_name=config.chromadb_collection_name,
            logger=logger
        )
        print_success("数据库DAO初始化成功")

        # 3. Build the embedding service, database manager and management service
        print_section("3. Embedding和业务服务初始化")
        embedding_service = EmbeddingService(config_manager=config)
        db_manager = DatabaseManager(
            sqlite_dao=sqlite_dao,
            chroma_dao=chroma_dao,
            embedding_service=embedding_service,
            logger=logger
        )
        db_manager.sqlite_dao.initialize_schema()
        db_manager.chroma_dao.initialize_collection()
        print_success("数据库初始化成功")

        management_service = ManagementService(
            db_manager=db_manager,
            embedding_service=embedding_service,
            logger=logger
        )
        print_success("管理服务初始化成功")

        # 4. Build the analytics service
        print_section("4. 统计分析服务初始化")
        analytics_service = AnalyticsService(
            db_manager=db_manager,
            logger=logger
        )
        print_success("统计分析服务初始化成功")

        # 5. Build the export service
        print_section("5. 导出服务初始化")
        export_service = ExportService(
            config=config,
            logger=logger
        )
        print_success("导出服务初始化成功")

        # 6. Create sample questions so the statistics have data to report
        print_section("6. 创建测试数据")
        test_questions = [
            QuestionCreateDTO(
                title="Python基础1",
                content="Python中列表如何创建？",
                question_type="单选",
                category="Python",
                difficulty="简单",
                tags=["Python", "基础"],
                answer="A",
                explanation="使用方括号创建列表",
                points=10,
                status="已发布"
            ),
            QuestionCreateDTO(
                title="数据结构1",
                content="什么是二叉树？",
                question_type="简答",
                category="数据结构",
                difficulty="中等",
                tags=["数据结构", "树"],
                answer="二叉树是每个节点最多有两个子节点的树",
                points=15,
                status="已发布"
            ),
            QuestionCreateDTO(
                title="算法1",
                content="快速排序的时间复杂度是多少？",
                question_type="单选",
                category="算法",
                difficulty="困难",
                tags=["算法", "排序"],
                answer="O(n log n)",
                points=20,
                status="已发布"
            ),
        ]

        created_ids = [
            management_service.create_question(question)
            for question in test_questions
        ]
        print_success(f"创建了{len(created_ids)}个测试题目")

        # 7. Overall statistics
        print_section("7. 整体统计功能测试")
        overall_stats = analytics_service.get_overall_statistics()
        print(f"  - 题目总数: {overall_stats['total_questions']}")
        print(f"  - 已发布: {overall_stats['published_questions']}")
        print(f"  - 平均分值: {overall_stats.get('average_points', 'N/A')}")
        print_success("整体统计功能正常")

        # 8. Per-category statistics (only the first three are printed)
        print_section("8. 分类统计功能测试")
        category_stats = analytics_service.get_category_statistics()
        print(f"  - 总分类数: {category_stats['total_categories']}")
        for cat in category_stats['categories'][:3]:
            print(f"    - {cat['category']}: {cat['total_count']}题")
        print_success("分类统计功能正常")

        # 9. Difficulty distribution
        print_section("9. 难度统计功能测试")
        difficulty_stats = analytics_service.get_difficulty_statistics()
        for diff in difficulty_stats['difficulties']:
            print(f"  - {diff['difficulty']}: {diff['count']}题 ({diff['percentage']}%)")
        print_success("难度统计功能正常")

        # 10. Tag usage statistics (top 5)
        print_section("10. 标签统计功能测试")
        tag_stats = analytics_service.get_tag_statistics(top_n=5)
        print(f"  - 总标签数: {tag_stats['total_tags']}")
        print(f"  - 显示Top {min(5, len(tag_stats['tags']))}:")
        for tag in tag_stats['tags'][:5]:
            print(f"    - {tag['tag_name']}: {tag['question_count']}题")
        print_success("标签统计功能正常")

        # 11. Quality metrics
        print_section("11. 质量评估功能测试")
        quality_metrics = analytics_service.get_quality_metrics()
        print(f"  - 评估题目数: {quality_metrics['total_questions_evaluated']}")
        print(f"  - 平均完整度分数: {quality_metrics['average_completeness_score']:.2f}")
        print(f"  - 平均质量分数: {quality_metrics['average_quality_score']:.2f}")
        dist = quality_metrics['quality_distribution']
        print(f"  - 质量分布: 优秀{dist['excellent']}, 良好{dist['good']}, 一般{dist['fair']}, 较差{dist['poor']}")
        print_success("质量评估功能正常")

        # 12. Time-series analysis (last 30 days, daily granularity)
        print_section("12. 时间序列分析功能测试")
        time_series = analytics_service.get_time_series_analysis(days=30, period='day')
        print(f"  - 数据粒度: {time_series['period']}")
        print(f"  - 数据记录数: {time_series['total_records']}")
        if time_series['time_series']:
            # Only the last entry of the series is shown.
            latest = time_series['time_series'][-1]
            print(f"  - 最新数据: {latest['date']} - 创建{latest['questions_created']}题，累计{latest['cumulative_total']}题")
        print_success("时间序列分析功能正常")

        # 13. Question-type statistics
        print_section("13. 题目类型统计功能测试")
        type_stats = analytics_service.get_question_type_statistics()
        print(f"  - 题目类型数: {type_stats['total_types']}")
        for qt in type_stats['question_types']:
            print(f"    - {qt['question_type']}: {qt['count']}题 ({qt['percentage']}%)")
        print_success("题目类型统计功能正常")

        # 14. Status distribution
        print_section("14. 状态分布统计功能测试")
        status_stats = analytics_service.get_status_statistics()
        for status in status_stats['statuses']:
            print(f"  - {status['status']}: {status['count']}题 ({status['percentage']}%)")
        print_success("状态分布统计功能正常")

        # 15. Combined analysis report
        print_section("15. 综合分析报告功能测试")
        full_report = analytics_service.generate_analysis_report()
        print(f"  - 报告时间戳: {full_report['report_timestamp']}")
        print("  - 报告包含分部:")
        if 'insights' in full_report:
            insights = full_report['insights']
            print(f"    - 优势: {len(insights.get('strengths', []))}条")
            print(f"    - 弱项: {len(insights.get('weaknesses', []))}条")
            print(f"    - 建议: {len(insights.get('recommendations', []))}条")
        print_success("综合分析报告功能正常")

        # 16. JSON export
        print_section("16. JSON导出功能测试")
        json_result = export_service.export_to_json(
            overall_stats,
            filename="stage7_test_stats.json"
        )
        if json_result['status'] == 'success':
            print(f"  - 文件大小: {json_result.get('file_size', 'N/A')} 字节")
            print(f"  - 文件路径: {json_result.get('filepath', 'N/A')}")
            print_success("JSON导出功能正常")
        else:
            print_error(f"JSON导出失败: {json_result}")
            failed_checks.append("JSON导出")

        # 17. CSV export
        print_section("17. CSV导出功能测试")
        category_list = category_stats.get('categories', [])
        if not category_list:
            # Every question created in step 6 carries a category, so an empty
            # list means the CSV export could not be exercised. Record it
            # instead of silently skipping the check.
            print_error("CSV导出未验证: 分类统计为空")
            failed_checks.append("CSV导出")
        else:
            csv_result = export_service.export_to_csv(
                category_list,
                filename="stage7_test_categories.csv"
            )
            if csv_result['status'] == 'success':
                print(f"  - 导出记录数: {csv_result.get('records_count', 0)}")
                print(f"  - 导出列数: {csv_result.get('columns_count', 0)}")
                print_success("CSV导出功能正常")
            else:
                print_error(f"CSV导出失败: {csv_result}")
                failed_checks.append("CSV导出")

        # 18. Batch export (JSON + CSV in one call)
        print_section("18. 批量导出功能测试")
        batch_result = export_service.export_overall_statistics(
            overall_stats,
            formats=['json', 'csv'],
            base_filename="stage7_batch_export"
        )
        if batch_result['status'] == 'success':
            print(f"  - 导出格式: {batch_result.get('formats', [])}")
            print(f"  - 导出时间戳: {batch_result.get('timestamp', 'N/A')}")
            print_success("批量导出功能正常")
        else:
            print_error(f"批量导出失败: {batch_result}")
            failed_checks.append("批量导出")

        # 19. Listing of exported files
        print_section("19. 导出文件管理功能测试")
        file_list = export_service.list_export_files()
        if file_list['status'] == 'success':
            print(f"  - 导出文件总数: {file_list['total_files']}")
            if file_list['files']:
                print(f"  - 最新文件: {file_list['files'][0]['filename']}")
            print_success("文件列表功能正常")
        else:
            print_error(f"文件列表获取失败: {file_list}")
            failed_checks.append("文件列表")

        # 20. Summary — only claim success when no export check failed
        print_section("20. 验证总结")
        if failed_checks:
            print_error(f"以下检查未通过: {', '.join(failed_checks)}")
            return False

        print_success("所有统计分析功能验证通过")
        print_success("所有导出功能验证通过")
        print_success("第7阶段实现完整")

        # Final banner
        print_header("验证完成")
        print("""
✓ 统计分析服务实现完整：
  - 整体统计概览
  - 分类统计分析
  - 难度分布统计
  - 标签使用统计
  - 题目质量评估
  - 时间序列分析
  - 题目类型统计
  - 状态分布统计
  - 综合分析报告

✓ 导出服务实现完整：
  - JSON格式导出
  - CSV格式导出
  - 批量导出支持
  - 文件管理功能

✓ MCP Tools已注册：
  - 14个统计分析Tools
  - 6个导出相关Tools

第7阶段验证成功！
        """)

        return True

    except Exception as e:
        # Top-level boundary of the script: report the error with its
        # traceback and signal failure through the return value.
        print_error(f"验证过程中出错: {e}")
        import traceback
        traceback.print_exc()
        return False


if __name__ == "__main__":
    # Map the verification outcome onto the process exit status:
    # 0 when verify_stage7() returns a truthy value, 1 otherwise.
    exit_code = 0 if verify_stage7() else 1
    sys.exit(exit_code)
