volsu-contests/backend/app/services/scoring.py
2025-11-30 19:55:50 +03:00

142 lines
4.9 KiB
Python

import math
from app.services.judge import judge_service, JudgeStatus
from app.models.test_case import TestCase
def distribute_points(total_points: int, num_tests: int) -> list[int]:
    """
    Split ``total_points`` across ``num_tests`` tests.

    Every test is awarded ``ceil(total_points / num_tests)`` points, but the
    running sum is capped at ``total_points`` — so trailing tests may receive
    fewer points, possibly zero. An empty test set yields an empty list.
    """
    if not num_tests:
        return []
    quota = math.ceil(total_points / num_tests)
    allocation: list[int] = []
    left = total_points
    for _ in range(num_tests):
        # Award the full quota while the pool allows it, then whatever is left.
        share = quota if quota <= left else left
        allocation.append(share)
        left -= share
    return allocation
async def evaluate_submission(
    source_code: str,
    language_id: int,
    test_cases: list[TestCase],
    total_points: int = 100,
    time_limit_ms: int = 1000,
    memory_limit_kb: int = 262144,
) -> dict:
    """
    Run a submission against every test case and score it IOI-style.

    ``total_points`` is auto-distributed across the test cases; each passed
    test contributes its allocated share to the score (partial credit).

    Returns a dict with the overall status, score, pass counts, peak
    time/memory across tests, and a per-test ``details`` list.

    NOTE(review): an empty ``test_cases`` list yields status "accepted" with
    score 0 — presumably callers never pass an empty set; confirm upstream.
    """
    per_test_points = distribute_points(total_points, len(test_cases))
    cpu_limit_sec = time_limit_ms / 1000.0  # judge expects seconds

    score = 0
    passed_count = 0
    peak_time_ms = 0
    peak_memory_kb = 0
    details: list[dict] = []

    for idx, tc in enumerate(test_cases):
        max_pts = per_test_points[idx] if idx < len(per_test_points) else 0
        try:
            verdict = await judge_service.submit(
                source_code=source_code,
                language_id=language_id,
                stdin=tc.input,
                expected_output=tc.expected_output,
                cpu_time_limit=cpu_limit_sec,
                memory_limit=memory_limit_kb,
            )
            status = verdict.get("status", {})
            sid = status.get("id", 0)
            ok = sid == JudgeStatus.ACCEPTED
            # Judge reports time in seconds (possibly None/str) — normalize to ms.
            run_ms = int(float(verdict.get("time", 0) or 0) * 1000)
            mem_kb = verdict.get("memory", 0) or 0

            if ok:
                passed_count += 1
                score += max_pts
            peak_time_ms = max(peak_time_ms, run_ms)
            peak_memory_kb = max(peak_memory_kb, mem_kb)

            details.append({
                "test_id": tc.id,
                "is_sample": tc.is_sample,
                "status": status.get("description", "Unknown"),
                "status_id": sid,
                "passed": ok,
                "points": max_pts if ok else 0,
                "max_points": max_pts,
                "time_ms": run_ms,
                "memory_kb": mem_kb,
                # Debug info
                "stdout": verdict.get("stdout") or "",
                "stderr": verdict.get("stderr") or "",
                "compile_output": verdict.get("compile_output") or "",
                "message": verdict.get("message") or "",
            })
        except Exception as e:
            # The judge call itself failed — record a zero-point internal error
            # for this test and keep evaluating the rest.
            details.append({
                "test_id": tc.id,
                "is_sample": tc.is_sample,
                "status": "Internal Error",
                "status_id": JudgeStatus.INTERNAL_ERROR,
                "passed": False,
                "points": 0,
                "max_points": max_pts,
                "error": str(e),
            })

    # Derive the overall verdict from the pass count; when nothing passed,
    # classify by the first failing test's status id.
    if passed_count == len(test_cases):
        overall = "accepted"
    elif passed_count:
        overall = "partial"
    else:
        overall = "wrong_answer"
        failed = next((d for d in details if not d["passed"]), None)
        if failed is not None:
            sid = failed.get("status_id", 0)
            if sid == JudgeStatus.COMPILATION_ERROR:
                overall = "compilation_error"
            elif sid == JudgeStatus.TIME_LIMIT_EXCEEDED:
                overall = "time_limit_exceeded"
            elif sid >= JudgeStatus.RUNTIME_ERROR_SIGSEGV:
                # Runtime errors: SIGSEGV(7), SIGXFSZ(8), SIGFPE(9), SIGABRT(10), NZEC(11), OTHER(12), INTERNAL(13), EXEC_FORMAT(14)
                overall = "runtime_error"

    return {
        "status": overall,
        "score": score,
        "total_points": total_points,  # Use parameter, not sum of test_case.points
        "tests_passed": passed_count,
        "tests_total": len(test_cases),
        "execution_time_ms": peak_time_ms,
        "memory_used_kb": peak_memory_kb,
        "details": details,
    }