Merge branch 'main' of https://code.rizlum.com/ngpthanh15/IQA-Metric-Benchmark

2025-09-12 08:03:17 +00:00
parent 0d6091fc0c 462754cfb0
commit 77b9784c32
37 changed files with 3831 additions and 4 deletions
--- a/scripts/threshold_analysis.py
+++ b/scripts/threshold_analysis.py
@@ -130,12 +130,312 @@ def compute_metric_curves(scores: np.ndarray, y_true: np.ndarray) -> pd.DataFram
    return pd.DataFrame(data).sort_values("threshold").reset_index(drop=True)


-def plot_distributions(df: pd.DataFrame, out_path: Path) -> None:
-    plt.figure(figsize=(8, 5))
-    sns.histplot(data=df, x="score", hue="label", bins=30, kde=True, stat="density", common_norm=False)
+def _robust_bandwidth(x: np.ndarray) -> float:
+    """Return a Silverman-style rule-of-thumb bandwidth for a Gaussian KDE.
+
+    The spread estimate is the smaller of the sample standard deviation
+    (``ddof=1``) and ``IQR / 1.34``; when the IQR is not positive the standard
+    deviation is used on its own.  Inputs with at most one value get a fixed
+    fallback: 0.1 for a single value, 0.2 for an empty input.
+    """
+    samples = np.asarray(x, dtype=float)
+    count = len(samples)
+    # Too few points to estimate a spread: fall back to constants.
+    if count == 0:
+        return 0.2
+    if count == 1:
+        return 0.1
+    q75, q25 = np.percentile(samples, [75, 25])
+    spread_iqr = q75 - q25
+    spread_std = np.std(samples, ddof=1)
+    if spread_iqr > 0:
+        sigma = min(spread_std, spread_iqr / 1.34)
+    else:
+        sigma = spread_std
+    return 0.9 * sigma * count ** (-1/5)
+
+
+def _kde_gaussian(x: np.ndarray, grid: np.ndarray, bw: float | None = None) -> np.ndarray:
+    """Evaluate a one-dimensional Gaussian kernel density estimate on ``grid``.
+
+    Args:
+        x: Sample values the density is built from.
+        grid: Points at which the density is evaluated.
+        bw: Kernel bandwidth.  When ``None`` or not positive it is taken from
+            ``_robust_bandwidth``; if that is also not positive, a value
+            derived from the sample range (at least ``1e-3``) is used.
+
+    Returns:
+        Density values, one per grid point.
+    """
+    samples = np.asarray(x, dtype=float)
+    points = np.asarray(grid, dtype=float)
+    needs_bandwidth = bw is None or bw <= 0
+    if needs_bandwidth:
+        bw = _robust_bandwidth(samples)
+        if bw <= 0:
+            span = np.max(samples) - np.min(samples)
+            bw = max(1e-3, 0.1 * (span + 1e-6))
+    # Standardised distances: one row per sample, one column per grid point.
+    scaled = (points[None, :] - samples[:, None]) / bw
+    kernel = np.exp(-0.5 * scaled * scaled) / (np.sqrt(2 * np.pi))
+    # Average the kernels over the samples, then normalise by the bandwidth.
+    return kernel.mean(axis=0) / bw
+
+
+def find_density_intersections(x_high: np.ndarray, x_low: np.ndarray) -> list[float]:
+    """Locate the scores where the High and Low class KDEs are equal.
+
+    Both densities are evaluated with ``_kde_gaussian`` on a 1024-point grid
+    spanning the pooled data range.  Every adjacent grid pair where the sign of
+    ``KDE_high - KDE_low`` changes is refined by linear interpolation.
+
+    Args:
+        x_high: Scores of the High class.
+        x_low: Scores of the Low class.
+
+    Returns:
+        Crossing locations in ascending order.  Empty when either sample is
+        empty or no sign change is found.
+    """
+    x_high = np.asarray(x_high, dtype=float)
+    x_low = np.asarray(x_low, dtype=float)
+    # np.min / np.max raise on empty input; without data there is no crossing.
+    if x_high.size == 0 or x_low.size == 0:
+        return []
+    x_all = np.concatenate([x_high, x_low])
+    lo, hi = float(np.min(x_all)), float(np.max(x_all))
+    grid = np.linspace(lo, hi, 1024)
+    diff = _kde_gaussian(x_high, grid) - _kde_gaussian(x_low, grid)
+    sign_change = np.where(np.diff(np.sign(diff)) != 0)[0]
+    # A grid node where diff is exactly 0 makes BOTH neighbouring intervals
+    # register a sign change, and both interpolate to that same node.  Roots
+    # closer than a tiny fraction of the grid step are therefore one crossing.
+    tol = 1e-6 * float(grid[1] - grid[0])
+    xs: list[float] = []
+    for i in sign_change:
+        x1, x2 = grid[i], grid[i + 1]
+        y1, y2 = diff[i], diff[i + 1]
+        if (y2 - y1) == 0:
+            continue
+        # Zero of the straight line through (x1, y1) and (x2, y2).
+        xr = float(x1 - y1 * (x2 - x1) / (y2 - y1))
+        if not lo <= xr <= hi:
+            continue
+        if xs and abs(xr - xs[-1]) <= tol:
+            continue
+        xs.append(xr)
+    return xs
+
+
+def pick_density_threshold(df: pd.DataFrame) -> float | None:
+    """Choose the KDE crossing closest to the midpoint of the two class means.
+
+    Scores are read from the ``score`` column for rows labelled ``"High"`` and
+    ``"Low"``.  Returns ``None`` when either class has fewer than two scores or
+    when ``find_density_intersections`` reports no crossing.
+    """
+    by_label = {
+        name: df.loc[df["label"] == name, "score"].astype(float).to_numpy()
+        for name in ("High", "Low")
+    }
+    high, low = by_label["High"], by_label["Low"]
+    if min(len(high), len(low)) < 2:
+        return None
+    crossings = find_density_intersections(high, low)
+    if not crossings:
+        return None
+    midpoint = 0.5 * (float(np.mean(high)) + float(np.mean(low)))
+    nearest = min(crossings, key=lambda c: abs(c - midpoint))
+    return float(nearest)
+
+
+def plot_distributions(
+    df: pd.DataFrame,
+    out_path: Path,
+    threshold: float | None = None,
+    acc_at_thr: float | None = None,
+    f1_at_thr: float | None = None,
+    density_thr: float | None = None,
+    density_acc: float | None = None,
+    density_f1: float | None = None,
+) -> None:
+    """Draw per-label density histograms of ``score`` with KDE overlays and save them.
+
+    Args:
+        df: Frame with a ``score`` column and a ``label`` column whose values
+            "High" / "Low" select the two classes.
+        out_path: Destination passed to ``plt.savefig`` (written at 150 dpi).
+        threshold: When given, drawn as a red dashed vertical line with a
+            "threshold(F1)" annotation.
+        acc_at_thr: Accuracy shown in the red annotation; "NA" when ``None``.
+        f1_at_thr: F1 shown in the red annotation; "NA" when ``None``.
+        density_thr: When given, drawn as a purple dashed vertical line with a
+            "threshold(density)" annotation.
+        density_acc: Accuracy shown in the purple annotation; "NA" when ``None``.
+        density_f1: F1 shown in the purple annotation; "NA" when ``None``.
+    """
+    # Clean, white background without gray grid
+    sns.set_style("white")
+    plt.figure(figsize=(10, 6))
+    # Side-by-side bars (dodge) with wider bars
+    palette = {"High": "tab:blue", "Low": "tab:orange"}
+    sns.histplot(
+        data=df,
+        x="score",
+        hue="label",
+        bins=None,
+        binwidth=0.18,
+        kde=False,
+        stat="density",
+        common_norm=False,
+        multiple="dodge",
+        palette=palette,
+        element="bars",
+        shrink=0.85,
+        alpha=0.8,
+        edgecolor="white",
+        linewidth=0.5,
+    )
+
+    # KDE lines for High, Low, and All samples (three lines)
+    # NOTE(review): any failure here is swallowed silently, so a missing KDE
+    # curve leaves no trace in the output or logs.
+    try:
+        high_scores = df.loc[df["label"] == "High", "score"].astype(float)
+        low_scores = df.loc[df["label"] == "Low", "score"].astype(float)
+        all_scores = df["score"].astype(float)
+        # Each curve is only drawn when there is more than one sample.
+        if len(high_scores) > 1:
+            sns.kdeplot(high_scores, color="tab:blue", linewidth=2.0, label="High density")
+        if len(low_scores) > 1:
+            sns.kdeplot(low_scores, color="tab:orange", linewidth=2.0, label="Low density")
+        if len(all_scores) > 1:
+            sns.kdeplot(all_scores, color="black", linewidth=2.2, linestyle="-", label="All density")
+    except Exception:
+        pass
+
+    # Threshold vertical line with styled annotation (F1-opt)
+    if threshold is not None:
+        ax = plt.gca()
+        ax.axvline(threshold, color="red", linestyle=(0, (6, 4)), linewidth=2.0)
+        acc_str = f"{acc_at_thr:.3f}" if acc_at_thr is not None else "NA"
+        f1_str = f"{f1_at_thr:.3f}" if f1_at_thr is not None else "NA"
+        label_text = f"threshold(F1)={threshold:.3f}  Accuracy={acc_str}  F1={f1_str}"
+        # The y-limit is read after the histogram/KDE are drawn; the label sits
+        # just below the top of the axes, slightly right of the line.
+        ymax = ax.get_ylim()[1]
+        ax.text(
+            threshold + 0.02,
+            ymax * 0.97,
+            label_text,
+            color="red",
+            ha="left",
+            va="top",
+            fontsize=10,
+            bbox=dict(boxstyle="round,pad=0.3", facecolor="#ffecec", edgecolor="#ff9a9a", alpha=0.85),
+        )
+
+    # Density-intersection threshold (purple)
+    if density_thr is not None:
+        ax = plt.gca()
+        ax.axvline(density_thr, color="purple", linestyle="--", linewidth=2.0)
+        ymax = ax.get_ylim()[1]
+        dens_acc_str = f"{density_acc:.3f}" if density_acc is not None else "NA"
+        dens_f1_str = f"{density_f1:.3f}" if density_f1 is not None else "NA"
+        # Placed at 0.90 of the y-limit, i.e. below the red label's 0.97.
+        ax.text(
+            density_thr + 0.02,
+            ymax * 0.90,
+            f"threshold(density)={density_thr:.3f}  Accuracy={dens_acc_str}  F1={dens_f1_str}",
+            color="purple",
+            ha="left",
+            va="top",
+            fontsize=10,
+            bbox=dict(boxstyle="round,pad=0.3", facecolor="#efe6ff", edgecolor="#b497ff", alpha=0.85),
+        )
+
+    # Add stats box in bottom-right: counts and mean/std per class and overall
+    # NOTE(review): failures while building the box are also swallowed silently.
+    try:
+        high_scores = df.loc[df["label"] == "High", "score"].astype(float)
+        low_scores = df.loc[df["label"] == "Low", "score"].astype(float)
+        n_high = int(high_scores.shape[0])
+        n_low = int(low_scores.shape[0])
+        # Mean needs at least one value and the ddof=1 std at least two;
+        # otherwise NaN is shown.
+        mean_high = float(high_scores.mean()) if n_high > 0 else float("nan")
+        std_high = float(high_scores.std(ddof=1)) if n_high > 1 else float("nan")
+        mean_low = float(low_scores.mean()) if n_low > 0 else float("nan")
+        std_low = float(low_scores.std(ddof=1)) if n_low > 1 else float("nan")
+        all_scores = df["score"].astype(float)
+        mean_all = float(all_scores.mean()) if all_scores.shape[0] > 0 else float("nan")
+        std_all = float(all_scores.std(ddof=1)) if all_scores.shape[0] > 1 else float("nan")
+        # The "All" count is n_high + n_low, while its mean/std use every row of df.
+        stats_text = (
+            f"High: n={n_high}, \u03BC={mean_high:.3f}, \u03C3={std_high:.3f}\n"
+            f"Low:  n={n_low}, \u03BC={mean_low:.3f}, \u03C3={std_low:.3f}\n"
+            f"All:  n={n_high+n_low}, \u03BC={mean_all:.3f}, \u03C3={std_all:.3f}"
+        )
+        ax = plt.gca()
+        # Axes-fraction coordinates keep the box in the bottom-right corner.
+        ax.text(
+            0.99, 0.02, stats_text,
+            transform=ax.transAxes,
+            ha="right", va="bottom",
+            fontsize=9,
+            bbox=dict(boxstyle="round,pad=0.5", facecolor="white", edgecolor="gray", alpha=0.95),
+        )
+    except Exception:
+        pass
+
    plt.title("DeQA score distributions by label")
    plt.xlabel("DeQA score")
    plt.ylabel("Density")
+    plt.legend()
+    plt.tight_layout()
+    plt.savefig(out_path, dpi=150)
+    plt.close()
+
+
+def plot_distributions_count(
+    df: pd.DataFrame,
+    out_path: Path,
+    threshold: float | None = None,
+    acc_at_thr: float | None = None,
+    f1_at_thr: float | None = None,
+    density_thr: float | None = None,
+    density_acc: float | None = None,
+    density_f1: float | None = None,
+) -> None:
+    """Draw per-label count histograms of ``score`` with count-scaled KDE curves.
+
+    Args:
+        df: Frame with a ``score`` column and a ``label`` column whose values
+            "High" / "Low" select the two classes.
+        out_path: Destination passed to ``plt.savefig`` (written at 150 dpi).
+        threshold: When given, drawn as a red dashed vertical line with a
+            "threshold(F1)" annotation.
+        acc_at_thr: Accuracy shown in the red annotation; "NA" when ``None``.
+        f1_at_thr: F1 shown in the red annotation; "NA" when ``None``.
+        density_thr: When given, drawn as a purple dashed vertical line with a
+            "threshold(density)" annotation.
+        density_acc: Accuracy shown in the purple annotation; "NA" when ``None``.
+        density_f1: F1 shown in the purple annotation; "NA" when ``None``.
+
+    The KDE overlay and the stats box are best-effort: a failure in either
+    emits a ``RuntimeWarning`` and the rest of the figure is still produced.
+    """
+    import warnings  # local import keeps this block self-contained
+
+    sns.set_style("white")
+    plt.figure(figsize=(10, 6))
+    palette = {"High": "tab:blue", "Low": "tab:orange"}
+    used_binwidth = 0.18
+    ax = plt.gca()
+    sns.histplot(
+        data=df,
+        x="score",
+        hue="label",
+        bins=None,
+        binwidth=used_binwidth,
+        kde=False,
+        stat="count",
+        common_norm=False,
+        multiple="dodge",
+        palette=palette,
+        element="bars",
+        shrink=0.85,
+        alpha=0.8,
+        edgecolor="white",
+        linewidth=0.5,
+        ax=ax,
+    )
+
+    # KDE lines for High, Low, and All, scaled to counts
+    rescaled = False
+    try:
+        high_scores = df.loc[df["label"] == "High", "score"].astype(float)
+        low_scores = df.loc[df["label"] == "Low", "score"].astype(float)
+        all_scores = df["score"].astype(float)
+        kde_specs = (
+            (high_scores, {"color": "tab:blue", "linewidth": 2.0, "label": "High KDE (count)"}),
+            (low_scores, {"color": "tab:orange", "linewidth": 2.0, "label": "Low KDE (count)"}),
+            (all_scores, {"color": "black", "linewidth": 2.2, "linestyle": "-", "label": "All KDE (count)"}),
+        )
+        for values, line_kws in kde_specs:
+            if len(values) <= 1:
+                continue
+            n_lines_before = len(ax.lines)
+            sns.kdeplot(values, ax=ax, **line_kws)
+            # Rescale only the line(s) this call added.  kdeplot may add none
+            # (e.g. zero-variance data); blindly taking ax.lines[-1] would then
+            # rescale a previous curve a second time or raise IndexError.
+            for line in list(ax.lines)[n_lines_before:]:
+                x, y = line.get_data()
+                # density -> expected count per bin: density * n * binwidth
+                line.set_data(x, y * len(values) * used_binwidth)
+                rescaled = True
+    except Exception as exc:
+        warnings.warn(f"KDE overlay skipped: {exc}", RuntimeWarning, stacklevel=2)
+    if rescaled:
+        # set_data does not refresh the axes data limits; recompute them so the
+        # scaled curves are not clipped and get_ylim() below reflects them.
+        ax.relim()
+        ax.autoscale_view()
+
+    if threshold is not None:
+        ax.axvline(threshold, color="red", linestyle=(0, (6, 4)), linewidth=2.0)
+        acc_str = f"{acc_at_thr:.3f}" if acc_at_thr is not None else "NA"
+        f1_str = f"{f1_at_thr:.3f}" if f1_at_thr is not None else "NA"
+        label_text = f"threshold(F1)={threshold:.3f}  Accuracy={acc_str}  F1={f1_str}"
+        ymax = ax.get_ylim()[1]
+        ax.text(
+            threshold + 0.02,
+            ymax * 0.97,
+            label_text,
+            color="red",
+            ha="left",
+            va="top",
+            fontsize=10,
+            bbox=dict(boxstyle="round,pad=0.3", facecolor="#ffecec", edgecolor="#ff9a9a", alpha=0.85),
+        )
+
+    if density_thr is not None:
+        ax.axvline(density_thr, color="purple", linestyle="--", linewidth=2.0)
+        ymax = ax.get_ylim()[1]
+        dens_acc_str = f"{density_acc:.3f}" if density_acc is not None else "NA"
+        dens_f1_str = f"{density_f1:.3f}" if density_f1 is not None else "NA"
+        # Placed at 0.90 of the y-limit, i.e. below the red label's 0.97.
+        ax.text(
+            density_thr + 0.02,
+            ymax * 0.90,
+            f"threshold(density)={density_thr:.3f}  Accuracy={dens_acc_str}  F1={dens_f1_str}",
+            color="purple",
+            ha="left",
+            va="top",
+            fontsize=10,
+            bbox=dict(boxstyle="round,pad=0.3", facecolor="#efe6ff", edgecolor="#b497ff", alpha=0.85),
+        )
+
+    # Stats box
+    try:
+        high_scores = df.loc[df["label"] == "High", "score"].astype(float)
+        low_scores = df.loc[df["label"] == "Low", "score"].astype(float)
+        n_high = int(high_scores.shape[0])
+        n_low = int(low_scores.shape[0])
+        # Mean needs at least one value and the ddof=1 std at least two;
+        # otherwise NaN is shown.
+        mean_high = float(high_scores.mean()) if n_high > 0 else float("nan")
+        std_high = float(high_scores.std(ddof=1)) if n_high > 1 else float("nan")
+        mean_low = float(low_scores.mean()) if n_low > 0 else float("nan")
+        std_low = float(low_scores.std(ddof=1)) if n_low > 1 else float("nan")
+        all_scores = df["score"].astype(float)
+        mean_all = float(all_scores.mean()) if all_scores.shape[0] > 0 else float("nan")
+        std_all = float(all_scores.std(ddof=1)) if all_scores.shape[0] > 1 else float("nan")
+        stats_text = (
+            f"High: n={n_high}, \u03BC={mean_high:.3f}, \u03C3={std_high:.3f}\n"
+            f"Low:  n={n_low}, \u03BC={mean_low:.3f}, \u03C3={std_low:.3f}\n"
+            f"All:  n={n_high+n_low}, \u03BC={mean_all:.3f}, \u03C3={std_all:.3f}"
+        )
+        # Axes-fraction coordinates keep the box in the bottom-right corner.
+        ax.text(
+            0.99, 0.02, stats_text,
+            transform=ax.transAxes,
+            ha="right", va="bottom",
+            fontsize=9,
+            bbox=dict(boxstyle="round,pad=0.5", facecolor="white", edgecolor="gray", alpha=0.95),
+        )
+    except Exception as exc:
+        warnings.warn(f"Stats box skipped: {exc}", RuntimeWarning, stacklevel=2)
+
+    plt.title("DeQA score distributions by label (counts)")
+    plt.xlabel("DeQA score")
+    plt.ylabel("Count")
+    plt.legend()
    plt.tight_layout()
    plt.savefig(out_path, dpi=150)
    plt.close()
@@ -255,6 +555,17 @@ def main() -> None:
    thr_prec, best_prec, conf_prec = pick_threshold(scores, y_true, metric="precision")
    thr_rec, best_rec, conf_rec = pick_threshold(scores, y_true, metric="recall")

+    # New: density-intersection threshold
+    density_thr = pick_density_threshold(df)
+    if density_thr is not None:
+        tp_d, fp_d, fn_d, tn_d = confusion_from_threshold(scores, y_true, density_thr)
+        acc_at_density = metric_from_confusion(tp_d, fp_d, fn_d, tn_d, "accuracy")
+        f1_at_density = metric_from_confusion(tp_d, fp_d, fn_d, tn_d, "f1")
+    else:
+        tp_d = fp_d = fn_d = tn_d = None
+        acc_at_density = None
+        f1_at_density = None
+
    summary = {
        "positive_definition": "HIGH when score >= threshold",
        "best_thresholds": {
@@ -262,6 +573,13 @@ def main() -> None:
            "accuracy": {"threshold": thr_acc, "value": best_acc, "confusion": conf_acc},
            "precision": {"threshold": thr_prec, "value": best_prec, "confusion": conf_prec},
            "recall": {"threshold": thr_rec, "value": best_rec, "confusion": conf_rec},
+            "density_intersection": {
+                "threshold": density_thr,
+                "acc": acc_at_density,
+                "f1": f1_at_density,
+                "confusion": {"TP": tp_d, "FP": fp_d, "FN": fn_d, "TN": tn_d} if density_thr is not None else None,
+                "notes": "Intersection of KDE(High) and KDE(Low), equal prior decision boundary",
+            },
        },
        "counts": {
            "total": int(len(df)),
@@ -272,7 +590,31 @@ def main() -> None:

    # Metric curves and figures
    curves = compute_metric_curves(scores, y_true)
-    plot_distributions(df, outdir / "facture_score_distributions.png")
+    # Accuracy and F1 at selected threshold for annotation
+    tp_f1, fp_f1, fn_f1, tn_f1 = confusion_from_threshold(scores, y_true, thr_f1)
+    acc_at_thr = metric_from_confusion(tp_f1, fp_f1, fn_f1, tn_f1, "accuracy")
+    f1_at_thr = metric_from_confusion(tp_f1, fp_f1, fn_f1, tn_f1, "f1")
+    plot_distributions(
+        df,
+        outdir / "facture_score_distributions.png",
+        threshold=thr_f1,
+        acc_at_thr=acc_at_thr,
+        f1_at_thr=f1_at_thr,
+        density_thr=density_thr,
+        density_acc=acc_at_density,
+        density_f1=f1_at_density,
+    )
+    # New: counts version
+    plot_distributions_count(
+        df,
+        outdir / "facture_score_distributions_count.png",
+        threshold=thr_f1,
+        acc_at_thr=acc_at_thr,
+        f1_at_thr=f1_at_thr,
+        density_thr=density_thr,
+        density_acc=acc_at_density,
+        density_f1=f1_at_density,
+    )
    plot_metric_curves(curves, outdir / "facture_metric_curves.png")
    # Extra plots
    plot_sorted_scores_with_threshold(df, thr_f1, outdir / "facture_sorted_scores_with_thr.png")
@@ -319,6 +661,8 @@ def main() -> None:
    for k in ["f1", "accuracy", "precision", "recall"]:
        info = summary["best_thresholds"][k]
        print(f"- {k}: thr={info['threshold']:.3f}, value={info['value']:.3f}, conf={info['confusion']}")
+    if density_thr is not None:
+        print(f"- density_threshold: {density_thr:.3f}")


 if __name__ == "__main__":