diff --git a/docs/task/cni/results/facture_confusion_matrix.png b/docs/task/cni/results/facture_confusion_matrix.png index 9455892..9c84b87 100644 Binary files a/docs/task/cni/results/facture_confusion_matrix.png and b/docs/task/cni/results/facture_confusion_matrix.png differ diff --git a/docs/task/cni/results/facture_deqa_images.xlsx b/docs/task/cni/results/facture_deqa_images.xlsx index a892850..6943602 100644 Binary files a/docs/task/cni/results/facture_deqa_images.xlsx and b/docs/task/cni/results/facture_deqa_images.xlsx differ diff --git a/docs/task/cni/results/facture_metric_curves.png b/docs/task/cni/results/facture_metric_curves.png index e03e65a..487a888 100644 Binary files a/docs/task/cni/results/facture_metric_curves.png and b/docs/task/cni/results/facture_metric_curves.png differ diff --git a/docs/task/cni/results/facture_precision_recall_curve.png b/docs/task/cni/results/facture_precision_recall_curve.png index 51c2791..e7945ac 100644 Binary files a/docs/task/cni/results/facture_precision_recall_curve.png and b/docs/task/cni/results/facture_precision_recall_curve.png differ diff --git a/docs/task/cni/results/facture_roc_like_curve.png b/docs/task/cni/results/facture_roc_like_curve.png index d3f9abd..6bdafe8 100644 Binary files a/docs/task/cni/results/facture_roc_like_curve.png and b/docs/task/cni/results/facture_roc_like_curve.png differ diff --git a/docs/task/cni/results/facture_score_distributions.png b/docs/task/cni/results/facture_score_distributions.png index 0b0caa0..05e88e4 100644 Binary files a/docs/task/cni/results/facture_score_distributions.png and b/docs/task/cni/results/facture_score_distributions.png differ diff --git a/docs/task/cni/results/facture_score_distributions_count.png b/docs/task/cni/results/facture_score_distributions_count.png new file mode 100644 index 0000000..9e0c0e9 Binary files /dev/null and b/docs/task/cni/results/facture_score_distributions_count.png differ diff --git a/docs/task/cni/results/facture_sorted_scores_with_thr.png b/docs/task/cni/results/facture_sorted_scores_with_thr.png index 90c55f9..6ccb237 100644 Binary files a/docs/task/cni/results/facture_sorted_scores_with_thr.png and b/docs/task/cni/results/facture_sorted_scores_with_thr.png differ diff --git a/docs/task/cni/results/facture_thresholds_summary.json b/docs/task/cni/results/facture_thresholds_summary.json index 1b632c4..f06eacd 100644 --- a/docs/task/cni/results/facture_thresholds_summary.json +++ b/docs/task/cni/results/facture_thresholds_summary.json @@ -40,6 +40,18 @@ "FN": 0, "TN": 4 } + }, + "density_intersection": { + "threshold": 3.1280716580085004, + "acc": 0.6666666666666666, + "f1": 0.7291666666666666, + "confusion": { + "TP": 35, + "FP": 2, + "FN": 24, + "TN": 17 + }, + "notes": "Intersection of KDE(High) and KDE(Low), equal prior decision boundary" } }, "counts": { diff --git a/docs/task/facture/200_samples/facture_confusion_matrix.png b/docs/task/facture/200_samples/facture_confusion_matrix.png index 857d112..6f04c1f 100644 Binary files a/docs/task/facture/200_samples/facture_confusion_matrix.png and b/docs/task/facture/200_samples/facture_confusion_matrix.png differ diff --git a/docs/task/facture/200_samples/facture_deqa_images.xlsx b/docs/task/facture/200_samples/facture_deqa_images.xlsx index ef0ba95..4a75812 100644 Binary files a/docs/task/facture/200_samples/facture_deqa_images.xlsx and b/docs/task/facture/200_samples/facture_deqa_images.xlsx differ diff --git a/docs/task/facture/200_samples/facture_metric_curves.png b/docs/task/facture/200_samples/facture_metric_curves.png index 286145a..d976092 100644 Binary files a/docs/task/facture/200_samples/facture_metric_curves.png and b/docs/task/facture/200_samples/facture_metric_curves.png differ diff --git a/docs/task/facture/200_samples/facture_precision_recall_curve.png b/docs/task/facture/200_samples/facture_precision_recall_curve.png index a03cefe..40ed2c9 100644 Binary files a/docs/task/facture/200_samples/facture_precision_recall_curve.png and b/docs/task/facture/200_samples/facture_precision_recall_curve.png differ diff --git a/docs/task/facture/200_samples/facture_roc_like_curve.png b/docs/task/facture/200_samples/facture_roc_like_curve.png index 44e9023..32685c9 100644 Binary files a/docs/task/facture/200_samples/facture_roc_like_curve.png and b/docs/task/facture/200_samples/facture_roc_like_curve.png differ diff --git a/docs/task/facture/200_samples/facture_score_distributions.png b/docs/task/facture/200_samples/facture_score_distributions.png index 86ca2d3..fb645ee 100644 Binary files a/docs/task/facture/200_samples/facture_score_distributions.png and b/docs/task/facture/200_samples/facture_score_distributions.png differ diff --git a/docs/task/facture/200_samples/facture_score_distributions_count.png b/docs/task/facture/200_samples/facture_score_distributions_count.png new file mode 100644 index 0000000..557d087 Binary files /dev/null and b/docs/task/facture/200_samples/facture_score_distributions_count.png differ diff --git a/docs/task/facture/200_samples/facture_sorted_scores_with_thr.png b/docs/task/facture/200_samples/facture_sorted_scores_with_thr.png index 114a306..1a46234 100644 Binary files a/docs/task/facture/200_samples/facture_sorted_scores_with_thr.png and b/docs/task/facture/200_samples/facture_sorted_scores_with_thr.png differ diff --git a/docs/task/facture/200_samples/facture_thresholds_summary.json b/docs/task/facture/200_samples/facture_thresholds_summary.json index 6d7f040..a6b5f25 100644 --- a/docs/task/facture/200_samples/facture_thresholds_summary.json +++ b/docs/task/facture/200_samples/facture_thresholds_summary.json @@ -40,6 +40,18 @@ "FN": 0, "TN": 0 } + }, + "density_intersection": { + "threshold": 3.2458209887076017, + "acc": 0.595, + "f1": 0.6823529411764706, + "confusion": { + "TP": 87, + "FP": 68, + "FN": 13, + "TN": 32 + }, + "notes": "Intersection of KDE(High) and KDE(Low), equal prior decision boundary" } }, "counts": { diff --git a/docs/task/facture/full_samples/facture_confusion_matrix.png b/docs/task/facture/full_samples/facture_confusion_matrix.png index b4a58fe..2726ff4 100644 Binary files a/docs/task/facture/full_samples/facture_confusion_matrix.png and b/docs/task/facture/full_samples/facture_confusion_matrix.png differ diff --git a/docs/task/facture/full_samples/facture_deqa_images.xlsx b/docs/task/facture/full_samples/facture_deqa_images.xlsx index 7017117..c2bfebf 100644 Binary files a/docs/task/facture/full_samples/facture_deqa_images.xlsx and b/docs/task/facture/full_samples/facture_deqa_images.xlsx differ diff --git a/docs/task/facture/full_samples/facture_metric_curves.png b/docs/task/facture/full_samples/facture_metric_curves.png index 961a5d9..b309d7a 100644 Binary files a/docs/task/facture/full_samples/facture_metric_curves.png and b/docs/task/facture/full_samples/facture_metric_curves.png differ diff --git a/docs/task/facture/full_samples/facture_precision_recall_curve.png b/docs/task/facture/full_samples/facture_precision_recall_curve.png index e28d7e3..9eaedab 100644 Binary files a/docs/task/facture/full_samples/facture_precision_recall_curve.png and b/docs/task/facture/full_samples/facture_precision_recall_curve.png differ diff --git a/docs/task/facture/full_samples/facture_roc_like_curve.png b/docs/task/facture/full_samples/facture_roc_like_curve.png index ee1d6ce..cbf0472 100644 Binary files a/docs/task/facture/full_samples/facture_roc_like_curve.png and b/docs/task/facture/full_samples/facture_roc_like_curve.png differ diff --git a/docs/task/facture/full_samples/facture_score_distributions.png b/docs/task/facture/full_samples/facture_score_distributions.png index bfa80ab..537851e 100644 Binary files a/docs/task/facture/full_samples/facture_score_distributions.png and b/docs/task/facture/full_samples/facture_score_distributions.png differ diff --git a/docs/task/facture/full_samples/facture_score_distributions_count.png b/docs/task/facture/full_samples/facture_score_distributions_count.png new file mode 100644 index 0000000..e762a30 Binary files /dev/null and b/docs/task/facture/full_samples/facture_score_distributions_count.png differ diff --git a/docs/task/facture/full_samples/facture_sorted_scores_with_thr.png b/docs/task/facture/full_samples/facture_sorted_scores_with_thr.png index d410fbe..8221da0 100644 Binary files a/docs/task/facture/full_samples/facture_sorted_scores_with_thr.png and b/docs/task/facture/full_samples/facture_sorted_scores_with_thr.png differ diff --git a/docs/task/facture/full_samples/facture_thresholds_summary.json b/docs/task/facture/full_samples/facture_thresholds_summary.json index 7016154..86b10b5 100644 --- a/docs/task/facture/full_samples/facture_thresholds_summary.json +++ b/docs/task/facture/full_samples/facture_thresholds_summary.json @@ -40,6 +40,18 @@ "FN": 0, "TN": 0 } + }, + "density_intersection": { + "threshold": 3.260110186564513, + "acc": 0.7977031802120141, + "f1": 0.8818166179253397, + "confusion": { + "TP": 2563, + "FP": 328, + "FN": 359, + "TN": 146 + }, + "notes": "Intersection of KDE(High) and KDE(Low), equal prior decision boundary" } }, "counts": { diff --git a/scripts/threshold_analysis.py b/scripts/threshold_analysis.py index 0a8d5a2..26b887a 100644 --- a/scripts/threshold_analysis.py +++ b/scripts/threshold_analysis.py @@ -130,12 +130,77 @@ def compute_metric_curves(scores: np.ndarray, y_true: np.ndarray) -> pd.DataFram return pd.DataFrame(data).sort_values("threshold").reset_index(drop=True) +def _robust_bandwidth(x: np.ndarray) -> float: + """Silverman-like robust bandwidth for Gaussian KDE.""" + x = np.asarray(x, dtype=float) + n = len(x) + if n <= 1: + return 0.1 if n == 1 else 0.2 + std = np.std(x, ddof=1) + iqr = np.subtract(*np.percentile(x, [75, 25])) + sigma = min(std, iqr / 1.34) if iqr > 0 else std + return 0.9 * sigma * n ** (-1/5) + + +def _kde_gaussian(x: np.ndarray, grid: np.ndarray, bw: float | None = None) -> np.ndarray: + """Univariate Gaussian KDE evaluated on grid.""" + x = np.asarray(x, dtype=float) + grid = np.asarray(grid, dtype=float) + if bw is None or bw <= 0: + bw = _robust_bandwidth(x) + if bw <= 0: + bw = max(1e-3, 0.1 * (np.max(x) - np.min(x) + 1e-6)) + z = (grid[None, :] - x[:, None]) / bw + dens = np.exp(-0.5 * z * z) / (np.sqrt(2 * np.pi)) + dens = dens.mean(axis=0) / bw + return dens + + +def find_density_intersections(x_high: np.ndarray, x_low: np.ndarray) -> list[float]: + """Find x where KDE_high == KDE_low via linear interpolation on a fine grid.""" + x_all = np.concatenate([x_high, x_low]).astype(float) + lo, hi = float(np.min(x_all)), float(np.max(x_all)) + grid = np.linspace(lo, hi, 1024) + fH = _kde_gaussian(x_high, grid) + fL = _kde_gaussian(x_low, grid) + diff = fH - fL + s = np.sign(diff) + sign_change = np.where(np.diff(s) != 0)[0] + xs: list[float] = [] + for i in sign_change: + x1, x2 = grid[i], grid[i + 1] + y1, y2 = diff[i], diff[i + 1] + if (y2 - y1) != 0: + xr = x1 - y1 * (x2 - x1) / (y2 - y1) + if lo <= xr <= hi: + xs.append(float(xr)) + return xs + + +def pick_density_threshold(df: pd.DataFrame) -> float | None: + """Pick 'prior-balanced' threshold at intersection near midpoint of class means.""" + xH = df.loc[df["label"] == "High", "score"].astype(float).to_numpy() + xL = df.loc[df["label"] == "Low", "score"].astype(float).to_numpy() + if len(xH) < 2 or len(xL) < 2: + return None + inters = find_density_intersections(xH, xL) + if not inters: + return None + mH, mL = float(np.mean(xH)), float(np.mean(xL)) + mid = 0.5 * (mH + mL) + thr = min(inters, key=lambda t: abs(t - mid)) + return float(thr) + + def plot_distributions( df: pd.DataFrame, out_path: Path, threshold: float | None = None, acc_at_thr: float | None = None, f1_at_thr: float | None = None, + density_thr: float | None = None, + density_acc: float | None = None, + density_f1: float | None = None, ) -> None: # Clean, white background without gray grid sns.set_style("white") @@ -159,7 +224,7 @@ def plot_distributions( edgecolor="white", linewidth=0.5, ) - + # KDE lines for High, Low, and All samples (three lines) try: high_scores = df.loc[df["label"] == "High", "score"].astype(float) @@ -173,14 +238,14 @@ def plot_distributions( sns.kdeplot(all_scores, color="black", linewidth=2.2, linestyle="-", label="All density") except Exception: pass - - # Threshold vertical line with styled annotation + + # Threshold vertical line with styled annotation (F1-opt) if threshold is not None: ax = plt.gca() ax.axvline(threshold, color="red", linestyle=(0, (6, 4)), linewidth=2.0) acc_str = f"{acc_at_thr:.3f}" if acc_at_thr is not None else "NA" f1_str = f"{f1_at_thr:.3f}" if f1_at_thr is not None else "NA" - label_text = f"threshold={threshold:.3f} Accuracy={acc_str} F1={f1_str}" + label_text = f"threshold(F1)={threshold:.3f} Accuracy={acc_str} F1={f1_str}" ymax = ax.get_ylim()[1] ax.text( threshold + 0.02, @@ -192,7 +257,25 @@ def plot_distributions( fontsize=10, bbox=dict(boxstyle="round,pad=0.3", facecolor="#ffecec", edgecolor="#ff9a9a", alpha=0.85), ) - + + # Density-intersection threshold (purple) + if density_thr is not None: + ax = plt.gca() + ax.axvline(density_thr, color="purple", linestyle="--", linewidth=2.0) + ymax = ax.get_ylim()[1] + dens_acc_str = f"{density_acc:.3f}" if density_acc is not None else "NA" + dens_f1_str = f"{density_f1:.3f}" if density_f1 is not None else "NA" + ax.text( + density_thr + 0.02, + ymax * 0.90, + f"threshold(density)={density_thr:.3f} Accuracy={dens_acc_str} F1={dens_f1_str}", + color="purple", + ha="left", + va="top", + fontsize=10, + bbox=dict(boxstyle="round,pad=0.3", facecolor="#efe6ff", edgecolor="#b497ff", alpha=0.85), + ) + # Add stats box in bottom-right: counts and mean/std per class and overall try: high_scores = df.loc[df["label"] == "High", "score"].astype(float) @@ -221,7 +304,7 @@ def plot_distributions( ) except Exception: pass - + plt.title("DeQA score distributions by label") plt.xlabel("DeQA score") plt.ylabel("Density") @@ -231,6 +314,133 @@ def plot_distributions( plt.close() +def plot_distributions_count( + df: pd.DataFrame, + out_path: Path, + threshold: float | None = None, + acc_at_thr: float | None = None, + f1_at_thr: float | None = None, + density_thr: float | None = None, + density_acc: float | None = None, + density_f1: float | None = None, +) -> None: + sns.set_style("white") + plt.figure(figsize=(10, 6)) + palette = {"High": "tab:blue", "Low": "tab:orange"} + used_binwidth = 0.18 + ax = plt.gca() + sns.histplot( + data=df, + x="score", + hue="label", + bins=None, + binwidth=used_binwidth, + kde=False, + stat="count", + common_norm=False, + multiple="dodge", + palette=palette, + element="bars", + shrink=0.85, + alpha=0.8, + edgecolor="white", + linewidth=0.5, + ax=ax, + ) + + # KDE lines for High, Low, and All, scaled to counts + try: + high_scores = df.loc[df["label"] == "High", "score"].astype(float) + low_scores = df.loc[df["label"] == "Low", "score"].astype(float) + all_scores = df["score"].astype(float) + if len(high_scores) > 1: + sns.kdeplot(high_scores, color="tab:blue", linewidth=2.0, label="High KDE (count)", ax=ax) + line = ax.lines[-1] + x, y = line.get_data() + line.set_data(x, y * len(high_scores) * used_binwidth) + if len(low_scores) > 1: + sns.kdeplot(low_scores, color="tab:orange", linewidth=2.0, label="Low KDE (count)", ax=ax) + line = ax.lines[-1] + x, y = line.get_data() + line.set_data(x, y * len(low_scores) * used_binwidth) + if len(all_scores) > 1: + sns.kdeplot(all_scores, color="black", linewidth=2.2, linestyle="-", label="All KDE (count)", ax=ax) + line = ax.lines[-1] + x, y = line.get_data() + line.set_data(x, y * len(all_scores) * used_binwidth) + except Exception: + pass + + if threshold is not None: + ax.axvline(threshold, color="red", linestyle=(0, (6, 4)), linewidth=2.0) + acc_str = f"{acc_at_thr:.3f}" if acc_at_thr is not None else "NA" + f1_str = f"{f1_at_thr:.3f}" if f1_at_thr is not None else "NA" + label_text = f"threshold(F1)={threshold:.3f} Accuracy={acc_str} F1={f1_str}" + ymax = ax.get_ylim()[1] + ax.text( + threshold + 0.02, + ymax * 0.97, + label_text, + color="red", + ha="left", + va="top", + fontsize=10, + bbox=dict(boxstyle="round,pad=0.3", facecolor="#ffecec", edgecolor="#ff9a9a", alpha=0.85), + ) + + if density_thr is not None: + ax.axvline(density_thr, color="purple", linestyle="--", linewidth=2.0) + ymax = ax.get_ylim()[1] + dens_acc_str = f"{density_acc:.3f}" if density_acc is not None else "NA" + dens_f1_str = f"{density_f1:.3f}" if density_f1 is not None else "NA" + ax.text( + density_thr + 0.02, + ymax * 0.90, + f"threshold(density)={density_thr:.3f} Accuracy={dens_acc_str} F1={dens_f1_str}", + color="purple", + ha="left", + va="top", + fontsize=10, + bbox=dict(boxstyle="round,pad=0.3", facecolor="#efe6ff", edgecolor="#b497ff", alpha=0.85), + ) + + # Stats box + try: + high_scores = df.loc[df["label"] == "High", "score"].astype(float) + low_scores = df.loc[df["label"] == "Low", "score"].astype(float) + n_high = int(high_scores.shape[0]) + n_low = int(low_scores.shape[0]) + mean_high = float(high_scores.mean()) if n_high > 0 else float("nan") + std_high = float(high_scores.std(ddof=1)) if n_high > 1 else float("nan") + mean_low = float(low_scores.mean()) if n_low > 0 else float("nan") + std_low = float(low_scores.std(ddof=1)) if n_low > 1 else float("nan") + all_scores = df["score"].astype(float) + mean_all = float(all_scores.mean()) if all_scores.shape[0] > 0 else float("nan") + std_all = float(all_scores.std(ddof=1)) if all_scores.shape[0] > 1 else float("nan") + stats_text = ( + f"High: n={n_high}, \u03BC={mean_high:.3f}, \u03C3={std_high:.3f}\n" + f"Low: n={n_low}, \u03BC={mean_low:.3f}, \u03C3={std_low:.3f}\n" + f"All: n={n_high+n_low}, \u03BC={mean_all:.3f}, \u03C3={std_all:.3f}" + ) + ax.text( + 0.99, 0.02, stats_text, + transform=ax.transAxes, + ha="right", va="bottom", + fontsize=9, + bbox=dict(boxstyle="round,pad=0.5", facecolor="white", edgecolor="gray", alpha=0.95), + ) + except Exception: + pass + + plt.title("DeQA score distributions by label (counts)") + plt.xlabel("DeQA score") + plt.ylabel("Count") + plt.legend() + plt.tight_layout() + plt.savefig(out_path, dpi=150) + plt.close() + + def plot_metric_curves(curve_df: pd.DataFrame, out_path: Path) -> None: plt.figure(figsize=(8, 5)) for metric in ["accuracy", "precision", "recall", "f1"]: @@ -345,6 +555,17 @@ def main() -> None: thr_prec, best_prec, conf_prec = pick_threshold(scores, y_true, metric="precision") thr_rec, best_rec, conf_rec = pick_threshold(scores, y_true, metric="recall") + # New: density-intersection threshold + density_thr = pick_density_threshold(df) + if density_thr is not None: + tp_d, fp_d, fn_d, tn_d = confusion_from_threshold(scores, y_true, density_thr) + acc_at_density = metric_from_confusion(tp_d, fp_d, fn_d, tn_d, "accuracy") + f1_at_density = metric_from_confusion(tp_d, fp_d, fn_d, tn_d, "f1") + else: + tp_d = fp_d = fn_d = tn_d = None + acc_at_density = None + f1_at_density = None + summary = { "positive_definition": "HIGH when score >= threshold", "best_thresholds": { @@ -352,6 +573,13 @@ def main() -> None: "accuracy": {"threshold": thr_acc, "value": best_acc, "confusion": conf_acc}, "precision": {"threshold": thr_prec, "value": best_prec, "confusion": conf_prec}, "recall": {"threshold": thr_rec, "value": best_rec, "confusion": conf_rec}, + "density_intersection": { + "threshold": density_thr, + "acc": acc_at_density, + "f1": f1_at_density, + "confusion": {"TP": tp_d, "FP": fp_d, "FN": fn_d, "TN": tn_d} if density_thr is not None else None, + "notes": "Intersection of KDE(High) and KDE(Low), equal prior decision boundary", + }, }, "counts": { "total": int(len(df)), @@ -372,6 +600,20 @@ def main() -> None: threshold=thr_f1, acc_at_thr=acc_at_thr, f1_at_thr=f1_at_thr, + density_thr=density_thr, + density_acc=acc_at_density, + density_f1=f1_at_density, + ) + # New: counts version + plot_distributions_count( + df, + outdir / "facture_score_distributions_count.png", + threshold=thr_f1, + acc_at_thr=acc_at_thr, + f1_at_thr=f1_at_thr, + density_thr=density_thr, + density_acc=acc_at_density, + density_f1=f1_at_density, ) plot_metric_curves(curves, outdir / "facture_metric_curves.png") # Extra plots @@ -419,6 +661,8 @@ def main() -> None: for k in ["f1", "accuracy", "precision", "recall"]: info = summary["best_thresholds"][k] print(f"- {k}: thr={info['threshold']:.3f}, value={info['value']:.3f}, conf={info['confusion']}") + if density_thr is not None: + print(f"- density_threshold: {density_thr:.3f}") if __name__ == "__main__":