update IQA results

This commit is contained in:
Nguyễn Phước Thành
2025-09-09 12:51:55 +07:00
parent eb465155d1
commit eb0a12448f
32 changed files with 3551 additions and 4 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 82 KiB

After

Width:  |  Height:  |  Size: 82 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 38 KiB

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 41 KiB

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 56 KiB

After

Width:  |  Height:  |  Size: 118 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 46 KiB

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 71 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 75 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

View File

@@ -0,0 +1,50 @@
{
"positive_definition": "HIGH when score >= threshold",
"best_thresholds": {
"f1": {
"threshold": 1.6,
"value": 0.9249762583095917,
"confusion": {
"TP": 2922,
"FP": 474,
"FN": 0,
"TN": 0
}
},
"accuracy": {
"threshold": 1.6,
"value": 0.8604240282685512,
"confusion": {
"TP": 2922,
"FP": 474,
"FN": 0,
"TN": 0
}
},
"precision": {
"threshold": 4.2,
"value": 1.0,
"confusion": {
"TP": 3,
"FP": 0,
"FN": 2919,
"TN": 474
}
},
"recall": {
"threshold": 1.6,
"value": 1.0,
"confusion": {
"TP": 2922,
"FP": 474,
"FN": 0,
"TN": 0
}
}
},
"counts": {
"total": 3396,
"positives": 2922,
"negatives": 474
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 70 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 42 KiB

View File

@@ -130,12 +130,102 @@ def compute_metric_curves(scores: np.ndarray, y_true: np.ndarray) -> pd.DataFram
return pd.DataFrame(data).sort_values("threshold").reset_index(drop=True) return pd.DataFrame(data).sort_values("threshold").reset_index(drop=True)
def plot_distributions(df: pd.DataFrame, out_path: Path) -> None: def plot_distributions(
plt.figure(figsize=(8, 5)) df: pd.DataFrame,
sns.histplot(data=df, x="score", hue="label", bins=30, kde=True, stat="density", common_norm=False) out_path: Path,
threshold: float | None = None,
acc_at_thr: float | None = None,
f1_at_thr: float | None = None,
) -> None:
# Clean, white background without gray grid
sns.set_style("white")
plt.figure(figsize=(10, 6))
# Side-by-side bars (dodge) with wider bars
palette = {"High": "tab:blue", "Low": "tab:orange"}
sns.histplot(
data=df,
x="score",
hue="label",
bins=None,
binwidth=0.18,
kde=False,
stat="density",
common_norm=False,
multiple="dodge",
palette=palette,
element="bars",
shrink=0.85,
alpha=0.8,
edgecolor="white",
linewidth=0.5,
)
# KDE lines for High, Low, and All samples (three lines)
try:
high_scores = df.loc[df["label"] == "High", "score"].astype(float)
low_scores = df.loc[df["label"] == "Low", "score"].astype(float)
all_scores = df["score"].astype(float)
if len(high_scores) > 1:
sns.kdeplot(high_scores, color="tab:blue", linewidth=2.0, label="High density")
if len(low_scores) > 1:
sns.kdeplot(low_scores, color="tab:orange", linewidth=2.0, label="Low density")
if len(all_scores) > 1:
sns.kdeplot(all_scores, color="black", linewidth=2.2, linestyle="-", label="All density")
except Exception:
pass
# Threshold vertical line with styled annotation
if threshold is not None:
ax = plt.gca()
ax.axvline(threshold, color="red", linestyle=(0, (6, 4)), linewidth=2.0)
acc_str = f"{acc_at_thr:.3f}" if acc_at_thr is not None else "NA"
f1_str = f"{f1_at_thr:.3f}" if f1_at_thr is not None else "NA"
label_text = f"threshold={threshold:.3f} Accuracy={acc_str} F1={f1_str}"
ymax = ax.get_ylim()[1]
ax.text(
threshold + 0.02,
ymax * 0.97,
label_text,
color="red",
ha="left",
va="top",
fontsize=10,
bbox=dict(boxstyle="round,pad=0.3", facecolor="#ffecec", edgecolor="#ff9a9a", alpha=0.85),
)
# Add stats box in bottom-right: counts and mean/std per class and overall
try:
high_scores = df.loc[df["label"] == "High", "score"].astype(float)
low_scores = df.loc[df["label"] == "Low", "score"].astype(float)
n_high = int(high_scores.shape[0])
n_low = int(low_scores.shape[0])
mean_high = float(high_scores.mean()) if n_high > 0 else float("nan")
std_high = float(high_scores.std(ddof=1)) if n_high > 1 else float("nan")
mean_low = float(low_scores.mean()) if n_low > 0 else float("nan")
std_low = float(low_scores.std(ddof=1)) if n_low > 1 else float("nan")
all_scores = df["score"].astype(float)
mean_all = float(all_scores.mean()) if all_scores.shape[0] > 0 else float("nan")
std_all = float(all_scores.std(ddof=1)) if all_scores.shape[0] > 1 else float("nan")
stats_text = (
f"High: n={n_high}, \u03BC={mean_high:.3f}, \u03C3={std_high:.3f}\n"
f"Low: n={n_low}, \u03BC={mean_low:.3f}, \u03C3={std_low:.3f}\n"
f"All: n={n_high+n_low}, \u03BC={mean_all:.3f}, \u03C3={std_all:.3f}"
)
ax = plt.gca()
ax.text(
0.99, 0.02, stats_text,
transform=ax.transAxes,
ha="right", va="bottom",
fontsize=9,
bbox=dict(boxstyle="round,pad=0.5", facecolor="white", edgecolor="gray", alpha=0.95),
)
except Exception:
pass
plt.title("DeQA score distributions by label") plt.title("DeQA score distributions by label")
plt.xlabel("DeQA score") plt.xlabel("DeQA score")
plt.ylabel("Density") plt.ylabel("Density")
plt.legend()
plt.tight_layout() plt.tight_layout()
plt.savefig(out_path, dpi=150) plt.savefig(out_path, dpi=150)
plt.close() plt.close()
@@ -272,7 +362,17 @@ def main() -> None:
# Metric curves and figures # Metric curves and figures
curves = compute_metric_curves(scores, y_true) curves = compute_metric_curves(scores, y_true)
plot_distributions(df, outdir / "facture_score_distributions.png") # Accuracy and F1 at selected threshold for annotation
tp_f1, fp_f1, fn_f1, tn_f1 = confusion_from_threshold(scores, y_true, thr_f1)
acc_at_thr = metric_from_confusion(tp_f1, fp_f1, fn_f1, tn_f1, "accuracy")
f1_at_thr = metric_from_confusion(tp_f1, fp_f1, fn_f1, tn_f1, "f1")
plot_distributions(
df,
outdir / "facture_score_distributions.png",
threshold=thr_f1,
acc_at_thr=acc_at_thr,
f1_at_thr=f1_at_thr,
)
plot_metric_curves(curves, outdir / "facture_metric_curves.png") plot_metric_curves(curves, outdir / "facture_metric_curves.png")
# Extra plots # Extra plots
plot_sorted_scores_with_threshold(df, thr_f1, outdir / "facture_sorted_scores_with_thr.png") plot_sorted_scores_with_threshold(df, thr_f1, outdir / "facture_sorted_scores_with_thr.png")