Source code for app.backend.claim_confidence
"""Per-claim confidence via NLI self-entailment.
For every decomposed claim we compute P(response entails claim) using the
ModernBERT-large-nli scorer already loaded for Kernel Language Entropy. The
response is used as the premise and each claim as the hypothesis, so a high
score means the model's own output supports the claim.
The score is `P(entailment) + 0.5 * P(neutral)`, mirroring the KLE similarity
weighting. Because the three NLI probabilities sum to 1, this one-direction
score is bounded in [0, 1] (unlike the bidirectional KLE W matrix in [0, 2]).
We only use the response -> claim direction: the reverse direction is
uninformative here because a short claim generally cannot entail the full
response, so every score would collapse toward neutral/contradiction.
This is the degenerate single-sample case of SelfCheckGPT-NLI (Manakul et
al., EMNLP 2023): rather than sampling K responses and averaging entailment,
we treat the produced response as the sole reference context.
"""
from typing import Any
[docs]
def score_to_metrics(score: float) -> dict:
    """Map a [0, 1] confidence score into the API's {confidence, level, guidance} dict.

    The score is rounded to two decimals first so the value shown to the
    client always agrees with the level it was bucketed into.
    """
    rounded = round(float(score), 2)
    if rounded >= 0.80:
        level, guidance = "high", ""
    elif rounded >= 0.65:
        level, guidance = "moderate", "Verify with clinical reference"
    else:
        level, guidance = "low", "Cross-check with authoritative source before acting"
    return {"confidence": rounded, "level": level, "guidance": guidance}
[docs]
def compute_claim_confidences(
    response: str,
    claims: list[str],
    bert_model: Any,
    bert_tokenizer: Any,
) -> list[dict]:
    """Score every claim against `response` with NLI.

    Each claim becomes the hypothesis with the full `response` as premise;
    the confidence is P(entailment) + 0.5 * P(neutral), bounded in [0, 1].

    Returns one {confidence, level, guidance} dict per claim, in the same
    order as `claims`. Raises if the NLI forward pass fails; the caller is
    expected to treat the claim ledger as unavailable on exception.
    """
    if not claims:
        return []
    # Imported lazily so the module stays importable without the NLI stack.
    from kernel_entropy.nli import (
        LABEL_ENTAILMENT,
        LABEL_NEUTRAL,
        ModernBERTScorer,
    )
    nli_scorer = ModernBERTScorer(
        sentences=claims,
        model=bert_model,
        tokenizer=bert_tokenizer,
    )
    # One (premise, hypothesis) pair per claim: response -> claim direction only.
    premise_hypothesis_pairs = [(response, claim) for claim in claims]
    probs = nli_scorer.get_nli_probabilities(premise_hypothesis_pairs)
    scores = probs[:, LABEL_ENTAILMENT] + 0.5 * probs[:, LABEL_NEUTRAL]
    return [score_to_metrics(score) for score in scores.tolist()]