Source code for app.backend.claim_confidence

"""Per-claim confidence via NLI self-entailment.

For every decomposed claim we compute P(response entails claim) using the
ModernBERT-large-nli scorer already loaded for Kernel Language Entropy. The
response is used as the premise and each claim as the hypothesis, so a high
score means the model's own output supports the claim.

The score is `P(entailment) + 0.5 * P(neutral)`, mirroring the KLE similarity
weighting. Because the three NLI probabilities sum to 1, this one-direction
score is bounded in [0, 1] (unlike KLE's bidirectional W matrix, whose entries
lie in [0, 2]). We use only the response -> claim direction: the reverse
direction is uninformative here because a short claim generally cannot entail
the full response, so every reverse score would collapse toward
neutral/contradiction.

This is the degenerate single-sample case of SelfCheckGPT-NLI (Manakul et
al., EMNLP 2023): rather than sampling K responses and averaging entailment,
we treat the produced response as the sole reference context.
"""

from typing import Any


def score_to_metrics(score: float) -> dict:
    """Map a [0, 1] confidence score into the API's {confidence, level, guidance} dict."""
    score = round(float(score), 2)
    if score >= 0.80:
        return {"confidence": score, "level": "high", "guidance": ""}
    if score >= 0.65:
        return {
            "confidence": score,
            "level": "moderate",
            "guidance": "Verify with clinical reference",
        }
    return {
        "confidence": score,
        "level": "low",
        "guidance": "Cross-check with authoritative source before acting",
    }
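
# A quick sketch of the mapping above. The cutoffs (0.80 / 0.65) and the
# returned shapes come from the function; the input values are illustrative:
#
#     score_to_metrics(0.91)  # {"confidence": 0.91, "level": "high", "guidance": ""}
#     score_to_metrics(0.70)  # {"confidence": 0.7,  "level": "moderate", ...}
#     score_to_metrics(0.42)  # {"confidence": 0.42, "level": "low", ...}
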

def compute_claim_confidences(
    response: str,
    claims: list[str],
    bert_model: Any,
    bert_tokenizer: Any,
) -> list[dict]:
    """Score every claim against `response` with NLI.

    Returns one {confidence, level, guidance} dict per claim, in the same
    order as `claims`. Raises if the NLI forward pass fails; the caller is
    expected to treat the claim ledger as unavailable on exception.
    """
    if not claims:
        return []

    from kernel_entropy.nli import (
        LABEL_ENTAILMENT,
        LABEL_NEUTRAL,
        ModernBERTScorer,
    )

    scorer = ModernBERTScorer(
        sentences=claims,
        model=bert_model,
        tokenizer=bert_tokenizer,
    )
    # Response is the premise, each claim the hypothesis (see module docstring).
    pairs = [(response, claim) for claim in claims]
    probs = scorer.get_nli_probabilities(pairs)
    confidences = (
        probs[:, LABEL_ENTAILMENT] + 0.5 * probs[:, LABEL_NEUTRAL]
    ).tolist()
    return [score_to_metrics(c) for c in confidences]
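

# Minimal usage sketch, not part of the app: it assumes a HuggingFace-style
# NLI cross-encoder. The checkpoint name below is an assumption for
# illustration; in the app, `bert_model` / `bert_tokenizer` are the objects
# already loaded for Kernel Language Entropy.
if __name__ == "__main__":
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    name = "tasksource/ModernBERT-large-nli"  # assumed checkpoint name
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSequenceClassification.from_pretrained(name)

    claims = [
        "Aspirin inhibits COX-1.",
        "Aspirin raises thromboxane levels.",
    ]
    ledger = compute_claim_confidences(
        response="Aspirin irreversibly inhibits COX-1, reducing thromboxane.",
        claims=claims,
        bert_model=model,
        bert_tokenizer=tokenizer,
    )
    # One {confidence, level, guidance} dict per claim, in order.
    for claim, metrics in zip(claims, ledger):
        print(metrics["level"], "-", claim)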