How to Build a Risk-Aware AI Agent with Internal Critic, Self-Consistency Reasoning, and Uncertainty Estimation for Reliable Decision-Making


class AgentAnalyzer:
    """Static visualization helpers for inspecting a critic-augmented agent's output.

    Both methods render matplotlib figures; they assume the module-level
    imports `plt` (matplotlib.pyplot) and `np` (numpy) are available.
    """

    @staticmethod
    def plot_response_distribution(result: Dict):
        """Render a 2x2 dashboard for one `generate_with_critic` result.

        Panels: critic scores, per-response confidence, critic score
        components, and the uncertainty metrics. The selected response's
        bar is highlighted in green.

        Args:
            result: dict with keys 'all_responses', 'critic_scores',
                'uncertainty', and 'selected_index' (as produced by
                CriticAugmentedAgent.generate_with_critic).
        """
        fig, axes = plt.subplots(2, 2, figsize=(14, 10))
        fig.suptitle('Agent Response Analysis', fontsize=16, fontweight="bold")

        responses = result['all_responses']
        scores = result['critic_scores']
        uncertainty = result['uncertainty']
        selected_idx = result['selected_index']

        # Panel 1: overall critic score per response, selected one in green.
        ax = axes[0, 0]
        score_values = [s.overall_score for s in scores]
        bars = ax.bar(range(len(scores)), score_values, alpha=0.7)
        bars[selected_idx].set_color('green')
        bars[selected_idx].set_alpha(1.0)
        ax.axhline(np.mean(score_values), color="red", linestyle="--",
                   label=f'Mean: {np.mean(score_values):.3f}')
        ax.set_xlabel('Response Index')
        ax.set_ylabel('Critic Score')
        ax.set_title('Critic Scores for Each Response')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Panel 2: token-level metrics — model confidence per response.
        ax = axes[0, 1]
        confidences = [r.confidence for r in responses]
        bars = ax.bar(range(len(responses)), confidences, alpha=0.7, color="orange")
        bars[selected_idx].set_color('green')
        bars[selected_idx].set_alpha(1.0)
        ax.axhline(np.mean(confidences), color="red", linestyle="--",
                   label=f'Mean: {np.mean(confidences):.3f}')
        ax.set_xlabel('Response Index')
        ax.set_ylabel('Confidence')
        ax.set_title('Model Confidence per Response')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Panel 3: grouped bars of the three critic sub-scores.
        ax = axes[1, 0]
        components = {
            'Accuracy': [s.accuracy_score for s in scores],
            'Coherence': [s.coherence_score for s in scores],
            'Safety': [s.safety_score for s in scores]
        }
        x = np.arange(len(responses))
        width = 0.25
        for i, (name, values) in enumerate(components.items()):
            # Center the three groups around each tick: offsets -w, 0, +w.
            offset = (i - 1) * width
            ax.bar(x + offset, values, width, label=name, alpha=0.8)
        ax.set_xlabel('Response Index')
        ax.set_ylabel('Score')
        ax.set_title('Critic Score Components')
        ax.set_xticks(x)
        ax.legend()
        ax.grid(True, alpha=0.3, axis="y")

        # Panel 4: horizontal bars for each uncertainty estimate.
        ax = axes[1, 1]
        uncertainty_metrics = {
            'Entropy': uncertainty.entropy,
            'Variance': uncertainty.variance,
            'Consistency': uncertainty.consistency_score,
            'Epistemic': uncertainty.epistemic_uncertainty,
            'Aleatoric': uncertainty.aleatoric_uncertainty
        }
        bars = ax.barh(list(uncertainty_metrics.keys()),
                       list(uncertainty_metrics.values()), alpha=0.7)
        ax.set_xlabel('Value')
        ax.set_title(f'Uncertainty Estimates (Risk: {uncertainty.risk_level()})')
        ax.grid(True, alpha=0.3, axis="x")

        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_strategy_comparison(agent: CriticAugmentedAgent, prompt: str,
                                 ground_truth: Optional[str] = None):
        """Run every selection strategy on `prompt` and plot a comparison.

        Args:
            agent: the agent to query (queried once per strategy).
            prompt: the user prompt to answer.
            ground_truth: optional reference answer forwarded to the agent.

        Returns:
            dict mapping strategy name -> generate_with_critic result.
        """
        strategies = ["best_score", "most_confident", "most_consistent", "risk_adjusted"]
        results = {}

        print("Comparing selection strategies...\n")

        for strategy in strategies:
            print(f"Testing strategy: {strategy}")
            result = agent.generate_with_critic(prompt, ground_truth,
                                                strategy=strategy, verbose=False)
            results[strategy] = result

        fig, axes = plt.subplots(1, 2, figsize=(14, 5))
        fig.suptitle('Strategy Comparison', fontsize=16, fontweight="bold")

        # Left: critic score of whichever response each strategy selected.
        ax = axes[0]
        selected_scores = [
            results[s]['critic_scores'][results[s]['selected_index']].overall_score
            for s in strategies
        ]
        bars = ax.bar(strategies, selected_scores, alpha=0.7, color="steelblue")
        ax.set_ylabel('Critic Score')
        ax.set_title('Selected Response Quality by Strategy')
        # Fix ticks before relabeling to avoid matplotlib's
        # "set_xticklabels without set_xticks" warning/mismatch.
        ax.set_xticks(range(len(strategies)))
        ax.set_xticklabels(strategies, rotation=45, ha="right")
        ax.grid(True, alpha=0.3, axis="y")

        # Right: confidence vs. quality scatter, one point per strategy.
        ax = axes[1]
        for strategy in strategies:
            result = results[strategy]
            selected_idx = result['selected_index']
            confidence = result['all_responses'][selected_idx].confidence
            score = result['critic_scores'][selected_idx].overall_score
            ax.scatter(confidence, score, s=200, alpha=0.6, label=strategy)
        ax.set_xlabel('Confidence')
        ax.set_ylabel('Critic Score')
        ax.set_title('Confidence vs Quality Trade-off')
        ax.legend()
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        return results

def run_basic_demo():
    """Demo 1: single prompt through the agent with the risk_adjusted strategy.

    Returns the generate_with_critic result dict after plotting it.
    """
    print("\n" + "=" * 80)
    print("DEMO 1: Basic Agent with Critic")
    print("=" * 80 + "\n")

    agent = CriticAugmentedAgent(
        model_quality=0.8,
        risk_tolerance=0.3,
        n_samples=5
    )

    prompt = "What is 15 + 27?"
    ground_truth = "42"

    result = agent.generate_with_critic(
        prompt=prompt,
        ground_truth=ground_truth,
        strategy="risk_adjusted",
        temperature=0.8
    )

    print("\n📊 Generating visualizations...")
    AgentAnalyzer.plot_response_distribution(result)

    return result

def run_strategy_comparison():
    """Demo 2: compare all selection strategies on one prompt.

    Returns the per-strategy results dict from AgentAnalyzer.
    """
    print("\n" + "=" * 80)
    print("DEMO 2: Strategy Comparison")
    print("=" * 80 + "\n")

    agent = CriticAugmentedAgent(
        model_quality=0.75,
        risk_tolerance=0.5,
        n_samples=6
    )

    prompt = "What is 23 + 19?"
    ground_truth = "42"

    results = AgentAnalyzer.plot_strategy_comparison(agent, prompt, ground_truth)

    return results

def run_uncertainty_analysis():
    """Demo 3: sweep model quality and plot its effect on entropy/consistency."""
    print("\n" + "=" * 80)
    print("DEMO 3: Uncertainty Analysis")
    print("=" * 80 + "\n")

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    qualities = [0.5, 0.6, 0.7, 0.8, 0.9]
    uncertainties = []
    consistencies = []

    prompt = "What is 30 + 12?"

    print("Testing model quality impact on uncertainty...\n")
    for quality in qualities:
        # Fresh agent per quality level so samples are independent.
        agent = CriticAugmentedAgent(model_quality=quality, n_samples=8)
        result = agent.generate_with_critic(prompt, verbose=False)
        uncertainties.append(result['uncertainty'].entropy)
        consistencies.append(result['uncertainty'].consistency_score)
        print(f"Quality: {quality:.1f} -> Entropy: {result['uncertainty'].entropy:.3f}, "
              f"Consistency: {result['uncertainty'].consistency_score:.3f}")

    # Left panel: entropy should fall as model quality rises.
    ax = axes[0]
    ax.plot(qualities, uncertainties, 'o-', linewidth=2, markersize=8, label="Entropy")
    ax.set_xlabel('Model Quality')
    ax.set_ylabel('Entropy')
    ax.set_title('Uncertainty vs Model Quality')
    ax.grid(True, alpha=0.3)
    ax.legend()

    # Right panel: self-consistency should rise with model quality.
    ax = axes[1]
    ax.plot(qualities, consistencies, 's-', linewidth=2, markersize=8,
            color="green", label="Consistency")
    ax.set_xlabel('Model Quality')
    ax.set_ylabel('Consistency Score')
    ax.set_title('Self-Consistency vs Model Quality')
    ax.grid(True, alpha=0.3)
    ax.legend()

    plt.tight_layout()
    plt.show()

def run_risk_sensitivity_demo():
    """Demo 4: sweep risk tolerance and plot how the selection shifts."""
    print("\n" + "=" * 80)
    print("DEMO 4: Risk Sensitivity Analysis")
    print("=" * 80 + "\n")

    prompt = "What is 18 + 24?"
    risk_tolerances = [0.1, 0.3, 0.5, 0.7, 0.9]

    # Columnar accumulators, one entry per risk-tolerance level.
    results = {
        'risk_tolerance': [],
        'selected_confidence': [],
        'selected_score': [],
        'uncertainty': []
    }

    print("Testing different risk tolerance levels...\n")
    for risk_tol in risk_tolerances:
        agent = CriticAugmentedAgent(
            model_quality=0.75,
            risk_tolerance=risk_tol,
            n_samples=6
        )
        result = agent.generate_with_critic(prompt, verbose=False)

        selected_idx = result['selected_index']
        results['risk_tolerance'].append(risk_tol)
        results['selected_confidence'].append(
            result['all_responses'][selected_idx].confidence
        )
        results['selected_score'].append(
            result['critic_scores'][selected_idx].overall_score
        )
        results['uncertainty'].append(result['uncertainty'].entropy)

        print(f"Risk Tolerance: {risk_tol:.1f} -> "
              f"Confidence: {results['selected_confidence'][-1]:.3f}, "
              f"Score: {results['selected_score'][-1]:.3f}")

    fig, ax = plt.subplots(1, 1, figsize=(10, 6))
    ax.plot(results['risk_tolerance'], results['selected_confidence'],
            'o-', linewidth=2, markersize=8, label="Selected Confidence")
    ax.plot(results['risk_tolerance'], results['selected_score'],
            's-', linewidth=2, markersize=8, label="Selected Score")
    ax.set_xlabel('Risk Tolerance')
    ax.set_ylabel('Value')
    ax.set_title('Risk Tolerance Impact on Selection')
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

def demonstrate_verbalized_uncertainty():
    """Research topic demo: print a human-readable uncertainty report.

    Generates one answer, then renders each uncertainty metric with a
    Low/Medium/High qualifier and a risk-based recommendation.
    """
    print("\n" + "=" * 80)
    print("RESEARCH TOPIC: Verbalized Uncertainty")
    print("=" * 80 + "\n")

    print("Concept: Agent not only estimates uncertainty but explains it.\n")

    agent = CriticAugmentedAgent(model_quality=0.7, n_samples=5)
    prompt = "What is 25 + 17?"
    result = agent.generate_with_critic(prompt, verbose=False)

    uncertainty = result['uncertainty']

    # Thresholds: entropy buckets at 0.5/1.0; the other metrics at 0.3/0.6.
    explanation = f"""
Uncertainty Analysis Report:
---------------------------
Risk Level: {uncertainty.risk_level()}

Detailed Breakdown:
• Answer Entropy: {uncertainty.entropy:.3f}
  → {'Low' if uncertainty.entropy < 0.5 else 'Medium' if uncertainty.entropy < 1.0 else 'High'} disagreement among generated responses

• Self-Consistency: {uncertainty.consistency_score:.3f}
  → {int(uncertainty.consistency_score * 100)}% of responses agree on the answer

• Epistemic Uncertainty: {uncertainty.epistemic_uncertainty:.3f}
  → {'Low' if uncertainty.epistemic_uncertainty < 0.3 else 'Medium' if uncertainty.epistemic_uncertainty < 0.6 else 'High'} model uncertainty (knowledge gaps)

• Aleatoric Uncertainty: {uncertainty.aleatoric_uncertainty:.3f}
  → {'Low' if uncertainty.aleatoric_uncertainty < 0.3 else 'Medium' if uncertainty.aleatoric_uncertainty < 0.6 else 'High'} data uncertainty (inherent randomness)

Recommendation:
"""

    if uncertainty.risk_level() == "LOW":
        explanation += "✓ High confidence in answer - safe to trust"
    elif uncertainty.risk_level() == "MEDIUM":
        explanation += "⚠ Moderate confidence - consider verification"
    else:
        explanation += "⚠ Low confidence - strongly recommend verification"

    print(explanation)

def demonstrate_self_consistency():
    """Research topic demo: show answer voting across sampled responses.

    Prints every sampled response with its extracted answer, then an
    ASCII histogram of the answer distribution and the majority answer.
    """
    print("\n" + "=" * 80)
    print("RESEARCH TOPIC: Self-Consistency Reasoning")
    print("=" * 80 + "\n")

    print("Concept: Generate multiple reasoning paths, select most common answer.\n")

    agent = CriticAugmentedAgent(model_quality=0.75, n_samples=7)
    prompt = "What is 35 + 7?"
    result = agent.generate_with_critic(prompt, strategy="most_consistent", verbose=False)

    # Re-extract the short answer from each full response for display.
    estimator = UncertaintyEstimator()
    answers = [estimator._extract_answer(r.content) for r in result['all_responses']]

    print("Generated Responses and Answers:")
    print("-" * 80)
    for i, (response, answer) in enumerate(zip(result['all_responses'], answers)):
        marker = "✓ SELECTED" if i == result['selected_index'] else ""
        print(f"\nResponse {i}: {answer} {marker}")
        print(f"  Confidence: {response.confidence:.3f}")
        print(f"  Content: {response.content[:80]}...")

    from collections import Counter
    answer_dist = Counter(answers)

    print(f"\n\nAnswer Distribution:")
    print("-" * 80)
    for answer, count in answer_dist.most_common():
        percentage = (count / len(answers)) * 100
        # One block character per 5 percentage points.
        bar = "█" * int(percentage / 5)
        print(f"{answer:>10}: {bar} {count}/{len(answers)} ({percentage:.1f}%)")

    print(f"\nMost Consistent Answer: {answer_dist.most_common(1)[0][0]}")
    print(f"Consistency Score: {result['uncertainty'].consistency_score:.3f}")

def main():
    """Run all tutorial demos in sequence, printing a summary at the end.

    Any exception is caught at this top-level boundary, reported, and the
    traceback printed so a single failing demo doesn't hide the error.
    """
    print("\n" + "🎯" * 40)
    print("ADVANCED AGENT WITH INTERNAL CRITIC + UNCERTAINTY ESTIMATION")
    print("Tutorial and Demonstrations")
    print("🎯" * 40)

    # Global plot styling for every demo figure.
    plt.style.use('seaborn-v0_8-darkgrid')
    sns.set_palette("husl")

    try:
        result1 = run_basic_demo()
        result2 = run_strategy_comparison()
        run_uncertainty_analysis()
        run_risk_sensitivity_demo()
        demonstrate_verbalized_uncertainty()
        demonstrate_self_consistency()

        print("\n" + "=" * 80)
        print("✅ ALL DEMONSTRATIONS COMPLETED SUCCESSFULLY")
        print("=" * 80)
        print("""
Key Takeaways:
1. Internal critics improve response quality through multi-dimensional evaluation
2. Uncertainty estimation enables risk-aware decision making
3. Self-consistency reasoning increases reliability
4. Different selection strategies optimize for different objectives
5. Verbalized uncertainty helps users understand model confidence

Next Steps:
• Implement with real LLM APIs (OpenAI, Anthropic, etc.)
• Add learned critic models (fine-tuned classifiers)
• Explore ensemble methods and meta-learning
• Integrate with retrieval-augmented generation (RAG)
• Deploy in production with monitoring and feedback loops
""")

    except Exception as e:
        # Top-level boundary: report and show the traceback, don't re-raise.
        print(f"\n❌ Error during demonstration: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()



Source link

[wp-stealth-ads rows="2" mobile-rows="3"]

Leave a Reply

Your email address will not be published. Required fields are marked *

Pin It on Pinterest