#!/usr/bin/env python3
"""
Analyze why we have so many signals.

The issue: We're generating signals for EVERY price level with ANY imbalance.
We should ONLY show the BEST few signals.
"""

import json
from pathlib import Path
from datetime import datetime
import statistics


def analyze_signal_noise(signals_file=None, price_window=10):
    """Print a report explaining why the signal file contains so many signals.

    The report shows the total signal count, the count per signal type, how
    many signals sit within ``price_window`` dollars of the reference price,
    which price levels carry the most signals, and a breakdown of the most
    crowded level.

    The reference ("current") price is approximated by the median of all
    ``entry_price`` values in the file; no live price is consulted.

    Args:
        signals_file: Path to a JSON file holding a top-level ``"signals"``
            list. Each signal must provide ``type``, ``entry_price``,
            ``direction`` and ``confidence``. Defaults to the project's
            ``signals_tradeable.json``.
        price_window: Maximum absolute distance (in dollars) from the
            reference price for a signal to count as "near". Defaults to 10.

    Returns:
        None. All results are printed to stdout.
    """
    from collections import Counter, defaultdict

    if signals_file is None:
        signals_file = Path("/home/ubuntu/.hermes/workspace/projects/ORDER_FLOW_GRAPH/data/signals_tradeable.json")

    # JSON is UTF-8 by specification; do not depend on the locale's default.
    with open(signals_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    signals = data['signals']

    print("="*80)
    print("SIGNAL NOISE ANALYSIS")
    print("="*80)

    # Counter keeps first-seen order, so types print in file order.
    type_counts = Counter(s['type'] for s in signals)

    print(f"\n📊 Total signals: {len(signals)}")
    print(f"\nBy type:")
    for stype, count in type_counts.items():
        print(f"  {stype}: {count} signals")

    if not signals:
        # statistics.median() raises StatisticsError on empty data, so stop
        # here instead of crashing on a file with no signals.
        print("\n⚠️  No signals found - nothing to analyze.")
        print("="*80)
        return

    # Approximate the current price by the median entry price.
    current_price = statistics.median([s['entry_price'] for s in signals])

    print(f"\n📊 Current price: ${current_price:.2f}")

    near_signals = [
        s for s in signals
        if abs(s['entry_price'] - current_price) <= price_window
    ]

    print(f"\n✅ Signals within ±${price_window}: {len(near_signals)}")

    # Group the nearby signals by exact entry price to expose duplicates.
    price_groups = defaultdict(list)
    for s in near_signals:
        price_groups[s['entry_price']].append(s)

    print(f"\n📊 Unique price levels: {len(price_groups)}")

    # Most crowded price levels first (stable sort keeps file order on ties).
    print(f"\n🔍 Price levels with most signals:")
    sorted_prices = sorted(price_groups.items(), key=lambda x: len(x[1]), reverse=True)

    for price, sigs in sorted_prices[:10]:
        print(f"  ${price:.2f}: {len(sigs)} signals")

    # Show what is stacked on the single most crowded price level.
    if sorted_prices:
        crowded_price, crowded_sigs = sorted_prices[0]
        print(f"\n🔍 Breakdown at ${crowded_price:.2f} ({len(crowded_sigs)} signals):")

        for i, s in enumerate(crowded_sigs[:10], 1):
            print(f"  {i}. {s['type']} - {s['direction']} - {s['confidence']} confidence")

    print("\n" + "="*80)
    print("THE PROBLEM:")
    print("="*80)
    print("❌ We're creating MULTIPLE signals at the SAME price level")
    print("❌ Every squeeze = a signal (even if minor)")
    print("❌ No quality filter (showing low-confidence signals)")
    print("\n💡 THE SOLUTION:")
    print("✅ Keep only TOP 5-10 signals by (confidence × risk_reward)")
    print("✅ Require minimum confidence of 0.85")
    print("✅ Require minimum risk-reward of 3.0")
    print("✅ Remove duplicate prices (keep only best signal at each price)")
    print("="*80)


def _signal_score(signal):
    """Return the ranking score of a signal: confidence × risk_reward."""
    return signal['confidence'] * signal['risk_reward']


def _best_signal_per_price(signals):
    """Return one signal per distinct entry price, keeping the best-scored one.

    Prices keep first-seen order; on a score tie the earlier signal wins.
    """
    best_by_price = {}
    for signal in signals:
        price = signal['entry_price']
        incumbent = best_by_price.get(price)
        if incumbent is None or _signal_score(signal) > _signal_score(incumbent):
            best_by_price[price] = signal
    return list(best_by_price.values())


def create_quality_filtered_signals(signals_file=None, output_file=None, ui_file=None, *,
                                    min_confidence=0.85, min_risk_reward=3.0,
                                    price_window=10, top_n=10):
    """Create a much smaller, higher-quality signal list and save it.

    Pipeline:
      1. keep signals with ``confidence >= min_confidence`` and
         ``risk_reward >= min_risk_reward``;
      2. keep signals whose ``entry_price`` is within ``price_window`` dollars
         of the reference price (the median entry price of ALL input signals);
      3. keep only the best signal at each distinct entry price;
      4. rank by ``confidence * risk_reward`` and keep the ``top_n`` best.

    The result is written to ``output_file`` as JSON (``metadata`` plus
    ``signals``) and then copied to ``ui_file``. Progress is printed to stdout.

    If the input contains no signals, a notice is printed and nothing is
    written, so existing output files are left untouched.

    Args:
        signals_file: Input JSON with a top-level ``"signals"`` list. Defaults
            to the project's ``signals_tradeable.json``.
        output_file: Where to write the filtered signals. Defaults to
            ``signals_xautusdt_final.json`` next to ``signals_file``.
        ui_file: Copy destination read by the visualizer. Defaults to the
            project's ``outputs/data/signals_xautusdt.json``.
        min_confidence: Minimum confidence a signal must have (default 0.85).
        min_risk_reward: Minimum risk-reward a signal must have (default 3.0).
        price_window: Maximum distance in dollars from the reference price
            (default 10).
        top_n: Maximum number of signals to keep (default 10).

    Returns:
        None.
    """
    import shutil

    if signals_file is None:
        signals_file = Path("/home/ubuntu/.hermes/workspace/projects/ORDER_FLOW_GRAPH/data/signals_tradeable.json")
    signals_file = Path(signals_file)

    if output_file is None:
        output_file = signals_file.parent / "signals_xautusdt_final.json"
    output_file = Path(output_file)

    if ui_file is None:
        ui_file = Path("/home/ubuntu/.hermes/workspace/projects/ORDER_FLOW_GRAPH/outputs/data/signals_xautusdt.json")
    ui_file = Path(ui_file)

    # JSON is UTF-8 by specification; do not depend on the locale's default.
    with open(signals_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    signals = data['signals']

    print("\n" + "="*80)
    print("CREATING QUALITY-FILTERED SIGNALS")
    print("="*80)

    if not signals:
        # statistics.median() raises StatisticsError on empty data; bail out
        # cleanly and leave any previously generated files as they are.
        print("\n⚠️  No signals in input - nothing to filter, output files left untouched.")
        print("="*80)
        return

    # Approximate the current price by the median entry price of all signals.
    current_price = statistics.median([s['entry_price'] for s in signals])

    # Step 1: Filter by quality
    quality_signals = [
        s for s in signals
        if s['confidence'] >= min_confidence and s['risk_reward'] >= min_risk_reward
    ]

    quality_label = f"≥{min_confidence:.0%} conf, ≥{min_risk_reward} R:R"
    print(f"\n📊 Quality filter ({quality_label}):")
    print(f"  Before: {len(signals)} signals")
    print(f"  After: {len(quality_signals)} signals")

    # Step 2: Keep only signals near the reference price
    near_signals = [
        s for s in quality_signals
        if abs(s['entry_price'] - current_price) <= price_window
    ]

    print(f"\n📊 Price filter (±${price_window}):")
    print(f"  After: {len(near_signals)} signals")

    # Step 3: One signal per price, ranked best-first (stable on ties)
    best_signals = sorted(_best_signal_per_price(near_signals),
                          key=_signal_score, reverse=True)

    # Step 4: Keep only the top N
    top_signals = best_signals[:top_n]

    print(f"\n📊 Remove duplicate prices:")
    print(f"  After: {len(best_signals)} signals")

    print(f"\n📊 Keep only TOP {top_n}:")
    print(f"  Final: {len(top_signals)} signals")

    print(f"\n✅ TOP {top_n} SIGNALS:")
    for i, s in enumerate(top_signals, 1):
        print(f"  {i}. {s['type']} - {s['direction'].upper()}")
        print(f"     Entry: ${s['entry_price']}, Target: ${s['target_price']}, Stop: ${s['stop_price']}")
        print(f"     Confidence: {s['confidence']}, R:R: {s['risk_reward']}")

    output_data = {
        'metadata': {
            'generated_at': datetime.now().isoformat(),
            'signal_count': len(top_signals),
            'current_price': current_price,
            'filter_criteria': f'{quality_label}, ±${price_window} price, top {top_n} only'
        },
        'signals': top_signals
    }

    # Save final signals; create the target directory if it is missing.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2)

    # Copy to UI; the UI data directory may not exist on a fresh checkout.
    ui_file.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(output_file, ui_file)

    print(f"\n✓ Saved to: {output_file}")
    print(f"✓ Copied to UI directory")

    print("\n" + "="*80)
    print("RESULT:")
    print("="*80)
    print(f"✅ BEFORE: {len(signals)} signals (too many, low quality)")
    print(f"✅ AFTER: {len(top_signals)} signals (only the best)")
    print("\n🌐 Refresh visualizer:")
    print("   http://localhost:8080/multi_asset_visualizer.html")
    print("="*80)


if __name__ == "__main__":
    analyze_signal_noise()
    create_quality_filtered_signals()
