Mirror of https://github.com/agessaman/meshcore-packet-capture.git
Synced 2026-04-20 23:23:37 +00:00
3549 lines · 167 KiB · Python
#!/usr/bin/env python3
|
|
"""
|
|
MeshCore Packet Capture Tool
|
|
|
|
Captures packets from MeshCore radios and outputs to console, file, and MQTT.
|
|
Compatible with both serial and BLE connections.
|
|
|
|
Usage:
|
|
python packet_capture.py [--output output.json] [--verbose] [--debug] [--no-mqtt]
|
|
|
|
Options:
|
|
--output Output file for packet data
|
|
--verbose Show JSON packet data
|
|
--debug Show detailed debugging info
|
|
--no-mqtt Disable MQTT publishing
|
|
|
|
The script captures packet metadata including SNR, RSSI, route type, payload type,
|
|
and raw hex data. Configuration is done via environment variables and .env files.
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import hashlib
|
|
import time
|
|
import re
|
|
import os
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Optional, Dict, Any
|
|
import argparse
|
|
|
|
# Import meshcore from PyPI
|
|
import meshcore
|
|
from meshcore import EventType
|
|
|
|
# Import our enums for packet parsing
|
|
from enums import AdvertFlags, PayloadType, PayloadVersion, RouteType, DeviceRole
|
|
|
|
# Import MQTT client
|
|
try:
|
|
import paho.mqtt.client as mqtt
|
|
except ImportError:
|
|
print("Error: paho-mqtt not installed. Install with:")
|
|
print("pip install paho-mqtt")
|
|
exit(1)
|
|
|
|
# Import auth token module
|
|
try:
|
|
from auth_token import create_auth_token, create_auth_token_async, read_private_key_file
|
|
except ImportError:
|
|
print("Warning: auth_token.py not found - auth token authentication will not be available")
|
|
create_auth_token = None
|
|
create_auth_token_async = None
|
|
read_private_key_file = None
|
|
|
|
# Private key functionality using meshcore_py library
|
|
|
|
|
|
def get_transport(meshcore_instance):
    """Return the asyncio transport owned by *meshcore_instance*, or None.

    The lookup walks the documented meshcore object graph:
    MeshCore.cx (ConnectionManager) -> .connection (TCPConnection,
    BLEConnection, ...) -> .transport (asyncio transport object).

    Only a reference to the already-existing transport is returned; nothing
    is created or cached here, so cleanup stays the responsibility of
    meshcore.disconnect() / garbage collection of the meshcore instance.
    """
    if not meshcore_instance:
        return None

    try:
        # Each getattr mirrors one hop of the documented API structure;
        # a missing attribute anywhere simply yields None.
        manager = getattr(meshcore_instance, 'cx', None)
        connection = getattr(manager, 'connection', None)
        transport = getattr(connection, 'transport', None)
        if transport is not None:
            return transport
    except Exception:
        pass

    return None
|
|
|
|
|
|
def enable_tcp_keepalive(transport, idle=10, interval=5, count=3):
    """Enable TCP keepalive on the socket behind *transport*.

    Handles three transport shapes: asyncio transports (via
    get_extra_info('socket')), plain socket objects, and wrappers exposing
    a ``_socket`` attribute. Returns True when the options were applied,
    False when no socket could be located or setting them failed.
    """
    import socket

    def _locate_socket(candidate):
        """Best-effort extraction of a socket object from *candidate*."""
        if hasattr(candidate, 'get_extra_info'):
            try:
                found = candidate.get_extra_info('socket')
                if found is not None:
                    return found
            except Exception:
                pass
        if isinstance(candidate, socket.socket):
            return candidate
        if hasattr(candidate, '_socket'):
            try:
                return candidate._socket
            except Exception:
                pass
        return None

    sock = _locate_socket(transport)
    if sock is None:
        return False

    try:
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)

        # Idle time before the first probe; the constant name differs by
        # platform (TCP_KEEPIDLE on Linux/BSD, TCP_KEEPALIVE on macOS).
        if hasattr(socket, 'TCP_KEEPIDLE'):
            sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, idle)
        elif hasattr(socket, 'TCP_KEEPALIVE'):
            sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPALIVE, idle)

        if hasattr(socket, 'TCP_KEEPINTVL'):
            sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, interval)
        if hasattr(socket, 'TCP_KEEPCNT'):
            sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, count)

        return True
    except Exception as e:
        # Keepalive is best-effort; never fail the connection over it.
        print(f"Warning: Could not enable TCP keepalive: {e}")
        return False
|
|
|
|
|
|
def load_env_files():
    """Load environment variables from .env and .env.local files.

    Values from .env.local override .env; variables already present in the
    process environment always win over file values. Returns the merged
    dict of parsed variables.

    Fix: inline-comment stripping previously ran BEFORE quote removal, so a
    quoted value containing '#' (e.g. KEY="pass#word") was truncated. Quoted
    values now keep their content verbatim; only unquoted values have
    inline '#' comments stripped.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    env_file = os.path.join(script_dir, '.env')
    env_local_file = os.path.join(script_dir, '.env.local')

    def parse_env_file(filepath):
        """Parse a .env file and return a dict of KEY -> value."""
        env_vars = {}
        if not os.path.exists(filepath):
            return env_vars

        with open(filepath, 'r') as f:
            for line in f:
                line = line.strip()
                # Skip comments and empty lines
                if not line or line.startswith('#'):
                    continue
                # Parse KEY=VALUE
                if '=' not in line:
                    continue
                key, value = line.split('=', 1)
                key = key.strip()
                value = value.strip()

                if value[:1] in ('"', "'"):
                    # Quoted value: take everything up to the matching quote,
                    # preserving '#' characters inside the quotes.
                    quote = value[0]
                    closing = value.find(quote, 1)
                    if closing != -1:
                        value = value[1:closing]
                    elif '#' in value:
                        # Unterminated quote: fall back to comment stripping
                        value = value.split('#')[0].strip()
                elif '#' in value:
                    # Unquoted value: everything after '#' is a comment
                    value = value.split('#')[0].strip()

                env_vars[key] = value
        return env_vars

    # .env provides defaults; .env.local overrides them
    env_vars = parse_env_file(env_file)
    env_vars.update(parse_env_file(env_local_file))

    # Export, without clobbering variables already set in the process
    for key, value in env_vars.items():
        if key not in os.environ:
            os.environ[key] = value

    return env_vars
|
|
|
|
|
|
# Load environment configuration at import time; values from .env/.env.local
# are applied to os.environ, but already-set process variables take precedence.
load_env_files()
|
|
|
|
|
|
class PacketCapture:
|
|
"""Standalone packet capture using meshcore package"""
|
|
|
|
    def __init__(self, output_file: Optional[str] = None, verbose: bool = False, debug: bool = False, enable_mqtt: bool = True, shutdown_event=None):
        """Initialize capture state from constructor args and environment.

        Args:
            output_file: Optional path; when set, packet data is also written here.
            verbose: Show JSON packet data on the console.
            debug: Emit detailed debugging info.
            enable_mqtt: Publish captured packets to MQTT brokers.
            shutdown_event: Optional asyncio.Event used to interrupt waits.
        """
        self.output_file = output_file
        self.verbose = verbose
        self.debug = debug
        self.enable_mqtt = enable_mqtt
        self.shutdown_event = shutdown_event

        # Setup logging
        self.setup_logging()

        # Global IATA for template resolution
        self.global_iata = os.getenv('PACKETCAPTURE_IATA', 'LOC').lower()

        # Connection
        self.meshcore = None
        self.connected = False
        self.connection_type = None  # Track connection type for health checks
        self.connection_retry_count = 0
        self.max_connection_retries = self.get_env_int('MAX_CONNECTION_RETRIES', 5)
        self.connection_retry_delay = self.get_env_int('CONNECTION_RETRY_DELAY', 5)
        self.connection_retry_delay_max = self.get_env_int('CONNECTION_RETRY_DELAY_MAX', 300)  # 5 minutes max
        self.connection_retry_backoff_multiplier = self.get_env_float('CONNECTION_RETRY_BACKOFF_MULTIPLIER', 2.0)
        self.connection_retry_jitter = self.get_env_bool('CONNECTION_RETRY_JITTER', True)
        self.health_check_interval = self.get_env_int('HEALTH_CHECK_INTERVAL', 30)

        # Health check grace period for BLE connections
        self.health_check_grace_period = self.get_env_int('HEALTH_CHECK_GRACE_PERIOD', 2)  # Allow 2 consecutive failures
        self.health_check_failure_count = 0  # Track consecutive health check failures

        # Retry configuration
        self.default_retry_limit = self.get_env_int('DEVICE_COMMAND_RETRY_LIMIT', 3)  # Default retries for device commands
        self.ble_retry_limit = self.get_env_int('BLE_COMMAND_RETRY_LIMIT', 3)  # Retries for BLE connections
        self.tcp_retry_limit = self.get_env_int('TCP_COMMAND_RETRY_LIMIT', 2)  # Retries for TCP connections
        self.health_check_retry_limit = self.get_env_int('HEALTH_CHECK_RETRY_LIMIT', None)  # Override for health checks (None = use connection-specific)
        self.stats_retry_limit = self.get_env_int('STATS_RETRY_LIMIT', 2)  # Retries for stats queries (non-critical)
        self.device_info_retry_limit = self.get_env_int('DEVICE_INFO_RETRY_LIMIT', 2)  # Retries for device info queries

        # MQTT connection
        self.mqtt_clients = []  # List of MQTT client info dictionaries
        self.mqtt_connected = False
        self.should_exit = False  # Flag to exit when reconnection attempts fail

        # Stats/status publishing
        self.stats_status_enabled = self.get_env_bool('STATS_IN_STATUS_ENABLED', True)
        self.stats_refresh_interval = self.get_env_int('STATS_REFRESH_INTERVAL', 300)  # seconds
        self.latest_stats = None
        self.last_stats_fetch = 0
        self.stats_supported = False
        self.stats_capability_state = None
        self.stats_update_task = None
        self.stats_fetch_lock = asyncio.Lock()

        # Service-level failure tracking for systemd restart
        self.service_failure_count = 0
        self.max_service_failures = self.get_env_int('MAX_SERVICE_FAILURES', 3)
        self.service_failure_window = self.get_env_int('SERVICE_FAILURE_WINDOW', 300)  # 5 minutes
        self.last_service_failure = 0
        self.critical_failure_threshold = self.get_env_int('CRITICAL_FAILURE_THRESHOLD', 5)

        # Track consecutive failures for more intelligent failure detection
        self.consecutive_connection_failures = 0
        self.consecutive_mqtt_failures = 0
        self.max_consecutive_failures = self.get_env_int('MAX_CONSECUTIVE_FAILURES', 3)

        # MQTT failure tracking with grace period
        self.mqtt_health_check_interval = self.get_env_int('MQTT_HEALTH_CHECK_INTERVAL', 60)  # Check every minute
        self.mqtt_grace_period = self.get_env_int('MQTT_GRACE_PERIOD', 180)  # 3 minutes grace before counting failures
        self.mqtt_disconnect_timestamps = {}  # Track when brokers disconnected: {broker_num: timestamp}

        # Packet correlation cache
        self.rf_data_cache = {}
        self.recent_rf_packets = {}
        self.raw_duplicate_window = self.get_env_float('RAW_DUPLICATE_WINDOW', 2.0)
        # When True (default), call get_msg() on MESSAGES_WAITING to drain the device message queue.
        # Set PACKETCAPTURE_DRAIN_MESSAGES=false to capture RF packets only without pulling stored mesh messages.
        self.drain_messages = self.get_env_bool('DRAIN_MESSAGES', True)
        self.packet_count = 0

        # Device information
        self.device_name = None
        self.device_public_key = None
        self.device_private_key = None
        self.radio_info = None
        self.cached_firmware_info = None  # Cache firmware info to avoid queries during shutdown

        # Private key export capability
        self.private_key_export_available = False

        # JWT token management
        self.jwt_tokens = {}  # Store tokens per broker: {broker_num: {'token': str, 'expires_at': float}}
        self.jwt_renewal_interval = self.get_env_int('JWT_RENEWAL_INTERVAL', 3600)  # Check every hour
        self.jwt_renewal_threshold = self.get_env_int('JWT_RENEWAL_THRESHOLD', 300)  # Renew 5 minutes before expiry

        # Advert settings
        self.advert_interval_hours = self.get_env_int('ADVERT_INTERVAL_HOURS', 47)
        self.last_advert_time = 0
        self.advert_task = None

        # Load persisted advert state (overrides the 0 default when a valid
        # timestamp was saved by a previous run)
        self.last_advert_time = self._load_advert_state()

        # Packet type filtering for uploads
        upload_types_str = self.get_env('UPLOAD_PACKET_TYPES', '').strip()
        if upload_types_str:
            self.allowed_upload_types = set(t.strip() for t in upload_types_str.split(','))
            self.logger.info(f"Packet type upload filter enabled: {sorted(self.allowed_upload_types)}")
        else:
            self.allowed_upload_types = None  # None means upload all (default)

        # JWT renewal task
        self.jwt_renewal_task = None

        # Task tracking to prevent duplicate tasks
        self.active_tasks = set()
        self.jwt_renewal_in_progress = False

        # TCP keepalive settings
        self.tcp_keepalive_enabled = self.get_env_bool('TCP_KEEPALIVE_ENABLED', True)
        self.tcp_keepalive_idle = self.get_env_int('TCP_KEEPALIVE_IDLE', 10)
        self.tcp_keepalive_interval = self.get_env_int('TCP_KEEPALIVE_INTERVAL', 5)
        self.tcp_keepalive_count = self.get_env_int('TCP_KEEPALIVE_COUNT', 3)

        # SDK auto-reconnect settings for TCP
        self.tcp_sdk_auto_reconnect_enabled = self.get_env_bool('TCP_SDK_AUTO_RECONNECT_ENABLED', True)
        self.tcp_sdk_max_reconnect_attempts = self.get_env_int('TCP_SDK_MAX_RECONNECT_ATTEMPTS', 100)
        self.sdk_reconnect_exhausted = False  # Track if SDK auto-reconnect has given up (TCP only)

        # Circuit breaker for JWT failures
        self.jwt_failure_count = 0
        self.max_jwt_failures = 5
        self.jwt_circuit_breaker_timeout = 300  # 5 minutes
        self.jwt_circuit_breaker_reset_time = 0

        # Resource monitoring
        self.max_active_tasks = 100  # Prevent task explosion
        self.task_monitoring_interval = 60  # Check every minute
        self.last_task_check = 0

        # Output file handle
        self.output_handle = None
        if self.output_file:
            self.output_handle = open(self.output_file, 'w')
            self.logger.info(f"Output will be written to: {self.output_file}")
|
|
|
|
|
|
def setup_logging(self):
|
|
"""Setup logging configuration"""
|
|
# Clear any existing handlers to avoid conflicts
|
|
for handler in logging.root.handlers[:]:
|
|
logging.root.removeHandler(handler)
|
|
|
|
# Get log level from environment variable
|
|
log_level_str = self.get_env('LOG_LEVEL', 'INFO').upper()
|
|
log_level_map = {
|
|
'DEBUG': logging.DEBUG,
|
|
'INFO': logging.INFO,
|
|
'WARNING': logging.WARNING,
|
|
'ERROR': logging.ERROR,
|
|
'CRITICAL': logging.CRITICAL
|
|
}
|
|
log_level = log_level_map.get(log_level_str, logging.INFO)
|
|
|
|
# Create a custom formatter with timestamp
|
|
formatter = logging.Formatter(
|
|
fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
|
datefmt='%Y-%m-%d %H:%M:%S'
|
|
)
|
|
|
|
# Create console handler with the formatter
|
|
console_handler = logging.StreamHandler()
|
|
console_handler.setFormatter(formatter)
|
|
|
|
# Configure root logger
|
|
logging.basicConfig(
|
|
level=log_level,
|
|
handlers=[console_handler],
|
|
force=True
|
|
)
|
|
|
|
self.logger = logging.getLogger('PacketCapture')
|
|
|
|
# Test the logging format
|
|
self.logger.info(f"Logging initialized with level: {log_level_str}")
|
|
|
|
def get_env(self, key, fallback=''):
|
|
"""Get environment variable with fallback (all vars are PACKETCAPTURE_ prefixed)"""
|
|
full_key = f"PACKETCAPTURE_{key}"
|
|
return os.getenv(full_key, fallback)
|
|
|
|
def get_env_bool(self, key, fallback=False):
|
|
"""Get boolean environment variable"""
|
|
value = self.get_env(key, str(fallback)).lower()
|
|
return value in ('true', '1', 'yes', 'on')
|
|
|
|
def get_env_int(self, key, fallback=0):
|
|
"""Get integer environment variable"""
|
|
try:
|
|
return int(self.get_env(key, str(fallback)))
|
|
except ValueError:
|
|
return fallback
|
|
|
|
def get_env_float(self, key, fallback=0.0):
|
|
"""Get float environment variable"""
|
|
try:
|
|
return float(self.get_env(key, str(fallback)))
|
|
except ValueError:
|
|
return fallback
|
|
|
|
def _get_state_file_path(self):
|
|
"""Get the path to the state file for persisting last_advert_time.
|
|
|
|
Works across all installation methods:
|
|
- Docker: Uses /app/data/ (mounted volume)
|
|
- NixOS: Uses cfg.dataDir (working directory)
|
|
- Systemd: Uses script directory or data subdirectory
|
|
"""
|
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
|
|
|
# Try data subdirectory first (works for Docker and if created)
|
|
data_dir = os.path.join(script_dir, 'data')
|
|
if os.path.exists(data_dir) and os.path.isdir(data_dir):
|
|
return os.path.join(data_dir, 'advert_state.json')
|
|
|
|
# Fall back to script directory (works for all installation methods)
|
|
return os.path.join(script_dir, 'advert_state.json')
|
|
|
|
def _load_advert_state(self):
|
|
"""Load last_advert_time from persistent state file.
|
|
|
|
Returns the timestamp if found, otherwise returns 0.
|
|
"""
|
|
state_file = self._get_state_file_path()
|
|
|
|
if not os.path.exists(state_file):
|
|
if self.debug:
|
|
self.logger.debug(f"Advert state file not found: {state_file}")
|
|
return 0
|
|
|
|
try:
|
|
with open(state_file, 'r') as f:
|
|
state = json.load(f)
|
|
last_time = state.get('last_advert_time', 0)
|
|
|
|
# Validate the timestamp is reasonable (not in the future, not too old)
|
|
current_time = time.time()
|
|
if last_time > current_time:
|
|
# Timestamp is in the future, ignore it
|
|
if self.debug:
|
|
self.logger.debug(f"Advert state timestamp is in the future, ignoring: {last_time}")
|
|
return 0
|
|
|
|
# If timestamp is more than 1 year old, treat as invalid
|
|
if current_time - last_time > 31536000: # 1 year in seconds
|
|
if self.debug:
|
|
self.logger.debug(f"Advert state timestamp is too old, ignoring: {last_time}")
|
|
return 0
|
|
|
|
if self.debug:
|
|
self.logger.debug(f"Loaded last_advert_time from state file: {last_time} ({datetime.fromtimestamp(last_time).isoformat()})")
|
|
return last_time
|
|
|
|
except (json.JSONDecodeError, IOError, OSError) as e:
|
|
self.logger.warning(f"Failed to load advert state from {state_file}: {e}")
|
|
return 0
|
|
|
|
def _save_advert_state(self):
|
|
"""Save last_advert_time to persistent state file."""
|
|
state_file = self._get_state_file_path()
|
|
state_dir = os.path.dirname(state_file)
|
|
|
|
try:
|
|
# Create directory if it doesn't exist (for data subdirectory case)
|
|
if state_dir and not os.path.exists(state_dir):
|
|
os.makedirs(state_dir, mode=0o755, exist_ok=True)
|
|
|
|
state = {
|
|
'last_advert_time': self.last_advert_time,
|
|
'updated_at': time.time()
|
|
}
|
|
|
|
# Write atomically using a temporary file
|
|
temp_file = state_file + '.tmp'
|
|
with open(temp_file, 'w') as f:
|
|
json.dump(state, f, indent=2)
|
|
|
|
# Atomic rename
|
|
os.replace(temp_file, state_file)
|
|
|
|
if self.debug:
|
|
self.logger.debug(f"Saved last_advert_time to state file: {self.last_advert_time} ({datetime.fromtimestamp(self.last_advert_time).isoformat()})")
|
|
|
|
except (IOError, OSError) as e:
|
|
self.logger.warning(f"Failed to save advert state to {state_file}: {e}")
|
|
|
|
|
|
def calculate_connection_retry_delay(self, attempt: int) -> float:
|
|
"""Calculate exponential backoff delay with jitter for connection retries"""
|
|
import random
|
|
|
|
# Calculate exponential backoff: base_delay * (multiplier ^ (attempt - 1))
|
|
delay = self.connection_retry_delay * (self.connection_retry_backoff_multiplier ** (attempt - 1))
|
|
|
|
# Cap at maximum delay
|
|
delay = min(delay, self.connection_retry_delay_max)
|
|
|
|
# Add jitter to prevent thundering herd (random factor between 0.5 and 1.5)
|
|
if self.connection_retry_jitter:
|
|
jitter_factor = random.uniform(0.5, 1.5)
|
|
delay *= jitter_factor
|
|
|
|
return max(1.0, delay) # Minimum 1 second delay
|
|
|
|
def track_service_failure(self, failure_type: str, details: str = ""):
|
|
"""Track service-level failures and determine if we should exit for systemd restart"""
|
|
import time
|
|
|
|
current_time = time.time()
|
|
|
|
# Reset failure count if outside the failure window
|
|
if current_time - self.last_service_failure > self.service_failure_window:
|
|
self.service_failure_count = 0
|
|
|
|
self.service_failure_count += 1
|
|
self.last_service_failure = current_time
|
|
|
|
self.logger.error(f"Service failure #{self.service_failure_count}: {failure_type}")
|
|
if details:
|
|
self.logger.error(f"Failure details: {details}")
|
|
|
|
# Check if we should exit for systemd restart
|
|
if self.service_failure_count >= self.max_service_failures:
|
|
self.logger.critical(f"Maximum service failures ({self.max_service_failures}) reached within {self.service_failure_window}s window")
|
|
self.logger.critical("Exiting to allow systemd to restart the service with fresh state")
|
|
self.should_exit = True
|
|
return True
|
|
|
|
return False
|
|
|
|
def track_consecutive_failure(self, failure_type: str) -> bool:
|
|
"""Track consecutive failures and determine if they warrant a service failure"""
|
|
if failure_type == "connection":
|
|
self.consecutive_connection_failures += 1
|
|
self.consecutive_mqtt_failures = 0 # Reset other type
|
|
elif failure_type == "mqtt":
|
|
self.consecutive_mqtt_failures += 1
|
|
self.consecutive_connection_failures = 0 # Reset other type
|
|
|
|
# Check if consecutive failures warrant a service failure
|
|
if (self.consecutive_connection_failures >= self.max_consecutive_failures or
|
|
self.consecutive_mqtt_failures >= self.max_consecutive_failures):
|
|
|
|
failure_details = f"Consecutive {failure_type} failures: {self.consecutive_connection_failures if failure_type == 'connection' else self.consecutive_mqtt_failures}"
|
|
return self.track_service_failure(f"Consecutive {failure_type} failures", failure_details)
|
|
|
|
return False
|
|
|
|
def reset_consecutive_failures(self, failure_type: str):
|
|
"""Reset consecutive failure count when connection is restored"""
|
|
if failure_type == "connection":
|
|
self.consecutive_connection_failures = 0
|
|
elif failure_type == "mqtt":
|
|
self.consecutive_mqtt_failures = 0
|
|
|
|
async def wait_with_shutdown(self, timeout: float) -> bool:
|
|
"""Wait for specified time but return immediately if shutdown is requested"""
|
|
if self.shutdown_event:
|
|
try:
|
|
await asyncio.wait_for(self.shutdown_event.wait(), timeout=timeout)
|
|
return True # Shutdown was requested
|
|
except asyncio.TimeoutError:
|
|
return False # Timeout reached, no shutdown
|
|
else:
|
|
await asyncio.sleep(timeout)
|
|
return False
|
|
|
|
    async def retryable_device_command(self, command_func, command_name: str,
                                       timeout: float = 10.0, max_retries: int = None,
                                       retry_delay: float = 0.2, backoff_multiplier: float = 1.5):
        """
        Execute a device command with timeout and retry logic.

        Args:
            command_func: Async function that returns a meshcore Event
            command_name: Name of the command for logging
            timeout: Timeout in seconds for each attempt
            max_retries: Maximum number of retry attempts (including initial attempt)
                If None, uses connection-specific default from environment variables
            retry_delay: Initial delay between retries in seconds
            backoff_multiplier: Multiplier for exponential backoff

        Returns:
            Event object from the command, or None if all retries failed
        """
        # NOTE(review): _ensure_connected is defined elsewhere in this file;
        # presumably it returns False and logs at the given level when the
        # device is not connected — confirm against its definition.
        if not self._ensure_connected(command_name, "debug"):
            return None

        # Use connection-specific default if max_retries not specified
        if max_retries is None:
            if self.connection_type == 'ble':
                max_retries = self.ble_retry_limit
            elif self.connection_type == 'tcp':
                max_retries = self.tcp_retry_limit
            else:
                max_retries = self.default_retry_limit

        last_error = None
        current_delay = retry_delay

        for attempt in range(max_retries):
            try:
                # Add small delay between retries (except first attempt)
                if attempt > 0:
                    await asyncio.sleep(current_delay)
                    current_delay *= backoff_multiplier  # Exponential backoff

                # Execute command with timeout
                result = await asyncio.wait_for(
                    command_func(),
                    timeout=timeout
                )

                # Check if result is an error
                if result and hasattr(result, 'type'):
                    if result.type == EventType.ERROR:
                        error_payload = result.payload if hasattr(result, 'payload') else {}
                        error_reason = error_payload.get('reason', 'unknown')

                        # Check if it's a transient error that we should retry;
                        # only 'no_event_received' is treated as transient.
                        if error_reason == 'no_event_received' and attempt < max_retries - 1:
                            last_error = f"{command_name} failed: {error_reason}"
                            if self.debug:
                                self.logger.debug(f"{last_error} (attempt {attempt + 1}/{max_retries})")
                            continue
                        else:
                            # Permanent error or last attempt: surface the
                            # error event to the caller instead of None.
                            self.logger.debug(f"{command_name} failed: {error_payload}")
                            return result
                    else:
                        # Success - return the result
                        if attempt > 0:
                            self.logger.debug(f"{command_name} succeeded on attempt {attempt + 1}")
                        return result
                else:
                    # Unexpected result format (falsy, or no .type attribute):
                    # return it as-is and let the caller decide.
                    self.logger.debug(f"{command_name} returned unexpected result format")
                    return result

            except asyncio.TimeoutError:
                last_error = f"{command_name} timed out after {timeout}s"
                if attempt < max_retries - 1:
                    if self.debug:
                        self.logger.debug(f"{last_error} (attempt {attempt + 1}/{max_retries})")
                    continue
                else:
                    self.logger.debug(f"{last_error} (all {max_retries} attempts exhausted)")
                    return None
            except Exception as e:
                # Any other exception is retried the same way as a timeout.
                last_error = f"{command_name} raised exception: {e}"
                if attempt < max_retries - 1:
                    if self.debug:
                        self.logger.debug(f"{last_error} (attempt {attempt + 1}/{max_retries})")
                    continue
                else:
                    self.logger.debug(f"{last_error} (all {max_retries} attempts exhausted)")
                    return None

        # All retries failed (loop exhausted via 'continue' on the last pass)
        if last_error:
            self.logger.debug(f"{command_name} failed after {max_retries} attempts: {last_error}")
        return None
|
|
|
|
def should_exit_for_systemd_restart(self) -> bool:
|
|
"""Determine if we should exit to allow systemd restart"""
|
|
import time
|
|
|
|
# Check for critical failure threshold
|
|
if self.service_failure_count >= self.critical_failure_threshold:
|
|
self.logger.critical(f"Critical failure threshold ({self.critical_failure_threshold}) reached")
|
|
return True
|
|
|
|
# Check for recent failure pattern
|
|
current_time = time.time()
|
|
if (current_time - self.last_service_failure) < self.service_failure_window:
|
|
if self.service_failure_count >= self.max_service_failures:
|
|
self.logger.critical(f"Too many failures ({self.service_failure_count}) in {self.service_failure_window}s")
|
|
return True
|
|
|
|
return False
|
|
|
|
def _load_client_version(self):
|
|
"""Load client version from .version_info file or git"""
|
|
try:
|
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
|
version_file = os.path.join(script_dir, '.version_info')
|
|
|
|
# First try to load from .version_info file (created by installer)
|
|
if os.path.exists(version_file):
|
|
with open(version_file, 'r') as f:
|
|
version_data = json.load(f)
|
|
installer_ver = version_data.get('installer_version', 'unknown')
|
|
git_hash = version_data.get('git_hash', 'unknown')
|
|
return f"meshcore-packet-capture/{installer_ver}-{git_hash}"
|
|
|
|
# Fallback: try to get git information directly
|
|
try:
|
|
import subprocess
|
|
result = subprocess.run(['git', 'rev-parse', '--short', 'HEAD'],
|
|
cwd=script_dir, capture_output=True, text=True, timeout=5)
|
|
if result.returncode == 0:
|
|
git_hash = result.stdout.strip()
|
|
return f"meshcore-packet-capture/dev-{git_hash}"
|
|
except (subprocess.TimeoutExpired, subprocess.CalledProcessError, FileNotFoundError):
|
|
pass
|
|
|
|
except Exception as e:
|
|
self.logger.debug(f"Could not load version info: {e}")
|
|
|
|
# Final fallback
|
|
return "meshcore-packet-capture/unknown"
|
|
|
|
    async def get_firmware_info(self):
        """Get firmware information from meshcore device using send_device_query().

        Returns a dict with 'model' and 'version' keys; both fall back to
        "unknown" when the device cannot be queried. Successful results are
        cached in self.cached_firmware_info so the value remains available
        while disconnected or during shutdown.
        """
        try:
            # During shutdown, always use cached info - don't query the device
            if self.should_exit:
                if self.cached_firmware_info:
                    self.logger.debug("Using cached firmware info (shutdown in progress)")
                    return self.cached_firmware_info
                else:
                    self.logger.debug("No cached firmware info available during shutdown")
                    return {"model": "unknown", "version": "unknown"}

            # Return cached info if available and device is not connected
            if self.cached_firmware_info and (not self.meshcore or not self.meshcore.is_connected):
                self.logger.debug("Using cached firmware info")
                return self.cached_firmware_info

            if not self._ensure_connected("get_firmware_info", "debug"):
                return {"model": "unknown", "version": "unknown"}

            self.logger.debug("Querying device for firmware info...")
            # Use send_device_query() to get firmware version with retry logic
            # Use connection-specific retry limit
            result = await self.retryable_device_command(
                lambda: self.meshcore.commands.send_device_query(),
                "send_device_query",
                timeout=10.0,
                max_retries=None  # Use connection-specific default
            )

            if result is None:
                self.logger.debug("Device query failed after retries")
                return {"model": "unknown", "version": "unknown"}

            self.logger.debug(f"Device query result type: {result.type}")
            self.logger.debug(f"Device query result: {result}")

            if result.type == EventType.ERROR:
                self.logger.debug(f"Device query failed: {result}")
                return {"model": "unknown", "version": "unknown"}

            if result.payload:
                payload = result.payload
                self.logger.debug(f"Device query payload: {payload}")

                # Check firmware version format; 'fw ver' selects between the
                # old and new payload layouts below.
                fw_ver = payload.get('fw ver', 0)
                self.logger.debug(f"Firmware version number: {fw_ver}")

                if fw_ver >= 3:
                    # For newer firmware versions (v3+): model/version/build
                    # date are separate payload fields.
                    model = payload.get('model', 'Unknown')
                    version = payload.get('ver', 'Unknown')
                    build_date = payload.get('fw_build', 'Unknown')
                    # Remove 'v' prefix from version if it already has one
                    if version.startswith('v'):
                        version = version[1:]
                    version_str = f"v{version} (Build: {build_date})"
                    self.logger.debug(f"New firmware format - Model: {model}, Version: {version_str}")
                    firmware_info = {"model": model, "version": version_str}
                    self.cached_firmware_info = firmware_info  # Cache the result
                    return firmware_info
                else:
                    # For older firmware versions only the numeric 'fw ver'
                    # is available; the model cannot be determined.
                    version_str = f"v{fw_ver}"
                    self.logger.debug(f"Old firmware format - Model: unknown, Version: {version_str}")
                    firmware_info = {"model": "unknown", "version": version_str}
                    self.cached_firmware_info = firmware_info  # Cache the result
                    return firmware_info

            self.logger.debug("No payload in device query result")
            return {"model": "unknown", "version": "unknown"}

        except Exception as e:
            self.logger.debug(f"Error getting firmware info: {e}")
            return {"model": "unknown", "version": "unknown"}
|
|
|
|
def resolve_topic_template(self, template, broker_num=None):
|
|
"""Resolve topic template with {IATA}, {IATA_lower}, and {PUBLIC_KEY} placeholders"""
|
|
if not template:
|
|
return template
|
|
|
|
# Get IATA - broker-specific or global
|
|
iata = self.global_iata
|
|
if broker_num:
|
|
broker_iata = self.get_env(f'MQTT{broker_num}_IATA', '')
|
|
if broker_iata:
|
|
iata = broker_iata.lower()
|
|
|
|
# Replace template variables
|
|
resolved = template.replace('{IATA}', iata.upper()) # Uppercase variant
|
|
resolved = resolved.replace('{IATA_lower}', iata.lower()) # Lowercase variant
|
|
resolved = resolved.replace('{PUBLIC_KEY}', self.device_public_key if self.device_public_key and self.device_public_key != 'Unknown' else 'DEVICE')
|
|
return resolved
|
|
|
|
def is_letsmesh_broker(self, broker_num=None) -> bool:
|
|
"""Detect if the given broker is a Let's Mesh Analyzer broker by hostname or token audience."""
|
|
server = None
|
|
audience = None
|
|
if broker_num:
|
|
server = self.get_env(f'MQTT{broker_num}_SERVER', '')
|
|
audience = self.get_env(f'MQTT{broker_num}_TOKEN_AUDIENCE', '')
|
|
if not server:
|
|
server = self.get_env('MQTT1_SERVER', '')
|
|
if not audience:
|
|
audience = self.get_env('MQTT1_TOKEN_AUDIENCE', '')
|
|
host = (server or '').lower()
|
|
aud = (audience or '').lower()
|
|
return ('letsmesh.net' in host) or ('letsmesh.net' in aud)
|
|
|
|
def has_configured_iata(self, broker_num=None) -> bool:
    """Return True if a non-default IATA code is configured (not 'LOC')."""
    # Start from the global code; a broker-specific override replaces it.
    effective = self.global_iata or ''
    if broker_num:
        override = self.get_env(f'MQTT{broker_num}_IATA', '')
        if override:
            effective = override.lower()
    return bool(effective) and effective.lower() != 'loc'
|
|
|
|
def broker_requires_iata(self, broker_num) -> bool:
    """Check if a broker requires IATA configuration.

    Returns True if:
    - It's a Let's Mesh Analyzer broker, OR
    - It has explicitly configured topics that use IATA placeholders
    """
    if self.is_letsmesh_broker(broker_num):
        return True

    def uses_iata(topic):
        return bool(topic) and ('{IATA}' in topic or '{IATA_lower}' in topic)

    for topic_type in ('STATUS', 'PACKETS', 'DECODED', 'DEBUG', 'RAW'):
        per_broker = self.get_env(f'MQTT{broker_num}_TOPIC_{topic_type}', '')
        if uses_iata(per_broker):
            return True
        # The global topic only matters when no broker-specific override exists.
        if not per_broker and uses_iata(self.get_env(f'TOPIC_{topic_type}', '')):
            return True

    return False
|
|
|
|
def get_topic(self, topic_type, broker_num=None):
    """Get topic with template resolution, checking broker-specific override first.

    Resolution order:
    1. MQTT<n>_TOPIC_<TYPE> (broker-specific override)
    2. TOPIC_<TYPE> (global)
    3. Defaults: RAW never gets a default; a Let's Mesh broker without a
       configured IATA gets no default; brokers with an IATA configured get
       IATA/public-key based defaults; otherwise classic 'meshcore/<type>'
       defaults are used.
    """
    kind = topic_type.upper()

    # 1. Broker-specific override wins outright.
    if broker_num:
        override = self.get_env(f'MQTT{broker_num}_TOPIC_{kind}', '')
        if override:
            return self.resolve_topic_template(override, broker_num)

    # 2. Global topic configuration.
    global_template = self.get_env(f'TOPIC_{kind}', '')
    if global_template:
        return self.resolve_topic_template(global_template, broker_num)

    # 3. RAW is opt-in only: never fabricate a default for it.
    if kind == 'RAW':
        if self.debug:
            self.logger.debug(f"No RAW topic configured for broker {broker_num}, skipping RAW publish")
        return None

    iata_defaults = {
        'STATUS': 'meshcore/{IATA}/{PUBLIC_KEY}/status',
        'PACKETS': 'meshcore/{IATA}/{PUBLIC_KEY}/packets',
        'DECODED': 'meshcore/{IATA}/{PUBLIC_KEY}/decoded',
        'DEBUG': 'meshcore/{IATA}/{PUBLIC_KEY}/debug'
    }
    classic_defaults = {
        'STATUS': 'meshcore/status',
        'PACKETS': 'meshcore/packets',
        'DECODED': 'meshcore/decoded',
        'DEBUG': 'meshcore/debug'
    }

    if self.has_configured_iata(broker_num):
        # Prefer IATA-based defaults whenever an IATA code is configured.
        template = iata_defaults.get(kind, f"meshcore/{{IATA}}/{{PUBLIC_KEY}}/{topic_type.lower()}")
    else:
        # Never fall back to classic defaults on Let's Mesh Analyzer brokers.
        if self.is_letsmesh_broker(broker_num):
            if self.debug:
                self.logger.debug(f"Skipping default '{topic_type}' topic for Let's Mesh broker {broker_num} because IATA is not configured")
            return None
        template = classic_defaults.get(kind, f'meshcore/{topic_type.lower()}')

    resolved = self.resolve_topic_template(template, broker_num)
    if self.debug:
        self.logger.debug(f"Using default topic for {topic_type}: {resolved}")
    return resolved
|
|
|
|
async def set_radio_clock(self) -> bool:
    """Set radio clock only if device time is earlier than current system time.

    Reads the device clock via get_time and, only when it lags the host
    clock, pushes the host's current epoch time via set_time. The clock is
    never wound backwards.

    Returns:
        True if the clock was updated or was already current/ahead;
        False if not connected, the device lacks time commands, the update
        failed, or an unexpected error occurred.
    """
    try:
        if not self._ensure_connected("set_radio_clock", "warning"):
            return False

        # Get current device time with retry logic
        self.logger.info("Checking device time...")
        time_result = await self.retryable_device_command(
            lambda: self.meshcore.commands.get_time(),
            "get_time",
            timeout=8.0,
            max_retries=self.device_info_retry_limit,  # Use device info retry limit
            retry_delay=0.2
        )
        # None (retries exhausted) or an ERROR event both indicate the
        # firmware does not answer time queries - treated as unsupported.
        if time_result is None or time_result.type == EventType.ERROR:
            self.logger.warning("Device does not support time commands")
            return False

        device_time = time_result.payload.get('time', 0)
        current_time = int(time.time())

        self.logger.info(f"Device time: {device_time}, System time: {current_time}")

        # Only set time if device time is earlier than current time
        if device_time < current_time:
            time_diff = current_time - device_time
            self.logger.info(f"Device time is {time_diff} seconds behind, updating...")

            result = await self.retryable_device_command(
                lambda: self.meshcore.commands.set_time(current_time),
                "set_time",
                timeout=8.0,
                max_retries=self.device_info_retry_limit,  # Use device info retry limit
                retry_delay=0.2
            )
            if result and result.type == EventType.OK:
                self.logger.info(f"✓ Radio clock updated to: {current_time}")
                # Remember when we last synced so periodic re-sync can be scheduled.
                self.last_clock_sync_time = current_time
                return True
            else:
                self.logger.warning(f"Failed to update radio clock: {result}")
                return False
        else:
            # Device clock is equal or ahead; leave it untouched.
            self.logger.info("Device time is current or ahead - no update needed")
            return True

    except Exception as e:
        # Best-effort operation: clock problems are logged, never fatal.
        self.logger.warning(f"Error checking/setting radio clock: {e}")
        return False
|
|
|
|
async def fetch_private_key_from_device(self) -> bool:
    """Fetch private key from device using meshcore library.

    On success stores the key in self.device_private_key and marks
    self.private_key_export_available True; on any failure the flag is set
    False so callers can avoid retrying an unsupported operation.

    Returns:
        True if the key was exported; False otherwise (not connected,
        export disabled in firmware, command error, or unexpected response).
    """
    try:
        self.logger.info("Fetching private key from device...")

        if not self._ensure_connected("fetch_private_key_from_device", "error"):
            return False

        # Use meshcore library to export private key with retry logic
        # Use connection-specific retry limit (defaults to 3 for BLE, 2 for TCP)
        result = await self.retryable_device_command(
            lambda: self.meshcore.commands.export_private_key(),
            "export_private_key",
            timeout=10.0,
            max_retries=None,  # Use connection-specific default
            retry_delay=0.3  # Slightly longer delay for private key operations
        )

        if result is None:
            self.logger.error("Error fetching private key: command failed after retries")
            self.private_key_export_available = False
            return False

        # Dispatch on the event type the firmware answered with.
        if result.type == EventType.PRIVATE_KEY:
            self.device_private_key = result.payload["private_key"]
            self.logger.info("✓ Private key fetched successfully from device")
            self.private_key_export_available = True
            return True
        elif result.type == EventType.DISABLED:
            # Firmware compiled without the export feature - not an error.
            self.logger.warning("Private key export is disabled on this device")
            self.logger.info("This feature requires:")
            self.logger.info(" - Companion radio firmware")
            self.logger.info(" - ENABLE_PRIVATE_KEY_EXPORT=1 compile-time flag")
            self.private_key_export_available = False
            return False
        elif result.type == EventType.ERROR:
            self.logger.error(f"Error fetching private key: {result.payload}")
            self.private_key_export_available = False
            return False
        else:
            self.logger.error(f"Unexpected response when fetching private key: {result.type}")
            self.private_key_export_available = False
            return False

    except Exception as e:
        self.logger.error(f"Error fetching private key from device: {e}")
        self.private_key_export_available = False
        return False
|
|
|
|
|
|
|
|
async def create_jwt_with_private_key(self, audience: str = None) -> Optional[str]:
    """Create JWT using on-device signing (preferred) or private key from device.

    Signing strategy, in order:
    1. On-device signing via create_auth_token_async (unless AUTH_TOKEN_METHOD
       forces a software method).
    2. Private-key signing, sourcing the key from (in order) a previously
       cached value, the PRIVATE_KEY env var, a PRIVATE_KEY_FILE, or a fresh
       export from the device. Skipped entirely when AUTH_TOKEN_DEVICE_ONLY
       is 'true'.

    Args:
        audience: Optional 'aud' claim for the token.

    Returns:
        The signed JWT string, or None when no signing method succeeded or
        the auth_token module is unavailable.
    """
    try:
        if not create_auth_token_async and not create_auth_token:
            return None

        # Build claims
        claims = {}
        if audience:
            claims['aud'] = audience

        # Add optional owner public key if configured
        owner_public_key = os.getenv('PACKETCAPTURE_OWNER_PUBLIC_KEY', '').strip()
        if owner_public_key:
            # Validate it's a valid hex string of correct length (64 hex chars = 32 bytes)
            if len(owner_public_key) == 64 and all(c in '0123456789ABCDEFabcdef' for c in owner_public_key):
                claims['owner'] = owner_public_key.upper()
            else:
                self.logger.warning(f"Invalid owner public key format (expected 64 hex characters): {owner_public_key[:16]}...")

        # Add optional email if configured
        email = os.getenv('PACKETCAPTURE_OWNER_EMAIL', '').strip()
        if email:
            # Normalize to lowercase
            email = email.lower()
            # Validate email format using a simple regex
            import re
            email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
            if re.match(email_pattern, email):
                claims['email'] = email
            else:
                self.logger.warning(f"Invalid email format: {email}")

        # Add optional client agent/version if configured, otherwise use default from status message
        client_agent = os.getenv('PACKETCAPTURE_CLIENT_AGENT', '').strip()
        if not client_agent:
            # Default to the same value used in status messages
            client_agent = self._load_client_version()
        if client_agent:
            claims['client'] = client_agent

        # Prefer on-device signing if meshcore instance is available and connected
        # (AUTH_TOKEN_METHOD=python / meshcore-decoder forces software signing instead)
        if (create_auth_token_async and
            self.meshcore and
            self.meshcore.is_connected and
            os.getenv('AUTH_TOKEN_METHOD', '').lower().strip() not in ('python', 'meshcore-decoder')):
            try:
                # Use on-device signing (no private key needed)
                # Don't pass private_key_hex so auth_token.py will fail fast if device signing fails
                jwt_token = await create_auth_token_async(
                    self.device_public_key,
                    meshcore_instance=self.meshcore,
                    **claims
                )
                self.logger.info("✓ JWT created using on-device signing")
                return jwt_token
            except Exception as e:
                # Device signing failed - fall back to private key if available
                self.logger.debug(f"On-device signing failed: {e}, attempting private key fallback...")

        # Fallback to private key signing (skip if device-only mode is enabled)
        device_only = os.getenv('AUTH_TOKEN_DEVICE_ONLY', '').lower().strip() == 'true'
        if device_only:
            self.logger.error("Device-only signing mode enabled but device signing failed or not available")
            return None

        # Fallback to private key signing - load from env/file first, then try device if needed
        if not self.device_private_key:
            # Try to load from environment variable first
            env_private_key = self.get_env('PRIVATE_KEY', '')
            if env_private_key:
                self.device_private_key = env_private_key
                self.logger.info("Device signing failed, using private key from environment")
            # Try to read from private key file
            elif read_private_key_file:
                private_key_file = self.get_env('PRIVATE_KEY_FILE', '')
                if private_key_file and Path(private_key_file).exists():
                    try:
                        self.device_private_key = read_private_key_file(private_key_file)
                        self.logger.info(f"Device signing failed, using private key from file: {private_key_file}")
                    except Exception as e:
                        self.logger.warning(f"Failed to read private key from file {private_key_file}: {e}")

        # If still no private key, try fetching from device
        if not self.device_private_key:
            self.logger.info("Device signing not available, fetching private key from device for fallback...")
            private_key_fetch_success = await self.fetch_private_key_from_device()
            if not private_key_fetch_success:
                self.logger.warning("Cannot create JWT: device signing failed and private key not available from device or environment")
                return None

        # Convert bytearray to hex string if needed
        private_key = self.device_private_key
        if isinstance(private_key, (bytes, bytearray)):
            private_key = private_key.hex()

        # Use async version if available (for consistency), otherwise sync version
        if create_auth_token_async:
            jwt_token = await create_auth_token_async(
                self.device_public_key,
                private_key_hex=private_key,
                **claims
            )
        else:
            jwt_token = create_auth_token(self.device_public_key, private_key, **claims)

        self.logger.info("✓ JWT created using private key from device")
        return jwt_token

    except Exception as e:
        device_only = os.getenv('AUTH_TOKEN_DEVICE_ONLY', '').lower().strip() == 'true'
        if device_only:
            self.logger.error(f"Device-only signing mode: JWT creation failed: {e}")
        else:
            self.logger.error(f"Error creating JWT: {e}", exc_info=True)
        return None
|
|
|
|
async def create_auth_token_jwt(self, audience: str = None, broker_num: int = None) -> Optional[str]:
    """Create a JWT token using on-device signing or the device private key.

    Args:
        audience: Optional 'aud' claim for the token.
        broker_num: When given, the token and its parsed expiry are cached in
            self.jwt_tokens[broker_num] so renewal can be scheduled later.

    Returns:
        The JWT string, or None if token creation failed.
    """
    # Use on-device signing (preferred) or private key method (fallback).
    # create_jwt_with_private_key() already logs which method was used.
    jwt_token = await self.create_jwt_with_private_key(audience)
    if not jwt_token:
        self.logger.error("Failed to create JWT with private key from device")
        return None

    # Store token with expiry time if broker_num is provided
    if broker_num is not None:
        import time
        import json
        import base64

        # Parse the (unverified) payload segment to learn the expiry time.
        try:
            parts = jwt_token.split('.')
            if len(parts) == 3:
                segment = parts[1]
                # base64url strips '=' padding; restore exactly the amount
                # needed. (Blindly appending '==' raises binascii.Error for
                # payloads whose length is already a multiple of 4.)
                payload_data = base64.urlsafe_b64decode(segment + '=' * (-len(segment) % 4))
                payload = json.loads(payload_data)
                expires_at = payload.get('exp', time.time() + 86400)  # Default 24h if not found

                self.jwt_tokens[broker_num] = {
                    'token': jwt_token,
                    'expires_at': expires_at,
                    'audience': audience
                }

                if self.debug:
                    self.logger.debug(f"JWT token stored for broker {broker_num}, expires at {expires_at}")
        except Exception as e:
            # Expiry tracking is best-effort; the token itself is still usable.
            self.logger.warning(f"Could not parse JWT expiry: {e}")

    return jwt_token
|
|
|
|
def is_jwt_token_expired(self, broker_num: int) -> bool:
    """Check if JWT token for broker is expired or near expiry."""
    token_info = self.jwt_tokens.get(broker_num)
    if token_info is None:
        # No token stored at all counts as expired.
        return True

    import time
    # Treat tokens inside the renewal window as already expired so they
    # get refreshed before the broker starts rejecting them.
    return time.time() >= token_info['expires_at'] - self.jwt_renewal_threshold
|
|
|
|
async def renew_jwt_token(self, broker_num: int) -> bool:
    """Renew the JWT token for a specific broker.

    Returns True on success. On any failure the circuit-breaker failure
    counter is incremented and its reset timestamp refreshed.
    """
    try:
        token_info = self.jwt_tokens.get(broker_num)
        if token_info is None:
            self.logger.warning(f"No existing JWT token for broker {broker_num}")
            return False

        self.logger.info(f"Renewing JWT token for broker {broker_num}...")

        # Re-issue with the same audience as the existing token.
        fresh_token = await self.create_auth_token_jwt(token_info.get('audience'), broker_num)
        if not fresh_token:
            self.logger.error(f"Failed to renew JWT token for broker {broker_num}")
            self.jwt_failure_count += 1
            self.jwt_circuit_breaker_reset_time = time.time()
            return False

        self.logger.info(f"✓ JWT token renewed for broker {broker_num}")
        # A successful renewal clears the failure streak.
        self.jwt_failure_count = 0
        return True

    except Exception as e:
        self.logger.error(f"Error renewing JWT token for broker {broker_num}: {e}")
        self.jwt_failure_count += 1
        self.jwt_circuit_breaker_reset_time = time.time()
        return False
|
|
|
|
async def check_jwt_renewal_for_broker(self, broker_num: int):
    """Check and renew JWT token for a specific broker if needed.

    When the stored token is expired (or inside the renewal window) a new
    token is minted and the matching live MQTT client's credentials are
    updated, followed by a reconnect so the broker sees the new password.
    Errors are logged, never raised.
    """
    try:
        # Nothing to do for brokers we never issued a token for.
        if broker_num not in self.jwt_tokens:
            return

        if self.is_jwt_token_expired(broker_num):
            self.logger.info(f"JWT token for broker {broker_num} needs renewal")

            # Renew the token
            renewal_success = await self.renew_jwt_token(broker_num)
            if renewal_success:
                # Find the broker client and update credentials
                for client_info in self.mqtt_clients:
                    if client_info['broker_num'] == broker_num:
                        mqtt_client = client_info['client']
                        # renew_jwt_token() refreshed self.jwt_tokens in place.
                        new_token = self.jwt_tokens[broker_num]['token']
                        username = f"v1_{self.device_public_key.upper()}"

                        # Update credentials and reconnect
                        mqtt_client.username_pw_set(username, new_token)
                        mqtt_client.reconnect()

                        self.logger.info(f"✓ Updated credentials for MQTT broker {broker_num}")
                        break
            else:
                self.logger.error(f"Failed to renew JWT token for broker {broker_num}")

    except Exception as e:
        self.logger.error(f"Error checking JWT renewal for broker {broker_num}: {e}")
|
|
|
|
async def check_and_renew_jwt_tokens(self):
    """Walk every broker with a stored JWT and renew any that need it."""
    try:
        # Snapshot the keys: renewal may mutate self.jwt_tokens mid-iteration.
        for broker in list(self.jwt_tokens):
            await self.check_jwt_renewal_for_broker(broker)
    except Exception as e:
        self.logger.error(f"Error checking JWT token renewals: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
def _is_tcp_sdk_auto_reconnect_active(self) -> bool:
|
|
"""
|
|
Check if TCP SDK auto-reconnect is active and handling reconnection.
|
|
|
|
Returns:
|
|
True if TCP connection with SDK auto-reconnect enabled and not exhausted
|
|
"""
|
|
return (self.connection_type == 'tcp' and
|
|
self.tcp_sdk_auto_reconnect_enabled and
|
|
not self.sdk_reconnect_exhausted)
|
|
|
|
def _get_connection_timeout_config(self, default_timeout: float = 5.0, default_retries: int = None):
|
|
"""
|
|
Get timeout and retry configuration based on connection type.
|
|
|
|
Args:
|
|
default_timeout: Default timeout for connections without special handling
|
|
default_retries: Default number of retries (None = use connection-specific default)
|
|
|
|
Returns:
|
|
Tuple of (timeout, retries) appropriate for the current connection type
|
|
"""
|
|
if self.connection_type == 'ble':
|
|
retries = self.health_check_retry_limit if self.health_check_retry_limit is not None else self.ble_retry_limit
|
|
return (12.0, retries) # Longer timeout and more retries for BLE on Linux
|
|
elif self._is_tcp_sdk_auto_reconnect_active():
|
|
retries = self.health_check_retry_limit if self.health_check_retry_limit is not None else self.tcp_retry_limit
|
|
return (8.0, retries) # Longer timeout for TCP with SDK auto-reconnect
|
|
else:
|
|
retries = self.health_check_retry_limit if self.health_check_retry_limit is not None else (default_retries or self.default_retry_limit)
|
|
return (default_timeout, retries)
|
|
|
|
def _ensure_connected(self, command_name: str = "command", log_level: str = "debug") -> bool:
|
|
"""
|
|
Check if device is connected, logging appropriately if not.
|
|
|
|
Args:
|
|
command_name: Name of the command being executed (for logging)
|
|
log_level: Log level to use ("debug", "warning", "error")
|
|
|
|
Returns:
|
|
True if connected, False otherwise
|
|
"""
|
|
if not self.meshcore or not self.meshcore.is_connected:
|
|
message = f"Cannot execute {command_name} - not connected to device"
|
|
if log_level == "error":
|
|
self.logger.error(message)
|
|
elif log_level == "warning":
|
|
self.logger.warning(message)
|
|
else:
|
|
self.logger.debug(message)
|
|
return False
|
|
return True
|
|
|
|
def _reset_connection_state(self):
|
|
"""
|
|
Reset all connection-related state variables after successful connection/reconnection.
|
|
This includes health check counters, SDK reconnect flags, and consecutive failure counts.
|
|
"""
|
|
self.connected = True
|
|
self.health_check_failure_count = 0
|
|
if self.connection_type == 'tcp':
|
|
self.sdk_reconnect_exhausted = False
|
|
self.reset_consecutive_failures("connection")
|
|
|
|
async def _start_auto_message_fetching_if_enabled(self):
|
|
"""Start meshcore auto-fetch loop when PACKETCAPTURE_DRAIN_MESSAGES is enabled (default)."""
|
|
if not self.drain_messages:
|
|
self.logger.info(
|
|
"PACKETCAPTURE_DRAIN_MESSAGES is false: skipping auto message fetch (device message queue will not be drained)"
|
|
)
|
|
return
|
|
await self.meshcore.start_auto_message_fetching()
|
|
|
|
async def _setup_after_reconnection(self):
|
|
"""
|
|
Perform all setup tasks required after a successful reconnection.
|
|
This includes cleaning up old subscriptions, setting up event handlers,
|
|
and starting auto message fetching.
|
|
"""
|
|
# Clean up old subscriptions before re-setting up handlers
|
|
# (SDK may have recreated the instance, leaving old subscriptions orphaned)
|
|
self.cleanup_event_subscriptions()
|
|
# Re-setup event handlers after reconnection
|
|
await self.setup_event_handlers()
|
|
await self._start_auto_message_fetching_if_enabled()
|
|
|
|
def _check_ble_grace_period(self, failure_reason: str = "failed") -> bool:
|
|
"""
|
|
Check if BLE health check failure should be allowed under grace period.
|
|
|
|
Args:
|
|
failure_reason: Description of why the health check failed (for logging)
|
|
|
|
Returns:
|
|
True if failure is within grace period and should be allowed, False otherwise
|
|
"""
|
|
if self.connection_type == 'ble' and self.meshcore and self.meshcore.is_connected:
|
|
self.health_check_failure_count += 1
|
|
if self.health_check_failure_count <= self.health_check_grace_period:
|
|
if self.debug:
|
|
self.logger.debug(
|
|
f"Health check {failure_reason} but BLE connection appears active "
|
|
f"(grace period: {self.health_check_failure_count}/{self.health_check_grace_period})"
|
|
)
|
|
return True # Allow grace period for BLE
|
|
else:
|
|
self.logger.warning(
|
|
f"Health check {failure_reason} {self.health_check_failure_count} times consecutively - "
|
|
"connection may be degraded"
|
|
)
|
|
return False
|
|
return False
|
|
|
|
async def check_connection_health(self) -> bool:
    """Enhanced health check with network validation.

    Three stages: (1) the SDK's own connected flag, (2) for TCP, the
    underlying transport's socket state, (3) a lightweight device query.
    When TCP SDK auto-reconnect is active, failures are only debug-logged
    (the SDK is expected to recover); for BLE, a short grace period of
    consecutive failures is tolerated while the link still reports connected.

    Returns:
        True if the device responded to the health query (or BLE grace
        applies); False otherwise.
    """
    try:
        # 1. Check if meshcore object exists and reports connected
        if not self.meshcore or not self.meshcore.is_connected:
            # For TCP with SDK auto-reconnect, don't log warning if SDK is still trying
            if self._is_tcp_sdk_auto_reconnect_active():
                if self.debug:
                    self.logger.debug("MeshCore reports not connected, but SDK auto-reconnect is active")
                return False
            self.logger.warning("MeshCore reports not connected")
            return False

        # 2. For TCP connections, verify socket state
        if self.connection_type == 'tcp':
            transport = get_transport(self.meshcore)
            if transport:
                if transport.is_closing():
                    # For TCP with SDK auto-reconnect, SDK will handle reconnection
                    if self.tcp_sdk_auto_reconnect_enabled and not self.sdk_reconnect_exhausted:
                        if self.debug:
                            self.logger.debug("TCP transport is closing, but SDK auto-reconnect is active")
                        return False
                    self.logger.warning("TCP transport is closed or closing")
                    return False

        # 3. Try a lightweight command with timeout and retry
        # Use longer timeout for BLE connections (Linux BLE can be slow) and TCP with SDK auto-reconnect
        health_check_timeout, health_check_retries = self._get_connection_timeout_config()

        try:
            result = await self.retryable_device_command(
                lambda: self.meshcore.commands.send_device_query(),
                "send_device_query (health check)",
                timeout=health_check_timeout,
                max_retries=health_check_retries,  # Uses connection-specific or health_check_retry_limit override
                retry_delay=0.3  # Slightly longer delay for health checks
            )
            if result and hasattr(result, 'type') and result.type != EventType.ERROR:
                # Success - reset failure count
                self.health_check_failure_count = 0
                return True
            else:
                if self.debug:
                    self.logger.debug(f"Health check device query failed: {result}")
                # For BLE, if is_connected is True, we might still consider it healthy
                # (BLE can have slow responses but connection might still be valid)
                if self._check_ble_grace_period("query failed"):
                    return True
                return False
        except asyncio.TimeoutError:
            # For TCP with SDK auto-reconnect, timeout might just mean device is busy
            # SDK will handle reconnection if needed, so don't log as warning
            if self._is_tcp_sdk_auto_reconnect_active():
                if self.debug:
                    self.logger.debug("Health check timed out, but SDK auto-reconnect is active")
                return False

            # For BLE, allow grace period even on timeout if connection appears active
            if self._check_ble_grace_period("timed out"):
                return True

            self.logger.warning("Health check timed out")
            return False
        except Exception as e:
            # For TCP with SDK auto-reconnect, errors might be temporary
            if self._is_tcp_sdk_auto_reconnect_active():
                if self.debug:
                    error_type = type(e).__name__
                    self.logger.debug(f"Health check command failed ({error_type}), but SDK auto-reconnect is active")
                return False

            # Log detailed error information for debugging
            error_type = type(e).__name__
            error_msg = str(e)
            # Check if it's an errno error (common on macOS/Linux)
            errno_value = getattr(e, 'errno', None)
            if errno_value is not None:
                import errno
                try:
                    # Map the numeric errno to its symbolic name for the log.
                    errno_name = errno.errorcode.get(errno_value, f"UNKNOWN({errno_value})")
                    self.logger.warning(f"Health check command failed: {error_type} [{errno_name}]: {error_msg}")
                except (AttributeError, KeyError):
                    self.logger.warning(f"Health check command failed: {error_type} [errno={errno_value}]: {error_msg}")
            else:
                self.logger.warning(f"Health check command failed: {error_type}: {error_msg}")
            return False

    except Exception as e:
        # For TCP with SDK auto-reconnect, don't log as warning if SDK is handling it
        if self._is_tcp_sdk_auto_reconnect_active():
            if self.debug:
                self.logger.debug(f"Connection health check failed ({type(e).__name__}), but SDK auto-reconnect is active")
            return False
        self.logger.warning(f"Connection health check failed: {e}")
        return False
|
|
|
|
def check_mqtt_health(self) -> bool:
    """Check MQTT broker health, tolerating short outages via a grace period.

    A broker only counts as failed once it has been disconnected for longer
    than self.mqtt_grace_period. The check fails (returns False) only when
    EVERY configured broker has a persistent failure; a single healthy broker
    keeps the overall result True.
    """
    import time

    if not self.enable_mqtt or not self.mqtt_clients:
        # Nothing to monitor - treat as healthy.
        return True

    now = time.time()
    persistent_failures = 0
    total = len(self.mqtt_clients)

    for entry in self.mqtt_clients:
        num = entry['broker_num']
        client = entry['client']

        if client.is_connected():
            # Back online: close out any outage bookkeeping for this broker.
            started = self.mqtt_disconnect_timestamps.pop(num, None)
            if started is not None:
                self.logger.info(f"MQTT{num} reconnected after {now - started:.1f} seconds")
                # Reset consecutive failures on successful reconnection
                self.reset_consecutive_failures("mqtt")
            continue

        # Disconnected: start the grace-period clock if it isn't already running.
        if num not in self.mqtt_disconnect_timestamps:
            self.mqtt_disconnect_timestamps[num] = now
            self.logger.debug(f"MQTT{num} disconnected - grace period started")

        outage = now - self.mqtt_disconnect_timestamps[num]
        if outage >= self.mqtt_grace_period:
            persistent_failures += 1
            if self.debug:
                self.logger.debug(f"MQTT{num} disconnected for {outage:.1f}s (grace period: {self.mqtt_grace_period}s) - persistent failure")

    # Require ALL brokers to be failed, not just one, to avoid false
    # positives when multiple brokers are configured.
    all_failed = persistent_failures == total and total > 0
    if all_failed and self.debug:
        self.logger.debug(f"All {total} MQTT broker(s) have persistent failures")
    return not all_failed
|
|
|
|
async def connect(self) -> bool:
    """Connect to MeshCore node using official package.

    Reads CONNECTION_TYPE from the environment ('serial', 'tcp', or 'ble')
    and creates the matching meshcore connection, then waits for the
    device's self_info to populate so origin/radio metadata can be cached
    for MQTT publishing.

    Returns:
        bool: True when connected (device info may fall back to configured
        defaults); False on any failure. Partial connections are cleaned up
        before returning so no pending dispatcher tasks are left behind.
    """
    try:
        self.logger.info("Connecting to MeshCore node...")

        # Clean up any existing connection before attempting new one
        # This prevents pending tasks from interfering with new connections
        if self.meshcore:
            try:
                self.cleanup_event_subscriptions()
                self.meshcore.stop()
                await self.meshcore.disconnect()
            except Exception as cleanup_error:
                self.logger.debug(f"Error cleaning up existing connection before reconnect: {cleanup_error}")
            self.meshcore = None
            # Brief delay to ensure cleanup completes
            await asyncio.sleep(0.2)

        # Get connection type from environment
        connection_type = self.get_env('CONNECTION_TYPE', 'ble').lower()
        self.connection_type = connection_type  # Store for health checks
        self.logger.info(f"Using connection type: {connection_type}")

        if connection_type == 'serial':
            # Create serial connection
            serial_port = self.get_env('SERIAL_PORTS', '/dev/ttyUSB0')
            # Handle comma-separated ports (take first one for now)
            if ',' in serial_port:
                serial_port = serial_port.split(',')[0].strip()
            self.logger.info(f"Connecting via serial port: {serial_port}")
            self.meshcore = await meshcore.MeshCore.create_serial(serial_port, debug=False)
        elif connection_type == 'tcp':
            # Create TCP connection with SDK auto-reconnect if enabled
            tcp_host = self.get_env('TCP_HOST', 'localhost')
            tcp_port = self.get_env_int('TCP_PORT', 5000)
            self.logger.info(f"Connecting via TCP to {tcp_host}:{tcp_port}")

            # Enable SDK auto-reconnect for TCP connections
            create_kwargs = {'debug': False}
            if self.tcp_sdk_auto_reconnect_enabled:
                create_kwargs['auto_reconnect'] = True
                create_kwargs['max_reconnect_attempts'] = self.tcp_sdk_max_reconnect_attempts
                self.logger.info(f"SDK auto-reconnect enabled with max {self.tcp_sdk_max_reconnect_attempts} attempts")
            else:
                self.logger.info("SDK auto-reconnect disabled - using custom reconnect logic")

            self.meshcore = await meshcore.MeshCore.create_tcp(tcp_host, tcp_port, **create_kwargs)

            # Reset SDK reconnect exhaustion flag on new connection
            self.sdk_reconnect_exhausted = False

            # Enable TCP keepalive if configured
            # Access transport via: meshcore.cx.connection.transport
            # (MeshCore.cx is ConnectionManager, connection is TCPConnection)
            if self.tcp_keepalive_enabled:
                transport = get_transport(self.meshcore)

                if transport:
                    try:
                        if enable_tcp_keepalive(
                            transport,
                            idle=self.tcp_keepalive_idle,
                            interval=self.tcp_keepalive_interval,
                            count=self.tcp_keepalive_count
                        ):
                            self.logger.info(f"TCP keepalive enabled (idle={self.tcp_keepalive_idle}s, interval={self.tcp_keepalive_interval}s, count={self.tcp_keepalive_count})")
                        else:
                            self.logger.warning("Failed to enable TCP keepalive")
                    except Exception as e:
                        self.logger.warning(f"Could not enable TCP keepalive: {e}")
                else:
                    if self.debug:
                        # Only log as debug to avoid noise if transport is genuinely not accessible
                        self.logger.debug("Could not access transport for TCP keepalive configuration (transport may not be exposed by meshcore library)")
                    else:
                        # Log as info since this is a known limitation, not a critical error
                        self.logger.info("TCP keepalive configuration skipped (transport not accessible)")
            elif not self.tcp_keepalive_enabled:
                self.logger.debug("TCP keepalive disabled by configuration")
        else:
            # Create BLE connection (default)
            # Support both BLE_ADDRESS and BLE_DEVICE for MAC address
            ble_address = self.get_env('BLE_ADDRESS', None) or self.get_env('BLE_DEVICE', None)
            # Support both BLE_DEVICE_NAME and BLE_NAME for device name
            ble_device_name = self.get_env('BLE_DEVICE_NAME', None) or self.get_env('BLE_NAME', None)

            if self.debug:
                self.logger.debug(f"BLE connection config - Address: {ble_address}, Name: {ble_device_name}")
                self.logger.debug(f"Environment check - BLE_ADDRESS: {self.get_env('BLE_ADDRESS', None)}, BLE_DEVICE: {self.get_env('BLE_DEVICE', None)}")
                self.logger.debug(f"Environment check - BLE_DEVICE_NAME: {self.get_env('BLE_DEVICE_NAME', None)}, BLE_NAME: {self.get_env('BLE_NAME', None)}")

            if ble_address:
                # Direct address connection
                self.logger.info(f"Connecting via BLE to address: {ble_address}")
                if self.debug:
                    self.logger.debug(f"Using BLE address from environment: {ble_address}")
                self.meshcore = await meshcore.MeshCore.create_ble(ble_address, debug=False)
            elif ble_device_name:
                # Try to find device by name - the meshcore library handles name matching internally
                self.logger.info(f"Scanning for BLE device with name: {ble_device_name}")
                try:
                    # The meshcore library will automatically find devices by name during scanning
                    self.meshcore = await meshcore.MeshCore.create_ble(ble_device_name, debug=False)
                except Exception as e:
                    self.logger.error(f"Error connecting to device '{ble_device_name}': {e}")
                    # Clean up any partial connection
                    if self.meshcore:
                        try:
                            self.meshcore.stop()
                            await self.meshcore.disconnect()
                        except:
                            pass
                        self.meshcore = None
                    # Fallback to general scan
                    self.logger.info("Falling back to general BLE scan...")
                    self.meshcore = await meshcore.MeshCore.create_ble(debug=False)
            else:
                # No specific device, just scan and connect to first available
                self.logger.info("Scanning for available BLE devices...")
                self.meshcore = await meshcore.MeshCore.create_ble(debug=False)

        # Wait a brief moment for connection to fully establish (especially for BLE)
        if self.meshcore and self.connection_type == 'ble':
            await asyncio.sleep(0.5)
            # Retry connection check a few times in case it's still establishing
            for attempt in range(3):
                if self.meshcore.is_connected:
                    break
                if attempt < 2:
                    await asyncio.sleep(0.5)

        if self.meshcore and self.meshcore.is_connected:
            self._reset_connection_state()
            self.logger.info(f"Connected to: {self.meshcore.self_info}")

            # Wait for self_info to be populated (it may be empty initially, especially for serial)
            # Check if self_info has actual content (not just empty dict)
            max_wait_attempts = 10
            wait_interval = 0.5
            self_info_populated = False

            for attempt in range(max_wait_attempts):
                # A populated self_info has at least a name or public_key
                if self.meshcore.self_info and (
                    self.meshcore.self_info.get('name') or
                    self.meshcore.self_info.get('public_key')
                ):
                    self_info_populated = True
                    break
                if attempt < max_wait_attempts - 1:
                    self.logger.debug(f"Waiting for device info to populate (attempt {attempt + 1}/{max_wait_attempts})...")
                    await asyncio.sleep(wait_interval)

            # Try to trigger device info by sending a query (for serial connections especially)
            if not self_info_populated and hasattr(self.meshcore, 'commands'):
                try:
                    self.logger.debug("Attempting to query device info...")
                    result = await self.retryable_device_command(
                        lambda: self.meshcore.commands.send_device_query(),
                        "send_device_query (device info)",
                        timeout=3.0,
                        max_retries=self.device_info_retry_limit,  # Use device info retry limit
                        retry_delay=0.2
                    )
                    # Wait a bit more after query
                    await asyncio.sleep(0.5)
                    if self.meshcore.self_info and (
                        self.meshcore.self_info.get('name') or
                        self.meshcore.self_info.get('public_key')
                    ):
                        self_info_populated = True
                except Exception as e:
                    # Best-effort query; fallback values are used below if it fails
                    self.logger.debug(f"Device query failed (non-critical): {e}")

            # Store device information for origin field
            if self_info_populated and self.meshcore.self_info:
                self.device_name = self.meshcore.self_info.get('name', 'Unknown')
                self.device_public_key = self.meshcore.self_info.get('public_key', 'Unknown')
                # Normalize public key to uppercase
                if self.device_public_key != 'Unknown':
                    self.device_public_key = self.device_public_key.upper()

                # Extract radio information (freq/bandwidth/spreading factor/coding rate)
                radio_freq = self.meshcore.self_info.get('radio_freq', 0)
                radio_bw = self.meshcore.self_info.get('radio_bw', 0)
                radio_sf = self.meshcore.self_info.get('radio_sf', 0)
                radio_cr = self.meshcore.self_info.get('radio_cr', 0)
                self.radio_info = f"{radio_freq},{radio_bw},{radio_sf},{radio_cr}"

                self.logger.info(f"Device name: {self.device_name}")
                self.logger.info(f"Device public key: {self.device_public_key}")
                self.logger.info(f"Radio info: {self.radio_info}")
            else:
                # Fallback: Use configured origin or default
                self.logger.warning("Device info not available from connection, using fallback")
                self.device_name = self.get_env('ORIGIN', 'MeshCore Device')
                self.device_public_key = 'Unknown'
                self.radio_info = "0,0,0,0"
                self.logger.info(f"Using fallback device name: {self.device_name}")
                self.logger.info("You can set PACKETCAPTURE_ORIGIN in .env.local to customize the device name")

            # Set radio clock to current system time
            await self.set_radio_clock()

            # Don't publish status here - wait for MQTT connections
            # Status will be published after MQTT connections are established

            # Setup JWT authentication - will use on-device signing (preferred)
            # Private key fallback will be loaded lazily only if device signing fails
            self.logger.info("Setting up JWT authentication...")
            self.logger.info("✓ JWT authentication: Will use on-device signing")

            return True
        else:
            self.logger.error("Failed to connect to MeshCore node")
            # Clean up failed connection attempt to prevent pending tasks
            if self.meshcore:
                try:
                    self.cleanup_event_subscriptions()
                    self.meshcore.stop()
                    await self.meshcore.disconnect()
                except Exception as cleanup_error:
                    self.logger.debug(f"Error cleaning up failed connection: {cleanup_error}")
                self.meshcore = None
            return False

    except Exception as e:
        self.logger.error(f"Connection failed: {e}")
        # Clean up any partial connection on exception
        if self.meshcore:
            try:
                self.cleanup_event_subscriptions()
                self.meshcore.stop()
                await self.meshcore.disconnect()
            except Exception as cleanup_error:
                self.logger.debug(f"Error cleaning up failed connection: {cleanup_error}")
            self.meshcore = None
        return False
|
def cleanup_event_subscriptions(self):
    """Remove every registered event subscription before disconnecting.

    Unsubscribing first keeps the dispatcher from leaving pending tasks
    behind when the meshcore connection is torn down. All errors are
    logged at debug level and swallowed; cleanup is best-effort.
    """
    if not self.meshcore:
        return

    try:
        dispatcher = getattr(self.meshcore, "dispatcher", None)
        subs = getattr(dispatcher, "subscriptions", None) if dispatcher is not None else None
        if subs is None:
            return

        count = len(subs)
        if count == 0:
            return

        self.logger.debug(f"Cleaning up {count} event subscriptions")
        # Snapshot the list: unsubscribing mutates it while we iterate.
        for sub in list(subs):
            try:
                # meshcore.unsubscribe() is the supported removal API
                self.meshcore.unsubscribe(sub)
            except Exception as e:
                self.logger.debug(f"Error unsubscribing: {e}")
        self.logger.debug(f"Cleared {count} event subscriptions")
    except Exception as e:
        self.logger.debug(f"Error cleaning up subscriptions: {e}")
|
async def reconnect_meshcore(self) -> bool:
    """Attempt to reconnect to MeshCore device with exponential backoff retry logic.

    Tears down the existing connection (including an OS-level bluetoothctl
    disconnect on Linux for BLE), waits out the backoff delay, then calls
    connect(). The retry counter is reset only on success.

    Returns:
        bool: True if reconnection succeeded, False if it failed, the retry
        budget is exhausted, or shutdown was requested during the delay.
    """
    if self.max_connection_retries > 0 and self.connection_retry_count >= self.max_connection_retries:
        self.logger.error(f"Maximum connection retry attempts ({self.max_connection_retries}) reached")

        # Track service failure for systemd restart decision
        if self.track_service_failure("MeshCore connection exhausted",
                                      f"Failed {self.connection_retry_count} reconnection attempts"):
            return False

        return False

    self.connection_retry_count += 1

    # Calculate exponential backoff delay
    delay = self.calculate_connection_retry_delay(self.connection_retry_count)

    self.logger.info(f"Attempting MeshCore reconnection (attempt {self.connection_retry_count}/{self.max_connection_retries if self.max_connection_retries > 0 else '∞'}) with {delay:.1f}s delay...")

    # Clean up existing connection
    # Capture BLE address before disconnecting (needed for bluetoothctl cleanup)
    ble_device = None
    if self.meshcore and self.connection_type == 'ble':
        # Try to get BLE address from meshcore object before disconnecting
        try:
            # Check if meshcore has address attribute (BLE connections often do)
            if hasattr(self.meshcore, 'address') and self.meshcore.address:
                ble_device = self.meshcore.address
        except Exception:
            pass
        # Fallback to environment variables
        if not ble_device:
            ble_device = self.get_env('BLE_DEVICE', '') or self.get_env('BLE_ADDRESS', '')

    if self.meshcore:
        try:
            # Clean up event subscriptions BEFORE stopping/disconnecting to prevent pending tasks
            self.cleanup_event_subscriptions()
            # Stop the event dispatcher task synchronously to prevent "Task was destroyed" errors
            try:
                self.meshcore.stop()
            except Exception as e:
                self.logger.debug(f"Error stopping meshcore event dispatcher: {e}")
            # Disconnect the connection
            await self.meshcore.disconnect()
        except Exception as e:
            self.logger.debug(f"Error disconnecting during reconnect: {e}")
        self.meshcore = None

    # For BLE connections, ensure full cleanup including OS-level disconnect
    if self.connection_type == 'ble':
        # On Linux, force disconnect via bluetoothctl to ensure clean state
        import platform
        if platform.system() == 'Linux':
            try:
                import subprocess
                if ble_device and ble_device != 'Unknown':
                    self.logger.debug(f"Force disconnecting BLE device {ble_device} via bluetoothctl...")
                    subprocess.run(['bluetoothctl', 'disconnect', ble_device],
                                   capture_output=True, timeout=10)
                    await asyncio.sleep(1)  # Give time for disconnection
            except Exception as e:
                self.logger.debug(f"Could not force BLE disconnect via bluetoothctl: {e}")
        else:
            # On non-Linux systems, add a short delay to ensure BLE cleanup completes
            await asyncio.sleep(0.5)

    # Wait before retrying with exponential backoff
    if delay > 0:
        self.logger.info(f"Waiting {delay:.1f} seconds before retry (exponential backoff)...")
        if await self.wait_with_shutdown(delay):
            return False  # Shutdown was requested during delay

    # Attempt to reconnect
    success = await self.connect()
    if success:
        self.connection_retry_count = 0  # Reset counter on successful connection
        self.logger.info("MeshCore reconnection successful")
    else:
        self.logger.warning(f"MeshCore reconnection attempt {self.connection_retry_count} failed")

    return success
|
async def connection_monitor(self):
    """Monitor connection health and attempt reconnection if needed.

    Long-running supervisory loop: every health_check_interval seconds it
    checks the MeshCore link (deferring to the SDK's own auto-reconnect for
    TCP while it is active) and, on a separate mqtt_health_check_interval
    cadence, checks MQTT broker health. Exits when should_exit is set, the
    task is cancelled, or a consecutive-failure threshold is reached.
    """
    if self.health_check_interval <= 0:
        if self.debug:
            self.logger.debug("Connection monitoring disabled (health_check_interval <= 0)")
        return

    if self.debug:
        self.logger.debug(f"Starting connection monitoring (health check every {self.health_check_interval} seconds)")

    # Track last MQTT health check time separately
    last_mqtt_check = 0

    while not self.should_exit:
        try:
            if await self.wait_with_shutdown(self.health_check_interval):
                break  # Shutdown was requested

            # Check if we need to reconnect (either disconnected or health check failed)
            # For TCP with SDK auto-reconnect, only check health if SDK has exhausted
            if self._is_tcp_sdk_auto_reconnect_active():
                # SDK is handling reconnection - just check if it succeeded
                if self.meshcore and self.meshcore.is_connected:
                    if not self.connected:
                        # SDK reconnected - update our state
                        self._reset_connection_state()
                        self.logger.info("SDK auto-reconnect succeeded - connection restored")
                        await self._setup_after_reconnection()
                # Skip health check and reconnect logic - let SDK handle it
                continue

            # For other connection types or after SDK has exhausted, do normal health check
            health_check_passed = await self.check_connection_health()
            needs_reconnection = not self.connected or not health_check_passed

            if needs_reconnection:

                # For non-TCP connections, or TCP after SDK has exhausted, use custom reconnect
                if not self.connected:
                    self.logger.info("Connection is disconnected, attempting reconnection...")
                else:
                    self.logger.warning("MeshCore connection health check failed, attempting reconnection...")

                # Attempt to reconnect
                if await self.reconnect_meshcore():
                    self.logger.info("MeshCore reconnection successful, resuming packet capture")
                    self._reset_connection_state()
                    await self._setup_after_reconnection()
                else:
                    self.logger.error("MeshCore reconnection failed, will retry on next health check")
                    # Track consecutive failures for more intelligent failure detection
                    if self.track_consecutive_failure("connection"):
                        return  # Exit if service failure threshold reached

            # Check MQTT health periodically (separate interval to avoid being too aggressive)
            import time
            current_time = time.time()
            if self.enable_mqtt and (current_time - last_mqtt_check) >= self.mqtt_health_check_interval:
                last_mqtt_check = current_time
                mqtt_healthy = self.check_mqtt_health()

                if not mqtt_healthy:
                    # All brokers have been disconnected past grace period - this is a persistent failure
                    self.logger.warning("MQTT health check failed - all brokers disconnected past grace period")
                    # Track consecutive failures for more intelligent failure detection
                    if self.track_consecutive_failure("mqtt"):
                        return  # Exit if service failure threshold reached
                elif self.debug:
                    self.logger.debug("MQTT health check passed")

            # JWT token renewal is now handled proactively in safe_publish()
            # and by the dedicated jwt_renewal_scheduler task

        except asyncio.CancelledError:
            if self.debug:
                self.logger.debug("Connection monitoring cancelled")
            break
        except Exception as e:
            self.logger.error(f"Error in connection monitoring: {e}")
            # Brief pause before the next iteration so a persistent error
            # cannot spin the loop; still honors shutdown requests.
            if await self.wait_with_shutdown(5):
                break  # Shutdown was requested
|
def sanitize_client_id(self, name):
    """Build a valid MQTT client ID from a device name.

    Spaces become underscores, any character outside [a-zA-Z0-9_-] is
    dropped, and the result is capped at MQTT's 23-character limit.
    """
    prefix = self.get_env("CLIENT_ID_PREFIX", "meshcore_client_")
    candidate = prefix + name.replace(" ", "_")
    cleaned = re.sub(r"[^a-zA-Z0-9_-]", "", candidate)
    return cleaned[:23]
|
def on_mqtt_connect(self, client, userdata, flags, rc, properties=None):
    """paho-mqtt (callback API v2) CONNACK handler.

    On success: marks MQTT connected, clears any recorded outage for this
    broker, and resets the consecutive-failure counter. On failure: logs
    the error code and leaves state untouched.
    """
    broker_name = userdata.get('name', 'unknown') if userdata else 'unknown'
    broker_num = userdata.get('broker_num', None) if userdata else None

    if rc != 0:
        self.logger.error(f"MQTT connection failed for {broker_name} with code {rc}")
        return

    self.mqtt_connected = True
    self.logger.info(f"Connected to MQTT broker: {broker_name}")

    # If this broker had a recorded outage, report its duration and clear it
    if broker_num and broker_num in self.mqtt_disconnect_timestamps:
        import time
        outage = time.time() - self.mqtt_disconnect_timestamps[broker_num]
        self.logger.info(f"MQTT{broker_num} reconnected after {outage:.1f} seconds")
        del self.mqtt_disconnect_timestamps[broker_num]
        # Reset consecutive failures on successful reconnection
        self.reset_consecutive_failures("mqtt")

    # JWT renewal is handled by the dedicated JWT renewal scheduler.
    # Status is published later, once the device connection is ready too.
    self.logger.debug(f"MQTT broker {broker_name} connected, waiting for device connection...")
|
def on_mqtt_disconnect(self, client, userdata, disconnect_flags, reason_code, properties):
    """paho-mqtt (callback API v2) disconnect handler.

    Logs a human-readable reason for the disconnect, then checks whether
    any other broker is still connected. If none is, records a disconnect
    timestamp for this broker so the health check can apply the grace
    period before treating the outage as a persistent failure.
    """
    broker_name = userdata.get('name', 'unknown') if userdata else 'unknown'

    # Handle both integer and ReasonCode object types
    if hasattr(reason_code, 'value'):
        # ReasonCode object - get the integer value
        reason_code_int = reason_code.value
    else:
        # Integer or other type
        reason_code_int = int(reason_code) if reason_code is not None else 0

    # Provide more specific logging for different disconnect reasons
    if reason_code_int == mqtt.MQTT_ERR_KEEPALIVE:
        self.logger.warning(f"Disconnected from MQTT broker {broker_name} (code: Keep alive timeout)")
        self.logger.info("This may be due to network latency or firewall timeouts. Connection will be retried.")
    elif reason_code_int == mqtt.MQTT_ERR_CONN_LOST:
        self.logger.warning(f"Disconnected from MQTT broker {broker_name} (code: Connection lost)")
        self.logger.info("Network connection was lost. Connection will be retried.")
    elif reason_code_int == mqtt.MQTT_ERR_CONN_REFUSED:
        self.logger.warning(f"Disconnected from MQTT broker {broker_name} (code: Connection refused)")
        self.logger.info("Server refused the connection. Check credentials and server configuration.")
    elif reason_code_int == mqtt.MQTT_ERR_AUTH:
        self.logger.warning(f"Disconnected from MQTT broker {broker_name} (code: Authentication failed)")
        self.logger.info("Authentication failed. Check username/password or auth token.")
    elif reason_code_int == mqtt.MQTT_ERR_ACL_DENIED:
        self.logger.warning(f"Disconnected from MQTT broker {broker_name} (code: ACL denied)")
        self.logger.info("Access denied. Check topic permissions and broker ACL settings.")
    elif reason_code_int == mqtt.MQTT_ERR_TLS:
        self.logger.warning(f"Disconnected from MQTT broker {broker_name} (code: TLS error)")
        self.logger.info("TLS/SSL error occurred. Check certificate configuration.")
    else:
        # Map numeric codes to human-readable names
        error_names = {
            0: "Success",
            1: "Out of memory",
            2: "Protocol error",
            3: "Invalid arguments",
            4: "Not connected",
            5: "Connection refused",
            6: "Not found",
            7: "Connection lost",
            8: "TLS error",
            9: "Payload too large",
            10: "Not supported",
            11: "Authentication failed",
            12: "ACL denied",
            13: "Unknown error",
            14: "System error",
            15: "Queue size exceeded",
            16: "Keepalive timeout"
        }
        error_name = error_names.get(reason_code_int, f"Unknown error code {reason_code_int}")
        self.logger.warning(f"Disconnected from MQTT broker {broker_name} (code: {reason_code_int} - {error_name})")

    # Check if any brokers are still connected (excluding the one that just disconnected)
    connected_brokers = []
    for info in self.mqtt_clients:
        if info['client'] != client and info['client'].is_connected():
            connected_brokers.append(info)

    if not connected_brokers:
        self.mqtt_connected = False
        # Record disconnect timestamp for each disconnected broker (will be tracked in health check)
        import time
        for info in self.mqtt_clients:
            if info['client'] == client and info['broker_num'] not in self.mqtt_disconnect_timestamps:
                self.mqtt_disconnect_timestamps[info['broker_num']] = time.time()
                self.logger.debug(f"MQTT{info['broker_num']} disconnect recorded - grace period started")

        # Only attempt reconnection if we're not shutting down
        if not self.should_exit:
            self.logger.warning("All MQTT brokers disconnected. paho-mqtt will attempt reconnection automatically...")
            self.logger.info(f"Grace period: {self.mqtt_grace_period}s before counting as persistent failure")
            # Don't exit immediately - let reconnection logic and health check handle it
        else:
            self.logger.info("All MQTT brokers disconnected during shutdown")
    else:
        self.logger.info(f"Still connected to {len(connected_brokers)} broker(s)")
|
async def connect_mqtt_broker(self, broker_num):
    """Connect to a single MQTT broker.

    Builds and connects a paho-mqtt client for the broker configured under
    the MQTT{broker_num}_* environment variables: client ID, auth (JWT
    token or username/password), Last Will and Testament, optional TLS and
    WebSocket transport, and an adaptive keepalive.

    Args:
        broker_num: 1-based broker slot number used in env-var prefixes.

    Returns:
        dict with 'client' (the started paho client) and 'broker_num' on
        success, or None if the broker is disabled, misconfigured, or the
        connection attempt fails.
    """
    if not self.device_name:
        self.logger.error("Cannot connect to MQTT without device name")
        return None

    # Check if broker is enabled
    if not self.get_env_bool(f'MQTT{broker_num}_ENABLED', False):
        self.logger.debug(f"MQTT broker {broker_num} is disabled, skipping")
        return None

    # Validate IATA configuration for brokers that require it
    if self.broker_requires_iata(broker_num) and not self.has_configured_iata(broker_num):
        server = self.get_env(f'MQTT{broker_num}_SERVER', 'unknown')
        self.logger.warning(
            f"WARNING: MQTT broker {broker_num} ({server}) requires IATA configuration but IATA code is not set.\n"
            f" This broker will be DISABLED during startup.\n"
            f" To fix this issue:\n"
            f" 1. Set a global IATA code: PACKETCAPTURE_IATA=<airport_code>\n"
            f" 2. Or set a broker-specific IATA: PACKETCAPTURE_MQTT{broker_num}_IATA=<airport_code>\n"
            f" 3. Valid IATA codes are 3-letter airport identifiers (e.g., JFK, LAX, SFO)\n"
            f" 4. Restart the packet capture service after setting the IATA code"
        )
        return None

    try:
        # Create client ID (public key preferred so the ID is stable per device)
        client_id = self.sanitize_client_id(self.device_public_key or self.device_name)
        if broker_num > 1:
            client_id += f"_{broker_num}"

        self.logger.info(f"Connecting to MQTT{broker_num} with client ID: {client_id}")

        # Get transport type
        transport = self.get_env(f'MQTT{broker_num}_TRANSPORT', 'tcp')

        mqtt_client = mqtt.Client(
            mqtt.CallbackAPIVersion.VERSION2,
            client_id=client_id,
            clean_session=True,
            transport=transport
        )

        # Enable paho-mqtt's built-in reconnection
        mqtt_client.enable_logger(self.logger)
        mqtt_client.reconnect_delay_set(min_delay=1, max_delay=120)

        # Set user data for callbacks (consumed by on_mqtt_connect/disconnect)
        mqtt_client.user_data_set({
            'name': f"MQTT{broker_num}",
            'broker_num': broker_num
        })

        # Handle authentication
        use_auth_token = self.get_env_bool(f'MQTT{broker_num}_USE_AUTH_TOKEN', False)

        if use_auth_token:
            try:
                username = f"v1_{self.device_public_key.upper()}"
                audience = self.get_env(f'MQTT{broker_num}_TOKEN_AUDIENCE', "")

                if audience:
                    self.logger.info(f"MQTT{broker_num}: Using JWT authentication [aud: {audience}]")
                else:
                    self.logger.info(f"MQTT{broker_num}: Using JWT authentication")

                # Use the JWT creation method with private key from device
                password = await self.create_auth_token_jwt(audience, broker_num)
                if not password:
                    self.logger.error(f"MQTT{broker_num}: Failed to generate JWT token")
                    return None

                # Log JWT details for debugging if debug mode is enabled
                if self.debug:
                    self.logger.debug(f"MQTT{broker_num}: Generated JWT: {password}")
                    try:
                        import base64
                        parts = password.split('.')
                        if len(parts) == 3:
                            # '==' padding makes urlsafe_b64decode tolerant of stripped padding
                            header = base64.urlsafe_b64decode(parts[0] + '==').decode('utf-8')
                            payload = base64.urlsafe_b64decode(parts[1] + '==').decode('utf-8')
                            self.logger.debug(f"MQTT{broker_num}: JWT Header: {header}")
                            self.logger.debug(f"MQTT{broker_num}: JWT Payload: {payload}")
                            self.logger.debug(f"MQTT{broker_num}: JWT Signature length: {len(base64.urlsafe_b64decode(parts[2] + '=='))} bytes")
                    except Exception as e:
                        self.logger.debug(f"Could not decode JWT for inspection: {e}")

                mqtt_client.username_pw_set(username, password)
            except Exception as e:
                self.logger.error(f"MQTT{broker_num}: Failed to generate auth token: {e}")
                return None
        else:
            # Username/password authentication
            username = self.get_env(f'MQTT{broker_num}_USERNAME', "")
            password = self.get_env(f'MQTT{broker_num}_PASSWORD', "")
            if username:
                mqtt_client.username_pw_set(username, password)

        # Set Last Will and Testament
        lwt_topic = self.get_topic("status", broker_num)
        lwt_payload = json.dumps({
            "status": "offline",
            "timestamp": datetime.now().isoformat(),
            "origin": self.device_name,
            "origin_id": self.device_public_key.upper() if self.device_public_key and self.device_public_key != 'Unknown' else 'DEVICE'
        })
        lwt_qos = self.get_env_int(f'MQTT{broker_num}_QOS', 0)
        lwt_retain = self.get_env_bool(f'MQTT{broker_num}_RETAIN', True)

        mqtt_client.will_set(lwt_topic, lwt_payload, qos=lwt_qos, retain=lwt_retain)

        # Set callbacks
        mqtt_client.on_connect = self.on_mqtt_connect
        mqtt_client.on_disconnect = self.on_mqtt_disconnect

        # Get connection parameters
        server = self.get_env(f'MQTT{broker_num}_SERVER', "")
        if not server:
            self.logger.error(f"MQTT{broker_num}: Server not configured")
            return None

        port = self.get_env_int(f'MQTT{broker_num}_PORT', 1883)

        # Handle TLS/SSL
        use_tls = self.get_env_bool(f'MQTT{broker_num}_USE_TLS', False)
        if use_tls:
            import ssl
            tls_verify = self.get_env_bool(f'MQTT{broker_num}_TLS_VERIFY', True)

            if tls_verify:
                mqtt_client.tls_set(cert_reqs=ssl.CERT_REQUIRED)
                mqtt_client.tls_insecure_set(False)
            else:
                mqtt_client.tls_set(cert_reqs=ssl.CERT_NONE)
                mqtt_client.tls_insecure_set(True)
                self.logger.warning(f"MQTT{broker_num}: TLS certificate verification disabled (insecure)")

        # Handle WebSocket transport
        if transport == "websockets":
            mqtt_client.ws_set_options(
                path="/",
                headers=None
            )

        # Connect with adaptive keep-alive based on transport type
        if transport == "websockets":
            # WebSocket connections need longer keep-alive to handle network latency
            keepalive = self.get_env_int(f'MQTT{broker_num}_KEEPALIVE', 120)
        else:
            # TCP connections can use shorter keep-alive
            keepalive = self.get_env_int(f'MQTT{broker_num}_KEEPALIVE', 60)

        mqtt_client.connect(server, port, keepalive=keepalive)
        mqtt_client.loop_start()

        self.logger.info(f"Connected to MQTT{broker_num} at {server}:{port} (transport={transport}, tls={use_tls})")
        return {
            'client': mqtt_client,
            'broker_num': broker_num
        }

    except Exception as e:
        self.logger.error(f"MQTT connection error for MQTT{broker_num}: {str(e)}")
        return None
|
async def connect_mqtt(self):
    """Connect to every configured MQTT broker.

    Tries broker slots 1 through 6; each successful connection is recorded
    in self.mqtt_clients. After the brokers settle, publishes the initial
    "online" status (which includes the firmware version).

    Returns:
        bool: True if at least one broker connected, False otherwise.
    """
    for slot in range(1, 7):
        broker_info = await self.connect_mqtt_broker(slot)
        if broker_info is not None:
            self.mqtt_clients.append(broker_info)

    if not self.mqtt_clients:
        self.logger.error("Failed to connect to any MQTT broker")
        return False

    self.logger.info(f"Connected to {len(self.mqtt_clients)} MQTT broker(s)")

    if self.enable_mqtt:
        # Give MQTT connections a moment to stabilize before publishing
        await asyncio.sleep(1)
        await self.publish_status("online")

    return True
|
def disconnect_mqtt(self):
    """Disconnect from all MQTT brokers and clean up connections.

    Stops each client's network loop even when the broker link is already
    down (the loop thread keeps running after a disconnect, so gating
    loop_stop() on is_connected() would leak threads), then clears the
    client list and the connected flag.
    """
    if self.mqtt_clients:
        self.logger.info(f"Disconnecting from {len(self.mqtt_clients)} MQTT broker(s)...")

        for client_info in self.mqtt_clients:
            # Resolve the broker number up front so the except handler
            # can always reference it, even if lookup itself fails.
            broker_num = client_info.get('broker_num', '?')
            try:
                mqtt_client = client_info['client']

                # Always stop the network loop thread; it runs regardless
                # of whether the broker connection is currently up.
                mqtt_client.loop_stop()
                if mqtt_client.is_connected():
                    mqtt_client.disconnect()
                self.logger.debug(f"Disconnected from MQTT{broker_num}")

            except Exception as e:
                self.logger.warning(f"Error disconnecting from MQTT{broker_num}: {e}")

    # Clear the clients list
    self.mqtt_clients.clear()
    self.mqtt_connected = False
|
async def publish_status(self, status, client=None, broker_num=None, refresh_stats=True):
    """Publish a retained status message (optionally with device stats) to MQTT.

    Args:
        status: Status string, e.g. "online" or "offline".
        client: Optional specific MQTT client to publish through; when None
            the message is sent to every connected broker.
        broker_num: Broker index associated with ``client`` (forwarded to
            safe_publish).
        refresh_stats: When True and status is "online", force-refresh device
            stats right before publishing so the payload carries fresh data.
    """
    firmware_info = await self.get_firmware_info()
    status_msg = {
        "status": status,
        "timestamp": datetime.now().isoformat(),
        "origin": self.device_name,
        # Fall back to the literal 'DEVICE' until a real public key is known.
        "origin_id": self.device_public_key.upper() if self.device_public_key and self.device_public_key != 'Unknown' else 'DEVICE',
        "model": firmware_info.get('model', 'unknown'),
        "firmware_version": firmware_info.get('version', 'unknown'),
        "radio": self.radio_info or "unknown",
        "client_version": self._load_client_version()
    }

    # Attach stats (online status only) if supported and enabled
    if (
        status.lower() == "online"
        and self.stats_status_enabled
    ):
        stats_payload = None
        if refresh_stats:
            # Always force refresh stats right before publishing to ensure fresh data
            stats_payload = await self.refresh_stats(force=True)
            if not stats_payload:
                self.logger.debug("Stats refresh returned no data - stats will not be included in status message")
        elif self.latest_stats:
            # Caller opted out of a refresh; use a copy of the cached stats.
            stats_payload = dict(self.latest_stats)

        if stats_payload:
            status_msg["stats"] = stats_payload
        elif self.debug:
            self.logger.debug("No stats payload available - status message will not include stats")

    if client:
        # Publish only through the caller-supplied client/broker pair.
        self.safe_publish(None, json.dumps(status_msg), retain=True, client=client, broker_num=broker_num, topic_type="status")
    else:
        self.safe_publish(None, json.dumps(status_msg), retain=True, topic_type="status")
    if self.debug:
        self.logger.debug(f"Published status: {status}")
|
|
|
|
def stats_commands_available(self) -> bool:
    """Detect whether the connected meshcore build exposes stats commands.

    Logs a one-time message whenever the detected capability state flips,
    caches the result on ``stats_supported``, and returns it.
    """
    if not self.meshcore or not hasattr(self.meshcore, "commands"):
        return False

    cmd_iface = self.meshcore.commands
    have_all = all(
        callable(getattr(cmd_iface, name, None))
        for name in ("get_stats_core", "get_stats_radio")
    )

    new_state = "available" if have_all else "missing"
    # Only log on transitions so repeated polling stays quiet.
    if new_state != self.stats_capability_state:
        if have_all:
            self.logger.info("MeshCore stats commands detected - status messages will include device stats")
        else:
            self.logger.info("MeshCore stats commands not available - skipping stats in status messages")
        self.stats_capability_state = new_state

    self.stats_supported = have_all
    return have_all
|
|
|
|
async def refresh_stats(self, force: bool = False):
    """Fetch stats from the radio and cache them for status publishing.

    Args:
        force: When True, bypass the cache-freshness check and always query
            the device.

    Returns:
        A copy of the latest stats dict, or None when stats are disabled,
        unsupported, the device is disconnected, or no data was returned.
    """
    if not self.stats_status_enabled:
        if self.debug:
            self.logger.debug("Stats refresh skipped: stats_status_enabled is False")
        return None

    if not self._ensure_connected("refresh_stats", "debug"):
        return None

    if self.stats_refresh_interval <= 0:
        if self.debug:
            self.logger.debug("Stats refresh skipped: stats_refresh_interval is 0 or negative")
        return None

    if not self.stats_commands_available():
        if self.debug:
            self.logger.debug("Stats refresh skipped: stats commands not available")
        return None

    now = time.time()
    # Serve from cache unless forced or the cache is older than half the
    # refresh interval (with a 60 second floor).
    if (
        not force
        and self.latest_stats
        and (now - self.last_stats_fetch) < max(60, self.stats_refresh_interval // 2)
    ):
        return dict(self.latest_stats)

    async with self.stats_fetch_lock:
        # Another coroutine may have completed the refresh while we waited
        if (
            not force
            and self.latest_stats
            and (time.time() - self.last_stats_fetch) < max(60, self.stats_refresh_interval // 2)
        ):
            return dict(self.latest_stats)

        stats_payload = {}
        try:
            core_result = await self.retryable_device_command(
                lambda: self.meshcore.commands.get_stats_core(),
                "get_stats_core",
                timeout=8.0,
                max_retries=self.stats_retry_limit,  # Use stats retry limit
                retry_delay=0.2
            )
            if core_result and core_result.type == EventType.STATS_CORE and core_result.payload:
                stats_payload.update(core_result.payload)
            elif core_result and core_result.type == EventType.ERROR:
                self.logger.debug(f"Core stats unavailable: {core_result.payload}")
        except Exception as exc:
            self.logger.debug(f"Error fetching core stats: {exc}")

        try:
            radio_result = await self.retryable_device_command(
                lambda: self.meshcore.commands.get_stats_radio(),
                "get_stats_radio",
                timeout=8.0,
                max_retries=self.stats_retry_limit,  # Use stats retry limit
                retry_delay=0.2
            )
            if radio_result and radio_result.type == EventType.STATS_RADIO and radio_result.payload:
                stats_payload.update(radio_result.payload)
            elif radio_result and radio_result.type == EventType.ERROR:
                self.logger.debug(f"Radio stats unavailable: {radio_result.payload}")
        except Exception as exc:
            self.logger.debug(f"Error fetching radio stats: {exc}")

        # Cache the merged core+radio payload only when something came back.
        if stats_payload:
            self.latest_stats = stats_payload
            self.last_stats_fetch = time.time()
            if self.debug:
                self.logger.debug(f"Updated stats cache: {self.latest_stats}")
        elif self.debug:
            self.logger.debug("Stats refresh completed but returned no data")

    return dict(self.latest_stats) if self.latest_stats else None
|
|
|
|
async def stats_refresh_scheduler(self):
    """Background task: periodically publish an "online" status with fresh stats.

    Returns immediately when stats publishing is disabled or the interval is
    non-positive; otherwise loops until shutdown is requested.
    """
    if not self.stats_status_enabled or self.stats_refresh_interval <= 0:
        return

    while not self.should_exit:
        try:
            # Stats are fetched only as part of publishing status, so skip
            # the cycle entirely unless MQTT is up.
            if self.enable_mqtt and self.mqtt_connected:
                await self.publish_status("online", refresh_stats=True)
        except asyncio.CancelledError:
            break
        except Exception as exc:
            self.logger.debug(f"Stats refresh error: {exc}")

        # wait_with_shutdown returns True when shutdown was signalled mid-wait.
        if await self.wait_with_shutdown(self.stats_refresh_interval):
            break
|
|
|
|
def safe_publish(self, topic, payload, retain=False, client=None, broker_num=None, topic_type=None):
    """Publish to one or all MQTT brokers and return publish metrics.

    Args:
        topic: Explicit topic string; ignored when ``topic_type`` is given.
        payload: Serialized message body.
        retain: MQTT retain flag.
        client: Optional specific paho client; when None, publish to all.
        broker_num: Broker index associated with ``client`` (kept for API
            compatibility; per-client broker numbers come from mqtt_clients).
        topic_type: Logical topic name resolved per-broker via get_topic().

    Returns:
        Dict with "attempted" and "succeeded" publish counts.
    """
    metrics = {"attempted": 0, "succeeded": 0}

    if not self.mqtt_connected:
        self.logger.warning(f"Not connected - skipping publish to {topic or topic_type}")
        return metrics

    # Proactively check for expired tokens before publishing
    if self.enable_mqtt:
        try:
            # Check if any tokens are expired and need renewal.
            # NOTE: the loop variable must NOT be named `broker_num` - it
            # would shadow (and clobber) the method parameter of that name.
            expired_brokers = []
            for token_broker_num in list(self.jwt_tokens.keys()):
                if self.is_jwt_token_expired(token_broker_num):
                    expired_brokers.append(token_broker_num)

            if expired_brokers:
                self.logger.warning(f"Detected expired JWT tokens for brokers: {expired_brokers}")
                # Check circuit breaker before attempting JWT renewal
                current_time = time.time()
                if (current_time - self.jwt_circuit_breaker_reset_time) > self.jwt_circuit_breaker_timeout:
                    self.jwt_failure_count = 0  # Reset circuit breaker

                if self.jwt_failure_count >= self.max_jwt_failures:
                    self.logger.warning(f"JWT circuit breaker open - too many failures ({self.jwt_failure_count}). Skipping JWT renewal.")
                    return metrics

                # Schedule renewal only if not already in progress (prevent task explosion)
                if not self.jwt_renewal_in_progress:
                    self.jwt_renewal_in_progress = True
                    task = asyncio.create_task(self.check_and_renew_jwt_tokens())
                    self.active_tasks.add(task)
                    task.add_done_callback(lambda t: (self.active_tasks.discard(t), setattr(self, 'jwt_renewal_in_progress', False)))
        except Exception as e:
            self.logger.debug(f"Error checking token expiry before publish: {e}")

    # Narrow the target list when a specific client was supplied.
    if client:
        clients_to_publish = [info for info in self.mqtt_clients if info['client'] == client]
    else:
        clients_to_publish = self.mqtt_clients

    for mqtt_client_info in clients_to_publish:
        current_broker_num = mqtt_client_info['broker_num']
        try:
            mqtt_client = mqtt_client_info['client']

            # Check individual client connection status
            if not mqtt_client.is_connected():
                self.logger.warning(f"MQTT{current_broker_num} client not connected - skipping publish")
                continue

            # Resolve the topic: topic_type takes precedence (per-broker mapping).
            if topic_type:
                resolved_topic = self.get_topic(topic_type, current_broker_num)
                if self.debug:
                    self.logger.debug(f"Resolved topic for MQTT{current_broker_num} {topic_type}: {resolved_topic}")
            elif topic:
                resolved_topic = topic
            else:
                self.logger.error("Neither topic nor topic_type provided to safe_publish")
                continue

            # Skip publishing if topic is None (e.g. RAW topic not configured)
            if resolved_topic is None:
                if self.debug:
                    self.logger.debug(f"Skipping publish to MQTT{current_broker_num} - topic not configured for {topic_type}")
                continue

            # Validate topic before publishing
            if not resolved_topic:
                self.logger.error(f"Failed to resolve topic (type={topic_type}, topic={topic})")
                continue

            qos = self.get_env_int(f'MQTT{current_broker_num}_QOS', 0)
            # Force QoS 1 to 0 to prevent retry storms (like mctomqtt.py)
            if qos == 1:
                qos = 0

            # Only count as attempted if we actually try to publish
            metrics["attempted"] += 1
            result = mqtt_client.publish(resolved_topic, payload, qos=qos, retain=retain)
            if result.rc != mqtt.MQTT_ERR_SUCCESS:
                self.logger.error(f"Publish failed to {resolved_topic} on MQTT{current_broker_num}: {mqtt.error_string(result.rc)}")
            else:
                if self.verbose:
                    self.logger.info(f"✓ Published to {resolved_topic} on MQTT{current_broker_num} (len={len(payload)})")
                metrics["succeeded"] += 1
        except Exception as e:
            self.logger.error(f"Publish error on MQTT{current_broker_num}: {str(e)}", exc_info=True)

    return metrics
|
|
|
|
def parse_advert(self, payload):
    """Parse advert payload - matches C++ AdvertDataHelpers.h implementation.

    Wire layout: pubkey(32) + timestamp(4, LE) + signature(64) + app_data.
    app_data starts with a flags byte (low 4 bits = advert type, high bits =
    optional-field masks) followed, in order, by optional lat/lon (2x int32
    LE), feat1 (uint16 LE), feat2 (uint16 LE) and a UTF-8 name.

    Returns:
        Dict with "advert_parse_ok" plus whatever fields decoded; on header
        failure a dict with "advert_error" details instead. Truncated
        optional fields return the partially-parsed advert.
    """
    try:
        # The advert header is fixed-width: pubkey (32) + timestamp (4) + signature (64).
        if len(payload) < 100:
            self.logger.error(f"ADVERT payload too short for header: {len(payload)} bytes")
            return {
                "advert_parse_ok": False,
                "advert_error": "payload_too_short_header",
                "advert_payload_len": len(payload),
            }

        # advert header
        pub_key = payload[0:32]
        timestamp = int.from_bytes(payload[32:32+4], "little")
        signature = payload[36:36+64]

        advert = {
            "advert_parse_ok": True,
            "public_key": pub_key.hex(),
            "advert_time": timestamp,
            "signature": signature.hex(),
        }

        # appdata - parse according to C++ AdvertDataParser
        app_data = payload[100:]
        if len(app_data) == 0:
            self.logger.error("ADVERT has no app data")
            return advert

        flags_byte = app_data[0]

        # Log the full flag byte for debugging
        if self.debug:
            self.logger.debug(f"ADVERT flags: 0x{flags_byte:02X} (binary: {flags_byte:08b})")

        # Create flags object with the full byte value
        flags = AdvertFlags(flags_byte)

        # Extract type from lower 4 bits (matches C++ getType())
        adv_type = flags_byte & 0x0F
        if adv_type == AdvertFlags.ADV_TYPE_CHAT:
            advert.update({"mode": DeviceRole.Companion.name})
        elif adv_type == AdvertFlags.ADV_TYPE_REPEATER:
            advert.update({"mode": DeviceRole.Repeater.name})
        elif adv_type == AdvertFlags.ADV_TYPE_ROOM:
            advert.update({"mode": DeviceRole.RoomServer.name})
        elif adv_type == AdvertFlags.ADV_TYPE_SENSOR:
            advert.update({"mode": "Sensor"})
        else:
            # Unknown/future advert type; keep the numeric value visible.
            advert.update({"mode": f"Type{adv_type}"})

        # Parse data according to C++ AdvertDataParser logic
        i = 1  # Start after flags byte

        # Parse location data if present (matches C++ hasLatLon())
        if AdvertFlags.ADV_LATLON_MASK in flags:
            if len(app_data) < i + 8:
                self.logger.error(f"ADVERT with location flag too short: {len(app_data)} bytes")
                return advert

            # lat/lon are signed 32-bit values in micro-degrees (1e-6 deg).
            lat = int.from_bytes(app_data[i:i+4], 'little', signed=True)
            lon = int.from_bytes(app_data[i+4:i+8], 'little', signed=True)
            advert.update({"lat": round(lat / 1000000.0, 6), "lon": round(lon / 1000000.0, 6)})
            i += 8

        # Parse feat1 data if present
        if AdvertFlags.ADV_FEAT1_MASK in flags:
            if len(app_data) < i + 2:
                self.logger.error(f"ADVERT with feat1 flag too short: {len(app_data)} bytes")
                return advert
            feat1 = int.from_bytes(app_data[i:i+2], 'little')
            advert.update({"feat1": feat1})
            i += 2

        # Parse feat2 data if present
        if AdvertFlags.ADV_FEAT2_MASK in flags:
            if len(app_data) < i + 2:
                self.logger.error(f"ADVERT with feat2 flag too short: {len(app_data)} bytes")
                return advert
            feat2 = int.from_bytes(app_data[i:i+2], 'little')
            advert.update({"feat2": feat2})
            i += 2

        # Parse name data if present (matches C++ hasName())
        if AdvertFlags.ADV_NAME_MASK in flags:
            if len(app_data) >= i:
                name_len = len(app_data) - i
                if name_len > 0:
                    try:
                        # Decode name and handle potential null terminators
                        name = app_data[i:].decode('utf-8', errors='ignore').rstrip('\x00')
                        advert.update({"name": name})
                    except Exception as e:
                        self.logger.warning(f"Failed to decode ADVERT name: {e}")

        return advert

    except Exception as e:
        self.logger.error(f"Error parsing ADVERT payload: {e}", exc_info=True)
        return {
            "advert_parse_ok": False,
            "advert_error": "exception",
            "advert_error_detail": str(e),
            "advert_payload_len": len(payload) if payload is not None else 0,
        }
|
|
|
|
def decode_and_publish_message(self, raw_data):
    """Decode a raw hex packet - matches Packet.cpp exactly.

    Args:
        raw_data: Packet bytes as a hex string (no framing bytes).

    Returns:
        Dict describing route/payload type, path and (for ADVERT packets)
        the parsed advert fields, or None when the packet is malformed,
        uses an unsupported payload version, or cannot be decoded.
    """
    byte_data = b""  # pre-initialized so the except handler can report a length
    try:
        # Hex decoding happens inside the try so malformed hex input is
        # logged and rejected instead of raising out of this method.
        byte_data = bytes.fromhex(raw_data)

        # Validate minimum packet size
        if len(byte_data) < 2:
            self.logger.error(f"Packet too short: {len(byte_data)} bytes")
            return None

        header = byte_data[0]

        # Extract route type (low 2 bits of the header)
        route_type = RouteType(header & 0x03)
        has_transport = route_type in [RouteType.TRANSPORT_FLOOD, RouteType.TRANSPORT_DIRECT]

        # Calculate path length offset based on presence of transport codes
        offset = 1
        if has_transport:
            offset += 4

        # Check if we have enough data for path_len
        if len(byte_data) <= offset:
            self.logger.error(f"Packet too short for path_len at offset {offset}: {len(byte_data)} bytes")
            return None

        path_len_byte = byte_data[offset]
        offset += 1

        # MeshCore packs path_len byte: low 6 bits hop count, high 2 bits hash-size mode.
        path_byte_len, path_hash_bytes = self._decode_packed_path_length(path_len_byte)
        if self.debug:
            self.logger.debug(
                "Decoded path length: "
                f"path_len_byte=0x{path_len_byte:02X}, path_byte_len={path_byte_len}, "
                f"path_hash_bytes={path_hash_bytes}, offset_after_path_len={offset}"
            )

        # Check if we have enough data for the full path
        if len(byte_data) < offset + path_byte_len:
            self.logger.error(f"Packet too short for path (need {offset + path_byte_len}, have {len(byte_data)})")
            return None

        # Extract path
        path_bytes = byte_data[offset:offset + path_byte_len]
        offset += path_byte_len

        # Remaining data is payload
        payload = byte_data[offset:]
        if self.debug:
            self.logger.debug(
                f"Packet layout: packet_len={len(byte_data)}, payload_offset={offset}, payload_len={len(payload)}"
            )

        # Extract payload version (bits 6-7)
        payload_version = PayloadVersion((header >> 6) & 0x03)

        # Only accept VER_1 (version 0)
        if payload_version != PayloadVersion.VER_1:
            self.logger.warning(f"Encountered an unknown packet version. Version: {payload_version.value} RAW: {raw_data}")
            return None

        # Extract payload type (bits 2-5)
        payload_type = PayloadType((header >> 2) & 0x0F)

        # Convert path bytes to hop tokens using decoded hash width (1/2/3 bytes)
        path_values = self._split_path_hops(path_bytes, path_hash_bytes)

        message = {
            "payload_type": payload_type.name,
            "payload_type_value": payload_type.value,
            "payload_version": payload_version.name,
            "route_type": route_type.name,
            "path": path_values,
            "path_len_byte": path_len_byte,
            "path_byte_len": path_byte_len,
            "path_hash_bytes": path_hash_bytes,
        }

        payload_value = {}
        if payload_type is PayloadType.ADVERT:
            payload_value = self.parse_advert(payload)
            if not payload_value.get("advert_parse_ok", False):
                self.logger.warning(
                    "Dropping malformed ADVERT packet: "
                    f"{payload_value.get('advert_error', 'unknown_error')} "
                    f"(payload_len={payload_value.get('advert_payload_len', len(payload))})"
                )
                return None

        # Merge parsed payload fields (the original duplicated this call in
        # both branches of an ADVERT if/else; one unconditional update suffices).
        message.update(payload_value)

        if self.debug:
            self.logger.debug(f"Successfully decoded: route={message['route_type']}, type={message['payload_type']}")
        return message

    except Exception as e:
        # Log as ERROR not DEBUG so we can see what's failing
        self.logger.error(f"Error decoding packet (len={len(byte_data)}): {e}", exc_info=True)
        self.logger.error(f"Failed packet hex: {raw_data}")
        return None
|
|
|
|
def _decode_packed_path_length(self, path_len_byte: int, max_path_size: int = 64) -> tuple[int, int]:
    """Decode packed path length byte per MeshCore firmware.

    path_len layout:
      - low 6 bits: hop count
      - high 2 bits: bytes-per-hop minus 1

    Returns:
        (total path bytes, bytes per hop). Reserved or oversized encodings
        fall back to the legacy interpretation: (raw byte value, 1).
    """
    hops = path_len_byte & 0x3F
    hop_width = (path_len_byte >> 6) + 1

    # Hash-size mode 3 (=> 4 bytes/hop) is reserved in firmware; treat the
    # whole byte as a legacy one-byte length instead.
    if hop_width == 4:
        if self.debug:
            self.logger.debug(
                "Path decode fallback to legacy length due to reserved hash-size mode: "
                f"path_len_byte=0x{path_len_byte:02X}"
            )
        return path_len_byte, 1

    total = hops * hop_width
    if total > max_path_size:
        # Invalid packed value; fallback keeps compatibility with legacy one-byte parsing.
        if self.debug:
            self.logger.debug(
                "Path decode fallback to legacy length due to oversized packed path: "
                f"path_len_byte=0x{path_len_byte:02X}, hop_count={hops}, "
                f"bytes_per_hop={hop_width}, computed_path_byte_len={total}, "
                f"max_path_size={max_path_size}"
            )
        return path_len_byte, 1

    if self.debug:
        self.logger.debug(
            "Path decode packed mode: "
            f"path_len_byte=0x{path_len_byte:02X}, hop_count={hops}, "
            f"bytes_per_hop={hop_width}, path_byte_len={total}"
        )
    return total, hop_width
|
|
|
|
def _split_path_hops(self, path_bytes: bytes, bytes_per_hop: int) -> list[str]:
    """Split path bytes into per-hop hex tokens.

    Args:
        path_bytes: Raw path bytes from the packet.
        bytes_per_hop: Hash width per hop (clamped to at least 1).

    Returns:
        List of hex strings, one per hop. When the path does not divide
        evenly into hops, falls back to one-byte (two hex char) tokens.
    """
    path_hex = path_bytes.hex()
    # max(..., 1) guarantees at least one byte (two hex chars) per hop, so
    # the chunk width is always >= 2. (The original also guarded against
    # hop_hex_chars <= 0, which was unreachable and has been removed.)
    hop_hex_chars = max(bytes_per_hop, 1) * 2

    nodes = [path_hex[i:i + hop_hex_chars] for i in range(0, len(path_hex), hop_hex_chars)]
    if (len(path_hex) % hop_hex_chars) != 0:
        # Uneven split: fall back to legacy one-byte hop tokens.
        nodes = [path_hex[i:i + 2] for i in range(0, len(path_hex), 2)]
    return nodes
|
|
|
|
def calculate_packet_hash(self, raw_hex: str, payload_type: int = None) -> str:
    """Calculate hash for packet identification - based on packet.cpp.

    Mirrors MeshCore Packet::calculatePacketHash(): SHA-256 over the payload
    type byte, then (TRACE packets only) the path_len as uint16 LE, then the
    payload bytes. Returns the first 16 hex chars uppercased, or a string of
    zeros when the packet is too short or hashing fails.
    """
    try:
        packet = bytes.fromhex(raw_hex)
        header = packet[0]

        # Payload type lives in header bits 2-5 unless the caller supplied it.
        if payload_type is None:
            payload_type = (header >> 2) & 0x0F

        # TRANSPORT_FLOOD (0x00) / TRANSPORT_DIRECT (0x03) carry 4 extra
        # transport-code bytes before the packed path-length byte.
        cursor = 1
        if (header & 0x03) in [0x00, 0x03]:
            cursor += 4

        if len(packet) <= cursor:
            self.logger.debug(f"Packet too short for path_len while hashing: len={len(packet)}, offset={cursor}")
            return "0000000000000000"

        # Read packed path_len byte from wire, then skip past the path.
        packed_len = packet[cursor]
        cursor += 1

        path_span, _ = self._decode_packed_path_length(packed_len)
        body_start = cursor + path_span
        if body_start > len(packet):
            self.logger.debug(
                f"Packet too short for decoded path while hashing: need {body_start}, have {len(packet)}"
            )
            return "0000000000000000"
        body = packet[body_start:]

        digest = hashlib.sha256()
        digest.update(bytes([payload_type]))

        if payload_type == 9:  # PAYLOAD_TYPE_TRACE
            # C++ hashes the uint16_t path_len (sizeof == 2) little-endian.
            digest.update(packed_len.to_bytes(2, byteorder='little'))

        digest.update(body)

        # First 16 hex characters (8 bytes), uppercase.
        return digest.hexdigest()[:16].upper()
    except Exception as e:
        self.logger.debug(f"Error calculating hash: {e}")
        return "0000000000000000"
|
|
|
|
def format_packet_data(self, raw_hex: str, rf_data: Optional[Dict] = None) -> Dict[str, Any]:
    """Format packet data to match mctomqtt.py exactly.

    Args:
        raw_hex: Full packet as a hex string.
        rf_data: Optional RF metadata dict (snr/rssi/payload_length) captured
            from the matching RX log event.

    Returns:
        A flat dict ready for MQTT/file output. Numeric fields are
        stringified to match the mctomqtt.py wire format.
    """
    current_time = datetime.now()
    timestamp = current_time.isoformat()

    # Decode packet using the same logic as mctomqtt.py
    decoded_message = self.decode_and_publish_message(raw_hex)

    # Extract basic info
    packet_len = len(raw_hex) // 2  # Convert hex string to byte count

    # Get route type from decoded message
    route = "U"  # Default
    packet_type = "0"  # Default
    payload_len = "0"  # Default

    # Initialize firmware payload length early
    firmware_payload_len = None
    if rf_data:
        firmware_payload_len = rf_data.get('payload_length')

    if decoded_message:
        # Map route type names to single letters like mctomqtt.py
        route_map = {
            "TRANSPORT_FLOOD": "F",
            "FLOOD": "F",
            "DIRECT": "D",
            "TRANSPORT_DIRECT": "T"
        }
        route = route_map.get(decoded_message.get('route_type', ''), "U")

        # Get payload type as string - now matches C++ definitions exactly
        payload_type_map = {
            "REQ": "0",
            "RESPONSE": "1",
            "TXT_MSG": "2",
            "ACK": "3",
            "ADVERT": "4",
            "GRP_TXT": "5",
            "GRP_DATA": "6",
            "ANON_REQ": "7",
            "PATH": "8",
            "TRACE": "9",
            "MULTIPART": "10",
            "CONTROL": "11",
            "Type12": "12",
            "Type13": "13",
            "Type14": "14",
            "RAW_CUSTOM": "15"
        }
        packet_type = payload_type_map.get(decoded_message.get('payload_type', ''), "0")

    # Use firmware-provided payload length if available, otherwise calculate
    if firmware_payload_len is not None:
        payload_len = str(firmware_payload_len)
    else:
        # Fallback calculation if firmware doesn't provide it
        if decoded_message and 'path' in decoded_message:
            # Calculate actual payload length from the raw data
            # Total bytes - header(1) - transport(4 if present) - path_length(1) - path_bytes
            path_len_bytes = decoded_message.get('path_byte_len')
            if path_len_bytes is None:
                path_len_bytes = len(decoded_message.get('path', []))
            has_transport = decoded_message.get('route_type') in ['TRANSPORT_FLOOD', 'TRANSPORT_DIRECT']
            transport_bytes = 4 if has_transport else 0
            payload_len = str(max(0, packet_len - 1 - transport_bytes - 1 - path_len_bytes))
        else:
            # Fallback calculation
            payload_len = str(max(0, packet_len - 1))

    # Get origin_id (use device info if available, otherwise use config or generate)
    origin_id = None
    if self.device_public_key and self.device_public_key != 'Unknown':
        origin_id = self.device_public_key
    else:
        # Try to get from environment as fallback
        origin_id = self.get_env('ORIGIN_ID', None)
        if not origin_id:
            # Generate a hash from device name as last resort
            device_name = self.device_name or 'Unknown'
            origin_id = hashlib.sha256(device_name.encode()).hexdigest()
            self.logger.warning(f"Using generated origin_id from device name: {origin_id}")

    # Normalize origin_id to uppercase
    if origin_id and origin_id != 'Unknown':
        origin_id = origin_id.upper()

    # Extract RF data if available
    snr = "Unknown"
    rssi = "Unknown"

    if rf_data:
        snr = str(rf_data.get('snr', 'Unknown'))
        rssi = str(rf_data.get('rssi', 'Unknown'))

    # Build the packet data structure to match mctomqtt.py exactly
    packet_data = {
        "origin": self.device_name or self.get_env('ORIGIN', 'MeshCore Device'),
        "origin_id": origin_id,
        "timestamp": timestamp,
        "type": "PACKET",
        "direction": "rx",
        "time": current_time.strftime("%H:%M:%S"),
        "date": current_time.strftime("%d/%m/%Y"),
        "len": str(packet_len),
        "packet_type": packet_type,
        "route": route,
        "payload_len": payload_len,
        "raw": raw_hex.upper(),
        "SNR": snr,
        "RSSI": rssi,
        "hash": self.calculate_packet_hash(raw_hex, decoded_message.get('payload_type_value') if decoded_message else None)
    }

    # Add path for route=D like mctomqtt.py
    if route == "D" and decoded_message and 'path' in decoded_message:
        packet_data["path"] = ",".join(decoded_message['path'])

    return packet_data
|
|
|
|
async def handle_rf_log_data(self, event):
    """Handle RF log events: cache SNR/RSSI metadata and process the packet.

    Events without an 'snr' field are ignored. The packet hex is taken from
    the 'payload' field when present, otherwise from 'raw_hex' with the
    first two framing bytes stripped.
    """
    try:
        data = event.payload
        if 'snr' not in data:
            return

        # Prefer 'payload' (already stripped of framing bytes).
        frame_hex = data.get('payload')
        if not frame_hex:
            stripped = data.get('raw_hex')
            # Skip first 2 bytes (4 hex chars) of the framed variant.
            frame_hex = stripped[4:] if stripped else None

        if not frame_hex:
            self.logger.warning(f"RF log data missing both 'payload' and 'raw_hex' fields: {data.keys()}")
            return

        prefix = frame_hex[:32]
        rf_entry = {
            'snr': data.get('snr'),
            'rssi': data.get('rssi'),
            'timestamp': time.time(),
            'raw_hex': frame_hex,
            'payload_length': data.get('payload_length'),
        }
        self.rf_data_cache[prefix] = rf_entry

        # Evict stale cache entries.
        now = time.time()
        max_age = self.get_env_float('RF_DATA_TIMEOUT', 15.0)
        self.rf_data_cache = {
            key: entry for key, entry in self.rf_data_cache.items()
            if now - entry['timestamp'] < max_age
        }

        # Remember RF-originated packets so a RAW_DATA event for the same
        # reception doesn't double-publish.
        self.recent_rf_packets[frame_hex.upper()] = now
        self.recent_rf_packets = {
            key: seen for key, seen in self.recent_rf_packets.items()
            if now - seen < self.raw_duplicate_window
        }

        await self.process_packet_from_rf_data(frame_hex, rf_entry)

    except Exception as e:
        self.logger.error(f"Error handling RF log data: {e}", exc_info=True)
|
|
|
|
async def process_packet_from_rf_data(self, raw_hex: str, rf_data: dict):
    """Format, publish, and log one packet received via RF log data.

    Args:
        raw_hex: Packet bytes as a hex string.
        rf_data: RF metadata dict (snr/rssi/payload_length) for this packet.
    """
    try:
        # Format packet data
        packet_data = self.format_packet_data(raw_hex, rf_data)

        # Output the packet data (console/file/MQTT)
        publish_metrics = self.output_packet(packet_data)

        self.packet_count += 1
        # Standard log line format for both modes
        self.logger.info(f"📦 Captured packet #{self.packet_count}: {packet_data['route']} type {packet_data['packet_type']}, {packet_data['len']} bytes, SNR: {packet_data['SNR']}, RSSI: {packet_data['RSSI']}, hash: {packet_data['hash']} (MQTT: {publish_metrics['succeeded']}/{publish_metrics['attempted']})")

        # Output full packet data structure in debug mode only
        if self.debug:
            self.logger.debug("📋 Full packet data structure:")
            # json is already imported at module level; the former
            # function-local `import json` was redundant and removed.
            self.logger.debug(json.dumps(packet_data, indent=2))

    except Exception as e:
        self.logger.error(f"Error processing packet from RF data: {e}")
|
|
|
|
async def handle_raw_data(self, event):
    """Handle raw data events (full packet data).

    RAW_DATA events can duplicate packets already handled via RX_LOG_DATA,
    so packets seen within ``raw_duplicate_window`` seconds are skipped.
    SNR/RSSI metadata is matched from the cache by a 32-hex-char prefix.
    """
    try:
        payload = event.payload
        self.logger.info(f"📦 RAW_DATA EVENT RECEIVED")

        # Extract raw hex data (payload shape varies by event source)
        raw_hex = None
        if hasattr(payload, 'data'):
            raw_hex = payload.data
        elif 'data' in payload:
            raw_hex = payload['data']
        elif 'raw_hex' in payload:
            raw_hex = payload['raw_hex']

        if raw_hex:
            # Remove 0x prefix if present
            if raw_hex.startswith('0x'):
                raw_hex = raw_hex[2:]

            raw_hex = raw_hex.upper()
            current_time = time.time()
            # Skip if this exact packet was just processed from RX_LOG_DATA.
            recent_rf_time = self.recent_rf_packets.get(raw_hex)
            if recent_rf_time is not None and (current_time - recent_rf_time) < self.raw_duplicate_window:
                if self.debug:
                    self.logger.debug("Skipping RAW_DATA packet already processed from RX_LOG_DATA")
                return

            # Drop expired entries from the duplicate-suppression window.
            self.recent_rf_packets = {
                k: v for k, v in self.recent_rf_packets.items()
                if current_time - v < self.raw_duplicate_window
            }

            # Find corresponding RF data
            packet_prefix = raw_hex[:32]
            rf_data = self.rf_data_cache.get(packet_prefix)

            # Format packet data
            packet_data = self.format_packet_data(raw_hex, rf_data)

            # Output the packet data
            publish_metrics = self.output_packet(packet_data)

            self.packet_count += 1
            self.logger.info(f"📦 Captured packet #{self.packet_count}: {packet_data['route']} type {packet_data['packet_type']}, {packet_data['len']} bytes, SNR: {packet_data['SNR']}, RSSI: {packet_data['RSSI']}, hash: {packet_data['hash']} (MQTT: {publish_metrics['succeeded']}/{publish_metrics['attempted']})")

    except Exception as e:
        self.logger.error(f"Error handling raw data event: {e}")
|
|
|
|
def output_packet(self, packet_data: Dict[str, Any]):
    """Write a captured packet to console/file and publish it over MQTT.

    The type filter (``allowed_upload_types``) affects only the MQTT upload;
    console and file output always happen first.

    Returns:
        Dict with "attempted"/"succeeded" MQTT publish counts (zeros when
        MQTT is disabled or the packet type is filtered out).
    """
    pretty_json = json.dumps(packet_data, indent=2)

    # Console echo of the full JSON only in verbose mode.
    if self.verbose:
        self.logger.info("=" * 80)
        self.logger.info(pretty_json)
        self.logger.info("=" * 80)

    # File output, when an output handle was configured.
    if self.output_handle:
        self.output_handle.write(pretty_json + "\n")
        self.output_handle.flush()

    # Upload-type filtering applies to MQTT only (file/console already done).
    if self.allowed_upload_types is not None:
        pkt_type = packet_data.get('packet_type')
        if pkt_type not in self.allowed_upload_types:
            if self.debug:
                self.logger.debug(f"Filtered out packet type {pkt_type} from upload (not in allowed types: {sorted(self.allowed_upload_types)})")
            return {"attempted": 0, "succeeded": 0}

    if not self.enable_mqtt:
        return {"attempted": 0, "succeeded": 0}

    # Publish the full packet record to every broker's "packets" topic.
    full_metrics = self.safe_publish(None, json.dumps(packet_data), topic_type="packets")

    # Publish a slim raw-hex envelope only to brokers with a RAW topic configured.
    raw_envelope = {
        "origin": packet_data["origin"],
        "origin_id": packet_data["origin_id"],
        "timestamp": packet_data["timestamp"],
        "type": "RAW",
        "data": packet_data["raw"]
    }
    raw_metrics = self.safe_publish(None, json.dumps(raw_envelope), topic_type="raw")

    # Each broker publishes to its configured topics independently; sum them.
    return {
        "attempted": full_metrics["attempted"] + raw_metrics["attempted"],
        "succeeded": full_metrics["succeeded"] + raw_metrics["succeeded"],
    }
|
|
|
|
async def setup_disconnect_handler(self):
    """Set up handler for disconnect events from meshcore."""

    # Known disconnect reasons mapped to their diagnostic messages.
    reason_messages = {
        'tcp_no_response': "Disconnected due to no TCP responses - possible WiFi issue",
        'tcp_disconnect': "TCP connection closed by remote - possible radio reset",
        'ble_disconnect': "BLE connection lost - device may have moved out of range",
        'serial_disconnect': "Serial connection lost - cable may be disconnected",
    }

    async def on_disconnect(event):
        reason = event.payload.get('reason', 'unknown')
        self.logger.warning(f"Disconnect event received: {reason}")

        if reason in reason_messages:
            self.logger.error(reason_messages[reason])
        else:
            self.logger.warning(f"Disconnected for unknown reason: {reason}")

        # For TCP with SDK auto-reconnect enabled, this event means the SDK
        # has exhausted its attempts; flag it so the custom logic takes over.
        if self.connection_type == 'tcp' and self.tcp_sdk_auto_reconnect_enabled:
            self.sdk_reconnect_exhausted = True
            self.logger.info("SDK auto-reconnect has exhausted - custom reconnect logic will take over")

        # Mark disconnected; the connection monitor drives reconnection.
        self.connected = False
        self.logger.info("Connection status updated - connection monitor will handle reconnection")

    self.meshcore.subscribe(EventType.DISCONNECTED, on_disconnect)
    self.logger.debug("Disconnect event handler registered")
|
async def setup_event_handlers(self):
    """Setup event handlers for packet capture."""
    # Drop any stale subscriptions first so reconnects don't leave
    # orphaned EventDispatcher tasks behind.
    self.cleanup_event_subscriptions()

    async def handle_rx_log(event):
        # RX log events carry the SNR/RSSI information for a packet.
        if self.debug:
            self.logger.debug(f"RF_DATA event received: {event}")
        await self.handle_rf_log_data(event)

    async def handle_raw(event):
        # RAW_DATA events carry the full packet bytes.
        if self.debug:
            self.logger.debug(f"RAW_DATA event received: {event}")
        await self.handle_raw_data(event)

    async def handle_status(event):
        if self.debug:
            self.logger.debug(f"STATUS_RESPONSE event received: {event}")
        # Log the status data to see what's available
        if hasattr(event, 'payload') and event.payload:
            self.logger.debug(f"Status data: {event.payload}")

    # Subscribe to events
    self.meshcore.subscribe(EventType.RX_LOG_DATA, handle_rx_log)
    self.meshcore.subscribe(EventType.RAW_DATA, handle_raw)
    self.meshcore.subscribe(EventType.STATUS_RESPONSE, handle_status)

    # Register the disconnect handler as well.
    await self.setup_disconnect_handler()

    self.logger.info("Event handlers setup complete")

    # Packet capture mode is automatically enabled when subscribing to events.
    self.logger.info("Packet capture mode enabled via event subscriptions")
|
async def start(self):
    """Start packet capture and run the main loop until shutdown.

    Connects to the MeshCore node (and MQTT, if enabled), installs
    event handlers, starts the background scheduler tasks, then loops
    checking shutdown conditions and task counts. On exit, all
    background tasks are cancelled and awaited, then stop() is called.
    """
    self.logger.info("Starting MeshCore Packet Capture...")

    # Connect to MeshCore node; without a radio there is nothing to do.
    if not await self.connect():
        self.logger.error("Failed to connect to MeshCore node")
        return

    # Connect to MQTT broker if enabled; MQTT failure is non-fatal.
    if self.enable_mqtt:
        if not await self.connect_mqtt():
            self.logger.warning("Failed to connect to MQTT broker, continuing without MQTT...")
    else:
        self.logger.info("MQTT disabled, skipping MQTT connection")

    # Setup event handlers
    await self.setup_event_handlers()

    # Start auto message fetching (optional; see PACKETCAPTURE_DRAIN_MESSAGES)
    await self._start_auto_message_fetching_if_enabled()

    self.logger.info("Packet capture is running. Press Ctrl+C to stop.")
    self.logger.info("Waiting for packets...")

    # Start connection monitoring task (delay to allow MQTT connections to stabilize)
    await asyncio.sleep(5)  # Give MQTT connections time to fully establish
    monitoring_task = asyncio.create_task(self.connection_monitor())

    # Start advert scheduler task (interval 0 disables it)
    if self.advert_interval_hours > 0:
        self.advert_task = asyncio.create_task(self.advert_scheduler())

    # Start JWT renewal scheduler task (interval 0 disables it)
    if self.jwt_renewal_interval > 0:
        self.jwt_renewal_task = asyncio.create_task(self.jwt_renewal_scheduler())

    # Start stats refresh scheduler
    if self.stats_status_enabled and self.stats_refresh_interval > 0:
        self.stats_update_task = asyncio.create_task(self.stats_refresh_scheduler())

    try:
        while not self.should_exit:
            current_time = time.time()

            # Check if we should exit for systemd restart
            if self.should_exit_for_systemd_restart():
                self.logger.critical("Service failure threshold reached - exiting for systemd restart")
                self.should_exit = True

            # Monitor active tasks to prevent explosion: if the tracked set
            # grows past the cap, cancel the excess tasks.
            if current_time - self.last_task_check >= self.task_monitoring_interval:
                active_count = len(self.active_tasks)
                if active_count > self.max_active_tasks:
                    self.logger.warning(f"Too many active tasks ({active_count}), cleaning up...")
                    # Cancel excess tasks (NOTE(review): set iteration order is
                    # arbitrary, so which tasks survive is unspecified)
                    tasks_to_cancel = list(self.active_tasks)[self.max_active_tasks:]
                    for task in tasks_to_cancel:
                        task.cancel()
                        self.active_tasks.discard(task)
                self.last_task_check = current_time

            # Use shutdown-aware waiting so Ctrl+C/SIGTERM is noticed quickly
            if await self.wait_with_shutdown(5):
                break  # Shutdown was requested
    except KeyboardInterrupt:
        self.logger.info("Received interrupt signal")
    finally:
        # Cancel all scheduler/monitor tasks first...
        monitoring_task.cancel()
        if self.advert_task:
            self.advert_task.cancel()
        if self.jwt_renewal_task:
            self.jwt_renewal_task.cancel()
        if self.stats_update_task:
            self.stats_update_task.cancel()

        # ...and all tracked active tasks (copy: cancellation may mutate the set)
        for task in self.active_tasks.copy():
            task.cancel()

        # Await each cancelled task so cancellation completes cleanly
        try:
            await monitoring_task
        except asyncio.CancelledError:
            pass
        if self.advert_task:
            try:
                await self.advert_task
            except asyncio.CancelledError:
                pass
        if self.jwt_renewal_task:
            try:
                await self.jwt_renewal_task
            except asyncio.CancelledError:
                pass
        if self.stats_update_task:
            try:
                await self.stats_update_task
            except asyncio.CancelledError:
                pass

        # Wait for all remaining active tasks; return_exceptions keeps one
        # failing task from aborting the gather.
        if self.active_tasks:
            await asyncio.gather(*self.active_tasks, return_exceptions=True)

    await self.stop()
|
async def stop(self):
    """Stop packet capture with timeouts on each teardown stage.

    Best-effort shutdown: publish an offline status (if MQTT is up),
    disconnect the radio (with extra BLE cleanup on Linux), shut down
    all MQTT clients, and close the output file. A failure in any one
    stage is logged and does not block the remaining stages.
    """
    self.logger.info("Stopping packet capture...")
    self.connected = False

    try:
        # Publish offline status with timeout so a stuck broker can't hang shutdown
        if self.enable_mqtt and self.mqtt_connected:
            await asyncio.wait_for(self.publish_status("offline", refresh_stats=False), timeout=5.0)
    except asyncio.TimeoutError:
        self.logger.warning("Timeout publishing offline status")
    except Exception as e:
        self.logger.warning(f"Error publishing offline status: {e}")

    # Handle BLE disconnection if using BLE connection
    if self.meshcore and self.get_env('CONNECTION_TYPE', 'ble').lower() == 'ble':
        try:
            self.logger.info("Disconnecting BLE device...")
            # Clean up event subscriptions BEFORE stopping/disconnecting to prevent pending tasks
            self.cleanup_event_subscriptions()
            # Stop the event dispatcher task synchronously to prevent "Task was destroyed" errors
            try:
                self.meshcore.stop()
            except Exception as e:
                self.logger.debug(f"Error stopping meshcore event dispatcher: {e}")
            await asyncio.wait_for(self.meshcore.disconnect(), timeout=10.0)

            # Additional BLE disconnection using bluetoothctl on Linux
            import platform
            if platform.system() == 'Linux':
                try:
                    import subprocess
                    ble_device = self.get_env('BLE_DEVICE', '') or self.get_env('BLE_ADDRESS', '')
                    if ble_device and ble_device != 'Unknown':
                        self.logger.info(f"Force disconnecting BLE device {ble_device}...")
                        subprocess.run(['bluetoothctl', 'disconnect', ble_device],
                                       capture_output=True, timeout=10)
                        await asyncio.sleep(1)  # Give time for disconnection
                except Exception as e:
                    self.logger.debug(f"Could not force BLE disconnect via bluetoothctl: {e}")
            else:
                # On non-Linux systems, add a short delay to ensure BLE cleanup completes
                await asyncio.sleep(0.5)
        except asyncio.TimeoutError:
            self.logger.warning("Timeout disconnecting BLE device")
        except Exception as e:
            self.logger.warning(f"Error during BLE disconnection: {e}")
    elif self.meshcore:
        try:
            # Clean up event subscriptions BEFORE stopping/disconnecting to prevent pending tasks
            self.cleanup_event_subscriptions()
            # Stop the event dispatcher task synchronously to prevent "Task was destroyed" errors
            try:
                self.meshcore.stop()
            except Exception as e:
                self.logger.debug(f"Error stopping meshcore event dispatcher: {e}")
            await asyncio.wait_for(self.meshcore.disconnect(), timeout=5.0)
        except asyncio.TimeoutError:
            self.logger.warning("Timeout disconnecting MeshCore device")
        except Exception as e:
            self.logger.warning(f"Error disconnecting MeshCore device: {e}")

    # Tear down MQTT clients. Narrowed from a bare `except:` so that
    # KeyboardInterrupt/SystemExit are not swallowed during shutdown.
    for mqtt_client_info in self.mqtt_clients:
        try:
            mqtt_client_info['client'].disconnect()
            mqtt_client_info['client'].loop_stop()
        except Exception as e:
            self.logger.debug(f"Error shutting down MQTT client: {e}")

    if self.output_handle:
        self.output_handle.close()

    self.logger.info(f"Packet capture stopped. Total packets captured: {self.packet_count}")
|
async def send_advert(self):
    """Broadcast a flood advert via the connected meshcore node.

    Returns True on success, False if not connected or on any error.
    """
    try:
        connected = self._ensure_connected("send_advert", "warning")
        if not connected:
            return False

        self.logger.info("Sending flood advert...")
        await self.meshcore.commands.send_advert(flood=True)

        # Record and persist when we last advertised so the scheduler
        # survives restarts without immediately re-adverting.
        self.last_advert_time = time.time()
        self._save_advert_state()

        self.logger.info("Flood advert sent successfully!")
        return True
    except Exception as e:
        self.logger.error(f"Error sending flood advert: {e}")
        return False
|
async def advert_scheduler(self):
    """Background task to send adverts at configured intervals."""
    # Interval of zero (or negative) disables scheduling entirely.
    if self.advert_interval_hours <= 0:
        if self.debug:
            self.logger.debug("Advert scheduling disabled (interval = 0)")
        return

    if self.debug:
        self.logger.debug(f"Starting advert scheduler with {self.advert_interval_hours} hour interval")

    while not self.should_exit:
        try:
            interval_seconds = self.advert_interval_hours * 3600
            elapsed = time.time() - self.last_advert_time

            if elapsed >= interval_seconds:
                # Due now: advertise, then wait a full interval so adverts
                # never fire back-to-back.
                await self.send_advert()
                if await self.wait_with_shutdown(interval_seconds):
                    break  # Shutdown was requested
            else:
                # Not due yet: sleep the remainder of the interval.
                sleep_time = interval_seconds - elapsed
                if self.debug:
                    self.logger.debug(f"Next advert in {sleep_time/3600:.1f} hours")
                if await self.wait_with_shutdown(sleep_time):
                    break  # Shutdown was requested
        except asyncio.CancelledError:
            if self.debug:
                self.logger.debug("Advert scheduler cancelled")
            break
        except Exception as e:
            # Back off a minute after unexpected errors, then retry.
            self.logger.error(f"Error in advert scheduler: {e}")
            if await self.wait_with_shutdown(60):
                break  # Shutdown was requested
|
async def jwt_renewal_scheduler(self):
    """Background task to check and renew JWT tokens."""
    # Interval of zero (or negative) disables scheduling entirely.
    if self.jwt_renewal_interval <= 0:
        if self.debug:
            self.logger.debug("JWT renewal scheduling disabled (interval = 0)")
        return

    if self.debug:
        self.logger.debug(f"Starting JWT renewal scheduler with {self.jwt_renewal_interval} second interval")

    while not self.should_exit:
        try:
            # Wait a full interval before each check.
            if await self.wait_with_shutdown(self.jwt_renewal_interval):
                break  # Shutdown was requested

            await self.check_and_renew_jwt_tokens()
        except asyncio.CancelledError:
            if self.debug:
                self.logger.debug("JWT renewal scheduler cancelled")
            break
        except Exception as e:
            # Back off a minute after unexpected errors, then retry.
            self.logger.error(f"Error in JWT renewal scheduler: {e}")
            if await self.wait_with_shutdown(60):
                break  # Shutdown was requested
|
async def main():
    """Main entry point: parse CLI args, run capture until shutdown.

    Installs SIGTERM/SIGINT handlers that set a shared shutdown event,
    then races the capture task against that event so shutdown is
    immediate rather than waiting on the next poll.
    """
    parser = argparse.ArgumentParser(description='MeshCore Packet Capture Script')
    parser.add_argument('--output', help='Output file path (optional)')
    parser.add_argument('--verbose', action='store_true', help='Enable verbose output (shows JSON packet data)')
    parser.add_argument('--debug', action='store_true', help='Enable debug output (shows all detailed debugging info)')
    parser.add_argument('--no-mqtt', action='store_true', help='Disable MQTT publishing')

    args = parser.parse_args()

    import signal

    # Global shutdown event for immediate response
    shutdown_event = asyncio.Event()

    # Create packet capture instance with shutdown event
    capture = PacketCapture(
        output_file=args.output,
        verbose=args.verbose,
        debug=args.debug,
        enable_mqtt=not args.no_mqtt,
        shutdown_event=shutdown_event
    )

    def signal_handler(signum, frame):
        capture.logger.info(f"Received signal {signum}, initiating immediate shutdown...")
        capture.should_exit = True
        shutdown_event.set()  # Wake up all waiting tasks immediately

    # Register handlers AFTER `capture` exists: the handler closes over it,
    # and registering first left a window where a signal raised NameError.
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    # Command line arguments override environment variable
    if args.debug:
        capture.logger.setLevel(logging.DEBUG)
    elif args.verbose:
        capture.logger.setLevel(logging.INFO)
    # If neither debug nor verbose specified, use environment variable (already set in setup_logging)

    try:
        # Start the capture in a task so we can wait on the shutdown event
        capture_task = asyncio.create_task(capture.start())

        # Wait for either completion or shutdown signal
        done, pending = await asyncio.wait(
            [capture_task, asyncio.create_task(shutdown_event.wait())],
            return_when=asyncio.FIRST_COMPLETED
        )

        # Cancel any pending tasks
        for task in pending:
            task.cancel()

        # Surface any exception from a completed task instead of silently
        # dropping it (previously it was never retrieved).
        for task in done:
            exc = task.exception()
            if exc is not None:
                raise exc

        # If shutdown was triggered, stop the capture
        if shutdown_event.is_set():
            capture.logger.info("Shutdown signal received, stopping capture...")
            await capture.stop()

    except KeyboardInterrupt:
        print("\nShutting down...")
        await capture.stop()
    except Exception as e:
        print(f"Error: {e}")
        await capture.stop()
|
|
if __name__ == "__main__":
    # Script entry point: asyncio.run creates and closes the event loop
    # around the async main() coroutine.
    asyncio.run(main())
|