fix(handshake): use 60 s stall guard for Stage 2 to match SDK behavior

Stage 2 of the handshake drains the full node database, which on meshes with 50+ nodes can comfortably exceed the previous 30 s stall guard and trigger a spurious want_config_id retry mid-drain. The meshtastic-client KMP SDK uses a 60 s timeout for this stage for the same reason. Split HANDSHAKE_TIMEOUT into HANDSHAKE_TIMEOUT_STAGE1 (30 s) and HANDSHAKE_TIMEOUT_STAGE2 (60 s) and thread the duration through startHandshakeStallGuard so each call site picks the right value.
2026-04-20 22:23:37 +00:00 · 2026-04-19 12:23:48 -05:00 · 2026-04-19 12:23:48 -05:00 · 646858cc39
commit 646858cc39
parent 6135166ea5
1 changed files with 13 additions and 5 deletions
--- a/core/data/src/commonMain/kotlin/org/meshtastic/core/data/manager/MeshConnectionManagerImpl.kt
+++ b/core/data/src/commonMain/kotlin/org/meshtastic/core/data/manager/MeshConnectionManagerImpl.kt
@@ -60,6 +60,7 @@ import org.meshtastic.proto.AdminMessage
 import org.meshtastic.proto.Config
 import org.meshtastic.proto.Telemetry
 import org.meshtastic.proto.ToRadio
+import kotlin.time.Duration
 import kotlin.time.Duration.Companion.milliseconds
 import kotlin.time.Duration.Companion.seconds
 import kotlin.time.DurationUnit
@@ -211,11 +212,11 @@ class MeshConnectionManagerImpl(
            }
    }

-    private fun startHandshakeStallGuard(stage: Int, action: () -> Unit) {
+    private fun startHandshakeStallGuard(stage: Int, timeout: Duration, action: () -> Unit) {
        handshakeTimeout?.cancel()
        handshakeTimeout =
            scope.handledLaunch {
-                delay(HANDSHAKE_TIMEOUT)
+                delay(timeout)
                if (serviceRepository.connectionState.value is ConnectionState.Connecting) {
                    // Attempt one retry. Note: the firmware silently drops identical consecutive
                    // writes (per-connection dedup). If the first want_config_id was received and
@@ -291,13 +292,13 @@ class MeshConnectionManagerImpl(

    override fun startConfigOnly() {
        val action = { packetHandler.sendToRadio(ToRadio(want_config_id = HandshakeConstants.CONFIG_NONCE)) }
-        startHandshakeStallGuard(1, action)
+        startHandshakeStallGuard(1, HANDSHAKE_TIMEOUT_STAGE1, action)
        action()
    }

    override fun startNodeInfoOnly() {
        val action = { packetHandler.sendToRadio(ToRadio(want_config_id = HandshakeConstants.NODE_INFO_NONCE)) }
-        startHandshakeStallGuard(2, action)
+        startHandshakeStallGuard(2, HANDSHAKE_TIMEOUT_STAGE2, action)
        action()
    }

@@ -404,7 +405,14 @@ class MeshConnectionManagerImpl(
         */
        private const val PRE_HANDSHAKE_SETTLE_MS = 100L

-        private val HANDSHAKE_TIMEOUT = 30.seconds
+        private val HANDSHAKE_TIMEOUT_STAGE1 = 30.seconds
+
+        /**
+         * Stage 2 drains the full node database, which can be significantly larger than Stage 1 config on big meshes.
+         * 60 s matches the meshtastic-client SDK timeout and avoids premature stall-guard triggers on meshes with 50+
+         * nodes.
+         */
+        private val HANDSHAKE_TIMEOUT_STAGE2 = 60.seconds

        // Shorter window for the retry attempt: if the device genuinely didn't receive the
        // first want_config_id the retry completes within a few seconds. Waiting another 30s