mirror of
https://github.com/meshtastic/Meshtastic-Android.git
synced 2026-04-20 22:23:37 +00:00
fix: resolve bugs across connection, PKI, admin, packet flow, and stability subsystems (#5011)
This commit is contained in:
parent
cd9f1c0600
commit
60cc2f4237
24 changed files with 413 additions and 45 deletions
|
|
@ -22,6 +22,8 @@ import org.meshtastic.core.network.repository.SerialConnection
|
|||
import org.meshtastic.core.network.repository.SerialConnectionListener
|
||||
import org.meshtastic.core.network.repository.UsbRepository
|
||||
import org.meshtastic.core.repository.RadioInterfaceService
|
||||
import org.meshtastic.proto.Heartbeat
|
||||
import org.meshtastic.proto.ToRadio
|
||||
import java.util.concurrent.atomic.AtomicReference
|
||||
|
||||
/** An interface that assumes we are talking to a meshtastic device via USB serial */
|
||||
|
|
@ -119,7 +121,14 @@ class SerialInterface(
|
|||
}
|
||||
|
||||
/**
 * Keep-alive hook for the USB serial transport: encodes a [ToRadio] carrying an empty [Heartbeat] and hands the
 * resulting bytes to [handleSendToRadio].
 */
override fun keepAlive() {
|
||||
// NOTE(review): this plain log line looks like the pre-change row of the diff. If both log calls are really
// present, every keepAlive logs twice — confirm only the "sending heartbeat" line below remains.
Logger.d { "[$address] Serial keepAlive" }
|
||||
// Send a ToRadio heartbeat so the firmware resets its idle timer and responds with
|
||||
// a FromRadio queueStatus — proving the serial link is alive. Without this, the
|
||||
// serial transport has no way to detect a silently dead device (battery depleted,
|
||||
// firmware crash without the `rebooted` flag). The queueStatus response also feeds
|
||||
// into MeshMessageProcessorImpl.refreshLocalNodeLastHeard() to keep the local
|
||||
// node's lastHeard timestamp current.
|
||||
Logger.d { "[$address] Serial keepAlive — sending heartbeat" }
|
||||
handleSendToRadio(ToRadio(heartbeat = Heartbeat()).encode())
|
||||
}
|
||||
|
||||
override fun sendBytes(p: ByteArray) {
|
||||
|
|
|
|||
|
|
@ -65,6 +65,18 @@ private const val CONNECTION_TIMEOUT_MS = 15_000L
|
|||
/** Consecutive-failure count at which the reconnect loop signals DeviceSleep to the service. */
private const val RECONNECT_FAILURE_THRESHOLD = 3
|
||||
// Presumably the starting delay used by computeReconnectBackoffMs (its test expects 5_000 ms for the
// first failure) — TODO confirm against the function body.
private const val RECONNECT_BASE_DELAY_MS = 5_000L
|
||||
// Presumably the cap applied by computeReconnectBackoffMs (its test expects 60_000 ms from the fifth
// failure onwards) — TODO confirm against the function body.
private const val RECONNECT_MAX_DELAY_MS = 60_000L
|
||||
/**
 * Consecutive-failure count at which the reconnect loop gives up and reports a permanent disconnect, so a
 * device that is genuinely offline stops draining the battery.
 */
private const val RECONNECT_MAX_FAILURES = 10
|
||||
|
||||
/**
|
||||
* Minimum milliseconds a BLE connection must stay up before we consider it "stable" and reset
|
||||
* [BleRadioInterface.consecutiveFailures]. Without this, a device at the edge of BLE range can repeatedly connect for a
|
||||
* fraction of a second and drop — each brief connection resets the failure counter so [RECONNECT_FAILURE_THRESHOLD] is
|
||||
* never reached, and the app never signals [ConnectionState.DeviceSleep].
|
||||
*
|
||||
* The value (5 s) is long enough that only connections that survive past the initial GATT setup are treated as genuine,
|
||||
* but short enough that normal reconnects after light-sleep still reset the counter promptly.
|
||||
*/
|
||||
private const val MIN_STABLE_CONNECTION_MS = 5_000L
|
||||
|
||||
/**
|
||||
* Returns the reconnect backoff delay in milliseconds for a given consecutive failure count.
|
||||
|
|
@ -181,7 +193,7 @@ class BleRadioInterface(
|
|||
throw RadioNotConnectedException("Device not found at address $address")
|
||||
}
|
||||
|
||||
@Suppress("LongMethod")
|
||||
@Suppress("LongMethod", "CyclomaticComplexMethod")
|
||||
private fun connect() {
|
||||
connectionJob =
|
||||
connectionScope.launch {
|
||||
|
|
@ -231,8 +243,9 @@ class BleRadioInterface(
|
|||
throw RadioNotConnectedException("Failed to connect to device at address $address")
|
||||
}
|
||||
|
||||
// Connection succeeded — reset failure counter
|
||||
consecutiveFailures = 0
|
||||
// Connection succeeded — only reset the failure counter if the
|
||||
// connection stays up long enough. See MIN_STABLE_CONNECTION_MS.
|
||||
val gattConnectedAt = nowMillis
|
||||
isFullyConnected = true
|
||||
onConnected()
|
||||
|
||||
|
|
@ -257,6 +270,39 @@ class BleRadioInterface(
|
|||
}
|
||||
|
||||
Logger.i { "[$address] BLE connection dropped, preparing to reconnect" }
|
||||
|
||||
// Only reset the failure counter if the connection was stable (lasted
|
||||
// longer than MIN_STABLE_CONNECTION_MS). A connection that drops within
|
||||
// seconds typically means the device is at the edge of BLE range or
|
||||
// powered off — the Android BLE stack may briefly "connect" to a cached
|
||||
// GATT profile before realising the device is gone. Without this guard,
|
||||
// the failure counter resets on every brief connect, preventing us from
|
||||
// ever reaching RECONNECT_FAILURE_THRESHOLD and signalling DeviceSleep.
|
||||
val connectionUptime = nowMillis - gattConnectedAt
|
||||
if (connectionUptime >= MIN_STABLE_CONNECTION_MS) {
|
||||
consecutiveFailures = 0
|
||||
} else {
|
||||
consecutiveFailures++
|
||||
Logger.w {
|
||||
"[$address] Connection lasted only ${connectionUptime}ms " +
|
||||
"(< ${MIN_STABLE_CONNECTION_MS}ms) — treating as failure " +
|
||||
"(consecutive failures: $consecutiveFailures)"
|
||||
}
|
||||
if (consecutiveFailures >= RECONNECT_MAX_FAILURES) {
|
||||
Logger.e { "[$address] Giving up after $consecutiveFailures unstable connections" }
|
||||
service.onDisconnect(
|
||||
isPermanent = true,
|
||||
errorMessage = "Device unreachable (unstable connection)",
|
||||
)
|
||||
return@launch
|
||||
}
|
||||
if (consecutiveFailures >= RECONNECT_FAILURE_THRESHOLD) {
|
||||
service.onDisconnect(
|
||||
isPermanent = false,
|
||||
errorMessage = "Device unreachable (unstable connection)",
|
||||
)
|
||||
}
|
||||
}
|
||||
} catch (e: kotlinx.coroutines.CancellationException) {
|
||||
Logger.d { "[$address] BLE connection coroutine cancelled" }
|
||||
throw e
|
||||
|
|
@ -268,10 +314,19 @@ class BleRadioInterface(
|
|||
"(consecutive failures: $consecutiveFailures)"
|
||||
}
|
||||
|
||||
// At the failure threshold, signal DeviceSleep so MeshConnectionManagerImpl can
|
||||
// start its sleep timeout. Use == (not >=) to fire exactly once; repeated
|
||||
// onDisconnect signals would reset upstream state machines unnecessarily.
|
||||
if (consecutiveFailures == RECONNECT_FAILURE_THRESHOLD) {
|
||||
// After exceeding the max failure limit, give up permanently to stop
|
||||
// draining battery on a device that is genuinely offline. The user
|
||||
// must manually reconnect from the connections screen.
|
||||
if (consecutiveFailures >= RECONNECT_MAX_FAILURES) {
|
||||
Logger.e { "[$address] Giving up after $consecutiveFailures consecutive failures" }
|
||||
val (_, msg) = e.toDisconnectReason()
|
||||
service.onDisconnect(isPermanent = true, errorMessage = msg)
|
||||
return@launch
|
||||
}
|
||||
|
||||
// At the failure threshold, signal DeviceSleep so
|
||||
// MeshConnectionManagerImpl can start its sleep timeout.
|
||||
if (consecutiveFailures >= RECONNECT_FAILURE_THRESHOLD) {
|
||||
handleFailure(e)
|
||||
}
|
||||
|
||||
|
|
@ -312,10 +367,11 @@ class BleRadioInterface(
|
|||
"Packets RX: $packetsReceived ($bytesReceived bytes), " +
|
||||
"Packets TX: $packetsSent ($bytesSent bytes)"
|
||||
}
|
||||
// Do NOT call service.onDisconnect() here. The reconnect while-loop handles retries
|
||||
// internally. Emitting DeviceSleep on every transient disconnect creates competing state
|
||||
// transitions with MeshConnectionManagerImpl's sleep timeout. Instead, handleFailure()
|
||||
// is called from the catch block after RECONNECT_FAILURE_THRESHOLD consecutive failures.
|
||||
// Signal DeviceSleep immediately so the UI reflects the disconnect while the
|
||||
// reconnect loop continues in the background. The previous approach suppressed
|
||||
// this signal until RECONNECT_FAILURE_THRESHOLD consecutive failures, leaving the
|
||||
// UI stuck on "Connected" for 35+ seconds after the device disappeared.
|
||||
service.onDisconnect(isPermanent = false)
|
||||
}
|
||||
|
||||
private suspend fun discoverServicesAndSetupCharacteristics() {
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@
|
|||
package org.meshtastic.core.network.radio
|
||||
|
||||
import dev.mokkery.MockMode
|
||||
import dev.mokkery.answering.returns
|
||||
import dev.mokkery.every
|
||||
import dev.mokkery.matcher.any
|
||||
import dev.mokkery.mock
|
||||
import dev.mokkery.verify
|
||||
|
|
@ -124,4 +126,52 @@ class BleRadioInterfaceTest {
|
|||
// Cancel the reconnect loop so runTest can complete.
|
||||
bleInterface.close()
|
||||
}
|
||||
|
||||
/**
|
||||
* After [RECONNECT_MAX_FAILURES] (10) consecutive failures, the reconnect loop should stop and signal a permanent
|
||||
* disconnect. This prevents infinite battery drain when the device is genuinely offline.
|
||||
*
|
||||
* Time budget for 10 failures with a bonded device (no scan): each iteration = 1s settle + connectAndAwait throw +
|
||||
* backoff. Backoffs: 5s, 10s, 20s, 40s, 60s, 60s, 60s, 60s, 60s (exit at failure 10, before its backoff). Total ≈ 10×1s
|
||||
* settle + 5+10+20+40+60+60+60+60+60 = 10 + 375 = 385s = 385_000ms. We use a generous 400_000ms to cover any timing
|
||||
* variance.
|
||||
*/
|
||||
@Test
|
||||
fun `reconnect loop stops after RECONNECT_MAX_FAILURES with permanent disconnect`() = runTest {
|
||||
// Bond the fake device up front; per the time budget above, a bonded device skips the scan step.
val device = FakeBleDevice(address = address, name = "Test Device")
|
||||
bluetoothRepository.bond(device)
|
||||
|
||||
// Presumably makes every connect attempt on the fake connection throw, so each loop iteration counts as a
// failure — verify against the fake's implementation.
connection.connectException = RadioNotConnectedException("simulated failure")
|
||||
// Stub onDisconnect for any argument combination so the mock accepts every call the loop makes.
every { service.onDisconnect(any(), any()) } returns Unit
|
||||
|
||||
val bleInterface =
|
||||
BleRadioInterface(
|
||||
serviceScope = this,
|
||||
scanner = scanner,
|
||||
bluetoothRepository = bluetoothRepository,
|
||||
connectionFactory = connectionFactory,
|
||||
service = service,
|
||||
address = address,
|
||||
)
|
||||
|
||||
// Advance enough time for all 10 failures to occur.
|
||||
// (The 400_000 ms budget from the KDoc; presumably the extra 1 ms ensures work scheduled exactly at the
// boundary also runs — confirm against advanceTimeBy semantics.)
advanceTimeBy(400_001L)
|
||||
|
||||
// Should have been called with isPermanent=true at least once (the final call).
|
||||
verify { service.onDisconnect(isPermanent = true, errorMessage = any()) }
|
||||
|
||||
bleInterface.close()
|
||||
}
|
||||
|
||||
/**
 * Pins the reconnect backoff schedule: 5 s for a failure count of 0 or 1, doubling with each further failure
 * (10 s, 20 s, 40 s), and held at 60 s from the fifth failure onwards, however large the count grows.
 */
@Test
|
||||
fun `computeReconnectBackoffMs returns correct backoff values`() {
|
||||
assertEquals(5_000L, computeReconnectBackoffMs(0))
|
||||
assertEquals(5_000L, computeReconnectBackoffMs(1))
|
||||
assertEquals(10_000L, computeReconnectBackoffMs(2))
|
||||
assertEquals(20_000L, computeReconnectBackoffMs(3))
|
||||
assertEquals(40_000L, computeReconnectBackoffMs(4))
|
||||
// Uncapped doubling would give 80_000 here; the expected 60_000 shows the cap taking effect.
assertEquals(60_000L, computeReconnectBackoffMs(5))
|
||||
assertEquals(60_000L, computeReconnectBackoffMs(10))
|
||||
assertEquals(60_000L, computeReconnectBackoffMs(100))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@ import kotlinx.coroutines.isActive
|
|||
import kotlinx.coroutines.launch
|
||||
import org.meshtastic.core.network.radio.StreamInterface
|
||||
import org.meshtastic.core.repository.RadioInterfaceService
|
||||
import org.meshtastic.proto.Heartbeat
|
||||
import org.meshtastic.proto.ToRadio
|
||||
import java.io.File
|
||||
|
||||
/**
|
||||
|
|
@ -137,7 +139,11 @@ private constructor(
|
|||
}
|
||||
|
||||
/**
 * Keep-alive hook for this serial-port transport: encodes a [ToRadio] carrying an empty [Heartbeat] and hands the
 * resulting bytes to [handleSendToRadio].
 */
override fun keepAlive() {
|
||||
// NOTE(review): the comment on the next line appears to be the pre-change text from this diff and no longer
// matches the body, which does send a heartbeat — confirm it is gone from the final file.
// Not specifically needed for raw serial unless implemented
|
||||
// Send a ToRadio heartbeat so the firmware resets its idle timer and responds with
|
||||
// a FromRadio queueStatus — proving the serial link is alive. Without this, the
|
||||
// serial transport has no way to detect a silently dead device.
|
||||
Logger.d { "[$portName] Serial keepAlive — sending heartbeat" }
|
||||
handleSendToRadio(ToRadio(heartbeat = Heartbeat()).encode())
|
||||
}
|
||||
|
||||
private fun closePortResources() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue