mirror of
https://github.com/meshtastic/Meshtastic-Android.git
synced 2026-04-20 22:23:37 +00:00
fix: resolve bugs across connection, PKI, admin, packet flow, and stability subsystems (#5011)
This commit is contained in:
parent
cd9f1c0600
commit
60cc2f4237
24 changed files with 413 additions and 45 deletions
|
|
@ -98,11 +98,12 @@ class CommandSenderImpl(
|
|||
/**
|
||||
* Resolves the correct channel index for sending a packet to [toNum].
|
||||
*
|
||||
* When both the local node and the destination support PKC, returns [DataPacket.PKC_CHANNEL_INDEX] so that
|
||||
* [buildMeshPacket] enables PKI encryption. Otherwise falls back to the node's heard-on channel (for general
|
||||
* packets) or the dedicated admin channel (for admin packets).
|
||||
* PKI encryption ([DataPacket.PKC_CHANNEL_INDEX]) is only used for **admin** packets, where end-to-end encryption
|
||||
* is appropriate. Protocol-level requests (traceroute, telemetry, position, nodeinfo, neighborinfo) must NOT use
|
||||
* PKI because relay nodes need to read and/or modify the inner payload (e.g. traceroute appends each hop's node
|
||||
* number). These requests fall back to the node's heard-on channel.
|
||||
*/
|
||||
private fun getChannelIndex(toNum: Int, isAdmin: Boolean = false): Int {
|
||||
private fun getAdminChannelIndex(toNum: Int): Int {
|
||||
val myNum = nodeManager.myNodeNum.value ?: return 0
|
||||
val myNode = nodeManager.nodeDBbyNodeNum[myNum]
|
||||
val destNode = nodeManager.nodeDBbyNodeNum[toNum]
|
||||
|
|
@ -110,15 +111,18 @@ class CommandSenderImpl(
|
|||
return when {
|
||||
myNum == toNum -> 0
|
||||
myNode?.hasPKC == true && destNode?.hasPKC == true -> DataPacket.PKC_CHANNEL_INDEX
|
||||
isAdmin ->
|
||||
else ->
|
||||
channelSet.value.settings
|
||||
.indexOfFirst { it.name.equals(ADMIN_CHANNEL_NAME, ignoreCase = true) }
|
||||
.coerceAtLeast(0)
|
||||
else -> destNode?.channel ?: 0
|
||||
}
|
||||
}
|
||||
|
||||
private fun getAdminChannelIndex(toNum: Int): Int = getChannelIndex(toNum, isAdmin = true)
|
||||
/**
|
||||
* Returns the heard-on channel for a non-admin request to [toNum]. Does NOT use PKI — protocol-level requests need
|
||||
* clear inner payloads.
|
||||
*/
|
||||
private fun getChannelIndex(toNum: Int): Int = nodeManager.nodeDBbyNodeNum[toNum]?.channel ?: 0
|
||||
|
||||
override fun sendData(p: DataPacket) {
|
||||
if (p.id == 0) p.id = generatePacketId()
|
||||
|
|
|
|||
|
|
@ -89,6 +89,12 @@ class FromRadioPacketHandlerImpl(
|
|||
fileInfo != null -> router.value.configFlowManager.handleFileInfo(fileInfo)
|
||||
xmodemPacket != null -> router.value.xmodemManager.handleIncomingXModem(xmodemPacket)
|
||||
clientNotification != null -> handleClientNotification(clientNotification)
|
||||
// Firmware rebooted without a transport-level disconnect (common on serial/TCP).
|
||||
// Re-handshake immediately rather than waiting for the 30s stall guard.
|
||||
proto.rebooted != null -> {
|
||||
Logger.w { "Firmware rebooted (rebooted=${proto.rebooted}), re-initiating handshake" }
|
||||
router.value.configFlowManager.triggerWantConfig()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -248,6 +248,11 @@ class MeshActionHandlerImpl(
|
|||
override fun handleSetRemoteConfig(id: Int, destNum: Int, payload: ByteArray) {
|
||||
val c = Config.ADAPTER.decode(payload)
|
||||
commandSender.sendAdmin(destNum, id) { AdminMessage(set_config = c) }
|
||||
// When targeting the local node, optimistically persist the config so the
|
||||
// UI reflects changes immediately (matching handleSetConfig behaviour).
|
||||
if (destNum == nodeManager.myNodeNum.value) {
|
||||
scope.handledLaunch { radioConfigRepository.setLocalConfig(c) }
|
||||
}
|
||||
}
|
||||
|
||||
override fun handleGetRemoteConfig(id: Int, destNum: Int, config: Int) {
|
||||
|
|
@ -310,6 +315,11 @@ class MeshActionHandlerImpl(
|
|||
if (payload != null) {
|
||||
val c = Channel.ADAPTER.decode(payload)
|
||||
commandSender.sendAdmin(destNum, id) { AdminMessage(set_channel = c) }
|
||||
// When targeting the local node, optimistically persist the channel so
|
||||
// the UI reflects changes immediately (matching handleSetChannel behaviour).
|
||||
if (destNum == nodeManager.myNodeNum.value) {
|
||||
scope.handledLaunch { radioConfigRepository.updateChannelSettings(c) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
package org.meshtastic.core.data.manager
|
||||
|
||||
import co.touchlab.kermit.Logger
|
||||
import kotlinx.atomicfu.atomic
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.delay
|
||||
import okio.IOException
|
||||
|
|
@ -59,6 +60,9 @@ class MeshConfigFlowManagerImpl(
|
|||
private lateinit var scope: CoroutineScope
|
||||
private val wantConfigDelay = 100L
|
||||
|
||||
/** Monotonically increasing generation so async clears from a stale handshake are discarded. */
|
||||
private val handshakeGeneration = atomic(0L)
|
||||
|
||||
override fun start(scope: CoroutineScope) {
|
||||
this.scope = scope
|
||||
}
|
||||
|
|
@ -203,12 +207,18 @@ class MeshConfigFlowManagerImpl(
|
|||
handshakeState = HandshakeState.ReceivingConfig(rawMyNodeInfo = myInfo)
|
||||
nodeManager.setMyNodeNum(myInfo.my_node_num)
|
||||
|
||||
// Bump the generation so that a pending clear from a prior (interrupted) handshake
|
||||
// will see a stale snapshot and skip its writes, preventing it from wiping config
|
||||
// that was saved by this (newer) handshake's incoming packets.
|
||||
val gen = handshakeGeneration.incrementAndGet()
|
||||
|
||||
// Clear persisted radio config so the new handshake starts from a clean slate.
|
||||
// DataStore serializes its own writes, so the clear will precede subsequent
|
||||
// setLocalConfig / updateChannelSettings calls dispatched by later packets in this
|
||||
// session (handleFromRadio processes packets sequentially, so later dispatches always
|
||||
// occur after this one returns).
|
||||
scope.handledLaunch {
|
||||
if (handshakeGeneration.value != gen) return@handledLaunch // Stale handshake; skip.
|
||||
radioConfigRepository.clearChannelSet()
|
||||
radioConfigRepository.clearLocalConfig()
|
||||
radioConfigRepository.clearLocalModuleConfig()
|
||||
|
|
|
|||
|
|
@ -205,6 +205,7 @@ class MeshConnectionManagerImpl(
|
|||
|
||||
private fun tearDownConnection() {
|
||||
packetHandler.stopPacketQueue()
|
||||
commandSender.setSessionPasskey(okio.ByteString.EMPTY) // Prevent stale passkey on reconnect.
|
||||
locationManager.stop()
|
||||
mqttManager.stop()
|
||||
}
|
||||
|
|
@ -227,8 +228,11 @@ class MeshConnectionManagerImpl(
|
|||
scope.handledLaunch {
|
||||
try {
|
||||
val localConfig = radioConfigRepository.localConfigFlow.first()
|
||||
val timeout = (localConfig.power?.ls_secs ?: 0) + DEVICE_SLEEP_TIMEOUT_SECONDS
|
||||
Logger.d { "Waiting for sleeping device, timeout=$timeout secs" }
|
||||
val rawTimeout = (localConfig.power?.ls_secs ?: 0) + DEVICE_SLEEP_TIMEOUT_SECONDS
|
||||
// Cap the timeout so routers or power-saving configs (ls_secs=3600) don't
|
||||
// leave the UI stuck in DeviceSleep for over an hour.
|
||||
val timeout = rawTimeout.coerceAtMost(MAX_SLEEP_TIMEOUT_SECONDS)
|
||||
Logger.d { "Waiting for sleeping device, timeout=$timeout secs (raw=$rawTimeout)" }
|
||||
delay(timeout.seconds)
|
||||
Logger.w { "Device timed out, setting disconnected" }
|
||||
onConnectionChanged(ConnectionState.Disconnected)
|
||||
|
|
@ -354,6 +358,12 @@ class MeshConnectionManagerImpl(
|
|||
|
||||
companion object {
|
||||
private const val DEVICE_SLEEP_TIMEOUT_SECONDS = 30
|
||||
|
||||
// Maximum time (in seconds) to wait for a sleeping device before declaring it
|
||||
// disconnected, regardless of the device's ls_secs configuration. Without this
|
||||
// cap, routers (ls_secs=3600) leave the UI in DeviceSleep for over an hour.
|
||||
private const val MAX_SLEEP_TIMEOUT_SECONDS = 300
|
||||
|
||||
private val HANDSHAKE_TIMEOUT = 30.seconds
|
||||
|
||||
// Shorter window for the retry attempt: if the device genuinely didn't receive the
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ import org.meshtastic.proto.FromRadio
|
|||
import org.meshtastic.proto.LogRecord
|
||||
import org.meshtastic.proto.MeshPacket
|
||||
import org.meshtastic.proto.PortNum
|
||||
import kotlin.concurrent.Volatile
|
||||
import kotlin.uuid.Uuid
|
||||
|
||||
/** Implementation of [MeshMessageProcessor] that handles raw radio messages and prepares mesh packets for routing. */
|
||||
|
|
@ -59,6 +60,13 @@ class MeshMessageProcessorImpl(
|
|||
private val logUuidByPacketId = mutableMapOf<Int, String>()
|
||||
private val logInsertJobByPacketId = mutableMapOf<Int, Job>()
|
||||
|
||||
/**
|
||||
* Epoch-millisecond timestamp of the last local-node `lastHeard` DB write. Used to throttle updates to at most once
|
||||
* per [LOCAL_NODE_REFRESH_INTERVAL_MS] so that high-frequency FromRadio variants (log records, queue status) don't
|
||||
* flood the DB.
|
||||
*/
|
||||
@Volatile private var lastLocalNodeRefreshMs = 0L
|
||||
|
||||
private val earlyMutex = Mutex()
|
||||
private val earlyReceivedPackets = kotlin.collections.ArrayDeque<MeshPacket>()
|
||||
private val maxEarlyPacketBuffer = 10240
|
||||
|
|
@ -95,6 +103,9 @@ class MeshMessageProcessorImpl(
|
|||
}
|
||||
|
||||
private fun processFromRadio(proto: FromRadio, myNodeNum: Int?) {
|
||||
// Any decoded FromRadio proves the radio link is alive — keep the local node fresh.
|
||||
refreshLocalNodeLastHeard()
|
||||
|
||||
// Audit log every incoming variant
|
||||
logVariant(proto)
|
||||
|
||||
|
|
@ -253,5 +264,33 @@ class MeshMessageProcessorImpl(
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Refreshes the local node's [Node.lastHeard] to prove the radio link is alive.
|
||||
*
|
||||
* Without this, [lastHeard] is only set when a [MeshPacket] arrives from another node (see
|
||||
* [processReceivedMeshPacket]). On a quiet mesh the heartbeat cycle still exchanges data with the firmware (ToRadio
|
||||
* heartbeat → FromRadio queueStatus every 30 s), but that data never touched [lastHeard], causing the local node to
|
||||
* appear stale in the UI even though the connection is healthy.
|
||||
*
|
||||
* To avoid flooding the DB on high-frequency variants (log records arrive many times per second when debug logging
|
||||
* is enabled), writes are throttled to at most once per [LOCAL_NODE_REFRESH_INTERVAL_MS].
|
||||
*/
|
||||
private fun refreshLocalNodeLastHeard() {
|
||||
val now = nowMillis
|
||||
if (now - lastLocalNodeRefreshMs < LOCAL_NODE_REFRESH_INTERVAL_MS) return
|
||||
lastLocalNodeRefreshMs = now
|
||||
|
||||
val myNum = nodeManager.myNodeNum.value ?: return
|
||||
nodeManager.updateNode(myNum, withBroadcast = false) { node: Node -> node.copy(lastHeard = nowSeconds.toInt()) }
|
||||
}
|
||||
|
||||
private fun insertMeshLog(log: MeshLog): Job = scope.handledLaunch { meshLogRepository.value.insert(log) }
|
||||
|
||||
companion object {
|
||||
/**
|
||||
* Minimum interval between local-node `lastHeard` DB writes, in milliseconds. Aligned with the heartbeat
|
||||
* interval (30 s) so that one write per heartbeat cycle keeps the node fresh without unnecessary DB churn.
|
||||
*/
|
||||
private const val LOCAL_NODE_REFRESH_INTERVAL_MS = 30_000L
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -110,6 +110,7 @@ class PacketHandlerImpl(
|
|||
override fun sendToRadio(packet: MeshPacket) {
|
||||
scope.launch {
|
||||
queueMutex.withLock {
|
||||
queueStopped = false // Allow queue to resume after a disconnect/reconnect cycle.
|
||||
queuedPackets.add(packet)
|
||||
startPacketQueueLocked()
|
||||
}
|
||||
|
|
@ -123,6 +124,7 @@ class PacketHandlerImpl(
|
|||
val deferred = CompletableDeferred<Boolean>()
|
||||
responseMutex.withLock { queueResponse[packet.id] = deferred }
|
||||
queueMutex.withLock {
|
||||
queueStopped = false // Allow queue to resume after a disconnect/reconnect cycle.
|
||||
queuedPackets.add(packet)
|
||||
startPacketQueueLocked()
|
||||
}
|
||||
|
|
@ -199,15 +201,18 @@ class PacketHandlerImpl(
|
|||
Logger.d { "queueJob packet id=${packet.id.toUInt()} success $success" }
|
||||
} catch (e: TimeoutCancellationException) {
|
||||
Logger.d { "queueJob packet id=${packet.id.toUInt()} timeout" }
|
||||
// Clean up the deferred for this packet. sendToRadioAndAwait callers
|
||||
// also clean up in their own finally block (idempotent remove).
|
||||
responseMutex.withLock { queueResponse.remove(packet.id) }
|
||||
} catch (e: CancellationException) {
|
||||
throw e // Preserve structured concurrency cancellation propagation.
|
||||
} catch (e: Exception) {
|
||||
Logger.d { "queueJob packet id=${packet.id.toUInt()} failed" }
|
||||
responseMutex.withLock { queueResponse.remove(packet.id) }
|
||||
}
|
||||
// Do NOT remove from queueResponse here. Removal is owned by:
|
||||
// - handleQueueStatus (normal completion path)
|
||||
// - sendToRadioAndAwait's finally block (for await-style callers)
|
||||
// - stopPacketQueue (bulk cleanup on disconnect)
|
||||
// Deferred cleanup is now handled in the catch blocks above.
|
||||
// handleQueueStatus (normal success) and stopPacketQueue (bulk cleanup)
|
||||
// also remove entries, and these removals are idempotent.
|
||||
}
|
||||
} finally {
|
||||
// Hold queueMutex so that clearing queueJob and the restart decision are
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ import kotlinx.coroutines.channels.BufferOverflow
|
|||
import kotlinx.coroutines.flow.MutableSharedFlow
|
||||
import kotlinx.coroutines.flow.asSharedFlow
|
||||
import org.koin.core.annotation.Single
|
||||
import org.meshtastic.core.common.util.nowMillis
|
||||
import org.meshtastic.core.repository.PacketHandler
|
||||
import org.meshtastic.core.repository.XModemFile
|
||||
import org.meshtastic.core.repository.XModemManager
|
||||
|
|
@ -59,6 +60,8 @@ class XModemManagerImpl(private val packetHandler: PacketHandler) : XModemManage
|
|||
@Volatile private var transferName = ""
|
||||
|
||||
@Volatile private var expectedSeq = INITIAL_SEQ
|
||||
|
||||
@Volatile private var lastActivityMillis = 0L
|
||||
private val blocks = mutableListOf<ByteArray>()
|
||||
|
||||
override fun setTransferName(name: String) {
|
||||
|
|
@ -66,6 +69,17 @@ class XModemManagerImpl(private val packetHandler: PacketHandler) : XModemManage
|
|||
}
|
||||
|
||||
override fun handleIncomingXModem(packet: XModem) {
|
||||
// If blocks have accumulated but no activity for INACTIVITY_TIMEOUT_MS,
|
||||
// the previous transfer is stale (firmware crash, BLE disconnect, etc.).
|
||||
if (blocks.isNotEmpty() && lastActivityMillis > 0L) {
|
||||
val elapsed = nowMillis - lastActivityMillis
|
||||
if (elapsed > INACTIVITY_TIMEOUT_MS) {
|
||||
Logger.w { "XModem: inactivity timeout (${elapsed}ms) — resetting stale transfer" }
|
||||
reset()
|
||||
}
|
||||
}
|
||||
lastActivityMillis = nowMillis
|
||||
|
||||
when (packet.control) {
|
||||
XModem.Control.SOH,
|
||||
XModem.Control.STX,
|
||||
|
|
@ -135,6 +149,7 @@ class XModemManagerImpl(private val packetHandler: PacketHandler) : XModemManage
|
|||
expectedSeq = INITIAL_SEQ
|
||||
blocks.clear()
|
||||
transferName = ""
|
||||
lastActivityMillis = 0L
|
||||
}
|
||||
|
||||
// CRC-CCITT: polynomial 0x1021, initial value 0x0000 (XModem variant)
|
||||
|
|
@ -157,5 +172,6 @@ class XModemManagerImpl(private val packetHandler: PacketHandler) : XModemManage
|
|||
private const val CTRLZ = 0x1A.toByte()
|
||||
private const val CRC_POLY = 0x1021
|
||||
private const val BITS_PER_BYTE = 8
|
||||
private const val INACTIVITY_TIMEOUT_MS = 30_000L
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ import kotlinx.coroutines.ExperimentalCoroutinesApi
|
|||
import kotlinx.coroutines.flow.MutableStateFlow
|
||||
import kotlinx.coroutines.flow.flowOf
|
||||
import kotlinx.coroutines.test.UnconfinedTestDispatcher
|
||||
import kotlinx.coroutines.test.advanceTimeBy
|
||||
import kotlinx.coroutines.test.advanceUntilIdle
|
||||
import kotlinx.coroutines.test.runTest
|
||||
import org.meshtastic.core.model.ConnectionState
|
||||
|
|
@ -267,4 +268,46 @@ class MeshConnectionManagerImplTest {
|
|||
verify { mqttManager.start(any(), true, true) }
|
||||
verify { historyManager.requestHistoryReplay(any(), any(), any(), any()) }
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `DeviceSleep timeout is capped at MAX_SLEEP_TIMEOUT_SECONDS for high ls_secs`() = runTest(testDispatcher) {
|
||||
// Router with ls_secs=3600 — previously this created a 3630s timeout.
|
||||
// With the cap, it should be clamped to 300s.
|
||||
val config =
|
||||
LocalConfig(
|
||||
power = Config.PowerConfig(is_power_saving = true, ls_secs = 3600),
|
||||
device = Config.DeviceConfig(role = Config.DeviceConfig.Role.ROUTER),
|
||||
)
|
||||
every { radioConfigRepository.localConfigFlow } returns flowOf(config)
|
||||
every { packetHandler.sendToRadio(any<org.meshtastic.proto.ToRadio>()) } returns Unit
|
||||
every { serviceNotifications.updateServiceStateNotification(any(), any()) } returns Unit
|
||||
every { packetHandler.stopPacketQueue() } returns Unit
|
||||
every { locationManager.stop() } returns Unit
|
||||
every { mqttManager.stop() } returns Unit
|
||||
every { nodeManager.nodeDBbyNodeNum } returns emptyMap()
|
||||
|
||||
manager.start(backgroundScope)
|
||||
advanceUntilIdle()
|
||||
|
||||
// Transition to Connected then DeviceSleep
|
||||
radioConnectionState.value = ConnectionState.Connected
|
||||
advanceUntilIdle()
|
||||
radioConnectionState.value = ConnectionState.DeviceSleep
|
||||
advanceUntilIdle()
|
||||
|
||||
assertEquals(
|
||||
ConnectionState.DeviceSleep,
|
||||
serviceRepository.connectionState.value,
|
||||
"Should be in DeviceSleep initially",
|
||||
)
|
||||
|
||||
// Advance 300 seconds (the cap) + 1 second to trigger the timeout.
|
||||
advanceTimeBy(301_000L)
|
||||
|
||||
assertEquals(
|
||||
ConnectionState.Disconnected,
|
||||
serviceRepository.connectionState.value,
|
||||
"Should transition to Disconnected after capped timeout (300s), not the raw 3630s",
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue