mirror of
https://github.com/meshtastic/Meshtastic-Android.git
synced 2026-04-20 22:23:37 +00:00
chore: review-cleanup fleet (audit + fix + hardening) (#5158)
This commit is contained in:
parent
872c566ef1
commit
17e69c6d4c
68 changed files with 784 additions and 459 deletions
|
|
@ -19,13 +19,15 @@ package org.meshtastic.core.service
|
|||
import co.touchlab.kermit.Logger
|
||||
import co.touchlab.kermit.Severity
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.Job
|
||||
import kotlinx.coroutines.SupervisorJob
|
||||
import kotlinx.coroutines.cancel
|
||||
import kotlinx.coroutines.flow.launchIn
|
||||
import kotlinx.coroutines.flow.onEach
|
||||
import org.koin.core.annotation.Named
|
||||
import kotlinx.coroutines.isActive
|
||||
import org.koin.core.annotation.Single
|
||||
import org.meshtastic.core.common.database.DatabaseManager
|
||||
import org.meshtastic.core.common.util.handledLaunch
|
||||
import org.meshtastic.core.di.CoroutineDispatchers
|
||||
import org.meshtastic.core.repository.MeshConnectionManager
|
||||
import org.meshtastic.core.repository.MeshMessageProcessor
|
||||
import org.meshtastic.core.repository.MeshRouter
|
||||
|
|
@ -59,18 +61,15 @@ class MeshServiceOrchestrator(
|
|||
private val takPrefs: TakPrefs,
|
||||
private val databaseManager: DatabaseManager,
|
||||
private val connectionManager: MeshConnectionManager,
|
||||
@Named("ServiceScope") private val scope: CoroutineScope,
|
||||
private val dispatchers: CoroutineDispatchers,
|
||||
) {
|
||||
private var serviceJob: Job? = null
|
||||
private var takJob: Job? = null
|
||||
|
||||
/** The coroutine scope for the service. */
|
||||
val serviceScope: CoroutineScope
|
||||
get() = scope
|
||||
// Per-start coroutine scope. A fresh scope is created on each start() and cancelled on stop(), so all collectors
|
||||
// launched from start() are torn down cleanly and do not accumulate across start/stop/start cycles.
|
||||
private var scope: CoroutineScope? = null
|
||||
|
||||
/** Whether the orchestrator is currently running. */
|
||||
val isRunning: Boolean
|
||||
get() = serviceJob?.isActive == true
|
||||
get() = scope?.isActive == true
|
||||
|
||||
/**
|
||||
* Starts the mesh service components and wires up data flows.
|
||||
|
|
@ -85,27 +84,31 @@ class MeshServiceOrchestrator(
|
|||
}
|
||||
|
||||
Logger.i { "Starting mesh service orchestrator" }
|
||||
val job = Job()
|
||||
serviceJob = job
|
||||
val newScope = CoroutineScope(SupervisorJob() + dispatchers.default)
|
||||
scope = newScope
|
||||
|
||||
// Drop any bytes that piled up in the service's receivedData channel since the last stop(). The channel
|
||||
// outlives the orchestrator's per-start scope, so without this drain a stop/start cycle would replay stale
|
||||
// packets ahead of the fresh session's firmware handshake.
|
||||
radioInterfaceService.resetReceivedBuffer()
|
||||
|
||||
serviceNotifications.initChannels()
|
||||
connectionManager.updateStatusNotification()
|
||||
|
||||
// Observe TAK server pref to start/stop
|
||||
takJob =
|
||||
takPrefs.isTakServerEnabled
|
||||
.onEach { isEnabled ->
|
||||
if (isEnabled && !takServerManager.isRunning.value) {
|
||||
Logger.i { "TAK Server enabled by preference, starting integration" }
|
||||
takMeshIntegration.start(scope)
|
||||
} else if (!isEnabled && takServerManager.isRunning.value) {
|
||||
Logger.i { "TAK Server disabled by preference, stopping integration" }
|
||||
takMeshIntegration.stop()
|
||||
}
|
||||
takPrefs.isTakServerEnabled
|
||||
.onEach { isEnabled ->
|
||||
if (isEnabled && !takServerManager.isRunning.value) {
|
||||
Logger.i { "TAK Server enabled by preference, starting integration" }
|
||||
takMeshIntegration.start(newScope)
|
||||
} else if (!isEnabled && takServerManager.isRunning.value) {
|
||||
Logger.i { "TAK Server disabled by preference, stopping integration" }
|
||||
takMeshIntegration.stop()
|
||||
}
|
||||
.launchIn(scope)
|
||||
}
|
||||
.launchIn(newScope)
|
||||
|
||||
scope.handledLaunch {
|
||||
newScope.handledLaunch {
|
||||
// Ensure the per-device database is active before the radio connects.
|
||||
// On Android this is handled by MeshUtilApplication.init(); on Desktop (and any
|
||||
// future KMP host) the orchestrator is the first entry point, so it must initialize
|
||||
|
|
@ -119,18 +122,18 @@ class MeshServiceOrchestrator(
|
|||
|
||||
radioInterfaceService.receivedData
|
||||
.onEach { bytes -> messageProcessor.handleFromRadio(bytes, nodeManager.myNodeNum.value) }
|
||||
.launchIn(scope)
|
||||
.launchIn(newScope)
|
||||
|
||||
radioInterfaceService.connectionError
|
||||
.onEach { errorMessage -> serviceRepository.setErrorMessage(errorMessage, Severity.Warn) }
|
||||
.launchIn(scope)
|
||||
.launchIn(newScope)
|
||||
|
||||
// Each action is dispatched in its own supervised coroutine so that a failure in one
|
||||
// action (e.g. a timeout in sendAdminAwait) cannot terminate the collector and silently
|
||||
// drop all subsequent service actions for the rest of the session.
|
||||
serviceRepository.serviceAction
|
||||
.onEach { action -> scope.handledLaunch { router.actionHandler.onServiceAction(action) } }
|
||||
.launchIn(scope)
|
||||
.onEach { action -> newScope.handledLaunch { router.actionHandler.onServiceAction(action) } }
|
||||
.launchIn(newScope)
|
||||
|
||||
nodeManager.loadCachedNodeDB()
|
||||
}
|
||||
|
|
@ -142,13 +145,11 @@ class MeshServiceOrchestrator(
|
|||
*/
|
||||
fun stop() {
|
||||
Logger.i { "Stopping mesh service orchestrator" }
|
||||
takJob?.cancel()
|
||||
takJob = null
|
||||
// Guard stop() so we don't emit a spurious "stopped" log when TAK was never started
|
||||
if (takServerManager.isRunning.value) {
|
||||
takMeshIntegration.stop()
|
||||
}
|
||||
serviceJob?.cancel()
|
||||
serviceJob = null
|
||||
scope?.cancel()
|
||||
scope = null
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,9 @@ import kotlinx.coroutines.Job
|
|||
import kotlinx.coroutines.SupervisorJob
|
||||
import kotlinx.coroutines.cancel
|
||||
import kotlinx.coroutines.channels.BufferOverflow
|
||||
import kotlinx.coroutines.channels.Channel
|
||||
import kotlinx.coroutines.delay
|
||||
import kotlinx.coroutines.flow.Flow
|
||||
import kotlinx.coroutines.flow.MutableSharedFlow
|
||||
import kotlinx.coroutines.flow.MutableStateFlow
|
||||
import kotlinx.coroutines.flow.SharedFlow
|
||||
|
|
@ -35,6 +37,7 @@ import kotlinx.coroutines.flow.asStateFlow
|
|||
import kotlinx.coroutines.flow.catch
|
||||
import kotlinx.coroutines.flow.launchIn
|
||||
import kotlinx.coroutines.flow.onEach
|
||||
import kotlinx.coroutines.flow.receiveAsFlow
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.sync.Mutex
|
||||
import kotlinx.coroutines.sync.withLock
|
||||
|
|
@ -42,7 +45,7 @@ import org.koin.core.annotation.Named
|
|||
import org.koin.core.annotation.Single
|
||||
import org.meshtastic.core.ble.BluetoothRepository
|
||||
import org.meshtastic.core.common.util.handledLaunch
|
||||
import org.meshtastic.core.common.util.ignoreException
|
||||
import org.meshtastic.core.common.util.ignoreExceptionSuspend
|
||||
import org.meshtastic.core.common.util.nowMillis
|
||||
import org.meshtastic.core.di.CoroutineDispatchers
|
||||
import org.meshtastic.core.model.ConnectionState
|
||||
|
|
@ -95,8 +98,13 @@ class SharedRadioInterfaceService(
|
|||
private val _currentDeviceAddressFlow = MutableStateFlow<String?>(radioPrefs.devAddr.value)
|
||||
override val currentDeviceAddressFlow: StateFlow<String?> = _currentDeviceAddressFlow.asStateFlow()
|
||||
|
||||
private val _receivedData = MutableSharedFlow<ByteArray>(extraBufferCapacity = 64)
|
||||
override val receivedData: SharedFlow<ByteArray> = _receivedData
|
||||
// Unbounded Channel preserves strict FIFO delivery of incoming radio bytes, which the
|
||||
// firmware handshake depends on (initial config packet ordering). A SharedFlow with
|
||||
// `launch { emit() }` per packet reorders under concurrent dispatch and breaks config load.
|
||||
// trySend on an UNLIMITED channel never suspends and never drops, so handleFromRadio can
|
||||
// remain a non-suspend synchronous callback.
|
||||
private val _receivedData = Channel<ByteArray>(Channel.UNLIMITED)
|
||||
override val receivedData: Flow<ByteArray> = _receivedData.receiveAsFlow()
|
||||
|
||||
private val _meshActivity =
|
||||
MutableSharedFlow<MeshActivity>(extraBufferCapacity = 64, onBufferOverflow = BufferOverflow.DROP_OLDEST)
|
||||
|
|
@ -148,6 +156,7 @@ class SharedRadioInterfaceService(
|
|||
}
|
||||
}
|
||||
}
|
||||
.catch { Logger.e(it) { "devAddr flow crashed" } }
|
||||
.launchIn(processLifecycle.coroutineScope)
|
||||
|
||||
bluetoothRepository.state
|
||||
|
|
@ -216,7 +225,7 @@ class SharedRadioInterfaceService(
|
|||
|
||||
processLifecycle.coroutineScope.launch {
|
||||
transportMutex.withLock {
|
||||
ignoreException { stopTransportLocked() }
|
||||
ignoreExceptionSuspend { stopTransportLocked() }
|
||||
startTransportLocked()
|
||||
}
|
||||
}
|
||||
|
|
@ -245,7 +254,7 @@ class SharedRadioInterfaceService(
|
|||
}
|
||||
|
||||
/** Must be called under [transportMutex]. */
|
||||
private fun stopTransportLocked() {
|
||||
private suspend fun stopTransportLocked() {
|
||||
val currentTransport = radioTransport
|
||||
Logger.i { "Stopping transport $currentTransport" }
|
||||
isStarted = false
|
||||
|
|
@ -322,13 +331,28 @@ class SharedRadioInterfaceService(
|
|||
override fun handleFromRadio(bytes: ByteArray) {
|
||||
try {
|
||||
lastDataReceivedMillis = nowMillis
|
||||
processLifecycle.coroutineScope.launch(dispatchers.io) { _receivedData.emit(bytes) }
|
||||
// trySend synchronously onto the unbounded Channel so packet order matches arrival
|
||||
// order. The previous `launch { emit() }` pattern dispatched each packet onto a
|
||||
// fresh coroutine, letting the scheduler reorder them — which broke the firmware
|
||||
// config handshake (see PhoneAPI.cpp initial-handshake sequence).
|
||||
val result = _receivedData.trySend(bytes)
|
||||
if (result.isFailure) {
|
||||
Logger.e(result.exceptionOrNull()) { "Failed to enqueue ${bytes.size} received bytes; dropping packet" }
|
||||
}
|
||||
_meshActivity.tryEmit(MeshActivity.Receive)
|
||||
} catch (t: Throwable) {
|
||||
Logger.e(t) { "handleFromRadio failed while emitting data" }
|
||||
}
|
||||
}
|
||||
|
||||
override fun resetReceivedBuffer() {
|
||||
// Drain any bytes buffered while no collector was attached. Without this, a stop/start cycle
|
||||
// would replay stale bytes ahead of the next session's firmware handshake, since the channel
|
||||
// outlives the orchestrator's per-start scope.
|
||||
@Suppress("EmptyWhileBlock", "ControlFlowWithEmptyBody")
|
||||
while (_receivedData.tryReceive().isSuccess) Unit
|
||||
}
|
||||
|
||||
override fun onConnect() {
|
||||
// MutableStateFlow.value is thread-safe (backed by atomics) — assign directly rather than
|
||||
// launching a coroutine. The async launch pattern introduced a window where a concurrent
|
||||
|
|
|
|||
|
|
@ -23,13 +23,15 @@ import dev.mokkery.every
|
|||
import dev.mokkery.matcher.any
|
||||
import dev.mokkery.mock
|
||||
import dev.mokkery.verify
|
||||
import dev.mokkery.verify.VerifyMode.Companion.atLeast
|
||||
import dev.mokkery.verify.VerifyMode.Companion.exactly
|
||||
import dev.mokkery.verifySuspend
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.ExperimentalCoroutinesApi
|
||||
import kotlinx.coroutines.flow.MutableSharedFlow
|
||||
import kotlinx.coroutines.flow.MutableStateFlow
|
||||
import kotlinx.coroutines.test.UnconfinedTestDispatcher
|
||||
import org.meshtastic.core.common.database.DatabaseManager
|
||||
import org.meshtastic.core.di.CoroutineDispatchers
|
||||
import org.meshtastic.core.model.Node
|
||||
import org.meshtastic.core.model.service.ServiceAction
|
||||
import org.meshtastic.core.repository.CommandSender
|
||||
|
|
@ -70,8 +72,11 @@ class MeshServiceOrchestratorTest {
|
|||
private val databaseManager: DatabaseManager = mock(MockMode.autofill)
|
||||
private val connectionManager: MeshConnectionManager = mock(MockMode.autofill)
|
||||
|
||||
@OptIn(ExperimentalCoroutinesApi::class)
|
||||
private val testDispatcher = UnconfinedTestDispatcher()
|
||||
private val testScope = CoroutineScope(testDispatcher)
|
||||
|
||||
@OptIn(ExperimentalCoroutinesApi::class)
|
||||
private val dispatchers = CoroutineDispatchers(io = testDispatcher, main = testDispatcher, default = testDispatcher)
|
||||
|
||||
/** Stubs the shared flow dependencies used by every test and returns an orchestrator. */
|
||||
private fun createOrchestrator(
|
||||
|
|
@ -114,7 +119,7 @@ class MeshServiceOrchestratorTest {
|
|||
takPrefs = takPrefs,
|
||||
databaseManager = databaseManager,
|
||||
connectionManager = connectionManager,
|
||||
scope = testScope,
|
||||
dispatchers = dispatchers,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -217,4 +222,79 @@ class MeshServiceOrchestratorTest {
|
|||
orchestrator.stop()
|
||||
assertFalse(orchestrator.isRunning)
|
||||
}
|
||||
|
||||
/**
|
||||
* Regression test for a bug where `stop()` did not actually tear down the FromRadio collectors. Collectors were
|
||||
* attached to an injected process-wide ServiceScope rather than a per-start scope, so `start() -> stop() ->
|
||||
* start()` caused duplicate collectors and every FromRadio packet was handled 2x (then 3x, etc.).
|
||||
*/
|
||||
@Test
|
||||
fun testFromRadioCollectorsTornDownOnStopAndRestartedCleanlyOnStart() {
|
||||
val receivedData = MutableSharedFlow<ByteArray>(extraBufferCapacity = 8)
|
||||
val orchestrator = createOrchestrator(receivedData = receivedData)
|
||||
every { nodeManager.myNodeNum } returns MutableStateFlow(null)
|
||||
|
||||
orchestrator.start()
|
||||
val packet1 = byteArrayOf(1, 2, 3)
|
||||
receivedData.tryEmit(packet1)
|
||||
verifySuspend(exactly(1)) { messageProcessor.handleFromRadio(packet1, null) }
|
||||
|
||||
orchestrator.stop()
|
||||
val packet2 = byteArrayOf(4, 5, 6)
|
||||
receivedData.tryEmit(packet2)
|
||||
// After stop(), the collector must be gone - the handler should not be invoked for packet2.
|
||||
verifySuspend(exactly(0)) { messageProcessor.handleFromRadio(packet2, null) }
|
||||
|
||||
orchestrator.start()
|
||||
val packet3 = byteArrayOf(7, 8, 9)
|
||||
receivedData.tryEmit(packet3)
|
||||
// After restart, a single fresh collector must process packet3 exactly once (not twice).
|
||||
verifySuspend(exactly(1)) { messageProcessor.handleFromRadio(packet3, null) }
|
||||
|
||||
orchestrator.stop()
|
||||
}
|
||||
|
||||
/**
|
||||
* Regression test for a channel-buffer-replay bug: the production [RadioInterfaceService] buffers inbound bytes in
|
||||
* a process-lifetime `Channel(UNLIMITED)`. Between `stop()` and the next `start()`, any bytes that arrive sit in
|
||||
* the channel and would be replayed to the fresh collector — prepending stale packets to the next session's
|
||||
* firmware handshake. `start()` must call [RadioInterfaceService.resetReceivedBuffer] before attaching the
|
||||
* collector.
|
||||
*/
|
||||
@Test
|
||||
fun testStartDrainsReceivedBufferBeforeAttachingCollector() {
|
||||
val orchestrator = createOrchestrator()
|
||||
every { nodeManager.myNodeNum } returns MutableStateFlow(null)
|
||||
|
||||
orchestrator.start()
|
||||
orchestrator.stop()
|
||||
orchestrator.start()
|
||||
|
||||
// resetReceivedBuffer must be invoked at least once per start() (twice total for two starts).
|
||||
verify(atLeast(2)) { radioInterfaceService.resetReceivedBuffer() }
|
||||
|
||||
orchestrator.stop()
|
||||
}
|
||||
|
||||
/** Additional regression: after many start/stop cycles, collectors must not accumulate. */
|
||||
@Test
|
||||
fun testRepeatedStartStopDoesNotAccumulateCollectors() {
|
||||
val receivedData = MutableSharedFlow<ByteArray>(extraBufferCapacity = 8)
|
||||
val orchestrator = createOrchestrator(receivedData = receivedData)
|
||||
every { nodeManager.myNodeNum } returns MutableStateFlow(null)
|
||||
|
||||
repeat(5) {
|
||||
orchestrator.start()
|
||||
orchestrator.stop()
|
||||
}
|
||||
|
||||
orchestrator.start()
|
||||
val packet = byteArrayOf(42)
|
||||
receivedData.tryEmit(packet)
|
||||
|
||||
// Despite six total start() calls, only the most recent collector is live.
|
||||
verifySuspend(exactly(1)) { messageProcessor.handleFromRadio(packet, null) }
|
||||
|
||||
orchestrator.stop()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue