fix(ble): never give up while user has device selected

The reconnect policy previously capped at 10 consecutive failures and
emitted a permanent disconnect, which terminated the reconnect loop and
required the user to manually re-select the device. BleRadioTransport is
only ever instantiated for the user-selected address (verified via
SharedRadioInterfaceService.startTransportLocked), so the only legitimate
permanent-disconnect path is explicit close() owned by the service layer.

- BleRadioTransport: pass maxFailures = Int.MAX_VALUE; backoff still
  caps at 60 s so battery impact remains bounded.
- BleExceptionClassifier: flip UnmetRequirementException (BT off /
  permission missing) to non-permanent — both can resolve without the
  user re-selecting the device.
- Test: replace the old 'gives up after DEFAULT_MAX_FAILURES' test with
  an inverted contract test that runs past the legacy threshold and
  asserts the policy never emits isPermanent=true on its own.
This commit is contained in:
James Rich 2026-04-19 12:45:28 -05:00
parent a6f3a6b4a5
commit 2137ef3410
3 changed files with 28 additions and 14 deletions

View file

@ -26,7 +26,9 @@ import com.juul.kable.UnmetRequirementException
/**
* Classification of a BLE-layer exception for the transport layer to act on.
*
* @property isPermanent `true` if the condition won't resolve without user intervention (e.g. Bluetooth disabled).
* @property isPermanent `true` if the condition cannot resolve without explicit user re-selection of the device.
* Currently always `false` all known BLE exceptions can resolve without user intervention (BT toggling, permission
* grants, transient GATT errors). Reserved for future use.
* @property gattStatus the platform GATT status code when available (Android-specific).
* @property message a human-readable description of the failure.
*/
@ -50,6 +52,9 @@ fun Throwable.classifyBleException(): BleExceptionInfo? = when (this) {
is GattRequestRejectedException ->
BleExceptionInfo(isPermanent = false, message = "GATT request rejected (busy)")
is UnmetRequirementException ->
BleExceptionInfo(isPermanent = true, message = message ?: "Bluetooth LE unavailable")
// Bluetooth disabled or runtime permission missing. Both can resolve without re-selecting the
// device (user re-enables BT, or grants permission). Surface as transient so the transport keeps
// retrying; UI can show a hint based on the message.
BleExceptionInfo(isPermanent = false, message = message ?: "Bluetooth LE unavailable")
else -> null
}

View file

@ -133,7 +133,11 @@ class BleRadioTransport(
@Volatile private var isFullyConnected = false
private var connectionJob: Job? = null
private val reconnectPolicy = BleReconnectPolicy()
// Never give up while the user has this device selected. Higher layers (SharedRadioInterfaceService)
// own the explicit-disconnect lifecycle and will close() us when the user picks a different device or
// toggles the connection off; until then, retry forever with the policy's exponential-backoff cap (60 s).
private val reconnectPolicy = BleReconnectPolicy(maxFailures = Int.MAX_VALUE)
private val heartbeatSender =
HeartbeatSender(

View file

@ -131,16 +131,17 @@ class BleRadioTransportTest {
}
/**
* After [BleReconnectPolicy.DEFAULT_MAX_FAILURES] (10) consecutive failures, the reconnect loop should stop and
* signal a permanent disconnect. This prevents infinite battery drain when the device is genuinely offline.
* Reconnect policy must NEVER give up on its own. The transport is only ever instantiated for the user-selected
* device, and explicit-disconnect is owned by the service layer (close()). Even after a sustained failure storm
* well beyond the legacy [BleReconnectPolicy.DEFAULT_MAX_FAILURES] the transport must keep retrying and must
* never call `onDisconnect(isPermanent = true)` from the give-up path.
*
* Time budget for 10 failures with bonded device (no scan): Each iteration = 3s settle + connectAndAwait throw +
* backoff Backoffs: 5s, 10s, 20s, 40s, 60s, 60s, 60s, 60s, 60s, (exit at failure 10 before backoff) Total 10×3s
* settle + 5+10+20+40+60+60+60+60+60 = 30 + 375 = 405s 405_000ms We use a generous 410_000ms to cover any timing
* variance.
* Time budget for 15 failures with bonded device (no scan): each iteration 3 s settle + immediate throw +
* backoff. Backoffs cap at 60 s after failure 5: 5+10+20+40+60+60+60+60+60+60+60+60+60+60+60 = 735 s, plus 15×3 s
* settle = 45 s, total 780 s. Use 800_000 ms to cover variance.
*/
@Test
fun `reconnect loop stops after DEFAULT_MAX_FAILURES with permanent disconnect`() = runTest {
fun `reconnect loop never gives up - no permanent disconnect from policy`() = runTest {
val device = FakeBleDevice(address = address, name = "Test Device")
bluetoothRepository.bond(device)
@ -158,11 +159,15 @@ class BleRadioTransportTest {
)
bleTransport.start()
// Advance enough time for all 10 failures to occur.
advanceTimeBy(410_001L)
// Run well past where the legacy policy (maxFailures = 10) would have given up.
advanceTimeBy(800_001L)
// Should have been called with isPermanent=true at least once (the final call).
verify { service.onDisconnect(isPermanent = true, errorMessage = any()) }
// Transient disconnects (isPermanent = false) are expected once the failure threshold is hit;
// the policy must NEVER signal a permanent disconnect on its own. Only explicit close()
// (verified separately by the service layer) may emit isPermanent = true.
verify(mode = dev.mokkery.verify.VerifyMode.not) {
service.onDisconnect(isPermanent = true, errorMessage = any())
}
bleTransport.close()
}