6.5 Background retry queue
On publish failure, enqueue the MAC in a dedup'd in-memory set and let a background loop retry with the configured backoff sequence. Reconnects signal an immediate drain so a transient broker outage no longer drops the update. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -167,7 +167,7 @@ Each type lives in `src/FrameProcessor/Domain/`. Tests in `tests/FrameProcessor.
|
||||
### [x] 6.4 `/health` reports MQTT status
|
||||
- Replace hardcoded `mqttConnected` with `MqttPublisher.IsConnected`.
|
||||
|
||||
### [ ] 6.5 Background retry queue
|
||||
### [x] 6.5 Background retry queue
|
||||
- In-memory `Channel<MacAddress>` (one slot per frame; newer publish supersedes older — per `SPEC.md` §5.1 "Multiple queued publishes for the same frame collapse to the most recent one").
|
||||
- Background loop drains with backoff sequence from `MqttOptions.RetryBackoffSeconds`.
|
||||
- On reconnect, drain immediately.
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Threading.Channels;
|
||||
using FrameProcessor.Configuration;
|
||||
using FrameProcessor.Domain;
|
||||
using Microsoft.Extensions.Options;
|
||||
@@ -11,6 +13,11 @@ namespace FrameProcessor.Mqtt;
|
||||
/// Persistent MQTT client wrapped in a hosted service. Connects on startup and
|
||||
/// reconnects in the background after disconnects. Connection state is surfaced
|
||||
/// via <see cref="IsConnected"/> for the <c>/health</c> endpoint.
|
||||
///
|
||||
/// On publish failure the (mac → topic, payload) pair is enqueued for retry; a
|
||||
/// background loop drains the queue using <see cref="MqttOptions.RetryBackoffSeconds"/>.
|
||||
/// Multiple failed publishes for the same MAC collapse to a single pending entry
|
||||
/// per SPEC.md §5.1.
|
||||
/// </summary>
|
||||
public sealed class MqttPublisher : BackgroundService
|
||||
{
|
||||
@@ -20,6 +27,10 @@ public sealed class MqttPublisher : BackgroundService
|
||||
private readonly ILogger<MqttPublisher> _logger;
|
||||
private readonly IMqttClient _client;
|
||||
|
||||
private readonly ConcurrentDictionary<MacAddress, byte> _pending = new();
|
||||
private readonly Channel<bool> _wakeup = Channel.CreateBounded<bool>(
|
||||
new BoundedChannelOptions(1) { FullMode = BoundedChannelFullMode.DropWrite });
|
||||
|
||||
public MqttPublisher(IOptions<MqttOptions> options, ILogger<MqttPublisher> logger)
|
||||
{
|
||||
_options = options;
|
||||
@@ -52,6 +63,10 @@ public sealed class MqttPublisher : BackgroundService
|
||||
}
|
||||
|
||||
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
||||
{
|
||||
var retryLoop = RunRetryLoopAsync(stoppingToken);
|
||||
|
||||
try
|
||||
{
|
||||
while (!stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
@@ -76,6 +91,8 @@ public sealed class MqttPublisher : BackgroundService
|
||||
"MQTT reconnected to {Host}:{Port}",
|
||||
_options.Value.Host,
|
||||
_options.Value.Port);
|
||||
// Drain any queued publishes immediately now that the link is back up.
|
||||
_wakeup.Writer.TryWrite(true);
|
||||
}
|
||||
catch (Exception ex) when (!stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
@@ -87,6 +104,11 @@ public sealed class MqttPublisher : BackgroundService
|
||||
}
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
await retryLoop.ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Publish an <c>update</c> notification for the given frame. Returns
|
||||
@@ -94,11 +116,30 @@ public sealed class MqttPublisher : BackgroundService
|
||||
/// transport fault) — per SPEC.md §3.5, publish failure must not fail the upload,
|
||||
/// so this method never throws on MQTT errors. <see cref="OperationCanceledException"/>
|
||||
/// still propagates so callers can honor cooperative cancellation.
|
||||
///
|
||||
/// On failure, the MAC is enqueued for background retry (SPEC.md §5.1).
|
||||
/// </summary>
|
||||
public async Task<PublishResult> PublishAsync(MacAddress mac, CancellationToken cancellationToken)
|
||||
{
|
||||
var result = await PublishCoreAsync(mac, cancellationToken).ConfigureAwait(false);
|
||||
if (result == PublishResult.Failure)
|
||||
{
|
||||
EnqueueRetry(mac);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private async Task<PublishResult> PublishCoreAsync(MacAddress mac, CancellationToken cancellationToken)
|
||||
{
|
||||
var opts = _options.Value;
|
||||
var topic = $"{opts.BaseTopic}/{mac}";
|
||||
|
||||
if (!_client.IsConnected)
|
||||
{
|
||||
_logger.LogWarning("MQTT publish to {Topic} skipped — client not connected", topic);
|
||||
return PublishResult.Failure;
|
||||
}
|
||||
|
||||
var message = new MqttApplicationMessageBuilder()
|
||||
.WithTopic(topic)
|
||||
.WithPayload("update")
|
||||
@@ -125,6 +166,94 @@ public sealed class MqttPublisher : BackgroundService
|
||||
}
|
||||
}
|
||||
|
||||
private void EnqueueRetry(MacAddress mac)
|
||||
{
|
||||
_pending[mac] = 0;
|
||||
_wakeup.Writer.TryWrite(true);
|
||||
}
|
||||
|
||||
private async Task RunRetryLoopAsync(CancellationToken stoppingToken)
|
||||
{
|
||||
var backoff = _options.Value.RetryBackoffSeconds;
|
||||
var backoffIndex = 0;
|
||||
|
||||
while (!stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (_pending.IsEmpty)
|
||||
{
|
||||
// Idle: wait indefinitely for an enqueue or a reconnect signal.
|
||||
await _wakeup.Reader.ReadAsync(stoppingToken).ConfigureAwait(false);
|
||||
backoffIndex = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
var delaySeconds = backoff[Math.Min(backoffIndex, backoff.Length - 1)];
|
||||
using var delayCts = CancellationTokenSource.CreateLinkedTokenSource(stoppingToken);
|
||||
delayCts.CancelAfter(TimeSpan.FromSeconds(delaySeconds));
|
||||
try
|
||||
{
|
||||
// A signal during the backoff (new failure, reconnect) wakes us early.
|
||||
await _wakeup.Reader.ReadAsync(delayCts.Token).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (!stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
// Backoff window elapsed; fall through to attempt.
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var anyAttempted = false;
|
||||
var anyFailed = false;
|
||||
foreach (var mac in _pending.Keys.ToArray())
|
||||
{
|
||||
if (!_client.IsConnected)
|
||||
{
|
||||
// Wait for the reconnect loop to bring the link back up before retrying.
|
||||
anyFailed = true;
|
||||
break;
|
||||
}
|
||||
|
||||
anyAttempted = true;
|
||||
var result = await PublishCoreAsync(mac, stoppingToken).ConfigureAwait(false);
|
||||
if (result == PublishResult.Success)
|
||||
{
|
||||
_pending.TryRemove(mac, out _);
|
||||
}
|
||||
else
|
||||
{
|
||||
anyFailed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!anyAttempted || anyFailed)
|
||||
{
|
||||
if (!_pending.IsEmpty)
|
||||
{
|
||||
backoffIndex = Math.Min(backoffIndex + 1, backoff.Length - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
backoffIndex = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
backoffIndex = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public override async Task StopAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
await base.StopAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
Reference in New Issue
Block a user