6.5 Background retry queue
On publish failure, enqueue the MAC in a deduplicated in-memory set and let a background loop retry it using the configured backoff sequence. A reconnect signals an immediate drain, so a transient broker outage no longer drops the update.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -167,7 +167,7 @@ Each type lives in `src/FrameProcessor/Domain/`. Tests in `tests/FrameProcessor.
|
|||||||
### [x] 6.4 `/health` reports MQTT status
|
### [x] 6.4 `/health` reports MQTT status
|
||||||
- Replace hardcoded `mqttConnected` with `MqttPublisher.IsConnected`.
|
- Replace hardcoded `mqttConnected` with `MqttPublisher.IsConnected`.
|
||||||
|
|
||||||
### [ ] 6.5 Background retry queue
|
### [x] 6.5 Background retry queue
|
||||||
- In-memory `Channel<MacAddress>` (one slot per frame; newer publish supersedes older — per `SPEC.md` §5.1 "Multiple queued publishes for the same frame collapse to the most recent one").
|
- In-memory `Channel<MacAddress>` (one slot per frame; newer publish supersedes older — per `SPEC.md` §5.1 "Multiple queued publishes for the same frame collapse to the most recent one").
|
||||||
- Background loop drains with backoff sequence from `MqttOptions.RetryBackoffSeconds`.
|
- Background loop drains with backoff sequence from `MqttOptions.RetryBackoffSeconds`.
|
||||||
- On reconnect, drain immediately.
|
- On reconnect, drain immediately.
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
using System.Collections.Concurrent;
|
||||||
|
using System.Threading.Channels;
|
||||||
using FrameProcessor.Configuration;
|
using FrameProcessor.Configuration;
|
||||||
using FrameProcessor.Domain;
|
using FrameProcessor.Domain;
|
||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
@@ -11,6 +13,11 @@ namespace FrameProcessor.Mqtt;
|
|||||||
/// Persistent MQTT client wrapped in a hosted service. Connects on startup and
|
/// Persistent MQTT client wrapped in a hosted service. Connects on startup and
|
||||||
/// reconnects in the background after disconnects. Connection state is surfaced
|
/// reconnects in the background after disconnects. Connection state is surfaced
|
||||||
/// via <see cref="IsConnected"/> for the <c>/health</c> endpoint.
|
/// via <see cref="IsConnected"/> for the <c>/health</c> endpoint.
|
||||||
|
///
|
||||||
|
/// On publish failure the frame's MAC is enqueued for retry (topic and payload are rebuilt on each attempt); a
|
||||||
|
/// background loop drains the queue using <see cref="MqttOptions.RetryBackoffSeconds"/>.
|
||||||
|
/// Multiple failed publishes for the same MAC collapse to a single pending entry
|
||||||
|
/// per SPEC.md §5.1.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public sealed class MqttPublisher : BackgroundService
|
public sealed class MqttPublisher : BackgroundService
|
||||||
{
|
{
|
||||||
@@ -20,6 +27,10 @@ public sealed class MqttPublisher : BackgroundService
|
|||||||
private readonly ILogger<MqttPublisher> _logger;
|
private readonly ILogger<MqttPublisher> _logger;
|
||||||
private readonly IMqttClient _client;
|
private readonly IMqttClient _client;
|
||||||
|
|
||||||
|
// MACs whose last publish failed and still await a retry. The dictionary is used as a
// concurrent set: keying by MAC collapses repeated failures for one frame into a single
// entry, and the byte value is only a placeholder.
private readonly ConcurrentDictionary<MacAddress, byte> _pending = new();
|
||||||
|
// Wake-up signal for the retry loop. Capacity 1 with DropWrite means signals raised while
// one is already pending are discarded, so any number of them coalesce into one wake-up.
private readonly Channel<bool> _wakeup = Channel.CreateBounded<bool>(
|
||||||
|
new BoundedChannelOptions(1) { FullMode = BoundedChannelFullMode.DropWrite });
|
||||||
|
|
||||||
public MqttPublisher(IOptions<MqttOptions> options, ILogger<MqttPublisher> logger)
|
public MqttPublisher(IOptions<MqttOptions> options, ILogger<MqttPublisher> logger)
|
||||||
{
|
{
|
||||||
_options = options;
|
_options = options;
|
||||||
@@ -52,6 +63,10 @@ public sealed class MqttPublisher : BackgroundService
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
||||||
|
{
|
||||||
|
var retryLoop = RunRetryLoopAsync(stoppingToken);
|
||||||
|
|
||||||
|
try
|
||||||
{
|
{
|
||||||
while (!stoppingToken.IsCancellationRequested)
|
while (!stoppingToken.IsCancellationRequested)
|
||||||
{
|
{
|
||||||
@@ -76,6 +91,8 @@ public sealed class MqttPublisher : BackgroundService
|
|||||||
"MQTT reconnected to {Host}:{Port}",
|
"MQTT reconnected to {Host}:{Port}",
|
||||||
_options.Value.Host,
|
_options.Value.Host,
|
||||||
_options.Value.Port);
|
_options.Value.Port);
|
||||||
|
// Drain any queued publishes immediately now that the link is back up.
|
||||||
|
_wakeup.Writer.TryWrite(true);
|
||||||
}
|
}
|
||||||
catch (Exception ex) when (!stoppingToken.IsCancellationRequested)
|
catch (Exception ex) when (!stoppingToken.IsCancellationRequested)
|
||||||
{
|
{
|
||||||
@@ -87,6 +104,11 @@ public sealed class MqttPublisher : BackgroundService
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
finally
|
||||||
|
{
|
||||||
|
await retryLoop.ConfigureAwait(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Publish an <c>update</c> notification for the given frame. Returns
|
/// Publish an <c>update</c> notification for the given frame. Returns
|
||||||
@@ -94,11 +116,30 @@ public sealed class MqttPublisher : BackgroundService
|
|||||||
/// transport fault) — per SPEC.md §3.5, publish failure must not fail the upload,
|
/// transport fault) — per SPEC.md §3.5, publish failure must not fail the upload,
|
||||||
/// so this method never throws on MQTT errors. <see cref="OperationCanceledException"/>
|
/// so this method never throws on MQTT errors. <see cref="OperationCanceledException"/>
|
||||||
/// still propagates so callers can honor cooperative cancellation.
|
/// still propagates so callers can honor cooperative cancellation.
|
||||||
|
///
|
||||||
|
/// On failure, the MAC is enqueued for background retry (SPEC.md §5.1).
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public async Task<PublishResult> PublishAsync(MacAddress mac, CancellationToken cancellationToken)
|
public async Task<PublishResult> PublishAsync(MacAddress mac, CancellationToken cancellationToken)
{
    // Single publish attempt; the outcome is handed back to the caller unchanged.
    var outcome = await PublishCoreAsync(mac, cancellationToken).ConfigureAwait(false);

    // Anything other than a failure needs no follow-up work.
    if (outcome != PublishResult.Failure)
    {
        return outcome;
    }

    // Failed attempt: remember the frame so the background loop can retry it later.
    EnqueueRetry(mac);
    return outcome;
}
|
||||||
|
|
||||||
|
private async Task<PublishResult> PublishCoreAsync(MacAddress mac, CancellationToken cancellationToken)
|
||||||
{
|
{
|
||||||
var opts = _options.Value;
|
var opts = _options.Value;
|
||||||
var topic = $"{opts.BaseTopic}/{mac}";
|
var topic = $"{opts.BaseTopic}/{mac}";
|
||||||
|
|
||||||
|
if (!_client.IsConnected)
|
||||||
|
{
|
||||||
|
_logger.LogWarning("MQTT publish to {Topic} skipped — client not connected", topic);
|
||||||
|
return PublishResult.Failure;
|
||||||
|
}
|
||||||
|
|
||||||
var message = new MqttApplicationMessageBuilder()
|
var message = new MqttApplicationMessageBuilder()
|
||||||
.WithTopic(topic)
|
.WithTopic(topic)
|
||||||
.WithPayload("update")
|
.WithPayload("update")
|
||||||
@@ -125,6 +166,94 @@ public sealed class MqttPublisher : BackgroundService
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Records <paramref name="mac"/> as awaiting a retry and signals the retry loop.
/// </summary>
/// <param name="mac">Frame whose publish failed.</param>
private void EnqueueRetry(MacAddress mac)
{
    // Indexer assignment overwrites any existing entry, so a frame is pending at most once.
    _pending[mac] = default;

    // Best-effort nudge: a rejected write is fine, the result is deliberately ignored.
    _ = _wakeup.Writer.TryWrite(true);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Background loop that retries publishes for every MAC in <c>_pending</c>.
/// </summary>
/// <remarks>
/// While nothing is pending the loop sleeps until a wake-up signal arrives. While entries
/// are pending it waits for the current backoff delay, or for an earlier wake-up signal
/// (new failure or reconnect), and then attempts every pending MAC once. A pass that leaves
/// entries behind advances the backoff index (capped at the last entry); a clean pass
/// resets it to the first entry.
/// </remarks>
/// <param name="stoppingToken">Signals host shutdown; the loop returns once it is cancelled.</param>
/// <returns>A task that completes when the loop has stopped.</returns>
private async Task RunRetryLoopAsync(CancellationToken stoppingToken)
{
    // Delay used only when no backoff schedule is configured, so an empty setting degrades
    // to a fixed-interval retry instead of an out-of-range index that kills the loop.
    const double FallbackDelaySeconds = 5;

    var backoff = _options.Value.RetryBackoffSeconds;
    if (backoff is not { Length: > 0 })
    {
        _logger.LogWarning(
            "MQTT retry backoff sequence is empty — falling back to a fixed {Seconds}s retry delay",
            FallbackDelaySeconds);
    }

    var backoffIndex = 0;

    while (!stoppingToken.IsCancellationRequested)
    {
        try
        {
            if (_pending.IsEmpty)
            {
                // Idle: wait indefinitely for an enqueue or a reconnect signal.
                await _wakeup.Reader.ReadAsync(stoppingToken).ConfigureAwait(false);
                backoffIndex = 0;
            }
            else
            {
                var delay = backoff is { Length: > 0 }
                    ? TimeSpan.FromSeconds(backoff[Math.Min(backoffIndex, backoff.Length - 1)])
                    : TimeSpan.FromSeconds(FallbackDelaySeconds);

                using var delayCts = CancellationTokenSource.CreateLinkedTokenSource(stoppingToken);
                delayCts.CancelAfter(delay);
                try
                {
                    // A signal during the backoff (new failure, reconnect) wakes us early.
                    await _wakeup.Reader.ReadAsync(delayCts.Token).ConfigureAwait(false);
                }
                catch (OperationCanceledException) when (!stoppingToken.IsCancellationRequested)
                {
                    // Backoff window elapsed; fall through to attempt.
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Shutdown requested while waiting.
            return;
        }

        if (stoppingToken.IsCancellationRequested)
        {
            return;
        }

        var anyAttempted = false;
        var anyFailed = false;

        // Snapshot the keys so entries added or removed during the pass do not affect enumeration.
        foreach (var mac in _pending.Keys.ToArray())
        {
            if (!_client.IsConnected)
            {
                // Wait for the reconnect loop to bring the link back up before retrying.
                anyFailed = true;
                break;
            }

            anyAttempted = true;

            PublishResult result;
            try
            {
                result = await PublishCoreAsync(mac, stoppingToken).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Shutdown mid-publish: exit the same way as cancellation during a wait.
                return;
            }

            if (result == PublishResult.Success)
            {
                _pending.TryRemove(mac, out _);
            }
            else
            {
                anyFailed = true;
            }
        }

        // Back off further only when this pass left work behind; otherwise start over.
        var retryLater = (!anyAttempted || anyFailed) && !_pending.IsEmpty;
        backoffIndex = retryLater && backoff is { Length: > 0 }
            ? Math.Min(backoffIndex + 1, backoff.Length - 1)
            : 0;
    }
}
|
||||||
|
|
||||||
public override async Task StopAsync(CancellationToken cancellationToken)
|
public override async Task StopAsync(CancellationToken cancellationToken)
|
||||||
{
|
{
|
||||||
await base.StopAsync(cancellationToken).ConfigureAwait(false);
|
await base.StopAsync(cancellationToken).ConfigureAwait(false);
|
||||||
|
|||||||
Reference in New Issue
Block a user