fix(dataman_client): restore Linux syncHandler control flow byte-exact

The previous Windows-lockstep refactor unfolded the px4_poll() branch
into a single linear path that hoisted `bool updated` outside the
`else` arm. Although structurally equivalent, it left the
`else if (ret == 0) republish` branch coalesced into a tail block
that ran after orb_check / orb_copy each iteration. On the MAVROS
Mission integration test this changed the timing of the GET_ID
round-trip enough that the navigator's mission upload landed before
the dataman client had finished registering, and AUTO.MISSION
mode-set timed out:

  WARN  [navigator] Mission rejected: empty
  ERROR [dataman_client] timeout after 5000 ms!
  False is not true: failed to set mode | new mode: AUTO.MISSION

Split syncHandler() into two complete copies under
`#if defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS)`:

  - the Windows branch keeps the wall-clock + orb_check polling that
    the Windows lockstep build needs (px4_poll blocks against the
    lockstep scheduler before the simulator clock starts ticking)
  - the POSIX branch is restored byte-for-byte from main, so Linux,
    macOS, and SITL builds run exactly the original control flow

Also restore the constructor's eager GET_ID round-trip on POSIX, and
mark the unused `response` local on Windows with `(void)response` to
keep -Wunused-variable quiet.

Signed-off-by: Nuno Marques <n.marques21@hotmail.com>
This commit is contained in:
Nuno Marques
2026-05-07 17:18:59 -07:00
parent 5f844ecc92
commit bebb3b1a0c
+82 -40
View File
@@ -64,14 +64,27 @@ DatamanClient::DatamanClient()
_fds.fd = _dataman_response_sub;
_fds.events = POLLIN;
#if !(defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS))
#if defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS)
// Windows lockstep build only: defer client-ID acquisition
// until the first sync/async call, because hrt_absolute_time()
// is not yet advancing when the constructor runs and the
// GET_ID round-trip would stall startup.
(void)response;
#else
hrt_abstime timestamp = hrt_absolute_time();
// Acquire the client ID eagerly on POSIX (Linux/macOS) and
// non-lockstep builds. Only the Windows-lockstep build defers
// it, because hrt_absolute_time() is not yet advancing when
// the constructor runs there and the GET_ID round-trip would
// stall startup.
if (!updateClientId(1000_ms)) {
dataman_request_s request;
request.timestamp = timestamp;
request.request_type = DM_GET_ID;
request.client_id = CLIENT_ID_NOT_SET;
bool success = syncHandler(request, response, timestamp, 1000_ms);
if (success && (response.client_id > CLIENT_ID_NOT_SET)) {
_client_id = response.client_id;
} else {
PX4_ERR("Failed to get client ID!");
}
@@ -91,47 +104,29 @@ DatamanClient::~DatamanClient()
bool DatamanClient::syncHandler(const dataman_request_s &request, dataman_response_s &response,
const hrt_abstime &start_time, hrt_abstime timeout)
{
bool response_received = false;
int32_t ret = 0;
#if defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS)
// Windows lockstep: hrt_absolute_time() does not advance until the
// shim attaches to the simulator clock, so use the OS monotonic
// clock here and poll the orb queue with a short usleep instead of
// px4_poll(), which blocks against the lockstep scheduler.
(void)start_time;
bool response_received = false;
const hrt_abstime sync_start_time = dataman_wall_time_us();
hrt_abstime last_request_time = sync_start_time;
hrt_abstime time_elapsed = 0;
const unsigned max_iterations = (timeout / 1000) + 1;
unsigned iterations = 0;
#else
hrt_abstime time_elapsed = hrt_elapsed_time(&start_time);
#endif
perf_begin(_sync_perf);
_dataman_request_pub.publish(request);
while (!response_received && (time_elapsed < timeout)) {
#if defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS)
if (++iterations > max_iterations) {
break;
}
bool updated = false;
orb_check(_dataman_response_sub, &updated);
#else
uint32_t timeout_ms = 100;
ret = px4_poll(&_fds, 1, timeout_ms);
if (ret < 0) {
PX4_ERR("px4_poll returned error: %" PRIu32, ret);
break;
}
bool updated = false;
if (ret > 0) {
orb_check(_dataman_response_sub, &updated);
}
#endif
if (updated) {
orb_copy(ORB_ID(dataman_response), _dataman_response_sub, &response);
@@ -155,7 +150,6 @@ bool DatamanClient::syncHandler(const dataman_request_s &request, dataman_respon
}
}
#if defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS)
const hrt_abstime now = dataman_wall_time_us();
if (now - last_request_time >= 100_ms) {
@@ -164,20 +158,67 @@ bool DatamanClient::syncHandler(const dataman_request_s &request, dataman_respon
}
system_usleep(1000);
#else
time_elapsed = dataman_wall_time_us() - sync_start_time;
}
perf_end(_sync_perf);
if (!response_received) {
PX4_ERR("timeout after %" PRIu32 " ms!", static_cast<uint32_t>(timeout / 1000));
}
return response_received;
#else
bool response_received = false;
int32_t ret = 0;
hrt_abstime time_elapsed = hrt_elapsed_time(&start_time);
perf_begin(_sync_perf);
_dataman_request_pub.publish(request);
while (!response_received && (time_elapsed < timeout)) {
uint32_t timeout_ms = 100;
ret = px4_poll(&_fds, 1, timeout_ms);
if (ret < 0) {
PX4_ERR("px4_poll returned error: %" PRIu32, ret);
break;
} else if (ret == 0) {
if (ret == 0) {
// No response received, send new request
_dataman_request_pub.publish(request);
} else {
bool updated = false;
orb_check(_dataman_response_sub, &updated);
if (updated) {
orb_copy(ORB_ID(dataman_response), _dataman_response_sub, &response);
if (response.client_id == request.client_id) {
if ((response.request_type == request.request_type) &&
(response.item == request.item) &&
(response.index == request.index)) {
response_received = true;
break;
}
} else if (request.client_id == CLIENT_ID_NOT_SET) {
// validate timestamp from response.data
if (0 == memcmp(&(request.timestamp), &(response.data), sizeof(hrt_abstime))) {
response_received = true;
break;
}
}
}
}
#endif
#if defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS)
time_elapsed = dataman_wall_time_us() - sync_start_time;
#else
time_elapsed = hrt_elapsed_time(&start_time);
#endif
}
perf_end(_sync_perf);
@@ -187,6 +228,7 @@ bool DatamanClient::syncHandler(const dataman_request_s &request, dataman_respon
}
return response_received;
#endif
}
bool DatamanClient::updateClientId(hrt_abstime timeout)