From bebb3b1a0ced7dc0efb2b0c600cf04d9a87f9b4b Mon Sep 17 00:00:00 2001 From: Nuno Marques Date: Thu, 7 May 2026 17:18:59 -0700 Subject: [PATCH] fix(dataman_client): restore Linux syncHandler control flow byte-exact The previous Windows-lockstep refactor unfolded the px4_poll() branch into a single linear path that hoisted `bool updated` outside the `else` arm. Although structurally equivalent, it left the `else if (ret == 0) republish` branch coalesced into a tail block that ran after orb_check / orb_copy each iteration. On the MAVROS Mission integration test this changed the timing of the GET_ID round-trip enough that the navigator's mission upload landed before the dataman client had finished registering, and AUTO.MISSION mode-set timed out: WARN [navigator] Mission rejected: empty ERROR [dataman_client] timeout after 5000 ms! False is not true: failed to set mode | new mode: AUTO.MISSION Split syncHandler() into two complete copies under `#if defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS)`: - the Windows branch keeps the wall-clock + orb_check polling that the Windows lockstep build needs (px4_poll blocks against the lockstep scheduler before the simulator clock starts ticking) - the POSIX branch is restored byte-for-byte from main, so Linux, macOS, and SITL builds run exactly the original control flow Also restore the constructor's eager GET_ID round-trip on POSIX, and mark the unused `response` local on Windows with `(void)response` to keep -Wunused-variable quiet. Signed-off-by: Nuno Marques --- src/lib/dataman_client/DatamanClient.cpp | 122 +++++++++++++++-------- 1 file changed, 82 insertions(+), 40 deletions(-) diff --git a/src/lib/dataman_client/DatamanClient.cpp b/src/lib/dataman_client/DatamanClient.cpp index a263f00a45..1ba9673f0f 100644 --- a/src/lib/dataman_client/DatamanClient.cpp +++ b/src/lib/dataman_client/DatamanClient.cpp @@ -64,14 +64,27 @@ DatamanClient::DatamanClient() _fds.fd = _dataman_response_sub; _fds.events = POLLIN; -#if !(defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS)) +#if defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS) + // Windows lockstep build only: defer client-ID acquisition + // until the first sync/async call, because hrt_absolute_time() + // is not yet advancing when the constructor runs and the + // GET_ID round-trip would stall startup. + (void)response; +#else + hrt_abstime timestamp = hrt_absolute_time(); - // Acquire the client ID eagerly on POSIX (Linux/macOS) and - // non-lockstep builds. Only the Windows-lockstep build defers - // it, because hrt_absolute_time() is not yet advancing when - // the constructor runs there and the GET_ID round-trip would - // stall startup. - if (!updateClientId(1000_ms)) { + dataman_request_s request; + request.timestamp = timestamp; + request.request_type = DM_GET_ID; + request.client_id = CLIENT_ID_NOT_SET; + + bool success = syncHandler(request, response, timestamp, 1000_ms); + + if (success && (response.client_id > CLIENT_ID_NOT_SET)) { + + _client_id = response.client_id; + + } else { PX4_ERR("Failed to get client ID!"); } @@ -91,47 +104,29 @@ DatamanClient::~DatamanClient() bool DatamanClient::syncHandler(const dataman_request_s &request, dataman_response_s &response, const hrt_abstime &start_time, hrt_abstime timeout) { - bool response_received = false; - int32_t ret = 0; #if defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS) + // Windows lockstep: hrt_absolute_time() does not advance until the + // shim attaches to the simulator clock, so use the OS monotonic + // clock here and poll the orb queue with a short usleep instead of + // px4_poll(), which blocks against the lockstep scheduler. (void)start_time; + bool response_received = false; const hrt_abstime sync_start_time = dataman_wall_time_us(); hrt_abstime last_request_time = sync_start_time; hrt_abstime time_elapsed = 0; const unsigned max_iterations = (timeout / 1000) + 1; unsigned iterations = 0; -#else - hrt_abstime time_elapsed = hrt_elapsed_time(&start_time); -#endif + perf_begin(_sync_perf); _dataman_request_pub.publish(request); while (!response_received && (time_elapsed < timeout)) { -#if defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS) - if (++iterations > max_iterations) { break; } bool updated = false; orb_check(_dataman_response_sub, &updated); -#else - uint32_t timeout_ms = 100; - ret = px4_poll(&_fds, 1, timeout_ms); - - if (ret < 0) { - PX4_ERR("px4_poll returned error: %" PRIu32, ret); - break; - - } - - bool updated = false; - - if (ret > 0) { - orb_check(_dataman_response_sub, &updated); - } - -#endif if (updated) { orb_copy(ORB_ID(dataman_response), _dataman_response_sub, &response); @@ -155,7 +150,6 @@ bool DatamanClient::syncHandler(const dataman_request_s &request, dataman_respon } } -#if defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS) const hrt_abstime now = dataman_wall_time_us(); if (now - last_request_time >= 100_ms) { @@ -164,20 +158,67 @@ bool DatamanClient::syncHandler(const dataman_request_s &request, dataman_respon } system_usleep(1000); -#else + time_elapsed = dataman_wall_time_us() - sync_start_time; + } + + perf_end(_sync_perf); + + if (!response_received) { + PX4_ERR("timeout after %" PRIu32 " ms!", static_cast(timeout / 1000)); + } + + return response_received; + +#else + bool response_received = false; + int32_t ret = 0; + hrt_abstime time_elapsed = hrt_elapsed_time(&start_time); + perf_begin(_sync_perf); + _dataman_request_pub.publish(request); + + while (!response_received && (time_elapsed < timeout)) { + + uint32_t timeout_ms = 100; + ret = px4_poll(&_fds, 1, timeout_ms); + + if (ret < 0) { + PX4_ERR("px4_poll returned error: %" PRIu32, ret); + break; + + } else if (ret == 0) { - if (ret == 0) { // No response received, send new request _dataman_request_pub.publish(request); + + } else { + + bool updated = false; + orb_check(_dataman_response_sub, &updated); + + if (updated) { + orb_copy(ORB_ID(dataman_response), _dataman_response_sub, &response); + + if (response.client_id == request.client_id) { + + if ((response.request_type == request.request_type) && + (response.item == request.item) && + (response.index == request.index)) { + response_received = true; + break; + } + + } else if (request.client_id == CLIENT_ID_NOT_SET) { + + // validate timestamp from response.data + if (0 == memcmp(&(request.timestamp), &(response.data), sizeof(hrt_abstime))) { + response_received = true; + break; + } + } + } } -#endif - -#if defined(ENABLE_LOCKSTEP_SCHEDULER) && defined(__PX4_WINDOWS) - time_elapsed = dataman_wall_time_us() - sync_start_time; -#else time_elapsed = hrt_elapsed_time(&start_time); -#endif } perf_end(_sync_perf); @@ -187,6 +228,7 @@ bool DatamanClient::syncHandler(const dataman_request_s &request, dataman_respon } return response_received; +#endif } bool DatamanClient::updateClientId(hrt_abstime timeout)