mirror of
https://github.com/PX4/PX4-Autopilot.git
synced 2026-05-26 09:26:25 +08:00
feat(posix): tune Windows sleep primitives
Add host-calibrated sleep and spin-tail handling for Windows SITL, plus scheduler and clock helpers needed to keep short lockstep waits precise under MSVC and MinGW. Signed-off-by: Nuno Marques <n.marques21@hotmail.com>
This commit is contained in:
@@ -178,6 +178,11 @@ int pthread_cond_signal(pthread_cond_t *cond);
|
||||
int pthread_cond_broadcast(pthread_cond_t *cond);
|
||||
/** @} */
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
typedef void (*px4_pthread_cond_notify_callback_t)(pthread_cond_t *cond, int broadcast);
|
||||
int px4_pthread_cond_set_notify_callback(px4_pthread_cond_notify_callback_t callback);
|
||||
#endif
|
||||
|
||||
/** @name Thread lifecycle functions
|
||||
*
|
||||
* Wrap CreateThread/WaitForSingleObject/CloseHandle with pthread-compatible
|
||||
@@ -191,6 +196,8 @@ int pthread_detach(pthread_t thread);
|
||||
void pthread_exit(void *value_ptr);
|
||||
pthread_t pthread_self(void);
|
||||
int pthread_equal(pthread_t t1, pthread_t t2);
|
||||
int pthread_getschedparam(pthread_t thread, int *policy, struct sched_param *param);
|
||||
int pthread_setschedparam(pthread_t thread, int policy, const struct sched_param *param);
|
||||
int pthread_cancel(pthread_t thread);
|
||||
int pthread_kill(pthread_t thread, int sig);
|
||||
/** @} */
|
||||
@@ -244,3 +251,17 @@ int pthread_getname_np(pthread_t thread, char *name, size_t len);
|
||||
#ifndef PTHREAD_STACK_MIN
|
||||
#define PTHREAD_STACK_MIN 16384
|
||||
#endif
|
||||
|
||||
#if (defined(__PX4_WINDOWS) || defined(_WIN32)) && !defined(_MSC_VER) && \
|
||||
(defined(ENABLE_LOCKSTEP_SCHEDULER) || defined(PX4_WINDOWS_PTHREAD_LOCKSTEP_BRIDGE))
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int px4_lockstep_pthread_cond_signal(pthread_cond_t *cond);
|
||||
int px4_lockstep_pthread_cond_broadcast(pthread_cond_t *cond);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#define pthread_cond_signal(cond_) px4_lockstep_pthread_cond_signal(cond_)
|
||||
#define pthread_cond_broadcast(cond_) px4_lockstep_pthread_cond_broadcast(cond_)
|
||||
#endif
|
||||
|
||||
@@ -67,12 +67,14 @@ typedef int clockid_t;
|
||||
#ifndef CLOCK_MONOTONIC_COARSE
|
||||
#define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC
|
||||
#endif
|
||||
#ifndef _TIMEVAL_DEFINED
|
||||
#define _TIMEVAL_DEFINED
|
||||
struct timeval {
|
||||
long tv_sec;
|
||||
long tv_usec;
|
||||
};
|
||||
/* The MSVC SDK declares `struct timeval` only inside <winsock.h> /
|
||||
* <winsock2.h>, and does so unconditionally — no header guard. Pull it
|
||||
* from there so any later <sys/socket.h>-via-<winsock2.h> include doesn't
|
||||
* trigger a "redefinition" (C2011). NOMINMAX / WIN32_LEAN_AND_MEAN are
|
||||
* already in effect via the SITL compile flags, so the cost here is
|
||||
* mostly the winsock typedefs. */
|
||||
#ifndef _WINSOCK2API_
|
||||
#include <winsock2.h>
|
||||
#endif
|
||||
#else
|
||||
#include_next <time.h>
|
||||
|
||||
@@ -46,6 +46,15 @@
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#include <sys/types.h>
|
||||
#elif defined(_WIN32)
|
||||
/*
|
||||
* MinGW declares its own usleep() in <unistd.h>. Pull the rest of that
|
||||
* header through normally, but hide only that declaration so PX4 can provide
|
||||
* the same high-resolution Windows implementation for system_usleep.
|
||||
*/
|
||||
#define usleep _px4_mingw_runtime_usleep
|
||||
#include_next <unistd.h>
|
||||
#undef usleep
|
||||
#else
|
||||
#include_next <unistd.h>
|
||||
#endif
|
||||
@@ -115,14 +124,258 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32)
|
||||
/* CREATE_WAITABLE_TIMER_HIGH_RESOLUTION (Windows 10 1803+; build 17134)
|
||||
* may not be defined in older SDK headers - fall back to the literal
|
||||
* value documented by Microsoft. Same for the manual-reset flag. */
|
||||
#ifndef CREATE_WAITABLE_TIMER_MANUAL_RESET
|
||||
#define CREATE_WAITABLE_TIMER_MANUAL_RESET 0x00000001
|
||||
#endif
|
||||
#ifndef CREATE_WAITABLE_TIMER_HIGH_RESOLUTION
|
||||
#define CREATE_WAITABLE_TIMER_HIGH_RESOLUTION 0x00000002
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
/* Compiler-native thread-local storage specifier used by the usleep() shim below. */
|
||||
#define PX4_WINDOWS_SLEEP_TLS __declspec(thread)
|
||||
#else
|
||||
#define PX4_WINDOWS_SLEEP_TLS __thread
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Runtime-tuned thresholds that drive the spin-residual hybrid below.
|
||||
*
|
||||
* Defined and (optionally) auto-calibrated by
|
||||
* px4_windows_calibrate_usleep_threshold() in
|
||||
* platforms/posix/src/px4/windows/runtime/init.cpp. The calibration runs
|
||||
* immediately after timeBeginPeriod(1), before any module thread starts
|
||||
* calling usleep(), so the first usleep() the process performs already
|
||||
* sees the tuned value.
|
||||
*
|
||||
* Override at process startup with the PX4_USLEEP_SPIN_US environment
|
||||
* variable (microseconds). Only values in [0, 50000] are accepted;
|
||||
* anything else is ignored with a warning. 0 effectively forces every
|
||||
* wait > 0 us through the timer + spin-tail path.
|
||||
*
|
||||
* @c g_usleep_spin_tail_us is the *upper bound* of the QPC spin closing
|
||||
* the residual after the high-resolution waitable timer wakes. The
|
||||
* adaptive controller in usleep() shrinks the tail per-thread toward
|
||||
* the observed timer overshoot via an EWMA, so a quiet host pays only
|
||||
* ~p95 jitter of CPU spin per call instead of the worst-case bound.
|
||||
*/
|
||||
extern long g_usleep_pure_spin_us;
|
||||
extern long g_usleep_spin_tail_us;
|
||||
|
||||
/* Floor for the per-thread adaptive spin tail. Initialised by the
|
||||
* calibration routine to the host-measured P95 waitable-timer jitter so
|
||||
* the controller never trims the spin below the value we already know
|
||||
* is needed to cover this host's observed long-tail wakes. Defaults to
|
||||
* a conservative 700 us when calibration cannot run. */
|
||||
extern long g_usleep_adaptive_min_tail_us;
|
||||
|
||||
/**
|
||||
* @brief Sleep for at least @p usec microseconds with microsecond accuracy.
|
||||
*
|
||||
* Windows Sleep() is quantized to the system timer tick (~15.6 ms by
|
||||
* default; 1 ms after timeBeginPeriod(1) in init.cpp). A 4 ms sleep
|
||||
* therefore rounds up to a full HPET tick, throttling SITL sim time.
|
||||
*
|
||||
* The naive Sleep() path loses ~10 % of wall time. A pure HPET-backed
|
||||
* waitable timer wakes within 0.3 - 0.7 ms of the target on a quiet
|
||||
* system but quantizes to 1 ms under load, so a tight SITL producer
|
||||
* (250 Hz - 1 kHz lockstep loop) accumulates 5 - 10 % drift.
|
||||
*
|
||||
* The current implementation is a spin-residual hybrid:
|
||||
*
|
||||
* - Requests <= @c g_usleep_pure_spin_us are held entirely on the QPC
|
||||
* deadline. This covers SIH's normal lockstep wall-sleep cadence
|
||||
* (200 Hz - 2 kHz, 500 - 5000 us). Even a single 0.5 - 1 ms
|
||||
* scheduler-late wake in that band becomes visible as sim/wall
|
||||
* drift, so the short simulation waits pay CPU for determinism.
|
||||
*
|
||||
* - For requests > @c g_usleep_pure_spin_us the bulk of the wait runs
|
||||
* on a high-resolution waitable timer (CreateWaitableTimerExW +
|
||||
* CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, Windows 10 1803+). The
|
||||
* timer is armed to wake @c g_usleep_spin_tail_us microseconds
|
||||
* early and the residual is closed by a QueryPerformanceCounter
|
||||
* busy-loop. This trades ~g_usleep_spin_tail_us of CPU per call for
|
||||
* microsecond-scale accuracy against the absolute QPC target.
|
||||
*
|
||||
* The HANDLE is cached per-thread in compiler-native TLS so we pay one
|
||||
* CreateWaitableTimerExW per thread for the lifetime of the process.
|
||||
*/
|
||||
static inline int usleep(useconds_t usec)
{
	// NOTE: the legacy unconditional Sleep(ceil(usec / 1000)) that used to
	// open this function is intentionally gone. Leaving it in place would
	// block for the whole request *before* the QPC start timestamp is
	// taken, so every call would wait roughly twice as long as asked.
	if (usec == 0) {
		return 0;
	}

	// Snapshot the tuned thresholds once per call. Plain reads are used:
	// per the notes on these globals, calibration in init.cpp writes them
	// before any other thread starts calling usleep().
	const long pure_spin_us = g_usleep_pure_spin_us;
	const long spin_tail_us = g_usleep_spin_tail_us;
	const long adaptive_floor_us = g_usleep_adaptive_min_tail_us;

	LARGE_INTEGER qpc_freq;
	LARGE_INTEGER qpc_start;
	QueryPerformanceFrequency(&qpc_freq);
	QueryPerformanceCounter(&qpc_start);

	// Absolute QPC target = start + usec. The conversion uses 64-bit
	// integer math throughout: at 10 MHz QPC a 1-second sleep gives a
	// product of 1e6 * 1e7 = 1e13, and even a 32-bit useconds_t maximum
	// (~4.3e9) only reaches ~4.3e16, well within LONGLONG range.
	const LONGLONG qpc_target = qpc_start.QuadPart
				    + ((LONGLONG)usec * qpc_freq.QuadPart) / 1000000LL;

	// Compare in 64 bits: where long is 32-bit, casting a request of
	// 2^31 us or more to long would go negative and send a very long wait
	// down the pure-spin path.
	if ((LONGLONG)usec > (LONGLONG)pure_spin_us) {
		// Use compiler-native TLS instead of C++ thread_local because this
		// header is also included from .c translation units.
		// NOTE(review): the function is static inline, so each translation
		// unit that calls it carries its own copy of these statics -
		// confirm that one timer per thread per TU is acceptable.
		static PX4_WINDOWS_SLEEP_TLS HANDLE timer = NULL;
		// Per-thread adaptive spin-tail state. We track the timer wake
		// overshoot (how late WaitForSingleObject returned past the
		// requested bulk deadline) as an envelope in microseconds, then
		// size the spin tail at (envelope + small margin), bounded by
		// [g_usleep_adaptive_min_tail_us, g_usleep_spin_tail_us].
		// Both values start at the sentinel -1 so the first call uses the
		// configured upper-bound tail; subsequent calls converge toward
		// the host's actual jitter and trim the spin.
		static PX4_WINDOWS_SLEEP_TLS long adaptive_tail_us = -1;
		static PX4_WINDOWS_SLEEP_TLS long overshoot_ewma_us = -1;

		if (timer == NULL) {
			timer = CreateWaitableTimerExW(NULL, NULL,
						       CREATE_WAITABLE_TIMER_HIGH_RESOLUTION
						       | CREATE_WAITABLE_TIMER_MANUAL_RESET,
						       TIMER_ALL_ACCESS);

			if (timer == NULL) {
				// Older Windows: legacy manual-reset timer
				// still honors timeBeginPeriod(1).
				timer = CreateWaitableTimerW(NULL, TRUE, NULL);
			}
		}

		// Decide the spin tail for this call. First call (sentinel) -
		// fall back to the configured upper bound so we definitely
		// cover the deadline while we collect data. Subsequent calls
		// use the envelope-derived value.
		long tail_us = (adaptive_tail_us < 0) ? spin_tail_us : adaptive_tail_us;

		// Floor at the host-measured P95 jitter (set by the calibration
		// routine in init.cpp). Trimming below this would force the QPC
		// spin to absorb wakes past the deadline, which directly bleeds
		// into sim/wall ratio.
		if (tail_us < adaptive_floor_us) { tail_us = adaptive_floor_us; }

		// The upper bound is applied last, so it wins if the floor
		// happens to be configured above it.
		if (tail_us > spin_tail_us) { tail_us = spin_tail_us; }

		// Bulk portion handed to the kernel: wake tail_us early and close
		// the gap by spin. Clamp to >= 0 in case the caller asked for a
		// value just above pure_spin_us with a larger spin tail; the QPC
		// spin still enforces the deadline.
		const LONGLONG bulk_us = (LONGLONG)usec - (LONGLONG)tail_us;
		const LONGLONG bulk_us_clamped = bulk_us > 0 ? bulk_us : 0;
		const DWORD wait_ms_bulk = (DWORD)((bulk_us_clamped + 999LL) / 1000LL);

		if (timer != NULL) {
			LARGE_INTEGER due;
			// Negative due time = relative interval, 100 ns units.
			due.QuadPart = -(bulk_us_clamped * 10);

			if (SetWaitableTimer(timer, &due, 0, NULL, NULL, FALSE)) {
				// Use a millisecond timeout slightly longer than
				// the requested sleep rather than INFINITE: a
				// rare WaitForSingleObject misbehavior on Windows
				// (observed under heavy SITL lockstep load) can
				// otherwise hang the producer thread permanently.
				// The QPC spin below still enforces the absolute
				// deadline, so a premature wake is harmless.
				const DWORD wait_ms = wait_ms_bulk + 5U; // +5 ms safety margin
				WaitForSingleObject(timer, wait_ms);

				// Adaptive update: measure how late we woke vs the
				// requested bulk deadline (qpc_target - tail_us).
				// Early wakes are clamped to zero overshoot. We track
				// an upper envelope: a late wake snaps the value up
				// immediately, a stretch of clean wakes decays it
				// down by 1/64 of the gap per call (a time constant
				// of 64 calls, i.e. ~256 ms at 250 Hz). The plain
				// mean would undersize the tail because the timer
				// jitter distribution has a long upper tail - each
				// miss bleeds into wall time and accumulates as
				// sim/wall ratio drift.
				LARGE_INTEGER wake_now;
				QueryPerformanceCounter(&wake_now);
				const LONGLONG bulk_target_qpc = qpc_target
								 - ((LONGLONG)tail_us * qpc_freq.QuadPart) / 1000000LL;
				LONGLONG overshoot_qpc = wake_now.QuadPart - bulk_target_qpc;

				if (overshoot_qpc < 0) { overshoot_qpc = 0; }

				const long overshoot_us =
					(long)((overshoot_qpc * 1000000LL) / qpc_freq.QuadPart);

				if (overshoot_ewma_us < 0) {
					// First sample: seed at the configured upper
					// bound so we don't undershoot before any
					// data has been gathered.
					overshoot_ewma_us = spin_tail_us;
				}

				// Fast attack, slow decay.
				if (overshoot_us > overshoot_ewma_us) {
					overshoot_ewma_us = overshoot_us;

				} else {
					overshoot_ewma_us =
						(overshoot_ewma_us * 63 + overshoot_us + 32) / 64;
				}

				// Size the next call's tail at envelope + 200 us
				// margin. The margin covers the residual gap between
				// the slow-decay envelope and the instantaneous
				// worst-case wake jitter; the floor and upper-bound
				// clamps keep the controller from collapsing or
				// running away.
				adaptive_tail_us = overshoot_ewma_us + 200;

				if (adaptive_tail_us < adaptive_floor_us) {
					adaptive_tail_us = adaptive_floor_us;
				}

				if (adaptive_tail_us > spin_tail_us) {
					adaptive_tail_us = spin_tail_us;
				}

			} else {
				// Arming failed (very rare). Fall through to
				// the QPC spin below; it will still hit the
				// deadline, just with a longer CPU burn.
			}

		} else {
			// No timer available at all: kernel Sleep() rounded
			// up to the nearest millisecond. The spin tail below
			// still corrects the residual.
			Sleep(wait_ms_bulk);
		}
	}

	// Close the residual against the absolute QPC target. For longer waits
	// this is at most ~spin_tail_us of spin (often less because the waitable
	// timer wakes slightly late). For SIH-sized waits it is the full request.
	LARGE_INTEGER now;

	do {
		YieldProcessor();
		QueryPerformanceCounter(&now);
	} while (now.QuadPart < qpc_target);

	return 0;
}
|
||||
|
||||
#undef PX4_WINDOWS_SLEEP_TLS
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
/** @brief Sleep for at least @p seconds seconds using Windows Sleep(). */
|
||||
static inline unsigned int sleep(unsigned int seconds)
|
||||
{
|
||||
@@ -130,6 +383,7 @@ static inline unsigned int sleep(unsigned int seconds)
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* POSIX pipe(fd[2]) - default to 64 KiB buffer and binary mode. */
|
||||
#ifndef _PX4_PIPE_SHIM_DEFINED
|
||||
|
||||
@@ -46,7 +46,11 @@
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
|
||||
#include <atomic>
|
||||
#include <process.h>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace
|
||||
{
|
||||
@@ -59,6 +63,72 @@ struct PX4ThreadStart {
|
||||
pthread_t self;
|
||||
};
|
||||
|
||||
/* POSIX requires pthread_key_create() destructors to run on thread exit. The
|
||||
* Win32 TLS API has no such hook, so we keep our own registry of keys with
|
||||
* non-null destructors and walk it from the thread trampoline tail. Without
|
||||
* this, every per-thread allocation registered via pthread_setspecific() leaks
|
||||
* on the MSVC SITL build (e.g. CmdThreadSpecificData in px4_daemon::Server). */
|
||||
/* Returns the single process-wide mutex that serialises access to the TLS
 * destructor registry. It is a function-local static, so it is constructed
 * on first use rather than at namespace-scope initialisation time. */
std::mutex &tls_destructor_mutex()
{
	static std::mutex registry_lock;
	return registry_lock;
}
|
||||
|
||||
/* Returns the process-wide registry mapping each TLS key to the destructor
 * registered for it. The map itself is not synchronised; callers in this
 * file take tls_destructor_mutex() around every access. */
std::unordered_map<pthread_key_t, void (*)(void *)> &tls_destructors()
{
	static std::unordered_map<pthread_key_t, void (*)(void *)> registry;
	return registry;
}
|
||||
|
||||
std::atomic<px4_pthread_cond_notify_callback_t> &cond_notify_callback()
|
||||
{
|
||||
static std::atomic<px4_pthread_cond_notify_callback_t> callback{nullptr};
|
||||
return callback;
|
||||
}
|
||||
|
||||
void run_tls_destructors_on_exit()
|
||||
{
|
||||
/* POSIX allows up to PTHREAD_DESTRUCTOR_ITERATIONS (typically 4) passes
|
||||
* because a destructor may install new TLS values. Snapshot under the
|
||||
* mutex, run unlocked so destructors can call pthread_key_delete()/
|
||||
* pthread_setspecific() without deadlocking. */
|
||||
for (int pass = 0; pass < 4; ++pass) {
|
||||
struct Pending {
|
||||
pthread_key_t key;
|
||||
void (*destructor)(void *);
|
||||
void *value;
|
||||
};
|
||||
std::vector<Pending> pending;
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(tls_destructor_mutex());
|
||||
pending.reserve(tls_destructors().size());
|
||||
|
||||
for (const auto &entry : tls_destructors()) {
|
||||
if (entry.second == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
void *value = TlsGetValue(entry.first);
|
||||
|
||||
if (value == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
pending.push_back({entry.first, entry.second, value});
|
||||
}
|
||||
}
|
||||
|
||||
if (pending.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const auto &p : pending) {
|
||||
TlsSetValue(p.key, nullptr);
|
||||
p.destructor(p.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BOOL CALLBACK init_mutex_once(PINIT_ONCE, PVOID parameter, PVOID *)
|
||||
{
|
||||
/* Static pthread mutex initializers cannot run a constructor. INIT_ONCE lets
|
||||
@@ -95,6 +165,30 @@ static HANDLE handle_for_pthread(pthread_t thread)
|
||||
return reinterpret_cast<HANDLE>(thread);
|
||||
}
|
||||
|
||||
static int windows_thread_priority(int priority)
|
||||
{
|
||||
if (priority >= THREAD_PRIORITY_TIME_CRITICAL) {
|
||||
return THREAD_PRIORITY_TIME_CRITICAL;
|
||||
|
||||
} else if (priority >= THREAD_PRIORITY_HIGHEST) {
|
||||
return THREAD_PRIORITY_HIGHEST;
|
||||
|
||||
} else if (priority >= THREAD_PRIORITY_ABOVE_NORMAL) {
|
||||
return THREAD_PRIORITY_ABOVE_NORMAL;
|
||||
|
||||
} else if (priority == THREAD_PRIORITY_NORMAL) {
|
||||
return THREAD_PRIORITY_NORMAL;
|
||||
|
||||
} else if (priority <= THREAD_PRIORITY_IDLE) {
|
||||
return THREAD_PRIORITY_IDLE;
|
||||
|
||||
} else if (priority <= THREAD_PRIORITY_LOWEST) {
|
||||
return THREAD_PRIORITY_LOWEST;
|
||||
}
|
||||
|
||||
return THREAD_PRIORITY_BELOW_NORMAL;
|
||||
}
|
||||
|
||||
static DWORD abstime_to_timeout_ms(const timespec *abstime)
|
||||
{
|
||||
if (!abstime) {
|
||||
@@ -128,6 +222,7 @@ unsigned __stdcall thread_trampoline(void *arg)
|
||||
delete start;
|
||||
|
||||
const uintptr_t result = reinterpret_cast<uintptr_t>(entry(entry_arg));
|
||||
run_tls_destructors_on_exit();
|
||||
_endthreadex(static_cast<unsigned>(result));
|
||||
return static_cast<unsigned>(result);
|
||||
}
|
||||
@@ -459,6 +554,10 @@ int pthread_cond_signal(pthread_cond_t *cond)
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
if (px4_pthread_cond_notify_callback_t callback = cond_notify_callback().load(std::memory_order_acquire)) {
|
||||
callback(cond, 0);
|
||||
}
|
||||
|
||||
WakeConditionVariable(cond);
|
||||
return 0;
|
||||
}
|
||||
@@ -469,10 +568,20 @@ int pthread_cond_broadcast(pthread_cond_t *cond)
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
if (px4_pthread_cond_notify_callback_t callback = cond_notify_callback().load(std::memory_order_acquire)) {
|
||||
callback(cond, 1);
|
||||
}
|
||||
|
||||
WakeAllConditionVariable(cond);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Installs (or, with a null pointer, removes) the hook that
 * pthread_cond_signal()/pthread_cond_broadcast() invoke before waking
 * waiters. The store uses release ordering to pair with the acquire loads
 * performed by those two functions.
 *
 * @param callback hook to install; a null value means "no hook"
 * @return always 0
 */
int px4_pthread_cond_set_notify_callback(px4_pthread_cond_notify_callback_t callback)
{
	auto &hook_slot = cond_notify_callback();
	hook_slot.store(callback, std::memory_order_release);

	return 0;
}
|
||||
|
||||
int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
|
||||
{
|
||||
if (!thread || !start_routine) {
|
||||
@@ -499,7 +608,7 @@ int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_
|
||||
*thread = static_cast<pthread_t>(handle);
|
||||
|
||||
if (attr && attr->sched.sched_priority != 0) {
|
||||
SetThreadPriority(reinterpret_cast<HANDLE>(handle), attr->sched.sched_priority);
|
||||
SetThreadPriority(reinterpret_cast<HANDLE>(handle), windows_thread_priority(attr->sched.sched_priority));
|
||||
}
|
||||
|
||||
ResumeThread(reinterpret_cast<HANDLE>(handle));
|
||||
@@ -522,6 +631,10 @@ int pthread_join(pthread_t thread, void **value_ptr)
|
||||
HANDLE handle = reinterpret_cast<HANDLE>(thread);
|
||||
|
||||
if (WaitForSingleObject(handle, INFINITE) == WAIT_FAILED) {
|
||||
/* Even on failure the caller has handed ownership of the handle
|
||||
* to pthread_join() per POSIX semantics; close it so we don't
|
||||
* leak the Win32 thread object. */
|
||||
CloseHandle(handle);
|
||||
return ESRCH;
|
||||
}
|
||||
|
||||
@@ -547,6 +660,7 @@ int pthread_detach(pthread_t thread)
|
||||
|
||||
void pthread_exit(void *value_ptr)
|
||||
{
|
||||
run_tls_destructors_on_exit();
|
||||
_endthreadex(static_cast<unsigned>(reinterpret_cast<uintptr_t>(value_ptr)));
|
||||
}
|
||||
|
||||
@@ -564,6 +678,34 @@ int pthread_equal(pthread_t t1, pthread_t t2)
|
||||
return t1 == t2;
|
||||
}
|
||||
|
||||
/* Reports the scheduling parameters of @p thread.
 *
 * The policy is always reported as SCHED_OTHER and the priority is the raw
 * value returned by GetThreadPriority().
 *
 * @param thread thread to query
 * @param policy receives SCHED_OTHER on success; must not be null
 * @param param  receives the thread priority on success; must not be null
 * @return 0 on success, EINVAL if an output pointer is null, ESRCH if the
 *         priority query reports an error
 */
int pthread_getschedparam(pthread_t thread, int *policy, struct sched_param *param)
{
	if (policy == nullptr || param == nullptr) {
		return EINVAL;
	}

	const int win_priority = GetThreadPriority(handle_for_pthread(thread));

	// The error sentinel only counts as a failure when the last-error code
	// confirms it; GetLastError() is consulted only in that case.
	const bool query_failed = (win_priority == THREAD_PRIORITY_ERROR_RETURN)
				  && (GetLastError() != ERROR_SUCCESS);

	if (query_failed) {
		return ESRCH;
	}

	param->sched_priority = win_priority;
	*policy = SCHED_OTHER;

	return 0;
}
|
||||
|
||||
/* Applies a scheduling priority to @p thread.
 *
 * The requested priority is first mapped through windows_thread_priority()
 * and then handed to SetThreadPriority().
 *
 * @param thread thread to modify
 * @param policy ignored
 * @param param  carries the requested sched_priority; must not be null
 * @return 0 on success, EINVAL if @p param is null, ESRCH if
 *         SetThreadPriority() fails
 */
int pthread_setschedparam(pthread_t thread, int policy, const struct sched_param *param)
{
	(void)policy;

	if (param == nullptr) {
		return EINVAL;
	}

	const int win_priority = windows_thread_priority(param->sched_priority);

	if (!SetThreadPriority(handle_for_pthread(thread), win_priority)) {
		return ESRCH;
	}

	return 0;
}
|
||||
|
||||
int pthread_cancel(pthread_t thread)
|
||||
{
|
||||
if (thread == 0) {
|
||||
@@ -584,8 +726,6 @@ int pthread_kill(pthread_t thread, int sig)
|
||||
|
||||
int pthread_key_create(pthread_key_t *key, void (*destructor)(void *))
|
||||
{
|
||||
(void)destructor;
|
||||
|
||||
if (!key) {
|
||||
return EINVAL;
|
||||
}
|
||||
@@ -596,12 +736,21 @@ int pthread_key_create(pthread_key_t *key, void (*destructor)(void *))
|
||||
return EAGAIN;
|
||||
}
|
||||
|
||||
if (destructor) {
|
||||
std::lock_guard<std::mutex> guard(tls_destructor_mutex());
|
||||
tls_destructors()[index] = destructor;
|
||||
}
|
||||
|
||||
*key = index;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int pthread_key_delete(pthread_key_t key)
|
||||
{
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(tls_destructor_mutex());
|
||||
tls_destructors().erase(key);
|
||||
}
|
||||
return TlsFree(key) ? 0 : EINVAL;
|
||||
}
|
||||
|
||||
|
||||
@@ -61,16 +61,27 @@ int clock_gettime(clockid_t clk_id, struct timespec *tp)
|
||||
|
||||
if (clk_id == CLOCK_MONOTONIC) {
|
||||
/* QPC is monotonic and high resolution, but relative to an arbitrary
|
||||
* boot-time counter. That is exactly what CLOCK_MONOTONIC promises. */
|
||||
LARGE_INTEGER frequency {};
|
||||
* boot-time counter. That is exactly what CLOCK_MONOTONIC promises.
|
||||
*
|
||||
* Per Microsoft's QueryPerformanceCounter guidance, the QPC frequency
|
||||
* is fixed at system boot and consistent across processors, so we
|
||||
* only need to query it once. clock_gettime is on PX4's hot path
|
||||
* (drv_hrt's hrt_absolute_time, lockstep_scheduler, every uORB
|
||||
* publish/subscribe), and a syscall here adds up quickly.
|
||||
*/
|
||||
static const int64_t frequency = []() {
|
||||
LARGE_INTEGER f {};
|
||||
QueryPerformanceFrequency(&f);
|
||||
return f.QuadPart;
|
||||
}();
|
||||
|
||||
LARGE_INTEGER counter {};
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
QueryPerformanceCounter(&counter);
|
||||
|
||||
const uint64_t seconds = static_cast<uint64_t>(counter.QuadPart / frequency.QuadPart);
|
||||
const uint64_t remainder = static_cast<uint64_t>(counter.QuadPart % frequency.QuadPart);
|
||||
const uint64_t seconds = static_cast<uint64_t>(counter.QuadPart / frequency);
|
||||
const uint64_t remainder = static_cast<uint64_t>(counter.QuadPart % frequency);
|
||||
tp->tv_sec = static_cast<time_t>(seconds);
|
||||
tp->tv_nsec = static_cast<long>((remainder * 1000000000ULL) / static_cast<uint64_t>(frequency.QuadPart));
|
||||
tp->tv_nsec = static_cast<long>((remainder * 1000000000ULL) / static_cast<uint64_t>(frequency));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,35 @@
|
||||
|
||||
#include "px4_windows_internal.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// timeBeginPeriod / timeEndPeriod live in winmm. Without raising the
|
||||
// system timer resolution, the Windows scheduler quantizes Sleep() to
|
||||
// the default ~15.6 ms HPET tick, which throttles SITL sim time to
|
||||
// ~40 % of wall time. Requesting 1 ms resolution drops the floor to
|
||||
// the documented minimum.
|
||||
#include <timeapi.h>
|
||||
|
||||
// CREATE_WAITABLE_TIMER_HIGH_RESOLUTION (Windows 10 1803+, build 17134)
|
||||
// may not be defined in older SDK headers. Mirror the literal values
|
||||
// documented by Microsoft - same fallback as windows_shim/unistd.h.
|
||||
#ifndef CREATE_WAITABLE_TIMER_MANUAL_RESET
|
||||
#define CREATE_WAITABLE_TIMER_MANUAL_RESET 0x00000001
|
||||
#endif
|
||||
#ifndef CREATE_WAITABLE_TIMER_HIGH_RESOLUTION
|
||||
#define CREATE_WAITABLE_TIMER_HIGH_RESOLUTION 0x00000002
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// MSVC CRT debug heap: dumps unfreed allocations to stderr.
|
||||
// We invoke it explicitly from px4_windows_exit() because the daemon
|
||||
// shuts down via ExitProcess(), which bypasses _CRTDBG_LEAK_CHECK_DF.
|
||||
#include <crtdbg.h>
|
||||
#endif
|
||||
|
||||
/* --------------------------------------------------------------------------
|
||||
* One-time process-wide initialisation.
|
||||
@@ -57,6 +85,181 @@
|
||||
* the extern declaration in proc/ids.cpp resolves. */
|
||||
volatile LONG g_px4_session_id = 0;
|
||||
|
||||
/* Runtime-tuned thresholds consumed by the inline usleep() shim in
|
||||
* platforms/posix/include/windows_shim/unistd.h. The defaults are sized
|
||||
* to give a high-resolution-timer-equipped host (Windows 10 1803+) a
|
||||
* safe starting point: 5 ms pure-spin ceiling and 1 ms spin-tail. The
|
||||
* tail is the *upper bound* the thread-local adaptive controller in
|
||||
* usleep() may expand to; it shrinks toward the observed timer overshoot
|
||||
* via an EWMA so a quiet host pays only ~p95-jitter of CPU spin per
|
||||
* call. They live at file scope because every translation unit that
|
||||
* includes <unistd.h> on Windows references them through the inline body
|
||||
* of usleep(). */
|
||||
extern "C" long g_usleep_pure_spin_us = 5000;
|
||||
extern "C" long g_usleep_spin_tail_us = 1000;
|
||||
/* Floor for the per-thread adaptive spin tail. Initialised by
|
||||
* px4_windows_calibrate_usleep_threshold() to the host-measured P95
|
||||
* waitable-timer jitter so the controller never collapses below the
|
||||
* value we already know is needed to cover the observed long-tail wakes.
|
||||
* Defaults to a conservative 700 us when calibration cannot run (e.g.
|
||||
* pre-1803 Windows with no high-resolution timer). */
|
||||
extern "C" long g_usleep_adaptive_min_tail_us = 700;
|
||||
|
||||
/**
|
||||
* @brief Auto-tune g_usleep_pure_spin_us against the host's measured
|
||||
* high-resolution waitable-timer jitter and apply an optional
|
||||
* environment override.
|
||||
*
|
||||
* Must be invoked exactly once and BEFORE any thread starts calling
|
||||
* usleep(). The constructor of PX4WindowsGlobalInit calls it directly
|
||||
* after timeBeginPeriod(1) - the earliest hookable point in the PX4
|
||||
* Windows startup sequence.
|
||||
*
|
||||
* Honors PX4_USLEEP_SPIN_US (microseconds; accepted only within [0, 50000], otherwise ignored with a warning). When
|
||||
* unset, probes the *exact* primitive the inline usleep() shim uses for
|
||||
* the bulk wait: a CREATE_WAITABLE_TIMER_HIGH_RESOLUTION waitable timer
|
||||
* armed for 1 ms via SetWaitableTimer + WaitForSingleObject. The chosen
|
||||
* threshold is g_usleep_spin_tail_us + p95_jitter + 500 us margin, so
|
||||
* any wait above the threshold can be served by (timer + spin tail) and
|
||||
* still hit the absolute QPC deadline. Floored to 500 us and capped at
|
||||
* 5000 us.
|
||||
*
|
||||
* The previous heuristic measured Sleep(1) jitter, but Sleep is not on
|
||||
* the hot path - usleep() uses the high-resolution waitable timer, which
|
||||
* is far more accurate than Sleep on Win10 1803+. Probing the wrong
|
||||
* primitive made the auto-tune saturate at 5000 us on quiet hosts, which
|
||||
* forced every SIH 4 ms tick into pure-spin and pegged one full core.
|
||||
*/
|
||||
static void px4_windows_calibrate_usleep_threshold()
|
||||
{
|
||||
// 1. Honor an explicit env override first; most users / CI runs set
|
||||
// this from the launcher script, so skip the probe entirely when present.
|
||||
if (const char *env = std::getenv("PX4_USLEEP_SPIN_US")) {
|
||||
char *end = nullptr;
|
||||
long v = std::strtol(env, &end, 10);
|
||||
|
||||
if (end != env && v >= 0 && v <= 50000) {
|
||||
g_usleep_pure_spin_us = v;
|
||||
// Env override skips probing the host so we
|
||||
// have no measured P95 - keep the conservative
|
||||
// upper-bound default for the adaptive floor.
|
||||
g_usleep_adaptive_min_tail_us = g_usleep_spin_tail_us;
|
||||
std::printf("INFO [px4_windows] usleep spin threshold (env): %ld us "
|
||||
"(adaptive tail floor: %ld us)\n",
|
||||
v, g_usleep_adaptive_min_tail_us);
|
||||
std::fflush(stdout);
|
||||
return;
|
||||
}
|
||||
|
||||
std::printf("WARN [px4_windows] PX4_USLEEP_SPIN_US=\"%s\" out of range [0, 50000], ignored\n",
|
||||
env);
|
||||
std::fflush(stdout);
|
||||
}
|
||||
|
||||
// 2. Probe the actual primitive usleep() uses for the bulk wait: a
|
||||
// CREATE_WAITABLE_TIMER_HIGH_RESOLUTION waitable timer armed via
|
||||
// SetWaitableTimer + WaitForSingleObject. On Win10 1803+ this gives
|
||||
// sub-millisecond accuracy; the residual is closed by the QPC spin
|
||||
// tail (g_usleep_spin_tail_us). The threshold we want is the smallest
|
||||
// value such that (waitable_timer + spin_tail) reliably hits the
|
||||
// deadline.
|
||||
HANDLE timer = CreateWaitableTimerExW(NULL, NULL,
|
||||
CREATE_WAITABLE_TIMER_HIGH_RESOLUTION
|
||||
| CREATE_WAITABLE_TIMER_MANUAL_RESET,
|
||||
TIMER_ALL_ACCESS);
|
||||
|
||||
if (timer == NULL) {
|
||||
// Older Windows (pre-1803) lacks the high-res flag. Keep the
|
||||
// historical 5000 us default - on those hosts the legacy timer
|
||||
// quantizes to ~1 ms tick and the wide spin band is the safest
|
||||
// behavior available.
|
||||
g_usleep_pure_spin_us = 5000;
|
||||
// Without a high-res timer the legacy 1 ms tick dominates;
|
||||
// lock the adaptive floor to spin_tail_us so the controller
|
||||
// can't shrink the spin below the safe bound on this host.
|
||||
g_usleep_adaptive_min_tail_us = g_usleep_spin_tail_us;
|
||||
std::printf("INFO [px4_windows] usleep spin threshold (auto): 5000 us "
|
||||
"(high-res waitable timer unavailable, using legacy default)\n");
|
||||
std::fflush(stdout);
|
||||
return;
|
||||
}
|
||||
|
||||
LARGE_INTEGER freq;
|
||||
QueryPerformanceFrequency(&freq);
|
||||
|
||||
// N=500 keeps ~25 samples in the p95 tail (vs 5 at N=100), which removes
|
||||
// the intermittent low-p95 outlier that under-provisioned the spin tail
|
||||
// and tripped the sim/wall ratio below 0.99 once every few cold boots.
|
||||
// Probe cost is ~500 ms of one-time startup time (each iteration waits 1
|
||||
// ms on the high-res timer); negligible vs the robustness gain.
|
||||
constexpr int N = 500;
|
||||
long jitter_us[N];
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
LARGE_INTEGER t0;
|
||||
LARGE_INTEGER t1;
|
||||
LARGE_INTEGER due;
|
||||
// Ask for 1 ms (10 000 x 100 ns units, negative = relative).
|
||||
due.QuadPart = -10000;
|
||||
QueryPerformanceCounter(&t0);
|
||||
|
||||
if (SetWaitableTimer(timer, &due, 0, NULL, NULL, FALSE)) {
|
||||
WaitForSingleObject(timer, INFINITE);
|
||||
}
|
||||
|
||||
QueryPerformanceCounter(&t1);
|
||||
const long actual_us = (long)(((t1.QuadPart - t0.QuadPart) * 1000000LL) / freq.QuadPart);
|
||||
long delta = actual_us - 1000;
|
||||
|
||||
if (delta < 0) { delta = 0; }
|
||||
|
||||
jitter_us[i] = delta;
|
||||
}
|
||||
|
||||
CloseHandle(timer);
|
||||
std::sort(jitter_us, jitter_us + N);
|
||||
const long p95 = jitter_us[(int)(0.95 * N)];
|
||||
|
||||
// 3. Choose threshold = spin_tail + p95_jitter + 500 us margin. Any
|
||||
// wait above this is served by (waitable timer wakes ~p95 us late
|
||||
// at most + spin tail closes the residual). Floor at 500 us so the
|
||||
// short-sleep band never collapses (avoids degenerate 0-cost loops);
|
||||
// cap at 5000 us so a freakishly noisy host still falls back to the
|
||||
// legacy behavior.
|
||||
long chosen = g_usleep_spin_tail_us + p95 + 500;
|
||||
|
||||
if (chosen < 500) { chosen = 500; }
|
||||
|
||||
if (chosen > 5000) { chosen = 5000; }
|
||||
|
||||
g_usleep_pure_spin_us = chosen;
|
||||
// Right-size the spin-tail upper bound to (P95 + 500 us). This is
|
||||
// the largest value the per-thread adaptive controller in usleep()
|
||||
// is allowed to grow to, so we trade a couple hundred microseconds
|
||||
// of CPU spin per call for a robust deadline guarantee. The +500
|
||||
// (was +300) absorbs the residual P95 underestimate even when the
|
||||
// N=500 probe still under-samples a freakishly quiet host. Floored
|
||||
// at 700 us (so quiet hosts still cover the typical Win10 1803+
|
||||
// jitter floor) and capped at 2000 us (the historical safe value).
|
||||
long sized_tail = p95 + 500;
|
||||
if (sized_tail < 700) { sized_tail = 700; }
|
||||
if (sized_tail > 2000) { sized_tail = 2000; }
|
||||
g_usleep_spin_tail_us = sized_tail;
|
||||
// Set the adaptive tail floor to the host-measured P95 jitter
|
||||
// (clamped to [200, sized_tail]) so the per-thread controller in
|
||||
// usleep() can never trim the spin below the value we already know
|
||||
// is needed to cover this host's observed long-tail wakes.
|
||||
long adaptive_floor = p95;
|
||||
if (adaptive_floor < 200) { adaptive_floor = 200; }
|
||||
if (adaptive_floor > sized_tail) { adaptive_floor = sized_tail; }
|
||||
g_usleep_adaptive_min_tail_us = adaptive_floor;
|
||||
std::printf("INFO [px4_windows] usleep spin threshold (auto): %ld us "
|
||||
"(p95 high-res timer jitter: %ld us [N=%d], spin tail: %ld us, "
|
||||
"adaptive tail floor: %ld us)\n",
|
||||
chosen, p95, N, sized_tail, adaptive_floor);
|
||||
std::fflush(stdout);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@@ -134,6 +337,7 @@ struct PX4WindowsGlobalInit {
|
||||
// Inline Linux syscall helpers (x86_64 ABI).
|
||||
static long long linux_syscall1(long long num, long long a)
|
||||
{
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
long long ret;
|
||||
__asm__ volatile (
|
||||
"syscall"
|
||||
@@ -142,10 +346,16 @@ struct PX4WindowsGlobalInit {
|
||||
: "rcx", "r11", "memory"
|
||||
);
|
||||
return ret;
|
||||
#else
|
||||
(void)num;
|
||||
(void)a;
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
static long long linux_syscall3(long long num, long long a, long long b, long long c)
|
||||
{
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
long long ret;
|
||||
__asm__ volatile (
|
||||
"syscall"
|
||||
@@ -154,6 +364,13 @@ struct PX4WindowsGlobalInit {
|
||||
: "rcx", "r11", "memory"
|
||||
);
|
||||
return ret;
|
||||
#else
|
||||
(void)num;
|
||||
(void)a;
|
||||
(void)b;
|
||||
(void)c;
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
static long long open_host_tty()
|
||||
@@ -260,6 +477,8 @@ struct PX4WindowsGlobalInit {
|
||||
}
|
||||
}
|
||||
|
||||
bool timer_resolution_raised = false;
|
||||
|
||||
PX4WindowsGlobalInit()
|
||||
{
|
||||
WSADATA wsaData;
|
||||
@@ -268,6 +487,24 @@ struct PX4WindowsGlobalInit {
|
||||
}
|
||||
SetConsoleOutputCP(CP_UTF8);
|
||||
|
||||
// Raise the global timer resolution to 1 ms. The default
|
||||
// (~15.6 ms) makes every usleep() round up to a full HPET
|
||||
// tick, throttling SITL sim time to ~40 % of wall time. The
|
||||
// matching timeEndPeriod(1) lives in the destructor; Windows
|
||||
// also clears the request on process exit, so a hard
|
||||
// ExitProcess() path is still safe.
|
||||
if (timeBeginPeriod(1) == TIMERR_NOERROR) {
|
||||
timer_resolution_raised = true;
|
||||
}
|
||||
|
||||
// Tune g_usleep_pure_spin_us either from PX4_USLEEP_SPIN_US or
|
||||
// by probing this host's high-res waitable-timer jitter. Must run AFTER
|
||||
// timeBeginPeriod(1) so the probe sees the same scheduler
|
||||
// behavior usleep() will see, and BEFORE any module thread
|
||||
// has had a chance to start (we are in a static constructor,
|
||||
// so PX4 main() has not yet been entered).
|
||||
px4_windows_calibrate_usleep_threshold();
|
||||
|
||||
// PX4 stores binary data (parameters.bson, dataman) and expects
|
||||
// read/write to preserve bytes exactly. MSVCRT's default text
|
||||
// mode maps CRLF<->LF, which corrupts arbitrary binary content.
|
||||
@@ -326,11 +563,101 @@ struct PX4WindowsGlobalInit {
|
||||
{
|
||||
restore_console_modes();
|
||||
WSACleanup();
|
||||
|
||||
if (timer_resolution_raised) {
|
||||
timeEndPeriod(1);
|
||||
timer_resolution_raised = false;
|
||||
}
|
||||
}
|
||||
};
|
||||
static PX4WindowsGlobalInit _px4_win_init;
|
||||
|
||||
// Filesystem paths the process owns and must remove on any exit path.
|
||||
// Used by px4_windows_exit() to undo the byte-range lock files that the
|
||||
// daemon installs in %TEMP% via set_server_running(); the explicit unlink
|
||||
// in main.cpp only runs when the pxh shell loop returns normally, but the
|
||||
// `pxh shutdown` command leaves via px4_platform_exit() -> ExitProcess()
|
||||
// and would otherwise leak the lock and PID-companion files.
|
||||
std::mutex _px4_exit_unlink_mutex;
|
||||
std::vector<std::string> _px4_exit_unlink_paths;
|
||||
|
||||
// File descriptors held open for the lifetime of the process (typically the
|
||||
// byte-range lock fd installed by set_server_running). Windows refuses to
|
||||
// unlink a file while any handle to it is open in the same process, so the
|
||||
// exit path must close these BEFORE running the registered unlinks.
|
||||
std::vector<int> _px4_exit_close_fds;
|
||||
|
||||
void px4_run_exit_unlinks()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_px4_exit_unlink_mutex);
|
||||
|
||||
// Close fds first so subsequent unlink() calls don't hit ERROR_SHARING_VIOLATION.
|
||||
for (int fd : _px4_exit_close_fds) {
|
||||
if (fd >= 0) {
|
||||
(void)::_close(fd);
|
||||
}
|
||||
}
|
||||
|
||||
_px4_exit_close_fds.clear();
|
||||
|
||||
for (const std::string &path : _px4_exit_unlink_paths) {
|
||||
// Best effort: ignore errors — the path may already be gone if a
|
||||
// different shutdown route ran the explicit cleanup first.
|
||||
(void)::_unlink(path.c_str());
|
||||
}
|
||||
|
||||
_px4_exit_unlink_paths.clear();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
extern "C" void px4_windows_register_exit_unlink(const char *path)
|
||||
{
|
||||
if (path == nullptr || path[0] == '\0') {
|
||||
return;
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(_px4_exit_unlink_mutex);
|
||||
|
||||
for (const std::string &existing : _px4_exit_unlink_paths) {
|
||||
if (existing == path) {
|
||||
return; // already registered
|
||||
}
|
||||
}
|
||||
|
||||
// Hard cap on entries so a buggy caller can't grow this unboundedly;
|
||||
// the daemon only registers two paths (lock + .pid).
|
||||
constexpr std::size_t kMaxRegistered = 16;
|
||||
|
||||
if (_px4_exit_unlink_paths.size() >= kMaxRegistered) {
|
||||
return;
|
||||
}
|
||||
|
||||
_px4_exit_unlink_paths.emplace_back(path);
|
||||
}
|
||||
|
||||
extern "C" void px4_windows_register_exit_close_fd(int fd)
|
||||
{
|
||||
if (fd < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(_px4_exit_unlink_mutex);
|
||||
|
||||
for (int existing : _px4_exit_close_fds) {
|
||||
if (existing == fd) {
|
||||
return; // already registered
|
||||
}
|
||||
}
|
||||
|
||||
constexpr std::size_t kMaxRegistered = 16;
|
||||
|
||||
if (_px4_exit_close_fds.size() >= kMaxRegistered) {
|
||||
return;
|
||||
}
|
||||
|
||||
_px4_exit_close_fds.push_back(fd);
|
||||
}
|
||||
|
||||
extern "C" void px4_windows_restore_console_modes()
|
||||
{
|
||||
_px4_win_init.restore_console_modes();
|
||||
@@ -381,12 +708,33 @@ extern "C" void px4_windows_exit(int status)
|
||||
{
|
||||
fflush(stdout);
|
||||
fflush(stderr);
|
||||
|
||||
// Drop server lock + PID-companion files before tearing down the
|
||||
// console. Done early so a follow-up launch racing this process can
|
||||
// re-acquire the byte-range lock without falling through to the
|
||||
// stale-lock recovery path in get_server_running().
|
||||
px4_run_exit_unlinks();
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// ExitProcess()/TerminateProcess() skip the CRT exit chain, so
|
||||
// _CRTDBG_LEAK_CHECK_DF never runs. Dump the leak report explicitly
|
||||
// here, BEFORE FreeConsole() invalidates the stderr handle the CRT
|
||||
// would write to.
|
||||
_CrtDumpMemoryLeaks();
|
||||
fflush(stderr);
|
||||
#endif
|
||||
|
||||
_px4_win_init.restore_console_modes();
|
||||
|
||||
if (!_px4_win_init.running_under_wine) {
|
||||
FreeConsole();
|
||||
}
|
||||
|
||||
// Static dtors do not run under ExitProcess()/TerminateProcess().
|
||||
// Match WSAStartup() from the constructor by calling WSACleanup()
|
||||
// explicitly so a soft-exit path does not appear to leak winsock state.
|
||||
WSACleanup();
|
||||
|
||||
if (_px4_win_init.running_under_wine) {
|
||||
TerminateProcess(GetCurrentProcess(), static_cast<UINT>(status));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user