ci(ros_integration_tests): restart XRCE-DDS Agent between tests

Restart the Micro-XRCE-DDS Agent before each integration test so DDS
graph state from a previous PX4 instance does not leak into the next
test.

The MicroXRCEAgent is started once per session, but PX4 reboots between
tests. The Agent retains writer entries from the previous PX4, so when
the new PX4 reconnects, count_publishers() in px4-ros2-interface-lib's
waitForFMU returns >0 immediately against a stale entry. Phase 1
(discovery) returns instantly, then Phase 2 (heartbeat) times out
waiting for a message on a subscription matched to a dead writer.

This is why ModesTest.denyArming (first test) passes while every later
ModesTest fails with "timeout while waiting for FMU heartbeat".

Add an optional pre_test_hook to test_runner.Tester so ROS-specific
lifecycle handling stays out of the shared test_runner. The workflow no
longer starts the Agent externally; ros_test_runner.py owns its lifecycle.

Refs #27328

Signed-off-by: Ramon Roche <mrpollo@gmail.com>
This commit is contained in:
Ramon Roche
2026-05-13 09:50:38 -07:00
parent 78a28b0e47
commit 6baef6ba88
3 changed files with 30 additions and 3 deletions
+4 -1
View File
@@ -121,7 +121,10 @@ jobs:
shell: bash
run: |
. /opt/px4_ws/install/setup.bash
/opt/Micro-XRCE-DDS-Agent/build/MicroXRCEAgent udp4 localhost -p 8888 -v 0 &
# The Agent is started (and restarted between tests) by ros_test_runner.py
# so DDS graph state from a previous PX4 instance does not leak into the
# next test. Make the Agent binary discoverable on PATH.
export PATH="/opt/Micro-XRCE-DDS-Agent/build:$PATH"
test/ros_test_runner.py --verbose --model iris --force-color
timeout-minutes: 45
@@ -5,7 +5,7 @@ import os
import re
import sys
import time
from typing import Any, Dict, List, NoReturn, TextIO, Optional
from typing import Any, Callable, Dict, List, NoReturn, TextIO, Optional
from types import FrameType
from . import process_helper as ph
from .logger_helper import color, colorize
@@ -75,6 +75,7 @@ class Tester:
self.tester_interface = tester_interface
self.tests = self.determine_tests(config['tests'], model, case)
self.active_runners = []
self.pre_test_hook: Optional[Callable[[str, str], None]] = None
@staticmethod
def wildcard_match(pattern: str, potential_match: str) -> bool:
@@ -203,6 +204,9 @@ class Tester:
.format(log_dir))
os.makedirs(log_dir, exist_ok=True)
if self.pre_test_hook is not None:
self.pre_test_hook(test['model'], key)
was_success = self.run_test_case(test, key, log_dir)
print("--- Test case {} of {}: '{}' {}."
+21 -1
View File
@@ -7,6 +7,7 @@ import psutil # type: ignore
import signal
import subprocess
import sys
import time
from mavsdk_tests.integration_test_runner import test_runner, process_helper as ph, logger_helper
from typing import Any, Dict, List, NoReturn
@@ -45,8 +46,21 @@ class MicroXrceAgent:
if self._verbose:
print('Stopping micro-xrce-dds-agent')
self._proc.kill()
self._proc.wait()
self._proc = None
def restart(self, settle_time: float = 0.2) -> None:
    """Force a fresh Agent process so DDS graph state does not leak across tests.

    The Agent retains writer entries from prior PX4 instances; a fresh PX4 reconnects
    but the stale entries make count_publishers() return >0 before the new writers
    are matched, breaking waitForFMU's two-phase discovery in px4-ros2-interface-lib.

    Args:
        settle_time: Seconds to pause between stopping the old Agent and
            starting the new one. Defaults to 0.2; zero or a negative value
            skips the pause entirely.
    """
    self.stop_process_if_started()
    # Give the OS a moment to release the UDP port before rebinding.
    # Guarded because time.sleep() rejects negative durations.
    if settle_time > 0:
        time.sleep(settle_time)
    self.start_process()
class TesterInterfaceRos(test_runner.TesterInterface):
@@ -187,9 +201,15 @@ def main() -> NoReturn:
# Automatically start & stop the XRCE Agent if not running already
micro_xrce_agent = MicroXrceAgent(args.verbose)
if not micro_xrce_agent.is_running():
agent_managed_here = not micro_xrce_agent.is_running()
if agent_managed_here:
micro_xrce_agent.start_process()
# Adapter for the Tester pre_test_hook callback: accepts the (model, case)
# pair the hook is called with, ignores both, and restarts the Agent.
# NOTE(review): confirm this hook is only installed when this runner started
# the Agent itself (agent_managed_here); restarting while an externally
# started Agent is still running may not behave as intended — TODO verify.
def restart_agent(_model: str, _case: str) -> None:
micro_xrce_agent.restart()
tester.pre_test_hook = restart_agent
try:
result = tester.run()
finally: