diff --git a/Documentation/platforms/arm/qemu/boards/qemu-armv7r/index.rst b/Documentation/platforms/arm/qemu/boards/qemu-armv7r/index.rst index 8b417aa8ed2..39800ee33fc 100644 --- a/Documentation/platforms/arm/qemu/boards/qemu-armv7r/index.rst +++ b/Documentation/platforms/arm/qemu/boards/qemu-armv7r/index.rst @@ -41,7 +41,7 @@ NSH (Flat Build) Configuring NuttX and compile:: $ ./tools/configure.sh -l qemu-armv7r:nsh - $ make -j`nproc` + $ make -j$(nproc) Running with qemu:: @@ -58,7 +58,7 @@ with MPU support:: $ cd nuttx $ ./tools/configure.sh qemu-armv7r:pnsh - $ make -j`nproc` + $ make -j$(nproc) Running with qemu (note: both nuttx and nuttx_user must be loaded):: @@ -106,4 +106,93 @@ The nuttx ELF image can be debugged with QEMU. $ arm-none-eabi-gdb -tui --eval-command='target remote localhost:1234' nuttx (gdb) add-symbol-file nuttx_user - (gdb) c \ No newline at end of file + (gdb) c + +===================== +Userspace PMU Access +===================== + +Overview +======== + +This section describes how to enable and use Performance Monitoring Unit (PMU) +access from userspace applications on the ARM v7-R QEMU board. The PMU provides +hardware performance counters that can be used to analyze and profile application +performance. The following are required: + +1. ARM v7-R QEMU board support +2. NuttX kernel with PMU support enabled +3. User-space access permissions configured + +Kernel Configuration +-------------------- + +To enable PMU support and userspace access, add the following configuration +options to your defconfig:: + + +CONFIG_ARCH_PERF_EVENTS_USER_ACCESS=y + +CONFIG_ARCH_PERF_EVENTS=y + + +Userspace API + +**perf_gettime** +================ + +Basic Usage +----------- + +.. 
code-block:: c + + clock_t start, end; + unsigned long frequency; + unsigned long cycles; + + frequency = perf_getfreq(); + if (frequency == 0) { + printf("ERROR: Performance frequency not available\n"); + return; + } + + printf("Operation Profiling Results:\n"); + printf("CPU Frequency: %lu Hz\n\n", frequency); + printf("%-40s | %-12s | %-12s\n", "Operation", "Cycles", "Time (us)"); + printf("%-40s-+-%-12s-+-%-12s\n", + "----------------------------------------", + "------------", "------------"); + + start = perf_gettime(); + result = 1; + for (int i = 1; i < 1000; i++) { + result *= i; + } + + end = perf_gettime(); + cycles = end - start; + printf("%-40s | %12lu | %12lu\n", "Multiplication (1K times)", cycles, (cycles * 1000000UL) / frequency); + + start = perf_gettime(); + result = 1000000; + for (int i = 1; i < 1000; i++) { + result /= (i + 1); + } + end = perf_gettime(); + cycles = end - start; + + printf("%-40s | %12lu | %12lu\n", "Division (1K times)", cycles, (cycles * 1000000UL) / frequency); + +Testing with QEMU +================= + +PNSH (Protected) Configuration +------------------------------- + +1. Configure and build the protected image with userspace PMU access:: + + $ ./tools/configure.sh qemu-armv7r:pnsh + $ make -j$(nproc) + +2. 
Run QEMU (load both kernel and userspace):: + + $ qemu-system-arm -M virt -semihosting -nographic -cpu cortex-r5f -device loader,file=nuttx_user -device loader,file=nuttx + diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 68c44b660cb..b555909041c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1005,6 +1005,7 @@ config ARCH_ARMV7R default n select ARCH_HAVE_CPUINFO select ARCH_HAVE_PERF_EVENTS + select ARCH_HAVE_PERF_EVENTS_USER_ACCESS config ARCH_CORTEXR4 bool diff --git a/arch/arm/src/armv7-r/CMakeLists.txt b/arch/arm/src/armv7-r/CMakeLists.txt index f127f628ae4..7db5e42d837 100644 --- a/arch/arm/src/armv7-r/CMakeLists.txt +++ b/arch/arm/src/armv7-r/CMakeLists.txt @@ -81,3 +81,8 @@ if(CONFIG_SMP) endif() target_sources(arch PRIVATE ${SRCS}) + +if(NOT CONFIG_BUILD_FLAT AND CONFIG_ARCH_HAVE_PERF_EVENTS_USER_ACCESS) + target_sources(arch_interface PRIVATE arm_perf.c) + target_include_directories(arch_interface PRIVATE ${NUTTX_CHIP_ABS_DIR}) +endif() diff --git a/arch/arm/src/armv7-r/Make.defs b/arch/arm/src/armv7-r/Make.defs index 70480712b2b..0a9bebcb496 100644 --- a/arch/arm/src/armv7-r/Make.defs +++ b/arch/arm/src/armv7-r/Make.defs @@ -67,3 +67,7 @@ ifeq ($(CONFIG_SMP),y) CMN_CSRCS += arm_cpustart.c arm_smpcall.c CMN_CSRCS += arm_cpuidlestack.c arm_scu.c endif + +ifeq ($(CONFIG_ARCH_HAVE_PERF_EVENTS_USER_ACCESS),y) + CMN_UCSRCS += arm_perf.c +endif diff --git a/arch/arm/src/armv7-r/arm_perf.c b/arch/arm/src/armv7-r/arm_perf.c index d32a7a365e1..7f96b6b0075 100644 --- a/arch/arm/src/armv7-r/arm_perf.c +++ b/arch/arm/src/armv7-r/arm_perf.c @@ -33,6 +33,7 @@ #ifdef CONFIG_ARCH_HAVE_PERF_EVENTS +#if defined(CONFIG_BUILD_FLAT) || defined(__KERNEL__) /**************************************************************************** * Private Data ****************************************************************************/ @@ -84,11 +85,6 @@ unsigned long up_perf_getfreq(void) return g_cpu_freq; } -clock_t up_perf_gettime(void) -{ - return cp15_pmu_rdccr(); -} 
- void up_perf_convert(clock_t elapsed, struct timespec *ts) { clock_t left; @@ -97,4 +93,11 @@ void up_perf_convert(clock_t elapsed, struct timespec *ts) left = elapsed - ts->tv_sec * g_cpu_freq; ts->tv_nsec = NSEC_PER_SEC * (uint64_t)left / g_cpu_freq; } +#endif /* CONFIG_BUILD_FLAT || __KERNEL__ */ + +clock_t up_perf_gettime(void) +{ + return cp15_pmu_rdccr(); +} + #endif diff --git a/sched/clock/clock_perf.c b/sched/clock/clock_perf.c index 33c8398410c..8d0a6347073 100644 --- a/sched/clock/clock_perf.c +++ b/sched/clock/clock_perf.c @@ -31,7 +31,7 @@ #include #include -#ifndef CONFIG_ARCH_HAVE_PERF_EVENTS_USER_ACCESS +#ifndef CONFIG_ARCH_PERF_EVENTS_USER_ACCESS /**************************************************************************** * Preprocessors @@ -134,7 +134,7 @@ clock_t perf_gettime(void) } # endif -#endif /* !CONFIG_ARCH_HAVE_PERF_EVENTS_USER_ACCESS */ +#endif /* !CONFIG_ARCH_PERF_EVENTS_USER_ACCESS */ #if defined(CONFIG_ALARM_ARCH) || defined (CONFIG_TIMER_ARCH) || \ defined(CONFIG_ARCH_PERF_EVENTS)