diff --git a/Kconfig b/Kconfig index 621f1a15fc..aacc6291f8 100644 --- a/Kconfig +++ b/Kconfig @@ -363,6 +363,8 @@ menu "LVGL configuration" bool "1: NEON" config LV_DRAW_SW_ASM_HELIUM bool "2: HELIUM" + config LV_DRAW_SW_ASM_RISCV_V + bool "3: RISC-V Vector" config LV_DRAW_SW_ASM_CUSTOM bool "255: CUSTOM" endchoice @@ -372,6 +374,7 @@ menu "LVGL configuration" default 0 if LV_DRAW_SW_ASM_NONE default 1 if LV_DRAW_SW_ASM_NEON default 2 if LV_DRAW_SW_ASM_HELIUM + default 3 if LV_DRAW_SW_ASM_RISCV_V default 255 if LV_DRAW_SW_ASM_CUSTOM config LV_DRAW_SW_ASM_CUSTOM_INCLUDE diff --git a/scripts/lv_conf_internal_gen.py b/scripts/lv_conf_internal_gen.py index 7e0462db08..24fb05ae5b 100755 --- a/scripts/lv_conf_internal_gen.py +++ b/scripts/lv_conf_internal_gen.py @@ -66,6 +66,7 @@ fout.write( #define LV_DRAW_SW_ASM_NONE 0 #define LV_DRAW_SW_ASM_NEON 1 #define LV_DRAW_SW_ASM_HELIUM 2 +#define LV_DRAW_SW_ASM_RISCV_V 3 #define LV_DRAW_SW_ASM_CUSTOM 255 #define LV_NEMA_HAL_CUSTOM 0 diff --git a/src/draw/sw/blend/lv_draw_sw_blend_to_rgb888.c b/src/draw/sw/blend/lv_draw_sw_blend_to_rgb888.c index 7a9daa0839..c92ce07df2 100644 --- a/src/draw/sw/blend/lv_draw_sw_blend_to_rgb888.c +++ b/src/draw/sw/blend/lv_draw_sw_blend_to_rgb888.c @@ -22,6 +22,8 @@ #include "neon/lv_blend_neon.h" #elif LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_HELIUM #include "helium/lv_blend_helium.h" +#elif LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_RISCV_V + #include "riscv_v/lv_blend_riscv_v.h" #elif LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_CUSTOM #include LV_DRAW_SW_ASM_CUSTOM_INCLUDE #endif diff --git a/src/draw/sw/blend/riscv_v/lv_blend_riscv_v.h b/src/draw/sw/blend/riscv_v/lv_blend_riscv_v.h new file mode 100644 index 0000000000..eb1d3ee8d3 --- /dev/null +++ b/src/draw/sw/blend/riscv_v/lv_blend_riscv_v.h @@ -0,0 +1,25 @@ +/** + * @file lv_blend_riscv_v.h + * RISC-V Vector extension blend header + */ + +#ifndef LV_BLEND_RISCV_V_H +#define LV_BLEND_RISCV_V_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "../lv_draw_sw_blend.h" + +#if LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_RISCV_V + +#include "lv_draw_sw_blend_riscv_v_to_rgb888.h" + +#endif /* LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_RISCV_V */ + +#ifdef __cplusplus +} +#endif + +#endif /* LV_BLEND_RISCV_V_H */ diff --git a/src/draw/sw/blend/riscv_v/lv_blend_riscv_v_private.h b/src/draw/sw/blend/riscv_v/lv_blend_riscv_v_private.h new file mode 100644 index 0000000000..357d4d4373 --- /dev/null +++ b/src/draw/sw/blend/riscv_v/lv_blend_riscv_v_private.h @@ -0,0 +1,396 @@ +/** + * @file lv_blend_riscv_v_private.h + * Common macros and utilities for RISC-V Vector Extension (RVV 1.0) blend operations + * + * This header provides reusable RVV macros for: + * - Segmented load/store operations (RGB888/XRGB8888/RGB565) + * - Alpha blending with scalar or vector alpha + * - Color format conversions (RGB565 <-> RGB888) + * - Effective alpha calculations (alpha, mask, opa combinations) + */ + +#ifndef LV_BLEND_RISCV_V_PRIVATE_H +#define LV_BLEND_RISCV_V_PRIVATE_H + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* + * INCLUDES + *********************/ + +#include "../../../../lv_conf_internal.h" +#if LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_RISCV_V + +/* Try to use real RVV, fall back to emulation if not available */ +#ifdef __riscv_v +#include +#else +/* No real RVV available, use emulation */ +#include "lv_blend_riscv_vector_emulation.h" +#endif + +/********************* + * DEFINES + *********************/ + +/********************** + * RVV SEGMENTED LOAD/STORE MACROS + * + * Emulate 
segmented load/store using stride operations. + * Compatible with compilers that don't support RVV 1.0 tuple types. + **********************/ + +/* RGB888: 3 channels (B,G,R) with stride=3 */ +#define LV_RVV_VLSEG3E8_U8M2(base, vl, v_b, v_g, v_r) \ + do { \ + (v_b) = __riscv_vlse8_v_u8m2((base) + 0, 3, (vl)); \ + (v_g) = __riscv_vlse8_v_u8m2((base) + 1, 3, (vl)); \ + (v_r) = __riscv_vlse8_v_u8m2((base) + 2, 3, (vl)); \ + } while(0) + +#define LV_RVV_VSSEG3E8_U8M2(base, v_b, v_g, v_r, vl) \ + do { \ + __riscv_vsse8_v_u8m2((base) + 0, 3, (v_b), (vl)); \ + __riscv_vsse8_v_u8m2((base) + 1, 3, (v_g), (vl)); \ + __riscv_vsse8_v_u8m2((base) + 2, 3, (v_r), (vl)); \ + } while(0) + +/* XRGB8888/ARGB8888: 4 channels (B,G,R,X) with stride=4 */ +#define LV_RVV_VLSEG4E8_U8M2(base, vl, v_b, v_g, v_r, v_x) \ + do { \ + (v_b) = __riscv_vlse8_v_u8m2((base) + 0, 4, (vl)); \ + (v_g) = __riscv_vlse8_v_u8m2((base) + 1, 4, (vl)); \ + (v_r) = __riscv_vlse8_v_u8m2((base) + 2, 4, (vl)); \ + (v_x) = __riscv_vlse8_v_u8m2((base) + 3, 4, (vl)); \ + } while(0) + +#define LV_RVV_VSSEG4E8_U8M2(base, v_b, v_g, v_r, v_x, vl) \ + do { \ + __riscv_vsse8_v_u8m2((base) + 0, 4, (v_b), (vl)); \ + __riscv_vsse8_v_u8m2((base) + 1, 4, (v_g), (vl)); \ + __riscv_vsse8_v_u8m2((base) + 2, 4, (v_r), (vl)); \ + __riscv_vsse8_v_u8m2((base) + 3, 4, (v_x), (vl)); \ + } while(0) + +/********************** + * RGB565 <-> RGB888 CONVERSION MACROS + * + * RGB565 format: RRRRRGGGGGGBBBBB (5-6-5 bits) + * Conversion formulas: + * R8 = (R5 * 2106) >> 8 (2106 ≈ 255/31 * 256) + * G8 = (G6 * 1037) >> 8 (1037 ≈ 255/63 * 256) + * B8 = (B5 * 2106) >> 8 + **********************/ + +/* Extract and convert RGB565 to separate R8, G8, B8 channels (16-bit width) */ +#define LV_RVV_RGB565_TO_RGB888_U16M2(v_rgb565, v_r8, v_g8, v_b8, vl) \ + do { \ + vuint16m2_t _r5 = __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2((v_rgb565), 11, (vl)), 0x1F, (vl)); \ + vuint16m2_t _g6 = __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2((v_rgb565), 5, (vl)), 0x3F, (vl)); \ + vuint16m2_t _b5 = __riscv_vand_vx_u16m2((v_rgb565), 0x1F, (vl)); \ + (v_r8) = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(_r5, 2106, (vl)), 8, (vl)); \ + (v_g8) = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(_g6, 1037, (vl)), 8, (vl)); \ + (v_b8) = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(_b5, 2106, (vl)), 8, (vl)); \ + } while(0) + +/* Convert R8, G8, B8 to RGB565 (16-bit) */ +#define LV_RVV_RGB888_TO_RGB565_U16M2(v_r8, v_g8, v_b8, v_rgb565, vl) \ + do { \ + vuint16m2_t _r5 = __riscv_vsrl_vx_u16m2((v_r8), 3, (vl)); \ + vuint16m2_t _g6 = __riscv_vsrl_vx_u16m2((v_g8), 2, (vl)); \ + vuint16m2_t _b5 = __riscv_vsrl_vx_u16m2((v_b8), 3, (vl)); \ + (v_rgb565) = __riscv_vor_vv_u16m2(__riscv_vsll_vx_u16m2(_r5, 11, (vl)), \ + __riscv_vor_vv_u16m2(__riscv_vsll_vx_u16m2(_g6, 5, (vl)), _b5, (vl)), (vl)); \ + } while(0) + +/********************** + * ALPHA BLENDING MACROS + * + * Standard blend formula: result = (src * alpha + dst * (255 - alpha)) >> 8 + * + * Using vwmaccu (widening multiply-accumulate unsigned): + * tmp = dst * (255 - alpha) // Initialize with dst contribution + * tmp = tmp + src * alpha // vwmaccu adds src contribution + * result = tmp >> 8 + * + * This reduces operations by combining multiply and add. 
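+ *
+ * Illustrative arithmetic (example values, not taken from any caller):
+ * src = 200, dst = 100, alpha = 128
+ *   tmp    = 100 * (255 - 128)   = 12700
+ *   tmp    = 12700 + 200 * 128   = 38300
+ *   result = 38300 >> 8          = 149
+ * The exact mix 38300 / 255 ≈ 150, so the >> 8 shortcut is off by at most 1.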
+ **********************/ + +/** + * Blend single channel using vwmaccu (8-bit src/dst -> 16-bit intermediate) + * LMUL relationship: m1 -> m2, m2 -> m4 + */ +#define LV_RVV_BLEND_CHANNEL_U8M1_TO_U16M2(v_src, v_dst, alpha, v_result, vl) \ + do { \ + uint8_t _alpha_inv = 255 - (alpha); \ + vuint16m2_t _tmp = __riscv_vwmulu_vx_u16m2((v_dst), _alpha_inv, (vl)); \ + _tmp = __riscv_vwmaccu_vx_u16m2(_tmp, (alpha), (v_src), (vl)); \ + (v_result) = __riscv_vnsrl_wx_u8m1(_tmp, 8, (vl)); \ + } while(0) + +#define LV_RVV_BLEND_CHANNEL_U8M2_TO_U16M4(v_src, v_dst, alpha, v_result, vl) \ + do { \ + uint8_t _alpha_inv = 255 - (alpha); \ + vuint16m4_t _tmp = __riscv_vwmulu_vx_u16m4((v_dst), _alpha_inv, (vl)); \ + _tmp = __riscv_vwmaccu_vx_u16m4(_tmp, (alpha), (v_src), (vl)); \ + (v_result) = __riscv_vnsrl_wx_u8m2(_tmp, 8, (vl)); \ + } while(0) + +/** + * Blend single channel with vector alpha (per-pixel mask) + */ +#define LV_RVV_BLEND_CHANNEL_VMASK_U8M1_TO_U16M2(v_src, v_dst, v_alpha, v_result, vl) \ + do { \ + vuint8m1_t _v_alpha_inv = __riscv_vrsub_vx_u8m1((v_alpha), 255, (vl)); \ + vuint16m2_t _tmp = __riscv_vwmulu_vv_u16m2((v_dst), _v_alpha_inv, (vl)); \ + _tmp = __riscv_vwmaccu_vv_u16m2(_tmp, (v_alpha), (v_src), (vl)); \ + (v_result) = __riscv_vnsrl_wx_u8m1(_tmp, 8, (vl)); \ + } while(0) + +#define LV_RVV_BLEND_CHANNEL_VMASK_U8M2_TO_U16M4(v_src, v_dst, v_alpha, v_result, vl) \ + do { \ + vuint8m2_t _v_alpha_inv = __riscv_vrsub_vx_u8m2((v_alpha), 255, (vl)); \ + vuint16m4_t _tmp = __riscv_vwmulu_vv_u16m4((v_dst), _v_alpha_inv, (vl)); \ + _tmp = __riscv_vwmaccu_vv_u16m4(_tmp, (v_alpha), (v_src), (vl)); \ + (v_result) = __riscv_vnsrl_wx_u8m2(_tmp, 8, (vl)); \ + } while(0) + +/** + * Blend RGB channels with scalar alpha (all 3 channels at once) + * Uses m1->m2 widening + */ +#define LV_RVV_BLEND_RGB_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, \ + alpha, v_out_r, v_out_g, v_out_b, vl) \ +do { \ + LV_RVV_BLEND_CHANNEL_U8M1_TO_U16M2((v_src_r), (v_dst_r), (alpha), (v_out_r), (vl)); \ + LV_RVV_BLEND_CHANNEL_U8M1_TO_U16M2((v_src_g), (v_dst_g), (alpha), (v_out_g), (vl)); \ + LV_RVV_BLEND_CHANNEL_U8M1_TO_U16M2((v_src_b), (v_dst_b), (alpha), (v_out_b), (vl)); \ +} while(0) + +/** + * Blend RGB channels with scalar alpha (all 3 channels at once) + * Uses m2->m4 widening + */ +#define LV_RVV_BLEND_RGB_U8M2(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, \ + alpha, v_out_r, v_out_g, v_out_b, vl) \ +do { \ + LV_RVV_BLEND_CHANNEL_U8M2_TO_U16M4((v_src_r), (v_dst_r), (alpha), (v_out_r), (vl)); \ + LV_RVV_BLEND_CHANNEL_U8M2_TO_U16M4((v_src_g), (v_dst_g), (alpha), (v_out_g), (vl)); \ + LV_RVV_BLEND_CHANNEL_U8M2_TO_U16M4((v_src_b), (v_dst_b), (alpha), (v_out_b), (vl)); \ +} while(0) + +/** + * Blend solid color (pre-multiplied) with destination RGB channels + * fg_color_opa: pre-computed (color * opa) for each channel + * opa_inv: 255 - opa + * Formula: result = (dst * opa_inv + fg_color_opa) >> 8 + * Uses m2->m4 widening + */ +#define LV_RVV_BLEND_SOLID_RGB_U8M2(v_dst_r, v_dst_g, v_dst_b, \ + fg_r_opa, fg_g_opa, fg_b_opa, opa_inv, \ + v_out_r, v_out_g, v_out_b, vl) \ +do { \ + vuint16m4_t _v_r16 = __riscv_vwmulu_vx_u16m4((v_dst_r), (opa_inv), (vl)); \ + vuint16m4_t _v_g16 = __riscv_vwmulu_vx_u16m4((v_dst_g), (opa_inv), (vl)); \ + vuint16m4_t _v_b16 = __riscv_vwmulu_vx_u16m4((v_dst_b), (opa_inv), (vl)); \ + _v_r16 = __riscv_vadd_vx_u16m4(_v_r16, (fg_r_opa), (vl)); \ + _v_g16 = __riscv_vadd_vx_u16m4(_v_g16, (fg_g_opa), (vl)); \ + _v_b16 = __riscv_vadd_vx_u16m4(_v_b16, (fg_b_opa), (vl)); \ + (v_out_r) = 
__riscv_vnsrl_wx_u8m2(_v_r16, 8, (vl)); \ + (v_out_g) = __riscv_vnsrl_wx_u8m2(_v_g16, 8, (vl)); \ + (v_out_b) = __riscv_vnsrl_wx_u8m2(_v_b16, 8, (vl)); \ +} while(0) + +/** + * Blend solid color (scalar) with destination RGB channels using vector alpha mask + * fg_r/g/b: foreground color scalar values + * v_alpha: per-pixel alpha values (vuint8m2_t) + * Formula: result = (fg * alpha + dst * (255 - alpha)) >> 8 + * Uses m2->m4 widening with vwmaccu for efficiency + */ +#define LV_RVV_BLEND_SOLID_RGB_VMASK_U8M2(v_dst_r, v_dst_g, v_dst_b, \ + fg_r, fg_g, fg_b, v_alpha, \ + v_out_r, v_out_g, v_out_b, vl) \ +do { \ + vuint8m2_t _v_alpha_inv = __riscv_vrsub_vx_u8m2((v_alpha), 255, (vl)); \ + vuint16m4_t _v_r16 = __riscv_vwmulu_vv_u16m4((v_dst_r), _v_alpha_inv, (vl)); \ + vuint16m4_t _v_g16 = __riscv_vwmulu_vv_u16m4((v_dst_g), _v_alpha_inv, (vl)); \ + vuint16m4_t _v_b16 = __riscv_vwmulu_vv_u16m4((v_dst_b), _v_alpha_inv, (vl)); \ + _v_r16 = __riscv_vwmaccu_vx_u16m4(_v_r16, (fg_r), (v_alpha), (vl)); \ + _v_g16 = __riscv_vwmaccu_vx_u16m4(_v_g16, (fg_g), (v_alpha), (vl)); \ + _v_b16 = __riscv_vwmaccu_vx_u16m4(_v_b16, (fg_b), (v_alpha), (vl)); \ + (v_out_r) = __riscv_vnsrl_wx_u8m2(_v_r16, 8, (vl)); \ + (v_out_g) = __riscv_vnsrl_wx_u8m2(_v_g16, 8, (vl)); \ + (v_out_b) = __riscv_vnsrl_wx_u8m2(_v_b16, 8, (vl)); \ +} while(0) + +/** + * Blend RGB channels with vector alpha (per-pixel mask) + */ +#define LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, \ + v_alpha, v_out_r, v_out_g, v_out_b, vl) \ +do { \ + LV_RVV_BLEND_CHANNEL_VMASK_U8M1_TO_U16M2((v_src_r), (v_dst_r), (v_alpha), (v_out_r), (vl)); \ + LV_RVV_BLEND_CHANNEL_VMASK_U8M1_TO_U16M2((v_src_g), (v_dst_g), (v_alpha), (v_out_g), (vl)); \ + LV_RVV_BLEND_CHANNEL_VMASK_U8M1_TO_U16M2((v_src_b), (v_dst_b), (v_alpha), (v_out_b), (vl)); \ +} while(0) + +/** + * Optimize blend results for zero and full mask cases (u8m1) + * When mask is 0, use destination; when mask is >= 255, use source + */ +#define LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, v_src_r, v_src_g, v_src_b, \ + v_dst_r, v_dst_g, v_dst_b, v_mask, vl) \ +do { \ + vbool8_t _zero_mask = __riscv_vmseq_vx_u8m1_b8((v_mask), 0, (vl)); \ + vbool8_t _full_mask = __riscv_vmsgeu_vx_u8m1_b8((v_mask), LV_OPA_MAX, (vl)); \ + (v_b) = __riscv_vmerge_vvm_u8m1((v_b), (v_dst_b), _zero_mask, (vl)); \ + (v_g) = __riscv_vmerge_vvm_u8m1((v_g), (v_dst_g), _zero_mask, (vl)); \ + (v_r) = __riscv_vmerge_vvm_u8m1((v_r), (v_dst_r), _zero_mask, (vl)); \ + (v_b) = __riscv_vmerge_vvm_u8m1((v_b), (v_src_b), _full_mask, (vl)); \ + (v_g) = __riscv_vmerge_vvm_u8m1((v_g), (v_src_g), _full_mask, (vl)); \ + (v_r) = __riscv_vmerge_vvm_u8m1((v_r), (v_src_r), _full_mask, (vl)); \ +} while(0) + +/** + * Optimize blend results for zero and full mask cases (u8m2) with scalar source + * When mask is 0, use destination; when mask is >= 255, use scalar source + */ +#define LV_RVV_BLEND_OPTIMIZE_MASK_SCALAR_U8M2(v_r, v_g, v_b, src_r, src_g, src_b, \ + v_dst_r, v_dst_g, v_dst_b, v_mask, vl) \ +do { \ + vbool4_t _zero_mask = __riscv_vmseq_vx_u8m2_b4((v_mask), 0, (vl)); \ + vbool4_t _full_mask = __riscv_vmsgeu_vx_u8m2_b4((v_mask), LV_OPA_MAX, (vl)); \ + (v_b) = __riscv_vmerge_vvm_u8m2((v_b), (v_dst_b), _zero_mask, (vl)); \ + (v_g) = __riscv_vmerge_vvm_u8m2((v_g), (v_dst_g), _zero_mask, (vl)); \ + (v_r) = __riscv_vmerge_vvm_u8m2((v_r), (v_dst_r), _zero_mask, (vl)); \ + (v_b) = __riscv_vmerge_vxm_u8m2((v_b), (src_b), _full_mask, (vl)); \ + (v_g) = __riscv_vmerge_vxm_u8m2((v_g), (src_g), _full_mask, (vl)); \ + 
(v_r) = __riscv_vmerge_vxm_u8m2((v_r), (src_r), _full_mask, (vl)); \ +} while(0) + +/********************** + * EFFECTIVE ALPHA CALCULATION MACROS + * + * These macros compute the effective alpha from combinations of: + * - v_alpha: source alpha channel (per-pixel) + * - mask: mask value (per-pixel) + * - opa: global opacity (scalar) + * + * Formula: eff_alpha = (alpha * mask * opa) >> 16 + * Intermediate calculations use 16-bit to prevent overflow. + **********************/ + +/** + * Calculate effective alpha from source alpha and global opa + */ +#define LV_RVV_CALC_EFF_ALPHA_OPA_U8M1(v_src_a, opa, v_eff_a, vl) \ + do { \ + vuint16m2_t _v_alpha16 = __riscv_vzext_vf2_u16m2((v_src_a), (vl)); \ + vuint16m2_t _v_eff16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(_v_alpha16, (opa), (vl)), 8, (vl)); \ + (v_eff_a) = __riscv_vnsrl_wx_u8m1(_v_eff16, 0, (vl)); \ + } while(0) + +/** + * Calculate effective alpha from source alpha and mask + */ +#define LV_RVV_CALC_EFF_ALPHA_MASK_U8M1(v_src_a, v_mask, v_eff_a, vl) \ + do { \ + vuint16m2_t _v_alpha16 = __riscv_vzext_vf2_u16m2((v_src_a), (vl)); \ + vuint16m2_t _v_mask16 = __riscv_vzext_vf2_u16m2((v_mask), (vl)); \ + vuint16m2_t _v_eff16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vv_u16m2(_v_alpha16, _v_mask16, (vl)), 8, (vl)); \ + (v_eff_a) = __riscv_vnsrl_wx_u8m1(_v_eff16, 0, (vl)); \ + } while(0) + +/** + * Calculate effective alpha from source alpha, mask, and global opa + * Formula: eff_alpha = (alpha * mask * opa) >> 16 + * Widen to u32m4 to avoid precision loss from double shift + */ +#define LV_RVV_CALC_EFF_ALPHA_MASK_OPA_U8M1(v_src_a, v_mask, opa, v_eff_a, vl) \ + do { \ + vuint16m2_t _v_alpha16 = __riscv_vzext_vf2_u16m2((v_src_a), (vl)); \ + vuint16m2_t _v_mask16 = __riscv_vzext_vf2_u16m2((v_mask), (vl)); \ + vuint16m2_t _v_prod16 = __riscv_vmul_vv_u16m2(_v_alpha16, _v_mask16, (vl)); \ + vuint32m4_t _v_prod32 = __riscv_vwmulu_vx_u32m4(_v_prod16, (opa), (vl)); \ + vuint16m2_t _v_eff16 = __riscv_vnsrl_wx_u16m2(_v_prod32, 16, (vl)); \ + (v_eff_a) = __riscv_vnsrl_wx_u8m1(_v_eff16, 0, (vl)); \ + } while(0) + +/********************** + * RGB/ARGB CHANNEL LOAD/STORE MACROS (for ARGB8888/RGB888/XRGB8888) + * + * Load/Store RGB channels to/from memory in different formats using m1 LMUL. 
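+ *
+ * Byte layout assumed by these macros (it matches the offsets used below):
+ *   ARGB8888/XRGB8888: byte 0 = B, byte 1 = G, byte 2 = R, byte 3 = A/X, stride 4
+ *   RGB888:            byte 0 = B, byte 1 = G, byte 2 = R,               stride 3
+ * For example, __riscv_vlse8_v_u8m1(ptr + x * 4 + 2, 4, vl) gathers the R byte
+ * of vl consecutive pixels into a single u8m1 register.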
+ **********************/ + +#define LV_RVV_LOAD_ARGB8888_U8M1(ptr, x, v_b, v_g, v_r, v_a, vl) \ + do { \ + (v_b) = __riscv_vlse8_v_u8m1((ptr) + (x) * 4 + 0, 4, (vl)); \ + (v_g) = __riscv_vlse8_v_u8m1((ptr) + (x) * 4 + 1, 4, (vl)); \ + (v_r) = __riscv_vlse8_v_u8m1((ptr) + (x) * 4 + 2, 4, (vl)); \ + (v_a) = __riscv_vlse8_v_u8m1((ptr) + (x) * 4 + 3, 4, (vl)); \ + } while(0) + +#define LV_RVV_LOAD_RGB888_U8M1(ptr, x, v_b, v_g, v_r, vl) \ + do { \ + (v_b) = __riscv_vlse8_v_u8m1((ptr) + (x) * 3 + 0, 3, (vl)); \ + (v_g) = __riscv_vlse8_v_u8m1((ptr) + (x) * 3 + 1, 3, (vl)); \ + (v_r) = __riscv_vlse8_v_u8m1((ptr) + (x) * 3 + 2, 3, (vl)); \ + } while(0) + +#define LV_RVV_LOAD_XRGB8888_U8M1(ptr, x, v_b, v_g, v_r, vl) \ + do { \ + (v_b) = __riscv_vlse8_v_u8m1((ptr) + (x) * 4 + 0, 4, (vl)); \ + (v_g) = __riscv_vlse8_v_u8m1((ptr) + (x) * 4 + 1, 4, (vl)); \ + (v_r) = __riscv_vlse8_v_u8m1((ptr) + (x) * 4 + 2, 4, (vl)); \ + } while(0) + +#define LV_RVV_STORE_RGB888_U8M1(ptr, x, v_b, v_g, v_r, vl) \ + do { \ + __riscv_vsse8_v_u8m1((ptr) + (x) * 3 + 0, 3, (v_b), (vl)); \ + __riscv_vsse8_v_u8m1((ptr) + (x) * 3 + 1, 3, (v_g), (vl)); \ + __riscv_vsse8_v_u8m1((ptr) + (x) * 3 + 2, 3, (v_r), (vl)); \ + } while(0) + +#define LV_RVV_STORE_XRGB8888_U8M1(ptr, x, v_b, v_g, v_r, v_a, vl) \ + do { \ + __riscv_vsse8_v_u8m1((ptr) + (x) * 4 + 0, 4, (v_b), (vl)); \ + __riscv_vsse8_v_u8m1((ptr) + (x) * 4 + 1, 4, (v_g), (vl)); \ + __riscv_vsse8_v_u8m1((ptr) + (x) * 4 + 2, 4, (v_r), (vl)); \ + __riscv_vsse8_v_u8m1((ptr) + (x) * 4 + 3, 4, (v_a), (vl)); \ + } while(0) + +/********************** + * MACROS + **********************/ + +/********************** + * TYPEDEFS + **********************/ + +/********************** + * STATIC PROTOTYPES + **********************/ + +static inline void * LV_ATTRIBUTE_FAST_MEM drawbuf_next_row(const void * buf, uint32_t stride) +{ + return (void *)((uint8_t *)buf + stride); +} + +/********************** + * GLOBAL PROTOTYPES + **********************/ + +/********************** + * MACROS + **********************/ + +#endif + +#ifdef __cplusplus +} /*extern "C"*/ +#endif + +#endif /*LV_BLEND_RISCV_V_PRIVATE_H*/ diff --git a/src/draw/sw/blend/riscv_v/lv_blend_riscv_vector_emulation.h b/src/draw/sw/blend/riscv_v/lv_blend_riscv_vector_emulation.h new file mode 100644 index 0000000000..3ed621f1ef --- /dev/null +++ b/src/draw/sw/blend/riscv_v/lv_blend_riscv_vector_emulation.h @@ -0,0 +1,921 @@ +/** + * @file lv_blend_riscv_vector_emulation.h + * Software emulation of RISC-V Vector Extension (RVV 1.0) intrinsics + * + * This header provides pure C implementations of RVV intrinsics to enable + * testing and verification on non-RVV platforms. The implementations follow + * the RVV specification as documented in: + * https://dzaima.github.io/intrinsics-viewer/ + * + * Usage: + * 1. On systems without RVV support, include this header BEFORE + * 2. Or define RISCV_VECTOR_EMULATION before including actual + * 3. 
All __riscv_* functions will be emulated in software + * + * Limitations: + * - No performance optimization (this is software emulation) + * - Vector length (vl) is tracked but all operations work on single elements in a loop + * - Predication and masking are simplified but functionally correct + * - LMUL < 1 (fractional multipliers) are not supported + */ + +#ifndef LV_BLEND_RISCV_VECTOR_EMULATION_H +#define LV_BLEND_RISCV_VECTOR_EMULATION_H + +#ifdef __cplusplus +extern "C" { +#endif +#include "../../../../lv_conf_internal.h" +#if LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_RISCV_V + +#include +#include +#include + +/* ============================================================================ + * Vector Type Definitions + * ============================================================================ + * + * For emulation, we use structs that hold data and the current vector length. + * Real RVV is much more sophisticated, but this allows us to verify logic. + * + * Assumption: VLEN = 128 bits (a common RVV configuration) + * - e8m1: 128 bits / 8 bits = 16 elements + * - e8m2: 256 bits / 8 bits = 32 elements + * - e8m4: 512 bits / 8 bits = 64 elements + * - e16m1: 128 bits / 16 bits = 8 elements + * - e16m2: 256 bits / 16 bits = 16 elements + * - e16m4: 512 bits / 16 bits = 32 elements + * - e32m1: 128 bits / 32 bits = 4 elements + * - e32m2: 256 bits / 32 bits = 8 elements + * - e32m4: 512 bits / 32 bits = 16 elements + */ + +/* LMUL = 1 (1 vector register, VLEN=128 bits) */ +typedef struct { + uint8_t data[16]; /* 128 bits / 8 bits per element = 16 elements */ + size_t vl; /* Current vector length */ +} vuint8m1_t; + +typedef struct { + uint16_t data[8]; /* 128 bits / 16 bits per element = 8 elements */ + size_t vl; +} vuint16m1_t; + +typedef struct { + uint32_t data[4]; /* 128 bits / 32 bits per element = 4 elements */ + size_t vl; +} vuint32m1_t; + +typedef struct { + uint64_t data[2]; /* 128 bits / 64 bits per element = 2 elements */ + size_t vl; +} vuint64m1_t; + +/* LMUL = 2 (2 vector registers, total 256 bits) */ +typedef struct { + uint8_t data[32]; /* 256 bits / 8 bits per element = 32 elements */ + size_t vl; +} vuint8m2_t; + +typedef struct { + uint16_t data[16]; /* 256 bits / 16 bits per element = 16 elements */ + size_t vl; +} vuint16m2_t; + +typedef struct { + uint32_t data[8]; /* 256 bits / 32 bits per element = 8 elements */ + size_t vl; +} vuint32m2_t; + +typedef struct { + uint64_t data[4]; /* 256 bits / 64 bits per element = 4 elements */ + size_t vl; +} vuint64m2_t; + +/* LMUL = 4 (4 vector registers, total 512 bits) */ +typedef struct { + uint32_t data[16]; /* 512 bits / 32 bits per element = 16 elements */ + size_t vl; +} vuint32m4_t; + +typedef struct { + uint16_t data[32]; /* 512 bits / 16 bits per element = 32 elements */ + size_t vl; +} vuint16m4_t; + +/* LMUL = 8 (8 vector registers, total 1024 bits) */ +typedef struct { + uint8_t data[128]; /* 1024 bits / 8 bits per element = 128 elements */ + size_t vl; +} vuint8m8_t; + +/* Boolean/mask types (vbool4 means SEW/LMUL=4, for e8m2 -> 8/2=4) */ +typedef struct { + uint8_t data[32]; /* Same size as the vector it masks (e8m2 = 32 elements) */ + size_t vl; +} vbool4_t; + +typedef struct { + uint8_t data[16]; /* Mask for e8m1 (16 elements) */ + size_t vl; +} vbool8_t; + +/* ============================================================================ + * Vector Length Management + * ============================================================================ + * + * Operations: + * - __riscv_vsetvl_* : Set vector length for 
given element type and LMUL + * - __riscv_vsetvlmax_* : Get maximum vector length + */ + +/** + * Get maximum vector length for given element type and LMUL + * Based on VLEN=128 bits + */ +static inline size_t __riscv_vsetvlmax_e8m1(void) +{ + return 16; /* 128/8 = 16 */ +} +static inline size_t __riscv_vsetvlmax_e8m2(void) +{ + return 32; /* 256/8 = 32 */ +} +static inline size_t __riscv_vsetvlmax_e8m4(void) +{ + return 64; /* 512/8 = 64 */ +} + +static inline size_t __riscv_vsetvlmax_e16m1(void) +{ + return 8; /* 128/16 = 8 */ +} +static inline size_t __riscv_vsetvlmax_e16m2(void) +{ + return 16; /* 256/16 = 16 */ +} +static inline size_t __riscv_vsetvlmax_e16m4(void) +{ + return 32; /* 512/16 = 32 */ +} + +static inline size_t __riscv_vsetvlmax_e32m1(void) +{ + return 4; /* 128/32 = 4 */ +} +static inline size_t __riscv_vsetvlmax_e32m2(void) +{ + return 8; /* 256/32 = 8 */ +} +static inline size_t __riscv_vsetvlmax_e32m4(void) +{ + return 16; /* 512/32 = 16 */ +} + +static inline size_t __riscv_vsetvlmax_e8m8(void) +{ + return 128; /* 1024/8 = 128 */ +} + +static inline size_t __riscv_vsetvlmax_e64m1(void) +{ + return 2; /* 128/64 = 2 */ +} +static inline size_t __riscv_vsetvlmax_e64m2(void) +{ + return 4; /* 256/64 = 4 */ +} +static inline size_t __riscv_vsetvlmax_e64m4(void) +{ + return 8; /* 512/64 = 8 */ +} + +/** + * Set vector length to requested value (or max if requested > max) + */ +static inline size_t __riscv_vsetvl_e8m1(size_t avl) +{ + return avl > 16 ? 16 : avl; +} + +static inline size_t __riscv_vsetvl_e8m2(size_t avl) +{ + return avl > 32 ? 32 : avl; +} + +static inline size_t __riscv_vsetvl_e8m4(size_t avl) +{ + return avl > 64 ? 64 : avl; +} + +static inline size_t __riscv_vsetvl_e16m1(size_t avl) +{ + return avl > 8 ? 8 : avl; +} + +static inline size_t __riscv_vsetvl_e16m2(size_t avl) +{ + return avl > 16 ? 16 : avl; +} + +static inline size_t __riscv_vsetvl_e16m4(size_t avl) +{ + return avl > 32 ? 32 : avl; +} + +static inline size_t __riscv_vsetvl_e32m1(size_t avl) +{ + return avl > 4 ? 4 : avl; +} + +static inline size_t __riscv_vsetvl_e32m2(size_t avl) +{ + return avl > 8 ? 8 : avl; +} + +static inline size_t __riscv_vsetvl_e32m4(size_t avl) +{ + return avl > 16 ? 16 : avl; +} + +static inline size_t __riscv_vsetvl_e8m8(size_t avl) +{ + return avl > 128 ? 128 : avl; +} + +/* ============================================================================ + * Vector Initialize Operations (vmv.v.x - broadcast) + * ============================================================================ + */ + +/** + * vmv.v.x: Broadcast scalar to all vector elements + */ +static inline vuint8m1_t __riscv_vmv_v_x_u8m1(uint8_t src, size_t vl) +{ + vuint8m1_t res; + res.vl = vl > 16 ? 16 : vl; + for(size_t i = 0; i < res.vl; i++) { + res.data[i] = src; + } + return res; +} + +static inline vuint8m2_t __riscv_vmv_v_x_u8m2(uint8_t src, size_t vl) +{ + vuint8m2_t res; + res.vl = vl > 32 ? 32 : vl; + for(size_t i = 0; i < res.vl; i++) { + res.data[i] = src; + } + return res; +} + +static inline vuint16m2_t __riscv_vmv_v_x_u16m2(uint16_t src, size_t vl) +{ + vuint16m2_t res; + res.vl = vl > 16 ? 16 : vl; + for(size_t i = 0; i < res.vl; i++) { + res.data[i] = src; + } + return res; +} + +static inline vuint16m4_t __riscv_vmv_v_x_u16m4(uint16_t src, size_t vl) +{ + vuint16m4_t res; + res.vl = vl > 32 ? 
32 : vl; + for(size_t i = 0; i < res.vl; i++) { + res.data[i] = src; + } + return res; +} + +static inline vuint32m4_t __riscv_vmv_v_x_u32m4(uint32_t src, size_t vl) +{ + vuint32m4_t res; + res.vl = vl > 16 ? 16 : vl; + for(size_t i = 0; i < res.vl; i++) { + res.data[i] = src; + } + return res; +} + +/* ============================================================================ + * Vector Load Operations (vle, vlse) + * ============================================================================ + */ + +/** + * vle8: Load vector of 8-bit elements with unit stride + */ +static inline vuint8m1_t __riscv_vle8_v_u8m1(const uint8_t * base, size_t vl) +{ + vuint8m1_t res; + res.vl = vl > 16 ? 16 : vl; + for(size_t i = 0; i < res.vl; i++) { + res.data[i] = base[i]; + } + return res; +} + +static inline vuint8m2_t __riscv_vle8_v_u8m2(const uint8_t * base, size_t vl) +{ + vuint8m2_t res; + res.vl = vl > 32 ? 32 : vl; + for(size_t i = 0; i < res.vl; i++) { + res.data[i] = base[i]; + } + return res; +} + +static inline vuint8m8_t __riscv_vle8_v_u8m8(const uint8_t * base, size_t vl) +{ + vuint8m8_t res; + res.vl = vl > 128 ? 128 : vl; + for(size_t i = 0; i < res.vl; i++) { + res.data[i] = base[i]; + } + return res; +} + +/** + * vlse8: Load vector with stride + * Load from address base + i * stride for each element i + */ +static inline vuint8m2_t __riscv_vlse8_v_u8m2(const uint8_t * base, ptrdiff_t stride, size_t vl) +{ + vuint8m2_t res; + res.vl = vl > 32 ? 32 : vl; + for(size_t i = 0; i < res.vl; i++) { + res.data[i] = *(const uint8_t *)((const char *)base + i * stride); + } + return res; +} + +static inline vuint8m1_t __riscv_vlse8_v_u8m1(const uint8_t * base, ptrdiff_t stride, size_t vl) +{ + vuint8m1_t res; + res.vl = vl > 16 ? 16 : vl; + for(size_t i = 0; i < res.vl; i++) { + res.data[i] = *(const uint8_t *)((const char *)base + i * stride); + } + return res; +} + +/** + * vle16: Load 16-bit vector + */ +static inline vuint16m2_t __riscv_vle16_v_u16m2(const uint16_t * base, size_t vl) +{ + vuint16m2_t res; + res.vl = vl > 16 ? 16 : vl; + for(size_t i = 0; i < res.vl; i++) { + res.data[i] = base[i]; + } + return res; +} + +/** + * vlse16: Load 16-bit vector with stride + */ +static inline vuint16m2_t __riscv_vlse16_v_u16m2(const uint16_t * base, ptrdiff_t stride, size_t vl) +{ + vuint16m2_t res; + res.vl = vl > 16 ? 
16 : vl; + for(size_t i = 0; i < res.vl; i++) { + res.data[i] = *(const uint16_t *)((const char *)base + i * stride); + } + return res; +} + +/* ============================================================================ + * Vector Store Operations (vse, vsse) + * ============================================================================ + */ + +/** + * vse8: Store vector of 8-bit elements with unit stride + */ +static inline void __riscv_vse8_v_u8m1(uint8_t * base, vuint8m1_t v, size_t vl) +{ + for(size_t i = 0; i < v.vl && i < vl; i++) { + base[i] = v.data[i]; + } +} + +static inline void __riscv_vse8_v_u8m2(uint8_t * base, vuint8m2_t v, size_t vl) +{ + for(size_t i = 0; i < v.vl && i < vl; i++) { + base[i] = v.data[i]; + } +} + +static inline void __riscv_vse8_v_u8m8(uint8_t * base, vuint8m8_t v, size_t vl) +{ + for(size_t i = 0; i < v.vl && i < vl; i++) { + base[i] = v.data[i]; + } +} + +/** + * vsse8: Store vector with stride + * Store to address base + i * stride for each element i + */ +static inline void __riscv_vsse8_v_u8m2(uint8_t * base, ptrdiff_t stride, vuint8m2_t v, size_t vl) +{ + for(size_t i = 0; i < v.vl && i < vl; i++) { + *(uint8_t *)((char *)base + i * stride) = v.data[i]; + } +} + +static inline void __riscv_vsse8_v_u8m1(uint8_t * base, ptrdiff_t stride, vuint8m1_t v, size_t vl) +{ + for(size_t i = 0; i < v.vl && i < vl; i++) { + *(uint8_t *)((char *)base + i * stride) = v.data[i]; + } +} + +/** + * vse16: Store 16-bit vector + */ +static inline void __riscv_vse16_v_u16m2(uint16_t * base, vuint16m2_t v, size_t vl) +{ + for(size_t i = 0; i < v.vl && i < vl; i++) { + base[i] = v.data[i]; + } +} + +static inline void __riscv_vse16_v_u16m4(uint16_t * base, vuint16m4_t v, size_t vl) +{ + for(size_t i = 0; i < v.vl && i < vl; i++) { + base[i] = v.data[i]; + } +} + +/** + * vsse16: Store 16-bit vector with stride + */ +static inline void __riscv_vsse16_v_u16m2(uint16_t * base, ptrdiff_t stride, vuint16m2_t v, size_t vl) +{ + for(size_t i = 0; i < v.vl && i < vl; i++) { + *(uint16_t *)((char *)base + i * stride) = v.data[i]; + } +} + +/** + * vse32: Store 32-bit vector + */ +static inline void __riscv_vse32_v_u32m4(uint32_t * base, vuint32m4_t v, size_t vl) +{ + for(size_t i = 0; i < v.vl && i < vl; i++) { + base[i] = v.data[i]; + } +} + +/* ============================================================================ + * Vector Arithmetic Operations + * ============================================================================ + */ + +/** + * vmul: Vector multiply (scalar * vector) + */ +static inline vuint16m2_t __riscv_vmul_vx_u16m2(vuint16m2_t v, uint16_t x, size_t vl) +{ + vuint16m2_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = v.data[i] * x; + } + return res; +} + +static inline vuint16m2_t __riscv_vmul_vv_u16m2(vuint16m2_t v1, vuint16m2_t v2, size_t vl) +{ + vuint16m2_t res; + res.vl = v1.vl; + for(size_t i = 0; i < v1.vl && i < vl; i++) { + res.data[i] = v1.data[i] * v2.data[i]; + } + return res; +} + +/** + * vwmulu: Vector widening multiply unsigned (scalar * vector, 8-bit -> 16-bit) + */ +static inline vuint16m4_t __riscv_vwmulu_vx_u16m4(vuint8m2_t v, uint8_t x, size_t vl) +{ + vuint16m4_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = (uint16_t)v.data[i] * (uint16_t)x; + } + return res; +} + +static inline vuint16m2_t __riscv_vwmulu_vx_u16m2(vuint8m1_t v, uint8_t x, size_t vl) +{ + vuint16m2_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = 
(uint16_t)v.data[i] * (uint16_t)x; + } + return res; +} + +static inline vuint16m2_t __riscv_vwmulu_vv_u16m2(vuint8m1_t v1, vuint8m1_t v2, size_t vl) +{ + vuint16m2_t res; + res.vl = v1.vl; + for(size_t i = 0; i < v1.vl && i < vl; i++) { + res.data[i] = (uint16_t)v1.data[i] * (uint16_t)v2.data[i]; + } + return res; +} + +static inline vuint16m4_t __riscv_vwmulu_vv_u16m4(vuint8m2_t v1, vuint8m2_t v2, size_t vl) +{ + vuint16m4_t res; + res.vl = v1.vl; + for(size_t i = 0; i < v1.vl && i < vl; i++) { + res.data[i] = (uint16_t)v1.data[i] * (uint16_t)v2.data[i]; + } + return res; +} + +static inline vuint16m2_t __riscv_vwmaccu_vx_u16m2(vuint16m2_t acc, uint8_t x, vuint8m1_t v, size_t vl) +{ + vuint16m2_t res = acc; + for(size_t i = 0; i < acc.vl && i < vl; i++) { + res.data[i] += (uint16_t)x * (uint16_t)v.data[i]; + } + return res; +} + +static inline vuint16m4_t __riscv_vwmaccu_vx_u16m4(vuint16m4_t acc, uint8_t x, vuint8m2_t v, size_t vl) +{ + vuint16m4_t res = acc; + for(size_t i = 0; i < acc.vl && i < vl; i++) { + res.data[i] += (uint16_t)x * (uint16_t)v.data[i]; + } + return res; +} + +static inline vuint16m2_t __riscv_vwmaccu_vv_u16m2(vuint16m2_t acc, vuint8m1_t v1, vuint8m1_t v2, size_t vl) +{ + vuint16m2_t res = acc; + for(size_t i = 0; i < acc.vl && i < vl; i++) { + res.data[i] += (uint16_t)v1.data[i] * (uint16_t)v2.data[i]; + } + return res; +} + +static inline vuint16m4_t __riscv_vwmaccu_vv_u16m4(vuint16m4_t acc, vuint8m2_t v1, vuint8m2_t v2, size_t vl) +{ + vuint16m4_t res = acc; + for(size_t i = 0; i < acc.vl && i < vl; i++) { + res.data[i] += (uint16_t)v1.data[i] * (uint16_t)v2.data[i]; + } + return res; +} + +/* ============================================================================ + * Vector Shift Operations + * ============================================================================ + */ + +/** + * vsrl: Vector shift right logical (scalar shift amount) + */ +static inline vuint16m2_t __riscv_vsrl_vx_u16m2(vuint16m2_t v, uint32_t x, size_t vl) +{ + vuint16m2_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = v.data[i] >> x; + } + return res; +} + +static inline vuint16m4_t __riscv_vsrl_vx_u16m4(vuint16m4_t v, uint32_t x, size_t vl) +{ + vuint16m4_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = v.data[i] >> x; + } + return res; +} + +static inline vuint8m1_t __riscv_vnsrl_wx_u8m1(vuint16m2_t v, uint32_t x, size_t vl) +{ + vuint8m1_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = (uint8_t)(v.data[i] >> x); + } + return res; +} + +/** + * vnsrl: Vector narrow shift right logical + * Narrow from 16-bit to 8-bit with shift + */ +static inline vuint8m2_t __riscv_vnsrl_wx_u8m2(vuint16m4_t v, uint32_t x, size_t vl) +{ + vuint8m2_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = (uint8_t)(v.data[i] >> x); + } + return res; +} + +/* ============================================================================ + * Vector Bitwise Operations + * ============================================================================ + */ + +/** + * vand: Vector bitwise AND (scalar) + */ +static inline vuint16m2_t __riscv_vand_vx_u16m2(vuint16m2_t v, uint16_t x, size_t vl) +{ + vuint16m2_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = v.data[i] & x; + } + return res; +} + +/** + * vor: Vector bitwise OR (vector) + */ +static inline vuint16m2_t __riscv_vor_vv_u16m2(vuint16m2_t v1, vuint16m2_t v2, size_t vl) +{ + 
vuint16m2_t res; + res.vl = v1.vl; + for(size_t i = 0; i < v1.vl && i < vl; i++) { + res.data[i] = v1.data[i] | v2.data[i]; + } + return res; +} + +/* ============================================================================ + * Vector Shift Left Operations + * ============================================================================ + */ + +/** + * vsll: Vector shift left logical (scalar shift amount) + */ +static inline vuint16m2_t __riscv_vsll_vx_u16m2(vuint16m2_t v, uint32_t x, size_t vl) +{ + vuint16m2_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = v.data[i] << x; + } + return res; +} + +/* ============================================================================ + * Vector Comparison Operations + * ============================================================================ + */ + +static inline vbool8_t __riscv_vmseq_vx_u8m1_b8(vuint8m1_t v, uint8_t x, size_t vl) +{ + vbool8_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = (v.data[i] == x) ? 1 : 0; + } + return res; +} + +/** + * vmseq: Vector equal comparison (scalar) + * Returns boolean mask (1 if equal, 0 if not) + */ +static inline vbool4_t __riscv_vmseq_vx_u8m2_b4(vuint8m2_t v, uint8_t x, size_t vl) +{ + vbool4_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = (v.data[i] == x) ? 1 : 0; + } + return res; +} + +/** + * vmsgeu: Vector greater or equal comparison (scalar) + * Returns boolean mask + */ +static inline vbool8_t __riscv_vmsgeu_vx_u8m1_b8(vuint8m1_t v, uint8_t x, size_t vl) +{ + vbool8_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = (v.data[i] >= x) ? 1 : 0; + } + return res; +} + +static inline vbool4_t __riscv_vmsgeu_vx_u8m2_b4(vuint8m2_t v, uint8_t x, size_t vl) +{ + vbool4_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = (v.data[i] >= x) ? 1 : 0; + } + return res; +} + +/* ============================================================================ + * Vector Merge Operations (Conditional) + * ============================================================================ + */ + +/** + * vmerge: Merge vector under predicate mask (vector paths) + * Result = mask ? v2 : v1 + */ +static inline vuint8m1_t __riscv_vmerge_vvm_u8m1(vuint8m1_t v1, vuint8m1_t v2, vbool8_t mask, size_t vl) +{ + vuint8m1_t res; + res.vl = v1.vl; + for(size_t i = 0; i < v1.vl && i < vl; i++) { + res.data[i] = mask.data[i] ? v2.data[i] : v1.data[i]; + } + return res; +} + +static inline vuint8m2_t __riscv_vmerge_vvm_u8m2(vuint8m2_t v1, vuint8m2_t v2, vbool4_t mask, size_t vl) +{ + vuint8m2_t res; + res.vl = v1.vl; + for(size_t i = 0; i < v1.vl && i < vl; i++) { + res.data[i] = mask.data[i] ? v2.data[i] : v1.data[i]; + } + return res; +} + +/** + * vmerge: Merge scalar under predicate mask (scalar path) + * Result = mask ? scalar : vector + */ +static inline vuint8m1_t __riscv_vmerge_vxm_u8m1(vuint8m1_t v, uint8_t x, vbool8_t mask, size_t vl) +{ + vuint8m1_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = mask.data[i] ? x : v.data[i]; + } + return res; +} + +static inline vuint8m2_t __riscv_vmerge_vxm_u8m2(vuint8m2_t v, uint8_t x, vbool4_t mask, size_t vl) +{ + vuint8m2_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = mask.data[i] ? 
x : v.data[i]; + } + return res; +} + +/* ========================================================================== + * Vector Reverse Subtract Operations + * ========================================================================= */ + +static inline vuint8m1_t __riscv_vrsub_vx_u8m1(vuint8m1_t v, uint8_t x, size_t vl) +{ + vuint8m1_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = x - v.data[i]; + } + return res; +} + +static inline vuint8m2_t __riscv_vrsub_vx_u8m2(vuint8m2_t v, uint8_t x, size_t vl) +{ + vuint8m2_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = x - v.data[i]; + } + return res; +} + +static inline vuint16m2_t __riscv_vrsub_vx_u16m2(vuint16m2_t v, uint16_t x, size_t vl) +{ + vuint16m2_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = x - v.data[i]; + } + return res; +} + +/* ========================================================================== + * Vector Add Operations + * ========================================================================= */ + +static inline vuint8m1_t __riscv_vadd_vv_u8m1(vuint8m1_t v1, vuint8m1_t v2, size_t vl) +{ + vuint8m1_t res; + res.vl = v1.vl; + for(size_t i = 0; i < v1.vl && i < vl; i++) { + res.data[i] = v1.data[i] + v2.data[i]; + } + return res; +} + +static inline vuint16m4_t __riscv_vadd_vx_u16m4(vuint16m4_t v, uint16_t x, size_t vl) +{ + vuint16m4_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = v.data[i] + x; + } + return res; +} + +static inline vuint16m2_t __riscv_vadd_vx_u16m2(vuint16m2_t v, uint16_t x, size_t vl) +{ + vuint16m2_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = v.data[i] + x; + } + return res; +} + +/* ========================================================================== + * Vector Zero-Extend Operations + * ========================================================================= */ + +static inline vuint16m2_t __riscv_vzext_vf2_u16m2(vuint8m1_t v, size_t vl) +{ + vuint16m2_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = (uint16_t)v.data[i]; + } + return res; +} + +/* ========================================================================== + * Widening multiply/accumulate for 16-bit -> 32-bit (m2 -> m4) + * ========================================================================= */ +static inline vuint32m4_t __riscv_vwmulu_vx_u32m4(vuint16m2_t v, uint32_t x, size_t vl) +{ + vuint32m4_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = (uint32_t)v.data[i] * x; + } + return res; +} + +static inline vuint32m4_t __riscv_vwmulu_vv_u32m4(vuint16m2_t v1, vuint16m2_t v2, size_t vl) +{ + vuint32m4_t res; + res.vl = v1.vl; + for(size_t i = 0; i < v1.vl && i < vl; i++) { + res.data[i] = (uint32_t)v1.data[i] * (uint32_t)v2.data[i]; + } + return res; +} + +static inline vuint32m4_t __riscv_vwmaccu_vx_u32m4(vuint32m4_t acc, uint32_t x, vuint16m2_t v, size_t vl) +{ + vuint32m4_t res = acc; + for(size_t i = 0; i < acc.vl && i < vl; i++) { + res.data[i] += (uint32_t)x * (uint32_t)v.data[i]; + } + return res; +} + +static inline vuint16m2_t __riscv_vnsrl_wx_u16m2(vuint32m4_t v, uint32_t x, size_t vl) +{ + vuint16m2_t res; + res.vl = v.vl; + for(size_t i = 0; i < v.vl && i < vl; i++) { + res.data[i] = (uint16_t)(v.data[i] >> x); + } + return res; +} + +#endif /* LV_USE_DRAW_SW_ASM_RISCV_V */ + +#ifdef __cplusplus +} +#endif + +#endif /* 
LV_BLEND_RISCV_VECTOR_EMULATION_H */ diff --git a/src/draw/sw/blend/riscv_v/lv_draw_sw_blend_riscv_v_to_rgb888.c b/src/draw/sw/blend/riscv_v/lv_draw_sw_blend_riscv_v_to_rgb888.c new file mode 100644 index 0000000000..37e3b4e0ad --- /dev/null +++ b/src/draw/sw/blend/riscv_v/lv_draw_sw_blend_riscv_v_to_rgb888.c @@ -0,0 +1,1643 @@ +/** + * @file lv_draw_sw_blend_riscv_v_to_rgb888.c + * RGB888/XRGB8888 blend implementation for RISC-V Vector Extension (RVV 1.0) + * + * Supports both dest_px_size=3 (RGB888) and dest_px_size=4 (XRGB8888) + * Reference: lv_draw_sw_blend_neon_to_rgb888.c + * + * NOTE: All RVV blend logic is inlined to avoid passing vuint32m4_t as function + * parameters, which causes complex stack operations that can corrupt the stack. + */ + +/********************* + * INCLUDES + *********************/ +#include "lv_draw_sw_blend_riscv_v_to_rgb888.h" +#if LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_RISCV_V + +#include "../../../../misc/lv_color.h" +#include "../../../../misc/lv_types.h" +#include "../lv_draw_sw_blend_private.h" +#include "lv_blend_riscv_v_private.h" + +/********************* + * DEFINES + *********************/ + +/********************** + * TYPEDEFS + **********************/ + +/********************** + * STATIC PROTOTYPES + **********************/ + +/********************** + * GLOBAL FUNCTIONS + **********************/ + +/** + * Fill with solid color (no blending needed, opa >= 255) + */ +lv_result_t lv_draw_sw_blend_riscv_v_color_to_rgb888(lv_draw_sw_blend_fill_dsc_t * dsc, uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(dsc->opa >= LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf == NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + uint8_t * dest_buf = dsc->dest_buf; + size_t vl; + + if(dest_px_size == 3) { + /* RGB888: 3 bytes per pixel (B, G, R) - use RVV segmented store */ + /* Initialize color vectors once with max vl */ + size_t vlmax = __riscv_vsetvlmax_e8m2(); + vuint8m2_t v_b = __riscv_vmv_v_x_u8m2(dsc->color.blue, vlmax); + vuint8m2_t v_g = __riscv_vmv_v_x_u8m2(dsc->color.green, vlmax); + vuint8m2_t v_r = __riscv_vmv_v_x_u8m2(dsc->color.red, vlmax); + + for(int32_t y = 0; y < h; y++) { + /* Process with RVV using segmented store for 3-byte pixels */ + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m2(w - x); + LV_RVV_VSSEG3E8_U8M2(dest_buf + x * 3, v_b, v_g, v_r, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + } + } + else { /* dest_px_size == 4 */ + /* XRGB8888: 4 bytes per pixel */ + const uint32_t color32 = 0xFF000000 | ((uint32_t)dsc->color.red << 16) | + ((uint32_t)dsc->color.green << 8) | dsc->color.blue; + + /* Initialize color vector once with max vl */ + size_t vlmax = __riscv_vsetvlmax_e32m4(); + vuint32m4_t v_color = __riscv_vmv_v_x_u32m4(color32, vlmax); + + for(int32_t y = 0; y < h; y++) { + /* Process with RVV - use m4 to reduce register pressure */ + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e32m4(w - x); + __riscv_vse32_v_u32m4((uint32_t *)(dest_buf + x * 4), v_color, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + } + } + + return LV_RESULT_OK; +} + +/** + * Fill with color and opacity (opa < 255) + * blend formula: result = (fg * opa + bg * (255 - opa)) >> 8 + */ +lv_result_t lv_draw_sw_blend_riscv_v_color_to_rgb888_with_opa(lv_draw_sw_blend_fill_dsc_t * dsc, uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(dsc->opa < 
LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf == NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const uint8_t opa = dsc->opa; + const uint8_t opa_inv = 255 - opa; + const uint16_t fg_b_opa = (uint16_t)dsc->color.blue * opa; + const uint16_t fg_g_opa = (uint16_t)dsc->color.green * opa; + const uint16_t fg_r_opa = (uint16_t)dsc->color.red * opa; + uint8_t * dest_buf = dsc->dest_buf; + size_t vl; + + /* Early exit if fully transparent */ + if(opa == 0) return LV_RESULT_OK; + + if(dest_px_size == 3) { + for(int32_t y = 0; y < h; y++) { + /* Process with RVV using segmented load/store for 3-byte pixels */ + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m2(w - x); + + /* Load destination B, G, R channels using segmented load */ + vuint8m2_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_VLSEG3E8_U8M2(dest_buf + x * 3, vl, v_dst_b, v_dst_g, v_dst_r); + + /* Blend solid color with destination */ + vuint8m2_t v_b, v_g, v_r; + LV_RVV_BLEND_SOLID_RGB_U8M2(v_dst_r, v_dst_g, v_dst_b, + fg_r_opa, fg_g_opa, fg_b_opa, opa_inv, + v_r, v_g, v_b, vl); + + /* Store result using segmented store */ + LV_RVV_VSSEG3E8_U8M2(dest_buf + x * 3, v_b, v_g, v_r, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + } + } + else { /* dest_px_size == 4 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m2(w - x); + + /* Load destination B, G, R, X channels using segmented load */ + vuint8m2_t v_dst_b, v_dst_g, v_dst_r, v_dst_x; + LV_RVV_VLSEG4E8_U8M2(dest_buf + x * 4, vl, v_dst_b, v_dst_g, v_dst_r, v_dst_x); + /* v_dst_x is X/Alpha, ignored for input */ + (void)v_dst_x; + + /* Blend solid color with destination */ + vuint8m2_t v_b, v_g, v_r; + LV_RVV_BLEND_SOLID_RGB_U8M2(v_dst_r, v_dst_g, v_dst_b, + fg_r_opa, fg_g_opa, fg_b_opa, opa_inv, + v_r, v_g, v_b, vl); + + vuint8m2_t v_x = __riscv_vmv_v_x_u8m2(0xFF, vl); /* Alpha = 0xFF */ + + /* Store result using segmented store */ + LV_RVV_VSSEG4E8_U8M2(dest_buf + x * 4, v_b, v_g, v_r, v_x, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + } + } + + return LV_RESULT_OK; +} + +/** + * Fill with color and per-pixel mask (opa >= 255) + */ +lv_result_t lv_draw_sw_blend_riscv_v_color_to_rgb888_with_mask(lv_draw_sw_blend_fill_dsc_t * dsc, uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(dsc->opa >= LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf != NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t mask_stride = dsc->mask_stride; + const uint8_t * mask_buf = dsc->mask_buf; + const uint8_t fg_b = dsc->color.blue; + const uint8_t fg_g = dsc->color.green; + const uint8_t fg_r = dsc->color.red; + uint8_t * dest_buf = dsc->dest_buf; + size_t vl; + + if(dest_px_size == 3) { + /* RGB888: 3 bytes per pixel - use RVV for blending with mask */ + for(int32_t y = 0; y < h; y++) { + /* Process with RVV using segmented load/store for 3-byte pixels */ + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m2(w - x); + + /* Load mask values */ + vuint8m2_t v_mask8 = __riscv_vle8_v_u8m2(&mask_buf[x], vl); + + /* Load destination B, G, R channels using segmented load */ + vuint8m2_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_VLSEG3E8_U8M2(dest_buf + x * 3, vl, v_dst_b, v_dst_g, v_dst_r); + + /* Blend solid color with mask */ + vuint8m2_t v_b, v_g, v_r; + LV_RVV_BLEND_SOLID_RGB_VMASK_U8M2(v_dst_r, v_dst_g, v_dst_b, + fg_r, fg_g, 
fg_b, v_mask8, + v_r, v_g, v_b, vl); + + /* Optional: Handle special cases for mask == 0 or mask >= 255. + * Without this, max error is ±1 (e.g., (x*255)>>8 ≈ x*0.996). + * For graphics rendering, ±1 error is typically acceptable. + * Uncomment below if exact values are required. */ + LV_RVV_BLEND_OPTIMIZE_MASK_SCALAR_U8M2(v_r, v_g, v_b, + fg_r, fg_g, fg_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask8, vl); + /* Store result using segmented store */ + LV_RVV_VSSEG3E8_U8M2(dest_buf + x * 3, v_b, v_g, v_r, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + mask_buf += mask_stride; + } + } + else { /* dest_px_size == 4 */ + /* XRGB8888: 4 bytes per pixel - use segmented load/store like RGB888 */ + for(int32_t y = 0; y < h; y++) { + /* Process with RVV using segmented load/store for 4-byte pixels */ + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m2(w - x); + + /* Load mask values */ + vuint8m2_t v_mask8 = __riscv_vle8_v_u8m2(&mask_buf[x], vl); + + /* Load destination B, G, R, X channels using segmented load */ + vuint8m2_t v_dst_b, v_dst_g, v_dst_r, v_dst_x; + LV_RVV_VLSEG4E8_U8M2(dest_buf + x * 4, vl, v_dst_b, v_dst_g, v_dst_r, v_dst_x); + /* v_dst_x is X/Alpha, ignored for input */ + (void)v_dst_x; + + /* Blend solid color with mask */ + vuint8m2_t v_b, v_g, v_r; + LV_RVV_BLEND_SOLID_RGB_VMASK_U8M2(v_dst_r, v_dst_g, v_dst_b, + fg_r, fg_g, fg_b, v_mask8, + v_r, v_g, v_b, vl); + vuint8m2_t v_x = __riscv_vmv_v_x_u8m2(0xFF, vl); /* Alpha = 0xFF */ + + /* Optional: Handle special cases for mask == 0 or mask >= 255. + * Without this, max error is ±1. Uncomment if exact values required. */ + LV_RVV_BLEND_OPTIMIZE_MASK_SCALAR_U8M2(v_r, v_g, v_b, + fg_r, fg_g, fg_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask8, vl); + + /* Store result using segmented store */ + LV_RVV_VSSEG4E8_U8M2(dest_buf + x * 4, v_b, v_g, v_r, v_x, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + mask_buf += mask_stride; + } + } + + return LV_RESULT_OK; +} + +/** + * Fill with color, opacity, and per-pixel mask + * Effective mix = (mask * opa) >> 8 + */ +lv_result_t lv_draw_sw_blend_riscv_v_color_to_rgb888_with_opa_mask(lv_draw_sw_blend_fill_dsc_t * dsc, + uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(dsc->opa < LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf != NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const uint8_t opa = dsc->opa; + const int32_t mask_stride = dsc->mask_stride; + const uint8_t * mask_buf = dsc->mask_buf; + uint8_t * dest_buf = dsc->dest_buf; + const uint8_t fg_b = dsc->color.blue; + const uint8_t fg_g = dsc->color.green; + const uint8_t fg_r = dsc->color.red; + size_t vl; + + /* Early exit if fully transparent */ + if(opa == 0) return LV_RESULT_OK; + + if(dest_px_size == 3) { + /* RGB888: 3 bytes per pixel - use RVV for blending with opa and mask */ + + for(int32_t y = 0; y < h; y++) { + /* Process with RVV using segmented load/store for 3-byte pixels */ + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m2(w - x); + + /* Load mask values */ + vuint8m2_t v_mask8 = __riscv_vle8_v_u8m2(&mask_buf[x], vl); + + /* Load destination B, G, R channels using segmented load */ + vuint8m2_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_VLSEG3E8_U8M2(dest_buf + x * 3, vl, v_dst_b, v_dst_g, v_dst_r); + + /* Compute mix = (mask * opa) >> 8 using widening multiply then narrow */ + vuint16m4_t v_mix16 = __riscv_vwmulu_vx_u16m4(v_mask8, opa, vl); + vuint8m2_t v_mix8 = 
__riscv_vnsrl_wx_u8m2(v_mix16, 8, vl); + + /* Blend solid color with mix (mask * opa) */ + vuint8m2_t v_b, v_g, v_r; + LV_RVV_BLEND_SOLID_RGB_VMASK_U8M2(v_dst_r, v_dst_g, v_dst_b, + fg_r, fg_g, fg_b, v_mix8, + v_r, v_g, v_b, vl); + + /* Optional: Handle special cases for mix == 0 or mix >= 255. + * Without this, max error is ±1. Uncomment if exact values required. */ + + LV_RVV_BLEND_OPTIMIZE_MASK_SCALAR_U8M2(v_r, v_g, v_b, + fg_r, fg_g, fg_b, + v_dst_r, v_dst_g, v_dst_b, + v_mix8, vl); + + /* Store result using segmented store */ + LV_RVV_VSSEG3E8_U8M2(dest_buf + x * 3, v_b, v_g, v_r, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + mask_buf += mask_stride; + } + } + else { /* dest_px_size == 4 */ + /* XRGB8888: 4 bytes per pixel - use segmented load/store like RGB888 */ + + + for(int32_t y = 0; y < h; y++) { + /* Process with RVV using segmented load/store for 4-byte pixels */ + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m2(w - x); + + /* Load mask values */ + vuint8m2_t v_mask8 = __riscv_vle8_v_u8m2(&mask_buf[x], vl); + + /* Compute mix = (mask * opa) >> 8 using widening multiply */ + vuint16m4_t v_mix16 = __riscv_vsrl_vx_u16m4( + __riscv_vwmulu_vx_u16m4(v_mask8, opa, vl), 8, vl); + vuint8m2_t v_mix8 = __riscv_vnsrl_wx_u8m2(v_mix16, 0, vl); + + /* Load destination B, G, R, X channels using segmented load */ + vuint8m2_t v_dst_b, v_dst_g, v_dst_r, v_dst_x; + LV_RVV_VLSEG4E8_U8M2(dest_buf + x * 4, vl, v_dst_b, v_dst_g, v_dst_r, v_dst_x); + (void)v_dst_x; /* v_dst_x is X/Alpha, ignored for input */ + + /* Blend solid color with mix (mask * opa) */ + vuint8m2_t v_b, v_g, v_r; + LV_RVV_BLEND_SOLID_RGB_VMASK_U8M2(v_dst_r, v_dst_g, v_dst_b, + fg_r, fg_g, fg_b, v_mix8, + v_r, v_g, v_b, vl); + + LV_RVV_BLEND_OPTIMIZE_MASK_SCALAR_U8M2(v_r, v_g, v_b, + fg_r, fg_g, fg_b, + v_dst_r, v_dst_g, v_dst_b, + v_mix8, vl); + + vuint8m2_t v_x = __riscv_vmv_v_x_u8m2(0xFF, vl); /* Alpha = 0xFF */ + + /* Store result using segmented store */ + LV_RVV_VSSEG4E8_U8M2(dest_buf + x * 4, v_b, v_g, v_r, v_x, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + mask_buf += mask_stride; + } + } + + return LV_RESULT_OK; +} + +/********************** + * RGB565 TO RGB888 BLEND FUNCTIONS + **********************/ + +/** + * RGB565 to RGB888/XRGB8888 simple copy (no blending, opa >= 255) + * RGB565 format: RRRRRGGGGGGBBBBB (5-6-5 bits) + */ +lv_result_t lv_draw_sw_blend_riscv_v_rgb565_to_rgb888(lv_draw_sw_blend_image_dsc_t * dsc, uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(dsc->opa >= LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf == NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + uint8_t * dest_buf = dsc->dest_buf; + const uint16_t * src_buf = dsc->src_buf; + size_t vl; + + if(dest_px_size == 3) { + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e16m2(w - x); + + /* Load RGB565 pixels */ + vuint16m2_t v_rgb565 = __riscv_vle16_v_u16m2(&src_buf[x], vl); + + /* Extract R5, G6, B5 components */ + vuint16m2_t v_r5 = __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2(v_rgb565, 11, vl), 0x1F, vl); + vuint16m2_t v_g6 = __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2(v_rgb565, 5, vl), 0x3F, vl); + vuint16m2_t v_b5 = __riscv_vand_vx_u16m2(v_rgb565, 0x1F, vl); + + /* Convert to 8-bit: R8 = (R5 * 2106) >> 8, G8 = (G6 * 1037) >> 8, B8 = (B5 * 2106) >> 8 */ + vuint16m2_t v_r8_16 = 
__riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(v_r5, 2106, vl), 8, vl); + vuint16m2_t v_g8_16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(v_g6, 1037, vl), 8, vl); + vuint16m2_t v_b8_16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(v_b5, 2106, vl), 8, vl); + + /* Narrow to 8-bit */ + vuint8m1_t v_r = __riscv_vnsrl_wx_u8m1(v_r8_16, 0, vl); + vuint8m1_t v_g = __riscv_vnsrl_wx_u8m1(v_g8_16, 0, vl); + vuint8m1_t v_b = __riscv_vnsrl_wx_u8m1(v_b8_16, 0, vl); + + /* Store using stride store for RGB888 (3 bytes per pixel) */ + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + else { /* dest_px_size == 4 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e16m2(w - x); + + /* Load RGB565 pixels */ + vuint16m2_t v_rgb565 = __riscv_vle16_v_u16m2(&src_buf[x], vl); + + /* Extract R5, G6, B5 components */ + vuint16m2_t v_r5 = __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2(v_rgb565, 11, vl), 0x1F, vl); + vuint16m2_t v_g6 = __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2(v_rgb565, 5, vl), 0x3F, vl); + vuint16m2_t v_b5 = __riscv_vand_vx_u16m2(v_rgb565, 0x1F, vl); + + /* Convert to 8-bit */ + vuint16m2_t v_r8_16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(v_r5, 2106, vl), 8, vl); + vuint16m2_t v_g8_16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(v_g6, 1037, vl), 8, vl); + vuint16m2_t v_b8_16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(v_b5, 2106, vl), 8, vl); + + /* Narrow to 8-bit */ + vuint8m1_t v_r = __riscv_vnsrl_wx_u8m1(v_r8_16, 0, vl); + vuint8m1_t v_g = __riscv_vnsrl_wx_u8m1(v_g8_16, 0, vl); + vuint8m1_t v_b = __riscv_vnsrl_wx_u8m1(v_b8_16, 0, vl); + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, vl); + + /* Store using stride store for XRGB8888 (4 bytes per pixel) */ + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + + return LV_RESULT_OK; +} + +/** + * RGB565 to RGB888/XRGB8888 with opacity + * blend formula: result = (src * opa + dst * (255 - opa)) >> 8 + * Optimized using vwmaccu for blend calculation + */ +lv_result_t lv_draw_sw_blend_riscv_v_rgb565_to_rgb888_with_opa(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(dsc->opa < LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf == NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + const uint8_t opa = dsc->opa; + uint8_t * dest_buf = dsc->dest_buf; + const uint16_t * src_buf = dsc->src_buf; + size_t vl; + + if(dest_px_size == 3) { + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + + /* Load RGB565 source pixels */ + vuint16m2_t v_rgb565 = __riscv_vle16_v_u16m2(&src_buf[x], vl); + + /* Convert RGB565 to RGB888 using macro, then narrow to 8-bit */ + vuint16m2_t v_src_r16, v_src_g16, v_src_b16; + LV_RVV_RGB565_TO_RGB888_U16M2(v_rgb565, v_src_r16, v_src_g16, v_src_b16, vl); + vuint8m1_t v_src_r = __riscv_vnsrl_wx_u8m1(v_src_r16, 0, vl); + vuint8m1_t v_src_g = __riscv_vnsrl_wx_u8m1(v_src_g16, 0, vl); + vuint8m1_t v_src_b = __riscv_vnsrl_wx_u8m1(v_src_b16, 0, vl); + + /* Load destination RGB888 using stride load */ + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, 
v_dst_r, vl); + + /* Blend using vwmaccu */ + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + opa, v_r, v_g, v_b, vl); + + /* Store result */ + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + else { /* dest_px_size == 4 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + + /* Load RGB565 source pixels */ + vuint16m2_t v_rgb565 = __riscv_vle16_v_u16m2(&src_buf[x], vl); + + /* Convert RGB565 to RGB888 using macro, then narrow to 8-bit */ + vuint16m2_t v_src_r16, v_src_g16, v_src_b16; + LV_RVV_RGB565_TO_RGB888_U16M2(v_rgb565, v_src_r16, v_src_g16, v_src_b16, vl); + vuint8m1_t v_src_r = __riscv_vnsrl_wx_u8m1(v_src_r16, 0, vl); + vuint8m1_t v_src_g = __riscv_vnsrl_wx_u8m1(v_src_g16, 0, vl); + vuint8m1_t v_src_b = __riscv_vnsrl_wx_u8m1(v_src_b16, 0, vl); + + /* Load destination XRGB8888 using stride load */ + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + + /* Blend using vwmaccu */ + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + opa, v_r, v_g, v_b, vl); + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, vl); + + /* Store result */ + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + + return LV_RESULT_OK; +} + +/** + * RGB565 to RGB888/XRGB8888 with per-pixel mask + * blend formula: result = (src * mask + dst * (255 - mask)) >> 8 + * Optimized using vwmaccu for blend calculation + */ +lv_result_t lv_draw_sw_blend_riscv_v_rgb565_to_rgb888_with_mask(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(dsc->opa >= LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf != NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + const int32_t mask_stride = dsc->mask_stride; + uint8_t * dest_buf = dsc->dest_buf; + const uint16_t * src_buf = dsc->src_buf; + const uint8_t * mask_buf = dsc->mask_buf; + size_t vl; + + if(dest_px_size == 3) { + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + + /* Load mask */ + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + + /* Load RGB565 source */ + vuint16m2_t v_rgb565 = __riscv_vle16_v_u16m2(&src_buf[x], vl); + + /* Convert RGB565 to RGB888 and narrow to 8-bit */ + vuint16m2_t v_src_r16, v_src_g16, v_src_b16; + LV_RVV_RGB565_TO_RGB888_U16M2(v_rgb565, v_src_r16, v_src_g16, v_src_b16, vl); + vuint8m1_t v_src_r = __riscv_vnsrl_wx_u8m1(v_src_r16, 0, vl); + vuint8m1_t v_src_g = __riscv_vnsrl_wx_u8m1(v_src_g16, 0, vl); + vuint8m1_t v_src_b = __riscv_vnsrl_wx_u8m1(v_src_b16, 0, vl); + + /* Load destination */ + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + + /* Blend with mask using vwmaccu */ + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask, vl); + 
LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + else { /* dest_px_size == 4 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + + /* Load mask */ + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + + /* Load RGB565 source */ + vuint16m2_t v_rgb565 = __riscv_vle16_v_u16m2(&src_buf[x], vl); + + /* Convert RGB565 to RGB888 and narrow to 8-bit */ + vuint16m2_t v_src_r16, v_src_g16, v_src_b16; + LV_RVV_RGB565_TO_RGB888_U16M2(v_rgb565, v_src_r16, v_src_g16, v_src_b16, vl); + vuint8m1_t v_src_r = __riscv_vnsrl_wx_u8m1(v_src_r16, 0, vl); + vuint8m1_t v_src_g = __riscv_vnsrl_wx_u8m1(v_src_g16, 0, vl); + vuint8m1_t v_src_b = __riscv_vnsrl_wx_u8m1(v_src_b16, 0, vl); + + /* Load destination */ + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + + /* Blend with mask using vwmaccu */ + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask, v_r, v_g, v_b, vl); + + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask, vl); + + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, vl); + + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + + return LV_RESULT_OK; +} + +/** + * RGB565 to RGB888/XRGB8888 with opacity and per-pixel mask + * effective mix = (mask * opa) >> 8 + * blend formula: result = (src * mix + dst * (255 - mix)) >> 8 + * + * Note: with_opa_mask needs 16-bit intermediate for mix calculation, + * so we cannot directly use the vwmaccu optimization for this case. 
+ */ +lv_result_t lv_draw_sw_blend_riscv_v_rgb565_to_rgb888_with_opa_mask(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(dsc->opa < LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf != NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + const int32_t mask_stride = dsc->mask_stride; + const uint8_t opa = dsc->opa; + uint8_t * dest_buf = dsc->dest_buf; + const uint16_t * src_buf = dsc->src_buf; + const uint8_t * mask_buf = dsc->mask_buf; + size_t vl; + + if(dest_px_size == 3) { + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + + /* Load mask and compute effective mix = (mask * opa) >> 8 */ + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint16m2_t v_mask16 = __riscv_vzext_vf2_u16m2(v_mask, vl); + vuint16m2_t v_mix16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(v_mask16, opa, vl), 8, vl); + vuint8m1_t v_mix = __riscv_vnsrl_wx_u8m1(v_mix16, 0, vl); + + /* Load RGB565 source */ + vuint16m2_t v_rgb565 = __riscv_vle16_v_u16m2(&src_buf[x], vl); + + /* Convert RGB565 to RGB888 and narrow to 8-bit */ + vuint16m2_t v_src_r16, v_src_g16, v_src_b16; + LV_RVV_RGB565_TO_RGB888_U16M2(v_rgb565, v_src_r16, v_src_g16, v_src_b16, vl); + vuint8m1_t v_src_r = __riscv_vnsrl_wx_u8m1(v_src_r16, 0, vl); + vuint8m1_t v_src_g = __riscv_vnsrl_wx_u8m1(v_src_g16, 0, vl); + vuint8m1_t v_src_b = __riscv_vnsrl_wx_u8m1(v_src_b16, 0, vl); + + /* Load destination */ + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + + /* Blend with effective mix using vwmaccu */ + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mix, v_r, v_g, v_b, vl); + + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mix, vl); + + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + else { /* dest_px_size == 4 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + + /* Load mask and compute effective mix */ + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint16m2_t v_mask16 = __riscv_vzext_vf2_u16m2(v_mask, vl); + vuint16m2_t v_mix16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(v_mask16, opa, vl), 8, vl); + vuint8m1_t v_mix = __riscv_vnsrl_wx_u8m1(v_mix16, 0, vl); + + /* Load RGB565 source */ + vuint16m2_t v_rgb565 = __riscv_vle16_v_u16m2(&src_buf[x], vl); + + /* Convert RGB565 to RGB888 and narrow to 8-bit */ + vuint16m2_t v_src_r16, v_src_g16, v_src_b16; + LV_RVV_RGB565_TO_RGB888_U16M2(v_rgb565, v_src_r16, v_src_g16, v_src_b16, vl); + vuint8m1_t v_src_r = __riscv_vnsrl_wx_u8m1(v_src_r16, 0, vl); + vuint8m1_t v_src_g = __riscv_vnsrl_wx_u8m1(v_src_g16, 0, vl); + vuint8m1_t v_src_b = __riscv_vnsrl_wx_u8m1(v_src_b16, 0, vl); + + /* Load destination */ + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + + /* Blend with effective mix using vwmaccu */ + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mix, v_r, v_g, v_b, vl); + 
LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mix, vl); + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, vl); + + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + + return LV_RESULT_OK; +} + +/********************** + * RGB888/XRGB8888 TO RGB888/XRGB8888 BLEND FUNCTIONS + **********************/ + +/** + * RGB888/XRGB8888 to RGB888/XRGB8888 simple copy (no blending, opa >= 255) + * src_px_size: 3 for RGB888, 4 for XRGB8888 + * dest_px_size: 3 for RGB888, 4 for XRGB8888 + */ +lv_result_t lv_draw_sw_blend_riscv_v_rgb888_to_rgb888(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size, uint32_t src_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(src_px_size == 3 || src_px_size == 4); + LV_ASSERT(dsc->opa >= LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf == NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + uint8_t * dest_buf = dsc->dest_buf; + const uint8_t * src_buf = dsc->src_buf; + size_t vl; + + /* Fast path: same pixel size, use RVV memcpy */ + if(src_px_size == dest_px_size) { + const int32_t row_bytes = w * dest_px_size; + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < row_bytes; x += vl) { + vl = __riscv_vsetvl_e8m8(row_bytes - x); + vuint8m8_t v_data = __riscv_vle8_v_u8m8(src_buf + x, vl); + __riscv_vse8_v_u8m8(dest_buf + x, v_data, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + return LV_RESULT_OK; + } + + /* Different pixel sizes: need per-pixel conversion */ + if(dest_px_size == 3) { + /* Source: XRGB8888 -> RGB888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_XRGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, vl); + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_src_b, v_src_g, v_src_r, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + else { + /* Destination: XRGB8888 */ + size_t max_vl = __riscv_vsetvlmax_e8m1(); + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, max_vl); + + /* Source: RGB888 -> XRGB8888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_RGB888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, vl); + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_src_b, v_src_g, v_src_r, v_a, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + + return LV_RESULT_OK; +} + +/** + * RGB888/XRGB8888 to RGB888/XRGB8888 with opacity + * blend formula: result = (src * opa + dst * (255 - opa)) >> 8 + */ +lv_result_t lv_draw_sw_blend_riscv_v_rgb888_to_rgb888_with_opa(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size, uint32_t src_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(src_px_size == 3 || src_px_size == 4); + LV_ASSERT(dsc->opa < LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf == NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + 
const uint8_t opa = dsc->opa; + uint8_t * dest_buf = dsc->dest_buf; + const uint8_t * src_buf = dsc->src_buf; + size_t vl; + + if(dest_px_size == 3) { + if(src_px_size == 3) { + /* RGB888 -> RGB888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_RGB888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, opa, v_r, v_g, v_b, vl); + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + else { + /* XRGB8888 -> RGB888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_XRGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, opa, v_r, v_g, v_b, vl); + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + } + else { + size_t max_vl = __riscv_vsetvlmax_e8m1(); + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, max_vl); + if(src_px_size == 3) { + /* RGB888 -> XRGB8888 */ + for(int32_t y = 0; y < h; y++) { + uint8_t * dest_row = dest_buf; + const uint8_t * src_row = src_buf; + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_RGB888_U8M1(src_row, x, v_src_b, v_src_g, v_src_r, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_XRGB8888_U8M1(dest_row, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, opa, v_r, v_g, v_b, vl); + LV_RVV_STORE_XRGB8888_U8M1(dest_row, x, v_b, v_g, v_r, v_a, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + else { + /* XRGB8888 -> XRGB8888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_XRGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, opa, v_r, v_g, v_b, vl); + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + } + + return LV_RESULT_OK; +} + +/** + * RGB888/XRGB8888 to RGB888/XRGB8888 with per-pixel mask + * blend formula: result = (src * mask + dst * (255 - mask)) >> 8 + */ +lv_result_t lv_draw_sw_blend_riscv_v_rgb888_to_rgb888_with_mask(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size, uint32_t src_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(src_px_size == 3 || src_px_size == 4); + LV_ASSERT(dsc->opa >= LV_OPA_MAX); + 
LV_ASSERT(dsc->mask_buf != NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + const int32_t mask_stride = dsc->mask_stride; + uint8_t * dest_buf = dsc->dest_buf; + const uint8_t * src_buf = dsc->src_buf; + const uint8_t * mask_buf = dsc->mask_buf; + size_t vl; + + if(dest_px_size == 3) { + if(src_px_size == 3) { + /* RGB888 -> RGB888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_RGB888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask, + v_r, v_g, v_b, + vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask, vl); + + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + else { + /* XRGB8888 -> RGB888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_XRGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask, + v_r, v_g, v_b, + vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask, vl); + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + } + else { + size_t max_vl = __riscv_vsetvlmax_e8m1(); + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, max_vl); + if(src_px_size == 3) { + /* RGB888 -> XRGB8888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_RGB888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask, + v_r, v_g, v_b, + vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask, vl); + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + else { + /* XRGB8888 -> XRGB8888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_XRGB8888_U8M1(src_buf, x, 
v_src_b, v_src_g, v_src_r, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mask, vl); + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + } + + return LV_RESULT_OK; +} + +/** + * RGB888/XRGB8888 to RGB888/XRGB8888 with opacity and per-pixel mask + * effective mix = (mask * opa) >> 8 + * blend formula: result = (src * mix + dst * (255 - mix)) >> 8 + */ +lv_result_t lv_draw_sw_blend_riscv_v_rgb888_to_rgb888_with_opa_mask(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size, uint32_t src_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(src_px_size == 3 || src_px_size == 4); + LV_ASSERT(dsc->opa < LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf != NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + const int32_t mask_stride = dsc->mask_stride; + const uint8_t opa = dsc->opa; + uint8_t * dest_buf = dsc->dest_buf; + const uint8_t * src_buf = dsc->src_buf; + const uint8_t * mask_buf = dsc->mask_buf; + size_t vl; + + if(dest_px_size == 3) { + if(src_px_size == 3) { + /* RGB888 -> RGB888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint16m2_t v_mask16 = __riscv_vzext_vf2_u16m2(v_mask, vl); + vuint16m2_t v_mix16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(v_mask16, opa, vl), 8, vl); + vuint8m1_t v_mix = __riscv_vnsrl_wx_u8m1(v_mix16, 0, vl); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_RGB888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mix, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mix, vl); + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + else { + /* XRGB8888 -> RGB888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint16m2_t v_mask16 = __riscv_vzext_vf2_u16m2(v_mask, vl); + vuint16m2_t v_mix16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(v_mask16, opa, vl), 8, vl); + vuint8m1_t v_mix = __riscv_vnsrl_wx_u8m1(v_mix16, 0, vl); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_XRGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, v_mix, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, 
v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mix, vl); + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + } + else { + size_t max_vl = __riscv_vsetvlmax_e8m1(); + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, max_vl); + if(src_px_size == 3) { + /* RGB888 -> XRGB8888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint16m2_t v_mask16 = __riscv_vzext_vf2_u16m2(v_mask, vl); + vuint16m2_t v_mix16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(v_mask16, opa, vl), 8, vl); + vuint8m1_t v_mix = __riscv_vnsrl_wx_u8m1(v_mix16, 0, vl); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_RGB888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, v_mix, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mix, vl); + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + else { + /* XRGB8888 -> XRGB8888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint16m2_t v_mask16 = __riscv_vzext_vf2_u16m2(v_mask, vl); + vuint16m2_t v_mix16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vx_u16m2(v_mask16, opa, vl), 8, vl); + vuint8m1_t v_mix = __riscv_vnsrl_wx_u8m1(v_mix16, 0, vl); + vuint8m1_t v_src_b, v_src_g, v_src_r; + LV_RVV_LOAD_XRGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, v_mix, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_mix, vl); + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + } + + return LV_RESULT_OK; +} + +/********************** + * ARGB8888 TO RGB888/XRGB8888 BLEND FUNCTIONS + **********************/ + +/** + * ARGB8888 to RGB888/XRGB8888 blend using source alpha + * blend formula: result = (src * src_alpha + dst * (255 - src_alpha)) >> 8 + */ +lv_result_t lv_draw_sw_blend_riscv_v_argb8888_to_rgb888(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(dsc->opa >= LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf == NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + uint8_t * dest_buf = dsc->dest_buf; + const uint8_t * src_buf = dsc->src_buf; + size_t vl; + + if(dest_px_size == 3) { + /* ARGB8888 -> RGB888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = 
__riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_src_b, v_src_g, v_src_r, v_src_a; + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_ARGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, v_src_a, vl); + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, v_src_a, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_src_a, vl); + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + else { + /* ARGB8888 -> XRGB8888 */ + size_t max_vl = __riscv_vsetvlmax_e8m1(); + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, max_vl); + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_src_b, v_src_g, v_src_r, v_src_a; + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_ARGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, v_src_a, vl); + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, v_src_a, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_src_a, vl); + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + + return LV_RESULT_OK; +} + +/** + * ARGB8888 to RGB888/XRGB8888 with global opacity + * effective_alpha = (src_alpha * opa) >> 8 + * blend formula: result = (src * effective_alpha + dst * (255 - effective_alpha)) >> 8 + */ +lv_result_t lv_draw_sw_blend_riscv_v_argb8888_to_rgb888_with_opa(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(dsc->opa < LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf == NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + const uint8_t opa = dsc->opa; + uint8_t * dest_buf = dsc->dest_buf; + const uint8_t * src_buf = dsc->src_buf; + size_t vl; + + if(dest_px_size == 3) { + /* ARGB8888 -> RGB888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_src_b, v_src_g, v_src_r, v_src_a; + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_ARGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, v_src_a, vl); + vuint8m1_t v_eff_a; + LV_RVV_CALC_EFF_ALPHA_OPA_U8M1(v_src_a, opa, v_eff_a, vl); + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, v_eff_a, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_eff_a, vl); + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + else { + /* ARGB8888 -> XRGB8888 */ + size_t max_vl = __riscv_vsetvlmax_e8m1(); + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, max_vl); + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { 
+ vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_src_b, v_src_g, v_src_r, v_src_a; + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_ARGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, v_src_a, vl); + vuint8m1_t v_eff_a; + LV_RVV_CALC_EFF_ALPHA_OPA_U8M1(v_src_a, opa, v_eff_a, vl); + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, v_eff_a, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_eff_a, vl); + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + + return LV_RESULT_OK; +} + +/** + * ARGB8888 to RGB888/XRGB8888 with per-pixel mask + * effective_alpha = (src_alpha * mask) >> 8 + * blend formula: result = (src * effective_alpha + dst * (255 - effective_alpha)) >> 8 + */ +lv_result_t lv_draw_sw_blend_riscv_v_argb8888_to_rgb888_with_mask(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(dsc->opa >= LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf != NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + const int32_t mask_stride = dsc->mask_stride; + uint8_t * dest_buf = dsc->dest_buf; + const uint8_t * src_buf = dsc->src_buf; + const uint8_t * mask_buf = dsc->mask_buf; + size_t vl; + + if(dest_px_size == 3) { + /* ARGB8888 -> RGB888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint8m1_t v_src_b, v_src_g, v_src_r, v_src_a; + LV_RVV_LOAD_ARGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, v_src_a, vl); + vuint8m1_t v_eff_a; + LV_RVV_CALC_EFF_ALPHA_MASK_U8M1(v_src_a, v_mask, v_eff_a, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_eff_a, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_eff_a, vl); + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + else { + /* ARGB8888 -> XRGB8888 */ + size_t max_vl = __riscv_vsetvlmax_e8m1(); + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, max_vl); + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint8m1_t v_src_b, v_src_g, v_src_r, v_src_a; + LV_RVV_LOAD_ARGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, v_src_a, vl); + vuint8m1_t v_eff_a; + LV_RVV_CALC_EFF_ALPHA_MASK_U8M1(v_src_a, v_mask, v_eff_a, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, v_eff_a, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + 
v_eff_a, vl); + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + + return LV_RESULT_OK; +} + +/** + * ARGB8888 to RGB888/XRGB8888 with opacity and per-pixel mask + * effective_alpha = (src_alpha * mask * opa) >> 16 + * blend formula: result = (src * effective_alpha + dst * (255 - effective_alpha)) >> 8 + */ +lv_result_t lv_draw_sw_blend_riscv_v_argb8888_to_rgb888_with_opa_mask(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + LV_ASSERT(dsc->opa < LV_OPA_MAX); + LV_ASSERT(dsc->mask_buf != NULL); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + const int32_t mask_stride = dsc->mask_stride; + const uint8_t opa = dsc->opa; + uint8_t * dest_buf = dsc->dest_buf; + const uint8_t * src_buf = dsc->src_buf; + const uint8_t * mask_buf = dsc->mask_buf; + size_t vl; + + if(dest_px_size == 3) { + /* ARGB8888 -> RGB888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint8m1_t v_src_b, v_src_g, v_src_r, v_src_a; + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_ARGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, v_src_a, vl); + vuint8m1_t v_eff_a; + LV_RVV_CALC_EFF_ALPHA_MASK_OPA_U8M1(v_src_a, v_mask, opa, v_eff_a, vl); + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_eff_a, + v_r, v_g, v_b, + vl); + + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_eff_a, vl); + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + else { + /* ARGB8888 -> XRGB8888 */ + size_t max_vl = __riscv_vsetvlmax_e8m1(); + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, max_vl); + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_mask = __riscv_vle8_v_u8m1(&mask_buf[x], vl); + vuint8m1_t v_src_b, v_src_g, v_src_r, v_src_a; + LV_RVV_LOAD_ARGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, v_src_a, vl); + vuint8m1_t v_eff_a; + LV_RVV_CALC_EFF_ALPHA_MASK_OPA_U8M1(v_src_a, v_mask, opa, v_eff_a, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_r, v_g, v_b; + LV_RVV_BLEND_RGB_VMASK_U8M1(v_src_r, v_src_g, v_src_b, v_dst_r, v_dst_g, v_dst_b, v_eff_a, v_r, v_g, v_b, vl); + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_eff_a, vl); + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + mask_buf += mask_stride; + } + } + + return LV_RESULT_OK; +} + +/** + * ARGB8888 premultiplied to RGB888/XRGB8888 + * For premultiplied alpha, source RGB is already multiplied by alpha: + * src_premul = src * src_alpha / 255 + * blend formula: result = src_premul + dst * (255 - src_alpha) / 255 + * = src_premul + 
(dst * (255 - src_alpha)) >> 8 + */ +lv_result_t lv_draw_sw_blend_riscv_v_argb8888_premultiplied_to_rgb888(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size) +{ + LV_ASSERT(dest_px_size == 3 || dest_px_size == 4); + + const int32_t w = dsc->dest_w; + const int32_t h = dsc->dest_h; + const int32_t dest_stride = dsc->dest_stride; + const int32_t src_stride = dsc->src_stride; + uint8_t * dest_buf = dsc->dest_buf; + const uint8_t * src_buf = dsc->src_buf; + size_t vl; + + if(dest_px_size == 3) { + /* ARGB8888 premultiplied -> RGB888 */ + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_src_b, v_src_g, v_src_r, v_src_a; + LV_RVV_LOAD_ARGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, v_src_a, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_RGB888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_inv_a = __riscv_vrsub_vx_u8m1(v_src_a, 255, vl); + vuint16m2_t v_dst_r16 = __riscv_vzext_vf2_u16m2(v_dst_r, vl); + vuint16m2_t v_dst_g16 = __riscv_vzext_vf2_u16m2(v_dst_g, vl); + vuint16m2_t v_dst_b16 = __riscv_vzext_vf2_u16m2(v_dst_b, vl); + vuint16m2_t v_inv_a16 = __riscv_vzext_vf2_u16m2(v_inv_a, vl); + vuint16m2_t v_r16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vv_u16m2(v_dst_r16, v_inv_a16, vl), 8, vl); + vuint16m2_t v_g16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vv_u16m2(v_dst_g16, v_inv_a16, vl), 8, vl); + vuint16m2_t v_b16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vv_u16m2(v_dst_b16, v_inv_a16, vl), 8, vl); + vuint8m1_t v_r = __riscv_vadd_vv_u8m1(v_src_r, __riscv_vnsrl_wx_u8m1(v_r16, 0, vl), vl); + vuint8m1_t v_g = __riscv_vadd_vv_u8m1(v_src_g, __riscv_vnsrl_wx_u8m1(v_g16, 0, vl), vl); + vuint8m1_t v_b = __riscv_vadd_vv_u8m1(v_src_b, __riscv_vnsrl_wx_u8m1(v_b16, 0, vl), vl); + + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_src_a, vl); + + LV_RVV_STORE_RGB888_U8M1(dest_buf, x, v_b, v_g, v_r, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + else { + /* ARGB8888 premultiplied -> XRGB8888 */ + size_t max_vl = __riscv_vsetvlmax_e8m1(); + vuint8m1_t v_a = __riscv_vmv_v_x_u8m1(0xFF, max_vl); + for(int32_t y = 0; y < h; y++) { + for(int32_t x = 0; x < w; x += vl) { + vl = __riscv_vsetvl_e8m1(w - x); + vuint8m1_t v_src_b, v_src_g, v_src_r, v_src_a; + LV_RVV_LOAD_ARGB8888_U8M1(src_buf, x, v_src_b, v_src_g, v_src_r, v_src_a, vl); + vuint8m1_t v_dst_b, v_dst_g, v_dst_r; + LV_RVV_LOAD_XRGB8888_U8M1(dest_buf, x, v_dst_b, v_dst_g, v_dst_r, vl); + vuint8m1_t v_inv_a = __riscv_vrsub_vx_u8m1(v_src_a, 255, vl); + vuint16m2_t v_dst_r16 = __riscv_vzext_vf2_u16m2(v_dst_r, vl); + vuint16m2_t v_dst_g16 = __riscv_vzext_vf2_u16m2(v_dst_g, vl); + vuint16m2_t v_dst_b16 = __riscv_vzext_vf2_u16m2(v_dst_b, vl); + vuint16m2_t v_inv_a16 = __riscv_vzext_vf2_u16m2(v_inv_a, vl); + vuint16m2_t v_r16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vv_u16m2(v_dst_r16, v_inv_a16, vl), 8, vl); + vuint16m2_t v_g16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vv_u16m2(v_dst_g16, v_inv_a16, vl), 8, vl); + vuint16m2_t v_b16 = __riscv_vsrl_vx_u16m2(__riscv_vmul_vv_u16m2(v_dst_b16, v_inv_a16, vl), 8, vl); + vuint8m1_t v_r = __riscv_vadd_vv_u8m1(v_src_r, __riscv_vnsrl_wx_u8m1(v_r16, 0, vl), vl); + vuint8m1_t v_g = __riscv_vadd_vv_u8m1(v_src_g, __riscv_vnsrl_wx_u8m1(v_g16, 0, vl), vl); + vuint8m1_t v_b = __riscv_vadd_vv_u8m1(v_src_b, __riscv_vnsrl_wx_u8m1(v_b16, 0, vl), vl); + + LV_RVV_BLEND_OPTIMIZE_MASK_U8M1(v_r, v_g, v_b, + 
v_src_r, v_src_g, v_src_b, + v_dst_r, v_dst_g, v_dst_b, + v_src_a, vl); + + LV_RVV_STORE_XRGB8888_U8M1(dest_buf, x, v_b, v_g, v_r, v_a, vl); + } + dest_buf = drawbuf_next_row(dest_buf, dest_stride); + src_buf = drawbuf_next_row(src_buf, src_stride); + } + } + + return LV_RESULT_OK; +} + +#endif /* LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_RISCV_V */ \ No newline at end of file diff --git a/src/draw/sw/blend/riscv_v/lv_draw_sw_blend_riscv_v_to_rgb888.h b/src/draw/sw/blend/riscv_v/lv_draw_sw_blend_riscv_v_to_rgb888.h new file mode 100644 index 0000000000..ae887bf140 --- /dev/null +++ b/src/draw/sw/blend/riscv_v/lv_draw_sw_blend_riscv_v_to_rgb888.h @@ -0,0 +1,170 @@ +/** + * @file lv_draw_sw_blend_riscv_v_to_rgb888.h + */ + +#ifndef LV_DRAW_SW_BLEND_RISCV_V_TO_RGB888_H +#define LV_DRAW_SW_BLEND_RISCV_V_TO_RGB888_H + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* + * INCLUDES + *********************/ + +#include "../../../../lv_conf_internal.h" +#if LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_RISCV_V + +#include "../../../../misc/lv_types.h" +/********************* + * DEFINES + *********************/ + +/* Color fill to RGB888/XRGB8888 */ +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB888 +#define LV_DRAW_SW_COLOR_BLEND_TO_RGB888(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_color_to_rgb888(dsc, dest_px_size) +#endif + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB888_WITH_OPA +#define LV_DRAW_SW_COLOR_BLEND_TO_RGB888_WITH_OPA(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_color_to_rgb888_with_opa(dsc, dest_px_size) +#endif + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB888_WITH_MASK +#define LV_DRAW_SW_COLOR_BLEND_TO_RGB888_WITH_MASK(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_color_to_rgb888_with_mask(dsc, dest_px_size) +#endif + +#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB888_MIX_MASK_OPA +#define LV_DRAW_SW_COLOR_BLEND_TO_RGB888_MIX_MASK_OPA(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_color_to_rgb888_with_opa_mask(dsc, dest_px_size) +#endif + + +/* RGB565 image blend to RGB888/XRGB8888 */ +#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888 +#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_rgb565_to_rgb888(dsc, dest_px_size) +#endif + +#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_OPA +#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_OPA(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_rgb565_to_rgb888_with_opa(dsc, dest_px_size) +#endif + +#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_MASK +#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_MASK(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_rgb565_to_rgb888_with_mask(dsc, dest_px_size) +#endif + +#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA +#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_rgb565_to_rgb888_with_opa_mask(dsc, dest_px_size) +#endif + +/* RGB888/XRGB8888 image blend to RGB888/XRGB8888 */ +#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888 +#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888(dsc, dest_px_size, src_px_size) \ + lv_draw_sw_blend_riscv_v_rgb888_to_rgb888(dsc, dest_px_size, src_px_size) +#endif + +#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_OPA +#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_OPA(dsc, dest_px_size, src_px_size) \ + lv_draw_sw_blend_riscv_v_rgb888_to_rgb888_with_opa(dsc, dest_px_size, src_px_size) +#endif + +#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_MASK +#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_MASK(dsc, dest_px_size, 
src_px_size) \ + lv_draw_sw_blend_riscv_v_rgb888_to_rgb888_with_mask(dsc, dest_px_size, src_px_size) +#endif + +#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA +#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(dsc, dest_px_size, src_px_size) \ + lv_draw_sw_blend_riscv_v_rgb888_to_rgb888_with_opa_mask(dsc, dest_px_size, src_px_size) +#endif + +/* ARGB8888 image blend to RGB888/XRGB8888 */ +#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888 +#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_argb8888_to_rgb888(dsc, dest_px_size) +#endif + +#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_WITH_OPA +#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_WITH_OPA(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_argb8888_to_rgb888_with_opa(dsc, dest_px_size) +#endif + +#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_WITH_MASK +#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_WITH_MASK(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_argb8888_to_rgb888_with_mask(dsc, dest_px_size) +#endif + +#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA +#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_argb8888_to_rgb888_with_opa_mask(dsc, dest_px_size) +#endif + +#ifndef LV_DRAW_SW_ARGB8888_PREMULTIPLIED_BLEND_NORMAL_TO_RGB888 +#define LV_DRAW_SW_ARGB8888_PREMULTIPLIED_BLEND_NORMAL_TO_RGB888(dsc, dest_px_size) \ + lv_draw_sw_blend_riscv_v_argb8888_premultiplied_to_rgb888(dsc, dest_px_size) +#endif + +/********************** + * TYPEDEFS + **********************/ + +/********************** + * GLOBAL PROTOTYPES + **********************/ + +/* Color fill functions */ +lv_result_t lv_draw_sw_blend_riscv_v_color_to_rgb888(lv_draw_sw_blend_fill_dsc_t * dsc, uint32_t dest_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_color_to_rgb888_with_opa(lv_draw_sw_blend_fill_dsc_t * dsc, uint32_t dest_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_color_to_rgb888_with_mask(lv_draw_sw_blend_fill_dsc_t * dsc, + uint32_t dest_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_color_to_rgb888_with_opa_mask(lv_draw_sw_blend_fill_dsc_t * dsc, + uint32_t dest_px_size); + +/* RGB565 to RGB888/XRGB8888 blend functions */ +lv_result_t lv_draw_sw_blend_riscv_v_rgb565_to_rgb888(lv_draw_sw_blend_image_dsc_t * dsc, uint32_t dest_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_rgb565_to_rgb888_with_opa(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_rgb565_to_rgb888_with_mask(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_rgb565_to_rgb888_with_opa_mask(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size); + +/* RGB888/XRGB8888 to RGB888/XRGB8888 blend functions */ +lv_result_t lv_draw_sw_blend_riscv_v_rgb888_to_rgb888(lv_draw_sw_blend_image_dsc_t * dsc, uint32_t dest_px_size, + uint32_t src_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_rgb888_to_rgb888_with_opa(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size, uint32_t src_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_rgb888_to_rgb888_with_mask(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size, uint32_t src_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_rgb888_to_rgb888_with_opa_mask(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size, uint32_t src_px_size); + +/* ARGB8888 to RGB888/XRGB8888 blend functions */ +lv_result_t lv_draw_sw_blend_riscv_v_argb8888_to_rgb888(lv_draw_sw_blend_image_dsc_t * 
dsc, uint32_t dest_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_argb8888_to_rgb888_with_opa(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_argb8888_to_rgb888_with_mask(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_argb8888_to_rgb888_with_opa_mask(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size); +lv_result_t lv_draw_sw_blend_riscv_v_argb8888_premultiplied_to_rgb888(lv_draw_sw_blend_image_dsc_t * dsc, + uint32_t dest_px_size); + +/********************** + * MACROS + **********************/ + +#endif /* LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_RISCV_V */ + +#ifdef __cplusplus +} +#endif + +#endif /* LV_DRAW_SW_BLEND_RISCV_V_TO_RGB888_H */ diff --git a/src/lv_conf_internal.h b/src/lv_conf_internal.h index 04fa55f786..414c3eec4f 100644 --- a/src/lv_conf_internal.h +++ b/src/lv_conf_internal.h @@ -28,6 +28,7 @@ #define LV_DRAW_SW_ASM_NONE 0 #define LV_DRAW_SW_ASM_NEON 1 #define LV_DRAW_SW_ASM_HELIUM 2 +#define LV_DRAW_SW_ASM_RISCV_V 3 #define LV_DRAW_SW_ASM_CUSTOM 255 #define LV_NEMA_HAL_CUSTOM 0 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 11a27d3a5a..9c609dbdbe 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -69,6 +69,12 @@ set(LVGL_TEST_OPTIONS_VG_LITE -Wno-dangling-pointer # workaround for thorvg dangling-pointer warning ) +set(LVGL_TEST_OPTIONS_RISCV_V + -DLV_TEST_OPTION=5 + -DLVGL_CI_USING_SYS_HEAP + -DLV_USE_DRAW_SW_ASM=LV_DRAW_SW_ASM_RISCV_V +) + set(LVGL_TEST_OPTIONS_SDL -DLV_TEST_OPTION=7 ) @@ -149,6 +155,13 @@ elseif (OPTIONS_TEST_VG_LITE) # Set a tolerance value for the VG-Lite tests. add_definitions(-DREF_IMG_TOLERANCE=9) endif() +elseif (OPTIONS_TEST_RISCV_V) + set (BUILD_OPTIONS ${LVGL_TEST_OPTIONS_RISCV_V} ${SANITIZE_AND_COVERAGE_OPTIONS}) + filter_compiler_options (C TEST_LIBS ${SANITIZE_AND_COVERAGE_OPTIONS}) + set (CONFIG_LV_BUILD_EXAMPLES OFF CACHE BOOL "disable examples" FORCE) + set (ENABLE_TESTS ON) + add_definitions(-DREF_IMGS_PATH="ref_imgs/") + message(STATUS "RISC-V Vector (RVV) software emulation test enabled") else() message(FATAL_ERROR "Must provide a known options value (check main.py?).") endif() diff --git a/tests/main.py b/tests/main.py index 115f7f65e1..4ddc4b3b0b 100755 --- a/tests/main.py +++ b/tests/main.py @@ -34,6 +34,7 @@ test_options = { 'OPTIONS_TEST_SYSHEAP': 'Test config, system heap, 32 bit color depth', 'OPTIONS_TEST_DEFHEAP': 'Test config, LVGL heap, 32 bit color depth', 'OPTIONS_TEST_VG_LITE': 'VG-Lite simulator with full config, 32 bit color depth', + 'OPTIONS_TEST_RISCV_V': 'RISC-V Vector emulation with full config, 32 bit color depth', }
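---

For reference (not part of the patch): the doc comments above all reduce to the same few fixed-point formulas, so a minimal scalar sketch of that per-pixel arithmetic can be useful for sanity-checking the vector paths against the plain C renderer. The helper names below are illustrative only; the constants and shift amounts are the ones the RVV macros use.

/* Scalar reference for the per-pixel arithmetic the RVV blend paths vectorize.
 * Illustrative sketch only; function names are hypothetical, not LVGL API. */
#include <stdint.h>

/* RGB565 -> 8-bit channels, same fixed-point constants as the vector path */
static inline void rgb565_to_rgb888_scalar(uint16_t c, uint8_t * r, uint8_t * g, uint8_t * b)
{
    uint16_t r5 = (c >> 11) & 0x1F;
    uint16_t g6 = (c >> 5)  & 0x3F;
    uint16_t b5 = c & 0x1F;
    *r = (uint8_t)((r5 * 2106u) >> 8);   /* 2106 ~ (255/31) * 256 */
    *g = (uint8_t)((g6 * 1037u) >> 8);   /* 1037 ~ (255/63) * 256 */
    *b = (uint8_t)((b5 * 2106u) >> 8);
}

/* result = (src * mix + dst * (255 - mix)) >> 8, the formula every blend path shares */
static inline uint8_t blend_u8_scalar(uint8_t src, uint8_t dst, uint8_t mix)
{
    return (uint8_t)(((uint16_t)src * mix + (uint16_t)dst * (255u - mix)) >> 8);
}

/* Effective mix factors, matching the widened 16-bit intermediates in the vector code */
static inline uint8_t mix_mask_opa(uint8_t mask, uint8_t opa)
{
    return (uint8_t)(((uint16_t)mask * opa) >> 8);        /* opacity + per-pixel mask  */
}

static inline uint8_t mix_alpha_mask_opa(uint8_t a, uint8_t mask, uint8_t opa)
{
    return (uint8_t)(((uint32_t)a * mask * opa) >> 16);   /* ARGB alpha + mask + opa   */
}

Note that the `>> 8` form never reaches 255 even when mix == 255 (the exact divisor would be 255, not 256), which is presumably why the vector code follows each blend with the separate LV_RVV_BLEND_OPTIMIZE_MASK_U8M1 step to pass src or dst through unchanged at the extremes.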