feat(text/ap): add text_ap_strlen and text ap tests (#9791)

2026-05-10 04:37:55 +08:00 · 2026-04-02 06:11:34 +02:00
parent 51b9a384ae
commit 0de4a15f52
10 changed files with 203 additions and 16 deletions
@@ -525,7 +525,7 @@ char * lv_text_set_text_vfmt(const char * fmt, va_list ap)
    lv_vsnprintf(raw_txt, len + 1, fmt, ap);

    /*Get the size of the Arabic text and process it*/
-    size_t len_ap = lv_text_ap_calc_bytes_count(raw_txt);
+    size_t len_ap = lv_text_ap_strlen(raw_txt);
    text = lv_malloc(len_ap + 1);
    LV_ASSERT_MALLOC(text);
    if(text == NULL) {
@@ -107,8 +107,10 @@ const ap_chars_map_t ap_chars_map[] = {
 /**********************
 *   GLOBAL FUNCTIONS
 **********************/
-uint32_t lv_text_ap_calc_bytes_count(const char * txt)
+
+size_t lv_text_ap_strlen(const char * txt)
 {
+    LV_ASSERT_NULL(txt);
    uint32_t txt_length = 0;
    uint32_t chars_cnt = 0;
    uint32_t current_ap_idx = 0;
@@ -138,7 +140,7 @@ uint32_t lv_text_ap_calc_bytes_count(const char * txt)
        i++;
    }

-    return chars_cnt + 1;
+    return chars_cnt;
 }

 void lv_text_ap_proc(const char * txt, char * txt_out)
@@ -148,6 +150,9 @@ void lv_text_ap_proc(const char * txt, char * txt_out)
    uint32_t * ch_enc;
    uint32_t * ch_fin;
    char * txt_out_temp;
+    if(!txt || !txt_out) {
+        return;
+    }

    txt_length = lv_text_get_encoded_length(txt);

@@ -33,7 +33,35 @@ extern "C" {
 /**********************
 * GLOBAL PROTOTYPES
 **********************/
-uint32_t lv_text_ap_calc_bytes_count(const char * txt);
+
+/**
+ * Calculate the number of bytes required to store the Arabic/Persian
+ * processed version of a UTF-8 string, excluding the null terminator.
+ * Analogous to strlen() but accounts for character form substitutions
+ * (e.g. initial, medial, final, isolated forms).
+ *
+ * @param txt  Null-terminated UTF-8 input string. Must not be NULL.
+ * @return     Number of bytes in the processed output, excluding '\0'.
+ *             Allocate (lv_text_ap_strlen(txt) + 1) bytes for the output buffer.
+ */
+size_t lv_text_ap_strlen(const char * txt);
+
+/**
+ * Process a UTF-8 string and replace Arabic/Persian characters with their
+ * correct contextual forms (isolated, initial, medial, final) based on
+ * their position within each word.
+ *
+ * The output buffer must be pre-allocated with at least
+ * (lv_text_ap_strlen(txt) + 1) bytes.
+ *
+ * @param txt      Null-terminated UTF-8 input string.
+ * @param txt_out  Output buffer to write the processed UTF-8 string into.
+ *                 Must not overlap with @p txt.
+ *
+ * If @p txt or @p txt_out is NULL, the function returns immediately without writing to @p txt_out.
+ *
+ * @note Non-Arabic/Persian characters are copied to the output unchanged.
+ */
 void lv_text_ap_proc(const char * txt, char * txt_out);

 /**********************
@@ -737,7 +737,7 @@ static void draw_main(lv_event_t * e)

 #if LV_USE_ARABIC_PERSIAN_CHARS
        /*Get the size of the Arabic text and process it*/
-        size_t len_ap = lv_text_ap_calc_bytes_count(txt);
+        size_t len_ap = lv_text_ap_strlen(txt) + 1;
        if(len_ap < sizeof(txt_ap)) {
            lv_text_ap_proc(txt, txt_ap);
            txt = txt_ap;
@@ -90,7 +90,7 @@ void lv_checkbox_set_text(lv_obj_t * obj, const char * txt)
        size_t len;

 #if LV_USE_ARABIC_PERSIAN_CHARS
-        len = lv_text_ap_calc_bytes_count(txt) + 1;
+        len = lv_text_ap_strlen(txt) + 1;
 #else
        len = lv_strlen(txt) + 1;
 #endif
@@ -221,7 +221,7 @@ void lv_dropdown_set_options(lv_obj_t * obj, const char * options)
 #if LV_USE_ARABIC_PERSIAN_CHARS == 0
    size_t len = lv_strlen(options) + 1;
 #else
-    size_t len = lv_text_ap_calc_bytes_count(options) + 1;
+    size_t len = lv_text_ap_strlen(options) + 1;
 #endif

    if(dropdown->options != NULL && dropdown->static_options == 0) {
@@ -304,7 +304,7 @@ void lv_dropdown_add_option(lv_obj_t * obj, const char * option, uint32_t pos)
 #if LV_USE_ARABIC_PERSIAN_CHARS == 0
    size_t ins_len = lv_strlen(option) + 1;
 #else
-    size_t ins_len = lv_text_ap_calc_bytes_count(option) + 1;
+    size_t ins_len = lv_text_ap_strlen(option) + 1;
 #endif

    size_t new_len = ins_len + old_len + 2; /*+2 for terminating NULL and possible \n*/
@@ -1416,7 +1416,7 @@ static size_t get_text_length(const char * text)
 {
    size_t len = 0;
 #if LV_USE_ARABIC_PERSIAN_CHARS
-    len = lv_text_ap_calc_bytes_count(text);
+    len = lv_text_ap_strlen(text) + 1;
 #else
    len = lv_strlen(text) + 1;
 #endif
@@ -217,7 +217,7 @@ void lv_span_set_text(lv_span_t * span, const char * text)
    size_t text_alloc_len = 0;

 #if LV_USE_ARABIC_PERSIAN_CHARS
-    text_alloc_len = lv_text_ap_calc_bytes_count(text);
+    text_alloc_len = lv_text_ap_strlen(text) + 1;
 #else
    text_alloc_len = lv_strlen(text) + 1;
 #endif
@@ -287,7 +287,7 @@ void lv_span_set_text_static(lv_span_t * span, const char * text)
    span->static_flag = 1;

 #if LV_USE_ARABIC_PERSIAN_CHARS
-    size_t text_alloc_len = lv_text_ap_calc_bytes_count(text);
+    size_t text_alloc_len = lv_text_ap_strlen(text) + 1;
    span->txt = lv_malloc(text_alloc_len);
    LV_ASSERT_MALLOC(span->txt)
    lv_text_ap_proc(text, span->txt);
@@ -186,13 +186,14 @@ void lv_table_set_cell_value_fmt(lv_obj_t * obj, uint32_t row, uint32_t col, con
    lv_vsnprintf(raw_txt, len + 1, fmt, ap2);

    /*Get the size of the Arabic text and process it*/
-    size_t len_ap = lv_text_ap_calc_bytes_count(raw_txt);
-    table->cell_data[cell] = lv_realloc(table->cell_data[cell], sizeof(lv_table_cell_t) + len_ap + 1);
-    LV_ASSERT_MALLOC(table->cell_data[cell]);
-    if(table->cell_data[cell] == NULL) {
+    size_t len_ap = lv_text_ap_strlen(raw_txt) + 1;
+    lv_table_cell_t * cell_data = lv_realloc(table->cell_data[cell], sizeof(lv_table_cell_t) + len_ap);
+    LV_ASSERT_MALLOC(cell_data);
+    if(!cell_data) {
        va_end(ap2);
        return;
    }
+    table->cell_data[cell] = cell_data;
    lv_text_ap_proc(raw_txt, table->cell_data[cell]->txt);

    lv_free(raw_txt);
@@ -1074,7 +1075,7 @@ static size_t get_cell_txt_len(const char * txt)
    size_t retval = 0;

 #if LV_USE_ARABIC_PERSIAN_CHARS
-    retval = sizeof(lv_table_cell_t) + lv_text_ap_calc_bytes_count(txt) + 1;
+    retval = sizeof(lv_table_cell_t) + lv_text_ap_strlen(txt) + 1;
 #else
    retval = sizeof(lv_table_cell_t) + lv_strlen(txt) + 1;
 #endif
@@ -0,0 +1,153 @@
+#if LV_BUILD_TEST
+
+#include <stdlib.h>
+#include <string.h>
+#include "../lvgl.h"
+#include "unity/unity.h"
+#include "../src/misc/lv_text_ap.h"
+
+static char * output = NULL;
+
+void setUp(void)
+{
+    /* Run before every test; these tests need no per-test preparation */
+}
+
+void tearDown(void)
+{
+    /* Run after every test: clean the active screen and release whatever a test left in `output` */
+    lv_obj_t * active_screen = lv_screen_active();
+    lv_obj_clean(active_screen);
+
+    free(output);   /* free(NULL) is a no-op, so tests that never allocate are fine */
+    output = NULL;  /* keeps the next tearDown from freeing the same pointer again */
+}
+void test_ap_strlen_empty_string_returns_zero(void)
+{
+    /* Nothing to process: the call must survive an empty input and report zero bytes */
+    const size_t processed_len = lv_text_ap_strlen("");
+    TEST_ASSERT_EQUAL_SIZE_T(0, processed_len);
+}
+
+void test_ap_strlen_ascii_only(void)
+{
+    /* Pure ASCII contains no Arabic/Persian letters, so the processed length
+     * is expected to equal the plain byte length of the input */
+    static const char ascii[] = "Hello World";
+    TEST_ASSERT_EQUAL_SIZE_T(strlen(ascii), lv_text_ap_strlen(ascii));
+}
+
+void test_ap_strlen_single_arabic_char(void)
+{
+    /* U+0622 ARABIC LETTER ALEF WITH MADDA ABOVE - LV_AP_ALPHABET_BASE_CODE.
+     * The letter takes 2 bytes in the input; its processed form is expected to take 3 */
+    static const char alef_madda[] = "\xD8\xA2"; /* UTF-8 for U+0622 */
+    TEST_ASSERT_EQUAL_SIZE_T(3, lv_text_ap_strlen(alef_madda));
+}
+
+void test_ap_strlen_arabic_sentence(void)
+{
+    /* "مرحبا" (Marhaba / Hello): five 2-byte letters, expected to need 15 bytes once processed */
+    static const char marhaba[] = "\xD9\x85\xD8\xB1\xD8\xAD\xD8\xA8\xD8\xA7";
+    TEST_ASSERT_EQUAL_SIZE_T(15, lv_text_ap_strlen(marhaba));
+}
+
+void test_ap_strlen_mixed_arabic_latin(void)
+{
+    /* "abc مرحبا xyz": 4 ASCII bytes + 15 bytes expected for the Arabic word + 4 ASCII bytes = 23 */
+    static const char mixed[] = "abc \xD9\x85\xD8\xB1\xD8\xAD\xD8\xA8\xD8\xA7 xyz";
+    TEST_ASSERT_EQUAL_SIZE_T(23, lv_text_ap_strlen(mixed));
+}
+
+void test_ap_strlen_treats_invalid_utf8_as_raw_bytes(void)
+{
+    /* 0xFF and 0xFE never occur in well-formed UTF-8; the test expects each of them
+     * to be counted as one raw byte */
+    const char * txt = "\xFF\xFE"; /* Invalid UTF-8 */
+    size_t result = lv_text_ap_strlen(txt);
+
+    /* Use the size_t-aware assertion, matching the other lv_text_ap_strlen tests in this file */
+    TEST_ASSERT_EQUAL_SIZE_T(2, result);
+}
+void test_ap_proc_null_output(void)
+{
+    /* A NULL destination must be tolerated without crashing */
+    lv_text_ap_proc("Hello", NULL);
+
+    /* Reaching this line means the call returned normally */
+    TEST_PASS();
+}
+
+void test_ap_proc_empty_string(void)
+{
+    /* Processing "" must leave an empty string in the (pre-zeroed) destination */
+    char processed[16] = {0};
+
+    lv_text_ap_proc("", processed);
+    TEST_ASSERT_EQUAL_STRING("", processed);
+}
+
+void test_ap_proc_ascii_passthrough(void)
+{
+    /* ASCII text is expected to come out of the processor unchanged */
+    static const char ascii[] = "Hello";
+    char processed[32] = {0};
+
+    lv_text_ap_proc(ascii, processed);
+    TEST_ASSERT_EQUAL_STRING(ascii, processed);
+}
+
+void test_ap_proc_output_not_null_for_arabic(void)
+{
+    /* "مرحبا" must be rewritten into the exact byte sequence stored in `expected` */
+    static const char marhaba[] = "\xD9\x85\xD8\xB1\xD8\xAD\xD8\xA8\xD8\xA7";
+    static const char expected[] = "\xEF\xBB\xA3\xEF\xBA\xAE\xEF\xBA\xA3\xEF\xBA\x92\xEF\xBA\x8E";
+
+    /* +1 for the terminating '\0'; tearDown() releases the buffer */
+    const size_t alloc_size = lv_text_ap_strlen(marhaba) + 1;
+    output = (char *)calloc(alloc_size, 1);
+    TEST_ASSERT_NOT_NULL(output);
+
+    lv_text_ap_proc(marhaba, output);
+    TEST_ASSERT_EQUAL_STRING(expected, output);
+}
+
+void test_ap_proc_output_length_matches_calc(void)
+{
+    /* lv_text_ap_strlen() and lv_text_ap_proc() must agree: the length reported up front
+     * has to equal the length of the string that is actually produced */
+    static const char marhaba[] = "\xD9\x85\xD8\xB1\xD8\xAD\xD8\xA8\xD8\xA7";
+    const size_t predicted_len = lv_text_ap_strlen(marhaba);
+
+    /* +1 for the terminating '\0'; tearDown() releases the buffer */
+    output = (char *)calloc(predicted_len + 1, 1);
+    TEST_ASSERT_NOT_NULL(output);
+
+    lv_text_ap_proc(marhaba, output);
+    TEST_ASSERT_EQUAL_SIZE_T(predicted_len, strlen(output));
+}
+
+void test_ap_proc_idempotent_ascii(void)
+{
+    /* Two independent runs over the same ASCII input must produce identical output */
+    static const char ascii[] = "Test 123";
+    char first_run[64] = {0};
+    char second_run[64] = {0};
+
+    lv_text_ap_proc(ascii, first_run);
+    lv_text_ap_proc(ascii, second_run);
+
+    TEST_ASSERT_EQUAL_STRING(first_run, second_run);
+}
+
+void test_ap_proc_does_not_overflow_with_long_arabic(void)
+{
+    /* Stress input: "مرحبا" repeated 10 times back to back.
+     * `input` is zero-initialised, so it stays terminated after the copies */
+    static const char word[] = "\xD9\x85\xD8\xB1\xD8\xAD\xD8\xA8\xD8\xA7";
+    const size_t word_len = sizeof(word) - 1;
+    char input[256] = {0};
+    for(size_t rep = 0; rep < 10; rep++) {
+        memcpy(&input[rep * word_len], word, word_len);
+    }
+
+    const size_t len = lv_text_ap_strlen(input);
+
+    /* Buffer layout: [0, len) processed text, [len] terminator, [len + 1] guard byte */
+    const size_t guard_idx = len + 1;
+    output = (char *)calloc(len + 2, 1);
+    TEST_ASSERT_NOT_NULL(output);
+
+    output[guard_idx] = 0x7E;
+
+    lv_text_ap_proc(input, output); /* Must not crash / overwrite the guard byte */
+
+    TEST_ASSERT_NOT_EQUAL('\0', output[0]);      /* something was written */
+    TEST_ASSERT_EQUAL('\0', output[len]);        /* byte after the predicted length is still '\0' */
+    TEST_ASSERT_EQUAL(0x7E, output[guard_idx]);  /* guard byte untouched */
+}
+
+#endif /*LV_BUILD_TEST*/