diff --git a/libs/libc/string/lib_strstr.c b/libs/libc/string/lib_strstr.c index d8ee9f2768d..ab7bfd7cc17 100644 --- a/libs/libc/string/lib_strstr.c +++ b/libs/libc/string/lib_strstr.c @@ -25,67 +25,224 @@ #include #include +#include +#include + +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +#if LONG_MAX == INT32_MAX + #define LONG_INT_IS_4_BYTES 1 + #define LONG_INT_N_BYTES 4 +#else + #define LONG_INT_N_BYTES sizeof(long) +#endif + +/* Determine the size, in bytes, of a long integer. */ + +#if defined (CONFIG_ENDIAN_BIG) + #define ULONG_BIGENDIAN(x) (x) +#elif defined (LONG_INT_IS_4_BYTES) + #define ULONG_BIGENDIAN(x) htonl(x) +#else + #undef ULONG_BIGENDIAN +#endif /**************************************************************************** * Public Functions ****************************************************************************/ -#undef strstr /* See mm/README.txt */ -FAR char *strstr(FAR const char *str, FAR const char *substr) +/* Finds the first occurrence of the sub-string needle in the + * string haystack. Returns NULL if needle was not found. + */ + +FAR char *strstr(FAR const char *haystack, FAR const char *needle) { - FAR const char *candidate; /* Candidate in str with matching start character */ - char ch; /* First character of the substring */ - size_t len; /* The length of the substring */ +#ifndef ULONG_BIGENDIAN + FAR const unsigned char *needle_cmp_end; +#endif + FAR const unsigned char *i_haystack; + const char needle_first = *needle; + FAR const unsigned char *i_needle; + unsigned long last_haystack_chars; + unsigned long last_needle_chars; + FAR const char *sub_start; + size_t needle_cmp_len; + bool identical = true; + unsigned long mask; + size_t compare_len; + size_t needle_len; - /* Special case the empty substring */ - - len = strlen(substr); - ch = *substr; - - if (!ch) + if (!*needle) { - /* We'll say that an empty substring matches at the beginning of - * the string - */ - - return (FAR char *)str; + return (FAR char *)haystack; } - /* Search for the substring */ - - candidate = str; - for (; ; ) - { - /* strchr() will return a pointer to the next occurrence of the - * character ch in the string - */ - - candidate = strchr(candidate, ch); - if (!candidate || strlen(candidate) < len) - { - /* First character of the substring does not appear in the string - * or the remainder of the string is not long enough to contain the - * substring. - */ - - return NULL; - } - - /* Check if this is the beginning of a matching substring */ - - if (strncmp(candidate, substr, len) == 0) - { - return (FAR char *)candidate; - } - - /* No, find the next candidate after this one */ - - candidate++; - } - - /* Won't get here, but some compilers might complain. Other compilers - * might complain about this code being unreachable too. + /* Runs strchr() on the first section of the haystack as it has a lower + * algorithmic complexity for discarding the first non-matching characters. */ + haystack = strchr(haystack, needle_first); + if (!haystack) /* First character of needle is not in the haystack. */ + { + return NULL; + } + + /* First characters of haystack and needle are the same now. Both are + * guaranteed to be at least one character long. + * Now computes the sum of the first needle_len characters of haystack + * minus the sum of characters values of needle. + */ + + i_haystack = (FAR const unsigned char *)haystack + 1; + i_needle = (FAR const unsigned char *)needle + 1; + + while (*i_haystack && *i_needle) + { + identical &= *i_haystack++ == *i_needle++; + } + + /* i_haystack now references the (needle_len + 1)-th character. */ + + if (*i_needle) /* haystack is smaller than needle. */ + { + return NULL; + } + else if (identical) + { + return (FAR char *)haystack; + } + + needle_len = i_needle - (FAR const unsigned char *)needle; + + /* Note: + * needle_len > 1, because we checked that it isn't zero, and if it + * is 1 then identical must be true because the first strchr() ensured + * that the first characters are identical + */ + + sub_start = haystack; + needle_cmp_len = (needle_len < LONG_INT_N_BYTES) ? + needle_len : LONG_INT_N_BYTES; + +#ifdef ULONG_BIGENDIAN + last_needle_chars = + ULONG_BIGENDIAN(*((FAR unsigned long *)(i_needle - needle_cmp_len))) + >> (8 * (LONG_INT_N_BYTES - needle_cmp_len)); + last_haystack_chars = + ULONG_BIGENDIAN(*((FAR unsigned long *)(i_haystack - needle_cmp_len))) + >> (8 * (LONG_INT_N_BYTES - needle_cmp_len)); +#else + needle_cmp_end = i_needle; + + i_needle -= needle_cmp_len; + i_haystack -= needle_cmp_len; + last_needle_chars = 0; + last_haystack_chars = 0; + + while (i_needle != needle_cmp_end) + { + last_needle_chars <<= 8; + last_needle_chars ^= *i_needle++; + last_haystack_chars <<= 8; + last_haystack_chars ^= *i_haystack++; + } +#endif + + /* At this point: + * needle is at least two characters long + * haystack is at least needle_len characters long (also at least two) + * the first characters of needle and haystack are identical + */ + + if (needle_len > LONG_INT_N_BYTES + 1) + { + /* we will call memcmp() only once we know that the LONG_INT_N_BYTES + * last chars are equal, so it will be enough to compare all but the + * last LONG_INT_N_BYTES characters + */ + + compare_len = needle_len - LONG_INT_N_BYTES; + + /* iterate through the remainder of haystack while checking for + * identity of the last LONG_INT_N_BYTES, and checking the rest + * with memcmp() + */ + + while (*i_haystack) + { + last_haystack_chars <<= 8; + last_haystack_chars ^= *i_haystack++; + + sub_start++; + if (last_haystack_chars == last_needle_chars && + memcmp(sub_start, needle, compare_len) == 0) + { + return (FAR char *)sub_start; + } + } + } + else if (needle_len == LONG_INT_N_BYTES + 1) + { + /* iterate through the remainder of haystack while checking for + * identity of the last LONG_INT_N_BYTES as well as the single + * additional character, which is the first one + */ + + while (*i_haystack) + { + last_haystack_chars <<= 8; + last_haystack_chars ^= *i_haystack++; + + sub_start++; + if (last_haystack_chars == last_needle_chars && + *sub_start == needle_first) + { + return (FAR char *)sub_start; + } + } + } + else if (needle_len == LONG_INT_N_BYTES) + { + /* iterate through the remainder of haystack while checking for + * identity of the last LONG_INT_N_BYTES characters, which + * should exactly match the entire needle + */ + + while (*i_haystack) + { + last_haystack_chars <<= 8; + last_haystack_chars ^= *i_haystack++; + + if (last_haystack_chars == last_needle_chars) + { + return (FAR char *)(i_haystack - needle_len); + } + } + } + else /* needle_len < LONG_INT_N_BYTES */ + { + mask = (((unsigned long)1) << (needle_len * 8)) - 1; + last_needle_chars &= mask; + + /* iterate through the remainder of haystack, updating the sums' + * difference and checking for identity whenever the difference + * is zero + */ + + while (*i_haystack) + { + last_haystack_chars <<= 8; + last_haystack_chars ^= *i_haystack++; + last_haystack_chars &= mask; + + if (last_haystack_chars == last_needle_chars) + { + return (FAR char *)(i_haystack - needle_len); + } + } + } + return NULL; }