libc/regex: add regex implementation

add regex implementation for libc, the implementation are ported from
musl project

Signed-off-by: guoshichao <guoshichao@xiaomi.com>
This commit is contained in:
guoshichao
2023-05-09 21:25:16 +08:00
committed by Xiang Xiao
parent 81bdb97536
commit e1096bd35c
9 changed files with 6044 additions and 0 deletions

32
LICENSE
View File

@@ -8391,3 +8391,35 @@ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
libs/libc/regcomp.c
libs/libc/regerror.c
libs/libc/regexec.c
libs/libc/tre.h
libs/libc/tre-mem.c
include/regex.h
======================
musl as a whole is licensed under the following standard MIT license:
----------------------------------------------------------------------
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------

104
include/regex.h Normal file
View File

@@ -0,0 +1,104 @@
/****************************************************************************
* include/regex.h
*
* regex.h - TRE POSIX compatible regex compilation functions.
*
* Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
#ifndef _INCLUDE_REGEX_H
#define _INCLUDE_REGEX_H
/****************************************************************************
* Included Files
****************************************************************************/
#include <sys/types.h>
#define regoff_t int
typedef struct re_pattern_buffer
{
size_t re_nsub;
void *__opaque;
void *__padding[4];
size_t __nsub2;
char __padding2;
} regex_t;
typedef struct
{
regoff_t rm_so;
regoff_t rm_eo;
} regmatch_t;
#define RE_DUP_MAX 255
#define REG_EXTENDED 1
#define REG_ICASE 2
#define REG_NEWLINE 4
#define REG_NOSUB 8
#define REG_NOTBOL 1
#define REG_NOTEOL 2
#define REG_OK 0
#define REG_NOMATCH 1
#define REG_BADPAT 2
#define REG_ECOLLATE 3
#define REG_ECTYPE 4
#define REG_EESCAPE 5
#define REG_ESUBREG 6
#define REG_EBRACK 7
#define REG_EPAREN 8
#define REG_EBRACE 9
#define REG_BADBR 10
#define REG_ERANGE 11
#define REG_ESPACE 12
#define REG_BADRPT 13
#define REG_ENOSYS -1
#ifdef __cplusplus
extern "C"
{
#endif
int regcomp(regex_t *__restrict, const char *__restrict, int);
int regexec(const regex_t * __restrict, const char *__restrict, size_t,
regmatch_t *__restrict, int);
void regfree(regex_t *);
size_t regerror(int, const regex_t *__restrict, char *__restrict, size_t);
#ifdef __cplusplus
}
#endif
#endif /* _INCLUDE_REGEX_H */

View File

@@ -45,6 +45,7 @@ include obstack/Make.defs
include pthread/Make.defs
include pwd/Make.defs
include queue/Make.defs
include regex/Make.defs
include sched/Make.defs
include semaphore/Make.defs
include signal/Make.defs

30
libs/libc/regex/Make.defs Normal file
View File

@@ -0,0 +1,30 @@
############################################################################
# libs/libc/regex/Make.defs
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership. The
# ASF licenses this file to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance with the
# License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
############################################################################
ifeq ($(CONFIG_ALLOW_MIT_COMPONENTS),y)
# Add the regex C files to the build
CSRCS += regcomp.c regexec.c regerror.c tre-mem.c
# Add the regex directory to the build
DEPPATH += --dep-path regex
VPATH += :regex
endif

4036
libs/libc/regex/regcomp.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,92 @@
/****************************************************************************
* libs/libc/regex/regerror.c
*
* regerror.c - TRE POSIX compatible matching functions (and more).
*
* Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include <string.h>
#include <regex.h>
#include <stdio.h>
/* Error message strings for error codes listed in `regex.h'. This list
* needs to be in sync with the codes listed there, naturally.
*/
/* Converted to single string by Rich Felker to remove the need for
* data relocations at runtime, 27 Feb 2006.
*/
/****************************************************************************
* Private Data
****************************************************************************/
static const char messages[] =
{
"No error\0"
"No match\0"
"Invalid regexp\0"
"Unknown collating element\0"
"Unknown character class name\0"
"Trailing backslash\0"
"Invalid back reference\0"
"Missing ']'\0"
"Missing ')'\0"
"Missing '}'\0"
"Invalid contents of {}\0"
"Invalid character range\0"
"Out of memory\0"
"Repetition not preceded by valid expression\0"
"\0Unknown error"
};
/****************************************************************************
* Public Functions
****************************************************************************/
size_t regerror(int e, const regex_t *restrict preg, char *restrict buf,
size_t size)
{
const char *s;
for (s = messages; e && *s; e--, s += strlen(s) + 1)
{
}
if (!*s)
{
s++;
}
return 1 + snprintf(buf, size, "%s", s);
}

1267
libs/libc/regex/regexec.c Normal file

File diff suppressed because it is too large Load Diff

194
libs/libc/regex/tre-mem.c Normal file
View File

@@ -0,0 +1,194 @@
/****************************************************************************
* libs/libc/regex/tre-mem.c
*
* tre-mem.c - TRE memory allocator
*
* Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
/* This memory allocator is for allocating small memory blocks efficiently
* in terms of memory overhead and execution speed. The allocated blocks
* cannot be freed individually, only all at once. There can be multiple
* allocators, though.
*/
#include <stdlib.h>
#include <string.h>
#include "tre.h"
/****************************************************************************
* Private Functions
****************************************************************************/
/* This memory allocator is for allocating small memory blocks efficiently
* in terms of memory overhead and execution speed. The allocated blocks
* cannot be freed individually, only all at once. There can be multiple
* allocators, though.
*/
/* Returns a new memory allocator or NULL if out of memory. */
tre_mem_t tre_mem_new_impl(int provided, void *provided_block)
{
tre_mem_t mem;
if (provided)
{
mem = provided_block;
memset(mem, 0, sizeof(*mem));
}
else
{
mem = xcalloc(1, sizeof(*mem));
}
if (mem == NULL)
{
return NULL;
}
return mem;
}
/* Frees the memory allocator and all memory allocated with it. */
void tre_mem_destroy(tre_mem_t mem)
{
tre_list_t *tmp, *l = mem->blocks;
while (l != NULL)
{
xfree(l->data);
tmp = l->next;
xfree(l);
l = tmp;
}
xfree(mem);
}
/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
* allocated block or NULL if an underlying malloc() failed.
*/
void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
int zero, size_t size)
{
void *ptr;
if (mem->failed)
{
return NULL;
}
if (mem->n < size)
{
/* We need more memory than is available in the current block.
* Allocate a new block.
*/
tre_list_t *l;
if (provided)
{
if (provided_block == NULL)
{
mem->failed = 1;
return NULL;
}
mem->ptr = provided_block;
mem->n = TRE_MEM_BLOCK_SIZE;
}
else
{
int block_size;
if (size * 8 > TRE_MEM_BLOCK_SIZE)
{
block_size = size * 8;
}
else
{
block_size = TRE_MEM_BLOCK_SIZE;
}
l = xmalloc(sizeof(*l));
if (l == NULL)
{
mem->failed = 1;
return NULL;
}
l->data = xmalloc(block_size);
if (l->data == NULL)
{
xfree(l);
mem->failed = 1;
return NULL;
}
l->next = NULL;
if (mem->current != NULL)
{
mem->current->next = l;
}
if (mem->blocks == NULL)
{
mem->blocks = l;
}
mem->current = l;
mem->ptr = l->data;
mem->n = block_size;
}
}
/* Make sure the next pointer will be aligned. */
size += ALIGN(mem->ptr + size, long);
/* Allocate from current block. */
ptr = mem->ptr;
mem->ptr += size;
mem->n -= size;
/* Set to zero if needed. */
if (zero)
{
memset(ptr, 0, size);
}
return ptr;
}

288
libs/libc/regex/tre.h Normal file
View File

@@ -0,0 +1,288 @@
/****************************************************************************
* libs/libc/regex/tre.h
*
* tre-internal.h - TRE internal definitions
*
* Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#ifndef _REGEX_TRE_H
#define _REGEX_TRE_H
#include <regex.h>
#include <wchar.h>
#include <wctype.h>
#undef TRE_MBSTATE
#ifndef NDEBUG
#define NDEBUG
#endif
#define TRE_REGEX_T_FIELD __opaque
typedef int reg_errcode_t;
typedef wchar_t tre_char_t;
#define DPRINT(msg) do { } while (0)
#define elementsof(x) ( sizeof(x) / sizeof(x[0]) )
#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
/* Wide characters. */
typedef wint_t tre_cint_t;
#define TRE_CHAR_MAX 0x10ffff
#define tre_isalnum iswalnum
#define tre_isalpha iswalpha
#define tre_isblank iswblank
#define tre_iscntrl iswcntrl
#define tre_isdigit iswdigit
#define tre_isgraph iswgraph
#define tre_islower iswlower
#define tre_isprint iswprint
#define tre_ispunct iswpunct
#define tre_isspace iswspace
#define tre_isupper iswupper
#define tre_isxdigit iswxdigit
#define tre_tolower towlower
#define tre_toupper towupper
#define tre_strlen wcslen
/* Use system provided iswctype() and wctype(). */
typedef wctype_t tre_ctype_t;
#define tre_isctype iswctype
#define tre_ctype wctype
/* Returns number of bytes to add to (char *)ptr to make it
* properly aligned for the type.
*/
#define ALIGN(ptr, type) \
((((long)ptr) % sizeof(type)) \
? (sizeof(type) - (((long)ptr) % sizeof(type))) \
: 0)
#undef MAX
#undef MIN
#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
/* TNFA transition type. A TNFA state is an array of transitions,
* the terminator is a transition with NULL `state'.
*/
typedef struct tnfa_transition tre_tnfa_transition_t;
struct tnfa_transition
{
/* Range of accepted characters. */
tre_cint_t code_min;
tre_cint_t code_max;
/* Pointer to the destination state. */
tre_tnfa_transition_t *state;
/* ID number of the destination state. */
int state_id;
/* -1 terminated array of tags (or NULL). */
int *tags;
/* Assertion bitmap. */
int assertions;
/* Assertion parameters. */
union
{
/* Character class assertion. */
tre_ctype_t class;
/* Back reference assertion. */
int backref;
} u;
/* Negative character class assertions. */
tre_ctype_t *neg_classes;
};
/* Assertions.
*/
#define ASSERT_AT_BOL 1 /* Beginning of line. */
#define ASSERT_AT_EOL 2 /* End of line. */
#define ASSERT_CHAR_CLASS 4 /* Character class in `class'. */
#define ASSERT_CHAR_CLASS_NEG 8 /* Character classes in `neg_classes'. */
#define ASSERT_AT_BOW 16 /* Beginning of word. */
#define ASSERT_AT_EOW 32 /* End of word. */
#define ASSERT_AT_WB 64 /* Word boundary. */
#define ASSERT_AT_WB_NEG 128 /* Not a word boundary */
#define ASSERT_BACKREF 256 /* A back reference in `backref' */
#define ASSERT_LAST 256
/* Tag directions. */
typedef enum
{
TRE_TAG_MINIMIZE = 0,
TRE_TAG_MAXIMIZE = 1
} tre_tag_direction_t;
/* Instructions to compute submatch register values from tag values
* after a successful match.
*/
struct tre_submatch_data
{
/* Tag that gives the value for rm_so (submatch start offset). */
int so_tag;
/* Tag that gives the value for rm_eo (submatch end offset). */
int eo_tag;
/* List of submatches this submatch is contained in. */
int *parents;
};
typedef struct tre_submatch_data tre_submatch_data_t;
/* TNFA definition. */
typedef struct tnfa tre_tnfa_t;
struct tnfa
{
tre_tnfa_transition_t *transitions;
unsigned int num_transitions;
tre_tnfa_transition_t *initial;
tre_tnfa_transition_t *final;
tre_submatch_data_t *submatch_data;
char *firstpos_chars;
int first_char;
unsigned int num_submatches;
tre_tag_direction_t *tag_directions;
int *minimal_tags;
int num_tags;
int num_minimals;
int end_tag;
int num_states;
int cflags;
int have_backrefs;
int have_approx;
};
/* from tre-mem.h: */
#define TRE_MEM_BLOCK_SIZE 1024
typedef struct tre_list
{
void *data;
struct tre_list *next;
} tre_list_t;
struct tre_mem_struct
{
tre_list_t *blocks;
tre_list_t *current;
char *ptr;
size_t n;
int failed;
void **provided;
};
typedef struct tre_mem_struct *tre_mem_t;
#define tre_mem_new_impl __tre_mem_new_impl
#define tre_mem_alloc_impl __tre_mem_alloc_impl
#define tre_mem_destroy __tre_mem_destroy
tre_mem_t tre_mem_new_impl(int provided, void *provided_block);
void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
int zero, size_t size);
/* Returns a new memory allocator or NULL if out of memory. */
#define tre_mem_new() tre_mem_new_impl(0, NULL)
/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
* allocated block or NULL if an underlying malloc() failed.
*/
#define tre_mem_alloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 0, size)
/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
* allocated block or NULL if an underlying malloc() failed. The memory
* is set to zero.
*/
#define tre_mem_calloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 1, size)
#ifdef TRE_USE_ALLOCA
/* alloca() versions. Like above, but memory is allocated with alloca()
* instead of malloc().
*/
#define tre_mem_newa() \
tre_mem_new_impl(1, alloca(sizeof(struct tre_mem_struct)))
#define tre_mem_alloca(mem, size) \
((mem)->n >= (size) \
? tre_mem_alloc_impl((mem), 1, NULL, 0, (size)) \
: tre_mem_alloc_impl((mem), 1, alloca(TRE_MEM_BLOCK_SIZE), 0, (size)))
#endif /* TRE_USE_ALLOCA */
/* Frees the memory allocator and all memory allocated with it. */
void tre_mem_destroy(tre_mem_t mem);
#define xmalloc malloc
#define xcalloc calloc
#define xfree free
#define xrealloc realloc
#endif /* _REGEX_TRE_H */