/*********************************************************************** THIS FILE IS AUTOMATICALLY GENERATED. DO NOT MODIFY DEVELOPER: Zihan Chen(vczh) ***********************************************************************/ #include "Vlpp.h" #include "VlppOS.h" /*********************************************************************** .\REGEX.H ***********************************************************************/ /*********************************************************************** Author: Zihan Chen (vczh) Licensed under https://github.com/vczh-libraries/License ***********************************************************************/ #ifndef VCZH_REGEX_REGEX #define VCZH_REGEX_REGEX namespace vl { namespace stream { class IStream; } namespace regex_internal { class PureResult; class PureInterpretor; class RichResult; class RichInterpretor; } namespace regex { class RegexBase_; class RegexLexerBase_; template class RegexLexer_; /*********************************************************************** Data Structure ***********************************************************************/ /// A sub string of the string that a is matched against. /// . /// The sub string. const ObjectString& Value() const { return value; } bool operator==(const RegexString_& string) const { return start == string.start && length == string.length && value == string.value; } }; /// A match produces by a . /// Captures()) /// { /// Console::WriteLine(capture.Value()); /// } /// } /// ]]> const CaptureList& Captures()const; /// Get all sub strings that are captured by named groups. /// All sub strings that are captured by named groups. 
/// C/S+)(/.*?))+$"); /// auto match = regex.MatchHead(L"C++ and C# are my favorite programing languages"); /// for (auto capture : match->Groups().Get(regex.CaptureNames().IndexOf(L"lang"))) /// { /// Console::WriteLine(capture.Value()); /// } /// } /// ]]> const CaptureGroup& Groups()const; }; /*********************************************************************** Regex ***********************************************************************/ class RegexBase_ abstract : public Object { protected: regex_internal::PureInterpretor* pure = nullptr; regex_internal::RichInterpretor* rich = nullptr; template void Process(const ObjectString& text, bool keepEmpty, bool keepSuccess, bool keepFail, typename RegexMatch_::List& matches)const; public: RegexBase_() = default; ~RegexBase_(); /// Test is a DFA used to match a string. /// Returns true if a DFA is used. bool IsPureMatch() const { return rich ? false : true; } /// Test is a DFA used to test a string. It ignores all capturing. /// Returns true if a DFA is used. bool IsPureTest() const { return pure ? true : false; } /// Match a prefix of the text. /// The text to match. /// Result().Value()); /// } /// ]]> template typename RegexMatch_::Ref MatchHead(const ObjectString& text)const; template typename RegexMatch_::Ref MatchHead(const T* text) const { return MatchHead(ObjectString(text)); } /// Match a sub string of the text. /// The text to match. /// Result().Value()); /// } /// ]]> template typename RegexMatch_::Ref Match(const ObjectString& text)const; template typename RegexMatch_::Ref Match(const T* text) const { return Match(ObjectString(text)); } /// Match a prefix of the text, ignoring all capturing. /// The text to match. template bool TestHead(const ObjectString& text)const; template bool TestHead(const T* text) const { return TestHead(ObjectString(text)); } /// Match a sub string of the text, ignoring all capturing. /// The text to match. 
template bool Test(const ObjectString& text)const; template bool Test(const T* text) const { return Test(ObjectString(text)); } /// Find all matched fragments in the given text, returning all matched sub strings. /// The text to match. /// Returns all succeeded matches. /// Result().Value()); /// } /// } /// ]]> template void Search(const ObjectString& text, typename RegexMatch_::List& matches)const; template void Search(const T* text, typename RegexMatch_::List& matches) const { return Search(ObjectString(text), matches); } /// Split the text by matched sub strings, returning all unmatched sub strings. /// The text to match. /// Set to true to keep all empty unmatched sub strings. This could happen when there is nothing between two matched sub strings. /// Returns all failed matches. /// Result().Value()); /// } /// } /// ]]> template void Split(const ObjectString& text, bool keepEmptyMatch, typename RegexMatch_::List& matches)const; template void Split(const T* text, bool keepEmptyMatch, typename RegexMatch_::List& matches) const { return Split(ObjectString(text), keepEmptyMatch, matches); } /// Cut the text by matched sub strings, returning all matched and unmatched sub strings. /// The text to match. /// Set to true to keep all empty matches. This could happen when there is nothing between two matched sub strings. /// Returns all succeeded and failed matches. /// Result().Value()); /// } /// } /// ]]> template void Cut(const ObjectString& text, bool keepEmptyMatch, typename RegexMatch_::List& matches)const; template void Cut(const T* text, bool keepEmptyMatch, typename RegexMatch_::List& matches) const { return Cut(ObjectString(text), keepEmptyMatch, matches); } }; /// /// ///
  • \\, \/, \(, \), \+, \*, \?, \{, \}, \[, \], \<, \>, \^, \$, \!, \=: represents itself
  • /// /// ///
  • /// Escaped characters in charset defined in a square bracket: ///
      ///
    • \r: the CR character
    • ///
    • \n: the LF character
    • ///
    • \t: the tab character
    • ///
    • \-, \[, \], \\, \/, \^, \$: represents itself
    • ///
    ///
  • /// /// ///
  • /// Loops: ///
      ///
    • regex{3}: repeats 3 times
    • ///
    • regex{3,}: repeats 3 or more times
    • ///
    • regex{1,3}: repeats 1 to 3 times
    • ///
    • regex?: repeats 0 or 1 times
    • ///
    • regex*: repeats 0 or more times
    • ///
    • regex+: repeats 1 or more times
    • ///
    /// If you add an additional ? right after a loop, it means repeating as few times as possible (DFA incompatible) ///
  • ///
  • /// Capturing: (DFA incompatible) ///
      ///
    • (regex): No capturing, just change the operators' association
    • ///
    • (?regex): Capture matched fragment
    • ///
    • (<name>regex): Capture matched fragment in a named group called "name"
    • ///
    • (<$i>): Match the i-th captured fragment, begins from 0
    • ///
    • (<$name;i>): Match the i-th captured fragment in the named group called "name", begins from 0
    • ///
    • (<$name>): Match any captured fragment in the named group called "name"
    • ///
    ///
  • ///
  • /// MISC ///
      ///
    • (=regex): The prefix of the following text should match the regex, but it is not counted in the whole match (DFA incompatible)
    • ///
    • (!regex): Any prefix of the following text should not match the regex, and it is not counted in the whole match (DFA incompatible)
    • ///
    • (<#name>regex): Name the regex "name", and it applies here
    • ///
    • (<&name>): Copy the named regex "name" here and apply
    • ///
    ///
  • /// ///

    ///

    /// The regular expression has pure mode and rich mode. /// Pure mode means the regular expression is driven by a DFA, while the rich mode is not. ///

    ///

    /// The regular expression can test a string instead of matching. /// Testing only returns a bool value indicating success or failure. ///

    ///
    template class Regex_ : public RegexBase_ { protected: collections::List> captureNames; public: NOT_COPYABLE(Regex_); /// Create a regular expression. It will crash if the regular expression produces syntax error. /// The regular expression in a string. /// Set to true to use DFA if possible. Regex_(const ObjectString& code, bool preferPure = true); ~Regex_() = default; /// Get all names of named captures /// All names of named captures. const collections::List>& CaptureNames()const { return captureNames; } }; /*********************************************************************** Tokenizer ***********************************************************************/ /// A token. /// ) that matches this token. -1 means this token is produced by an error. vint token; /// The pointer to where this token starts in the input string . /// This pointer comes from a that used to be analyzed. You should keep a variable to that string alive, so that to keep this pointer alive. const T* reading; /// The "codeIndex" argument from [M:vl.regex.RegexLexer.Parse]. vint codeIndex; /// True if this token is complete. False if this token does not end here. This could happend when colorizing a text line by line. bool completeToken; /// Row number of the first character, begins at 0. vint rowStart; /// Column number of the first character, begins at 0. vint columnStart; /// Row number of the last character, begins at 0. vint rowEnd; /// Column number of the last character, begins at 0. vint columnEnd; bool operator==(const RegexToken_& _token)const { return length == _token.length && token == _token.token && reading == _token.reading; } }; /// Token information for . struct RegexProcessingToken { /// /// The read only start position of the token. /// This value will be -1 if is not null. /// const vint start; /// /// The length of the token, allowing to be updated by the callback. /// When the callback returns, the length is not allowed to be decreased. 
/// This value will be -1 if is not null. /// vint length; /// /// The id of the token, allowing to be updated by the callback. /// vint token; /// /// The flag indicating if this token is completed, allowing to be updated by the callback. /// bool completeToken; /// /// The inter token state object, allowing to be updated by the callback. /// When the callback returns: ///
      ///
    • if the completeText parameter is true in , it should be nullptr.
    • ///
    • if the token does not end at the end of the input, it should not be nullptr.
    • ///
    • if a token is completed in one attempt of extending, it should be nullptr.
    • ///
    ///
    void* interTokenState; RegexProcessingToken(vint _start, vint _length, vint _token, bool _completeToken, void* _interTokenState) :start(_start) , length(_length) , token(_token) , completeToken(_completeToken) , interTokenState(_interTokenState) { } }; using RegexInterTokenStateDeleter = void(*)(void* interTokenState); template using RegexTokenExtendProc = void(*)(void* argument, const T* reading, vint length, bool completeText, RegexProcessingToken& processingToken); using RegexTokenColorizeProc = void(*)(void* argument, vint start, vint length, vint token); /// Callback procedures /// created by . /// This callback is not called automatically. /// It is here to make the maintenance convenient for the caller. /// RegexInterTokenStateDeleter deleter = nullptr; /// ///

    The token extend callback. It is called after recognizing any token, and run a customized procedure to modify the token based on the given context.

    ///

    If the length parameter is -1, it means the caller does not measure the incoming text buffer, which automatically indicates that the buffer is null-terminated.

    ///

    If the length parameter is not -1, it means the number of available characters in the buffer.

    ///

    The completeText parameter could be true or false. When it is false, it means that the buffer does not contain all the text.

    ///
    /// ///

    /// This is very useful to recognize any token that cannot be expressed using a regular expression. /// For example, a C++ literal string R"tag(the content)tag". /// It is recommended to add a token for R"tag(, /// and then use this extend proc to search for a )tag" to complete the token. ///

    ///

    /// Important: /// when colorizing a text line by line, /// a cross-line token could be incomplete at the end of the line. /// Because a given buffer ends at the end of that line, /// the extend proc is not able to know right now about what is going on in the future. /// This is what the interTokenState pointer of the processing token is designed for, /// the extend proc can store anything it wants using that pointer. ///

    ///

    /// The caller can get this pointer from the return value of the colorizer's Colorize function. /// This pointer is only available for cross-line tokens, it is obvious that one line produces at most one such pointer. /// Then the caller keeps calling that function to walk through the whole string. /// When the return value is changed, the pointer is no longer used, and it can be deleted by calling the deleter callback manually. ///

    ///

    /// The first argument is the value stored in the argument field. ///

    ///

    /// The second argument is a pointer to the buffer of the first character in this token. /// If the previous token is incomplete, then the buffer begins at the first character of the new buffer. ///

    ///

    /// The third argument is the length of the recognized token in characters. ///

    ///

    /// The fourth argument indicates if the token is completed. /// Even if a token is completed, if the extend proc finds that the extension exceeds the end of the buffer, /// then it can update the value to make it incomplete. ///

    ///

    /// The fifth argument contains the context for this token. Fields except "start" are allowed to be updated by the extend proc. ///

    ///
    /// tokenDefs; /// tokenDefs.Add(L"/d+"); /// tokenDefs.Add(L"[a-zA-Z_]/w*"); /// tokenDefs.Add(L"\"([^\"/\\]|/\\/.)*\""); /// tokenDefs.Add(L"R\"[^(]*/("); /// tokenDefs.Add(L"[(){};]"); /// tokenDefs.Add(L"/s+"); /// tokenDefs.Add(L"///*+([^//*]|/*+[^//])*/*+//"); /// /// const wchar_t* lines[] = { /// L"/*********************", /// L"MAIN.CPP", /// L"*********************/", /// L"", /// L"int main()", /// L"{", /// L" printf(\"This is a \\\"simple\\\" text.\");", /// L" printf(R\"____(This is a", /// L"\"multiple lined\"", /// L"literal text)____\");", /// L" return 0;", /// L"}", /// }; /// /// struct Argument /// { /// // for a real colorizer, you can put a color buffer here. /// // the buffer is reused for every line of code. /// // but for the demo, I put the current processing text instead. /// // so that I am able to print what is processed. /// const wchar_t* processingText = nullptr; /// } argument; /// /// struct InterTokenState /// { /// WString postfix; /// }; /// /// RegexProc proc; /// proc.argument = &argument; /// proc.colorizeProc = [](void* argument, vint start, vint length, vint token) /// { /// // this is guaranteed by "proc.argument = &argument;" /// auto text = reinterpret_cast(argument)->processingText; /// Console::WriteLine(itow(token) + L": <" + WString(text + start, length) + L">"); /// }; /// proc.deleter = [](void* interTokenState) /// { /// delete reinterpret_cast(interTokenState); /// }; /// proc.extendProc = [](void* argument, const wchar_t* reading, vint length, bool completeText, RegexProcessingToken& processingToken) /// { /// // 3 is R"[^(]*/( /// // 7 is not used in tokenDefs, it is occupied to represent an extended literal string /// if (processingToken.token == 3 || processingToken.token == 7) /// { /// // for calling wcsstr, create a buffer that is zero terminated /// WString readingBuffer = length == -1 ? 
WString(reading, false) : WString(reading, length); /// reading = readingBuffer.Buffer(); /// /// // get the postfix, which is )____" in this case /// WString postfix; /// if (processingToken.interTokenState) /// { /// postfix = reinterpret_cast(processingToken.interTokenState)->postfix; /// } /// else /// { /// postfix = L")" + WString(reading + 2, processingToken.length - 3) + L"\""; /// } /// /// // try to find if the postfix, which is )____" in this case, appear in the given buffer /// auto find = wcsstr(reading, postfix.Buffer()); /// if (find) /// { /// // if we find the postfix, it means we find the end of the literal string /// // here processingToken.token automatically becomes 7 /// // interTokenState needs to be nullptr to indicate this /// processingToken.length = (vint)(find - reading) + postfix.Length(); /// processingToken.completeToken = true; /// processingToken.interTokenState = nullptr; /// } /// else /// { /// // if we don't find the postfix, it means the end of the literal string is in future lines /// // we need to set the token to 7, which is the real token id for literal strings /// // since we change any token from 3 to 7, 3 will never be passed to colorizeProc in "token" argument /// processingToken.length = readingBuffer.Length(); /// processingToken.token = 7; /// processingToken.completeToken = false; /// /// // we need to ensure that interTokenState is not nullptr, and we can save the postfix here /// if (!completeText && !processingToken.interTokenState) /// { /// auto state = new InterTokenState; /// state->postfix = postfix; /// processingToken.interTokenState = state; /// } /// } /// } /// }; /// /// RegexLexer lexer(tokenDefs, proc); /// RegexLexerColorizer colorizer = lexer.Colorize(); /// /// void* lastInterTokenState = nullptr; /// for (auto [line, index] : indexed(From(lines))) /// { /// Console::WriteLine(L"Begin line " + itow(index)); /// argument.processingText = line; /// void* interTokenState = colorizer.Colorize(line, 
wcslen(line)); /// /// if (lastInterTokenState && lastInterTokenState != interTokenState) /// { /// // call the deleter manually /// proc.deleter(lastInterTokenState); /// } /// lastInterTokenState = interTokenState; /// /// argument.processingText = nullptr; /// colorizer.Pass(L'\r'); /// colorizer.Pass(L'\n'); /// Console::WriteLine(L""); /// } /// } /// ]]> RegexTokenExtendProc extendProc = nullptr; /// ///

    /// The colorizer callback. It is called when a token is recognized. ///

    ///

    /// The first argument is the value stored in the argument field. ///

    ///

    /// The second argument is the position of the first character of the token in characters. ///

    ///

    /// The third argument is the length of the recognized token in characters. ///

    ///

    /// The fourth argument is the index of the regular expression in the list (the first argument in the constructor of the lexer) that matches this token. ///

    ///
    RegexTokenColorizeProc colorizeProc = nullptr; /// /// The argument object that is the first argument for and . /// void* argument = nullptr; }; /// Token collection representing the result from the lexical analyzer. Call to create this object. /// "); /// } /// } /// ]]> template class RegexTokens_ : public collections::EnumerableBase> { friend class RegexLexerBase_; protected: regex_internal::PureInterpretor* pure; const collections::Array& stateTokens; ObjectString code; vint codeIndex; RegexProc_ proc; RegexTokens_(regex_internal::PureInterpretor* _pure, const collections::Array& _stateTokens, const ObjectString& _code, vint _codeIndex, RegexProc_ _proc); public: RegexTokens_(const RegexTokens_& tokens); ~RegexTokens_() = default; collections::IEnumerator>* CreateEnumerator() const override; /// Copy all tokens. /// Returns all tokens. /// A callback to decide which kind of tokens to discard. The input is [F:vl.regex.RegexToken.token]. Returns true to discard this kind of tokens. /// tokenDefs; /// tokenDefs.Add(L"/d+"); /// tokenDefs.Add(L"/w+"); /// tokenDefs.Add(L"/s+"); /// /// RegexLexer lexer(tokenDefs, {}); /// WString input = L"I have 2 books."; /// auto tokenResult = lexer.Parse(input); /// /// List filtered; /// tokenResult.ReadToEnd(filtered, [](vint token) { return token < 0 || token == 2; }); /// /// for (auto token : tokenResult) /// { /// // input must be in a variable /// // because token.reading points to a position from input.Buffer(); /// Console::WriteLine(itow(token.token) + L": <" + WString(token.reading, token.length) + L">"); /// } /// } /// ]]> void ReadToEnd(collections::List>& tokens, bool(*discard)(vint)=0)const; }; /*********************************************************************** RegexLexerWalker ***********************************************************************/ /// A type for walking through a text against a . Call to create this object. 
/// "); /// tokenBegin = reading; /// tokenEnd = nullptr; /// tokenId = -1; /// state = walker.GetStartState(); /// } /// else /// { /// Console::WriteLine(L"Recognized token: " + itow(tokenId) + L": <" + WString(tokenBegin, tokenEnd - tokenBegin) + L">"); /// tokenBegin = reading = tokenEnd; /// tokenEnd = nullptr; /// tokenId = -1; /// state = walker.GetStartState(); /// } /// } /// else /// { /// Console::WriteLine(L"Unrecognized character: <" + WString(*tokenBegin) + L">"); /// tokenBegin++; /// state = walker.GetStartState(); /// } /// } /// else if (finalState) /// { /// tokenEnd = reading; /// tokenId = token; /// } /// } /// } /// ]]> template class RegexLexerWalker_ : public Object { friend class RegexLexerBase_; protected: regex_internal::PureInterpretor* pure; const collections::Array& stateTokens; RegexLexerWalker_(regex_internal::PureInterpretor* _pure, const collections::Array& _stateTokens); public: RegexLexerWalker_(const RegexLexerWalker_& tokens); ~RegexLexerWalker_() = default; /// Get the start DFA state number, which represents the correct state before parsing any input. /// The DFA state number. /// When calling for the first character, the return value should be passed to the second parameter. vint GetStartState()const; /// Test if this state can only lead to the end of one kind of token. /// Returns the token index if this state can only lead to the end of one kind of token. Returns -1 if not. /// The DFA state number. vint GetRelatedToken(vint state)const; /// Step forward by one character. /// The input character. /// The current state. Returns the new current state when this function returns. /// Returns the token index at the end of the token. /// Returns true if it reach the end of the token. /// Returns true if the previous character is the end of the token. /// ///

    /// The "finalState" argument is important. /// When "previousTokenStop" becomes true, /// it tells you that this character can no longer form a token with previous consumed characters. /// But it does not mean that the recognized token ends at the previous token. /// The recognized token could end earlier, /// which is indicated at the last time when "finalState" becomes true. ///

    ///

    /// See the example for about how to use this function. ///

    ///
    void Walk(T input, vint& state, vint& token, bool& finalState, bool& previousTokenStop)const; /// Step forward by one character. /// Returns the new current state. It is used to walk the next character. /// The input character. /// The current state. vint Walk(T input, vint state)const; /// Test if the input text is a closed token. /// Returns true if the input text is a closed token. /// The input text. /// Size of the input text in characters. /// ///

    /// A closed token means that, /// there is a prefix that is a recognized token. /// At the same time, the input string itself could not be a token, or a prefix of any token. /// the recognized token has ended before reaching the end of the string. ///

    ///

    /// An unrecognized token is also considered as closed. ///

    ///

    /// For example, assume we have a token defined by "/d+./d+": ///

      ///
    • "2" is not a closed token, because it has not ended.
    • ///
    • /// "2.5." is a closed token, because it has ended at "2.5", /// and "2.5." could never be a prefix of any token, /// unless we have another token defined by "/d+./d+./d+". ///
    • ///
    ///

    ///
    /// tokenDefs; /// tokenDefs.Add(L"/d+./d+"); /// tokenDefs.Add(L"/d+"); /// /// RegexLexer lexer(tokenDefs, {}); /// RegexLexerWalker walker = lexer.Walk(); /// /// WString tests[] = { L".", L"2", L"2.", L"2.5", L"2.5." }; /// for (auto test : From(tests)) /// { /// if (walker.IsClosedToken(test.Buffer(), test.Length())) /// { /// Console::WriteLine(test + L" is a closed token."); /// } /// else /// { /// Console::WriteLine(test + L" is not a closed token."); /// } /// } /// } /// ]]> bool IsClosedToken(const T* input, vint length)const; /// Test if the input is a closed token. /// Returns true if the input text is a closed token. /// The input text. /// ///

    /// A closed token means that, /// there is a prefix that is a recognized token. /// At the same time, the input string itself could not be a token, or a prefix of any token. /// the recognized token has ended before reaching the end of the string. ///

    ///

    /// An unrecognized token is also considered as closed. ///

    ///

    /// For example, assume we have a token defined by "/d+./d+": ///

      ///
    • "2" is not a closed token, because it has not ended.
    • ///
    • /// "2.5." is a closed token, because it has ended at "2.5", /// and "2.5." could never be a prefix of any token, /// unless we have another token defined by "/d+./d+./d+". ///
    • ///
    ///

    ///
    /// tokenDefs; /// tokenDefs.Add(L"/d+./d+"); /// tokenDefs.Add(L"/d+"); /// /// RegexLexer lexer(tokenDefs, {}); /// RegexLexerWalker walker = lexer.Walk(); /// /// WString tests[] = { L".", L"2", L"2.", L"2.5", L"2.5." }; /// for (auto test : From(tests)) /// { /// if (walker.IsClosedToken(test)) /// { /// Console::WriteLine(test + L" is a closed token."); /// } /// else /// { /// Console::WriteLine(test + L" is not a closed token."); /// } /// } /// } /// ]]> bool IsClosedToken(const ObjectString& input)const; }; /*********************************************************************** RegexLexerColorizer ***********************************************************************/ /// Lexical colorizer. Call to create this object. /// (argument)->processingText; /// Console::WriteLine(itow(token) + L": <" + WString(text + start, length) + L">"); /// }; /// /// RegexLexer lexer(tokenDefs, proc); /// RegexLexerColorizer colorizer = lexer.Colorize(); /// /// for (auto [line, index] : indexed(From(lines))) /// { /// Console::WriteLine(L"Begin line " + itow(index)); /// argument.processingText = line; /// colorizer.Colorize(line, wcslen(line)); /// /// argument.processingText = nullptr; /// colorizer.Pass(L'\r'); /// colorizer.Pass(L'\n'); /// Console::WriteLine(L""); /// } /// } /// ]]> template class RegexLexerColorizer_ : public Object { friend class RegexLexerBase_; public: struct InternalState { vint currentState = -1; vint interTokenId = -1; void* interTokenState = nullptr; }; protected: RegexLexerWalker_ walker; RegexProc_ proc; InternalState internalState; void CallExtendProcAndColorizeProc(const T* input, vint length, RegexProcessingToken& token, bool colorize); vint WalkOneToken(const T* input, vint length, vint start, bool colorize); RegexLexerColorizer_(const RegexLexerWalker_& _walker, RegexProc_ _proc); public: RegexLexerColorizer_(const RegexLexerColorizer_& colorizer) = default; ~RegexLexerColorizer_() = default; /// Get the internal state. 
/// The internal state. /// ///

    /// If has not been called, the return value of this function is the start state. ///

    ///

    /// If a text is multi-lined, Colorize could be called line by line, and the internal state is changed. ///

    ///

    /// In order to colorize another piece of multi-lined text, /// you can either save the start state and call SetInternalState to reset the state, /// or call the lexer's Colorize function for a new colorizer. ///

    ///
    InternalState GetInternalState(); /// Restore the colorizer to a specified state. /// The state to restore. void SetInternalState(InternalState state); /// Step forward by one character. /// The input character. /// Callbacks in will be called except colorizeProc, which is from the second argument of the constructor of . void Pass(T input); /// Get the start DFA state number, which represents the correct state before colorizing any characters. /// The DFA state number. vint GetStartState()const; /// Colorize a text. /// An inter token state at the end of this line. It could be the same object to which is returned from the previous call. /// The text to colorize. /// Size of the text in characters. /// ///

    See and for more information about the return value.

    ///

    Callbacks in will be called, which is from the second argument of the constructor of .

    ///
    void* Colorize(const T* input, vint length); }; /*********************************************************************** RegexLexer ***********************************************************************/ class RegexLexerBase_ abstract : public Object { protected: regex_internal::PureInterpretor* pure = nullptr; collections::Array stateTokens; public: ~RegexLexerBase_(); /// Tokenize an input text. /// The text to tokenize. /// Configuration of all callbacks. /// Extra information that will be copied to [F:vl.regex.RegexToken.codeIndex]. /// Callbacks in will be called when iterating through tokens, which is from the second argument of the constructor of . template RegexTokens_ Parse(const ObjectString& code, RegexProc_ proc = {}, vint codeIndex = -1)const; template RegexTokens_ Parse(const T* code, RegexProc_ proc = {}, vint codeIndex = -1) const { return Parse(ObjectString(code), proc, codeIndex); } /// Create an equivalence walker from this lexical analyzer. A walker enables you to walk through characters one by one, /// The character type of the text to parse. /// The colorizer. /// Configuration of all callbacks. template RegexLexerColorizer_ Colorize(RegexProc_ proc)const; }; /// Lexical analyzer. /// All regular expressions, each one represents a kind of token. 
RegexLexer_(const collections::IEnumerable>& tokens); RegexLexer_(stream::IStream& inputStream); ~RegexLexer_() = default; void Serialize(stream::IStream & outputStream); }; /*********************************************************************** Template Instantiation ***********************************************************************/ extern template class RegexString_; extern template class RegexString_; extern template class RegexString_; extern template class RegexString_; extern template class RegexMatch_; extern template class RegexMatch_; extern template class RegexMatch_; extern template class RegexMatch_; extern template RegexMatch_::Ref RegexBase_::MatchHead (const ObjectString& text)const; extern template RegexMatch_::Ref RegexBase_::Match (const ObjectString& text)const; extern template bool RegexBase_::TestHead (const ObjectString& text)const; extern template bool RegexBase_::Test (const ObjectString& text)const; extern template void RegexBase_::Search (const ObjectString& text, RegexMatch_::List& matches)const; extern template void RegexBase_::Split (const ObjectString& text, bool keepEmptyMatch, RegexMatch_::List& matches)const; extern template void RegexBase_::Cut (const ObjectString& text, bool keepEmptyMatch, RegexMatch_::List& matches)const; extern template RegexMatch_::Ref RegexBase_::MatchHead (const ObjectString& text)const; extern template RegexMatch_::Ref RegexBase_::Match (const ObjectString& text)const; extern template bool RegexBase_::TestHead (const ObjectString& text)const; extern template bool RegexBase_::Test (const ObjectString& text)const; extern template void RegexBase_::Search (const ObjectString& text, RegexMatch_::List& matches)const; extern template void RegexBase_::Split (const ObjectString& text, bool keepEmptyMatch, RegexMatch_::List& matches)const; extern template void RegexBase_::Cut (const ObjectString& text, bool keepEmptyMatch, RegexMatch_::List& matches)const; extern template RegexMatch_::Ref RegexBase_::MatchHead 
(const ObjectString& text)const; extern template RegexMatch_::Ref RegexBase_::Match (const ObjectString& text)const; extern template bool RegexBase_::TestHead (const ObjectString& text)const; extern template bool RegexBase_::Test (const ObjectString& text)const; extern template void RegexBase_::Search (const ObjectString& text, RegexMatch_::List& matches)const; extern template void RegexBase_::Split (const ObjectString& text, bool keepEmptyMatch, RegexMatch_::List& matches)const; extern template void RegexBase_::Cut (const ObjectString& text, bool keepEmptyMatch, RegexMatch_::List& matches)const; extern template RegexMatch_::Ref RegexBase_::MatchHead (const ObjectString& text)const; extern template RegexMatch_::Ref RegexBase_::Match (const ObjectString& text)const; extern template bool RegexBase_::TestHead (const ObjectString& text)const; extern template bool RegexBase_::Test (const ObjectString& text)const; extern template void RegexBase_::Search (const ObjectString& text, RegexMatch_::List& matches)const; extern template void RegexBase_::Split (const ObjectString& text, bool keepEmptyMatch, RegexMatch_::List& matches)const; extern template void RegexBase_::Cut (const ObjectString& text, bool keepEmptyMatch, RegexMatch_::List& matches)const; extern template class Regex_; extern template class Regex_; extern template class Regex_; extern template class Regex_; extern template class RegexTokens_; extern template class RegexTokens_; extern template class RegexTokens_; extern template class RegexTokens_; extern template class RegexLexerWalker_; extern template class RegexLexerWalker_; extern template class RegexLexerWalker_; extern template class RegexLexerWalker_; extern template class RegexLexerColorizer_; extern template class RegexLexerColorizer_; extern template class RegexLexerColorizer_; extern template class RegexLexerColorizer_; extern template RegexTokens_ RegexLexerBase_::Parse (const ObjectString& code, RegexProc_ _proc, vint codeIndex)const; extern 
template RegexLexerWalker_ RegexLexerBase_::Walk ()const; extern template RegexLexerColorizer_ RegexLexerBase_::Colorize (RegexProc_ _proc)const; extern template RegexTokens_ RegexLexerBase_::Parse (const ObjectString& code, RegexProc_ _proc, vint codeIndex)const; extern template RegexLexerWalker_ RegexLexerBase_::Walk ()const; extern template RegexLexerColorizer_ RegexLexerBase_::Colorize (RegexProc_ _proc)const; extern template RegexTokens_ RegexLexerBase_::Parse (const ObjectString& code, RegexProc_ _proc, vint codeIndex)const; extern template RegexLexerWalker_ RegexLexerBase_::Walk ()const; extern template RegexLexerColorizer_ RegexLexerBase_::Colorize (RegexProc_ _proc)const; extern template RegexTokens_ RegexLexerBase_::Parse (const ObjectString& code, RegexProc_ _proc, vint codeIndex)const; extern template RegexLexerWalker_ RegexLexerBase_::Walk ()const; extern template RegexLexerColorizer_ RegexLexerBase_::Colorize (RegexProc_ _proc)const; extern template class RegexLexer_; extern template class RegexLexer_; extern template class RegexLexer_; extern template class RegexLexer_; using RegexString = RegexString_; using RegexMatch = RegexMatch_; using Regex = Regex_; using RegexToken = RegexToken_; using RegexProc = RegexProc_; using RegexTokens = RegexTokens_; using RegexLexerWalker = RegexLexerWalker_; using RegexLexerColorizer = RegexLexerColorizer_; using RegexLexer = RegexLexer_; } } #endif /*********************************************************************** .\REGEXCHARREADER.H ***********************************************************************/ /*********************************************************************** Author: Zihan Chen (vczh) Licensed under https://github.com/vczh-libraries/License ***********************************************************************/ #ifndef VCZH_REGEX_REGEXCHARREADER #define VCZH_REGEX_REGEXCHARREADER namespace vl { namespace regex_internal { template struct CharReader { private: encoding::UtfStringTo32Reader 
reader; const T* input; public: CharReader(const T* _input) : reader(_input) , input(_input) { } const T* Reading() { return input + reader.SourceCluster().index; } vint Index() { return reader.SourceCluster().index; } char32_t Read() { return reader.Read(); } }; template<> struct CharReader { private: const char32_t* input; vint index = 0; bool finished = false; public: CharReader(const char32_t* _input) : input(_input) { } char32_t Read() { if (finished) return 0; if (auto c = input[index]) { index++; return c; } else { finished = true; return 0; } } const char32_t* Reading() { return input + Index(); } vint Index() { return finished ? index : index - 1; } }; } } #endif /*********************************************************************** .\AUTOMATON\REGEXDATA.H ***********************************************************************/ /*********************************************************************** Author: Zihan Chen (vczh) Licensed under https://github.com/vczh-libraries/License ***********************************************************************/ #ifndef VCZH_REGEX_REGEXDATA #define VCZH_REGEX_REGEXDATA namespace vl { namespace regex_internal { /*********************************************************************** CharRange ***********************************************************************/ class CharRange { public: typedef collections::SortedList List; char32_t begin = 0; char32_t end = 0; CharRange() = default; CharRange(char32_t _begin, char32_t _end) : begin(_begin), end(_end) {} bool operator<(CharRange item) const { return end < item.begin; } bool operator<=(CharRange item) const { return *this < item || *this == item; } bool operator>(CharRange item) const { return item.end < begin; } bool operator>=(CharRange item) const { return *this > item || *this == item; } bool operator==(CharRange item) const { return begin == item.begin && end == item.end; } bool operator!=(CharRange item) const { return begin != item.begin || item.end != 
end; }

	// Comparisons against a single character:
	//   ==  the character lies inside [begin, end]; != is its negation
	//   <   the whole range lies below the character (end < item)
	//   >   the whole range lies above the character (item < begin)
	//   <=  checks only the lower bound (begin <= item)
	//   >=  checks only the upper bound (item <= end)
	bool operator<(char32_t item) const { return end < item; }
	bool operator<=(char32_t item) const { return begin <= item; }
	bool operator>(char32_t item) const { return item < begin; }
	bool operator>=(char32_t item) const { return item <= end; }
	bool operator==(char32_t item) const { return begin <= item && item <= end; }
	bool operator!=(char32_t item) const { return item < begin || end < item; }
};
}
}

#endif

/***********************************************************************
.\AUTOMATON\REGEXAUTOMATON.H
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/

#ifndef VCZH_REGEX_REGEXAUTOMATON
#define VCZH_REGEX_REGEXAUTOMATON

namespace vl
{
namespace regex_internal
{
// Upper bound for character values: 0x10FFFF, the highest Unicode code point.
constexpr char32_t MaxChar32 = 0x10FFFF;

class State;
class Transition;

// One edge of an automaton, leading from `source` to `target`.
// `range`, `capture` and `index` carry per-type data; parameters with the same
// names appear on Automaton::NewChars, NewCapture and NewMatch.
class Transition
{
public:
	enum Type
	{
		Chars,			// Character range transition
		Epsilon,
		BeginString,
		EndString,
		Nop,			// Non-epsilon transition with no input
		Capture,		// Begin capture transition
		Match,			// Capture matching transition
		Positive,		// Begin positive lookahead
		Negative,		// Begin negative lookahead
		NegativeFail,	// Negative lookahead failure
		End				// For Capture, Positive, Negative
	};

	State* source;
	State* target;
	CharRange range;
	Type type;
	vint capture;
	vint index;
};

// One node of an automaton.
// NOTE(review): the element types of both lists are missing here; presumably
// `transitions` holds outgoing and `inputs` incoming Transition pointers --
// TODO confirm and restore.
class State
{
public:
	collections::List transitions;
	collections::List inputs;
	bool finalState;
	void* userData;		// opaque slot; its use is not visible in this header
};

// Container for states and transitions, with factory functions for creating
// them.
// NOTE(review): the template arguments of `Ptr` and of the three lists are
// missing here -- TODO restore from the upstream header.
class Automaton
{
public:
	typedef Ptr Ref;

	collections::List> states;
	collections::List> transitions;
	collections::List captureNames;
	State* startState;

	Automaton();

	State* NewState();
	Transition* NewTransition(State* start, State* end);

	// One factory per Transition::Type. Only the declarations are visible
	// here; presumably each creates a transition of the matching type from
	// `start` to `end` -- confirm against the implementation.
	Transition* NewChars(State* start, State* end, CharRange range);
	Transition* NewEpsilon(State* start, State* end);
	Transition* NewBeginString(State* start, State* end);
	Transition* NewEndString(State* start, State* end);
	Transition* NewNop(State* start, State* end);
	Transition* NewCapture(State* start, State* end, vint capture);
	Transition* NewMatch(State* start, State* end, vint capture, vint index=-1);
	Transition* NewPositive(State* start, State* end);
	Transition* NewNegative(State* start, State* end);
	Transition* NewNegativeFail(State* start, State* end);
	Transition* NewEnd(State* start, State* end);
};

// Predicates with the signature EpsilonNfaToNfa expects for `epsilonChecker`.
extern bool PureEpsilonChecker(Transition* transition);
extern bool RichEpsilonChecker(Transition* transition);
extern bool AreEqual(Transition* transA, Transition* transB);

// Automaton conversions. Each returns a new automaton and fills a map supplied
// by the caller.
// NOTE(review): the key/value types of `nfaStateMap` and `dfaStateMap` are
// missing here -- TODO restore.
extern Automaton::Ref EpsilonNfaToNfa(Automaton::Ref source, bool(*epsilonChecker)(Transition*), collections::Dictionary& nfaStateMap);
extern Automaton::Ref NfaToDfa(Automaton::Ref source, collections::Group& dfaStateMap);
}
}

#endif

/***********************************************************************
.\AST\REGEXEXPRESSION.H
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License

Classes:
Expression : Base class of expressions |
CharSetExpression : Character set | a, [a-b], [^a-b0_9], \.rnt\/()+*?{}[]<>^$!=SsDdLlWw, [\rnt-[]\/^$]
LoopExpression : Repeat | a{3}, a{3,}, a{1,3}, a+, a*, a?, LOOP?
SequenceExpression : Sequence of two regex | ab
AlternateExpression : Alternative of two regex | a|b
BeginExpression : String begin | ^
EndExpression : String end | $
CaptureExpression : Capture | (expr), (?expr)
MatchExpression : Capture matching | (<$name>), (<$name;i>), (<$i>)
PositiveExpression : Positive lookahead | (=expr)
NegativeExpression : Negative lookahead | (!expr)
UsingExpression : refer a regex | (<#name1>expr)...(<&name1>)...
RegexExpression : Regular Expression

Functions:
ParseRegexExpression : Regex Syntax Analyzer
***********************************************************************/

#ifndef VCZH_REGEX_REGEXEXPRESSION
#define VCZH_REGEX_REGEXEXPRESSION

namespace vl
{
namespace regex_internal
{
class IRegexExpressionAlgorithm;

/***********************************************************************
Regex Expression AST
***********************************************************************/

// Base class of every AST node. Each concrete node implements Apply, which
// takes the visitor interface IRegexExpressionAlgorithm.
// NOTE(review): the template arguments of `Ptr` and `collections::Dictionary`
// in the two typedefs are missing here -- TODO restore from upstream.
class Expression : public Object
{
public:
	NOT_COPYABLE(Expression);
	Expression() = default;

	typedef Ptr Ref;
	typedef collections::Dictionary Map;

	virtual void Apply(IRegexExpressionAlgorithm& algorithm)=0;

	// Whole-tree operations; only the declarations are visible in this header.
	bool IsEqual(Expression* expression);
	bool HasNoExtension();
	bool CanTreatAsPure();
	void NormalizeCharSet(CharRange::List& subsets);
	void CollectCharSet(CharRange::List& subsets);
	void ApplyCharSet(CharRange::List& subsets);
	Automaton::Ref GenerateEpsilonNfa();
};

// Character set node.
class CharSetExpression : public Expression
{
public:
	CharRange::List ranges;
	bool reverse;	// presumably true for a negated set such as [^a-b] -- TODO confirm

	bool AddRangeWithConflict(CharRange range);
	void Apply(IRegexExpressionAlgorithm& algorithm);
};

// Repetition node.
class LoopExpression : public Expression
{
public:
	Expression::Ref expression;		// The regex to loop
	vint min;						// Minimum count of looping
	vint max;						// Maximum count of looping, -1 for infinite
	bool preferLong;				// Prefer longer matching

	void Apply(IRegexExpressionAlgorithm& algorithm);
};

// Concatenation of two expressions.
class SequenceExpression : public Expression
{
public:
	Expression::Ref left;			// First regex to match
	Expression::Ref right;			// Last regex to match

	void Apply(IRegexExpressionAlgorithm& algorithm);
};

// Choice between two expressions.
class AlternateExpression : public Expression
{
public:
	Expression::Ref left;			// First regex to match
	Expression::Ref right;			// Last regex to match

	void Apply(IRegexExpressionAlgorithm& algorithm);
};

// String-begin anchor.
class BeginExpression: public Expression
{
public:
	void Apply(IRegexExpressionAlgorithm& algorithm);
};

// String-end anchor.
class EndExpression : public Expression
{
public:
	void Apply(IRegexExpressionAlgorithm& algorithm);
};

// Capturing group.
class CaptureExpression : public Expression
{
public:
	U32String name;					// Capture name, empty for anonymous capture
	Expression::Ref expression;		// Regex to match

	void Apply(IRegexExpressionAlgorithm& algorithm);
};

// Match against previously captured text.
class MatchExpression : public Expression
{
public:
	U32String name;					// Capture name, empty for anonymous
	vint index;						// The index of the captured text associated with the name to match, -1 for all of them

	void Apply(IRegexExpressionAlgorithm& algorithm);
};

// Positive lookahead.
class PositiveExpression : public Expression
{
public:
	Expression::Ref expression;		// Regex to match

	void Apply(IRegexExpressionAlgorithm& algorithm);
};

// Negative lookahead.
class NegativeExpression : public Expression
{
public:
	Expression::Ref expression;		// Regex to match

	void Apply(IRegexExpressionAlgorithm& algorithm);
};

// Reference to a named regex.
class UsingExpression : public Expression
{
public:
	U32String name;					// Name of the regex to refer

	void Apply(IRegexExpressionAlgorithm& algorithm);
};

// A complete regular expression: the main expression plus the named
// definitions it can refer to.
// NOTE(review): the template argument of `Ptr` in the typedef is missing here.
class RegexExpression : public Object
{
public:
	typedef Ptr Ref;

	Expression::Map definitions;	// Named regex to be referred
	Expression::Ref expression;		// Regex to match

	NOT_COPYABLE(RegexExpression);
	RegexExpression() = default;

	Expression::Ref Merge();
};

/***********************************************************************
Visitor
***********************************************************************/

// Visitor interface with one Visit overload per concrete Expression subclass.
class IRegexExpressionAlgorithm : public Interface
{
public:
	virtual void Visit(CharSetExpression* expression)=0;
	virtual void Visit(LoopExpression* expression)=0;
	virtual void Visit(SequenceExpression* expression)=0;
	virtual void Visit(AlternateExpression* expression)=0;
	virtual void Visit(BeginExpression* expression)=0;
	virtual void Visit(EndExpression* expression)=0;
	virtual void Visit(CaptureExpression* expression)=0;
	virtual void Visit(MatchExpression* expression)=0;
	virtual void Visit(PositiveExpression* expression)=0;
	virtual void Visit(NegativeExpression* expression)=0;
	virtual void
Visit(UsingExpression* expression)=0; }; template class RegexExpressionAlgorithm : public Object, public IRegexExpressionAlgorithm { private: ReturnType returnValue; ParameterType* parameterValue; public: ReturnType Invoke(Expression* expression, ParameterType parameter) { parameterValue=¶meter; expression->Apply(*this); return returnValue; } ReturnType Invoke(Expression::Ref expression, ParameterType parameter) { parameterValue=¶meter; expression->Apply(*this); return returnValue; } virtual ReturnType Apply(CharSetExpression* expression, ParameterType parameter)=0; virtual ReturnType Apply(LoopExpression* expression, ParameterType parameter)=0; virtual ReturnType Apply(SequenceExpression* expression, ParameterType parameter)=0; virtual ReturnType Apply(AlternateExpression* expression, ParameterType parameter)=0; virtual ReturnType Apply(BeginExpression* expression, ParameterType parameter)=0; virtual ReturnType Apply(EndExpression* expression, ParameterType parameter)=0; virtual ReturnType Apply(CaptureExpression* expression, ParameterType parameter)=0; virtual ReturnType Apply(MatchExpression* expression, ParameterType parameter)=0; virtual ReturnType Apply(PositiveExpression* expression, ParameterType parameter)=0; virtual ReturnType Apply(NegativeExpression* expression, ParameterType parameter)=0; virtual ReturnType Apply(UsingExpression* expression, ParameterType parameter)=0; public: void Visit(CharSetExpression* expression) { returnValue=Apply(expression, *parameterValue); } void Visit(LoopExpression* expression) { returnValue=Apply(expression, *parameterValue); } void Visit(SequenceExpression* expression) { returnValue=Apply(expression, *parameterValue); } void Visit(AlternateExpression* expression) { returnValue=Apply(expression, *parameterValue); } void Visit(BeginExpression* expression) { returnValue=Apply(expression, *parameterValue); } void Visit(EndExpression* expression) { returnValue=Apply(expression, *parameterValue); } void 
Visit(CaptureExpression* expression) { returnValue=Apply(expression, *parameterValue); } void Visit(MatchExpression* expression) { returnValue=Apply(expression, *parameterValue); } void Visit(PositiveExpression* expression) { returnValue=Apply(expression, *parameterValue); } void Visit(NegativeExpression* expression) { returnValue=Apply(expression, *parameterValue); } void Visit(UsingExpression* expression) { returnValue=Apply(expression, *parameterValue); } }; template class RegexExpressionAlgorithm : public Object, public IRegexExpressionAlgorithm { private: ParameterType* parameterValue; public: void Invoke(Expression* expression, ParameterType parameter) { parameterValue=¶meter; expression->Apply(*this); } void Invoke(Expression::Ref expression, ParameterType parameter) { parameterValue=¶meter; expression->Apply(*this); } virtual void Apply(CharSetExpression* expression, ParameterType parameter)=0; virtual void Apply(LoopExpression* expression, ParameterType parameter)=0; virtual void Apply(SequenceExpression* expression, ParameterType parameter)=0; virtual void Apply(AlternateExpression* expression, ParameterType parameter)=0; virtual void Apply(BeginExpression* expression, ParameterType parameter)=0; virtual void Apply(EndExpression* expression, ParameterType parameter)=0; virtual void Apply(CaptureExpression* expression, ParameterType parameter)=0; virtual void Apply(MatchExpression* expression, ParameterType parameter)=0; virtual void Apply(PositiveExpression* expression, ParameterType parameter)=0; virtual void Apply(NegativeExpression* expression, ParameterType parameter)=0; virtual void Apply(UsingExpression* expression, ParameterType parameter)=0; public: void Visit(CharSetExpression* expression) { Apply(expression, *parameterValue); } void Visit(LoopExpression* expression) { Apply(expression, *parameterValue); } void Visit(SequenceExpression* expression) { Apply(expression, *parameterValue); } void Visit(AlternateExpression* expression) { 
Apply(expression, *parameterValue); }
	void Visit(BeginExpression* expression) { Apply(expression, *parameterValue); }
	void Visit(EndExpression* expression) { Apply(expression, *parameterValue); }
	void Visit(CaptureExpression* expression) { Apply(expression, *parameterValue); }
	void Visit(MatchExpression* expression) { Apply(expression, *parameterValue); }
	void Visit(PositiveExpression* expression) { Apply(expression, *parameterValue); }
	void Visit(NegativeExpression* expression) { Apply(expression, *parameterValue); }
	void Visit(UsingExpression* expression) { Apply(expression, *parameterValue); }
};

/***********************************************************************
Helper Functions
***********************************************************************/

// Parser entry points. Each takes the input cursor by reference
// (`const char32_t*&`), so a call can move the caller's position.
// NOTE(review): the template argument of each `Ptr` return type is missing
// here (presumably the expression class each function produces) -- TODO
// restore from the upstream header.
extern Ptr ParseLoop(const char32_t*& input);
extern Ptr ParseCharSet(const char32_t*& input);
extern Ptr ParseFunction(const char32_t*& input);
extern Ptr ParseUnit(const char32_t*& input);
extern Ptr ParseJoin(const char32_t*& input);
extern Ptr ParseAlt(const char32_t*& input);
extern Ptr ParseExpression(const char32_t*& input);
extern RegexExpression::Ref ParseRegexExpression(const U32String& code);

// Text utilities for regex source; only the declarations are visible here.
extern U32String EscapeTextForRegex(const U32String& literalString);
extern U32String UnescapeTextForRegex(const U32String& escapedText);
extern U32String NormalizeEscapedTextForRegex(const U32String& escapedText);
extern bool IsRegexEscapedLiteralString(const U32String& regex);

// Exception type that carries, in addition to the message handed to the
// Exception base class, a code string and a position.
// Presumably `code` is the regex source and `position` an offset into it --
// TODO confirm against the throw sites.
class RegexException : public Exception
{
public:
	U32String code;
	vint position;

public:
	RegexException(const WString& _message, const U32String& _code, vint _position)
		: Exception(_message)
		, code(_code)
		, position(_position)
	{
	}

	// Accessors for the two public fields above.
	const U32String& GetCode() const { return code; }
	vint GetPosition() const { return position; }
};
}
}

#endif

/***********************************************************************
.\AST\REGEXWRITER.H
***********************************************************************/
/*********************************************************************** Author: Zihan Chen (vczh) Licensed under https://github.com/vczh-libraries/License ***********************************************************************/ #ifndef VCZH_REGEX_REGEXWRITER #define VCZH_REGEX_REGEXWRITER namespace vl { namespace regex { class RegexNode : public Object { public: vl::regex_internal::Expression::Ref expression; RegexNode(vl::regex_internal::Expression::Ref _expression); RegexNode Some()const; RegexNode Any()const; RegexNode Opt()const; RegexNode Loop(vint min, vint max)const; RegexNode AtLeast(vint min)const; RegexNode operator+(const RegexNode& node)const; RegexNode operator|(const RegexNode& node)const; RegexNode operator+()const; RegexNode operator-()const; RegexNode operator!()const; RegexNode operator%(const RegexNode& node)const; }; extern RegexNode rCapture(const U32String& name, const RegexNode& node); extern RegexNode rUsing(const U32String& name); extern RegexNode rMatch(const U32String& name, vint index=-1); extern RegexNode rMatch(vint index); extern RegexNode rBegin(); extern RegexNode rEnd(); extern RegexNode rC(char32_t a, char32_t b=0); extern RegexNode r_d(); extern RegexNode r_l(); extern RegexNode r_w(); extern RegexNode rAnyChar(); } } #endif /*********************************************************************** .\REGEXPURE.H ***********************************************************************/ /*********************************************************************** Author: Zihan Chen (vczh) Licensed under https://github.com/vczh-libraries/License ***********************************************************************/ #ifndef VCZH_REGEX_REGEXPURE #define VCZH_REGEX_REGEXPURE namespace vl { namespace stream { class IStream; } namespace regex_internal { class PureResult { public: vint start; vint length; vint finalState; vint terminateState; }; class PureInterpretor : public Object { using CharRangeArray = collections::Array; protected: 
static const vint SupportedCharCount = MaxChar32 + 1; CharRangeArray charRanges; vint charMap[SupportedCharCount]; // char -> char set index vint* transitions = nullptr; // (state * charSetCount + charSetIndex) -> state bool* finalState = nullptr; // state -> bool vint* relatedFinalState = nullptr; // state -> (finalState or -1) vint stateCount; vint charSetCount; vint startState; void ExpandCharRanges(); public: PureInterpretor(Automaton::Ref dfa, CharRange::List& subsets); PureInterpretor(stream::IStream& inputStream); ~PureInterpretor(); void Serialize(stream::IStream& outputStream); template bool MatchHead(const TChar* input, const TChar* start, PureResult& result); template bool Match(const TChar* input, const TChar* start, PureResult& result); vint GetStartState(); vint Transit(char32_t input, vint state); bool IsFinalState(vint state); bool IsDeadState(vint state); void PrepareForRelatedFinalStateTable(); vint GetRelatedFinalState(vint state); }; extern template bool PureInterpretor::MatchHead(const wchar_t* input, const wchar_t* start, PureResult& result); extern template bool PureInterpretor::MatchHead(const char8_t* input, const char8_t* start, PureResult& result); extern template bool PureInterpretor::MatchHead(const char16_t* input, const char16_t* start, PureResult& result); extern template bool PureInterpretor::MatchHead(const char32_t* input, const char32_t* start, PureResult& result); extern template bool PureInterpretor::Match(const wchar_t* input, const wchar_t* start, PureResult& result); extern template bool PureInterpretor::Match(const char8_t* input, const char8_t* start, PureResult& result); extern template bool PureInterpretor::Match(const char16_t* input, const char16_t* start, PureResult& result); extern template bool PureInterpretor::Match(const char32_t* input, const char32_t* start, PureResult& result); } } #endif /*********************************************************************** .\REGEXRICH.H 
***********************************************************************/ /*********************************************************************** Author: Zihan Chen (vczh) Licensed under https://github.com/vczh-libraries/License ***********************************************************************/ #ifndef VCZH_REGEX_REGEXRICH #define VCZH_REGEX_REGEXRICH namespace vl { namespace regex_internal { class CaptureRecord { public: vint capture; vint start; vint length; bool operator==(const CaptureRecord& record)const; }; } namespace regex_internal { class RichResult { public: vint start; vint length; collections::List captures; }; class RichInterpretor : public Object { public: protected: class UserData { public: bool NeedKeepState; }; Automaton::Ref dfa; UserData* datas; public: RichInterpretor(Automaton::Ref _dfa); ~RichInterpretor(); template bool MatchHead(const TChar* input, const TChar* start, RichResult& result); template bool Match(const TChar* input, const TChar* start, RichResult& result); const collections::List& CaptureNames(); }; extern template bool RichInterpretor::MatchHead(const wchar_t* input, const wchar_t* start, RichResult& result); extern template bool RichInterpretor::MatchHead(const char8_t* input, const char8_t* start, RichResult& result); extern template bool RichInterpretor::MatchHead(const char16_t* input, const char16_t* start, RichResult& result); extern template bool RichInterpretor::MatchHead(const char32_t* input, const char32_t* start, RichResult& result); extern template bool RichInterpretor::Match(const wchar_t* input, const wchar_t* start, RichResult& result); extern template bool RichInterpretor::Match(const char8_t* input, const char8_t* start, RichResult& result); extern template bool RichInterpretor::Match(const char16_t* input, const char16_t* start, RichResult& result); extern template bool RichInterpretor::Match(const char32_t* input, const char32_t* start, RichResult& result); }; } #endif