GacUI/Import/VlppRegex.h

/***********************************************************************
THIS FILE IS AUTOMATICALLY GENERATED. DO NOT MODIFY
DEVELOPER: Zihan Chen(vczh)
***********************************************************************/
#include "Vlpp.h"

/***********************************************************************
.\REGEX.H
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/

#ifndef VCZH_REGEX_REGEX
#define VCZH_REGEX_REGEX


namespace vl
{
	// Forward declarations of the interpretor implementation types.
	// This header only refers to them through pointers, so their definitions are not required here.
	// "Pure" types belong to the DFA driven mode, "Rich" types belong to the non-DFA mode.
	namespace regex_internal
	{
		// pure (DFA) mode
		class PureInterpretor;
		class PureResult;

		// rich mode
		class RichInterpretor;
		class RichResult;
	}

	namespace regex
	{

/***********************************************************************
Data Structure
***********************************************************************/

		/// <summary>A sub string of the string that a <see cref="Regex"/> is matched against.</summary>
		class RegexString : public Object
		{
		protected:
			// Storage for the sub string and its location.
			// Presumably these back Value(), Start() and Length(); the definitions are not in this header.
			WString										value;
			vint										start;
			vint										length;

		public:
			/// <summary>Create a sub string description from a start position only.</summary>
			/// <param name="_start">The start position, 0 by default.</param>
			RegexString(vint _start=0);
			/// <summary>Create a sub string description from a string, a start position and a length.</summary>
			/// <remarks>NOTE(review): whether "_string" is the whole input or the sub string itself is decided by the implementation, which is not visible in this header.</remarks>
			/// <param name="_string">The string.</param>
			/// <param name="_start">The start position in characters.</param>
			/// <param name="_length">The length in characters.</param>
			RegexString(const WString& _string, vint _start, vint _length);

			/// <summary>The position in the input string in characters.</summary>
			/// <returns>The position.</returns>
			vint										Start()const;
			/// <summary>The size of the sub string in characters.</summary>
			/// <returns>The size.</returns>
			vint										Length()const;
			/// <summary>Get the sub string as a <see cref="WString"/>.</summary>
			/// <returns>The sub string.</returns>
			const WString&								Value()const;
			/// <summary>Compare this sub string with another one.</summary>
			/// <param name="string">The sub string to compare with.</param>
			/// <returns>Returns true if they are considered equal. The exact criteria are defined by the implementation, which is not in this header.</returns>
			bool										operator==(const RegexString& string)const;
		};

		/// <summary>A match produced by a <see cref="Regex"/>.</summary>
		class RegexMatch : public Object, private NotCopyable
		{
			friend class Regex;
		public:
			/// <summary>Shared pointer to a match.</summary>
			using Ref = Ptr<RegexMatch>;
			/// <summary>A list of matches.</summary>
			using List = collections::List<Ref>;
			/// <summary>The container type returned by <see cref="Captures"/>.</summary>
			using CaptureList = collections::List<RegexString>;
			/// <summary>The container type returned by <see cref="Groups"/>.</summary>
			using CaptureGroup = collections::Group<WString, RegexString>;

		protected:
			CaptureList									captures;
			CaptureGroup								groups;
			bool										success;
			RegexString									result;

			// Constructors are not public; Regex is a friend of this class.
			RegexMatch(const WString& _string, regex_internal::PureResult* _result);
			RegexMatch(const WString& _string, regex_internal::RichResult* _result, regex_internal::RichInterpretor* _rich);
			RegexMatch(const RegexString& _result);

		public:
			/// <summary>
			/// Test if this match is a succeeded match or a failed match.
			/// A failed match will only appear when calling [M:vl.regex.Regex.Split] or [M:vl.regex.Regex.Cut].
			/// In other cases, failed matches are not included in the result.
			/// </summary>
			/// <returns>Returns true if this match is a succeeded match.</returns>
			bool										Success()const;

			/// <summary>Get the matched sub string.</summary>
			/// <returns>The matched sub string.</returns>
			const RegexString&							Result()const;

			/// <summary>Get all sub strings that are captured anonymously.</summary>
			/// <returns>All sub strings that are captured anonymously.</returns>
			/// <example><![CDATA[
			/// int main()
			/// {
			///     Regex regex(L"^/.*?((?C/S+)(/.*?))+$");
			///     auto match = regex.MatchHead(L"C++ and C# are my favorite programing languages");
			///     FOREACH(RegexString, capture, match->Captures())
			///     {
			///         Console::WriteLine(capture.Value());
			///     }
			/// }
			/// ]]></example>
			const CaptureList&							Captures()const;

			/// <summary>Get all sub strings that are captured by named groups.</summary>
			/// <returns>All sub strings that are captured by named groups.</returns>
			/// <example><![CDATA[
			/// int main()
			/// {
			///     Regex regex(L"^/.*?((<lang>C/S+)(/.*?))+$");
			///     auto match = regex.MatchHead(L"C++ and C# are my favorite programing languages");
			///     FOREACH(RegexString, capture, match->Groups().Get(L"lang"))
			///     {
			///         Console::WriteLine(capture.Value());
			///     }
			/// }
			/// ]]></example>
			const CaptureGroup&							Groups()const;
		};

/***********************************************************************
Regex
***********************************************************************/

		/// <summary>
		/// <p>
		///     Regular Expression. Here is a brief description of the regular expression grammar.
		/// </p>
		/// <p>
		///     <ul>
		///         <li>
		///             <b>Charset</b>:
		///             <ul>
		///                 <li><b>a</b>, <b>[a-z]</b>, <b>[^a-z]</b></li>
		///             </ul>
		///         </li>
		///         <li>
		///             <b>Functional characters</b>:
		///             <ul>
		///                 <li><b>^</b>: the beginning of the input (DFA incompatible)</li>
		///                 <li><b>$</b>: the end of the input (DFA incompatible)</li>
		///                 <li><b>regex1|regex2</b>: match either regex1 or regex2</li>
		///             </ul>
		///         </li>
		///         <li>
		///             <b>Escaping</b> (both \ and / mean the next character is escaped):
		///             <ul>
		///                 <li>
		///                     Escaped characters:
		///                     <ul>
		///                         <li><b>\r</b>: the CR character</li>
		///                         <li><b>\n</b>: the LF character</li>
		///                         <li><b>\t</b>: the tab character</li>
		///                         <li><b>\s</b>: spacing characters (including space, \r, \n, \t)</li>
		///                         <li><b>\S</b>: non-spacing characters</li>
		///                         <li><b>\d</b>: [0-9]</li>
		///                         <li><b>\D</b>: [^0-9]</li>
		///                         <li><b>\l</b>: [a-zA-Z]</li>
		///                         <li><b>\L</b>: [^a-zA-Z]</li>
		///                         <li><b>\w</b>: [a-zA-Z0-9_]</li>
		///                         <li><b>\W</b>: [^a-zA-Z0-9_]</li>
		///                         <li><b>\.</b>: any character (this is the main difference from other regex engines, which treat "." as any character and "\." as the dot character)</li>
		///                         <li><b>\\</b>, <b>\/</b>, <b>\(</b>, <b>\)</b>, <b>\+</b>, <b>\*</b>, <b>\?</b>, <b>\{</b>, <b>\}</b>, <b>\[</b>, <b>\]</b>, <b>\&lt;</b>, <b>\&gt;</b>, <b>\^</b>, <b>\$</b>, <b>\!</b>, <b>\=</b>: represents itself</li>
		///                     </ul>
		///                 </li>
		///                 <li>
		///                     Escaped characters in charset defined in a square bracket:
		///                     <ul>
		///                         <li><b>\r</b>: the CR character</li>
		///                         <li><b>\n</b>: the LF character</li>
		///                         <li><b>\t</b>: the tab character</li>
		///                         <li><b>\-</b>, <b>\[</b>, <b>\]</b>, <b>\\</b>, <b>\/</b>, <b>\^</b>, <b>\$</b>: represents itself</li>
		///                     </ul>
		///                 </li>
		///             </ul>
		///         </li>
		///         <li>
		///             <b>Loops</b>:
		///             <ul>
		///                 <li><b>regex{3}</b>: repeats 3 times</li>
		///                 <li><b>regex{3,}</b>: repeats 3 or more times</li>
		///                 <li><b>regex{1,3}</b>: repeats 1 to 3 times</li>
		///                 <li><b>regex?</b>: repeats 0 or 1 times</li>
		///                 <li><b>regex*</b>: repeats 0 or more times</li>
		///                 <li><b>regex+</b>: repeats 1 or more times</li>
		///             </ul>
		///             if you add an additional <b>?</b> right after a loop, it means repeating as few times as possible <b>(DFA incompatible)</b>
		///         </li>
		///         <li>
		///             <b>Capturing</b>: <b>(DFA incompatible)</b>
		///             <ul>
		///                 <li><b>(regex)</b>: No capturing, just change the operators' association</li>
		///                 <li><b>(?regex)</b>: Capture matched fragment</li>
		///                 <li><b>(&lt;name&gt;regex)</b>: Capture matched fragment in a named group called "name"</li>
		///                 <li><b>(&lt;$i&gt;)</b>: Match the i-th captured fragment, begins from 0</li>
		///                 <li><b>(&lt;$name;i&gt;)</b>: Match the i-th captured fragment in the named group called "name", begins from 0</li>
		///                 <li><b>(&lt;$name&gt;)</b>: Match any captured fragment in the named group called "name"</li>
		///             </ul>
		///         </li>
		///         <li>
		///             <b>MISC</b>
		///             <ul>
		///                 <li><b>(=regex)</b>: The prefix of the following text should match the regex, but it is not counted in the whole match <b>(DFA incompatible)</b></li>
		///                 <li><b>(!regex)</b>: Any prefix of the following text should not match the regex, and it is not counted in the whole match <b>(DFA incompatible)</b></li>
		///                 <li><b>(&lt;#name&gt;regex)</b>: Name the regex "name", and it applies here</li>
		///                 <li><b>(&lt;&amp;name&gt;)</b>: Copy the named regex "name" here and apply</li>
		///             </ul>
		///         </li>
		///     </ul>
		/// </p>
		/// <p>
		///     The regular expression has pure mode and rich mode.
		///     Pure mode means the regular expression is driven by a DFA, while the rich mode is not.
		/// </p>
		/// <p>
		///     The regular expression can test a string instead of matching.
		///     Testing only returns a bool value indicating success or failure.
		/// </p>
		/// </summary>
		class Regex : public Object, private NotCopyable
		{
		protected:
			// Interpretors for the pure (DFA) mode and the rich mode, both null by default.
			// NOTE(review): presumably created by the constructor and released by ~Regex() - confirm against the implementation.
			regex_internal::PureInterpretor*			pure = nullptr;
			regex_internal::RichInterpretor*			rich = nullptr;

			// Internal helper that fills "matches" from "text" according to the three keep* flags.
			// NOTE(review): presumably the shared implementation behind Search, Split and Cut - confirm against the implementation.
			void										Process(const WString& text, bool keepEmpty, bool keepSuccess, bool keepFail, RegexMatch::List& matches)const;
		public:
			/// <summary>Create a regular expression. It will crash if the regular expression produces a syntax error.</summary>
			/// <param name="code">The regular expression in a string.</param>
			/// <param name="preferPure">Set to true to use DFA if possible.</param>
			Regex(const WString& code, bool preferPure = true);
			~Regex();

			/// <summary>Test if a DFA is used to match a string.</summary>
			/// <returns>Returns true if a DFA is used.</returns>
			bool										IsPureMatch()const;
			/// <summary>Test if a DFA is used to test a string. It ignores all capturing.</summary>
			/// <returns>Returns true if a DFA is used.</returns>
			bool										IsPureTest()const;

			/// <summary>Match a prefix of the text.</summary>
			/// <returns>Returns the match. Returns null if failed.</returns>
			/// <param name="text">The text to match.</param>
			/// <example><![CDATA[
			/// int main()
			/// {
			///     Regex regex(L"C/S+");
			///     auto match = regex.MatchHead(L"C++ and C# are my favorite programing languages");
			///     Console::WriteLine(match->Result().Value());
			/// }
			/// ]]></example>
			RegexMatch::Ref								MatchHead(const WString& text)const;
			/// <summary>Match a sub string of the text.</summary>
			/// <returns>Returns the first match. Returns null if failed.</returns>
			/// <param name="text">The text to match.</param>
			/// <example><![CDATA[
			/// int main()
			/// {
			///     Regex regex(L"C/S+");
			///     auto match = regex.Match(L"C++ and C# are my favorite programing languages");
			///     Console::WriteLine(match->Result().Value());
			/// }
			/// ]]></example>
			RegexMatch::Ref								Match(const WString& text)const;
			/// <summary>Match a prefix of the text, ignoring all capturing.</summary>
			/// <returns>Returns true if it succeeded.</returns>
			/// <param name="text">The text to match.</param>
			bool										TestHead(const WString& text)const;
			/// <summary>Match a sub string of the text, ignoring all capturing.</summary>
			/// <returns>Returns true if it succeeded.</returns>
			/// <param name="text">The text to match.</param>
			bool										Test(const WString& text)const;
			/// <summary>Find all matched fragments in the given text, returning all matched sub strings.</summary>
			/// <param name="text">The text to match.</param>
			/// <param name="matches">Returns all succeeded matches.</param>
			/// <example><![CDATA[
			/// int main()
			/// {
			///     Regex regex(L"C/S+");
			///     RegexMatch::List matches;
			///     regex.Search(L"C++ and C# are my favorite programing languages", matches);
			///     FOREACH(Ptr<RegexMatch>, match, matches)
			///     {
			///         Console::WriteLine(match->Result().Value());
			///     }
			/// }
			/// ]]></example>
			void										Search(const WString& text, RegexMatch::List& matches)const;
			/// <summary>Split the text by matched sub strings, returning all unmatched sub strings.</summary>
			/// <param name="text">The text to match.</param>
			/// <param name="keepEmptyMatch">Set to true to keep all empty unmatched sub strings. This could happen when there is nothing between two matched sub strings.</param>
			/// <param name="matches">Returns all failed matches.</param>
			/// <example><![CDATA[
			/// int main()
			/// {
			///     Regex regex(L"C/S+");
			///     RegexMatch::List matches;
			///     regex.Split(L"C++ and C# are my favorite programing languages", false, matches);
			///     FOREACH(Ptr<RegexMatch>, match, matches)
			///     {
			///         Console::WriteLine(match->Result().Value());
			///     }
			/// }
			/// ]]></example>
			void										Split(const WString& text, bool keepEmptyMatch, RegexMatch::List& matches)const;
			/// <summary>Cut the text by matched sub strings, returning all matched and unmatched sub strings.</summary>
			/// <param name="text">The text to match.</param>
			/// <param name="keepEmptyMatch">Set to true to keep all empty matches. This could happen when there is nothing between two matched sub strings.</param>
			/// <param name="matches">Returns all succeeded and failed matches.</param>
			/// <example><![CDATA[
			/// int main()
			/// {
			///     Regex regex(L"C/S+");
			///     RegexMatch::List matches;
			///     regex.Cut(L"C++ and C# are my favorite programing languages", false, matches);
			///     FOREACH(Ptr<RegexMatch>, match, matches)
			///     {
			///         Console::WriteLine(match->Result().Value());
			///     }
			/// }
			/// ]]></example>
			void										Cut(const WString& text, bool keepEmptyMatch, RegexMatch::List& matches)const;
		};

/***********************************************************************
Tokenizer
***********************************************************************/

		/// <summary>A token.</summary>
		struct RegexToken
		{
			/// <summary>Position in the input string in characters.</summary>
			vint										start;
			/// <summary>Size of this token in characters.</summary>
			vint										length;
			/// <summary>The token id, begins at 0, represents the regular expression in the list (the first argument in the constructor of <see cref="RegexLexer"/>) that matches this token. -1 means this token is produced by an error.</summary>
			vint										token;
			/// <summary>The pointer to where this token starts in the input string.</summary>
			/// <remarks>This pointer comes from the <see cref="WString"/> that was analyzed. You should keep a variable holding that string alive, in order to keep this pointer valid.</remarks>
			const wchar_t*								reading;
			/// <summary>The "codeIndex" argument from [M:vl.regex.RegexLexer.Parse].</summary>
			vint										codeIndex;
			/// <summary>True if this token is complete. False if this token does not end here. This could happen when colorizing a text line by line.</summary>
			bool										completeToken;

			/// <summary>Row number of the first character, begins at 0.</summary>
			vint										rowStart;
			/// <summary>Column number of the first character, begins at 0.</summary>
			vint										columnStart;
			/// <summary>Row number of the last character, begins at 0.</summary>
			vint										rowEnd;
			/// <summary>Column number of the last character, begins at 0.</summary>
			vint										columnEnd;

			/// <summary>Compare this token with another token.</summary>
			/// <param name="_token">The token to compare with.</param>
			/// <returns>Returns true if they are considered equal. The compared fields are decided by the implementation, which is not in this header.</returns>
			bool										operator==(const RegexToken& _token)const;
			/// <summary>Compare this token with a string.</summary>
			/// <param name="_token">The string to compare with. NOTE(review): presumably null-terminated and compared against the token text - confirm against the implementation.</param>
			/// <returns>Returns true if they are considered equal.</returns>
			bool										operator==(const wchar_t* _token)const;
		};

		/// <summary>Token information passed to <see cref="RegexProc::extendProc"/>.</summary>
		struct RegexProcessingToken
		{
			/// <summary>
			/// The start position of the token. It is read only.
			/// This value will be -1 if <see cref="interTokenState"/> is not null.
			/// </summary>
			const vint									start;

			/// <summary>
			/// The length of the token. The callback is allowed to update it.
			/// When the callback returns, the length is not allowed to be decreased.
			/// This value will be -1 if <see cref="interTokenState"/> is not null.
			/// </summary>
			vint										length;

			/// <summary>
			/// The id of the token. The callback is allowed to update it.
			/// </summary>
			vint										token;

			/// <summary>
			/// The flag indicating if this token is completed. The callback is allowed to update it.
			/// </summary>
			bool										completeToken;

			/// <summary>
			/// The inter token state object. The callback is allowed to update it.
			/// When the callback returns:
			/// <ul>
			///   <li>if the completeText parameter is true in <see cref="RegexProc::extendProc"/>, it should be nullptr.</li>
			///   <li>if the token does not end at the end of the input, it should not be nullptr.</li>
			///   <li>if a token is completed in one attempt of extending, it should be nullptr.</li>
			/// </ul>
			/// </summary>
			void*										interTokenState;

			/// <summary>Create the token information, every field is copied from the argument of the same name.</summary>
			RegexProcessingToken(
				vint _start,
				vint _length,
				vint _token,
				bool _completeToken,
				void* _interTokenState
				)
				: start(_start),
				length(_length),
				token(_token),
				completeToken(_completeToken),
				interTokenState(_interTokenState)
			{
			}
		};

		/// <summary>Function pointer type of <see cref="RegexProc::deleter"/>.</summary>
		using RegexInterTokenStateDeleter = void(*)(void* interTokenState);

		/// <summary>Function pointer type of <see cref="RegexProc::extendProc"/>.</summary>
		using RegexTokenExtendProc = void(*)(
			void* argument,
			const wchar_t* reading,
			vint length,
			bool completeText,
			RegexProcessingToken& processingToken
			);

		/// <summary>Function pointer type of <see cref="RegexProc::colorizeProc"/>.</summary>
		using RegexTokenColorizeProc = void(*)(void* argument, vint start, vint length, vint token);

		/// <summary>Callback procedures.</summary>
		struct RegexProc
		{
			/// <summary>
			/// The deleter which deletes <see cref="RegexProcessingToken::interTokenState"/> created by <see cref="extendProc"/>.
			/// This callback is not called automatically.
			/// It is here to make the maintenance convenient for the caller.
			/// </summary>
			RegexInterTokenStateDeleter					deleter = nullptr;
			/// <summary>
			/// <p>The token extend callback. It is called after recognizing any token, and runs a customized procedure to modify the token based on the given context.</p>
			/// <p>If the length parameter is -1, it means the caller does not measure the incoming text buffer, which automatically indicates that the buffer is null-terminated.</p>
			/// <p>If the length parameter is not -1, it means the number of available characters in the buffer.</p>
			/// <p>The completeText parameter could be true or false. When it is false, it means that the buffer does not contain all the text.</p>
			/// </summary>
			/// <remarks>
			/// <p>
			/// This is very useful to recognize any token that cannot be expressed using a regular expression.
			/// For example, a C++ literal string R"tag(the content)tag".
			/// It is recommended to add a token for <b>R"tag(</b>,
			/// and then use this extend proc to search for a <b>)tag"</b> to complete the token.
			/// </p>
			/// <p>
			/// <b>Important</b>:
			/// when colorizing a text line by line,
			/// a cross-line token could be incomplete at the end of the line.
			/// Because a given buffer ends at the end of that line,
			/// the extend proc is not able to know right now about what is going on in the future.
			/// Here is what <see cref="RegexProcessingToken::interTokenState"/> is designed for,
			/// the extend proc can store anything it wants using that pointer.
			/// </p>
			/// <p>
			/// The caller can get this pointer from the return value of <see cref="RegexLexerColorizer::Colorize"/>.
			/// This pointer is only available for cross-line tokens, it is obvious that one line produces at most one such pointer.
			/// Then the caller keeps calling that function to walk through the whole string.
			/// When the return value is changed, the pointer is no longer used, and it can be deleted by calling <see cref="deleter"/> manually.
			/// </p>
			/// <p>
			/// The first argument is <see cref="argument"/>.
			/// </p>
			/// <p>
			/// The second argument is a pointer to the buffer of the first character in this token.
			/// If the previous token is incomplete, then the buffer begins at the first character of the new buffer.
			/// </p>
			/// <p>
			/// The third argument is the number of available characters in the buffer, or -1 if the buffer is not measured and is null-terminated (see the summary).
			/// </p>
			/// <p>
			/// The fourth argument indicates if the token is completed.
			/// Even if a token is completed, if the extend proc finds that the extension exceeds the end of the buffer,
			/// then it can update the value to make it incomplete.
			/// NOTE(review): the fourth parameter (completeText) is passed by value, so the updatable flag is presumably <see cref="RegexProcessingToken::completeToken"/> - confirm.
			/// </p>
			/// <p>
			/// The fifth argument contains the context for this token. Fields except "start" are allowed to be updated by the extend proc.
			/// </p>
			/// </remarks>
			/// <example><![CDATA[
			/// int main()
			/// {
			///     List<WString> tokenDefs;
			///     tokenDefs.Add(L"/d+");
			///     tokenDefs.Add(L"[a-zA-Z_]/w*");
			///     tokenDefs.Add(L"\"([^\"/\\]|/\\/.)*\"");
			///     tokenDefs.Add(L"R\"[^(]*/(");
			///     tokenDefs.Add(L"[(){};]");
			///     tokenDefs.Add(L"/s+");
			///     tokenDefs.Add(L"///*+([^//*]|/*+[^//])*/*+//");
			///
			///     const wchar_t* lines[] = {
			///         L"/*********************",
			///         L"MAIN.CPP",
			///         L"*********************/",
			///         L"",
			///         L"int main()",
			///         L"{",
			///         L"    printf(\"This is a \\\"simple\\\" text.\");",
			///         L"    printf(R\"____(This is a",
			///         L"\"multiple lined\"",
			///         L"literal text)____\");",
			///         L"    return 0;",
			///         L"}",
			///     };
			///
			///     struct Argument
			///     {
			///         // for a real colorizer, you can put a color buffer here.
			///         // the buffer is reused for every line of code.
			///         // but for the demo, I put the current processing text instead.
			///         // so that I am able to print what is processed.
			///         const wchar_t* processingText = nullptr;
			///     } argument;
			///
			///     struct InterTokenState
			///     {
			///         WString postfix;
			///     };
			///
			///     RegexProc proc;
			///     proc.argument = &argument;
			///     proc.colorizeProc = [](void* argument, vint start, vint length, vint token)
			///     {
			///         // this is guaranteed by "proc.argument = &argument;"
			///         auto text = reinterpret_cast<Argument*>(argument)->processingText;
			///         Console::WriteLine(itow(token) + L": <" + WString(text + start, length) + L">");
			///     };
			///     proc.deleter = [](void* interTokenState)
			///     {
			///         delete reinterpret_cast<InterTokenState*>(interTokenState);
			///     };
			///     proc.extendProc = [](void* argument, const wchar_t* reading, vint length, bool completeText, RegexProcessingToken& processingToken)
			///     {
			///         // 3 is R"[^(]*/(
			///         // 7 is not used in tokenDefs, it is occupied to represent an extended literal string
			///         if (processingToken.token == 3 || processingToken.token == 7)
			///         {
			///             // for calling wcsstr, create a buffer that is zero terminated
			///             WString readingBuffer = length == -1 ? WString(reading, false) : WString(reading, length);
			///             reading = readingBuffer.Buffer();
			///
			///             // get the postfix, which is )____" in this case
			///             WString postfix;
			///             if (processingToken.interTokenState)
			///             {
			///                 postfix = reinterpret_cast<InterTokenState*>(processingToken.interTokenState)->postfix;
			///             }
			///             else
			///             {
			///                 postfix = L")" + WString(reading + 2, processingToken.length - 3) + L"\"";
			///             }
			///
			///             // try to find if the postfix, which is )____" in this case, appear in the given buffer
			///             auto find = wcsstr(reading, postfix.Buffer());
			///             if (find)
			///             {
			///                 // if we find the postfix, it means we find the end of the literal string
			///                 // here processingToken.token automatically becomes 7
			///                 // interTokenState needs to be nullptr to indicate this
			///                 processingToken.length = (vint)(find - reading) + postfix.Length();
			///                 processingToken.completeToken = true;
			///                 processingToken.interTokenState = nullptr;
			///             }
			///             else
			///             {
			///                 // if we don't find the postfix, it means the end of the literal string is in future lines
			///                 // we need to set the token to 7, which is the real token id for literal strings
			///                 // since we change any token from 3 to 7, 3 will never be passed to colorizeProc in "token" argument
			///                 processingToken.length = readingBuffer.Length();
			///                 processingToken.token = 7;
			///                 processingToken.completeToken = false;
			///
			///                 // we need to ensure that interTokenState is not nullptr, and we can save the postfix here
			///                 if (!completeText && !processingToken.interTokenState)
			///                 {
			///                     auto state = new InterTokenState;
			///                     state->postfix = postfix;
			///                     processingToken.interTokenState = state;
			///                 }
			///             }
			///         }
			///     };
			///
			///     RegexLexer lexer(tokenDefs, proc);
			///     RegexLexerColorizer colorizer = lexer.Colorize();
			///
			///     void* lastInterTokenState = nullptr;
			///     FOREACH_INDEXER(const wchar_t*, line, index, From(lines))
			///     {
			///         Console::WriteLine(L"Begin line " + itow(index));
			///         argument.processingText = line;
			///         void* interTokenState = colorizer.Colorize(line, wcslen(line));
			///
			///         if (lastInterTokenState && lastInterTokenState != interTokenState)
			///         {
			///             // call the deleter manually
			///             proc.deleter(lastInterTokenState);
			///         }
			///         lastInterTokenState = interTokenState;
			///
			///         argument.processingText = nullptr;
			///         colorizer.Pass(L'\r');
			///         colorizer.Pass(L'\n');
			///         Console::WriteLine(L"");
			///     }
			/// }
			/// ]]></example>
			RegexTokenExtendProc						extendProc = nullptr;
			/// <summary>
			/// <p>
			/// The colorizer callback. It is called when a token is recognized.
			/// </p>
			/// <p>
			/// The first argument is <see cref="argument"/>.
			/// </p>
			/// <p>
			/// The second argument is the position of the first character of the token in characters.
			/// </p>
			/// <p>
			/// The third argument is the length of the recognized token in characters.
			/// </p>
			/// <p>
			/// The fourth argument is the id of the regular expression in the list (the first argument in the constructor of <see cref="RegexLexer"/>) that matches this token.
			/// </p>
			/// </summary>
			RegexTokenColorizeProc						colorizeProc = nullptr;
			/// <summary>
			/// The argument object that is the first argument for <see cref="extendProc"/> and <see cref="colorizeProc"/>.
			/// </summary>
			void*										argument = nullptr;
		};

		/// <summary>Token collection representing the result from the lexical analyzer. Call <see cref="RegexLexer::Parse"/> to create this object.</summary>
		/// <example><![CDATA[
		/// int main()
		/// {
		///     List<WString> tokenDefs;
		///     tokenDefs.Add(L"/d+");
		///     tokenDefs.Add(L"/w+");
		///     tokenDefs.Add(L"/s+");
		///
		///     RegexLexer lexer(tokenDefs, {});
		///     WString input = L"I have 2 books.";
		///     auto tokenResult = lexer.Parse(input);
		///
		///     FOREACH(RegexToken, token, tokenResult)
		///     {
		///         // input must be in a variable
		///         // because token.reading points to a position from input.Buffer();
		///         Console::WriteLine(itow(token.token) + L": <" + WString(token.reading, token.length) + L">");
		///     }
		/// }
		/// ]]></example>
		class RegexTokens : public Object, public collections::IEnumerable<RegexToken>
		{
			friend class RegexLexer;
		protected:
			// The DFA interpretor, held as a raw pointer.
			// NOTE(review): ownership is not visible in this header - confirm who releases it.
			regex_internal::PureInterpretor*			pure;
			// Held by reference: the referenced array must outlive this object and every copy of it.
			const collections::Array<vint>&				stateTokens;
			// A copy of the text to analyze, and the "codeIndex" that is reported in [F:vl.regex.RegexToken.codeIndex].
			WString										code;
			vint										codeIndex;
			// The callbacks, stored by value.
			RegexProc									proc;

			// Not public; RegexLexer is a friend of this class (see RegexLexer::Parse in the class summary).
			RegexTokens(regex_internal::PureInterpretor* _pure, const collections::Array<vint>& _stateTokens, const WString& _code, vint _codeIndex, RegexProc _proc);
		public:
			RegexTokens(const RegexTokens& tokens);
			~RegexTokens();

			/// <summary>Create an enumerator to read tokens one by one.</summary>
			/// <returns>The enumerator. NOTE(review): presumably the caller is responsible for deleting it, following the convention of collections::IEnumerable - confirm.</returns>
			collections::IEnumerator<RegexToken>*		CreateEnumerator()const;

			/// <summary>Copy all tokens.</summary>
			/// <param name="tokens">Returns all tokens.</param>
			/// <param name="discard">A callback to decide which kind of tokens to discard. The input is [F:vl.regex.RegexToken.token]. Returns true to discard this kind of tokens. It is null by default.</param>
			/// <example><![CDATA[
			/// int main()
			/// {
			///     List<WString> tokenDefs;
			///     tokenDefs.Add(L"/d+");
			///     tokenDefs.Add(L"/w+");
			///     tokenDefs.Add(L"/s+");
			///
			///     RegexLexer lexer(tokenDefs, {});
			///     WString input = L"I have 2 books.";
			///     auto tokenResult = lexer.Parse(input);
			///
			///     List<RegexToken> filtered;
			///     tokenResult.ReadToEnd(filtered, [](vint token) { return token < 0 || token == 2; });
			///
			///     FOREACH(RegexToken, token, filtered)
			///     {
			///         // input must be in a variable
			///         // because token.reading points to a position from input.Buffer();
			///         Console::WriteLine(itow(token.token) + L": <" + WString(token.reading, token.length) + L">");
			///     }
			/// }
			/// ]]></example>
			void										ReadToEnd(collections::List<RegexToken>& tokens, bool(*discard)(vint)=nullptr)const;
		};

		/// <summary>A type for walking through a text against a <see cref="RegexLexer"/>. Call <see cref="RegexLexer::Walk"/> to create this object.</summary>
		/// <example><![CDATA[
		/// int main()
		/// {
		///     List<WString> tokenDefs;
		///     tokenDefs.Add(L"/d+./d+");
		///     tokenDefs.Add(L"/d+");
		///     tokenDefs.Add(L"/w+");
		///     tokenDefs.Add(L"/s+");
		///
		///     RegexLexer lexer(tokenDefs, {});
		///     RegexLexerWalker walker = lexer.Walk();
		///
		///     WString input = L"This book costs 2.5. That book costs 2.";
		///     const wchar_t* reading = input.Buffer();
		///
		///     const wchar_t* tokenBegin = reading;
		///     const wchar_t* tokenEnd = nullptr;
		///     vint tokenId = -1;
		///
		///     vint state = walker.GetStartState();
		///     while (*reading)
		///     {
		///         vint token = -1;
		///         bool finalState = false;
		///         bool previousTokenStop = false;
		///         walker.Walk(*reading++, state, token, finalState, previousTokenStop);
		///
		///         if (previousTokenStop || !*reading)
		///         {
		///             if (tokenEnd)
		///             {
		///                 if (tokenBegin == tokenEnd)
		///                 {
		///                     Console::WriteLine(L"Recognized token: " + itow(tokenId) + L": <" + WString(*tokenBegin) + L">");
		///                     tokenBegin = reading;
		///                     tokenEnd = nullptr;
		///                     tokenId = -1;
		///                     state = walker.GetStartState();
		///                 }
		///                 else
		///                 {
		///                     Console::WriteLine(L"Recognized token: " + itow(tokenId) + L": <" + WString(tokenBegin, tokenEnd - tokenBegin) + L">");
		///                     tokenBegin = reading = tokenEnd;
		///                     tokenEnd = nullptr;
		///                     tokenId = -1;
		///                     state = walker.GetStartState();
		///                 }
		///             }
		///             else
		///             {
		///                 Console::WriteLine(L"Unrecognized character: <" + WString(*tokenBegin) + L">");
		///                 tokenBegin++;
		///                 state = walker.GetStartState();
		///             }
		///         }
		///         else if (finalState)
		///         {
		///             tokenEnd = reading;
		///             tokenId = token;
		///         }
		///     }
		/// }
		/// ]]></example>
		class RegexLexerWalker : public Object
		{
			friend class RegexLexer;
		protected:
			// Both members are supplied through the protected constructor, which is reachable from the friend class RegexLexer.
			regex_internal::PureInterpretor*			pure;
			// Stored as a reference, not as a copy.
			// NOTE(review): presumably refers to an array owned by the RegexLexer that created this walker,
			// in which case the walker must not outlive that lexer - confirm against RegexLexer::Walk.
			const collections::Array<vint>&				stateTokens;

			RegexLexerWalker(regex_internal::PureInterpretor* _pure, const collections::Array<vint>& _stateTokens);
		public:
			RegexLexerWalker(const RegexLexerWalker& tokens);
			~RegexLexerWalker();

			/// <summary>Get the start DFA state number, which represents the correct state before parsing any input.</summary>
			/// <returns>The DFA state number.</returns>
			/// <remarks>When calling <see cref="Walk"/> for the first character, the return value should be passed to the second parameter.</remarks>
			vint										GetStartState()const;
			/// <summary>Test if this state can only lead to the end of one kind of token.</summary>
			/// <returns>Returns the token index if this state can only lead to the end of one kind of token. Returns -1 if not.</returns>
			/// <param name="state">The DFA state number.</param>
			vint										GetRelatedToken(vint state)const;
			/// <summary>Step forward by one character.</summary>
			/// <param name="input">The input character.</param>
			/// <param name="state">The current state. Returns the new current state when this function returns.</param>
			/// <param name="token">Returns the token index at the end of the token.</param>
			/// <param name="finalState">Returns true if it reaches the end of the token.</param>
			/// <param name="previousTokenStop">Returns true if the previous character is the end of the token.</param>
			/// <remarks>
			/// <p>
			/// The "finalState" argument is important.
			/// When "previousTokenStop" becomes true,
			/// it tells you that this character can no longer form a token with previous consumed characters.
			/// But it does not mean that the recognized token ends at the previous character.
			/// The recognized token could end earlier,
			/// which is indicated by the last time "finalState" became true.
			/// </p>
			/// <p>
			/// See the example for <see cref="RegexLexerWalker"/> about how to use this function.
			/// </p>
			/// </remarks>
			void										Walk(wchar_t input, vint& state, vint& token, bool& finalState, bool& previousTokenStop)const;
			/// <summary>Step forward by one character.</summary>
			/// <returns>Returns the new current state. It is used to walk the next character.</returns>
			/// <param name="input">The input character.</param>
			/// <param name="state">The current state.</param>
			vint										Walk(wchar_t input, vint state)const;
			/// <summary>Test if the input text is a closed token.</summary>
			/// <returns>Returns true if the input text is a closed token.</returns>
			/// <param name="input">The input text.</param>
			/// <param name="length">Size of the input text in characters.</param>
			/// <remarks>
			/// <p>
			/// A closed token means that
			/// there is a prefix that is a recognized token.
			/// At the same time, the input string itself could not be a token, or a prefix of any token:
			/// the recognized token has ended before reaching the end of the string.
			/// </p>
			/// <p>
			/// An unrecognized token is also considered as closed.
			/// </p>
			/// <p>
			/// For example, assume we have a token defined by "/d+./d+":
			/// <ul>
			///     <li>"2" is not a closed token, because it has not ended.</li>
			///     <li>
			///         "2.5." is a closed token, because it has ended at "2.5",
			///         and "2.5." could never be a prefix of any token,
			///         unless we have another token defined by "/d+./d+./d+".
			///     </li>
			/// </ul>
			/// </p>
			/// </remarks>
			/// <example><![CDATA[
			/// int main()
			/// {
			///     List<WString> tokenDefs;
			///     tokenDefs.Add(L"/d+./d+");
			///     tokenDefs.Add(L"/d+");
			///
			///     RegexLexer lexer(tokenDefs, {});
			///     RegexLexerWalker walker = lexer.Walk();
			///
			///     WString tests[] = { L".", L"2", L"2.", L"2.5", L"2.5." };
			///     FOREACH(WString, test, From(tests))
			///     {
			///         if (walker.IsClosedToken(test.Buffer(), test.Length()))
			///         {
			///             Console::WriteLine(test + L" is a closed token.");
			///         }
			///         else
			///         {
			///             Console::WriteLine(test + L" is not a closed token.");
			///         }
			///     }
			/// }
			/// ]]></example>
			bool										IsClosedToken(const wchar_t* input, vint length)const;
			/// <summary>Test if the input is a closed token.</summary>
			/// <returns>Returns true if the input text is a closed token.</returns>
			/// <param name="input">The input text.</param>
			/// <remarks>
			/// <p>
			/// A closed token means that
			/// there is a prefix that is a recognized token.
			/// At the same time, the input string itself could not be a token, or a prefix of any token:
			/// the recognized token has ended before reaching the end of the string.
			/// </p>
			/// <p>
			/// An unrecognized token is also considered as closed.
			/// </p>
			/// <p>
			/// For example, assume we have a token defined by "/d+./d+":
			/// <ul>
			///     <li>"2" is not a closed token, because it has not ended.</li>
			///     <li>
			///         "2.5." is a closed token, because it has ended at "2.5",
			///         and "2.5." could never be a prefix of any token,
			///         unless we have another token defined by "/d+./d+./d+".
			///     </li>
			/// </ul>
			/// </p>
			/// </remarks>
			/// <example><![CDATA[
			/// int main()
			/// {
			///     List<WString> tokenDefs;
			///     tokenDefs.Add(L"/d+./d+");
			///     tokenDefs.Add(L"/d+");
			///
			///     RegexLexer lexer(tokenDefs, {});
			///     RegexLexerWalker walker = lexer.Walk();
			///
			///     WString tests[] = { L".", L"2", L"2.", L"2.5", L"2.5." };
			///     FOREACH(WString, test, From(tests))
			///     {
			///         if (walker.IsClosedToken(test))
			///         {
			///             Console::WriteLine(test + L" is a closed token.");
			///         }
			///         else
			///         {
			///             Console::WriteLine(test + L" is not a closed token.");
			///         }
			///     }
			/// }
			/// ]]></example>
			bool										IsClosedToken(const WString& input)const;
		};

		/// <summary>Lexical colorizer. Call <see cref="RegexLexer::Colorize"/> to create this object.</summary>
		/// <example><![CDATA[
		/// int main()
		/// {
		///     List<WString> tokenDefs;
		///     tokenDefs.Add(L"/d+");
		///     tokenDefs.Add(L"[a-zA-Z_]/w*");
		///     tokenDefs.Add(L"[(){};]");
		///     tokenDefs.Add(L"/s+");
		///     tokenDefs.Add(L"///*+([^//*]|/*+[^//])*/*+//");
		///
		///     const wchar_t* lines[] = {
		///         L"/*********************",
		///         L"MAIN.CPP",
		///         L"*********************/",
		///         L"",
		///         L"int main()",
		///         L"{",
		///         L"    return 0;",
		///         L"}",
		///     };
		///
		///     struct Argument
		///     {
		///         // for a real colorizer, you can put a color buffer here.
		///         // the buffer is reused for every line of code.
		///         // but for the demo, I put the current processing text instead.
		///         // so that I am able to print what is processed.
		///         const wchar_t* processingText = nullptr;
		///     } argument;
		///
		///     RegexProc proc;
		///     proc.argument = &argument;
		///     proc.colorizeProc = [](void* argument, vint start, vint length, vint token)
		///     {
		///         // this is guaranteed by "proc.argument = &argument;"
		///         auto text = reinterpret_cast<Argument*>(argument)->processingText;
		///         Console::WriteLine(itow(token) + L": <" + WString(text + start, length) + L">");
		///     };
		///
		///     RegexLexer lexer(tokenDefs, proc);
		///     RegexLexerColorizer colorizer = lexer.Colorize();
		///
		///     FOREACH_INDEXER(const wchar_t*, line, index, From(lines))
		///     {
		///         Console::WriteLine(L"Begin line " + itow(index));
		///         argument.processingText = line;
		///         colorizer.Colorize(line, wcslen(line));
		///
		///         argument.processingText = nullptr;
		///         colorizer.Pass(L'\r');
		///         colorizer.Pass(L'\n');
		///         Console::WriteLine(L"");
		///     }
		/// }
		/// ]]></example>
		class RegexLexerColorizer : public Object
		{
			friend class RegexLexer;
		public:
			/// <summary>The value exchanged through <see cref="GetInternalState"/> and <see cref="SetInternalState"/>.</summary>
			struct InternalState
			{
				// Default values are -1, -1 and nullptr.
				// NOTE(review): interTokenState presumably carries the inter token state described in the remarks of Colorize - confirm.
				vint									currentState = -1;
				vint									interTokenId = -1;
				void*									interTokenState = nullptr;
			};

		protected:
			// The colorizer keeps its own copies of the walker and of the callback configuration, both received by the protected constructor.
			RegexLexerWalker							walker;
			RegexProc									proc;
			InternalState								internalState;

			// Helpers for the public functions; the "colorize" flag is a parameter of both.
			// NOTE(review): presumably "colorize" is false when called from Pass, whose remarks say that colorizeProc is not called - confirm.
			void										CallExtendProcAndColorizeProc(const wchar_t* input, vint length, RegexProcessingToken& token, bool colorize);
			vint										WalkOneToken(const wchar_t* input, vint length, vint start, bool colorize);

			RegexLexerColorizer(const RegexLexerWalker& _walker, RegexProc _proc);
		public:
			RegexLexerColorizer(const RegexLexerColorizer& colorizer);
			~RegexLexerColorizer();

			/// <summary>Get the internal state.</summary>
			/// <returns>The internal state.</returns>
			/// <remarks>
			/// <p>
			/// If <see cref="Colorize"/> has not been called, the return value of this function is the start state.
			/// </p>
			/// <p>
			/// If a text is multi-lined, <see cref="Colorize"/> could be called line by line, and the internal state is changed.
			/// </p>
			/// <p>
			/// In order to colorize another piece of multi-lined text,
			/// you can either save the start state and call <see cref="SetInternalState"/> to reset the state,
			/// or call <see cref="RegexLexer::Colorize"/> for a new colorizer.
			/// </p>
			/// </remarks>
			InternalState								GetInternalState();
			/// <summary>Restore the colorizer to a specified state.</summary>
			/// <param name="state">The state to restore.</param>
			void										SetInternalState(InternalState state);
			/// <summary>Step forward by one character.</summary>
			/// <param name="input">The input character.</param>
			/// <remarks>Callbacks in <see cref="RegexProc"/> will be called <b>except colorizeProc</b>, which is from the second argument of the constructor of <see cref="RegexLexer"/>.</remarks>
			void										Pass(wchar_t input);
			/// <summary>Get the start DFA state number, which represents the correct state before colorizing any characters.</summary>
			/// <returns>The DFA state number.</returns>
			vint										GetStartState()const;
			/// <summary>Colorize a text.</summary>
			/// <returns>An inter token state at the end of this line. It could be the same object as the one returned from the previous call.</returns>
			/// <param name="input">The text to colorize.</param>
			/// <param name="length">Size of the text in characters.</param>
			/// <remarks>
			/// <p>See <see cref="RegexProcessingToken::interTokenState"/> and <see cref="RegexProc::extendProc"/> for more information about the return value.</p>
			/// <p>Callbacks in <see cref="RegexProc"/> will be called, which is from the second argument of the constructor of <see cref="RegexLexer"/>.</p>
			/// </remarks>
			void*										Colorize(const wchar_t* input, vint length);
		};

		/// <summary>Lexical analyzer.</summary>
		class RegexLexer : public Object, private NotCopyable
		{
		protected:
			// NOTE(review): raw pointer next to a user-declared destructor; presumably owned and released by ~RegexLexer - confirm in the implementation.
			regex_internal::PureInterpretor*			pure = nullptr;
			collections::Array<vint>					ids;
			collections::Array<vint>					stateTokens;
			RegexProc									proc;

		public:
			/// <summary>Create a lexical analyzer by a set of regular expressions. [F:vl.regex.RegexToken.token] will be the index of the matched regular expression in the first argument.</summary>
			/// <param name="tokens">All regular expressions, each one represents a kind of token.</param>
			/// <param name="_proc">Configuration of all callbacks.</param>
			RegexLexer(const collections::IEnumerable<WString>& tokens, RegexProc _proc);
			~RegexLexer();

			/// <summary>Tokenize an input text.</summary>
			/// <returns>All tokens, including recognized tokens or unrecognized tokens. For unrecognized tokens, [F:vl.regex.RegexToken.token] will be -1.</returns>
			/// <param name="code">The text to tokenize.</param>
			/// <param name="codeIndex">Extra information that will be copied to [F:vl.regex.RegexToken.codeIndex].</param>
			/// <remarks>Callbacks in <see cref="RegexProc"/> will be called when iterating through tokens, which is from the second argument of the constructor of <see cref="RegexLexer"/>.</remarks>
			RegexTokens									Parse(const WString& code, vint codeIndex=-1)const;
			/// <summary>Create an equivalent walker from this lexical analyzer. A walker enables you to walk through characters one by one.</summary>
			/// <returns>The walker.</returns>
			RegexLexerWalker							Walk()const;
			/// <summary>Create an equivalent colorizer from this lexical analyzer.</summary>
			/// <returns>The colorizer.</returns>
			RegexLexerColorizer							Colorize()const;
		};
	}
}

#endif

/***********************************************************************
.\REGEXDATA.H
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/

#ifndef VCZH_REGEX_REGEXDATA
#define VCZH_REGEX_REGEXDATA


namespace vl
{
	namespace regex_internal
	{

/***********************************************************************
Data Structure
***********************************************************************/

		// A range of characters described by its two bounds, "begin" and "end".
		// NOTE(review): whether "end" is inclusive is not visible from this header - confirm in the implementation.
		class CharRange
		{
		public:
			// Sorted collection of ranges; used in this file for CharSetExpression::ranges and for the "subsets" parameters.
			typedef collections::SortedList<CharRange>		List;

			wchar_t					begin;
			wchar_t					end;

			CharRange();
			CharRange(wchar_t _begin, wchar_t _end);

			// Comparisons against another range.
			bool					operator<(CharRange item)const;
			bool					operator<=(CharRange item)const;
			bool					operator>(CharRange item)const;
			bool					operator>=(CharRange item)const;
			bool					operator==(CharRange item)const;
			bool					operator!=(CharRange item)const;

			// Comparisons against a single character.
			// NOTE(review): presumably these order the range relative to the character so that a sorted list can be searched by character - confirm.
			bool					operator<(wchar_t item)const;
			bool					operator<=(wchar_t item)const;
			bool					operator>(wchar_t item)const;
			bool					operator>=(wchar_t item)const;
			bool					operator==(wchar_t item)const;
			bool					operator!=(wchar_t item)const;
		};
	}

	// Specializes the POD trait for regex_internal::CharRange with Result set to true.
	// NOTE(review): the primary POD template is declared outside this chunk; presumably it lets containers treat the type as plain data - confirm in Vlpp.h.
	template<>
	struct POD<regex_internal::CharRange>
	{
		static const bool Result=true;
	};
}

#endif

/***********************************************************************
.\REGEXAUTOMATON.H
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/

#ifndef VCZH_REGEX_REGEXAUTOMATON
#define VCZH_REGEX_REGEXAUTOMATON


namespace vl
{
	namespace regex_internal
	{
		class State;
		class Transition;

		// A transition of an automaton, connecting the states "source" and "target".
		class Transition
		{
		public:
			// Kind of the transition. Automaton declares one New* factory function per value.
			enum Type
			{
				Chars,				// Character range transition
				Epsilon,
				BeginString,
				EndString,
				Nop,				// Non-epsilon transition with no input
				Capture,			// Begin capture transition
				Match,				// Capture matching transition
				Positive,			// Begin positive lookahead
				Negative,			// Begin negative lookahead
				NegativeFail,		// Negative lookahead failure
				End					// For Capture, Positive, Negative
			};

			State*								source;
			State*								target;
			// NOTE(review): presumably only meaningful for Chars transitions, since Automaton::NewChars is the only factory taking a CharRange - confirm.
			CharRange							range;
			Type								type;
			// NOTE(review): presumably only meaningful for Capture and Match transitions, whose factories take "capture" (and "index" for Match) - confirm.
			vint								capture;
			vint								index;
		};

		// A state of an automaton. Transitions are referenced through raw pointers.
		class State
		{
		public:
			// NOTE(review): presumably "transitions" are the outgoing transitions and "inputs" the incoming ones - confirm in Automaton::NewTransition.
			collections::List<Transition*>		transitions;
			collections::List<Transition*>		inputs;
			bool								finalState;
			// Untyped slot; its meaning is decided by the code using the automaton and is not visible from this header.
			void*								userData;
		};

		// Holds the states and transitions of an automaton in Ptr lists,
		// and declares factory functions for creating states and each kind of transition.
		class Automaton
		{
		public:
			typedef Ptr<Automaton>		Ref;

			collections::List<Ptr<State>>		states;
			collections::List<Ptr<Transition>>	transitions;
			collections::List<WString>			captureNames;
			State*								startState;

			Automaton();

			State*								NewState();
			// NewTransition takes only the two states; each function below it corresponds to one value of Transition::Type.
			// NOTE(review): presumably the typed factories are built on NewTransition and set Transition::type - confirm.
			Transition*							NewTransition(State* start, State* end);
			Transition*							NewChars(State* start, State* end, CharRange range);
			Transition*							NewEpsilon(State* start, State* end);
			Transition*							NewBeginString(State* start, State* end);
			Transition*							NewEndString(State* start, State* end);
			Transition*							NewNop(State* start, State* end);
			Transition*							NewCapture(State* start, State* end, vint capture);
			Transition*							NewMatch(State* start, State* end, vint capture, vint index=-1);
			Transition*							NewPositive(State* start, State* end);
			Transition*							NewNegative(State* start, State* end);
			Transition*							NewNegativeFail(State* start, State* end);
			Transition*							NewEnd(State* start, State* end);
		};

		// Automaton conversion functions, defined outside this header.
		// PureEpsilonChecker and RichEpsilonChecker both match the "epsilonChecker" function pointer type accepted by EpsilonNfaToNfa.
		// NOTE(review): presumably a checker returns true for transitions that should be treated as epsilon during the conversion - confirm.
		extern bool								PureEpsilonChecker(Transition* transition);
		extern bool								RichEpsilonChecker(Transition* transition);
		extern bool								AreEqual(Transition* transA, Transition* transB);
		// Both conversions return a new Automaton::Ref and fill a caller-supplied map between State pointers.
		// NOTE(review): the direction of each map (new state to old state, or the reverse) is not visible here - confirm before relying on it.
		extern Automaton::Ref					EpsilonNfaToNfa(Automaton::Ref source, bool(*epsilonChecker)(Transition*), collections::Dictionary<State*, State*>& nfaStateMap);
		extern Automaton::Ref					NfaToDfa(Automaton::Ref source, collections::Group<State*, State*>& dfaStateMap);
	}
}

#endif

/***********************************************************************
.\REGEXEXPRESSION.H
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License

Classes:
	Expression						: Base class of expressions	|
	CharSetExpression				: Character set				| a, [a-b], [^a-b0_9], \.rnt\/()+*?{}[]<>^$!=SsDdLlWw, [\rnt-[]\/^$]
	LoopExpression					: Repeat					| a{3}, a{3,}, a{1,3}, a+, a*, a?, LOOP?
	SequenceExpression				: Sequence of two regex		| ab
	AlternateExpression				: Alternative of two regex	| a|b
	BeginExpression					: <Rich> String begin		| ^
	EndExpression					: <Rich> String end			| $
	CaptureExpression				: <Rich> Capture			| (<name>expr), (?expr)
	MatchExpression					: <Rich> Capture matching	| (<$name>), (<$name;i>), (<$i>)
	PositiveExpression				: <Rich> Positive lookahead	| (=expr)
	NegativeExpression				: <Rich> Negative lookahead	| (!expr)
	UsingExpression					: refer a regex				| (<#name1>expr)...(<&name1>)...

	RegexExpression					: Regular Expression

Functions:
	ParseRegexExpression			: Regex Syntax Analyzer
***********************************************************************/

#ifndef VCZH_REGEX_REGEXEXPRESSION
#define VCZH_REGEX_REGEXEXPRESSION


namespace vl
{
	namespace regex_internal
	{
		class IRegexExpressionAlgorithm;

/***********************************************************************
Regex Expression AST
***********************************************************************/

		// Base class of expressions (regex AST nodes).
		// Apply is the visitor entry point that every node class implements; the other members are declared here and defined elsewhere.
		class Expression : public Object, private NotCopyable
		{
		public:
			typedef Ptr<Expression>											Ref;
			typedef collections::Dictionary<WString, Expression::Ref>		Map;

			virtual void				Apply(IRegexExpressionAlgorithm& algorithm)=0;
			bool						IsEqual(Expression* expression);
			bool						HasNoExtension();
			bool						CanTreatAsPure();
			// The three functions below all work on a caller-supplied list of character ranges.
			void						NormalizeCharSet(CharRange::List& subsets);
			void						CollectCharSet(CharRange::List& subsets);
			void						ApplyCharSet(CharRange::List& subsets);
			// NOTE(review): by its name, builds an epsilon-NFA for this expression - confirm in the implementation.
			Automaton::Ref				GenerateEpsilonNfa();
		};

		// Character set, e.g. a, [a-b], [^a-b0-9] or an escaped character class.
		class CharSetExpression : public Expression
		{
		public:
			CharRange::List				ranges;
			// NOTE(review): presumably true for a negated set such as [^a-b] - confirm in the parser.
			bool						reverse;

			// NOTE(review): the meaning of the returned bool (added, or conflict found) is not visible here - confirm before use.
			bool						AddRangeWithConflict(CharRange range);
			void						Apply(IRegexExpressionAlgorithm& algorithm);
		};

		// Repeat, e.g. a{3}, a{3,}, a{1,3}, a+, a*, a?
		class LoopExpression : public Expression
		{
		public:
			Expression::Ref				expression;		// The regex to loop
			vint						min;			// Minimum count of looping
			vint						max;			// Maximum count of looping, -1 for infinite
			bool						preferLong;		// Prefer longer matching

			void						Apply(IRegexExpressionAlgorithm& algorithm);
		};

		// Sequence of two regex, e.g. ab
		class SequenceExpression : public Expression
		{
		public:
			Expression::Ref				left;			// First regex to match
			Expression::Ref				right;			// Last regex to match

			void						Apply(IRegexExpressionAlgorithm& algorithm);
		};

		// Alternative of two regex, e.g. a|b
		class AlternateExpression : public Expression
		{
		public:
			Expression::Ref				left;			// First alternative
			Expression::Ref				right;			// Second alternative

			void						Apply(IRegexExpressionAlgorithm& algorithm);
		};

		// <Rich> String begin: ^
		class BeginExpression: public Expression
		{
		public:

			void						Apply(IRegexExpressionAlgorithm& algorithm);
		};

		// <Rich> String end: $
		class EndExpression : public Expression
		{
		public:

			void						Apply(IRegexExpressionAlgorithm& algorithm);
		};

		// <Rich> Capture, e.g. (<name>expr), (?expr)
		class CaptureExpression : public Expression
		{
		public:
			WString						name;			// Capture name, empty for anonymous capture
			Expression::Ref				expression;		// Regex to match

			void						Apply(IRegexExpressionAlgorithm& algorithm);
		};

		// <Rich> Capture matching, e.g. (<$name>), (<$name;i>), (<$i>)
		class MatchExpression : public Expression
		{
		public:
			WString						name;			// Capture name, empty for anonymous
			vint						index;			// The index of the captured text to match that is associated with the name, -1 for all of them

			void						Apply(IRegexExpressionAlgorithm& algorithm);
		};

		// <Rich> Positive lookahead: (=expr)
		class PositiveExpression : public Expression
		{
		public:
			Expression::Ref				expression;		// Regex to match

			void						Apply(IRegexExpressionAlgorithm& algorithm);
		};

		// <Rich> Negative lookahead: (!expr)
		class NegativeExpression : public Expression
		{
		public:
			Expression::Ref				expression;		// Regex to match

			void						Apply(IRegexExpressionAlgorithm& algorithm);
		};

		// Refers to a named regex: defined by (<#name1>expr), used by (<&name1>)
		class UsingExpression : public Expression
		{
		public:
			WString						name;			// Name of the regex to refer

			void						Apply(IRegexExpressionAlgorithm& algorithm);
		};

		// A whole regular expression: the main expression plus the named definitions it may refer to.
		class RegexExpression : public Object, private NotCopyable
		{
		public:
			typedef Ptr<RegexExpression>						Ref;

			Expression::Map				definitions;	// Named regex to be referred
			Expression::Ref				expression;		// Regex to match

			// NOTE(review): presumably returns "expression" with references to "definitions" resolved - confirm in the implementation.
			Expression::Ref				Merge();
		};

/***********************************************************************
Visitor
***********************************************************************/

		// Visitor interface over the expression classes: one Visit overload per concrete Expression subclass.
		// It is the argument type of Expression::Apply.
		class IRegexExpressionAlgorithm : public Interface
		{
		public:
			virtual void				Visit(CharSetExpression* expression)=0;
			virtual void				Visit(LoopExpression* expression)=0;
			virtual void				Visit(SequenceExpression* expression)=0;
			virtual void				Visit(AlternateExpression* expression)=0;
			virtual void				Visit(BeginExpression* expression)=0;
			virtual void				Visit(EndExpression* expression)=0;
			virtual void				Visit(CaptureExpression* expression)=0;
			virtual void				Visit(MatchExpression* expression)=0;
			virtual void				Visit(PositiveExpression* expression)=0;
			virtual void				Visit(NegativeExpression* expression)=0;
			virtual void				Visit(UsingExpression* expression)=0;
		};

		// Adapts the visitor interface to typed Apply overloads that take a parameter and return a value.
		// Invoke publishes a pointer to its parameter and dispatches through Expression::Apply;
		// the Visit overload that gets called forwards to the matching Apply overload and keeps the result,
		// which Invoke then returns.
		// Invoke may be re-entered from inside an Apply overload: the parameter pointer of the outer call
		// is put back when the inner call ends, so it never refers to a destroyed parameter.
		template<typename ReturnType, typename ParameterType=void*>
		class RegexExpressionAlgorithm : public Object, public IRegexExpressionAlgorithm
		{
		private:
			ReturnType					returnValue;
			// Parameter of the Invoke call that is currently running, nullptr when there is none.
			ParameterType*				parameterValue=nullptr;

			// Publishes a parameter pointer for the duration of one Invoke call
			// and puts the previous pointer back on scope exit, including when Apply throws.
			class ParameterScope
			{
			private:
				ParameterType*&			slot;
				ParameterType*			previous;
			public:
				ParameterScope(ParameterType*& _slot, ParameterType* current)
					:slot(_slot)
					,previous(_slot)
				{
					slot=current;
				}

				~ParameterScope()
				{
					slot=previous;
				}
			};
		public:

			// Runs the algorithm on an expression given by raw pointer and returns the value produced by Apply.
			ReturnType Invoke(Expression* expression, ParameterType parameter)
			{
				ParameterScope scope(parameterValue, &parameter);
				expression->Apply(*this);
				return returnValue;
			}

			// Same as above for an expression held by Expression::Ref.
			ReturnType Invoke(Expression::Ref expression, ParameterType parameter)
			{
				ParameterScope scope(parameterValue, &parameter);
				expression->Apply(*this);
				return returnValue;
			}

			// One overload per expression class, to be implemented by the concrete algorithm.
			virtual ReturnType			Apply(CharSetExpression* expression, ParameterType parameter)=0;
			virtual ReturnType			Apply(LoopExpression* expression, ParameterType parameter)=0;
			virtual ReturnType			Apply(SequenceExpression* expression, ParameterType parameter)=0;
			virtual ReturnType			Apply(AlternateExpression* expression, ParameterType parameter)=0;
			virtual ReturnType			Apply(BeginExpression* expression, ParameterType parameter)=0;
			virtual ReturnType			Apply(EndExpression* expression, ParameterType parameter)=0;
			virtual ReturnType			Apply(CaptureExpression* expression, ParameterType parameter)=0;
			virtual ReturnType			Apply(MatchExpression* expression, ParameterType parameter)=0;
			virtual ReturnType			Apply(PositiveExpression* expression, ParameterType parameter)=0;
			virtual ReturnType			Apply(NegativeExpression* expression, ParameterType parameter)=0;
			virtual ReturnType			Apply(UsingExpression* expression, ParameterType parameter)=0;
		public:
			// IRegexExpressionAlgorithm implementation: each Visit passes a copy of the published parameter
			// to the matching Apply overload and stores the result for Invoke.
			void Visit(CharSetExpression* expression) override
			{
				returnValue=Apply(expression, *parameterValue);
			}

			void Visit(LoopExpression* expression) override
			{
				returnValue=Apply(expression, *parameterValue);
			}

			void Visit(SequenceExpression* expression) override
			{
				returnValue=Apply(expression, *parameterValue);
			}

			void Visit(AlternateExpression* expression) override
			{
				returnValue=Apply(expression, *parameterValue);
			}

			void Visit(BeginExpression* expression) override
			{
				returnValue=Apply(expression, *parameterValue);
			}

			void Visit(EndExpression* expression) override
			{
				returnValue=Apply(expression, *parameterValue);
			}

			void Visit(CaptureExpression* expression) override
			{
				returnValue=Apply(expression, *parameterValue);
			}

			void Visit(MatchExpression* expression) override
			{
				returnValue=Apply(expression, *parameterValue);
			}

			void Visit(PositiveExpression* expression) override
			{
				returnValue=Apply(expression, *parameterValue);
			}

			void Visit(NegativeExpression* expression) override
			{
				returnValue=Apply(expression, *parameterValue);
			}

			void Visit(UsingExpression* expression) override
			{
				returnValue=Apply(expression, *parameterValue);
			}
		};

		// Specialization for algorithms that return nothing.
		// Invoke publishes a pointer to its parameter and dispatches through Expression::Apply;
		// the Visit overload that gets called forwards to the matching Apply overload.
		// Invoke may be re-entered from inside an Apply overload: the parameter pointer of the outer call
		// is put back when the inner call ends, so it never refers to a destroyed parameter.
		template<typename ParameterType>
		class RegexExpressionAlgorithm<void, ParameterType> : public Object, public IRegexExpressionAlgorithm
		{
		private:
			// Parameter of the Invoke call that is currently running, nullptr when there is none.
			ParameterType*				parameterValue=nullptr;

			// Publishes a parameter pointer for the duration of one Invoke call
			// and puts the previous pointer back on scope exit, including when Apply throws.
			class ParameterScope
			{
			private:
				ParameterType*&			slot;
				ParameterType*			previous;
			public:
				ParameterScope(ParameterType*& _slot, ParameterType* current)
					:slot(_slot)
					,previous(_slot)
				{
					slot=current;
				}

				~ParameterScope()
				{
					slot=previous;
				}
			};
		public:

			// Runs the algorithm on an expression given by raw pointer.
			void Invoke(Expression* expression, ParameterType parameter)
			{
				ParameterScope scope(parameterValue, &parameter);
				expression->Apply(*this);
			}

			// Same as above for an expression held by Expression::Ref.
			void Invoke(Expression::Ref expression, ParameterType parameter)
			{
				ParameterScope scope(parameterValue, &parameter);
				expression->Apply(*this);
			}

			// One overload per expression class, to be implemented by the concrete algorithm.
			virtual void				Apply(CharSetExpression* expression, ParameterType parameter)=0;
			virtual void				Apply(LoopExpression* expression, ParameterType parameter)=0;
			virtual void				Apply(SequenceExpression* expression, ParameterType parameter)=0;
			virtual void				Apply(AlternateExpression* expression, ParameterType parameter)=0;
			virtual void				Apply(BeginExpression* expression, ParameterType parameter)=0;
			virtual void				Apply(EndExpression* expression, ParameterType parameter)=0;
			virtual void				Apply(CaptureExpression* expression, ParameterType parameter)=0;
			virtual void				Apply(MatchExpression* expression, ParameterType parameter)=0;
			virtual void				Apply(PositiveExpression* expression, ParameterType parameter)=0;
			virtual void				Apply(NegativeExpression* expression, ParameterType parameter)=0;
			virtual void				Apply(UsingExpression* expression, ParameterType parameter)=0;
		public:
			// IRegexExpressionAlgorithm implementation: each Visit passes a copy of the published parameter
			// to the matching Apply overload.
			void Visit(CharSetExpression* expression) override
			{
				Apply(expression, *parameterValue);
			}

			void Visit(LoopExpression* expression) override
			{
				Apply(expression, *parameterValue);
			}

			void Visit(SequenceExpression* expression) override
			{
				Apply(expression, *parameterValue);
			}

			void Visit(AlternateExpression* expression) override
			{
				Apply(expression, *parameterValue);
			}

			void Visit(BeginExpression* expression) override
			{
				Apply(expression, *parameterValue);
			}

			void Visit(EndExpression* expression) override
			{
				Apply(expression, *parameterValue);
			}

			void Visit(CaptureExpression* expression) override
			{
				Apply(expression, *parameterValue);
			}

			void Visit(MatchExpression* expression) override
			{
				Apply(expression, *parameterValue);
			}

			void Visit(PositiveExpression* expression) override
			{
				Apply(expression, *parameterValue);
			}

			void Visit(NegativeExpression* expression) override
			{
				Apply(expression, *parameterValue);
			}

			void Visit(UsingExpression* expression) override
			{
				Apply(expression, *parameterValue);
			}
		};

/***********************************************************************
Helper Functions
***********************************************************************/

		// Parsing functions, defined outside this header.
		// Every Parse* function except ParseRegexExpression takes the input cursor by reference.
		// NOTE(review): presumably the cursor is advanced past the consumed text and a null Ptr signals "no match" - confirm in the implementation.
		extern Ptr<LoopExpression>		ParseLoop(const wchar_t*& input);
		extern Ptr<Expression>			ParseCharSet(const wchar_t*& input);
		extern Ptr<Expression>			ParseFunction(const wchar_t*& input);
		extern Ptr<Expression>			ParseUnit(const wchar_t*& input);
		extern Ptr<Expression>			ParseJoin(const wchar_t*& input);
		extern Ptr<Expression>			ParseAlt(const wchar_t*& input);
		extern Ptr<Expression>			ParseExpression(const wchar_t*& input);
		// Regex syntax analyzer: turns the text of a regular expression into a RegexExpression.
		extern RegexExpression::Ref		ParseRegexExpression(const WString& code);

		// Text helpers for regex escaping; each takes a string and returns a new string or a bool.
		// NOTE(review): the exact escaping rules are not visible from this header - confirm in the implementation.
		extern WString					EscapeTextForRegex(const WString& literalString);
		extern WString					UnescapeTextForRegex(const WString& escapedText);
		extern WString					NormalizeEscapedTextForRegex(const WString& escapedText);
		extern bool						IsRegexEscapedLiteralString(const WString& regex);
	}
}

#endif

/***********************************************************************
.\REGEXPURE.H
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/

#ifndef VCZH_REGEX_REGEXPURE
#define VCZH_REGEX_REGEXPURE


namespace vl
{
	namespace regex_internal
	{
		// Output of PureInterpretor::MatchHead and PureInterpretor::Match.
		class PureResult
		{
		public:
			// NOTE(review): presumably the position and size of the match in characters - confirm in the implementation.
			vint				start;
			vint				length;
			// NOTE(review): presumably DFA state numbers recorded by the interpreter - confirm what each one means before use.
			vint				finalState;
			vint				terminateState;
		};

		// Table-driven interpreter constructed from a DFA and a list of character ranges.
		class PureInterpretor : public Object
		{
		protected:
			// Number of entries in charMap, chosen per toolchain.
			// NOTE(review): there is no #else branch, so when neither VCZH_MSVC nor VCZH_GCC is defined
			// SupportedCharCount is not declared and the charMap declaration below does not compile.
#if defined VCZH_MSVC
			static const vint	SupportedCharCount = 0x10000;		// UTF-16
#elif defined VCZH_GCC
			static const vint	SupportedCharCount = 0x110000;		// UTF-32
#endif

			// charMap is an array stored inside the object, so every instance carries SupportedCharCount entries of vint.
			vint				charMap[SupportedCharCount];		// char -> char set index
			vint**				transition;							// (state * char set index) -> state*
			bool*				finalState;							// state -> bool
			vint*				relatedFinalState;					// state -> (finalState or -1)
			vint				stateCount;
			vint				charSetCount;
			vint				startState;
		public:
			PureInterpretor(Automaton::Ref dfa, CharRange::List& subsets);
			~PureInterpretor();

			// Both functions report their result through "result" and return a bool.
			// NOTE(review): presumably "start" is the beginning of the whole text and "input" the position to match from - confirm.
			bool				MatchHead(const wchar_t* input, const wchar_t* start, PureResult& result);
			bool				Match(const wchar_t* input, const wchar_t* start, PureResult& result);

			vint				GetStartState();
			vint				Transit(wchar_t input, vint state);
			bool				IsFinalState(vint state);
			bool				IsDeadState(vint state);

			// NOTE(review): presumably PrepareForRelatedFinalStateTable must be called before GetRelatedFinalState - confirm.
			void				PrepareForRelatedFinalStateTable();
			vint				GetRelatedFinalState(vint state);
		};
	}
}

#endif

/***********************************************************************
.\REGEXRICH.H
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/

#ifndef VCZH_REGEX_REGEXRICH
#define VCZH_REGEX_REGEXRICH


namespace vl
{
	namespace regex_internal
	{
		// Record with three public integer fields describing one capture; stored in
		// RichResult::captures. Equality comparison is declared here and defined elsewhere.
		// NOTE(review): field meanings below are inferred from the names — confirm against the implementation.
		class CaptureRecord
		{
		public:
			vint								capture;	// presumably an index identifying which capture this is
			vint								start;		// presumably the offset where the captured text begins
			vint								length;		// presumably the number of captured characters

			bool								operator==(const CaptureRecord& record)const;
		};
	}

	// Explicit specialization of the vl::POD trait for regex_internal::CaptureRecord,
	// setting Result to true.
	// NOTE(review): presumably this lets vl containers treat CaptureRecord as plain data
	// (e.g. raw memory copies) — confirm against the POD trait's definition in Vlpp.h.
	template<>
	struct POD<regex_internal::CaptureRecord>
	{
		static const bool Result=true;
	};

	namespace regex_internal
	{
		// Result record passed by reference to RichInterpretor::MatchHead and
		// RichInterpretor::Match as their output argument.
		// NOTE(review): `start` / `length` presumably describe the matched range and
		// `captures` the captures collected during the match — confirm against the implementation.
		class RichResult
		{
		public:
			vint								start;
			vint								length;
			collections::List<CaptureRecord>	captures;
		};

		// Interpreter constructed from an automaton (`_dfa`). It keeps the automaton
		// reference plus a raw pointer to per-item UserData.
		//
		// NOTE(review): the class declares a destructor and holds a raw `datas` pointer, but
		// no copy constructor / copy assignment is declared here. If the destructor releases
		// `datas`, copying an instance would be unsafe — confirm instances are never copied.
		class RichInterpretor : public Object
		{
		protected:
			// Extra data kept alongside the automaton.
			// NOTE(review): presumably one UserData per automaton state — confirm.
			class UserData
			{
			public:
				bool							NeedKeepState;
			};

			Automaton::Ref						dfa;
			UserData*							datas;
		public:
			RichInterpretor(Automaton::Ref _dfa);
			~RichInterpretor();

			// Both matchers take the current input pointer, the pointer to the beginning of the
			// text, and a RichResult to fill; the bool return presumably reports whether a match
			// was found.
			// NOTE(review): presumably MatchHead only matches at `input` while Match searches
			// forward from `input` — confirm against the implementation.
			bool								MatchHead(const wchar_t* input, const wchar_t* start, RichResult& result);
			bool								Match(const wchar_t* input, const wchar_t* start, RichResult& result);
			// Returns a read-only reference to a list of capture names.
			const collections::List<WString>&	CaptureNames();
		};
	}
}

#endif

/***********************************************************************
.\REGEXWRITER.H
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/

#ifndef VCZH_REGEX_REGEXWRITER
#define VCZH_REGEX_REGEXWRITER


namespace vl
{
	namespace regex
	{
		// Value-style wrapper around a regex_internal expression reference. Every member
		// function and operator is const and returns a new RegexNode by value, so nodes
		// can be combined in expressions without mutating their operands.
		// NOTE(review): the regex meaning of each combinator is defined in the
		// implementation, which is not visible here; the hints below are inferred from names.
		class RegexNode : public Object
		{
		public:
			// The wrapped expression; public, so callers can read it directly.
			vl::regex_internal::Expression::Ref		expression;

			// NOTE(review): single-argument constructor is not `explicit`, so an
			// Expression::Ref converts implicitly to RegexNode — presumably intentional.
			RegexNode(vl::regex_internal::Expression::Ref _expression);

			// Repetition-style combinators (presumably: Some = one or more, Any = zero or more,
			// Opt = zero or one, Loop = between min and max times, AtLeast = min or more — confirm).
			RegexNode					Some()const;
			RegexNode					Any()const;
			RegexNode					Opt()const;
			RegexNode					Loop(vint min, vint max)const;
			RegexNode					AtLeast(vint min)const;
			// Binary combinators taking another node (presumably + is sequence and | is alternation — confirm).
			RegexNode					operator+(const RegexNode& node)const;
			RegexNode					operator|(const RegexNode& node)const;
			// Unary operators; their regex meaning is not visible from this header.
			RegexNode					operator+()const;
			RegexNode					operator-()const;
			RegexNode					operator!()const;
			// Binary % taking another node; its regex meaning is not visible from this header.
			RegexNode					operator%(const RegexNode& node)const;
		};

		// Factory functions that each return a RegexNode by value. Only the declarations
		// are visible here.
		// NOTE(review): the descriptions below are inferred from the names and parameters —
		// confirm against the definitions.

		// Takes a name and a node; presumably wraps `node` in a named capture.
		extern RegexNode				rCapture(const WString& name, const RegexNode& node);
		// Takes a name; presumably refers to a named sub-expression.
		extern RegexNode				rUsing(const WString& name);
		// Two overloads: by name with an optional index (default -1), or by index alone;
		// presumably match previously captured text.
		extern RegexNode				rMatch(const WString& name, vint index=-1);
		extern RegexNode				rMatch(vint index);
		// Presumably anchors for the beginning and the end of the input.
		extern RegexNode				rBegin();
		extern RegexNode				rEnd();
		// Takes one or two characters (second defaults to L'\0'); presumably a single
		// character or a character range from `a` to `b`.
		extern RegexNode				rC(wchar_t a, wchar_t b=L'\0');
		// Presumably shorthand character classes (digit, letter, word character).
		extern RegexNode				r_d();
		extern RegexNode				r_l();
		extern RegexNode				r_w();
		// Presumably matches any single character.
		extern RegexNode				rAnyChar();
	}
}

#endif