GacUI/Import/VlppRegex.cpp

/***********************************************************************
THIS FILE IS AUTOMATICALLY GENERATED. DO NOT MODIFY
DEVELOPER: Zihan Chen(vczh)
***********************************************************************/
#include "VlppRegex.h"

/***********************************************************************
.\REGEX.CPP
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/


namespace vl
{
	namespace regex_internal
	{
		void ReadInt(stream::IStream& inputStream, vint& value);
		void ReadInts(stream::IStream& inputStream, vint count, vint* values);
		void WriteInt(stream::IStream& outputStream, vint value);
		void WriteInts(stream::IStream& outputStream, vint count, vint* values);
	}

	namespace regex
	{
		using namespace collections;
		using namespace regex_internal;

/***********************************************************************
String Conversion
***********************************************************************/

		template<typename T>
		struct U32;

		template<>
		struct U32<wchar_t>
		{
			static constexpr U32String(*ToU32)(const WString&) = &wtou32;
			static constexpr WString(*FromU32)(const U32String&) = &u32tow;
		};

		template<>
		struct U32<char8_t>
		{
			static constexpr U32String(*ToU32)(const U8String&) = &u8tou32;
			static constexpr U8String(*FromU32)(const U32String&) = &u32tou8;
		};

		template<>
		struct U32<char16_t>
		{
			static constexpr U32String(*ToU32)(const U16String&) = &u16tou32;
			static constexpr U16String(*FromU32)(const U32String&) = &u32tou16;
		};

		template<>
		struct U32<char32_t>
		{
			static U32String ToU32(const U32String& text) { return text; }
			static U32String FromU32(const U32String& text) { return text; }
		};

/***********************************************************************
RegexMatch_<T>
***********************************************************************/

		template<typename T>
		RegexMatch_<T>::RegexMatch_(const ObjectString<T>& _string, PureResult* _result)
			:success(true)
			, result(_string, _result->start, _result->length)
		{
		}

		template<typename T>
		RegexMatch_<T>::RegexMatch_(const ObjectString<T>& _string, RichResult* _result)
			: success(true)
			, result(_string, _result->start, _result->length)
		{
			// TODO: (enumerable) foreach
			for (vint i = 0; i < _result->captures.Count(); i++)
			{
				CaptureRecord& capture = _result->captures[i];
				if (capture.capture == -1)
				{
					captures.Add(RegexString_<T>(_string, capture.start, capture.length));
				}
				else
				{
					groups.Add(capture.capture, RegexString_<T>(_string, capture.start, capture.length));
				}
			}
		}

		template<typename T>
		RegexMatch_<T>::RegexMatch_(const RegexString_<T>& _result)
			:success(false)
			, result(_result)
		{
		}

		template<typename T>
		bool RegexMatch_<T>::Success()const
		{
			return success;
		}

		template<typename T>
		const RegexString_<T>& RegexMatch_<T>::Result()const
		{
			return result;
		}

		template<typename T>
		const typename RegexMatch_<T>::CaptureList& RegexMatch_<T>::Captures()const
		{
			return captures;
		}

		template<typename T>
		const typename RegexMatch_<T>::CaptureGroup& RegexMatch_<T>::Groups()const
		{
			return groups;
		}

/***********************************************************************
RegexBase_
***********************************************************************/

		template<typename T>
		void RegexBase_::Process(const ObjectString<T>& text, bool keepEmpty, bool keepSuccess, bool keepFail, typename RegexMatch_<T>::List& matches)const
		{
			if (rich)
			{
				const T* start = text.Buffer();
				const T* input = start;
				RichResult result;
				while (rich->Match(input, start, result))
				{
					vint offset = input - start;
					if (keepFail)
					{
						if (result.start > offset || keepEmpty)
						{
							matches.Add(Ptr(new RegexMatch_<T>(RegexString_<T>(text, offset, result.start - offset))));
						}
					}
					if (keepSuccess)
					{
						matches.Add(Ptr(new RegexMatch_<T>(text, &result)));
					}
					input = start + result.start + result.length;
				}
				if (keepFail)
				{
					vint remain = input - start;
					vint length = text.Length() - remain;
					if (length || keepEmpty)
					{
						matches.Add(Ptr(new RegexMatch_<T>(RegexString_<T>(text, remain, length))));
					}
				}
			}
			else
			{
				const T* start = text.Buffer();
				const T* input = start;
				PureResult result;
				while (pure->Match(input, start, result))
				{
					vint offset = input - start;
					if (keepFail)
					{
						if (result.start > offset || keepEmpty)
						{
							matches.Add(Ptr(new RegexMatch_<T>(RegexString_<T>(text, offset, result.start - offset))));
						}
					}
					if (keepSuccess)
					{
						matches.Add(Ptr(new RegexMatch_<T>(text, &result)));
					}
					input = start + result.start + result.length;
				}
				if (keepFail)
				{
					vint remain = input - start;
					vint length = text.Length() - remain;
					if (length || keepEmpty)
					{
						matches.Add(Ptr(new RegexMatch_<T>(RegexString_<T>(text, remain, length))));
					}
				}
			}
		}

		RegexBase_::~RegexBase_()
		{
			if (pure) delete pure;
			if (rich) delete rich;
		}

		template<typename T>
		typename RegexMatch_<T>::Ref RegexBase_::MatchHead(const ObjectString<T>& text)const
		{
			if (rich)
			{
				RichResult result;
				if (rich->MatchHead(text.Buffer(), text.Buffer(), result))
				{
					return Ptr(new RegexMatch_<T>(text, &result));
				}
				else
				{
					return nullptr;
				}
			}
			else
			{
				PureResult result;
				if (pure->MatchHead(text.Buffer(), text.Buffer(), result))
				{
					return Ptr(new RegexMatch_<T>(text, &result));
				}
				else
				{
					return nullptr;
				}
			}
		}

		template<typename T>
		typename RegexMatch_<T>::Ref RegexBase_::Match(const ObjectString<T>& text)const
		{
			if (rich)
			{
				RichResult result;
				if (rich->Match(text.Buffer(), text.Buffer(), result))
				{
					return Ptr(new RegexMatch_<T>(text, &result));
				}
				else
				{
					return nullptr;
				}
			}
			else
			{
				PureResult result;
				if (pure->Match(text.Buffer(), text.Buffer(), result))
				{
					return Ptr(new RegexMatch_<T>(text, &result));
				}
				else
				{
					return nullptr;
				}
			}
		}

		template<typename T>
		bool RegexBase_::TestHead(const ObjectString<T>& text)const
		{
			if (pure)
			{
				PureResult result;
				return pure->MatchHead(text.Buffer(), text.Buffer(), result);
			}
			else
			{
				RichResult result;
				return rich->MatchHead(text.Buffer(), text.Buffer(), result);
			}
		}

		template<typename T>
		bool RegexBase_::Test(const ObjectString<T>& text)const
		{
			if (pure)
			{
				PureResult result;
				return pure->Match(text.Buffer(), text.Buffer(), result);
			}
			else
			{
				RichResult result;
				return rich->Match(text.Buffer(), text.Buffer(), result);
			}
		}

		template<typename T>
		void RegexBase_::Search(const ObjectString<T>& text, typename RegexMatch_<T>::List& matches)const
		{
			Process(text, false, true, false, matches);
		}

		template<typename T>
		void RegexBase_::Split(const ObjectString<T>& text, bool keepEmptyMatch, typename RegexMatch_<T>::List& matches)const
		{
			Process(text, keepEmptyMatch, false, true, matches);
		}

		template<typename T>
		void RegexBase_::Cut(const ObjectString<T>& text, bool keepEmptyMatch, typename RegexMatch_<T>::List& matches)const
		{
			Process(text, keepEmptyMatch, true, true, matches);
		}

/***********************************************************************
Regex_<T>
***********************************************************************/

		template<typename T>
		Regex_<T>::Regex_(const ObjectString<T>& code, bool preferPure)
		{
			CharRange::List subsets;
			auto regex = ParseRegexExpression(U32<T>::ToU32(code));
			auto expression = regex->Merge();
			expression->NormalizeCharSet(subsets);

			bool pureRequired = false;
			bool richRequired = false;
			if (preferPure)
			{
				if (expression->HasNoExtension())
				{
					pureRequired = true;
				}
				else
				{
					if (expression->CanTreatAsPure())
					{
						pureRequired = true;
						richRequired = true;
					}
					else
					{
						richRequired = true;
					}
				}
			}
			else
			{
				richRequired = true;
			}

			try
			{
				if (pureRequired)
				{
					Dictionary<State*, State*> nfaStateMap;
					Group<State*, State*> dfaStateMap;
					Ptr<Automaton> eNfa = expression->GenerateEpsilonNfa();
					Ptr<Automaton> nfa = EpsilonNfaToNfa(eNfa, PureEpsilonChecker, nfaStateMap);
					Ptr<Automaton> dfa = NfaToDfa(nfa, dfaStateMap);
					pure = new PureInterpretor(dfa, subsets);
				}
				if (richRequired)
				{
					Dictionary<State*, State*> nfaStateMap;
					Group<State*, State*> dfaStateMap;
					Ptr<Automaton> eNfa = expression->GenerateEpsilonNfa();
					Ptr<Automaton> nfa = EpsilonNfaToNfa(eNfa, RichEpsilonChecker, nfaStateMap);
					Ptr<Automaton> dfa = NfaToDfa(nfa, dfaStateMap);
					rich = new RichInterpretor(dfa);

					for (auto&& name : rich->CaptureNames())
					{
						captureNames.Add(U32<T>::FromU32(name));
					}
				}
			}
			catch (...)
			{
				if (pure)delete pure;
				if (rich)delete rich;
				throw;
			}
		}

/***********************************************************************
RegexTokens_<T>
***********************************************************************/

		template<typename T>
		class RegexTokenEnumerator : public Object, public IEnumerator<RegexToken_<T>>
		{
		protected:
			RegexToken_<T>			token;
			vint					index = -1;

			PureInterpretor*		pure;
			const Array<vint>&		stateTokens;
			const T*				start;
			vint					codeIndex;
			RegexProc_<T>			proc;

			const T*				reading;
			vint					rowStart = 0;
			vint					columnStart = 0;
			bool					cacheAvailable = false;
			RegexToken_<T>			cacheToken;

		public:
			RegexTokenEnumerator(const RegexTokenEnumerator& enumerator)
				: token(enumerator.token)
				, index(enumerator.index)
				, pure(enumerator.pure)
				, stateTokens(enumerator.stateTokens)
				, start(enumerator.start)
				, codeIndex(enumerator.codeIndex)
				, proc(enumerator.proc)
				, reading(enumerator.reading)
				, rowStart(enumerator.rowStart)
				, columnStart(enumerator.columnStart)
				, cacheAvailable(enumerator.cacheAvailable)
				, cacheToken(enumerator.cacheToken)
			{
			}

			RegexTokenEnumerator(PureInterpretor* _pure, const Array<vint>& _stateTokens, const T* _start, vint _codeIndex, RegexProc_<T> _proc)
				:index(-1)
				, pure(_pure)
				, stateTokens(_stateTokens)
				, start(_start)
				, codeIndex(_codeIndex)
				, proc(_proc)
				, reading(_start)
			{
			}

			IEnumerator<RegexToken_<T>>* Clone()const
			{
				return new RegexTokenEnumerator<T>(*this);
			}

			const RegexToken_<T>& Current()const
			{
				return token;
			}

			vint Index()const
			{
				return index;
			}

			bool Next()
			{
				if (!cacheAvailable && !*reading) return false;
				if (cacheAvailable)
				{
					token = cacheToken;
					cacheAvailable = false;
				}
				else
				{
					token.reading = reading;
					token.start = 0;
					token.length = 0;
					token.token = -2;
					token.completeToken = true;
				}

				token.rowStart = rowStart;
				token.columnStart = columnStart;
				token.rowEnd = rowStart;
				token.columnEnd = columnStart;
				token.codeIndex = codeIndex;

				PureResult result;
				while (*reading)
				{
					vint id = -1;
					bool completeToken = true;
					if (!pure->MatchHead(reading, start, result))
					{
						result.start = reading - start;

						if (id == -1 && result.terminateState != -1)
						{
							vint state = pure->GetRelatedFinalState(result.terminateState);
							if (state != -1)
							{
								id = stateTokens[state];
							}
						}

						if (id == -1)
						{
							result.length = 1;
						}
						else
						{
							completeToken = false;
						}
					}
					else
					{
						id = stateTokens.Get(result.finalState);
					}

					if (id != -1 && proc.extendProc)
					{
						RegexProcessingToken token(result.start, result.length, id, completeToken, nullptr);
						proc.extendProc(proc.argument, reading, -1, true, token);
#if _DEBUG
						CHECK_ERROR(token.interTokenState == nullptr, L"RegexTokenEnumerator::Next()#The extendProc is only allowed to create interTokenState in RegexLexerColorizer.");
#endif
						result.length = token.length;
						id = token.token;
						completeToken = token.completeToken;
					}

					if (token.token == -2)
					{
						token.start = result.start;
						token.length = result.length;
						token.token = id;
						token.completeToken = completeToken;
					}
					else if (token.token == id && id == -1)
					{
						token.length += result.length;
					}
					else
					{
						cacheAvailable = true;
						cacheToken.reading = reading;
						cacheToken.start = result.start;
						cacheToken.length = result.length;
						cacheToken.codeIndex = codeIndex;
						cacheToken.token = id;
						cacheToken.completeToken = completeToken;
					}
					reading += result.length;

					if (cacheAvailable)
					{
						break;
					}
				}

				index++;

				for (vint i = 0; i < token.length; i++)
				{
					token.rowEnd = rowStart;
					token.columnEnd = columnStart;
					if (token.reading[i] == L'\n')
					{
						rowStart++;
						columnStart = 0;
					}
					else
					{
						columnStart++;
					}
				}
				return true;
			}

			void Reset()
			{
				index = -1;
				reading = start;
				cacheAvailable = false;
			}

			void ReadToEnd(List<RegexToken_<T>>& tokens, bool(*discard)(vint))
			{
				while (Next())
				{
					if (!discard(token.token))
					{
						tokens.Add(token);
					}
				}
			}
		};

		template<typename T>
		RegexTokens_<T>::RegexTokens_(PureInterpretor* _pure, const Array<vint>& _stateTokens, const ObjectString<T>& _code, vint _codeIndex, RegexProc_<T> _proc)
			:pure(_pure)
			, stateTokens(_stateTokens)
			, code(_code)
			, codeIndex(_codeIndex)
			, proc(_proc)
		{
		}

		template<typename T>
		RegexTokens_<T>::RegexTokens_(const RegexTokens_<T>& tokens)
			:pure(tokens.pure)
			, stateTokens(tokens.stateTokens)
			, code(tokens.code)
			, codeIndex(tokens.codeIndex)
			, proc(tokens.proc)
		{
		}

		template<typename T>
		IEnumerator<RegexToken_<T>>* RegexTokens_<T>::CreateEnumerator() const
		{
			return new RegexTokenEnumerator<T>(pure, stateTokens, code.Buffer(), codeIndex, proc);
		}

		bool DefaultDiscard(vint token)
		{
			return false;
		}

		template<typename T>
		void RegexTokens_<T>::ReadToEnd(collections::List<RegexToken_<T>>& tokens, bool(*discard)(vint))const
		{
			if (discard == 0)
			{
				discard = &DefaultDiscard;
			}
			RegexTokenEnumerator<T>(pure, stateTokens, code.Buffer(), codeIndex, proc).ReadToEnd(tokens, discard);
		}

/***********************************************************************
RegexLexerWalker_<T>
***********************************************************************/

		template<typename T>
		RegexLexerWalker_<T>::RegexLexerWalker_(PureInterpretor* _pure, const Array<vint>& _stateTokens)
			:pure(_pure)
			, stateTokens(_stateTokens)
		{
		}

		template<typename T>
		RegexLexerWalker_<T>::RegexLexerWalker_(const RegexLexerWalker_<T>& tokens)
			: pure(tokens.pure)
			, stateTokens(tokens.stateTokens)
		{
		}

		template<typename T>
		vint RegexLexerWalker_<T>::GetStartState()const
		{
			return pure->GetStartState();
		}

		template<typename T>
		vint RegexLexerWalker_<T>::GetRelatedToken(vint state)const
		{
			vint finalState = state == -1 ? -1 : pure->GetRelatedFinalState(state);
			return finalState == -1 ? -1 : stateTokens.Get(finalState);
		}

		template<typename T>
		void RegexLexerWalker_<T>::Walk(T input, vint& state, vint& token, bool& finalState, bool& previousTokenStop)const
		{
			vint previousState = state;
			token = -1;
			finalState = false;
			previousTokenStop = false;
			if (state == -1)
			{
				state = pure->GetStartState();
				previousTokenStop = true;
			}

			state = pure->Transit(input, state);
			if (state == -1)
			{
				previousTokenStop = true;
				if (previousState == -1)
				{
					finalState = true;
					return;
				}
				else if (pure->IsFinalState(previousState))
				{
					state = pure->Transit(input, pure->GetStartState());
				}
			}
			if (pure->IsFinalState(state))
			{
				token = stateTokens.Get(state);
				finalState = true;
				return;
			}
			else
			{
				finalState = state == -1;
				return;
			}
		}

		template<typename T>
		vint RegexLexerWalker_<T>::Walk(T input, vint state)const
		{
			vint token = -1;
			bool finalState = false;
			bool previousTokenStop = false;
			Walk(input, state, token, finalState, previousTokenStop);
			return state;
		}

		template<typename T>
		bool RegexLexerWalker_<T>::IsClosedToken(const T* input, vint length)const
		{
			vint state = pure->GetStartState();
			for (vint i = 0; i < length; i++)
			{
				state = pure->Transit(input[i], state);
				if (state == -1) return true;
				if (pure->IsDeadState(state)) return true;
			}
			return false;
		}

		template<typename T>
		bool RegexLexerWalker_<T>::IsClosedToken(const ObjectString<T>& input)const
		{
			return IsClosedToken(input.Buffer(), input.Length());
		}

/***********************************************************************
RegexLexerColorizer_<T>
***********************************************************************/

		template<typename T>
		RegexLexerColorizer_<T>::RegexLexerColorizer_(const RegexLexerWalker_<T>& _walker, RegexProc_<T> _proc)
			:walker(_walker)
			, proc(_proc)
		{
			internalState.currentState = walker.GetStartState();
		}

		template<typename T>
		typename RegexLexerColorizer_<T>::InternalState RegexLexerColorizer_<T>::GetInternalState()
		{
			return internalState;
		}

		template<typename T>
		void RegexLexerColorizer_<T>::SetInternalState(InternalState state)
		{
			internalState = state;
		}

		template<typename T>
		void RegexLexerColorizer_<T>::Pass(T input)
		{
			WalkOneToken(&input, 1, 0, false);
		}

		template<typename T>
		vint RegexLexerColorizer_<T>::GetStartState()const
		{
			return walker.GetStartState();
		}

		template<typename T>
		void RegexLexerColorizer_<T>::CallExtendProcAndColorizeProc(const T* input, vint length, RegexProcessingToken& token, bool colorize)
		{
			vint oldTokenLength = token.length;
			proc.extendProc(proc.argument, input + token.start, length - token.start, false, token);
#if _DEBUG
			{
				bool pausedAtTheEnd = token.start + token.length == length && !token.completeToken;
				CHECK_ERROR(
					token.completeToken || pausedAtTheEnd,
					L"RegexLexerColorizer::WalkOneToken(const char32_t*, vint, vint, bool)#The extendProc is not allowed pause before the end of the input."
				);
				CHECK_ERROR(
					token.completeToken || token.token != -1,
					L"RegexLexerColorizer::WalkOneToken(const char32_t*, vint, vint, bool)#The extendProc is not allowed to pause without a valid token id."
				);
				CHECK_ERROR(
					oldTokenLength <= token.length,
					L"RegexLexerColorizer::WalkOneToken(const char32_t*, vint, vint, bool)#The extendProc is not allowed to decrease the token length."
				);
				CHECK_ERROR(
					(token.interTokenState == nullptr) == !pausedAtTheEnd,
					L"RegexLexerColorizer::Colorize(const char32_t*, vint, void*)#The extendProc should return an inter token state object if and only if a valid token does not end at the end of the input."
				);
			}
#endif
			if ((internalState.interTokenState = token.interTokenState))
			{
				internalState.interTokenId = token.token;
			}
			if (colorize)
			{
				proc.colorizeProc(proc.argument, token.start, token.length, token.token);
			}
		}

		template<typename T>
		vint RegexLexerColorizer_<T>::WalkOneToken(const T* input, vint length, vint start, bool colorize)
		{
			if (internalState.interTokenState)
			{
				RegexProcessingToken token(-1, -1, internalState.interTokenId, false, internalState.interTokenState);
				proc.extendProc(proc.argument, input, length, false, token);
#if _DEBUG
				{
					bool pausedAtTheEnd = token.length == length && !token.completeToken;
					CHECK_ERROR(
						token.completeToken || pausedAtTheEnd,
						L"RegexLexerColorizer::WalkOneToken(const char32_t*, vint, vint, bool)#The extendProc is not allowed to pause before the end of the input."
					);
					CHECK_ERROR(
						token.completeToken || token.token == internalState.interTokenId,
						L"RegexLexerColorizer::WalkOneToken(const char32_t*, vint, vint, bool)#The extendProc is not allowed to continue pausing with a different token id."
					);
					CHECK_ERROR(
						(token.interTokenState == nullptr) == !pausedAtTheEnd,
						L"RegexLexerColorizer::Colorize(const char32_t*, vint, void*)#The extendProc should return an inter token state object if and only if a valid token does not end at the end of the input."
					);
				}
#endif
				if (colorize)
				{
					proc.colorizeProc(proc.argument, 0, token.length, token.token);
				}
				if (!(internalState.interTokenState = token.interTokenState))
				{
					internalState.interTokenId = -1;
				}
				return token.length;
			}

			vint lastFinalStateLength = 0;
			vint lastFinalStateToken = -1;
			vint lastFinalStateState = -1;

			vint tokenStartState = internalState.currentState;
			for (vint i = start; i < length; i++)
			{
				vint currentToken = -1;
				bool finalState = false;
				bool previousTokenStop = false;
				walker.Walk(input[i], internalState.currentState, currentToken, finalState, previousTokenStop);

				if (previousTokenStop)
				{
					if (proc.extendProc && lastFinalStateToken != -1)
					{
						RegexProcessingToken token(start, lastFinalStateLength, lastFinalStateToken, true, nullptr);
						CallExtendProcAndColorizeProc(input, length, token, colorize);
						if (token.completeToken)
						{
							internalState.currentState = walker.GetStartState();
						}
						return start + token.length;
					}
					else if (i == start)
					{
						if (tokenStartState == GetStartState())
						{
							if (colorize)
							{
								proc.colorizeProc(proc.argument, start, 1, -1);
							}
							internalState.currentState = walker.GetStartState();
							return i + 1;
						}
					}
					else
					{
						if (colorize)
						{
							proc.colorizeProc(proc.argument, start, lastFinalStateLength, lastFinalStateToken);
						}
						internalState.currentState = lastFinalStateState;
						return start + lastFinalStateLength;
					}
				}

				if (finalState)
				{
					lastFinalStateLength = i + 1 - start;
					lastFinalStateToken = currentToken;
					lastFinalStateState = internalState.currentState;
				}
			}

			if (lastFinalStateToken != -1 && start + lastFinalStateLength == length)
			{
				if (proc.extendProc)
				{
					RegexProcessingToken token(start, lastFinalStateLength, lastFinalStateToken, true, nullptr);
					CallExtendProcAndColorizeProc(input, length, token, colorize);
				}
				else if (colorize)
				{
					proc.colorizeProc(proc.argument, start, lastFinalStateLength, lastFinalStateToken);
				}
			}
			else if (colorize)
			{
				proc.colorizeProc(proc.argument, start, length - start, walker.GetRelatedToken(internalState.currentState));
			}
			return length;
		}

		template<typename T>
		void* RegexLexerColorizer_<T>::Colorize(const T* input, vint length)
		{
			vint index = 0;
			while (index != length)
			{
				index = WalkOneToken(input, length, index, true);
			}
			return internalState.interTokenState;
		}

/***********************************************************************
RegexLexerBase_
***********************************************************************/

		RegexLexerBase_::~RegexLexerBase_()
		{
			if (pure) delete pure;
		}

		template<typename T>
		RegexTokens_<T> RegexLexerBase_::Parse(const ObjectString<T>& code, RegexProc_<T> proc, vint codeIndex)const
		{
			code.Buffer();
			pure->PrepareForRelatedFinalStateTable();
			return RegexTokens_<T>(pure, stateTokens, code, codeIndex, proc);
		}

		template<typename T>
		RegexLexerWalker_<T> RegexLexerBase_::Walk()const
		{
			pure->PrepareForRelatedFinalStateTable();
			return RegexLexerWalker_<T>(pure, stateTokens);
		}

		RegexLexerWalker_<wchar_t> RegexLexerBase_::Walk()const
		{
			pure->PrepareForRelatedFinalStateTable();
			return RegexLexerWalker_<wchar_t>(pure, stateTokens);
		}

		template<typename T>
		RegexLexerColorizer_<T> RegexLexerBase_::Colorize(RegexProc_<T> proc)const
		{
			return RegexLexerColorizer_<T>(Walk<T>(), proc);
		}

/***********************************************************************
RegexLexer_<T> (Serialization)
***********************************************************************/

		template<typename T>
		RegexLexer_<T>::RegexLexer_(stream::IStream& inputStream)
		{
			pure = new PureInterpretor(inputStream);
			vint count = 0;
			ReadInt(inputStream, count);
			stateTokens.Resize(count);
			if (count > 0)
			{
				ReadInts(inputStream, count, &stateTokens[0]);
			}
		}

		template<typename T>
		void RegexLexer_<T>::Serialize(stream::IStream& outputStream)
		{
			pure->Serialize(outputStream);
			WriteInt(outputStream, stateTokens.Count());
			if (stateTokens.Count() > 0)
			{
				WriteInts(outputStream, stateTokens.Count(), &stateTokens[0]);
			}
		}

/***********************************************************************
RegexLexer_<T>
***********************************************************************/

		template<typename T>
		RegexLexer_<T>::RegexLexer_(const collections::IEnumerable<ObjectString<T>>& tokens)
		{
			// Build DFA for all tokens
			List<Ptr<Expression>> expressions;
			List<Ptr<Automaton>> dfas;
			CharRange::List subsets;
			for (auto&& code : tokens)
			{
				auto regex = ParseRegexExpression(U32<T>::ToU32(code));
				auto expression = regex->Merge();
				expression->CollectCharSet(subsets);
				expressions.Add(expression);
			}
			// TODO: (enumerable) foreach
			for (vint i = 0; i < expressions.Count(); i++)
			{
				Dictionary<State*, State*> nfaStateMap;
				Group<State*, State*> dfaStateMap;
				expressions[i]->ApplyCharSet(subsets);
				auto eNfa = expressions[i]->GenerateEpsilonNfa();
				auto nfa = EpsilonNfaToNfa(eNfa, PureEpsilonChecker, nfaStateMap);
				auto dfa = NfaToDfa(nfa, dfaStateMap);
				dfas.Add(dfa);
			}

			// Mark all states in DFAs
			// TODO: (enumerable) foreach
			for (vint i = 0; i < dfas.Count(); i++)
			{
				Ptr<Automaton> dfa = dfas[i];
				// TODO: (enumerable) foreach
				for (vint j = 0; j < dfa->states.Count(); j++)
				{
					if (dfa->states[j]->finalState)
					{
						dfa->states[j]->userData = (void*)i;
					}
					else
					{
						dfa->states[j]->userData = (void*)dfas.Count();
					}
				}
			}

			// Connect all DFAs to an e-NFA
			auto bigEnfa = Ptr(new Automaton);
			// TODO: (enumerable) foreach
			for (vint i = 0; i < dfas.Count(); i++)
			{
				CopyFrom(bigEnfa->states, dfas[i]->states, true);
				CopyFrom(bigEnfa->transitions, dfas[i]->transitions, true);
			}
			bigEnfa->startState = bigEnfa->NewState();
			// TODO: (enumerable) foreach
			for (vint i = 0; i < dfas.Count(); i++)
			{
				bigEnfa->NewEpsilon(bigEnfa->startState, dfas[i]->startState);
			}

			// Build a single DFA out of the e-NFA
			Dictionary<State*, State*> nfaStateMap;
			Group<State*, State*> dfaStateMap;
			auto bigNfa = EpsilonNfaToNfa(bigEnfa, PureEpsilonChecker, nfaStateMap);
			// TODO: (enumerable) foreach on dictionary
			for (vint i = 0; i < nfaStateMap.Keys().Count(); i++)
			{
				void* userData = nfaStateMap.Values().Get(i)->userData;
				nfaStateMap.Keys()[i]->userData = userData;
			}
			auto bigDfa = NfaToDfa(bigNfa, dfaStateMap);
			// TODO: (enumerable) foreach on group
			for (vint i = 0; i < dfaStateMap.Keys().Count(); i++)
			{
				void* userData = dfaStateMap.GetByIndex(i).Get(0)->userData;
				for (vint j = 1; j < dfaStateMap.GetByIndex(i).Count(); j++)
				{
					void* newData = dfaStateMap.GetByIndex(i).Get(j)->userData;
					if (userData > newData)
					{
						userData = newData;
					}
				}
				dfaStateMap.Keys()[i]->userData = userData;
			}

			// Build state machine
			pure = new PureInterpretor(bigDfa, subsets);
			stateTokens.Resize(bigDfa->states.Count());
			for (vint i = 0; i < stateTokens.Count(); i++)
			{
				void* userData = bigDfa->states[i]->userData;
				stateTokens[i] = (vint)userData;
			}
		}

/***********************************************************************
Template Instantiation
***********************************************************************/

		template class RegexString_<wchar_t>;
		template class RegexString_<char8_t>;
		template class RegexString_<char16_t>;
		template class RegexString_<char32_t>;

		template class RegexMatch_<wchar_t>;
		template class RegexMatch_<char8_t>;
		template class RegexMatch_<char16_t>;
		template class RegexMatch_<char32_t>;

		template RegexMatch_<wchar_t>::Ref		RegexBase_::MatchHead<wchar_t>	(const ObjectString<wchar_t>& text)const;
		template RegexMatch_<wchar_t>::Ref		RegexBase_::Match<wchar_t>		(const ObjectString<wchar_t>& text)const;
		template bool							RegexBase_::TestHead<wchar_t>	(const ObjectString<wchar_t>& text)const;
		template bool							RegexBase_::Test<wchar_t>		(const ObjectString<wchar_t>& text)const;
		template void							RegexBase_::Search<wchar_t>		(const ObjectString<wchar_t>& text, RegexMatch_<wchar_t>::List& matches)const;
		template void							RegexBase_::Split<wchar_t>		(const ObjectString<wchar_t>& text, bool keepEmptyMatch, RegexMatch_<wchar_t>::List& matches)const;
		template void							RegexBase_::Cut<wchar_t>		(const ObjectString<wchar_t>& text, bool keepEmptyMatch, RegexMatch_<wchar_t>::List& matches)const;

		template RegexMatch_<char8_t>::Ref		RegexBase_::MatchHead<char8_t>	(const ObjectString<char8_t>& text)const;
		template RegexMatch_<char8_t>::Ref		RegexBase_::Match<char8_t>		(const ObjectString<char8_t>& text)const;
		template bool							RegexBase_::TestHead<char8_t>	(const ObjectString<char8_t>& text)const;
		template bool							RegexBase_::Test<char8_t>		(const ObjectString<char8_t>& text)const;
		template void							RegexBase_::Search<char8_t>		(const ObjectString<char8_t>& text, RegexMatch_<char8_t>::List& matches)const;
		template void							RegexBase_::Split<char8_t>		(const ObjectString<char8_t>& text, bool keepEmptyMatch, RegexMatch_<char8_t>::List& matches)const;
		template void							RegexBase_::Cut<char8_t>		(const ObjectString<char8_t>& text, bool keepEmptyMatch, RegexMatch_<char8_t>::List& matches)const;

		template RegexMatch_<char16_t>::Ref		RegexBase_::MatchHead<char16_t>	(const ObjectString<char16_t>& text)const;
		template RegexMatch_<char16_t>::Ref		RegexBase_::Match<char16_t>		(const ObjectString<char16_t>& text)const;
		template bool							RegexBase_::TestHead<char16_t>	(const ObjectString<char16_t>& text)const;
		template bool							RegexBase_::Test<char16_t>		(const ObjectString<char16_t>& text)const;
		template void							RegexBase_::Search<char16_t>	(const ObjectString<char16_t>& text, RegexMatch_<char16_t>::List& matches)const;
		template void							RegexBase_::Split<char16_t>		(const ObjectString<char16_t>& text, bool keepEmptyMatch, RegexMatch_<char16_t>::List& matches)const;
		template void							RegexBase_::Cut<char16_t>		(const ObjectString<char16_t>& text, bool keepEmptyMatch, RegexMatch_<char16_t>::List& matches)const;

		template RegexMatch_<char32_t>::Ref		RegexBase_::MatchHead<char32_t>	(const ObjectString<char32_t>& text)const;
		template RegexMatch_<char32_t>::Ref		RegexBase_::Match<char32_t>		(const ObjectString<char32_t>& text)const;
		template bool							RegexBase_::TestHead<char32_t>	(const ObjectString<char32_t>& text)const;
		template bool							RegexBase_::Test<char32_t>		(const ObjectString<char32_t>& text)const;
		template void							RegexBase_::Search<char32_t>	(const ObjectString<char32_t>& text, RegexMatch_<char32_t>::List& matches)const;
		template void							RegexBase_::Split<char32_t>		(const ObjectString<char32_t>& text, bool keepEmptyMatch, RegexMatch_<char32_t>::List& matches)const;
		template void							RegexBase_::Cut<char32_t>		(const ObjectString<char32_t>& text, bool keepEmptyMatch, RegexMatch_<char32_t>::List& matches)const;

		template class Regex_<wchar_t>;
		template class Regex_<char8_t>;
		template class Regex_<char16_t>;
		template class Regex_<char32_t>;

		template class RegexTokens_<wchar_t>;
		template class RegexTokens_<char8_t>;
		template class RegexTokens_<char16_t>;
		template class RegexTokens_<char32_t>;

		template class RegexLexerWalker_<wchar_t>;
		template class RegexLexerWalker_<char8_t>;
		template class RegexLexerWalker_<char16_t>;
		template class RegexLexerWalker_<char32_t>;

		template class RegexLexerColorizer_<wchar_t>;
		template class RegexLexerColorizer_<char8_t>;
		template class RegexLexerColorizer_<char16_t>;
		template class RegexLexerColorizer_<char32_t>;

		template RegexTokens_<wchar_t>				RegexLexerBase_::Parse<wchar_t>		(const ObjectString<wchar_t>& code, RegexProc_<wchar_t> _proc, vint codeIndex)const;
		template RegexLexerWalker_<wchar_t>			RegexLexerBase_::Walk<wchar_t>		()const;
		template RegexLexerColorizer_<wchar_t>		RegexLexerBase_::Colorize<wchar_t>	(RegexProc_<wchar_t> _proc)const;

		template RegexTokens_<char8_t>				RegexLexerBase_::Parse<char8_t>		(const ObjectString<char8_t>& code, RegexProc_<char8_t> _proc, vint codeIndex)const;
		template RegexLexerWalker_<char8_t>			RegexLexerBase_::Walk<char8_t>		()const;
		template RegexLexerColorizer_<char8_t>		RegexLexerBase_::Colorize<char8_t>	(RegexProc_<char8_t> _proc)const;

		template RegexTokens_<char16_t>				RegexLexerBase_::Parse<char16_t>	(const ObjectString<char16_t>& code, RegexProc_<char16_t> _proc, vint codeIndex)const;
		template RegexLexerWalker_<char16_t>		RegexLexerBase_::Walk<char16_t>		()const;
		template RegexLexerColorizer_<char16_t>		RegexLexerBase_::Colorize<char16_t>	(RegexProc_<char16_t> _proc)const;

		template RegexTokens_<char32_t>				RegexLexerBase_::Parse<char32_t>	(const ObjectString<char32_t>& code, RegexProc_<char32_t> _proc, vint codeIndex)const;
		template RegexLexerWalker_<char32_t>		RegexLexerBase_::Walk<char32_t>		()const;
		template RegexLexerColorizer_<char32_t>		RegexLexerBase_::Colorize<char32_t>	(RegexProc_<char32_t> _proc)const;

		template class RegexLexer_<wchar_t>;
		template class RegexLexer_<char8_t>;
		template class RegexLexer_<char16_t>;
		template class RegexLexer_<char32_t>;
	}
}

/***********************************************************************
.\REGEXPURE.CPP
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/


namespace vl
{
	namespace regex_internal
	{
		using namespace collections;

/***********************************************************************
Read
***********************************************************************/

		void ReadInt(stream::IStream& inputStream, vint& value)
		{
#ifdef VCZH_64
			vint32_t x = 0;
			CHECK_ERROR(
				inputStream.Read(&x, sizeof(vint32_t)) == sizeof(vint32_t),
				L"Failed to deserialize RegexLexer."
				);
			value = (vint)x;
#else
			CHECK_ERROR(
				inputStream.Read(&value, sizeof(vint32_t)) == sizeof(vint32_t),
				L"Failed to deserialize RegexLexer."
				);
#endif
		}

		void ReadInts(stream::IStream& inputStream, vint count, vint* values)
		{
#ifdef VCZH_64
			Array<vint32_t> xs(count);
			CHECK_ERROR(
				inputStream.Read(&xs[0], sizeof(vint32_t) * count) == sizeof(vint32_t) * count,
				L"Failed to deserialize RegexLexer."
				);
			for (vint i = 0; i < count; i++)
			{
				values[i] = (vint)xs[i];
			}
#else
			CHECK_ERROR(
				inputStream.Read(values, sizeof(vint32_t) * count) == sizeof(vint32_t) * count,
				L"Failed to deserialize RegexLexer."
				);
#endif
		}

		void ReadBools(stream::IStream& inputStream, vint count, bool* values)
		{
			Array<vuint8_t> bits((count + 7) / 8);
			CHECK_ERROR(
				inputStream.Read(&bits[0], sizeof(vuint8_t) * bits.Count()) == sizeof(vuint8_t) * bits.Count(),
				L"Failed to deserialize RegexLexer."
			);

			for (vint i = 0; i < count; i++)
			{
				vint x = i / 8;
				vint y = i % 8;
				values[i] = ((bits[x] >> y) & 1) == 1;
			}
		}

/***********************************************************************
Write
***********************************************************************/

		void WriteInt(stream::IStream& outputStream, vint value)
		{
#ifdef VCZH_64
			vint32_t x = (vint32_t)value;
			CHECK_ERROR(
				outputStream.Write(&x, sizeof(vint32_t)) == sizeof(vint32_t),
				L"Failed to serialize RegexLexer."
				);
#else
			CHECK_ERROR(
				outputStream.Write(&value, sizeof(vint32_t)) == sizeof(vint32_t),
				L"Failed to serialize RegexLexer."
				);
#endif
		}

		void WriteInts(stream::IStream& outputStream, vint count, vint* values)
		{
#ifdef VCZH_64
			Array<vint32_t> xs(count);
			for (vint i = 0; i < count; i++)
			{
				xs[i] = (vint32_t)values[i];
			}
			CHECK_ERROR(
				outputStream.Write(&xs[0], sizeof(vint32_t) * count) == sizeof(vint32_t) * count,
				L"Failed to serialize RegexLexer."
				);
#else
			CHECK_ERROR(
				outputStream.Write(values, sizeof(vint32_t) * count) == sizeof(vint32_t) * count,
				L"Failed to serialize RegexLexer."
				);
#endif
		}

		void WriteBools(stream::IStream& outputStream, vint count, bool* values)
		{
			Array<vuint8_t> bits((count + 7) / 8);
			memset(&bits[0], 0, sizeof(vuint8_t) * bits.Count());

			for (vint i = 0; i < count; i++)
			{
				if (values[i])
				{
					vint x = i / 8;
					vint y = i % 8;
					bits[x] |= (vuint8_t)1 << y;
				}
			}

			CHECK_ERROR(
				outputStream.Write(&bits[0], sizeof(vuint8_t) * bits.Count()) == sizeof(vuint8_t) * bits.Count(),
				L"Failed to serialize RegexLexer."
				);
		}

/***********************************************************************
PureInterpretor (Serialization)
***********************************************************************/

		PureInterpretor::PureInterpretor(stream::IStream& inputStream)
		{
			ReadInt(inputStream, stateCount);
			ReadInt(inputStream, charSetCount);
			ReadInt(inputStream, startState);
			{
				vint count = 0;
				ReadInt(inputStream, count);
				charRanges.Resize(count);
				if (count > 0)
				{
					vint size = charRanges.Count() * sizeof(CharRange);
					CHECK_ERROR(inputStream.Read(&charRanges[0], size) == size, L"Failed to serialize RegexLexer.");
				}
				ExpandCharRanges();
			}

			transitions = new vint[stateCount * charSetCount];
			ReadInts(inputStream, stateCount * charSetCount, transitions);

			finalState = new bool[stateCount];
			ReadBools(inputStream, stateCount, finalState);
		}

		void PureInterpretor::Serialize(stream::IStream& outputStream)
		{
			WriteInt(outputStream, stateCount);
			WriteInt(outputStream, charSetCount);
			WriteInt(outputStream, startState);
			{
				WriteInt(outputStream, charRanges.Count());
				if (charRanges.Count() > 0)
				{
					vint size = charRanges.Count() * sizeof(CharRange);
					CHECK_ERROR(outputStream.Write(&charRanges[0], size) == size, L"Failed to serialize RegexLexer.");
				}
			}
			WriteInts(outputStream, stateCount * charSetCount, transitions);
			WriteBools(outputStream, stateCount, finalState);
		}

/***********************************************************************
PureInterpretor
***********************************************************************/

		void PureInterpretor::ExpandCharRanges()
		{
			for (vint i = 0; i < SupportedCharCount; i++)
			{
				charMap[i] = charSetCount - 1;
			}
			// TODO: (enumerable) foreach
			for (vint i = 0; i < charRanges.Count(); i++)
			{
				CharRange range = charRanges[i];
				for (char32_t j = range.begin; j <= range.end; j++)
				{
					if (j > MaxChar32) break;
					charMap[j] = i;
				}
			}
		}

		PureInterpretor::PureInterpretor(Ptr<Automaton> dfa, CharRange::List& subsets)
		{
			stateCount = dfa->states.Count();
			charSetCount = subsets.Count() + 1;
			startState = dfa->states.IndexOf(dfa->startState);

			// Map char to input index (equivalent char class)
			CopyFrom(charRanges, subsets);
			ExpandCharRanges();

			// Create transitions from DFA, using input index to represent input char
			transitions = new vint[stateCount * charSetCount];
			for (vint i = 0; i < stateCount; i++)
			{
				for (vint j = 0; j < charSetCount; j++)
				{
					transitions[i * charSetCount + j] = -1;
				}

				State* state = dfa->states[i].Obj();
				// TODO: (enumerable) foreach
				for (vint j = 0; j < state->transitions.Count(); j++)
				{
					Transition* dfaTransition = state->transitions[j];
					switch (dfaTransition->type)
					{
					case Transition::Chars:
						{
							vint index = subsets.IndexOf(dfaTransition->range);
							if (index == -1)
							{
								CHECK_ERROR(false, L"PureInterpretor::PureInterpretor(Ptr<Automaton>, CharRange::List&)#Specified chars don't appear in the normalized char ranges.");
							}
							transitions[i * charSetCount + index] = dfa->states.IndexOf(dfaTransition->target);
						}
						break;
					default:
						CHECK_ERROR(false, L"PureInterpretor::PureInterpretor(Ptr<Automaton>, CharRange::List&)#PureInterpretor only accepts Transition::Chars transitions.");
					}
				}
			}

			// Mark final states
			finalState = new bool[stateCount];
			for (vint i = 0; i < stateCount; i++)
			{
				finalState[i] = dfa->states[i]->finalState;
			}
		}

		PureInterpretor::~PureInterpretor()
		{
			if (relatedFinalState) delete[] relatedFinalState;
			delete[] finalState;
			delete[] transitions;
		}

		template<typename TChar>
		bool PureInterpretor::MatchHead(const TChar* input, const TChar* start, PureResult& result)
		{
			CharReader<TChar> reader(input);
			vint currentState = startState;
			vint terminateState = -1;
			vint terminateLength = -1;

			result.start = input - start;
			result.length = -1;
			result.finalState = -1;
			result.terminateState = -1;

			while (currentState != -1)
			{
				auto c = reader.Read();

				terminateState = currentState;
				terminateLength = reader.Index();
				if (finalState[currentState])
				{
					result.length = terminateLength;
					result.finalState = currentState;
				}

				if (!c) break;
				if (c >= SupportedCharCount) break;

				vint charIndex = charMap[c];
				currentState = transitions[currentState * charSetCount + charIndex];
			}

			if (result.finalState == -1)
			{
				if (terminateLength > 0)
				{
					result.terminateState = terminateState;
				}
				result.length = terminateLength;
				return false;
			}
			else
			{
				return true;
			}
		}

		template<typename TChar>
		bool PureInterpretor::Match(const TChar* input, const TChar* start, PureResult& result)
		{
			CharReader<TChar> reader(input);
			while (reader.Read())
			{
				if (MatchHead(reader.Reading(), start, result))
				{
					return true;
				}
			}
			return false;
		}

		vint PureInterpretor::GetStartState()
		{
			return startState;
		}

		vint PureInterpretor::Transit(char32_t input, vint state)
		{
			if (0 <= state && state < stateCount && 0 <= input && input <= MaxChar32)
			{
				vint charIndex = charMap[input];
				vint nextState = transitions[state * charSetCount + charIndex];
				return nextState;
			}
			else
			{
				return -1;
			}
		}

		bool PureInterpretor::IsFinalState(vint state)
		{
			return 0 <= state && state < stateCount&& finalState[state];
		}

		bool PureInterpretor::IsDeadState(vint state)
		{
			if (state == -1) return true;
			for (vint i = 0; i < charSetCount; i++)
			{
				if (transitions[state * charSetCount + i] != -1)
				{
					return false;
				}
			}
			return true;
		}

		void PureInterpretor::PrepareForRelatedFinalStateTable()
		{
			if (!relatedFinalState)
			{
				relatedFinalState = new vint[stateCount];
				for (vint i = 0; i < stateCount; i++)
				{
					relatedFinalState[i] = finalState[i] ? i : -1;
				}
				while (true)
				{
					vint modifyCount = 0;
					for (vint i = 0; i < stateCount; i++)
					{
						if (relatedFinalState[i] == -1)
						{
							vint state = -1;
							for (vint j = 0; j < charSetCount; j++)
							{
								vint nextState = transitions[i * charSetCount + j];
								if (nextState != -1)
								{
									state = relatedFinalState[nextState];
									if (state != -1)
									{
										break;
									}
								}
							}
							if (state != -1)
							{
								relatedFinalState[i] = state;
								modifyCount++;
							}
						}
					}
					if (modifyCount == 0)
					{
						break;
					}
				}
			}
		}

		vint PureInterpretor::GetRelatedFinalState(vint state)
		{
			return relatedFinalState ? relatedFinalState[state] : -1;
		}

		template bool			PureInterpretor::MatchHead<wchar_t>(const wchar_t* input, const wchar_t* start, PureResult& result);
		template bool			PureInterpretor::MatchHead<char8_t>(const char8_t* input, const char8_t* start, PureResult& result);
		template bool			PureInterpretor::MatchHead<char16_t>(const char16_t* input, const char16_t* start, PureResult& result);
		template bool			PureInterpretor::MatchHead<char32_t>(const char32_t* input, const char32_t* start, PureResult& result);

		template bool			PureInterpretor::Match<wchar_t>(const wchar_t* input, const wchar_t* start, PureResult& result);
		template bool			PureInterpretor::Match<char8_t>(const char8_t* input, const char8_t* start, PureResult& result);
		template bool			PureInterpretor::Match<char16_t>(const char16_t* input, const char16_t* start, PureResult& result);
		template bool			PureInterpretor::Match<char32_t>(const char32_t* input, const char32_t* start, PureResult& result);
	}
}

/***********************************************************************
.\REGEXRICH.CPP
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/


namespace vl
{
	namespace regex_internal
	{

/***********************************************************************
Data Structures for Backtracking
***********************************************************************/

		enum class StateStoreType
		{
			Positive,
			Negative,
			Other
		};

		template<typename TChar>
		class StateSaver
		{
		public:

			CharReader<TChar>		reader;										// Current reading position
			char32_t				ch;											// Current character
			State*					currentState;								// Current state
			vint					minTransition = 0;							// The first transition to backtrack
			vint					captureCount = 0;							// Available capture count			(the list size may larger than this)
			vint					stateSaverCount = 0;						// Available saver count			(the list size may larger than this)
			vint					extensionSaverAvailable = -1;				// Available extension saver count	(the list size may larger than this)
			vint					extensionSaverCount = 0;					// Available extension saver count	(during executing)
			StateStoreType			storeType = StateStoreType::Other;			// Reason to keep this record

			StateSaver(const TChar* input, State* _currentState)
				: reader(input)
				, currentState(_currentState)
			{
				ch = reader.Read();
			}

			StateSaver(const StateSaver&) = default;
			StateSaver& operator=(const StateSaver&) = default;

			void RestoreReaderTo(StateSaver<TChar>& saver)
			{
				saver.reader = reader;
				saver.ch = ch;
			}
		};

		template<typename TChar>
		class ExtensionSaver
		{
		public:
			CharReader<TChar>		reader;										// The reading position
			char32_t				ch;											// Current character
			vint					previous;									// Previous extension saver index
			vint					captureListIndex;							// Where to write the captured text
			Transition*				transition;									// The extension begin transition (Capture, Positive, Negative)

			ExtensionSaver(const StateSaver<TChar>& saver)
				: reader(saver.reader)
				, ch(saver.ch)
			{
			}

			ExtensionSaver(const ExtensionSaver&) = default;
			ExtensionSaver& operator=(const ExtensionSaver&) = default;

			void RestoreReaderTo(StateSaver<TChar>& saver)
			{
				saver.reader = reader;
				saver.ch = ch;
			}
		};
	}

	namespace regex_internal
	{
		using namespace collections;

		template<typename TChar>
		void Push(List<ExtensionSaver<TChar>>& elements, vint& available, vint& count, const ExtensionSaver<TChar>& element)
		{
			if (elements.Count() == count)
			{
				elements.Add(element);
			}
			else
			{
				elements[count] = element;
			}
			auto& current = elements[count];
			current.previous = available;
			available = count++;
		}

		template<typename TChar>
		ExtensionSaver<TChar> Pop(List<ExtensionSaver<TChar>>& elements, vint& available, vint& count)
		{
			auto& current = elements[available];
			available = current.previous;
			return current;
		}

		template<typename T>
		void PushNonSaver(List<T>& elements, vint& count, const T& element)
		{
			if (elements.Count() == count)
			{
				elements.Add(element);
			}
			else
			{
				elements[count] = element;
			}
			count++;
		}

		template<typename T>
		T PopNonSaver(List<T>& elements, vint& count)
		{
			return elements[--count];
		}
	}

	namespace regex_internal
	{

/***********************************************************************
RichInterpretor
***********************************************************************/

		RichInterpretor::RichInterpretor(Ptr<Automaton> _dfa)
			:dfa(_dfa)
		{
			datas = new UserData[dfa->states.Count()];

			// TODO: (enumerable) foreach
			for (vint i = 0; i < dfa->states.Count(); i++)
			{
				State* state = dfa->states[i].Obj();
				vint charEdges = 0;
				vint nonCharEdges = 0;
				bool mustSave = false;
				// TODO: (enumerable) foreach
				for (vint j = 0; j < state->transitions.Count(); j++)
				{
					if (state->transitions[j]->type == Transition::Chars)
					{
						charEdges++;
					}
					else
					{
						if (state->transitions[j]->type == Transition::Negative ||
							state->transitions[j]->type == Transition::Positive)
						{
							mustSave = true;
						}
						nonCharEdges++;
					}
				}
				datas[i].NeedKeepState = mustSave || nonCharEdges > 1 || (nonCharEdges != 0 && charEdges != 0);
				state->userData = &datas[i];
			}
		}

		RichInterpretor::~RichInterpretor()
		{
			delete[] datas;
		}

		template<typename TChar>
		bool RichInterpretor::MatchHead(const TChar* input, const TChar* start, RichResult& result)
		{
			List<StateSaver<TChar>> stateSavers;
			List<ExtensionSaver<TChar>> extensionSavers;

			StateSaver<TChar> currentState(input, dfa->startState);

			while (!currentState.currentState->finalState)
			{
				bool found = false; // true means at least one transition matches the input
				StateSaver<TChar> oldState = currentState;
				// Iterate through all transitions from the current state
				// TODO: (enumerable) foreach:reversed
				for (vint i = currentState.minTransition; i < currentState.currentState->transitions.Count(); i++)
				{
					Transition* transition = currentState.currentState->transitions[i];
					switch (transition->type)
					{
					case Transition::Chars:
						{
							// match the input if the current character fall into the range
							CharRange range = transition->range;
							found =
								range.begin <= currentState.ch &&
								range.end >= currentState.ch;
							if (found)
							{
								currentState.ch = currentState.reader.Read();
							}
						}
						break;
					case Transition::BeginString:
						{
							// match the input if this is the first character, and it is not consumed
							found = currentState.reader.Index() == 0 && input == start;
						}
						break;
					case Transition::EndString:
						{
							// match the input if this is after the last character, and it is not consumed
							found = currentState.ch == 0;
						}
						break;
					case Transition::Nop:
						{
							// match without any condition
							found = true;
						}
						break;
					case Transition::Capture:
						{
							// Push the capture information
							ExtensionSaver<TChar> saver(currentState);
							saver.captureListIndex = currentState.captureCount;
							saver.transition = transition;
							Push(extensionSavers, currentState.extensionSaverAvailable, currentState.extensionSaverCount, saver);

							// Push the capture record, and it will be written if the input matches the regex
							CaptureRecord capture;
							capture.capture = transition->capture;
							capture.start = currentState.reader.Index() + (input - start);
							capture.length = -1;
							PushNonSaver(result.captures, currentState.captureCount, capture);

							found = true;
						}
						break;
					case Transition::Match:
						{
							vint index = 0;
							for (vint j = 0; j < currentState.captureCount; j++)
							{
								CaptureRecord& capture = result.captures[j];
								// If the capture name matched
								if (capture.capture == transition->capture)
								{
									// If the capture index matched, or it is -1
									if (capture.length != -1 && (transition->index == -1 || transition->index == index))
									{
										// If the captured text matched
										if (memcmp(start + capture.start, input + currentState.reader.Index(), sizeof(TChar) * capture.length) == 0)
										{
											// Consume so much input
											vint targetIndex = currentState.reader.Index() + capture.length;
											while (currentState.reader.Index() < targetIndex)
											{
												currentState.ch = currentState.reader.Read();
											}
											CHECK_ERROR(currentState.reader.Index() == targetIndex, L"vl::regex_internal::RichInterpretor::MatchHead<TChar>(const TChar*, const TChar*, RichResult&)#Input code could be an incorrect unicode sequence.");
											found = true;
											break;
										}
									}

									// Fail if f the captured text with the specified name and index doesn't match
									if (transition->index != -1 && index == transition->index)
									{
										break;
									}
									else
									{
										index++;
									}
								}
							}
						}
						break;
					case Transition::Positive:
						{
							// Push the positive lookahead information
							ExtensionSaver<TChar> saver(currentState);
							saver.captureListIndex = -1;
							saver.transition = transition;
							Push(extensionSavers, currentState.extensionSaverAvailable, currentState.extensionSaverCount, saver);

							// Set found = true so that PushNonSaver(oldState) happens later
							oldState.storeType = StateStoreType::Positive;
							found = true;
						}
						break;
					case Transition::Negative:
						{
							// Push the positive lookahead information

							ExtensionSaver<TChar> saver(currentState);
							saver.captureListIndex = -1;
							saver.transition = transition;
							Push(extensionSavers, currentState.extensionSaverAvailable, currentState.extensionSaverCount, saver);

							// Set found = true so that PushNonSaver(oldState) happens later
							oldState.storeType = StateStoreType::Negative;
							found = true;
						}
						break;
					case Transition::NegativeFail:
						{
							// NegativeFail will be used when the nagative lookahead failed
						}
						break;
					case Transition::End:
						{
							// Find the corresponding extension saver so that we can know how to deal with a matched sub regex that ends here
							ExtensionSaver extensionSaver = Pop(extensionSavers, currentState.extensionSaverAvailable, currentState.extensionSaverCount);
							switch (extensionSaver.transition->type)
							{
							case Transition::Capture:
								{
									// Write the captured text
									CaptureRecord& capture = result.captures[extensionSaver.captureListIndex];
									capture.length = currentState.reader.Index() + (input - start) - capture.start;
									found = true;
								}
								break;
							case Transition::Positive:
								// Find the last positive lookahead state saver
								for (vint j = currentState.stateSaverCount - 1; j >= 0; j--)
								{
									auto& stateSaver = stateSavers[j];
									if (stateSaver.storeType == StateStoreType::Positive)
									{
										// restore the parsing state just before matching the positive lookahead, since positive lookahead doesn't consume input
										stateSaver.RestoreReaderTo(oldState);
										oldState.stateSaverCount = j;
										stateSaver.RestoreReaderTo(currentState);
										currentState.stateSaverCount = j;
										break;
									}
								}
								found = true;
								break;
							case Transition::Negative:
								// Find the last negative lookahead state saver
								for (vint j = currentState.stateSaverCount - 1; j >= 0; j--)
								{
									auto& stateSaver = stateSavers[j];
									if (stateSaver.storeType == StateStoreType::Negative)
									{
										// restore the parsing state just before matching the negative lookahead, since positive lookahead doesn't consume input
										oldState = stateSaver;
										oldState.storeType = StateStoreType::Other;
										currentState = stateSaver;
										currentState.storeType = StateStoreType::Other;
										i = currentState.minTransition - 1;
										break;
									}
								}
								break;
							default:;
							}
						}
						break;
					default:;
					}

					// Save the parsing state when necessary
					if (found)
					{
						UserData* data = (UserData*)currentState.currentState->userData;
						if (data->NeedKeepState)
						{
							oldState.minTransition = i + 1;
							PushNonSaver(stateSavers, currentState.stateSaverCount, oldState);
						}
						currentState.currentState = transition->target;
						currentState.minTransition = 0;
						break;
					}
				}

				// If no transition from the current state can be used
				if (!found)
				{
					// If there is a chance to do backtracking
					if (currentState.stateSaverCount)
					{
						currentState = PopNonSaver(stateSavers, currentState.stateSaverCount);
						// minTransition - 1 is always valid since the value is stored with adding 1
						// So minTransition - 1 record the transition, which is the reason the parsing state is saved
						if (currentState.currentState->transitions[currentState.minTransition - 1]->type == Transition::Negative)
						{
							// Find the next NegativeFail transition
							// Because when a negative lookahead regex failed to match, it is actually succeeded
							// Since a negative lookahead means we don't want to match this regex
							// TODO: (enumerable) foreach:reversed
							for (vint i = 0; i < currentState.currentState->transitions.Count(); i++)
							{
								Transition* transition = currentState.currentState->transitions[i];
								if (transition->type == Transition::NegativeFail)
								{
									// Restore the state to the target of NegativeFail to let the parsing continue
									currentState.currentState = transition->target;
									currentState.minTransition = 0;
									currentState.storeType = StateStoreType::Other;
									break;
								}
							}
						}
					}
					else
					{
						break;
					}
				}
			}

			if (currentState.currentState->finalState)
			{
				// Keep available captures if succeeded
				result.start = input - start;
				result.length = currentState.reader.Index();
				for (vint i = result.captures.Count() - 1; i >= currentState.captureCount; i--)
				{
					result.captures.RemoveAt(i);
				}
				return true;
			}
			else
			{
				// Clear captures if failed
				result.captures.Clear();
				return false;
			}
		}

		template<typename TChar>
		bool RichInterpretor::Match(const TChar* input, const TChar* start, RichResult& result)
		{
			CharReader<TChar> reader(input);
			while (reader.Read())
			{
				if (MatchHead(reader.Reading(), start, result))
				{
					return true;
				}
			}
			return false;
		}

		const List<U32String>& RichInterpretor::CaptureNames()
		{
			return dfa->captureNames;
		}

		template bool			RichInterpretor::MatchHead<wchar_t>(const wchar_t* input, const wchar_t* start, RichResult& result);
		template bool			RichInterpretor::MatchHead<char8_t>(const char8_t* input, const char8_t* start, RichResult& result);
		template bool			RichInterpretor::MatchHead<char16_t>(const char16_t* input, const char16_t* start, RichResult& result);
		template bool			RichInterpretor::MatchHead<char32_t>(const char32_t* input, const char32_t* start, RichResult& result);

		template bool			RichInterpretor::Match<wchar_t>(const wchar_t* input, const wchar_t* start, RichResult& result);
		template bool			RichInterpretor::Match<char8_t>(const char8_t* input, const char8_t* start, RichResult& result);
		template bool			RichInterpretor::Match<char16_t>(const char16_t* input, const char16_t* start, RichResult& result);
		template bool			RichInterpretor::Match<char32_t>(const char32_t* input, const char32_t* start, RichResult& result);
	}
}

/***********************************************************************
.\AST\REGEXEXPRESSION.CPP
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/


namespace vl
{
	namespace regex_internal
	{

/***********************************************************************
MergeAlgorithm
***********************************************************************/

		class MergeParameter
		{
		public:
			Expression::Map			definitions;
			RegexExpression*		regex = nullptr;
		};

		class MergeAlgorithm : public RegexExpressionAlgorithm<Ptr<Expression>, MergeParameter*>
		{
		public:
			Ptr<Expression> Apply(CharSetExpression* expression, MergeParameter* target) override
			{
				auto result = Ptr(new CharSetExpression);
				CopyFrom(result->ranges, expression->ranges);
				result->reverse = expression->reverse;
				return result;
			}

			Ptr<Expression> Apply(LoopExpression* expression, MergeParameter* target) override
			{
				auto result = Ptr(new LoopExpression);
				result->max = expression->max;
				result->min = expression->min;
				result->preferLong = expression->preferLong;
				result->expression = Invoke(expression->expression, target);
				return result;
			}

			Ptr<Expression> Apply(SequenceExpression* expression, MergeParameter* target) override
			{
				auto result = Ptr(new SequenceExpression);
				result->left = Invoke(expression->left, target);
				result->right = Invoke(expression->right, target);
				return result;
			}

			Ptr<Expression> Apply(AlternateExpression* expression, MergeParameter* target) override
			{
				auto result = Ptr(new AlternateExpression);
				result->left = Invoke(expression->left, target);
				result->right = Invoke(expression->right, target);
				return result;
			}

			Ptr<Expression> Apply(BeginExpression* expression, MergeParameter* target) override
			{
				return Ptr(new BeginExpression);
			}

			Ptr<Expression> Apply(EndExpression* expression, MergeParameter* target) override
			{
				return Ptr(new EndExpression);
			}

			Ptr<Expression> Apply(CaptureExpression* expression, MergeParameter* target) override
			{
				auto result = Ptr(new CaptureExpression);
				result->expression = Invoke(expression->expression, target);
				result->name = expression->name;
				return result;
			}

			Ptr<Expression> Apply(MatchExpression* expression, MergeParameter* target) override
			{
				auto result = Ptr(new MatchExpression);
				result->name = expression->name;
				result->index = expression->index;
				return result;
			}

			Ptr<Expression> Apply(PositiveExpression* expression, MergeParameter* target) override
			{
				auto result = Ptr(new PositiveExpression);
				result->expression = Invoke(expression->expression, target);
				return result;
			}

			Ptr<Expression> Apply(NegativeExpression* expression, MergeParameter* target) override
			{
				auto result = Ptr(new NegativeExpression);
				result->expression = Invoke(expression->expression, target);
				return result;
			}

			Ptr<Expression> Apply(UsingExpression* expression, MergeParameter* target) override
			{
				if (target->definitions.Keys().Contains(expression->name))
				{
					Ptr<Expression> reference = target->definitions[expression->name];
					if (reference)
					{
						return reference;
					}
					else
					{
						throw ArgumentException(L"Regular expression syntax error: Found reference loops in\"" + u32tow(expression->name) + L"\".", L"vl::regex_internal::RegexExpression::Merge", L"");
					}
				}
				else if (target->regex->definitions.Keys().Contains(expression->name))
				{
					target->definitions.Add(expression->name, nullptr);
					Ptr<Expression> result = Invoke(target->regex->definitions[expression->name], target);
					target->definitions.Set(expression->name, result);
					return result;
				}
				else
				{
					throw ArgumentException(L"Regular expression syntax error: Cannot find sub expression reference\"" + u32tow(expression->name) + L"\".", L"vl::regex_internal::RegexExpression::Merge", L"");
				}
			}
		};

/***********************************************************************
CharSetExpression
***********************************************************************/

		bool CharSetExpression::AddRangeWithConflict(CharRange range)
		{
			if (range.begin > range.end)
			{
				char32_t t = range.begin;
				range.begin = range.end;
				range.end = t;
			}
			// TODO: (enumerable) foreach
			for (vint i = 0; i < ranges.Count(); i++)
			{
				if (!(range<ranges[i] || range>ranges[i]))
				{
					return false;
				}
			}
			ranges.Add(range);
			return true;
		}

/***********************************************************************
RegexExpression
***********************************************************************/

		Ptr<Expression> RegexExpression::Merge()
		{
			MergeParameter merge;
			merge.regex = this;
			return MergeAlgorithm().Invoke(expression, &merge);
		}

/***********************************************************************
Expression::Apply
***********************************************************************/

		void CharSetExpression::Apply(IRegexExpressionAlgorithm& algorithm)
		{
			algorithm.Visit(this);
		}

		void LoopExpression::Apply(IRegexExpressionAlgorithm& algorithm)
		{
			algorithm.Visit(this);
		}

		void SequenceExpression::Apply(IRegexExpressionAlgorithm& algorithm)
		{
			algorithm.Visit(this);
		}

		void AlternateExpression::Apply(IRegexExpressionAlgorithm& algorithm)
		{
			algorithm.Visit(this);
		}

		void BeginExpression::Apply(IRegexExpressionAlgorithm& algorithm)
		{
			algorithm.Visit(this);
		}

		void EndExpression::Apply(IRegexExpressionAlgorithm& algorithm)
		{
			algorithm.Visit(this);
		}

		void CaptureExpression::Apply(IRegexExpressionAlgorithm& algorithm)
		{
			algorithm.Visit(this);
		}

		void MatchExpression::Apply(IRegexExpressionAlgorithm& algorithm)
		{
			algorithm.Visit(this);
		}

		void PositiveExpression::Apply(IRegexExpressionAlgorithm& algorithm)
		{
			algorithm.Visit(this);
		}

		void NegativeExpression::Apply(IRegexExpressionAlgorithm& algorithm)
		{
			algorithm.Visit(this);
		}

		void UsingExpression::Apply(IRegexExpressionAlgorithm& algorithm)
		{
			algorithm.Visit(this);
		}
	}
}

/***********************************************************************
.\AST\REGEXEXPRESSION_CANTREATASPURE.CPP
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/


namespace vl
{
	namespace regex_internal
	{

/***********************************************************************
CanTreatAsPureAlgorithm
***********************************************************************/

		class CanTreatAsPureAlgorithm : public RegexExpressionAlgorithm<bool, void*>
		{
		public:
			bool Apply(CharSetExpression* expression, void* target) override
			{
				return true;
			}

			bool Apply(LoopExpression* expression, void* target) override
			{
				return expression->preferLong && Invoke(expression->expression, 0);
			}

			bool Apply(SequenceExpression* expression, void* target) override
			{
				return Invoke(expression->left, 0) && Invoke(expression->right, 0);
			}

			bool Apply(AlternateExpression* expression, void* target) override
			{
				return Invoke(expression->left, 0) && Invoke(expression->right, 0);
			}

			bool Apply(BeginExpression* expression, void* target) override
			{
				return false;
			}

			bool Apply(EndExpression* expression, void* target) override
			{
				return false;
			}

			bool Apply(CaptureExpression* expression, void* target) override
			{
				return Invoke(expression->expression, 0);
			}

			bool Apply(MatchExpression* expression, void* target) override
			{
				return false;
			}

			bool Apply(PositiveExpression* expression, void* target) override
			{
				return false;
			}

			bool Apply(NegativeExpression* expression, void* target) override
			{
				return false;
			}

			bool Apply(UsingExpression* expression, void* target) override
			{
				return false;
			}
		};

/***********************************************************************
Expression
***********************************************************************/

		bool Expression::CanTreatAsPure()
		{
			return CanTreatAsPureAlgorithm().Invoke(this, 0);
		}
	}
}

/***********************************************************************
.\AST\REGEXEXPRESSION_CHARSET.CPP
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/


namespace vl
{
	namespace regex_internal
	{
		class NormalizedCharSet
		{
		public:
			CharRange::List			ranges;
		};

/***********************************************************************
CharSetAlgorithm
***********************************************************************/

		class CharSetAlgorithm : public RegexExpressionAlgorithm<void, NormalizedCharSet*>
		{
		public:
			virtual void Process(CharSetExpression* expression, NormalizedCharSet* target, CharRange range) = 0;

			void Loop(CharSetExpression* expression, CharRange::List& ranges, NormalizedCharSet* target)
			{
				if (expression->reverse)
				{
					char32_t begin = 1;
					// TODO: (enumerable) foreach
					for (vint i = 0; i < ranges.Count(); i++)
					{
						CharRange range = ranges[i];
						if (range.begin > begin)
						{
							Process(expression, target, CharRange(begin, range.begin - 1));
						}
						begin = range.end + 1;
					}
					if (begin <= MaxChar32)
					{
						Process(expression, target, CharRange(begin, MaxChar32));
					}
				}
				else
				{
					// TODO: (enumerable) foreach
					for (vint i = 0; i < ranges.Count(); i++)
					{
						Process(expression, target, ranges[i]);
					}
				}
			}

			void Apply(LoopExpression* expression, NormalizedCharSet* target) override
			{
				Invoke(expression->expression, target);
			}

			void Apply(SequenceExpression* expression, NormalizedCharSet* target) override
			{
				Invoke(expression->left, target);
				Invoke(expression->right, target);
			}

			void Apply(AlternateExpression* expression, NormalizedCharSet* target) override
			{
				Invoke(expression->left, target);
				Invoke(expression->right, target);
			}

			void Apply(BeginExpression* expression, NormalizedCharSet* target) override
			{
			}

			void Apply(EndExpression* expression, NormalizedCharSet* target) override
			{
			}

			void Apply(CaptureExpression* expression, NormalizedCharSet* target) override
			{
				Invoke(expression->expression, target);
			}

			void Apply(MatchExpression* expression, NormalizedCharSet* target) override
			{
			}

			void Apply(PositiveExpression* expression, NormalizedCharSet* target) override
			{
				Invoke(expression->expression, target);
			}

			void Apply(NegativeExpression* expression, NormalizedCharSet* target) override
			{
				Invoke(expression->expression, target);
			}

			void Apply(UsingExpression* expression, NormalizedCharSet* target) override
			{
			}
		};

/***********************************************************************
BuildNormalizedCharSetAlgorithm
***********************************************************************/

		class BuildNormalizedCharSetAlgorithm : public CharSetAlgorithm
		{
		public:
			void Process(CharSetExpression* expression, NormalizedCharSet* target, CharRange range)
			{
				vint index = 0;
				while (index < target->ranges.Count())
				{
					CharRange current = target->ranges[index];
					if (current<range || current>range)
					{
						index++;
					}
					else if (current.begin < range.begin)
					{
						// range   :    [    ?
						// current : [       ]
						target->ranges.RemoveAt(index);
						target->ranges.Add(CharRange(current.begin, range.begin - 1));
						target->ranges.Add(CharRange(range.begin, current.end));
						index++;
					}
					else if (current.begin > range.begin)
					{
						// range  :  [       ]
						// current  :   [    ?
						target->ranges.Add(CharRange(range.begin, current.begin - 1));
						range.begin = current.begin;
					}
					else if (current.end < range.end)
					{
						// range   : [       ]
						// current : [    ]
						range.begin = current.end + 1;
						index++;
					}
					else if (current.end > range.end)
					{
						// range   : [    ]
						// current : [       ]
						target->ranges.RemoveAt(index);
						target->ranges.Add(range);
						target->ranges.Add(CharRange(range.end + 1, current.end));
						return;
					}
					else
					{
						// range   : [       ]
						// current : [       ]
						return;
					}
				}
				target->ranges.Add(range);
			}

			void Apply(CharSetExpression* expression, NormalizedCharSet* target)
			{
				Loop(expression, expression->ranges, target);
			}
		};

/***********************************************************************
SetNormalizedCharSetAlgorithm
***********************************************************************/

		class SetNormalizedCharSetAlgorithm : public CharSetAlgorithm
		{
		public:
			void Process(CharSetExpression* expression, NormalizedCharSet* target, CharRange range)
			{
				// TODO: (enumerable) foreach
				for (vint j = 0; j < target->ranges.Count(); j++)
				{
					CharRange targetRange = target->ranges[j];
					if (range.begin <= targetRange.begin && targetRange.end <= range.end)
					{
						expression->ranges.Add(targetRange);
					}
				}
			}

			void Apply(CharSetExpression* expression, NormalizedCharSet* target)
			{
				CharRange::List source;
				CopyFrom(source, expression->ranges);
				expression->ranges.Clear();
				Loop(expression, source, target);
				expression->reverse = false;
			}
		};

/***********************************************************************
Expression
***********************************************************************/

		void Expression::NormalizeCharSet(CharRange::List& subsets)
		{
			NormalizedCharSet normalized;
			BuildNormalizedCharSetAlgorithm().Invoke(this, &normalized);
			SetNormalizedCharSetAlgorithm().Invoke(this, &normalized);
			CopyFrom(subsets, normalized.ranges);
		}

		void Expression::CollectCharSet(CharRange::List& subsets)
		{
			NormalizedCharSet normalized;
			CopyFrom(normalized.ranges, subsets);
			BuildNormalizedCharSetAlgorithm().Invoke(this, &normalized);
			CopyFrom(subsets, normalized.ranges);
		}

		void Expression::ApplyCharSet(CharRange::List& subsets)
		{
			NormalizedCharSet normalized;
			CopyFrom(normalized.ranges, subsets);
			SetNormalizedCharSetAlgorithm().Invoke(this, &normalized);
		}
	}
}

/***********************************************************************
.\AST\REGEXEXPRESSION_GENERATEEPSILONNFA.CPP
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/


namespace vl
{
	namespace regex_internal
	{

/***********************************************************************
EpsilonNfaAlgorithm
***********************************************************************/

		class EpsilonNfaInfo
		{
		public:
			Ptr<Automaton>		automaton;
		};

		class EpsilonNfa
		{
		public:
			State* start;
			State* end;

			EpsilonNfa()
			{
				start = 0;
				end = 0;
			}
		};

		class EpsilonNfaAlgorithm : public RegexExpressionAlgorithm<EpsilonNfa, Automaton*>
		{
		public:
			EpsilonNfa Connect(EpsilonNfa a, EpsilonNfa b, Automaton* target)
			{
				if (a.start)
				{
					target->NewEpsilon(a.end, b.start);
					a.end = b.end;
					return a;
				}
				else
				{
					return b;
				}
			}

			EpsilonNfa Apply(CharSetExpression* expression, Automaton* target) override
			{
				EpsilonNfa nfa;
				nfa.start = target->NewState();
				nfa.end = target->NewState();
				// TODO: (enumerable) foreach
				for (vint i = 0; i < expression->ranges.Count(); i++)
				{
					target->NewChars(nfa.start, nfa.end, expression->ranges[i]);
				}
				return nfa;
			}

			EpsilonNfa Apply(LoopExpression* expression, Automaton* target) override
			{
				EpsilonNfa head;
				for (vint i = 0; i < expression->min; i++)
				{
					EpsilonNfa body = Invoke(expression->expression, target);
					head = Connect(head, body, target);
				}
				if (expression->max == -1)
				{
					EpsilonNfa body = Invoke(expression->expression, target);
					if (!head.start)
					{
						head.start = head.end = target->NewState();
					}
					State* loopBegin = head.end;
					State* loopEnd = target->NewState();
					if (expression->preferLong)
					{
						target->NewEpsilon(loopBegin, body.start);
						target->NewEpsilon(body.end, loopBegin);
						target->NewNop(loopBegin, loopEnd);
					}
					else
					{
						target->NewNop(loopBegin, loopEnd);
						target->NewEpsilon(loopBegin, body.start);
						target->NewEpsilon(body.end, loopBegin);
					}
					head.end = loopEnd;
				}
				else if (expression->max > expression->min)
				{
					for (vint i = expression->min; i < expression->max; i++)
					{
						EpsilonNfa body = Invoke(expression->expression, target);
						State* start = target->NewState();
						State* end = target->NewState();
						if (expression->preferLong)
						{
							target->NewEpsilon(start, body.start);
							target->NewEpsilon(body.end, end);
							target->NewNop(start, end);
						}
						else
						{
							target->NewNop(start, end);
							target->NewEpsilon(start, body.start);
							target->NewEpsilon(body.end, end);
						}
						body.start = start;
						body.end = end;
						head = Connect(head, body, target);
					}
				}
				return head;
			}

			EpsilonNfa Apply(SequenceExpression* expression, Automaton* target) override
			{
				EpsilonNfa a = Invoke(expression->left, target);
				EpsilonNfa b = Invoke(expression->right, target);
				return Connect(a, b, target);
			}

			EpsilonNfa Apply(AlternateExpression* expression, Automaton* target) override
			{
				EpsilonNfa result;
				result.start = target->NewState();
				result.end = target->NewState();
				EpsilonNfa a = Invoke(expression->left, target);
				EpsilonNfa b = Invoke(expression->right, target);
				target->NewEpsilon(result.start, a.start);
				target->NewEpsilon(a.end, result.end);
				target->NewEpsilon(result.start, b.start);
				target->NewEpsilon(b.end, result.end);
				return result;
			}

			EpsilonNfa Apply(BeginExpression* expression, Automaton* target) override
			{
				EpsilonNfa result;
				result.start = target->NewState();
				result.end = target->NewState();
				target->NewBeginString(result.start, result.end);
				return result;
			}

			EpsilonNfa Apply(EndExpression* expression, Automaton* target) override
			{
				EpsilonNfa result;
				result.start = target->NewState();
				result.end = target->NewState();
				target->NewEndString(result.start, result.end);
				return result;
			}

			EpsilonNfa Apply(CaptureExpression* expression, Automaton* target) override
			{
				EpsilonNfa result;
				result.start = target->NewState();
				result.end = target->NewState();

				vint capture = -1;
				if (expression->name != U32String::Empty)
				{
					capture = target->captureNames.IndexOf(expression->name);
					if (capture == -1)
					{
						capture = target->captureNames.Count();
						target->captureNames.Add(expression->name);
					}
				}

				EpsilonNfa body = Invoke(expression->expression, target);
				target->NewCapture(result.start, body.start, capture);
				target->NewEnd(body.end, result.end);
				return result;
			}

			EpsilonNfa Apply(MatchExpression* expression, Automaton* target) override
			{
				vint capture = -1;
				if (expression->name != U32String::Empty)
				{
					capture = target->captureNames.IndexOf(expression->name);
					if (capture == -1)
					{
						capture = target->captureNames.Count();
						target->captureNames.Add(expression->name);
					}
				}
				EpsilonNfa result;
				result.start = target->NewState();
				result.end = target->NewState();
				target->NewMatch(result.start, result.end, capture, expression->index);
				return result;
			}

			EpsilonNfa Apply(PositiveExpression* expression, Automaton* target) override
			{
				EpsilonNfa result;
				result.start = target->NewState();
				result.end = target->NewState();
				EpsilonNfa body = Invoke(expression->expression, target);
				target->NewPositive(result.start, body.start);
				target->NewEnd(body.end, result.end);
				return result;
			}

			EpsilonNfa Apply(NegativeExpression* expression, Automaton* target) override
			{
				EpsilonNfa result;
				result.start = target->NewState();
				result.end = target->NewState();
				EpsilonNfa body = Invoke(expression->expression, target);
				target->NewNegative(result.start, body.start);
				target->NewEnd(body.end, result.end);
				target->NewNegativeFail(result.start, result.end);
				return result;
			}

			EpsilonNfa Apply(UsingExpression* expression, Automaton* target) override
			{
				CHECK_FAIL(L"RegexExpression::GenerateEpsilonNfa()#UsingExpression cannot create state machine.");
			}
		};

/***********************************************************************
Expression
***********************************************************************/

		Ptr<Automaton> Expression::GenerateEpsilonNfa()
		{
			auto automaton = Ptr(new Automaton);
			EpsilonNfa result = EpsilonNfaAlgorithm().Invoke(this, automaton.Obj());
			automaton->startState = result.start;
			result.end->finalState = true;
			return automaton;
		}
	}
}

/***********************************************************************
.\AST\REGEXEXPRESSION_HASNOEXTENSION.CPP
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/


namespace vl
{
	namespace regex_internal
	{

/***********************************************************************
HasNoExtensionAlgorithm
***********************************************************************/

		class HasNoExtensionAlgorithm : public RegexExpressionAlgorithm<bool, void*>
		{
		public:
			bool Apply(CharSetExpression* expression, void* target) override
			{
				return true;
			}

			bool Apply(LoopExpression* expression, void* target) override
			{
				return expression->preferLong && Invoke(expression->expression, 0);
			}

			bool Apply(SequenceExpression* expression, void* target) override
			{
				return Invoke(expression->left, 0) && Invoke(expression->right, 0);
			}

			bool Apply(AlternateExpression* expression, void* target) override
			{
				return Invoke(expression->left, 0) && Invoke(expression->right, 0);
			}

			bool Apply(BeginExpression* expression, void* target) override
			{
				return false;
			}

			bool Apply(EndExpression* expression, void* target) override
			{
				return false;
			}

			bool Apply(CaptureExpression* expression, void* target) override
			{
				return false;
			}

			bool Apply(MatchExpression* expression, void* target) override
			{
				return false;
			}

			bool Apply(PositiveExpression* expression, void* target) override
			{
				return false;
			}

			bool Apply(NegativeExpression* expression, void* target) override
			{
				return false;
			}

			bool Apply(UsingExpression* expression, void* target) override
			{
				return false;
			}
		};

/***********************************************************************
Expression
***********************************************************************/

		bool Expression::HasNoExtension()
		{
			return HasNoExtensionAlgorithm().Invoke(this, 0);
		}
	}
}

/***********************************************************************
.\AST\REGEXEXPRESSION_ISEQUAL.CPP
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/


namespace vl
{
	namespace regex_internal
	{

/***********************************************************************
IsEqualAlgorithm
***********************************************************************/

		class IsEqualAlgorithm : public RegexExpressionAlgorithm<bool, Expression*>
		{
		public:
			bool Apply(CharSetExpression* expression, Expression* target) override
			{
				CharSetExpression* expected = dynamic_cast<CharSetExpression*>(target);
				if (expected)
				{
					if (expression->reverse != expected->reverse)return false;
					if (expression->ranges.Count() != expected->ranges.Count())return false;
					// TODO: (enumerable) foreach:indexed
					for (vint i = 0; i < expression->ranges.Count(); i++)
					{
						if (expression->ranges[i] != expected->ranges[i])return false;
					}
					return true;
				}
				return false;
			}

			bool Apply(LoopExpression* expression, Expression* target) override
			{
				LoopExpression* expected = dynamic_cast<LoopExpression*>(target);
				if (expected)
				{
					if (expression->min != expected->min)return false;
					if (expression->max != expected->max)return false;
					if (expression->preferLong != expected->preferLong)return false;
					if (!Invoke(expression->expression, expected->expression.Obj()))return false;
					return true;
				}
				return false;
			}

			bool Apply(SequenceExpression* expression, Expression* target) override
			{
				SequenceExpression* expected = dynamic_cast<SequenceExpression*>(target);
				if (expected)
				{
					if (!Invoke(expression->left, expected->left.Obj()))return false;
					if (!Invoke(expression->right, expected->right.Obj()))return false;
					return true;
				}
				return false;
			}

			bool Apply(AlternateExpression* expression, Expression* target) override
			{
				AlternateExpression* expected = dynamic_cast<AlternateExpression*>(target);
				if (expected)
				{
					if (!Invoke(expression->left, expected->left.Obj()))return false;
					if (!Invoke(expression->right, expected->right.Obj()))return false;
					return true;
				}
				return false;
			}

			bool Apply(BeginExpression* expression, Expression* target) override
			{
				BeginExpression* expected = dynamic_cast<BeginExpression*>(target);
				if (expected)
				{
					return true;
				}
				return false;
			}

			bool Apply(EndExpression* expression, Expression* target) override
			{
				EndExpression* expected = dynamic_cast<EndExpression*>(target);
				if (expected)
				{
					return true;
				}
				return false;
			}

			bool Apply(CaptureExpression* expression, Expression* target) override
			{
				CaptureExpression* expected = dynamic_cast<CaptureExpression*>(target);
				if (expected)
				{
					if (expression->name != expected->name)return false;
					if (!Invoke(expression->expression, expected->expression.Obj()))return false;
					return true;
				}
				return false;
			}

			bool Apply(MatchExpression* expression, Expression* target) override
			{
				MatchExpression* expected = dynamic_cast<MatchExpression*>(target);
				if (expected)
				{
					if (expression->name != expected->name)return false;
					if (expression->index != expected->index)return false;
					return true;
				}
				return false;
			}

			bool Apply(PositiveExpression* expression, Expression* target) override
			{
				PositiveExpression* expected = dynamic_cast<PositiveExpression*>(target);
				if (expected)
				{
					if (!Invoke(expression->expression, expected->expression.Obj()))return false;
					return true;
				}
				return false;
			}

			bool Apply(NegativeExpression* expression, Expression* target) override
			{
				NegativeExpression* expected = dynamic_cast<NegativeExpression*>(target);
				if (expected)
				{
					if (!Invoke(expression->expression, expected->expression.Obj()))return false;
					return true;
				}
				return false;
			}

			bool Apply(UsingExpression* expression, Expression* target) override
			{
				UsingExpression* expected = dynamic_cast<UsingExpression*>(target);
				if (expected)
				{
					if (expression->name != expected->name)return false;
					return true;
				}
				return false;
			}
		};

/***********************************************************************
Expression
***********************************************************************/

		bool Expression::IsEqual(vl::regex_internal::Expression* expression)
		{
			return IsEqualAlgorithm().Invoke(this, expression);
		}
	}
}

/***********************************************************************
.\AST\REGEXPARSER.CPP
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/


namespace vl
{
	namespace regex_internal
	{

/***********************************************************************
Helper Functions
***********************************************************************/

		bool IsChar(const char32_t*& input, char32_t c)
		{
			if (*input == c)
			{
				input++;
				return true;
			}
			else
			{
				return false;
			}
		}

		template<vint Size>
		bool IsChars(const char32_t*& input, const char32_t(&chars)[Size])
		{
			for (char32_t c : chars)
			{
				if (*input == c)
				{
					input++;
					return true;
				}
			}
			return false;
		}

		template<vint Size>
		bool IsStr(const char32_t*& input, const char32_t(&str)[Size])
		{
			for (vint i = 0; i < Size - 1; i++)
			{
				if (input[i] != str[i]) return false;
			}
			input += Size - 1;
			return true;
		}

		bool IsPositiveInteger(const char32_t*& input, vint& number)
		{
			bool readed = false;
			number = 0;
			while (U'0' <= *input && *input <= U'9')
			{
				number = number * 10 + (*input++) - U'0';
				readed = true;
			}
			return readed;
		}

		bool IsName(const char32_t*& input, U32String& name)
		{
			const char32_t* read = input;
			if ((U'A' <= *read && *read <= U'Z') || (U'a' <= *read && *read <= U'z') || *read == U'_')
			{
				read++;
				while ((U'A' <= *read && *read <= U'Z') || (U'a' <= *read && *read <= U'z') || (U'0' <= *read && *read <= U'9') || *read == U'_')
				{
					read++;
				}
			}
			if (input == read)
			{
				return false;
			}
			else
			{
				name = U32String::CopyFrom(input, vint(read - input));
				input = read;
				return true;
			}
		}

		Ptr<LoopExpression> ParseLoop(const char32_t*& input)
		{
			vint min = 0;
			vint max = 0;
			if (!*input)
			{
				return 0;
			}
			else if (IsChar(input, U'+'))
			{
				min = 1;
				max = -1;
			}
			else if (IsChar(input, U'*'))
			{
				min = 0;
				max = -1;
			}
			else if (IsChar(input, U'?'))
			{
				min = 0;
				max = 1;
			}
			else if (IsChar(input, U'{'))
			{
				if (IsPositiveInteger(input, min))
				{
					if (IsChar(input, U','))
					{
						if (!IsPositiveInteger(input, max))
						{
							max = -1;
						}
					}
					else
					{
						max = min;
					}
					if (!IsChar(input, U'}'))
					{
						goto THROW_EXCEPTION;
					}
				}
				else
				{
					goto THROW_EXCEPTION;
				}
			}
			else
			{
				return 0;
			}

			{
				auto expression = Ptr(new LoopExpression);
				expression->min = min;
				expression->max = max;
				expression->preferLong = !IsChar(input, U'?');
				return expression;
			}
		THROW_EXCEPTION:
			throw ArgumentException(L"Regular expression syntax error: Illegal loop expression.", L"vl::regex_internal::ParseLoop", L"input");
		}

		Ptr<Expression> ParseCharSet(const char32_t*& input)
		{
			if (!*input)
			{
				return 0;
			}
			else if (IsChar(input, U'^'))
			{
				return Ptr(new BeginExpression);
			}
			else if (IsChar(input, U'$'))
			{
				return Ptr(new EndExpression);
			}
			else if (IsChar(input, U'\\') || IsChar(input, U'/'))
			{
				auto expression = Ptr(new CharSetExpression);
				expression->reverse = false;
				switch (*input)
				{
				case U'.':
					expression->ranges.Add(CharRange(1, MaxChar32));
					break;
				case U'r':
					expression->ranges.Add(CharRange(U'\r', U'\r'));
					break;
				case U'n':
					expression->ranges.Add(CharRange(U'\n', U'\n'));
					break;
				case U't':
					expression->ranges.Add(CharRange(U'\t', U'\t'));
					break;
				case U'\\':case U'/':case U'(':case U')':case U'+':case U'*':case U'?':case U'|':
				case U'{':case U'}':case U'[':case U']':case U'<':case U'>':
				case U'^':case U'$':case U'!':case U'=':
					expression->ranges.Add(CharRange(*input, *input));
					break;
				case U'S':
					expression->reverse = true;
				case U's':
					expression->ranges.Add(CharRange(U' ', U' '));
					expression->ranges.Add(CharRange(U'\r', U'\r'));
					expression->ranges.Add(CharRange(U'\n', U'\n'));
					expression->ranges.Add(CharRange(U'\t', U'\t'));
					break;
				case U'D':
					expression->reverse = true;
				case U'd':
					expression->ranges.Add(CharRange(U'0', U'9'));
					break;
				case U'L':
					expression->reverse = true;
				case U'l':
					expression->ranges.Add(CharRange(U'_', U'_'));
					expression->ranges.Add(CharRange(U'A', U'Z'));
					expression->ranges.Add(CharRange(U'a', U'z'));
					break;
				case U'W':
					expression->reverse = true;
				case U'w':
					expression->ranges.Add(CharRange(U'_', U'_'));
					expression->ranges.Add(CharRange(U'0', U'9'));
					expression->ranges.Add(CharRange(U'A', U'Z'));
					expression->ranges.Add(CharRange(U'a', U'z'));
					break;
				default:
					throw ArgumentException(L"Regular expression syntax error: Illegal character escaping.", L"vl::regex_internal::ParseCharSet", L"input");
				}
				input++;
				return expression;
			}
			else if (IsChar(input, U'['))
			{
				auto expression = Ptr(new CharSetExpression);
				if (IsChar(input, U'^'))
				{
					expression->reverse = true;
				}
				else
				{
					expression->reverse = false;
				}
				bool midState = false;
				char32_t a = U'\0';
				char32_t b = U'\0';
				while (true)
				{
					if (IsChar(input, U'\\') || IsChar(input, U'/'))
					{
						char32_t c = U'\0';
						switch (*input)
						{
						case U'r':
							c = U'\r';
							break;
						case U'n':
							c = U'\n';
							break;
						case U't':
							c = U'\t';
							break;
						case U'-':case U'[':case U']':case U'\\':case U'/':case U'^':case U'$':
							c = *input;
							break;
						default:
							throw ArgumentException(L"Regular expression syntax error: Illegal character escaping, only \"rnt-[]\\/\" are legal escaped characters in [].", L"vl::regex_internal::ParseCharSet", L"input");
						}
						input++;
						midState ? b = c : a = c;
						midState = !midState;
					}
					else if (IsChars(input, U"-]"))
					{
						goto THROW_EXCEPTION;
					}
					else if (*input)
					{
						midState ? b = *input++ : a = *input++;
						midState = !midState;
					}
					else
					{
						goto THROW_EXCEPTION;
					}
					if (IsChar(input, U']'))
					{
						if (midState)
						{
							b = a;
						}
						if (!expression->AddRangeWithConflict(CharRange(a, b)))
						{
							goto THROW_EXCEPTION;
						}
						break;
					}
					else if (IsChar(input, U'-'))
					{
						if (!midState)
						{
							goto THROW_EXCEPTION;
						}
					}
					else
					{
						if (midState)
						{
							b = a;
						}
						if (expression->AddRangeWithConflict(CharRange(a, b)))
						{
							midState = false;
						}
						else
						{
							goto THROW_EXCEPTION;
						}
					}
				}
				return expression;
			THROW_EXCEPTION:
				throw ArgumentException(L"Regular expression syntax error: Illegal character set definition.");
			}
			else if (IsChars(input, U"()+*?{}|"))
			{
				input--;
				return 0;
			}
			else
			{
				auto expression = Ptr(new CharSetExpression);
				expression->reverse = false;
				expression->ranges.Add(CharRange(*input, *input));
				input++;
				return expression;
			}
		}

		Ptr<Expression> ParseFunction(const char32_t*& input)
		{
			if (IsStr(input, U"(="))
			{
				Ptr<Expression> sub = ParseExpression(input);
				if (!IsChar(input, U')'))
				{
					goto NEED_RIGHT_BRACKET;
				}
				auto expression = Ptr(new PositiveExpression);
				expression->expression = sub;
				return expression;
			}
			else if (IsStr(input, U"(!"))
			{
				Ptr<Expression> sub = ParseExpression(input);
				if (!IsChar(input, U')'))
				{
					goto NEED_RIGHT_BRACKET;
				}
				auto expression = Ptr(new NegativeExpression);
				expression->expression = sub;
				return expression;
			}
			else if (IsStr(input, U"(<&"))
			{
				U32String name;
				if (!IsName(input, name))
				{
					goto NEED_NAME;
				}
				if (!IsChar(input, U'>'))
				{
					goto NEED_GREATER;
				}
				if (!IsChar(input, U')'))
				{
					goto NEED_RIGHT_BRACKET;
				}
				auto expression = Ptr(new UsingExpression);
				expression->name = name;
				return expression;
			}
			else if (IsStr(input, U"(<$"))
			{
				U32String name;
				vint index = -1;
				if (IsName(input, name))
				{
					if (IsChar(input, U';'))
					{
						if (!IsPositiveInteger(input, index))
						{
							goto NEED_NUMBER;
						}
					}
				}
				else if (!IsPositiveInteger(input, index))
				{
					goto NEED_NUMBER;
				}
				if (!IsChar(input, U'>'))
				{
					goto NEED_GREATER;
				}
				if (!IsChar(input, U')'))
				{
					goto NEED_RIGHT_BRACKET;
				}
				auto expression = Ptr(new MatchExpression);
				expression->name = name;
				expression->index = index;
				return expression;
			}
			else if (IsStr(input, U"(<"))
			{
				U32String name;
				if (!IsName(input, name))
				{
					goto NEED_NAME;
				}
				if (!IsChar(input, U'>'))
				{
					goto NEED_GREATER;
				}
				auto sub = ParseExpression(input);
				if (!IsChar(input, U')'))
				{
					goto NEED_RIGHT_BRACKET;
				}
				auto expression = Ptr(new CaptureExpression);
				expression->name = name;
				expression->expression = sub;
				return expression;
			}
			else if (IsStr(input, U"(?"))
			{
				auto sub = ParseExpression(input);
				if (!IsChar(input, U')'))
				{
					goto NEED_RIGHT_BRACKET;
				}
				auto expression = Ptr(new CaptureExpression);
				expression->expression = sub;
				return expression;
			}
			else if (IsChar(input, U'('))
			{
				auto sub = ParseExpression(input);
				if (!IsChar(input, U')'))
				{
					goto NEED_RIGHT_BRACKET;
				}
				return sub;
			}
			else
			{
				return 0;
			}
		NEED_RIGHT_BRACKET:
			throw ArgumentException(L"Regular expression syntax error: \")\" expected.", L"vl::regex_internal::ParseFunction", L"input");
		NEED_GREATER:
			throw ArgumentException(L"Regular expression syntax error: \">\" expected.", L"vl::regex_internal::ParseFunction", L"input");
		NEED_NAME:
			throw ArgumentException(L"Regular expression syntax error: Identifier expected.", L"vl::regex_internal::ParseFunction", L"input");
		NEED_NUMBER:
			throw ArgumentException(L"Regular expression syntax error: Number expected.", L"vl::regex_internal::ParseFunction", L"input");
		}

		Ptr<Expression> ParseUnit(const char32_t*& input)
		{
			Ptr<Expression> unit = ParseCharSet(input);
			if (!unit)
			{
				unit = ParseFunction(input);
			}
			if (!unit)
			{
				return 0;
			}
			Ptr<LoopExpression> loop;
			while ((loop = ParseLoop(input)))
			{
				loop->expression = unit;
				unit = loop;
			}
			return unit;
		}

		Ptr<Expression> ParseJoin(const char32_t*& input)
		{
			auto expression = ParseUnit(input);
			while (true)
			{
				auto right = ParseUnit(input);
				if (right)
				{
					auto sequence = Ptr(new SequenceExpression);
					sequence->left = expression;
					sequence->right = right;
					expression = sequence;
				}
				else
				{
					break;
				}
			}
			return expression;
		}

		Ptr<Expression> ParseAlt(const char32_t*& input)
		{
			auto expression = ParseJoin(input);
			while (true)
			{
				if (IsChar(input, U'|'))
				{
					auto right = ParseJoin(input);
					if (right)
					{
						auto alternate = Ptr(new AlternateExpression);
						alternate->left = expression;
						alternate->right = right;
						expression = alternate;
					}
					else
					{
						throw ArgumentException(L"Regular expression syntax error: Expression expected.", L"vl::regex_internal::ParseAlt", L"input");
					}
				}
				else
				{
					break;
				}
			}
			return expression;
		}

		Ptr<Expression> ParseExpression(const char32_t*& input)
		{
			return ParseAlt(input);
		}

		Ptr<RegexExpression> ParseRegexExpression(const U32String& code)
		{
			auto regex = Ptr(new RegexExpression);
			const char32_t* start = code.Buffer();
			const char32_t* input = start;
			try
			{
				while (IsStr(input, U"(<#"))
				{
					U32String name;
					if (!IsName(input, name))
					{
						throw ArgumentException(L"Regular expression syntax error: Identifier expected.", L"vl::regex_internal::ParseRegexExpression", L"code");
					}
					if (!IsChar(input, U'>'))
					{
						throw ArgumentException(L"Regular expression syntax error: \">\" expected.", L"vl::regex_internal::ParseFunction", L"input");
					}
					Ptr<Expression> sub = ParseExpression(input);
					if (!IsChar(input, U')'))
					{
						throw ArgumentException(L"Regular expression syntax error: \")\" expected.", L"vl::regex_internal::ParseFunction", L"input");
					}
					if (regex->definitions.Keys().Contains(name))
					{
						throw ArgumentException(L"Regular expression syntax error: Found duplicated sub expression name: \"" + u32tow(name) + L"\". ", L"vl::regex_internal::ParseFunction", L"input");
					}
					else
					{
						regex->definitions.Add(name, sub);
					}
				}
				regex->expression = ParseExpression(input);
				if (!regex->expression)
				{
					throw ArgumentException(L"Regular expression syntax error: Expression expected.", L"vl::regex_internal::ParseUnit", L"input");
				}
				if (*input)
				{
					throw ArgumentException(L"Regular expression syntax error: Found unnecessary tokens.", L"vl::regex_internal::ParseUnit", L"input");
				}
				return regex;
			}
			catch (const ArgumentException& e)
			{
				throw RegexException(e.Message(), code, input - start);
			}
		}

		U32String EscapeTextForRegex(const U32String& literalString)
		{
			U32String result;
			for (vint i = 0; i < literalString.Length(); i++)
			{
				char32_t c = literalString[i];
				switch (c)
				{
				case U'\\':case U'/':case U'(':case U')':case U'+':case U'*':case U'?':case U'|':
				case U'{':case U'}':case U'[':case U']':case U'<':case U'>':
				case U'^':case U'$':case U'!':case U'=':
					result += U32String(U"\\") + U32String::FromChar(c);
					break;
				case U'\r':
					result += U"\\r";
					break;
				case U'\n':
					result += U"\\n";
					break;
				case U'\t':
					result += U"\\t";
					break;
				default:
					result += U32String::FromChar(c);
				}
			}
			return result;
		}

		U32String UnescapeTextForRegex(const U32String& escapedText)
		{
			U32String result;
			for (vint i = 0; i < escapedText.Length(); i++)
			{
				char32_t c = escapedText[i];
				if (c == U'\\' || c == U'/')
				{
					if (i < escapedText.Length() - 1)
					{
						i++;
						c = escapedText[i];
						switch (c)
						{
						case U'r':
							result += U"\r";
							break;
						case U'n':
							result += U"\n";
							break;
						case U't':
							result += U"\t";
							break;
						default:
							result += U32String::FromChar(c);
						}
						continue;
					}
				}
				result += U32String::FromChar(c);
			}
			return result;
		}

		U32String NormalizeEscapedTextForRegex(const U32String& escapedText)
		{
			U32String result;
			for (vint i = 0; i < escapedText.Length(); i++)
			{
				char32_t c = escapedText[i];
				if (c == U'\\' || c == U'/')
				{
					if (i < escapedText.Length() - 1)
					{
						i++;
						c = escapedText[i];
						result += U32String(U"\\") + U32String::FromChar(c);
						continue;
					}
				}
				result += U32String::FromChar(c);
			}
			return result;
		}

		bool IsRegexEscapedLiteralString(const U32String& regex)
		{
			for (vint i = 0; i < regex.Length(); i++)
			{
				char32_t c = regex[i];
				if (c == U'\\' || c == U'/')
				{
					i++;
				}
				else
				{
					switch (c)
					{
					case U'\\':case U'/':case U'(':case U')':case U'+':case U'*':case U'?':case U'|':
					case U'{':case U'}':case U'[':case U']':case U'<':case U'>':
					case U'^':case U'$':case U'!':case U'=':
						return false;
					}
				}
			}
			return true;
		}
	}
}

/***********************************************************************
.\AST\REGEXWRITER.CPP
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/


namespace vl
{
	namespace regex
	{
		using namespace vl::regex_internal;

/***********************************************************************
RegexNode
***********************************************************************/

		RegexNode::RegexNode(Ptr<vl::regex_internal::Expression> _expression)
			:expression(_expression)
		{
		}

		RegexNode RegexNode::Some()const
		{
			return Loop(1, -1);
		}

		RegexNode RegexNode::Any()const
		{
			return Loop(0, -1);
		}

		RegexNode RegexNode::Opt()const
		{
			return Loop(0, 1);
		}

		RegexNode RegexNode::Loop(vint min, vint max)const
		{
			auto target = Ptr(new LoopExpression);
			target->min = min;
			target->max = max;
			target->preferLong = true;
			target->expression = expression;
			return RegexNode(target);
		}

		RegexNode RegexNode::AtLeast(vint min)const
		{
			return Loop(min, -1);
		}

		RegexNode RegexNode::operator+(const RegexNode& node)const
		{
			auto target = Ptr(new SequenceExpression);
			target->left = expression;
			target->right = node.expression;
			return RegexNode(target);
		}

		RegexNode RegexNode::operator|(const RegexNode& node)const
		{
			auto target = Ptr(new AlternateExpression);
			target->left = expression;
			target->right = node.expression;
			return RegexNode(target);
		}

		RegexNode RegexNode::operator+()const
		{
			auto target = Ptr(new PositiveExpression);
			target->expression = expression;
			return RegexNode(target);
		}

		RegexNode RegexNode::operator-()const
		{
			auto target = Ptr(new NegativeExpression);
			target->expression = expression;
			return RegexNode(target);
		}

		RegexNode RegexNode::operator!()const
		{
			auto source = dynamic_cast<CharSetExpression*>(expression.Obj());
			CHECK_ERROR(source, L"RegexNode::operator!()#operator ! can only applies on charset expressions.");
			auto target = Ptr(new CharSetExpression);
			CopyFrom(target->ranges, source->ranges);
			target->reverse = !source->reverse;
			return RegexNode(target);
		}

		RegexNode RegexNode::operator%(const RegexNode& node)const
		{
			auto left = dynamic_cast<CharSetExpression*>(expression.Obj());
			auto right = dynamic_cast<CharSetExpression*>(node.expression.Obj());
			CHECK_ERROR(left && right && !left->reverse && !right->reverse, L"RegexNode::operator%(const RegexNode&)#operator % only connects non-reverse charset expressions.");
			auto target = Ptr(new CharSetExpression);
			target->reverse = false;
			CopyFrom(target->ranges, left->ranges);
			// TODO: (enumerable) foreach
			for (vint i = 0; i < right->ranges.Count(); i++)
			{
				if (!target->AddRangeWithConflict(right->ranges[i]))
				{
					CHECK_ERROR(false, L"RegexNode::operator%(const RegexNode&)#Failed to create charset expression from operator %.");
				}
			}
			return RegexNode(target);
		}

/***********************************************************************
Regex Writer
***********************************************************************/

		RegexNode rCapture(const U32String& name, const RegexNode& node)
		{
			auto target = Ptr(new CaptureExpression);
			target->name = name;
			target->expression = node.expression;
			return RegexNode(target);
		}

		RegexNode rUsing(const U32String& name)
		{
			auto target = Ptr(new UsingExpression);
			target->name = name;
			return RegexNode(target);
		}

		RegexNode rMatch(const U32String& name, vint index)
		{
			auto target = Ptr(new MatchExpression);
			target->name = name;
			target->index = index;
			return RegexNode(target);
		}

		RegexNode rMatch(vint index)
		{
			auto target = Ptr(new MatchExpression);
			target->index = index;
			return RegexNode(target);
		}

		RegexNode rBegin()
		{
			return RegexNode(Ptr(new BeginExpression));
		}

		RegexNode rEnd()
		{
			return RegexNode(Ptr(new EndExpression));
		}

		RegexNode rC(char32_t a, char32_t b)
		{
			if (!b)b = a;
			auto target = Ptr(new CharSetExpression);
			target->reverse = false;
			target->AddRangeWithConflict(CharRange(a, b));
			return RegexNode(target);
		}

		RegexNode r_d()
		{
			return rC(U'0', U'9');
		}

		RegexNode r_l()
		{
			return rC(U'a', U'z') % rC(U'A', U'Z') % rC(U'_');
		}

		RegexNode r_w()
		{
			return rC(U'0', U'9') % rC(U'a', U'z') % rC(U'A', U'Z') % rC(U'_');
		}

		RegexNode rAnyChar()
		{
			return rC(1, MaxChar32);
		}
	}
}

/***********************************************************************
.\AUTOMATON\REGEXAUTOMATON.CPP
***********************************************************************/
/***********************************************************************
Author: Zihan Chen (vczh)
Licensed under https://github.com/vczh-libraries/License
***********************************************************************/


namespace vl
{
	namespace regex_internal
	{
		using namespace collections;

/***********************************************************************
Automaton
***********************************************************************/

		Automaton::Automaton()
		{
			startState = 0;
		}

		State* Automaton::NewState()
		{
			auto state = Ptr(new State);
			state->finalState = false;
			state->userData = 0;
			states.Add(state);
			return state.Obj();
		}

		Transition* Automaton::NewTransition(State* start, State* end)
		{
			auto transition = Ptr(new Transition);
			transition->source = start;
			transition->target = end;
			start->transitions.Add(transition.Obj());
			end->inputs.Add(transition.Obj());
			transitions.Add(transition);
			return transition.Obj();
		}

		Transition* Automaton::NewChars(State* start, State* end, CharRange range)
		{
			auto transition = NewTransition(start, end);
			transition->type = Transition::Chars;
			transition->range = range;
			return transition;
		}

		Transition* Automaton::NewEpsilon(State* start, State* end)
		{
			auto transition = NewTransition(start, end);
			transition->type = Transition::Epsilon;
			return transition;
		}

		Transition* Automaton::NewBeginString(State* start, State* end)
		{
			auto transition = NewTransition(start, end);
			transition->type = Transition::BeginString;
			return transition;
		}

		Transition* Automaton::NewEndString(State* start, State* end)
		{
			auto transition = NewTransition(start, end);
			transition->type = Transition::EndString;
			return transition;
		}

		Transition* Automaton::NewNop(State* start, State* end)
		{
			auto transition = NewTransition(start, end);
			transition->type = Transition::Nop;
			return transition;
		}

		Transition* Automaton::NewCapture(State* start, State* end, vint capture)
		{
			auto transition = NewTransition(start, end);
			transition->type = Transition::Capture;
			transition->capture = capture;
			return transition;
		}

		Transition* Automaton::NewMatch(State* start, State* end, vint capture, vint index)
		{
			auto transition = NewTransition(start, end);
			transition->type = Transition::Match;
			transition->capture = capture;
			transition->index = index;
			return transition;
		}

		Transition* Automaton::NewPositive(State* start, State* end)
		{
			auto transition = NewTransition(start, end);
			transition->type = Transition::Positive;
			return transition;
		}

		Transition* Automaton::NewNegative(State* start, State* end)
		{
			auto transition = NewTransition(start, end);
			transition->type = Transition::Negative;
			return transition;
		}

		Transition* Automaton::NewNegativeFail(State* start, State* end)
		{
			auto transition = NewTransition(start, end);
			transition->type = Transition::NegativeFail;
			return transition;
		}

		Transition* Automaton::NewEnd(State* start, State* end)
		{
			auto transition = NewTransition(start, end);
			transition->type = Transition::End;
			return transition;
		}

/***********************************************************************
Helpers
***********************************************************************/

		bool PureEpsilonChecker(Transition* transition)
		{
			switch (transition->type)
			{
			case Transition::Epsilon:
			case Transition::Nop:
			case Transition::Capture:
			case Transition::End:
				return true;
			default:
				return false;
			}
		}

		bool RichEpsilonChecker(Transition* transition)
		{
			switch (transition->type)
			{
			case Transition::Epsilon:
				return true;
			default:
				return false;
			}
		}

		bool AreEqual(Transition* transA, Transition* transB)
		{
			if (transA->type != transB->type)return false;
			switch (transA->type)
			{
			case Transition::Chars:
				return transA->range == transB->range;
			case Transition::Capture:
				return transA->capture == transB->capture;
			case Transition::Match:
				return transA->capture == transB->capture && transA->index == transB->index;
			default:
				return true;
			}
		}

		// Collect epsilon states and non-epsilon transitions, their order are maintained to match the e-NFA
		void CollectEpsilon(State* targetState, State* sourceState, bool(*epsilonChecker)(Transition*), List<State*>& epsilonStates, List<Transition*>& transitions)
		{
			if (!epsilonStates.Contains(sourceState))
			{
				epsilonStates.Add(sourceState);
				// TODO: (enumerable) foreach:alterable
				for (vint i = 0; i < sourceState->transitions.Count(); i++)
				{
					Transition* transition = sourceState->transitions[i];
					if (epsilonChecker(transition))
					{
						if (!epsilonStates.Contains(transition->target))
						{
							if (transition->target->finalState)
							{
								targetState->finalState = true;
							}
							CollectEpsilon(targetState, transition->target, epsilonChecker, epsilonStates, transitions);
						}
					}
					else
					{
						transitions.Add(transition);
					}
				}
			}
		}

		Ptr<Automaton> EpsilonNfaToNfa(Ptr<Automaton> source, bool(*epsilonChecker)(Transition*), Dictionary<State*, State*>& nfaStateMap)
		{
			auto target = Ptr(new Automaton);
			Dictionary<State*, State*> stateMap;	// source->target
			List<State*> epsilonStates;				// current epsilon closure
			List<Transition*> transitions;			// current non-epsilon transitions

			stateMap.Add(source->startState, target->NewState());
			nfaStateMap.Add(stateMap[source->startState], source->startState);
			target->startState = target->states[0].Obj();
			CopyFrom(target->captureNames, source->captureNames);

			// TODO: (enumerable) foreach
			for (vint i = 0; i < target->states.Count(); i++)
			{
				// Clear cache
				State* targetState = target->states[i].Obj();
				State* sourceState = nfaStateMap[targetState];
				if (sourceState->finalState)
				{
					targetState->finalState = true;
				}
				epsilonStates.Clear();
				transitions.Clear();

				// Collect epsilon states and non-epsilon transitions
				CollectEpsilon(targetState, sourceState, epsilonChecker, epsilonStates, transitions);

				// Iterate through all non-epsilon transitions
				// TODO: (enumerable) foreach
				for (vint j = 0; j < transitions.Count(); j++)
				{
					Transition* transition = transitions[j];
					// Create and map a new target state if a new non-epsilon state is found in the e-NFA
					if (!stateMap.Keys().Contains(transition->target))
					{
						stateMap.Add(transition->target, target->NewState());
						nfaStateMap.Add(stateMap[transition->target], transition->target);
					}
					// Copy transition to connect between two non-epsilon state
					Transition* newTransition = target->NewTransition(targetState, stateMap[transition->target]);
					newTransition->capture = transition->capture;
					newTransition->index = transition->index;
					newTransition->range = transition->range;
					newTransition->type = transition->type;
				}
			}
			return target;
		}

		Ptr<Automaton> NfaToDfa(Ptr<Automaton> source, Group<State*, State*>& dfaStateMap)
		{
			auto target = Ptr(new Automaton);
			CopyFrom(target->captureNames, source->captureNames);
			State* startState = target->NewState();
			target->startState = startState;
			dfaStateMap.Add(startState, source->startState);

			for (auto currentState_ : target->states)
			{
				Group<Transition*, Transition*>			nfaClassToTransitions;
				Dictionary<Transition*, Transition*>	nfaTransitionToClass;
				List<Transition*>						orderedTransitionClasses;

				State* currentState = currentState_.Obj();

				// Iterate through all NFA states which represent the DFA state
				for (auto nfaState : dfaStateMap[currentState])
				{
					// Iterate through all transitions from those NFA states
					for (auto nfaTransition : nfaState->transitions)
					{
						Transition* transitionClass = nullptr;

						// Check if there is any key in nfaTransitions that has the same input as the current transition
						{
							vint index = nfaTransitionToClass.Keys().IndexOf(nfaTransition);
							if (index != -1) transitionClass = nfaTransitionToClass.Values()[index];
						}

						if (transitionClass == nullptr)
						{
							// TODO: (enumerable) foreach
							for (vint l = 0; l < orderedTransitionClasses.Count(); l++)
							{
								Transition* key = orderedTransitionClasses[l];
								if (AreEqual(key, nfaTransition))
								{
									transitionClass = key;
									break;
								}
							}
						}

						// Create a new key if not
						if (transitionClass == nullptr)
						{
							transitionClass = nfaTransition;
							orderedTransitionClasses.Add(transitionClass);
						}
						// Group the transition
						nfaClassToTransitions.Add(transitionClass, nfaTransition);
						nfaTransitionToClass.Add(nfaTransition, transitionClass);
					}
				}

				// Iterate through all key transition that represent all existing transition inputs from the same state
				for (auto transitionClass : orderedTransitionClasses)
				{
					auto&& equivalentTransitions = nfaClassToTransitions[transitionClass];

					// Sort all target states and keep unique
					List<State*> transitionTargets;
					CopyFrom(
						transitionTargets,
						From(equivalentTransitions)
							.Select([](auto t) { return t->target; })
							.Distinct()
						);

					// Check if these NFA states represent a created DFA state
					State* dfaState = 0;
					// TODO: (enumerable) foreach on dictionary
					for (vint k = 0; k < dfaStateMap.Count(); k++)
					{
						// Compare two NFA states set
						if (CompareEnumerable(transitionTargets, dfaStateMap.GetByIndex(k)) == 0)
						{
							dfaState = dfaStateMap.Keys()[k];
						}
					}
					// Create a new DFA state if there is not
					if (!dfaState)
					{
						dfaState = target->NewState();
						// TODO: (enumerable) foreach
						for (vint k = 0; k < transitionTargets.Count(); k++)
						{
							dfaStateMap.Add(dfaState, transitionTargets[k]);
							if (transitionTargets[k]->finalState)
							{
								dfaState->finalState = true;
							}
						}
					}
					// Create corresponding DFA transition
					Transition* newTransition = target->NewTransition(currentState, dfaState);
					newTransition->capture = transitionClass->capture;
					newTransition->index = transitionClass->index;
					newTransition->range = transitionClass->range;
					newTransition->type = transitionClass->type;
				}
			}

			return target;
		}
	}
}