...

2026-05-25 09:17:37 +08:00 · 2018-09-18 01:45:51 -07:00
parent 90c212ee9f
commit df13c62a90
4 changed files with 425 additions and 233 deletions
@@ -17807,25 +17807,30 @@ GuiTextBoxRegexColorizer

 			void GuiTextBoxRegexColorizer::Setup()
 			{
-				if(lexer || tokenRegexes.Count()==0)
+				if (lexer || tokenRegexes.Count() == 0)
 				{
 					colors.Resize(1);
-					colors[0]=defaultColor;
+					colors[0] = defaultColor;
 				}
 				else
 				{
-					lexer=new regex::RegexLexer(tokenRegexes);
-					colors.Resize(1+tokenRegexes.Count()+extraTokenColors.Count());
-					colors[0]=defaultColor;
-					for(vint i=0;i<tokenColors.Count();i++)
 					{
-						colors[i+1]=tokenColors[i];
+						regex::RegexProc proc;
+						proc.colorizeProc = &GuiTextBoxRegexColorizer::ColorizerProc;
+						proc.argument = colorizerArgument;
+						lexer = new regex::RegexLexer(tokenRegexes, proc);
 					}
-					for(vint i=0;i<extraTokenColors.Count();i++)
+					colors.Resize(1 + tokenRegexes.Count() + extraTokenColors.Count());
+					colors[0] = defaultColor;
+					for (vint i = 0; i < tokenColors.Count(); i++)
 					{
-						colors[i+1+tokenColors.Count()]=extraTokenColors[i];
+						colors[i + 1] = tokenColors[i];
 					}
-					colorizer=new regex::RegexLexerColorizer(lexer->Colorize());
+					for (vint i = 0; i < extraTokenColors.Count(); i++)
+					{
+						colors[i + 1 + tokenColors.Count()] = extraTokenColors[i];
+					}
+					colorizer = new regex::RegexLexerColorizer(lexer->Colorize());
 				}
 			}

@@ -17846,25 +17851,28 @@ GuiTextBoxRegexColorizer
 			void GuiTextBoxRegexColorizer::ColorizeLineWithCRLF(vint lineIndex, const wchar_t* text, vuint32_t* colors, vint length, vint& lexerState, vint& contextState)
 			{
 				memset(colors, 0, sizeof(*colors)*length);
-				if(lexer)
+				if (lexer)
 				{
 					GuiTextBoxRegexColorizerProcData data;
-					data.colorizer=this;
-					data.lineIndex=lineIndex;
-					data.text=text;
-					data.colors=colors;
-					data.contextState=contextState;
+					data.colorizer = this;
+					data.lineIndex = lineIndex;
+					data.text = text;
+					data.colors = colors;
+					data.contextState = contextState;

-					colorizer->Reset(lexerState);
-					colorizer->Colorize(text, length, &GuiTextBoxRegexColorizer::ColorizerProc, &data);
+					regex::RegexLexerColorizer::InternalState internalState;
+					internalState.currentState = lexerState;
+					colorizer->SetInternalState(internalState);
+					colorizerArgument[0] = &data;
+					colorizer->Colorize(text, length);

-					lexerState=colorizer->GetCurrentState();
-					contextState=data.contextState;
+					lexerState = colorizer->GetInternalState().currentState;
+					contextState = data.contextState;
 				}
 				else
 				{
-					lexerState=-1;
-					contextState=-1;
+					lexerState = -1;
+					contextState = -1;
 				}
 			}

@@ -16207,6 +16207,7 @@ GuiTextBoxRegexColorizer
 			protected:
 				Ptr<regex::RegexLexer>										lexer;
 				Ptr<regex::RegexLexerColorizer>								colorizer;
+				void*														colorizerArgument[1] { nullptr };
 				ColorArray													colors;

 				elements::text::ColorEntry									defaultColor;
@@ -6820,9 +6820,8 @@ Tokenizer
 ***********************************************************************/

 		/// <summary>A token.</summary>
-		class RegexToken
+		struct RegexToken
 		{
-		public:
 			/// <summary>Position in the input string.</summary>
 			vint										start;
 			/// <summary>Size of this token in characters.</summary>
@@ -6849,6 +6848,75 @@ Tokenizer
 			bool										operator==(const wchar_t* _token)const;
 		};

+		/// <summary>Token information for <see cref="RegexProc::extendProc"/>.</summary>
+		struct RegexProcessingToken
+		{
+			/// <summary>
+			/// The read only start position of the token.
+			/// This value will be -1 if <see cref="interTokenState"/> is not null.
+			/// </summary>
+			const vint									start;
+			/// <summary>
+			/// The length of the token, could be modified after the callback.
+			/// When the callback returns, the length is not allowed to be decreased.
+			/// This value will be -1 if <see cref="interTokenState"/> is not null.
+			/// </summary>
+			vint										length;
+			/// <summary>
+			/// The id of the token, could be modified after the callback.
+			/// </summary>
+			vint										token;
+			/// <summary>
+			/// The flag indicating if this token is completed, could be modified after the callback.
+			/// </summary>
+			bool										completeToken;
+			/// <summary>
+			/// The inter token state object, could be modified after the callback.
+			/// When the callback returns:
+			///   if the completeText parameter is true in <see cref="RegexProc::extendProc"/>, it should be nullptr.
+			///   if the token does not end at the end of the input, it should not be nullptr.
+			///   if a token is completed, it should be nullptr.
+			/// </summary>
+			void*										interTokenState;
+
+			RegexProcessingToken(vint _start, vint _length, vint _token, bool _completeToken, void* _interTokenState)
+				:start(_start)
+				, length(_length)
+				, token(_token)
+				, completeToken(_completeToken)
+				, interTokenState(_interTokenState)
+			{
+			}
+		};
+
+		using RegexInterTokenStateDeleter = void(*)(void* interTokenState);
+		using RegexTokenExtendProc = void(*)(void* argument, const wchar_t* reading, vint length, bool completeText, RegexProcessingToken& processingToken);
+		using RegexTokenColorizeProc =  void(*)(void* argument, vint start, vint length, vint token);
+
+		/// <summary>Callback procedures</summary>
+		struct RegexProc
+		{
+			/// <summary>
+			/// The deleter which deletes inter token state objects created by <see cref="extendProc"/>. This callback is not called automatically.
+			/// </summary>
+			RegexInterTokenStateDeleter					deleter = nullptr;
+			/// <summary>
+			/// The token extend callback. It is called after recognizing any token, and run a customized procedure to modify the token based on the given context.
+			/// If the length parameter is -1, it means the caller does not measure the incoming text buffer, which automatically indicates that the buffer is null-terminated.
+			/// If the length parameter is not -1, it means the number of available characters in the buffer.
+			/// The completeText parameter could be true or false. When it is false, it means that the buffer does not contain all the text.
+			/// </summary>
+			RegexTokenExtendProc						extendProc = nullptr;
+			/// <summary>
+			/// The colorizer callback. It is called when a token is recognized.
+			/// </summary>
+			RegexTokenColorizeProc						colorizeProc = nullptr;
+			/// <summary>
+			/// The argument object that is the first argument for <see cref="extendProc"/> and <see cref="colorizeProc"/>.
+			/// </summary>
+			void*										argument = nullptr;
+		};
+
 		/// <summary>Token collection representing the result from the lexical analyzer.</summary>
 		class RegexTokens : public Object, public collections::IEnumerable<RegexToken>
 		{
@@ -6858,10 +6926,12 @@ Tokenizer
 			const collections::Array<vint>&				stateTokens;
 			WString										code;
 			vint										codeIndex;
+			RegexProc									proc;
 			
-			RegexTokens(regex_internal::PureInterpretor* _pure, const collections::Array<vint>& _stateTokens, const WString& _code, vint _codeIndex);
+			RegexTokens(regex_internal::PureInterpretor* _pure, const collections::Array<vint>& _stateTokens, const WString& _code, vint _codeIndex, RegexProc _proc);
 		public:
 			RegexTokens(const RegexTokens& tokens);
+			~RegexTokens();

 			collections::IEnumerator<RegexToken>*		CreateEnumerator()const;

@@ -6881,7 +6951,7 @@ Tokenizer
 			
 			RegexLexerWalker(regex_internal::PureInterpretor* _pure, const collections::Array<vint>& _stateTokens);
 		public:
-			RegexLexerWalker(const RegexLexerWalker& walker);
+			RegexLexerWalker(const RegexLexerWalker& tokens);
 			~RegexLexerWalker();
 			
 			/// <summary>Get the start DFA state number, which represents the correct state before parsing any input.</summary>
@@ -6919,48 +6989,59 @@ Tokenizer
 		{
 			friend class RegexLexer;
 		public:
-			typedef void(*TokenProc)(void* argument, vint start, vint length, vint token);
+			struct InternalState
+			{
+				vint									currentState = -1;
+				vint									interTokenId = -1;
+				void*									interTokenState = nullptr;
+			};

 		protected:
 			RegexLexerWalker							walker;
-			vint										currentState;
+			RegexProc									proc;
+			InternalState								internalState;

-			RegexLexerColorizer(const RegexLexerWalker& _walker);
+			void										CallExtendProcAndColorizeProc(const wchar_t* input, vint length, RegexProcessingToken& token, bool colorize);
+			vint										WalkOneToken(const wchar_t* input, vint length, vint start, bool colorize);
+
+			RegexLexerColorizer(const RegexLexerWalker& _walker, RegexProc _proc);
 		public:
 			RegexLexerColorizer(const RegexLexerColorizer& colorizer);
 			~RegexLexerColorizer();

-			/// <summary>Reset the colorizer using the DFA state number.</summary>
-			/// <param name="state">The DFA state number.</param>
-			void										Reset(vint state);
+			/// <summary>Get the internal state.</summary>
+			/// <returns>The internal state.</returns>
+			InternalState								GetInternalState();
+			/// <summary>Restore the colorizer to a internal state.</summary>
+			/// <param name="value">The internal state.</param>
+			void										SetInternalState(InternalState state);
 			/// <summary>Step forward by one character.</summary>
 			/// <param name="input">The input character.</param>
 			void										Pass(wchar_t input);
 			/// <summary>Get the start DFA state number, which represents the correct state before colorizing any characters.</summary>
 			/// <returns>The DFA state number.</returns>
 			vint										GetStartState()const;
-			/// <summary>Get the current DFA state number.</summary>
-			/// <returns>The DFA state number.</returns>
-			vint										GetCurrentState()const;
-			/// <summary>Colorize a text.</summary>
+			/// <summary>Colorize a text.</summary>	GetCurrentState()const;
+			/// <returns>An inter token state at the end of this line. It could be the same object which is returned from the previous call.</returns>
 			/// <param name="input">The text to colorize.</param>
 			/// <param name="length">Size of the text in characters.</param>
-			/// <param name="tokenProc">Colorizer callback. This callback will be called if any token is found..</param>
-			/// <param name="tokenProcArgument">The argument to call the callback.</param>
-			void										Colorize(const wchar_t* input, vint length, TokenProc tokenProc, void* tokenProcArgument);
+			void*										Colorize(const wchar_t* input, vint length);
 		};

 		/// <summary>Lexical analyzer.</summary>
 		class RegexLexer : public Object, private NotCopyable
 		{
 		protected:
-			regex_internal::PureInterpretor*			pure;
+			regex_internal::PureInterpretor*			pure = nullptr;
 			collections::Array<vint>					ids;
 			collections::Array<vint>					stateTokens;
+			RegexProc									proc;
+
 		public:
 			/// <summary>Create a lexical analyzer by a set of regular expressions. [F:vl.regex.RegexToken.token] will be the index of the matched regular expression.</summary>
 			/// <param name="tokens">The regular expressions.</param>
-			RegexLexer(const collections::IEnumerable<WString>& tokens);
+			/// <param name="_proc">Callback procedures.</param>
+			RegexLexer(const collections::IEnumerable<WString>& tokens, RegexProc _proc);
 			~RegexLexer();

 			/// <summary>Tokenize a input text.</summary>