mirror of
https://github.com/vczh-libraries/Release.git
synced 2026-06-01 23:06:39 +08:00
...
This commit is contained in:
+12
-4
@@ -17814,7 +17814,12 @@ GuiTextBoxRegexColorizer
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
lexer=new regex::RegexLexer(tokenRegexes);
|
{
|
||||||
|
regex::RegexProc proc;
|
||||||
|
proc.colorizeProc = &GuiTextBoxRegexColorizer::ColorizerProc;
|
||||||
|
proc.argument = colorizerArgument;
|
||||||
|
lexer = new regex::RegexLexer(tokenRegexes, proc);
|
||||||
|
}
|
||||||
colors.Resize(1 + tokenRegexes.Count() + extraTokenColors.Count());
|
colors.Resize(1 + tokenRegexes.Count() + extraTokenColors.Count());
|
||||||
colors[0] = defaultColor;
|
colors[0] = defaultColor;
|
||||||
for (vint i = 0; i < tokenColors.Count(); i++)
|
for (vint i = 0; i < tokenColors.Count(); i++)
|
||||||
@@ -17855,10 +17860,13 @@ GuiTextBoxRegexColorizer
|
|||||||
data.colors = colors;
|
data.colors = colors;
|
||||||
data.contextState = contextState;
|
data.contextState = contextState;
|
||||||
|
|
||||||
colorizer->Reset(lexerState);
|
regex::RegexLexerColorizer::InternalState internalState;
|
||||||
colorizer->Colorize(text, length, &GuiTextBoxRegexColorizer::ColorizerProc, &data);
|
internalState.currentState = lexerState;
|
||||||
|
colorizer->SetInternalState(internalState);
|
||||||
|
colorizerArgument[0] = &data;
|
||||||
|
colorizer->Colorize(text, length);
|
||||||
|
|
||||||
lexerState=colorizer->GetCurrentState();
|
lexerState = colorizer->GetInternalState().currentState;
|
||||||
contextState = data.contextState;
|
contextState = data.contextState;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -16207,6 +16207,7 @@ GuiTextBoxRegexColorizer
|
|||||||
protected:
|
protected:
|
||||||
Ptr<regex::RegexLexer> lexer;
|
Ptr<regex::RegexLexer> lexer;
|
||||||
Ptr<regex::RegexLexerColorizer> colorizer;
|
Ptr<regex::RegexLexerColorizer> colorizer;
|
||||||
|
void* colorizerArgument[1] { nullptr };
|
||||||
ColorArray colors;
|
ColorArray colors;
|
||||||
|
|
||||||
elements::text::ColorEntry defaultColor;
|
elements::text::ColorEntry defaultColor;
|
||||||
|
|||||||
+173
-71
@@ -13318,7 +13318,7 @@ ParsingTable
|
|||||||
{
|
{
|
||||||
discardTokenInfos[i].regexTokenIndex = regexTokenIndex++;
|
discardTokenInfos[i].regexTokenIndex = regexTokenIndex++;
|
||||||
}
|
}
|
||||||
lexer=new RegexLexer(tokens);
|
lexer = new RegexLexer(tokens, {});
|
||||||
|
|
||||||
ruleMap.Clear();
|
ruleMap.Clear();
|
||||||
FOREACH_INDEXER(RuleInfo, rule, index, ruleInfos)
|
FOREACH_INDEXER(RuleInfo, rule, index, ruleInfos)
|
||||||
@@ -20107,17 +20107,18 @@ RegexTokens
|
|||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
RegexToken token;
|
RegexToken token;
|
||||||
vint index;
|
vint index = -1;
|
||||||
|
|
||||||
PureInterpretor* pure;
|
PureInterpretor* pure;
|
||||||
const Array<vint>& stateTokens;
|
const Array<vint>& stateTokens;
|
||||||
const wchar_t* start;
|
const wchar_t* start;
|
||||||
vint codeIndex;
|
vint codeIndex;
|
||||||
|
RegexProc proc;
|
||||||
|
|
||||||
const wchar_t* reading;
|
const wchar_t* reading;
|
||||||
vint rowStart;
|
vint rowStart = 0;
|
||||||
vint columnStart;
|
vint columnStart = 0;
|
||||||
bool cacheAvailable;
|
bool cacheAvailable = false;
|
||||||
RegexToken cacheToken;
|
RegexToken cacheToken;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@@ -20126,6 +20127,7 @@ RegexTokens
|
|||||||
, index(enumerator.index)
|
, index(enumerator.index)
|
||||||
, pure(enumerator.pure)
|
, pure(enumerator.pure)
|
||||||
, stateTokens(enumerator.stateTokens)
|
, stateTokens(enumerator.stateTokens)
|
||||||
|
, proc(enumerator.proc)
|
||||||
, reading(enumerator.reading)
|
, reading(enumerator.reading)
|
||||||
, start(enumerator.start)
|
, start(enumerator.start)
|
||||||
, rowStart(enumerator.rowStart)
|
, rowStart(enumerator.rowStart)
|
||||||
@@ -20136,16 +20138,14 @@ RegexTokens
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
RegexTokenEnumerator(PureInterpretor* _pure, const Array<vint>& _stateTokens, const wchar_t* _start, vint _codeIndex)
|
RegexTokenEnumerator(PureInterpretor* _pure, const Array<vint>& _stateTokens, const wchar_t* _start, vint _codeIndex, RegexProc _proc)
|
||||||
:index(-1)
|
:index(-1)
|
||||||
, pure(_pure)
|
, pure(_pure)
|
||||||
, stateTokens(_stateTokens)
|
, stateTokens(_stateTokens)
|
||||||
,reading(_start)
|
|
||||||
, start(_start)
|
, start(_start)
|
||||||
,rowStart(0)
|
|
||||||
,columnStart(0)
|
|
||||||
, codeIndex(_codeIndex)
|
, codeIndex(_codeIndex)
|
||||||
,cacheAvailable(false)
|
, proc(_proc)
|
||||||
|
, reading(_start)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -20180,6 +20180,7 @@ RegexTokens
|
|||||||
token.token = -2;
|
token.token = -2;
|
||||||
token.completeToken = true;
|
token.completeToken = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
token.rowStart = rowStart;
|
token.rowStart = rowStart;
|
||||||
token.columnStart = columnStart;
|
token.columnStart = columnStart;
|
||||||
token.rowEnd = rowStart;
|
token.rowEnd = rowStart;
|
||||||
@@ -20217,6 +20218,19 @@ RegexTokens
|
|||||||
{
|
{
|
||||||
id = stateTokens.Get(result.finalState);
|
id = stateTokens.Get(result.finalState);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (id != -1 && proc.extendProc)
|
||||||
|
{
|
||||||
|
RegexProcessingToken token(result.start, result.length, id, completeToken, nullptr);
|
||||||
|
proc.extendProc(proc.argument, reading, -1, true, token);
|
||||||
|
#if _DEBUG
|
||||||
|
CHECK_ERROR(token.interTokenState == nullptr, L"RegexTokenEnumerator::Next()#The extendProc is only allowed to create interTokenState in RegexLexerColorizer.");
|
||||||
|
#endif
|
||||||
|
result.length = token.length;
|
||||||
|
id = token.token;
|
||||||
|
completeToken = token.completeToken;
|
||||||
|
}
|
||||||
|
|
||||||
if (token.token == -2)
|
if (token.token == -2)
|
||||||
{
|
{
|
||||||
token.start = result.start;
|
token.start = result.start;
|
||||||
@@ -20239,6 +20253,7 @@ RegexTokens
|
|||||||
cacheToken.completeToken = completeToken;
|
cacheToken.completeToken = completeToken;
|
||||||
}
|
}
|
||||||
reading += result.length;
|
reading += result.length;
|
||||||
|
|
||||||
if (cacheAvailable)
|
if (cacheAvailable)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
@@ -20283,11 +20298,12 @@ RegexTokens
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
RegexTokens::RegexTokens(PureInterpretor* _pure, const Array<vint>& _stateTokens, const WString& _code, vint _codeIndex)
|
RegexTokens::RegexTokens(PureInterpretor* _pure, const Array<vint>& _stateTokens, const WString& _code, vint _codeIndex, RegexProc _proc)
|
||||||
:pure(_pure)
|
:pure(_pure)
|
||||||
, stateTokens(_stateTokens)
|
, stateTokens(_stateTokens)
|
||||||
, code(_code)
|
, code(_code)
|
||||||
, codeIndex(_codeIndex)
|
, codeIndex(_codeIndex)
|
||||||
|
, proc(_proc)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -20296,12 +20312,13 @@ RegexTokens
|
|||||||
, stateTokens(tokens.stateTokens)
|
, stateTokens(tokens.stateTokens)
|
||||||
, code(tokens.code)
|
, code(tokens.code)
|
||||||
, codeIndex(tokens.codeIndex)
|
, codeIndex(tokens.codeIndex)
|
||||||
|
, proc(tokens.proc)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
IEnumerator<RegexToken>* RegexTokens::CreateEnumerator()const
|
IEnumerator<RegexToken>* RegexTokens::CreateEnumerator()const
|
||||||
{
|
{
|
||||||
return new RegexTokenEnumerator(pure, stateTokens, code.Buffer(), codeIndex);
|
return new RegexTokenEnumerator(pure, stateTokens, code.Buffer(), codeIndex, proc);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DefaultDiscard(vint token)
|
bool DefaultDiscard(vint token)
|
||||||
@@ -20315,7 +20332,7 @@ RegexTokens
|
|||||||
{
|
{
|
||||||
discard=&DefaultDiscard;
|
discard=&DefaultDiscard;
|
||||||
}
|
}
|
||||||
RegexTokenEnumerator(pure, stateTokens, code.Buffer(), codeIndex).ReadToEnd(tokens, discard);
|
RegexTokenEnumerator(pure, stateTokens, code.Buffer(), codeIndex, proc).ReadToEnd(tokens, discard);
|
||||||
}
|
}
|
||||||
|
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
@@ -20328,9 +20345,9 @@ RegexLexerWalker
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
RegexLexerWalker::RegexLexerWalker(const RegexLexerWalker& walker)
|
RegexLexerWalker::RegexLexerWalker(const RegexLexerWalker& tokens)
|
||||||
:pure(walker.pure)
|
: pure(tokens.pure)
|
||||||
,stateTokens(walker.stateTokens)
|
, stateTokens(tokens.stateTokens)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -20338,6 +20355,10 @@ RegexLexerWalker
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RegexTokens::~RegexTokens()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
vint RegexLexerWalker::GetStartState()const
|
vint RegexLexerWalker::GetStartState()const
|
||||||
{
|
{
|
||||||
return pure->GetStartState();
|
return pure->GetStartState();
|
||||||
@@ -20418,15 +20439,17 @@ RegexLexerWalker
|
|||||||
RegexLexerColorizer
|
RegexLexerColorizer
|
||||||
***********************************************************************/
|
***********************************************************************/
|
||||||
|
|
||||||
RegexLexerColorizer::RegexLexerColorizer(const RegexLexerWalker& _walker)
|
RegexLexerColorizer::RegexLexerColorizer(const RegexLexerWalker& _walker, RegexProc _proc)
|
||||||
:walker(_walker)
|
:walker(_walker)
|
||||||
,currentState(_walker.GetStartState())
|
, proc(_proc)
|
||||||
{
|
{
|
||||||
|
internalState.currentState = walker.GetStartState();
|
||||||
}
|
}
|
||||||
|
|
||||||
RegexLexerColorizer::RegexLexerColorizer(const RegexLexerColorizer& colorizer)
|
RegexLexerColorizer::RegexLexerColorizer(const RegexLexerColorizer& colorizer)
|
||||||
:walker(colorizer.walker)
|
:walker(colorizer.walker)
|
||||||
,currentState(colorizer.currentState)
|
, proc(colorizer.proc)
|
||||||
|
, internalState(colorizer.internalState)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -20434,14 +20457,18 @@ RegexLexerColorizer
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegexLexerColorizer::Reset(vint state)
|
RegexLexerColorizer::InternalState RegexLexerColorizer::GetInternalState()
|
||||||
{
|
{
|
||||||
currentState=state;
|
return internalState;
|
||||||
|
}
|
||||||
|
void RegexLexerColorizer::SetInternalState(InternalState state)
|
||||||
|
{
|
||||||
|
internalState = state;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegexLexerColorizer::Pass(wchar_t input)
|
void RegexLexerColorizer::Pass(wchar_t input)
|
||||||
{
|
{
|
||||||
currentState=walker.Walk(input, currentState);
|
WalkOneToken(&input, 1, 0, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
vint RegexLexerColorizer::GetStartState()const
|
vint RegexLexerColorizer::GetStartState()const
|
||||||
@@ -20449,80 +20476,155 @@ RegexLexerColorizer
|
|||||||
return walker.GetStartState();
|
return walker.GetStartState();
|
||||||
}
|
}
|
||||||
|
|
||||||
vint RegexLexerColorizer::GetCurrentState()const
|
void RegexLexerColorizer::CallExtendProcAndColorizeProc(const wchar_t* input, vint length, RegexProcessingToken& token, bool colorize)
|
||||||
{
|
{
|
||||||
return currentState;
|
vint oldTokenLength = token.length;
|
||||||
|
proc.extendProc(proc.argument, input + token.start, length - token.start, false, token);
|
||||||
|
#if _DEBUG
|
||||||
|
{
|
||||||
|
bool pausedAtTheEnd = token.start + token.length == length && !token.completeToken;
|
||||||
|
CHECK_ERROR(
|
||||||
|
token.completeToken || pausedAtTheEnd,
|
||||||
|
L"RegexLexerColorizer::WalkOneToken(const wchar_t*, vint, vint, bool)#The extendProc is not allowed pause before the end of the input."
|
||||||
|
);
|
||||||
|
CHECK_ERROR(
|
||||||
|
token.completeToken || token.token != -1,
|
||||||
|
L"RegexLexerColorizer::WalkOneToken(const wchar_t*, vint, vint, bool)#The extendProc is not allowed to pause without a valid token id."
|
||||||
|
);
|
||||||
|
CHECK_ERROR(
|
||||||
|
oldTokenLength <= token.length,
|
||||||
|
L"RegexLexerColorizer::WalkOneToken(const wchar_t*, vint, vint, bool)#The extendProc is not allowed to decrease the token length."
|
||||||
|
);
|
||||||
|
CHECK_ERROR(
|
||||||
|
(token.interTokenState == nullptr) == !pausedAtTheEnd,
|
||||||
|
L"RegexLexerColorizer::Colorize(const wchar_t*, vint, void*)#The extendProc should return an inter token state object if and only if a valid token does not end at the end of the input."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if ((internalState.interTokenState = token.interTokenState))
|
||||||
|
{
|
||||||
|
internalState.interTokenId = token.token;
|
||||||
|
internalState.currentState = walker.GetStartState();
|
||||||
|
}
|
||||||
|
if (colorize)
|
||||||
|
{
|
||||||
|
proc.colorizeProc(proc.argument, token.start, token.length, token.token);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegexLexerColorizer::Colorize(const wchar_t* input, vint length, TokenProc tokenProc, void* tokenProcArgument)
|
vint RegexLexerColorizer::WalkOneToken(const wchar_t* input, vint length, vint start, bool colorize)
|
||||||
{
|
{
|
||||||
vint start=0;
|
if (internalState.interTokenState)
|
||||||
vint stop=0;
|
{
|
||||||
vint state=-1;
|
RegexProcessingToken token(-1, -1, internalState.interTokenId, false, internalState.interTokenState);
|
||||||
vint token=-1;
|
proc.extendProc(proc.argument, input, length, false, token);
|
||||||
|
#if _DEBUG
|
||||||
|
{
|
||||||
|
bool pausedAtTheEnd = token.length == length && !token.completeToken;
|
||||||
|
CHECK_ERROR(
|
||||||
|
token.completeToken || pausedAtTheEnd,
|
||||||
|
L"RegexLexerColorizer::WalkOneToken(const wchar_t*, vint, vint, bool)#The extendProc is not allowed to pause before the end of the input."
|
||||||
|
);
|
||||||
|
CHECK_ERROR(
|
||||||
|
token.completeToken || token.token == internalState.interTokenId,
|
||||||
|
L"RegexLexerColorizer::WalkOneToken(const wchar_t*, vint, vint, bool)#The extendProc is not allowed to continue pausing with a different token id."
|
||||||
|
);
|
||||||
|
CHECK_ERROR(
|
||||||
|
(token.interTokenState == nullptr) == !pausedAtTheEnd,
|
||||||
|
L"RegexLexerColorizer::Colorize(const wchar_t*, vint, void*)#The extendProc should return an inter token state object if and only if a valid token does not end at the end of the input."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (colorize)
|
||||||
|
{
|
||||||
|
proc.colorizeProc(proc.argument, 0, token.length, token.token);
|
||||||
|
}
|
||||||
|
if (!(internalState.interTokenState = token.interTokenState))
|
||||||
|
{
|
||||||
|
internalState.interTokenId = -1;
|
||||||
|
}
|
||||||
|
return token.length;
|
||||||
|
}
|
||||||
|
|
||||||
vint index=0;
|
vint lastFinalStateLength = 0;
|
||||||
|
vint lastFinalStateToken = -1;
|
||||||
|
|
||||||
|
for (vint i = start; i < length; i++)
|
||||||
|
{
|
||||||
vint currentToken = -1;
|
vint currentToken = -1;
|
||||||
bool finalState = false;
|
bool finalState = false;
|
||||||
bool previousTokenStop = false;
|
bool previousTokenStop = false;
|
||||||
|
walker.Walk(input[i], internalState.currentState, currentToken, finalState, previousTokenStop);
|
||||||
while(index<length)
|
|
||||||
{
|
|
||||||
currentToken=-1;
|
|
||||||
finalState=false;
|
|
||||||
previousTokenStop=false;
|
|
||||||
walker.Walk(input[index], currentState, currentToken, finalState, previousTokenStop);
|
|
||||||
|
|
||||||
if (previousTokenStop)
|
if (previousTokenStop)
|
||||||
{
|
{
|
||||||
vint tokenLength=stop-start;
|
internalState.currentState = walker.GetStartState();
|
||||||
if(tokenLength>0)
|
if (proc.extendProc && lastFinalStateToken != -1)
|
||||||
{
|
{
|
||||||
tokenProc(tokenProcArgument, start, tokenLength, token);
|
RegexProcessingToken token(start, lastFinalStateLength, lastFinalStateToken, true, nullptr);
|
||||||
currentState=state;
|
CallExtendProcAndColorizeProc(input, length, token, colorize);
|
||||||
start=stop;
|
return start + token.length;
|
||||||
index=stop-1;
|
|
||||||
state=-1;
|
|
||||||
token=-1;
|
|
||||||
finalState=false;
|
|
||||||
}
|
}
|
||||||
else if(stop<index)
|
else if (i == start)
|
||||||
{
|
{
|
||||||
stop=index+1;
|
if (colorize)
|
||||||
tokenProc(tokenProcArgument, start, stop-start, -1);
|
|
||||||
start=index+1;
|
|
||||||
state=-1;
|
|
||||||
token=-1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if(finalState)
|
|
||||||
{
|
{
|
||||||
stop=index+1;
|
proc.colorizeProc(proc.argument, start, 1, -1);
|
||||||
state=currentState;
|
|
||||||
token=currentToken;
|
|
||||||
}
|
}
|
||||||
|
return i + 1;
|
||||||
index++;
|
|
||||||
}
|
|
||||||
if(start<length)
|
|
||||||
{
|
|
||||||
if(finalState)
|
|
||||||
{
|
|
||||||
tokenProc(tokenProcArgument, start, length-start, token);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
tokenProc(tokenProcArgument, start, length-start, walker.GetRelatedToken(currentState));
|
if (colorize)
|
||||||
|
{
|
||||||
|
proc.colorizeProc(proc.argument, start, lastFinalStateLength, lastFinalStateToken);
|
||||||
|
}
|
||||||
|
return start + lastFinalStateLength;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (finalState)
|
||||||
|
{
|
||||||
|
lastFinalStateLength = i + 1 - start;
|
||||||
|
lastFinalStateToken = currentToken;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastFinalStateToken != -1)
|
||||||
|
{
|
||||||
|
if (proc.extendProc)
|
||||||
|
{
|
||||||
|
RegexProcessingToken token(start, lastFinalStateLength, lastFinalStateToken, true, nullptr);
|
||||||
|
CallExtendProcAndColorizeProc(input, length, token, colorize);
|
||||||
|
}
|
||||||
|
else if (colorize)
|
||||||
|
{
|
||||||
|
proc.colorizeProc(proc.argument, start, lastFinalStateLength, lastFinalStateToken);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (colorize)
|
||||||
|
{
|
||||||
|
proc.colorizeProc(proc.argument, start, length - start, walker.GetRelatedToken(internalState.currentState));
|
||||||
|
}
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
|
||||||
|
void* RegexLexerColorizer::Colorize(const wchar_t* input, vint length)
|
||||||
|
{
|
||||||
|
vint index = 0;
|
||||||
|
while (index != length)
|
||||||
|
{
|
||||||
|
index = WalkOneToken(input, length, index, true);
|
||||||
|
}
|
||||||
|
return internalState.interTokenState;
|
||||||
}
|
}
|
||||||
|
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
RegexLexer
|
RegexLexer
|
||||||
***********************************************************************/
|
***********************************************************************/
|
||||||
|
|
||||||
RegexLexer::RegexLexer(const collections::IEnumerable<WString>& tokens)
|
RegexLexer::RegexLexer(const collections::IEnumerable<WString>& tokens, RegexProc _proc)
|
||||||
:pure(0)
|
:proc(_proc)
|
||||||
{
|
{
|
||||||
// Build DFA for all tokens
|
// Build DFA for all tokens
|
||||||
List<Expression::Ref> expressions;
|
List<Expression::Ref> expressions;
|
||||||
@@ -20622,7 +20724,7 @@ RegexLexer
|
|||||||
RegexTokens RegexLexer::Parse(const WString& code, vint codeIndex)const
|
RegexTokens RegexLexer::Parse(const WString& code, vint codeIndex)const
|
||||||
{
|
{
|
||||||
pure->PrepareForRelatedFinalStateTable();
|
pure->PrepareForRelatedFinalStateTable();
|
||||||
return RegexTokens(pure, stateTokens, code, codeIndex);
|
return RegexTokens(pure, stateTokens, code, codeIndex, proc);
|
||||||
}
|
}
|
||||||
|
|
||||||
RegexLexerWalker RegexLexer::Walk()const
|
RegexLexerWalker RegexLexer::Walk()const
|
||||||
@@ -20633,7 +20735,7 @@ RegexLexer
|
|||||||
|
|
||||||
RegexLexerColorizer RegexLexer::Colorize()const
|
RegexLexerColorizer RegexLexer::Colorize()const
|
||||||
{
|
{
|
||||||
return RegexLexerColorizer(Walk());
|
return RegexLexerColorizer(Walk(), proc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+100
-19
@@ -6820,9 +6820,8 @@ Tokenizer
|
|||||||
***********************************************************************/
|
***********************************************************************/
|
||||||
|
|
||||||
/// <summary>A token.</summary>
|
/// <summary>A token.</summary>
|
||||||
class RegexToken
|
struct RegexToken
|
||||||
{
|
{
|
||||||
public:
|
|
||||||
/// <summary>Position in the input string.</summary>
|
/// <summary>Position in the input string.</summary>
|
||||||
vint start;
|
vint start;
|
||||||
/// <summary>Size of this token in characters.</summary>
|
/// <summary>Size of this token in characters.</summary>
|
||||||
@@ -6849,6 +6848,75 @@ Tokenizer
|
|||||||
bool operator==(const wchar_t* _token)const;
|
bool operator==(const wchar_t* _token)const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// <summary>Token information for <see cref="RegexProc::extendProc"/>.</summary>
|
||||||
|
struct RegexProcessingToken
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// The read only start position of the token.
|
||||||
|
/// This value will be -1 if <see cref="interTokenState"/> is not null.
|
||||||
|
/// </summary>
|
||||||
|
const vint start;
|
||||||
|
/// <summary>
|
||||||
|
/// The length of the token, could be modified after the callback.
|
||||||
|
/// When the callback returns, the length is not allowed to be decreased.
|
||||||
|
/// This value will be -1 if <see cref="interTokenState"/> is not null.
|
||||||
|
/// </summary>
|
||||||
|
vint length;
|
||||||
|
/// <summary>
|
||||||
|
/// The id of the token, could be modified after the callback.
|
||||||
|
/// </summary>
|
||||||
|
vint token;
|
||||||
|
/// <summary>
|
||||||
|
/// The flag indicating if this token is completed, could be modified after the callback.
|
||||||
|
/// </summary>
|
||||||
|
bool completeToken;
|
||||||
|
/// <summary>
|
||||||
|
/// The inter token state object, could be modified after the callback.
|
||||||
|
/// When the callback returns:
|
||||||
|
/// if the completeText parameter is true in <see cref="RegexProc::extendProc"/>, it should be nullptr.
|
||||||
|
/// if the token is still incomplete when it reaches the end of the input, it should not be nullptr.
|
||||||
|
/// if a token is completed, it should be nullptr.
|
||||||
|
/// </summary>
|
||||||
|
void* interTokenState;
|
||||||
|
|
||||||
|
RegexProcessingToken(vint _start, vint _length, vint _token, bool _completeToken, void* _interTokenState)
|
||||||
|
:start(_start)
|
||||||
|
, length(_length)
|
||||||
|
, token(_token)
|
||||||
|
, completeToken(_completeToken)
|
||||||
|
, interTokenState(_interTokenState)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
using RegexInterTokenStateDeleter = void(*)(void* interTokenState);
|
||||||
|
using RegexTokenExtendProc = void(*)(void* argument, const wchar_t* reading, vint length, bool completeText, RegexProcessingToken& processingToken);
|
||||||
|
using RegexTokenColorizeProc = void(*)(void* argument, vint start, vint length, vint token);
|
||||||
|
|
||||||
|
/// <summary>Callback procedures.</summary>
|
||||||
|
struct RegexProc
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// The deleter which deletes inter token state objects created by <see cref="extendProc"/>. This callback is not called automatically.
|
||||||
|
/// </summary>
|
||||||
|
RegexInterTokenStateDeleter deleter = nullptr;
|
||||||
|
/// <summary>
|
||||||
|
/// The token extend callback. It is called after recognizing any token, and runs a customized procedure to modify the token based on the given context.
|
||||||
|
/// If the length parameter is -1, it means the caller does not measure the incoming text buffer, which automatically indicates that the buffer is null-terminated.
|
||||||
|
/// If the length parameter is not -1, it means the number of available characters in the buffer.
|
||||||
|
/// The completeText parameter could be true or false. When it is false, it means that the buffer does not contain all the text.
|
||||||
|
/// </summary>
|
||||||
|
RegexTokenExtendProc extendProc = nullptr;
|
||||||
|
/// <summary>
|
||||||
|
/// The colorizer callback. It is called when a token is recognized.
|
||||||
|
/// </summary>
|
||||||
|
RegexTokenColorizeProc colorizeProc = nullptr;
|
||||||
|
/// <summary>
|
||||||
|
/// The argument object that is the first argument for <see cref="extendProc"/> and <see cref="colorizeProc"/>.
|
||||||
|
/// </summary>
|
||||||
|
void* argument = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
/// <summary>Token collection representing the result from the lexical analyzer.</summary>
|
/// <summary>Token collection representing the result from the lexical analyzer.</summary>
|
||||||
class RegexTokens : public Object, public collections::IEnumerable<RegexToken>
|
class RegexTokens : public Object, public collections::IEnumerable<RegexToken>
|
||||||
{
|
{
|
||||||
@@ -6858,10 +6926,12 @@ Tokenizer
|
|||||||
const collections::Array<vint>& stateTokens;
|
const collections::Array<vint>& stateTokens;
|
||||||
WString code;
|
WString code;
|
||||||
vint codeIndex;
|
vint codeIndex;
|
||||||
|
RegexProc proc;
|
||||||
|
|
||||||
RegexTokens(regex_internal::PureInterpretor* _pure, const collections::Array<vint>& _stateTokens, const WString& _code, vint _codeIndex);
|
RegexTokens(regex_internal::PureInterpretor* _pure, const collections::Array<vint>& _stateTokens, const WString& _code, vint _codeIndex, RegexProc _proc);
|
||||||
public:
|
public:
|
||||||
RegexTokens(const RegexTokens& tokens);
|
RegexTokens(const RegexTokens& tokens);
|
||||||
|
~RegexTokens();
|
||||||
|
|
||||||
collections::IEnumerator<RegexToken>* CreateEnumerator()const;
|
collections::IEnumerator<RegexToken>* CreateEnumerator()const;
|
||||||
|
|
||||||
@@ -6881,7 +6951,7 @@ Tokenizer
|
|||||||
|
|
||||||
RegexLexerWalker(regex_internal::PureInterpretor* _pure, const collections::Array<vint>& _stateTokens);
|
RegexLexerWalker(regex_internal::PureInterpretor* _pure, const collections::Array<vint>& _stateTokens);
|
||||||
public:
|
public:
|
||||||
RegexLexerWalker(const RegexLexerWalker& walker);
|
RegexLexerWalker(const RegexLexerWalker& tokens);
|
||||||
~RegexLexerWalker();
|
~RegexLexerWalker();
|
||||||
|
|
||||||
/// <summary>Get the start DFA state number, which represents the correct state before parsing any input.</summary>
|
/// <summary>Get the start DFA state number, which represents the correct state before parsing any input.</summary>
|
||||||
@@ -6919,48 +6989,59 @@ Tokenizer
|
|||||||
{
|
{
|
||||||
friend class RegexLexer;
|
friend class RegexLexer;
|
||||||
public:
|
public:
|
||||||
typedef void(*TokenProc)(void* argument, vint start, vint length, vint token);
|
struct InternalState
|
||||||
|
{
|
||||||
|
vint currentState = -1;
|
||||||
|
vint interTokenId = -1;
|
||||||
|
void* interTokenState = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
RegexLexerWalker walker;
|
RegexLexerWalker walker;
|
||||||
vint currentState;
|
RegexProc proc;
|
||||||
|
InternalState internalState;
|
||||||
|
|
||||||
RegexLexerColorizer(const RegexLexerWalker& _walker);
|
void CallExtendProcAndColorizeProc(const wchar_t* input, vint length, RegexProcessingToken& token, bool colorize);
|
||||||
|
vint WalkOneToken(const wchar_t* input, vint length, vint start, bool colorize);
|
||||||
|
|
||||||
|
RegexLexerColorizer(const RegexLexerWalker& _walker, RegexProc _proc);
|
||||||
public:
|
public:
|
||||||
RegexLexerColorizer(const RegexLexerColorizer& colorizer);
|
RegexLexerColorizer(const RegexLexerColorizer& colorizer);
|
||||||
~RegexLexerColorizer();
|
~RegexLexerColorizer();
|
||||||
|
|
||||||
/// <summary>Reset the colorizer using the DFA state number.</summary>
|
/// <summary>Get the internal state.</summary>
|
||||||
/// <param name="state">The DFA state number.</param>
|
/// <returns>The internal state.</returns>
|
||||||
void Reset(vint state);
|
InternalState GetInternalState();
|
||||||
|
/// <summary>Restore the colorizer to an internal state.</summary>
|
||||||
|
/// <param name="state">The internal state.</param>
|
||||||
|
void SetInternalState(InternalState state);
|
||||||
/// <summary>Step forward by one character.</summary>
|
/// <summary>Step forward by one character.</summary>
|
||||||
/// <param name="input">The input character.</param>
|
/// <param name="input">The input character.</param>
|
||||||
void Pass(wchar_t input);
|
void Pass(wchar_t input);
|
||||||
/// <summary>Get the start DFA state number, which represents the correct state before colorizing any characters.</summary>
|
/// <summary>Get the start DFA state number, which represents the correct state before colorizing any characters.</summary>
|
||||||
/// <returns>The DFA state number.</returns>
|
/// <returns>The DFA state number.</returns>
|
||||||
vint GetStartState()const;
|
vint GetStartState()const;
|
||||||
/// <summary>Get the current DFA state number.</summary>
|
/// <summary>Colorize a text.</summary>
|
||||||
/// <returns>The DFA state number.</returns>
|
/// <returns>An inter token state at the end of this line. It could be the same object which is returned from the previous call.</returns>
|
||||||
vint GetCurrentState()const;
|
|
||||||
/// <summary>Colorize a text.</summary>
|
|
||||||
/// <param name="input">The text to colorize.</param>
|
/// <param name="input">The text to colorize.</param>
|
||||||
/// <param name="length">Size of the text in characters.</param>
|
/// <param name="length">Size of the text in characters.</param>
|
||||||
/// <param name="tokenProc">Colorizer callback. This callback will be called if any token is found..</param>
|
void* Colorize(const wchar_t* input, vint length);
|
||||||
/// <param name="tokenProcArgument">The argument to call the callback.</param>
|
|
||||||
void Colorize(const wchar_t* input, vint length, TokenProc tokenProc, void* tokenProcArgument);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// <summary>Lexical analyzer.</summary>
|
/// <summary>Lexical analyzer.</summary>
|
||||||
class RegexLexer : public Object, private NotCopyable
|
class RegexLexer : public Object, private NotCopyable
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
regex_internal::PureInterpretor* pure;
|
regex_internal::PureInterpretor* pure = nullptr;
|
||||||
collections::Array<vint> ids;
|
collections::Array<vint> ids;
|
||||||
collections::Array<vint> stateTokens;
|
collections::Array<vint> stateTokens;
|
||||||
|
RegexProc proc;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// <summary>Create a lexical analyzer by a set of regular expressions. [F:vl.regex.RegexToken.token] will be the index of the matched regular expression.</summary>
|
/// <summary>Create a lexical analyzer by a set of regular expressions. [F:vl.regex.RegexToken.token] will be the index of the matched regular expression.</summary>
|
||||||
/// <param name="tokens">The regular expressions.</param>
|
/// <param name="tokens">The regular expressions.</param>
|
||||||
RegexLexer(const collections::IEnumerable<WString>& tokens);
|
/// <param name="_proc">Callback procedures.</param>
|
||||||
|
RegexLexer(const collections::IEnumerable<WString>& tokens, RegexProc _proc);
|
||||||
~RegexLexer();
|
~RegexLexer();
|
||||||
|
|
||||||
/// <summary>Tokenize an input text.</summary>
|
/// <summary>Tokenize an input text.</summary>
|
||||||
|
|||||||
Reference in New Issue
Block a user