1394 lines
50 KiB
C++
1394 lines
50 KiB
C++
|
// Scintilla source code edit control
/** @file LexPerl.cxx
 ** Lexer for subset of Perl.
 **/
// Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
// The License.txt file describes the conditions under which this software may be distributed.
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#include <ctype.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stdarg.h>
|
||
|
|
||
|
#include "Platform.h"
|
||
|
|
||
|
#include "PropSet.h"
|
||
|
#include "Accessor.h"
|
||
|
#include "KeyWords.h"
|
||
|
#include "Scintilla.h"
|
||
|
#include "SciLexer.h"
|
||
|
|
||
|
#ifdef SCI_NAMESPACE
|
||
|
using namespace Scintilla;
|
||
|
#endif
|
||
|
|
||
|
// Sub-states used while lexing a numeric literal.
// The order is significant: values 1-4 cannot have a dot,
// values 1-5 are numbers and values 6-7 are strings.
#define PERLNUM_BINARY    1
#define PERLNUM_HEX       2
#define PERLNUM_OCTAL     3
#define PERLNUM_FLOAT     4  // actually exponent part
#define PERLNUM_DECIMAL   5
#define PERLNUM_VECTOR    6
#define PERLNUM_V_VECTOR  7
#define PERLNUM_BAD       8

// Lookback state for bareword disambiguation:
// whitespace/comments are insignificant,
// operators/keywords are needed for disambiguation.
#define BACK_NONE      0
#define BACK_OPERATOR  1
#define BACK_KEYWORD   2

// Size of the buffer allocated for a here-doc delimiter.
#define HERE_DELIM_MAX 256
|
||
|
|
||
|
// Returns true when ch is a carriage return or a line feed.
static inline bool isEOLChar(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
|
||
|
|
||
|
// Returns true when ch is one of the letters accepted after '-' as a
// file test operator (the caller checks for the leading '-').
// A NUL character is never accepted.
static bool isSingleCharOp(char ch) {
	static const char fileTestLetters[] = "rwxoRWXOezsfdlpSbctugkTBMAC";
	// strchr() also matches the terminating NUL of the set, so a NUL input
	// has to be rejected explicitly instead of being reported as a letter.
	return ch != '\0' && strchr(fileTestLetters, ch) != NULL;
}
|
||
|
|
||
|
// Returns true when ch is one of the single characters treated as an
// operator by this function's table.
static inline bool isPerlOperator(char ch) {
	switch (ch) {
	case '^': case '&': case '\\':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '>': case ',':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		// '%', '*', '<' and '/' are deliberately absent:
		// these chars are already tested before this call
		return false;
	}
}
|
||
|
|
||
|
static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
|
||
|
char s[100];
|
||
|
unsigned int i, len = end - start;
|
||
|
if (len > 30) { len = 30; }
|
||
|
for (i = 0; i < len; i++, start++) s[i] = styler[start];
|
||
|
s[i] = '\0';
|
||
|
return keywords.InList(s);
|
||
|
}
|
||
|
|
||
|
// Note: as the lexer uses chars, UTF-8 bytes are considered as <0 values
// Note: iswordchar() was used in only one place in LexPerl; it is
// unnecessary as '.' is processed as the concatenation operator, so
// only isWordStart() is used in LexPerl
|
||
|
|
||
|
// Returns true for any non-ASCII char, and for an ASCII letter, digit
// or underscore.
static inline bool isWordStart(char ch) {
	if (!isascii(ch))
		return true;
	// ch is known to be ASCII here, so it is a valid isalnum() argument
	return ch == '_' || isalnum(ch);
}
|
||
|
|
||
|
// Returns true for an ASCII char that is not a letter, not a digit and
// not one of '#', '$', '_' or '\''. Non-ASCII chars yield false.
static inline bool isEndVar(char ch) {
	if (!isascii(ch) || isalnum(ch))
		return false;
	switch (ch) {
	case '#':
	case '$':
	case '_':
	case '\'':
		return false;
	default:
		return true;
	}
}
|
||
|
|
||
|
// Returns true for any non-ASCII char, and for an ASCII letter, digit
// or underscore. The lexer uses the negated result to decide whether the
// char after s, m, q, y, tr, qq, qr, qw or qx can start a quote-like
// construct.
static inline bool isNonQuote(char ch) {
	const bool ascii = isascii(ch) != 0;
	if (ascii && ch != '_' && !isalnum(ch))
		return false;
	return true;
}
|
||
|
|
||
|
static inline char actualNumStyle(int numberStyle) {
|
||
|
if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
|
||
|
return SCE_PL_STRING;
|
||
|
} else if (numberStyle == PERLNUM_BAD) {
|
||
|
return SCE_PL_ERROR;
|
||
|
}
|
||
|
return SCE_PL_NUMBER;
|
||
|
}
|
||
|
|
||
|
// Reports whether the text of styler starting at pos begins with val.
// Returns false without comparing anything when pos plus the length of
// val reaches or passes lengthDoc.
static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
	const int valLen = static_cast<int>(strlen(val));
	if (pos + valLen >= lengthDoc)
		return false;
	for (int k = 0; k < valLen; k++) {
		if (val[k] != styler[pos + k])
			return false;
	}
	return true;
}
|
||
|
|
||
|
// Returns the closing bracket that pairs with an opening '(', '[', '{'
// or '<'; any other char is returned unchanged.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
|
||
|
|
||
|
static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
|
||
|
WordList *keywordlists[], Accessor &styler) {
|
||
|
|
||
|
// Lexer for perl often has to backtrack to start of current style to determine
|
||
|
// which characters are being used as quotes, how deeply nested is the
|
||
|
// start position and what the termination string is for here documents
|
||
|
|
||
|
WordList &keywords = *keywordlists[0];
|
||
|
|
||
|
// keywords that forces /PATTERN/ at all times
|
||
|
WordList reWords;
|
||
|
reWords.Set("elsif if split while");
|
||
|
|
||
|
class HereDocCls {
|
||
|
public:
|
||
|
int State; // 0: '<<' encountered
|
||
|
// 1: collect the delimiter
|
||
|
// 2: here doc text (lines after the delimiter)
|
||
|
char Quote; // the char after '<<'
|
||
|
bool Quoted; // true if Quote in ('\'','"','`')
|
||
|
int DelimiterLength; // strlen(Delimiter)
|
||
|
char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
|
||
|
HereDocCls() {
|
||
|
State = 0;
|
||
|
Quote = 0;
|
||
|
Quoted = false;
|
||
|
DelimiterLength = 0;
|
||
|
Delimiter = new char[HERE_DELIM_MAX];
|
||
|
Delimiter[0] = '\0';
|
||
|
}
|
||
|
~HereDocCls() {
|
||
|
delete []Delimiter;
|
||
|
}
|
||
|
};
|
||
|
HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
|
||
|
|
||
|
class QuoteCls {
|
||
|
public:
|
||
|
int Rep;
|
||
|
int Count;
|
||
|
char Up;
|
||
|
char Down;
|
||
|
QuoteCls() {
|
||
|
this->New(1);
|
||
|
}
|
||
|
void New(int r) {
|
||
|
Rep = r;
|
||
|
Count = 0;
|
||
|
Up = '\0';
|
||
|
Down = '\0';
|
||
|
}
|
||
|
void Open(char u) {
|
||
|
Count++;
|
||
|
Up = u;
|
||
|
Down = opposite(Up);
|
||
|
}
|
||
|
};
|
||
|
QuoteCls Quote;
|
||
|
|
||
|
int state = initStyle;
|
||
|
char numState = PERLNUM_DECIMAL;
|
||
|
int dotCount = 0;
|
||
|
unsigned int lengthDoc = startPos + length;
|
||
|
//int sookedpos = 0; // these have no apparent use, see POD state
|
||
|
//char sooked[100];
|
||
|
//sooked[sookedpos] = '\0';
|
||
|
|
||
|
styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
|
||
|
// If in a long distance lexical state, seek to the beginning to find quote characters
|
||
|
// Perl strings can be multi-line with embedded newlines, so backtrack.
|
||
|
// Perl numbers have additional state during lexing, so backtrack too.
|
||
|
if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) {
|
||
|
while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) {
|
||
|
startPos--;
|
||
|
}
|
||
|
startPos = styler.LineStart(styler.GetLine(startPos));
|
||
|
state = styler.StyleAt(startPos - 1);
|
||
|
}
|
||
|
// Backtrack for format body.
|
||
|
if (state == SCE_PL_FORMAT) {
|
||
|
while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_FORMAT_IDENT)) {
|
||
|
startPos--;
|
||
|
}
|
||
|
startPos = styler.LineStart(styler.GetLine(startPos));
|
||
|
state = styler.StyleAt(startPos - 1);
|
||
|
}
|
||
|
if ( state == SCE_PL_STRING_Q
|
||
|
|| state == SCE_PL_STRING_QQ
|
||
|
|| state == SCE_PL_STRING_QX
|
||
|
|| state == SCE_PL_STRING_QR
|
||
|
|| state == SCE_PL_STRING_QW
|
||
|
|| state == SCE_PL_REGEX
|
||
|
|| state == SCE_PL_REGSUBST
|
||
|
|| state == SCE_PL_STRING
|
||
|
|| state == SCE_PL_BACKTICKS
|
||
|
|| state == SCE_PL_CHARACTER
|
||
|
|| state == SCE_PL_NUMBER
|
||
|
|| state == SCE_PL_IDENTIFIER
|
||
|
|| state == SCE_PL_ERROR
|
||
|
|| state == SCE_PL_SUB_PROTOTYPE
|
||
|
) {
|
||
|
while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
|
||
|
startPos--;
|
||
|
}
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
}
|
||
|
|
||
|
// lookback at start of lexing to set proper state for backflag
|
||
|
// after this, they are updated when elements are lexed
|
||
|
int backflag = BACK_NONE;
|
||
|
unsigned int backPos = startPos;
|
||
|
if (backPos > 0) {
|
||
|
backPos--;
|
||
|
int sty = SCE_PL_DEFAULT;
|
||
|
while ((backPos > 0) && (sty = styler.StyleAt(backPos),
|
||
|
sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE))
|
||
|
backPos--;
|
||
|
if (sty == SCE_PL_OPERATOR)
|
||
|
backflag = BACK_OPERATOR;
|
||
|
else if (sty == SCE_PL_WORD)
|
||
|
backflag = BACK_KEYWORD;
|
||
|
}
|
||
|
|
||
|
styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
|
||
|
char chPrev = styler.SafeGetCharAt(startPos - 1);
|
||
|
if (startPos == 0)
|
||
|
chPrev = '\n';
|
||
|
char chNext = styler[startPos];
|
||
|
styler.StartSegment(startPos);
|
||
|
|
||
|
for (unsigned int i = startPos; i < lengthDoc; i++) {
|
||
|
char ch = chNext;
|
||
|
// if the current character is not consumed due to the completion of an
|
||
|
// earlier style, lexing can be restarted via a simple goto
|
||
|
restartLexer:
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
char chNext2 = styler.SafeGetCharAt(i + 2);
|
||
|
|
||
|
if (styler.IsLeadByte(ch)) {
|
||
|
chNext = styler.SafeGetCharAt(i + 2);
|
||
|
chPrev = ' ';
|
||
|
i += 1;
|
||
|
continue;
|
||
|
}
|
||
|
if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows
|
||
|
styler.ColourTo(i, state);
|
||
|
chPrev = ch;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (HereDoc.State == 1 && isEOLChar(ch)) {
|
||
|
// Begin of here-doc (the line after the here-doc delimiter):
|
||
|
// Lexically, the here-doc starts from the next line after the >>, but the
|
||
|
// first line of here-doc seem to follow the style of the last EOL sequence
|
||
|
HereDoc.State = 2;
|
||
|
if (HereDoc.Quoted) {
|
||
|
if (state == SCE_PL_HERE_DELIM) {
|
||
|
// Missing quote at end of string! We are stricter than perl.
|
||
|
// Colour here-doc anyway while marking this bit as an error.
|
||
|
state = SCE_PL_ERROR;
|
||
|
}
|
||
|
styler.ColourTo(i - 1, state);
|
||
|
switch (HereDoc.Quote) {
|
||
|
case '\'':
|
||
|
state = SCE_PL_HERE_Q ;
|
||
|
break;
|
||
|
case '"':
|
||
|
state = SCE_PL_HERE_QQ;
|
||
|
break;
|
||
|
case '`':
|
||
|
state = SCE_PL_HERE_QX;
|
||
|
break;
|
||
|
}
|
||
|
} else {
|
||
|
styler.ColourTo(i - 1, state);
|
||
|
switch (HereDoc.Quote) {
|
||
|
case '\\':
|
||
|
state = SCE_PL_HERE_Q ;
|
||
|
break;
|
||
|
default :
|
||
|
state = SCE_PL_HERE_QQ;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if (HereDoc.State == 4 && isEOLChar(ch)) {
|
||
|
// Start of format body.
|
||
|
HereDoc.State = 0;
|
||
|
styler.ColourTo(i - 1, state);
|
||
|
state = SCE_PL_FORMAT;
|
||
|
}
|
||
|
|
||
|
if (state == SCE_PL_DEFAULT) {
|
||
|
if ((isascii(ch) && isdigit(ch)) || (isascii(chNext) && isdigit(chNext) &&
|
||
|
(ch == '.' || ch == 'v'))) {
|
||
|
state = SCE_PL_NUMBER;
|
||
|
backflag = BACK_NONE;
|
||
|
numState = PERLNUM_DECIMAL;
|
||
|
dotCount = 0;
|
||
|
if (ch == '0') { // hex,bin,octal
|
||
|
if (chNext == 'x') {
|
||
|
numState = PERLNUM_HEX;
|
||
|
} else if (chNext == 'b') {
|
||
|
numState = PERLNUM_BINARY;
|
||
|
} else if (isascii(chNext) && isdigit(chNext)) {
|
||
|
numState = PERLNUM_OCTAL;
|
||
|
}
|
||
|
if (numState != PERLNUM_DECIMAL) {
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = chNext2;
|
||
|
}
|
||
|
} else if (ch == 'v') { // vector
|
||
|
numState = PERLNUM_V_VECTOR;
|
||
|
}
|
||
|
} else if (isWordStart(ch)) {
|
||
|
// if immediately prefixed by '::', always a bareword
|
||
|
state = SCE_PL_WORD;
|
||
|
if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') {
|
||
|
state = SCE_PL_IDENTIFIER;
|
||
|
}
|
||
|
unsigned int kw = i + 1;
|
||
|
// first check for possible quote-like delimiter
|
||
|
if (ch == 's' && !isNonQuote(chNext)) {
|
||
|
state = SCE_PL_REGSUBST;
|
||
|
Quote.New(2);
|
||
|
} else if (ch == 'm' && !isNonQuote(chNext)) {
|
||
|
state = SCE_PL_REGEX;
|
||
|
Quote.New(1);
|
||
|
} else if (ch == 'q' && !isNonQuote(chNext)) {
|
||
|
state = SCE_PL_STRING_Q;
|
||
|
Quote.New(1);
|
||
|
} else if (ch == 'y' && !isNonQuote(chNext)) {
|
||
|
state = SCE_PL_REGSUBST;
|
||
|
Quote.New(2);
|
||
|
} else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) {
|
||
|
state = SCE_PL_REGSUBST;
|
||
|
Quote.New(2);
|
||
|
kw++;
|
||
|
} else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) {
|
||
|
if (chNext == 'q') state = SCE_PL_STRING_QQ;
|
||
|
else if (chNext == 'x') state = SCE_PL_STRING_QX;
|
||
|
else if (chNext == 'r') state = SCE_PL_STRING_QR;
|
||
|
else if (chNext == 'w') state = SCE_PL_STRING_QW;
|
||
|
Quote.New(1);
|
||
|
kw++;
|
||
|
} else if (ch == 'x' && (chNext == '=' || // repetition
|
||
|
!isWordStart(chNext) ||
|
||
|
(isdigit(chPrev) && isdigit(chNext)))) {
|
||
|
state = SCE_PL_OPERATOR;
|
||
|
}
|
||
|
// if potentially a keyword, scan forward and grab word, then check
|
||
|
// if it's really one; if yes, disambiguation test is performed
|
||
|
// otherwise it is always a bareword and we skip a lot of scanning
|
||
|
// note: keywords assumed to be limited to [_a-zA-Z] only
|
||
|
if (state == SCE_PL_WORD) {
|
||
|
while (isWordStart(styler.SafeGetCharAt(kw))) kw++;
|
||
|
if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) {
|
||
|
state = SCE_PL_IDENTIFIER;
|
||
|
}
|
||
|
}
|
||
|
// if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
|
||
|
// for quote-like delimiters/keywords, attempt to disambiguate
|
||
|
// to select for bareword, change state -> SCE_PL_IDENTIFIER
|
||
|
if (state != SCE_PL_IDENTIFIER && i > 0) {
|
||
|
unsigned int j = i;
|
||
|
bool moreback = false; // true if passed newline/comments
|
||
|
bool brace = false; // true if opening brace found
|
||
|
char ch2;
|
||
|
// first look backwards past whitespace/comments for EOLs
|
||
|
// if BACK_NONE, neither operator nor keyword, so skip test
|
||
|
if (backflag != BACK_NONE) {
|
||
|
while (--j > backPos) {
|
||
|
if (isEOLChar(styler.SafeGetCharAt(j)))
|
||
|
moreback = true;
|
||
|
}
|
||
|
ch2 = styler.SafeGetCharAt(j);
|
||
|
if (ch2 == '{' && !moreback) {
|
||
|
// {bareword: possible variable spec
|
||
|
brace = true;
|
||
|
} else if ((ch2 == '&' && styler.SafeGetCharAt(j - 1) != '&')
|
||
|
// &bareword: subroutine call
|
||
|
|| (ch2 == '>' && styler.SafeGetCharAt(j - 1) == '-')
|
||
|
// ->bareword: part of variable spec
|
||
|
|| (ch2 == 'b' && styler.Match(j - 2, "su"))) {
|
||
|
// sub bareword: subroutine declaration
|
||
|
// (implied BACK_KEYWORD, no keywords end in 'sub'!)
|
||
|
state = SCE_PL_IDENTIFIER;
|
||
|
}
|
||
|
// if status still ambiguous, look forward after word past
|
||
|
// tabs/spaces only; if ch2 isn't one of '[{(,' it can never
|
||
|
// match anything, so skip the whole thing
|
||
|
j = kw;
|
||
|
if (state != SCE_PL_IDENTIFIER
|
||
|
&& (ch2 == '{' || ch2 == '(' || ch2 == '['|| ch2 == ',')
|
||
|
&& kw < lengthDoc) {
|
||
|
while (ch2 = styler.SafeGetCharAt(j),
|
||
|
(ch2 == ' ' || ch2 == '\t') && j < lengthDoc) {
|
||
|
j++;
|
||
|
}
|
||
|
if ((ch2 == '}' && brace)
|
||
|
// {bareword}: variable spec
|
||
|
|| (ch2 == '=' && styler.SafeGetCharAt(j + 1) == '>')) {
|
||
|
// [{(, bareword=>: hash literal
|
||
|
state = SCE_PL_IDENTIFIER;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
backflag = BACK_NONE;
|
||
|
// an identifier or bareword
|
||
|
if (state == SCE_PL_IDENTIFIER) {
|
||
|
if ((!isWordStart(chNext) && chNext != '\'')
|
||
|
|| (chNext == '.' && chNext2 == '.')) {
|
||
|
// We need that if length of word == 1!
|
||
|
// This test is copied from the SCE_PL_WORD handler.
|
||
|
styler.ColourTo(i, SCE_PL_IDENTIFIER);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
}
|
||
|
// a keyword
|
||
|
} else if (state == SCE_PL_WORD) {
|
||
|
i = kw - 1;
|
||
|
if (ch == '_' && chNext == '_' &&
|
||
|
(isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")
|
||
|
|| isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__"))) {
|
||
|
styler.ColourTo(i, SCE_PL_DATASECTION);
|
||
|
state = SCE_PL_DATASECTION;
|
||
|
} else {
|
||
|
if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "format")) {
|
||
|
state = SCE_PL_FORMAT_IDENT;
|
||
|
HereDoc.State = 0;
|
||
|
} else {
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
}
|
||
|
styler.ColourTo(i, SCE_PL_WORD);
|
||
|
backflag = BACK_KEYWORD;
|
||
|
backPos = i;
|
||
|
}
|
||
|
ch = styler.SafeGetCharAt(i);
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
// a repetition operator 'x'
|
||
|
} else if (state == SCE_PL_OPERATOR) {
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
goto handleOperator;
|
||
|
// quote-like delimiter, skip one char if double-char delimiter
|
||
|
} else {
|
||
|
i = kw - 1;
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
}
|
||
|
} else if (ch == '#') {
|
||
|
state = SCE_PL_COMMENTLINE;
|
||
|
} else if (ch == '\"') {
|
||
|
state = SCE_PL_STRING;
|
||
|
Quote.New(1);
|
||
|
Quote.Open(ch);
|
||
|
backflag = BACK_NONE;
|
||
|
} else if (ch == '\'') {
|
||
|
if (chPrev == '&') {
|
||
|
// Archaic call
|
||
|
styler.ColourTo(i, state);
|
||
|
} else {
|
||
|
state = SCE_PL_CHARACTER;
|
||
|
Quote.New(1);
|
||
|
Quote.Open(ch);
|
||
|
}
|
||
|
backflag = BACK_NONE;
|
||
|
} else if (ch == '`') {
|
||
|
state = SCE_PL_BACKTICKS;
|
||
|
Quote.New(1);
|
||
|
Quote.Open(ch);
|
||
|
backflag = BACK_NONE;
|
||
|
} else if (ch == '$') {
|
||
|
if ((chNext == '{') || isspacechar(chNext)) {
|
||
|
styler.ColourTo(i, SCE_PL_SCALAR);
|
||
|
} else {
|
||
|
state = SCE_PL_SCALAR;
|
||
|
if ((chNext == '`' && chNext2 == '`')
|
||
|
|| (chNext == ':' && chNext2 == ':')) {
|
||
|
i += 2;
|
||
|
ch = styler.SafeGetCharAt(i);
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
} else {
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = chNext2;
|
||
|
}
|
||
|
}
|
||
|
backflag = BACK_NONE;
|
||
|
} else if (ch == '@') {
|
||
|
if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$'
|
||
|
|| chNext == '_' || chNext == '+' || chNext == '-') {
|
||
|
state = SCE_PL_ARRAY;
|
||
|
} else if (chNext == ':' && chNext2 == ':') {
|
||
|
state = SCE_PL_ARRAY;
|
||
|
i += 2;
|
||
|
ch = styler.SafeGetCharAt(i);
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
} else if (chNext != '{' && chNext != '[') {
|
||
|
styler.ColourTo(i, SCE_PL_ARRAY);
|
||
|
} else {
|
||
|
styler.ColourTo(i, SCE_PL_ARRAY);
|
||
|
}
|
||
|
backflag = BACK_NONE;
|
||
|
} else if (ch == '%') {
|
||
|
backflag = BACK_NONE;
|
||
|
if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$'
|
||
|
|| chNext == '_' || chNext == '!' || chNext == '^') {
|
||
|
state = SCE_PL_HASH;
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = chNext2;
|
||
|
} else if (chNext == ':' && chNext2 == ':') {
|
||
|
state = SCE_PL_HASH;
|
||
|
i += 2;
|
||
|
ch = styler.SafeGetCharAt(i);
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
} else if (chNext == '{') {
|
||
|
styler.ColourTo(i, SCE_PL_HASH);
|
||
|
} else {
|
||
|
goto handleOperator;
|
||
|
}
|
||
|
} else if (ch == '*') {
|
||
|
backflag = BACK_NONE;
|
||
|
char strch[2];
|
||
|
strch[0] = chNext;
|
||
|
strch[1] = '\0';
|
||
|
if (chNext == ':' && chNext2 == ':') {
|
||
|
state = SCE_PL_SYMBOLTABLE;
|
||
|
i += 2;
|
||
|
ch = styler.SafeGetCharAt(i);
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
} else if (!isascii(chNext) || isalpha(chNext) || chNext == '_'
|
||
|
|| NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) {
|
||
|
state = SCE_PL_SYMBOLTABLE;
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = chNext2;
|
||
|
} else if (chNext == '{') {
|
||
|
styler.ColourTo(i, SCE_PL_SYMBOLTABLE);
|
||
|
} else {
|
||
|
if (chNext == '*') { // exponentiation
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = chNext2;
|
||
|
}
|
||
|
goto handleOperator;
|
||
|
}
|
||
|
} else if (ch == '/' || (ch == '<' && chNext == '<')) {
|
||
|
// Explicit backward peeking to set a consistent preferRE for
|
||
|
// any slash found, so no longer need to track preferRE state.
|
||
|
// Find first previous significant lexed element and interpret.
|
||
|
// Test for HERE doc start '<<' shares this code, helps to
|
||
|
// determine if it should be an operator.
|
||
|
bool preferRE = false;
|
||
|
bool isHereDoc = (ch == '<');
|
||
|
bool hereDocSpace = false; // these are for corner case:
|
||
|
bool hereDocScalar = false; // SCALAR [whitespace] '<<'
|
||
|
unsigned int bk = (i > 0)? i - 1: 0;
|
||
|
unsigned int bkend;
|
||
|
char bkch;
|
||
|
styler.Flush();
|
||
|
if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
|
||
|
hereDocSpace = true;
|
||
|
while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
|
||
|
styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
|
||
|
bk--;
|
||
|
}
|
||
|
if (bk == 0) {
|
||
|
// position 0 won't really be checked; rarely happens
|
||
|
// hard to fix due to an unsigned index i
|
||
|
preferRE = true;
|
||
|
} else {
|
||
|
int bkstyle = styler.StyleAt(bk);
|
||
|
bkch = styler.SafeGetCharAt(bk);
|
||
|
switch(bkstyle) {
|
||
|
case SCE_PL_OPERATOR:
|
||
|
preferRE = true;
|
||
|
if (bkch == ')' || bkch == ']') {
|
||
|
preferRE = false;
|
||
|
} else if (bkch == '}') {
|
||
|
// backtrack further, count balanced brace pairs
|
||
|
// if a brace pair found, see if it's a variable
|
||
|
int braceCount = 1;
|
||
|
while (--bk > 0) {
|
||
|
bkstyle = styler.StyleAt(bk);
|
||
|
if (bkstyle == SCE_PL_OPERATOR) {
|
||
|
bkch = styler.SafeGetCharAt(bk);
|
||
|
if (bkch == ';') { // early out
|
||
|
break;
|
||
|
} else if (bkch == '}') {
|
||
|
braceCount++;
|
||
|
} else if (bkch == '{') {
|
||
|
if (--braceCount == 0)
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if (bk == 0) {
|
||
|
// at beginning, true
|
||
|
} else if (braceCount == 0) {
|
||
|
// balanced { found, bk>0, skip more whitespace
|
||
|
if (styler.StyleAt(--bk) == SCE_PL_DEFAULT) {
|
||
|
while (bk > 0) {
|
||
|
bkstyle = styler.StyleAt(--bk);
|
||
|
if (bkstyle != SCE_PL_DEFAULT)
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
bkstyle = styler.StyleAt(bk);
|
||
|
if (bkstyle == SCE_PL_SCALAR
|
||
|
|| bkstyle == SCE_PL_ARRAY
|
||
|
|| bkstyle == SCE_PL_HASH
|
||
|
|| bkstyle == SCE_PL_SYMBOLTABLE
|
||
|
|| bkstyle == SCE_PL_OPERATOR) {
|
||
|
preferRE = false;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
case SCE_PL_IDENTIFIER:
|
||
|
preferRE = true;
|
||
|
if (bkch == '>') { // inputsymbol
|
||
|
preferRE = false;
|
||
|
break;
|
||
|
}
|
||
|
// backtrack to find "->" or "::" before identifier
|
||
|
while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
|
||
|
bk--;
|
||
|
}
|
||
|
while (bk > 0) {
|
||
|
bkstyle = styler.StyleAt(bk);
|
||
|
if (bkstyle == SCE_PL_DEFAULT ||
|
||
|
bkstyle == SCE_PL_COMMENTLINE) {
|
||
|
} else if (bkstyle == SCE_PL_OPERATOR) {
|
||
|
bkch = styler.SafeGetCharAt(bk);
|
||
|
// test for "->" and "::"
|
||
|
if ((bkch == '>' && styler.SafeGetCharAt(bk - 1) == '-')
|
||
|
|| (bkch == ':' && styler.SafeGetCharAt(bk - 1) == ':')) {
|
||
|
preferRE = false;
|
||
|
break;
|
||
|
}
|
||
|
} else {
|
||
|
// bare identifier, if '/', /PATTERN/ unless digit/space immediately after '/'
|
||
|
// if '//', always expect defined-or operator to follow identifier
|
||
|
if (!isHereDoc &&
|
||
|
(isspacechar(chNext) || isdigit(chNext) || chNext == '/'))
|
||
|
preferRE = false;
|
||
|
// HERE docs cannot have a space after the >>
|
||
|
if (isspacechar(chNext))
|
||
|
preferRE = false;
|
||
|
break;
|
||
|
}
|
||
|
bk--;
|
||
|
}
|
||
|
break;
|
||
|
case SCE_PL_SCALAR: // for $var<< case
|
||
|
hereDocScalar = true;
|
||
|
break;
|
||
|
// for HERE docs, always true for preferRE
|
||
|
case SCE_PL_WORD:
|
||
|
preferRE = true;
|
||
|
if (isHereDoc)
|
||
|
break;
|
||
|
// adopt heuristics similar to vim-style rules:
|
||
|
// keywords always forced as /PATTERN/: split, if, elsif, while
|
||
|
// everything else /PATTERN/ unless digit/space immediately after '/'
|
||
|
// for '//', defined-or favoured unless special keywords
|
||
|
bkend = bk + 1;
|
||
|
while (bk > 0 && styler.StyleAt(bk-1) == SCE_PL_WORD) {
|
||
|
bk--;
|
||
|
}
|
||
|
if (isPerlKeyword(bk, bkend, reWords, styler))
|
||
|
break;
|
||
|
if (isspacechar(chNext) || isdigit(chNext) || chNext == '/')
|
||
|
preferRE = false;
|
||
|
break;
|
||
|
// other styles uses the default, preferRE=false
|
||
|
case SCE_PL_POD:
|
||
|
case SCE_PL_POD_VERB:
|
||
|
case SCE_PL_HERE_Q:
|
||
|
case SCE_PL_HERE_QQ:
|
||
|
case SCE_PL_HERE_QX:
|
||
|
preferRE = true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
backflag = BACK_NONE;
|
||
|
if (isHereDoc) { // handle HERE doc
|
||
|
// if SCALAR whitespace '<<', *always* a HERE doc
|
||
|
if (preferRE || (hereDocSpace && hereDocScalar)) {
|
||
|
state = SCE_PL_HERE_DELIM;
|
||
|
HereDoc.State = 0;
|
||
|
} else { // << operator
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = chNext2;
|
||
|
goto handleOperator;
|
||
|
}
|
||
|
} else { // handle regexp
|
||
|
if (preferRE) {
|
||
|
state = SCE_PL_REGEX;
|
||
|
Quote.New(1);
|
||
|
Quote.Open(ch);
|
||
|
} else { // / and // operators
|
||
|
if (chNext == '/') {
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = chNext2;
|
||
|
}
|
||
|
goto handleOperator;
|
||
|
}
|
||
|
}
|
||
|
} else if (ch == '<') {
|
||
|
// looks forward for matching > on same line
|
||
|
unsigned int fw = i + 1;
|
||
|
while (fw < lengthDoc) {
|
||
|
char fwch = styler.SafeGetCharAt(fw);
|
||
|
if (fwch == ' ') {
|
||
|
if (styler.SafeGetCharAt(fw-1) != '\\' ||
|
||
|
styler.SafeGetCharAt(fw-2) != '\\')
|
||
|
goto handleOperator;
|
||
|
} else if (isEOLChar(fwch) || isspacechar(fwch)) {
|
||
|
goto handleOperator;
|
||
|
} else if (fwch == '>') {
|
||
|
if ((fw - i) == 2 && // '<=>' case
|
||
|
styler.SafeGetCharAt(fw-1) == '=') {
|
||
|
goto handleOperator;
|
||
|
}
|
||
|
styler.ColourTo(fw, SCE_PL_IDENTIFIER);
|
||
|
i = fw;
|
||
|
ch = fwch;
|
||
|
chNext = styler.SafeGetCharAt(i+1);
|
||
|
}
|
||
|
fw++;
|
||
|
}
|
||
|
if (fw == lengthDoc)
|
||
|
goto handleOperator;
|
||
|
} else if (ch == '=' // POD
|
||
|
&& isalpha(chNext)
|
||
|
&& (isEOLChar(chPrev))) {
|
||
|
state = SCE_PL_POD;
|
||
|
backflag = BACK_NONE;
|
||
|
//sookedpos = 0;
|
||
|
//sooked[sookedpos] = '\0';
|
||
|
} else if (ch == '-' // file test operators
|
||
|
&& isSingleCharOp(chNext)
|
||
|
&& !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) {
|
||
|
styler.ColourTo(i + 1, SCE_PL_WORD);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = chNext2;
|
||
|
backflag = BACK_NONE;
|
||
|
} else if (ch == '-' // bareword promotion (-FOO cases)
|
||
|
&& ((isascii(chNext) && isalpha(chNext)) || chNext == '_')
|
||
|
&& backflag != BACK_NONE) {
|
||
|
state = SCE_PL_IDENTIFIER;
|
||
|
backflag = BACK_NONE;
|
||
|
} else if (ch == '(' && i > 0) {
|
||
|
// backtrack to identify if we're starting a sub prototype
|
||
|
// for generality, we need to ignore whitespace/comments
|
||
|
unsigned int bk = i - 1; // i > 0 tested above
|
||
|
styler.Flush();
|
||
|
while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
|
||
|
styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
|
||
|
bk--;
|
||
|
}
|
||
|
if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier
|
||
|
goto handleOperator;
|
||
|
while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
|
||
|
bk--;
|
||
|
}
|
||
|
while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
|
||
|
styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
|
||
|
bk--;
|
||
|
}
|
||
|
if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword
|
||
|
|| !styler.Match(bk - 2, "sub")) // assume suffix is unique!
|
||
|
goto handleOperator;
|
||
|
state = SCE_PL_SUB_PROTOTYPE;
|
||
|
backflag = BACK_NONE;
|
||
|
backPos = i; // needed for restart
|
||
|
} else if (isPerlOperator(ch)) {
|
||
|
if (ch == '.' && chNext == '.') { // .. and ...
|
||
|
i++;
|
||
|
if (chNext2 == '.') { i++; }
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
ch = styler.SafeGetCharAt(i);
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
}
|
||
|
handleOperator:
|
||
|
styler.ColourTo(i, SCE_PL_OPERATOR);
|
||
|
backflag = BACK_OPERATOR;
|
||
|
backPos = i;
|
||
|
} else if (ch == 4 || ch == 26) { // ^D and ^Z ends valid perl source
|
||
|
styler.ColourTo(i, SCE_PL_DATASECTION);
|
||
|
state = SCE_PL_DATASECTION;
|
||
|
} else {
|
||
|
// keep colouring defaults to make restart easier
|
||
|
styler.ColourTo(i, SCE_PL_DEFAULT);
|
||
|
}
|
||
|
} else if (state == SCE_PL_NUMBER) {
|
||
|
if (ch == '.') {
|
||
|
if (chNext == '.') {
|
||
|
// double dot is always an operator
|
||
|
goto numAtEnd;
|
||
|
} else if (numState <= PERLNUM_FLOAT) {
|
||
|
// non-decimal number or float exponent, consume next dot
|
||
|
styler.ColourTo(i - 1, SCE_PL_NUMBER);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
goto handleOperator;
|
||
|
} else { // decimal or vectors allows dots
|
||
|
dotCount++;
|
||
|
if (numState == PERLNUM_DECIMAL) {
|
||
|
if (dotCount > 1) {
|
||
|
if (isdigit(chNext)) { // really a vector
|
||
|
numState = PERLNUM_VECTOR;
|
||
|
} else // number then dot
|
||
|
goto numAtEnd;
|
||
|
}
|
||
|
} else { // vectors
|
||
|
if (!isdigit(chNext)) // vector then dot
|
||
|
goto numAtEnd;
|
||
|
}
|
||
|
}
|
||
|
} else if (ch == '_') {
|
||
|
// permissive underscoring for number and vector literals
|
||
|
} else if (!isascii(ch) || isalnum(ch)) {
|
||
|
if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
|
||
|
if (!isascii(ch) || isalpha(ch)) {
|
||
|
if (dotCount == 0) { // change to word
|
||
|
state = SCE_PL_IDENTIFIER;
|
||
|
} else { // vector then word
|
||
|
goto numAtEnd;
|
||
|
}
|
||
|
}
|
||
|
} else if (numState == PERLNUM_DECIMAL) {
|
||
|
if (ch == 'E' || ch == 'e') { // exponent
|
||
|
numState = PERLNUM_FLOAT;
|
||
|
if (chNext == '+' || chNext == '-') {
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = chNext2;
|
||
|
}
|
||
|
} else if (!isascii(ch) || !isdigit(ch)) { // number then word
|
||
|
goto numAtEnd;
|
||
|
}
|
||
|
} else if (numState == PERLNUM_FLOAT) {
|
||
|
if (!isdigit(ch)) { // float then word
|
||
|
goto numAtEnd;
|
||
|
}
|
||
|
} else if (numState == PERLNUM_OCTAL) {
|
||
|
if (!isdigit(ch))
|
||
|
goto numAtEnd;
|
||
|
else if (ch > '7')
|
||
|
numState = PERLNUM_BAD;
|
||
|
} else if (numState == PERLNUM_BINARY) {
|
||
|
if (!isdigit(ch))
|
||
|
goto numAtEnd;
|
||
|
else if (ch > '1')
|
||
|
numState = PERLNUM_BAD;
|
||
|
} else if (numState == PERLNUM_HEX) {
|
||
|
int ch2 = toupper(ch);
|
||
|
if (!isdigit(ch) && !(ch2 >= 'A' && ch2 <= 'F'))
|
||
|
goto numAtEnd;
|
||
|
} else {//(numState == PERLNUM_BAD) {
|
||
|
if (!isdigit(ch))
|
||
|
goto numAtEnd;
|
||
|
}
|
||
|
} else {
|
||
|
// complete current number or vector
|
||
|
numAtEnd:
|
||
|
styler.ColourTo(i - 1, actualNumStyle(numState));
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
goto restartLexer;
|
||
|
}
|
||
|
} else if (state == SCE_PL_IDENTIFIER) {
|
||
|
if (!isWordStart(chNext) && chNext != '\'') {
|
||
|
styler.ColourTo(i, SCE_PL_IDENTIFIER);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
ch = ' ';
|
||
|
}
|
||
|
} else {
|
||
|
if (state == SCE_PL_COMMENTLINE) {
|
||
|
if (isEOLChar(ch)) {
|
||
|
styler.ColourTo(i - 1, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
goto restartLexer;
|
||
|
} else if (isEOLChar(chNext)) {
|
||
|
styler.ColourTo(i, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
}
|
||
|
} else if (state == SCE_PL_HERE_DELIM) {
|
||
|
//
|
||
|
// From perldata.pod:
|
||
|
// ------------------
|
||
|
// A line-oriented form of quoting is based on the shell ``here-doc''
|
||
|
// syntax.
|
||
|
// Following a << you specify a string to terminate the quoted material,
|
||
|
// and all lines following the current line down to the terminating
|
||
|
// string are the value of the item.
|
||
|
// The terminating string may be either an identifier (a word),
|
||
|
// or some quoted text.
|
||
|
// If quoted, the type of quotes you use determines the treatment of
|
||
|
// the text, just as in regular quoting.
|
||
|
// An unquoted identifier works like double quotes.
|
||
|
// There must be no space between the << and the identifier.
|
||
|
// (If you put a space it will be treated as a null identifier,
|
||
|
// which is valid, and matches the first empty line.)
|
||
|
// (This is deprecated, -w warns of this syntax)
|
||
|
// The terminating string must appear by itself (unquoted and with no
|
||
|
// surrounding whitespace) on the terminating line.
|
||
|
//
|
||
|
// From Bash info:
|
||
|
// ---------------
|
||
|
// Specifier format is: <<[-]WORD
|
||
|
// Optional '-' is for removal of leading tabs from here-doc.
|
||
|
// Whitespace acceptable after <<[-] operator.
|
||
|
//
|
||
|
if (HereDoc.State == 0) { // '<<' encountered
|
||
|
bool gotspace = false;
|
||
|
unsigned int oldi = i;
|
||
|
if (chNext == ' ' || chNext == '\t') {
|
||
|
// skip whitespace; legal for quoted delimiters
|
||
|
gotspace = true;
|
||
|
do {
|
||
|
i++;
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
} while ((i + 1 < lengthDoc) && (chNext == ' ' || chNext == '\t'));
|
||
|
chNext2 = styler.SafeGetCharAt(i + 2);
|
||
|
}
|
||
|
HereDoc.State = 1;
|
||
|
HereDoc.Quote = chNext;
|
||
|
HereDoc.Quoted = false;
|
||
|
HereDoc.DelimiterLength = 0;
|
||
|
HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
|
||
|
if (chNext == '\'' || chNext == '"' || chNext == '`') {
|
||
|
// a quoted here-doc delimiter
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = chNext2;
|
||
|
HereDoc.Quoted = true;
|
||
|
} else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\'
|
||
|
|| chNext == '=' || chNext == '$' || chNext == '@'
|
||
|
|| ((isalpha(chNext) || chNext == '_') && gotspace)) {
|
||
|
// left shift << or <<= operator cases
|
||
|
// restore position if operator
|
||
|
i = oldi;
|
||
|
styler.ColourTo(i, SCE_PL_OPERATOR);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
backflag = BACK_OPERATOR;
|
||
|
backPos = i;
|
||
|
HereDoc.State = 0;
|
||
|
goto restartLexer;
|
||
|
} else {
|
||
|
// an unquoted here-doc delimiter, no special handling
|
||
|
// (cannot be prefixed by spaces/tabs), or
|
||
|
// symbols terminates; deprecated zero-length delimiter
|
||
|
}
|
||
|
|
||
|
} else if (HereDoc.State == 1) { // collect the delimiter
|
||
|
backflag = BACK_NONE;
|
||
|
if (HereDoc.Quoted) { // a quoted here-doc delimiter
|
||
|
if (ch == HereDoc.Quote) { // closing quote => end of delimiter
|
||
|
styler.ColourTo(i, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
} else {
|
||
|
if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = chNext2;
|
||
|
}
|
||
|
HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
|
||
|
HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
|
||
|
}
|
||
|
} else { // an unquoted here-doc delimiter
|
||
|
if (isalnum(ch) || ch == '_') {
|
||
|
HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
|
||
|
HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
|
||
|
} else {
|
||
|
styler.ColourTo(i - 1, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
goto restartLexer;
|
||
|
}
|
||
|
}
|
||
|
if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
|
||
|
styler.ColourTo(i - 1, state);
|
||
|
state = SCE_PL_ERROR;
|
||
|
goto restartLexer;
|
||
|
}
|
||
|
}
|
||
|
} else if (HereDoc.State == 2) {
|
||
|
// state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX
|
||
|
if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
|
||
|
i += HereDoc.DelimiterLength;
|
||
|
chPrev = styler.SafeGetCharAt(i - 1);
|
||
|
ch = styler.SafeGetCharAt(i);
|
||
|
if (isEOLChar(ch)) {
|
||
|
styler.ColourTo(i - 1, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
backflag = BACK_NONE;
|
||
|
HereDoc.State = 0;
|
||
|
goto restartLexer;
|
||
|
}
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
}
|
||
|
} else if (state == SCE_PL_POD
|
||
|
|| state == SCE_PL_POD_VERB) {
|
||
|
if (isEOLChar(chPrev)) {
|
||
|
if (ch == ' ' || ch == '\t') {
|
||
|
styler.ColourTo(i - 1, state);
|
||
|
state = SCE_PL_POD_VERB;
|
||
|
} else {
|
||
|
styler.ColourTo(i - 1, state);
|
||
|
state = SCE_PL_POD;
|
||
|
if (ch == '=') {
|
||
|
if (isMatch(styler, lengthDoc, i, "=cut")) {
|
||
|
styler.ColourTo(i - 1 + 4, state);
|
||
|
i += 4;
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
ch = styler.SafeGetCharAt(i);
|
||
|
//chNext = styler.SafeGetCharAt(i + 1);
|
||
|
goto restartLexer;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
} else if (state == SCE_PL_SCALAR // variable names
|
||
|
|| state == SCE_PL_ARRAY
|
||
|
|| state == SCE_PL_HASH
|
||
|
|| state == SCE_PL_SYMBOLTABLE) {
|
||
|
if (ch == ':' && chNext == ':') { // skip ::
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = chNext2;
|
||
|
}
|
||
|
else if (isEndVar(ch)) {
|
||
|
if (i == (styler.GetStartSegment() + 1)) {
|
||
|
// Special variable: $(, $_ etc.
|
||
|
styler.ColourTo(i, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
} else {
|
||
|
styler.ColourTo(i - 1, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
goto restartLexer;
|
||
|
}
|
||
|
}
|
||
|
} else if (state == SCE_PL_REGEX
|
||
|
|| state == SCE_PL_STRING_QR
|
||
|
) {
|
||
|
if (!Quote.Up && !isspacechar(ch)) {
|
||
|
Quote.Open(ch);
|
||
|
} else if (ch == '\\' && Quote.Up != '\\') {
|
||
|
// SG: Is it safe to skip *every* escaped char?
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
} else {
|
||
|
if (ch == Quote.Down /*&& chPrev != '\\'*/) {
|
||
|
Quote.Count--;
|
||
|
if (Quote.Count == 0) {
|
||
|
Quote.Rep--;
|
||
|
if (Quote.Up == Quote.Down) {
|
||
|
Quote.Count++;
|
||
|
}
|
||
|
}
|
||
|
if (!isalpha(chNext)) {
|
||
|
if (Quote.Rep <= 0) {
|
||
|
styler.ColourTo(i, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
ch = ' ';
|
||
|
}
|
||
|
}
|
||
|
} else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
|
||
|
Quote.Count++;
|
||
|
} else if (!isascii(chNext) || !isalpha(chNext)) {
|
||
|
if (Quote.Rep <= 0) {
|
||
|
styler.ColourTo(i, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
ch = ' ';
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
} else if (state == SCE_PL_REGSUBST) {
|
||
|
if (!Quote.Up && !isspacechar(ch)) {
|
||
|
Quote.Open(ch);
|
||
|
} else if (ch == '\\' && Quote.Up != '\\') {
|
||
|
// SG: Is it safe to skip *every* escaped char?
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
} else {
|
||
|
if (Quote.Count == 0 && Quote.Rep == 1) {
|
||
|
/* We matched something like s(...) or tr{...}
|
||
|
* and are looking for the next matcher characters,
|
||
|
* which could be either bracketed ({...}) or non-bracketed
|
||
|
* (/.../).
|
||
|
*
|
||
|
* Number-signs are problematic. If they occur after
|
||
|
* the close of the first part, treat them like
|
||
|
* a Quote.Up char, even if they actually start comments.
|
||
|
*
|
||
|
* If we find an alnum, we end the regsubst, and punt.
|
||
|
*
|
||
|
* Eric Promislow ericp@activestate.com Aug 9,2000
|
||
|
*/
|
||
|
if (isspacechar(ch)) {
|
||
|
// Keep going
|
||
|
}
|
||
|
else if (!isascii(ch) || isalnum(ch)) {
|
||
|
styler.ColourTo(i, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
ch = ' ';
|
||
|
} else {
|
||
|
Quote.Open(ch);
|
||
|
}
|
||
|
} else if (ch == Quote.Down /*&& chPrev != '\\'*/) {
|
||
|
Quote.Count--;
|
||
|
if (Quote.Count == 0) {
|
||
|
Quote.Rep--;
|
||
|
}
|
||
|
if (!isascii(chNext) || !isalpha(chNext)) {
|
||
|
if (Quote.Rep <= 0) {
|
||
|
styler.ColourTo(i, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
ch = ' ';
|
||
|
}
|
||
|
}
|
||
|
if (Quote.Up == Quote.Down) {
|
||
|
Quote.Count++;
|
||
|
}
|
||
|
} else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
|
||
|
Quote.Count++;
|
||
|
} else if (!isascii(chNext) || !isalpha(chNext)) {
|
||
|
if (Quote.Rep <= 0) {
|
||
|
styler.ColourTo(i, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
ch = ' ';
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
} else if (state == SCE_PL_STRING_Q
|
||
|
|| state == SCE_PL_STRING_QQ
|
||
|
|| state == SCE_PL_STRING_QX
|
||
|
|| state == SCE_PL_STRING_QW
|
||
|
|| state == SCE_PL_STRING
|
||
|
|| state == SCE_PL_CHARACTER
|
||
|
|| state == SCE_PL_BACKTICKS
|
||
|
) {
|
||
|
if (!Quote.Down && !isspacechar(ch)) {
|
||
|
Quote.Open(ch);
|
||
|
} else if (ch == '\\' && Quote.Up != '\\') {
|
||
|
i++;
|
||
|
ch = chNext;
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
} else if (ch == Quote.Down) {
|
||
|
Quote.Count--;
|
||
|
if (Quote.Count == 0) {
|
||
|
Quote.Rep--;
|
||
|
if (Quote.Rep <= 0) {
|
||
|
styler.ColourTo(i, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
ch = ' ';
|
||
|
}
|
||
|
if (Quote.Up == Quote.Down) {
|
||
|
Quote.Count++;
|
||
|
}
|
||
|
}
|
||
|
} else if (ch == Quote.Up) {
|
||
|
Quote.Count++;
|
||
|
}
|
||
|
} else if (state == SCE_PL_SUB_PROTOTYPE) {
|
||
|
char strch[2];
|
||
|
strch[0] = ch;
|
||
|
strch[1] = '\0';
|
||
|
if (NULL != strstr("\\[$@%&*];", strch)) {
|
||
|
// keep going
|
||
|
} else if (ch == ')') {
|
||
|
styler.ColourTo(i, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
} else {
|
||
|
// abandon prototype, restart from '('
|
||
|
i = backPos;
|
||
|
styler.ColourTo(i, SCE_PL_OPERATOR);
|
||
|
ch = styler.SafeGetCharAt(i);
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
}
|
||
|
} else if (state == SCE_PL_FORMAT_IDENT) {
|
||
|
// occupies different HereDoc states to avoid clashing with HERE docs
|
||
|
if (HereDoc.State == 0) {
|
||
|
if ((isascii(ch) && isalpha(ch)) || ch == '_' // probable identifier
|
||
|
|| ch == '=') { // no identifier
|
||
|
HereDoc.State = 3;
|
||
|
HereDoc.Quoted = false; // whitespace flag
|
||
|
} else if (ch == ' ' || ch == '\t') {
|
||
|
styler.ColourTo(i, SCE_PL_DEFAULT);
|
||
|
} else {
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
HereDoc.State = 0;
|
||
|
goto restartLexer;
|
||
|
}
|
||
|
}
|
||
|
if (HereDoc.State == 3) { // with just a '=', state goes 0->3->4
|
||
|
if (ch == '=') {
|
||
|
styler.ColourTo(i, SCE_PL_FORMAT_IDENT);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
HereDoc.State = 4;
|
||
|
} else if (ch == ' ' || ch == '\t') {
|
||
|
HereDoc.Quoted = true;
|
||
|
} else if (isEOLChar(ch) || (HereDoc.Quoted && ch != '=')) {
|
||
|
// abandon format, restart from after 'format'
|
||
|
i = backPos + 1;
|
||
|
ch = styler.SafeGetCharAt(i);
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
HereDoc.State = 0;
|
||
|
}
|
||
|
}
|
||
|
} else if (state == SCE_PL_FORMAT) {
|
||
|
if (isEOLChar(chPrev)) {
|
||
|
styler.ColourTo(i - 1, state);
|
||
|
if (ch == '.' && isEOLChar(chNext)) {
|
||
|
styler.ColourTo(i, state);
|
||
|
state = SCE_PL_DEFAULT;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if (state == SCE_PL_ERROR) {
|
||
|
break;
|
||
|
}
|
||
|
chPrev = ch;
|
||
|
}
|
||
|
styler.ColourTo(lengthDoc - 1, state);
|
||
|
}
|
||
|
|
||
|
static bool IsCommentLine(int line, Accessor &styler) {
|
||
|
int pos = styler.LineStart(line);
|
||
|
int eol_pos = styler.LineStart(line + 1) - 1;
|
||
|
for (int i = pos; i < eol_pos; i++) {
|
||
|
char ch = styler[i];
|
||
|
int style = styler.StyleAt(i);
|
||
|
if (ch == '#' && style == SCE_PL_COMMENTLINE)
|
||
|
return true;
|
||
|
else if (ch != ' ' && ch != '\t')
|
||
|
return false;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],
|
||
|
Accessor &styler) {
|
||
|
bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
|
||
|
bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
|
||
|
// Custom folding of POD and packages
|
||
|
bool foldPOD = styler.GetPropertyInt("fold.perl.pod", 1) != 0;
|
||
|
bool foldPackage = styler.GetPropertyInt("fold.perl.package", 1) != 0;
|
||
|
unsigned int endPos = startPos + length;
|
||
|
int visibleChars = 0;
|
||
|
int lineCurrent = styler.GetLine(startPos);
|
||
|
int levelPrev = SC_FOLDLEVELBASE;
|
||
|
if (lineCurrent > 0)
|
||
|
levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
|
||
|
int levelCurrent = levelPrev;
|
||
|
char chNext = styler[startPos];
|
||
|
char chPrev = styler.SafeGetCharAt(startPos - 1);
|
||
|
int styleNext = styler.StyleAt(startPos);
|
||
|
// Used at end of line to determine if the line was a package definition
|
||
|
bool isPackageLine = false;
|
||
|
bool isPodHeading = false;
|
||
|
for (unsigned int i = startPos; i < endPos; i++) {
|
||
|
char ch = chNext;
|
||
|
chNext = styler.SafeGetCharAt(i + 1);
|
||
|
int style = styleNext;
|
||
|
styleNext = styler.StyleAt(i + 1);
|
||
|
bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
|
||
|
bool atLineStart = isEOLChar(chPrev) || i == 0;
|
||
|
// Comment folding
|
||
|
if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
|
||
|
{
|
||
|
if (!IsCommentLine(lineCurrent - 1, styler)
|
||
|
&& IsCommentLine(lineCurrent + 1, styler))
|
||
|
levelCurrent++;
|
||
|
else if (IsCommentLine(lineCurrent - 1, styler)
|
||
|
&& !IsCommentLine(lineCurrent+1, styler))
|
||
|
levelCurrent--;
|
||
|
}
|
||
|
if (style == SCE_C_OPERATOR) {
|
||
|
if (ch == '{') {
|
||
|
levelCurrent++;
|
||
|
} else if (ch == '}') {
|
||
|
levelCurrent--;
|
||
|
}
|
||
|
}
|
||
|
// Custom POD folding
|
||
|
if (foldPOD && atLineStart) {
|
||
|
int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
|
||
|
if (style == SCE_PL_POD) {
|
||
|
if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
|
||
|
levelCurrent++;
|
||
|
else if (styler.Match(i, "=cut"))
|
||
|
levelCurrent--;
|
||
|
else if (styler.Match(i, "=head"))
|
||
|
isPodHeading = true;
|
||
|
} else if (style == SCE_PL_DATASECTION) {
|
||
|
if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
|
||
|
levelCurrent++;
|
||
|
else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
|
||
|
levelCurrent--;
|
||
|
else if (styler.Match(i, "=head"))
|
||
|
isPodHeading = true;
|
||
|
// if package used or unclosed brace, level > SC_FOLDLEVELBASE!
|
||
|
// reset needed as level test is vs. SC_FOLDLEVELBASE
|
||
|
else if (styler.Match(i, "__END__"))
|
||
|
levelCurrent = SC_FOLDLEVELBASE;
|
||
|
}
|
||
|
}
|
||
|
// Custom package folding
|
||
|
if (foldPackage && atLineStart) {
|
||
|
if (style == SCE_PL_WORD && styler.Match(i, "package")) {
|
||
|
isPackageLine = true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (atEOL) {
|
||
|
int lev = levelPrev;
|
||
|
if (isPodHeading) {
|
||
|
lev = levelPrev - 1;
|
||
|
lev |= SC_FOLDLEVELHEADERFLAG;
|
||
|
isPodHeading = false;
|
||
|
}
|
||
|
// Check if line was a package declaration
|
||
|
// because packages need "special" treatment
|
||
|
if (isPackageLine) {
|
||
|
lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
|
||
|
levelCurrent = SC_FOLDLEVELBASE + 1;
|
||
|
isPackageLine = false;
|
||
|
}
|
||
|
lev |= levelCurrent << 16;
|
||
|
if (visibleChars == 0 && foldCompact)
|
||
|
lev |= SC_FOLDLEVELWHITEFLAG;
|
||
|
if ((levelCurrent > levelPrev) && (visibleChars > 0))
|
||
|
lev |= SC_FOLDLEVELHEADERFLAG;
|
||
|
if (lev != styler.LevelAt(lineCurrent)) {
|
||
|
styler.SetLevel(lineCurrent, lev);
|
||
|
}
|
||
|
lineCurrent++;
|
||
|
levelPrev = levelCurrent;
|
||
|
visibleChars = 0;
|
||
|
}
|
||
|
if (!isspacechar(ch))
|
||
|
visibleChars++;
|
||
|
chPrev = ch;
|
||
|
}
|
||
|
// Fill in the real level of the next line, keeping the current flags as they will be filled in later
|
||
|
int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
|
||
|
styler.SetLevel(lineCurrent, levelPrev | flagsNext);
|
||
|
}
|
||
|
|
||
|
// Descriptions of the keyword lists used by the Perl lexer: a single list,
// followed by the null entry that terminates the table.
static const char * const perlWordListDesc[] = {
	"Keywords",
	NULL
};
|
||
|
|
||
|
// Lexer module object for Perl: ties the SCLEX_PERL id and the name "perl"
// to the colouring and folding routines and the keyword-list descriptions.
// NOTE(review): the trailing 8 is presumably the number of style bits the
// lexer requests -- confirm against the LexerModule constructor.
LexerModule lmPerl(
	SCLEX_PERL,
	ColourisePerlDoc,
	"perl",
	FoldPerlDoc,
	perlWordListDesc,
	8);
|
||
|
|
||
|
|