@@ -86,6 +86,17 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
86
86
}
87
87
}
88
88
89
+ private static class DedentedLevel extends InterpolatedStringLevel {
90
+ public final int delimiterLength;
91
+ public DedentedLevel (CharSequence interpolator , int delimiterLength ) {
92
+ super (interpolator);
93
+ this . delimiterLength = delimiterLength;
94
+ }
95
+ public int getState () {
96
+ return INSIDE_DEDENTED_INTERPOLATED_STRING ;
97
+ }
98
+ }
99
+
89
100
private boolean isScala3;
90
101
91
102
//
@@ -94,6 +105,7 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
94
105
// to get id after $ in interpolated String
95
106
private boolean haveIdInString = false ;
96
107
private boolean haveIdInMultilineString = false ;
108
+ private boolean haveIdInDedentedString = false ;
97
109
// Currently opened interpolated Strings. Each int represents the number of the opened left structural braces in the String
98
110
private Stack<InterpolatedStringLevel > nestedString = new Stack<> ();
99
111
private CharSequence lastSeenInterpolator = null ;
@@ -105,16 +117,49 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
105
117
public void resetCustom() {
106
118
haveIdInString = false ;
107
119
haveIdInMultilineString = false ;
120
+ haveIdInDedentedString = false ;
108
121
nestedString. clear();
109
122
lastSeenInterpolator = null ;
110
123
}
111
124
125
+ private int countLeadingQuotes(CharSequence text) {
126
+ int count = 0 ;
127
+ for (int i = 0 ; i < text. length() && text. charAt(i) == ' \' ' ; i++ ) {
128
+ count++ ;
129
+ }
130
+ return count;
131
+ }
132
+
133
+ private boolean endsWithQuotes(CharSequence text, int expectedCount) {
134
+ if (text. length() < expectedCount) return false ;
135
+ int count = 0 ;
136
+ for (int i = text. length() - 1 ; i >= 0 && text. charAt(i) == ' \' ' ; i-- ) {
137
+ count++ ;
138
+ }
139
+ return count >= expectedCount;
140
+ }
141
+
142
+ private boolean isValidDedentedString(CharSequence text) {
143
+ int leadingQuotes = countLeadingQuotes(text);
144
+ if (leadingQuotes < 3 ) return false ; // Must have at least 3 quotes
145
+
146
+ // Find the ending quotes
147
+ int trailingQuotes = 0 ;
148
+ for (int i = text. length() - 1 ; i >= 0 && text. charAt(i) == ' \' ' ; i-- ) {
149
+ trailingQuotes++ ;
150
+ }
151
+
152
+ return leadingQuotes == trailingQuotes;
153
+ }
154
+
112
155
public boolean isInterpolatedStringState() {
113
156
return isInsideInterpolatedString() ||
114
157
haveIdInString ||
115
158
haveIdInMultilineString ||
159
+ haveIdInDedentedString ||
116
160
yystate() == INSIDE_INTERPOLATED_STRING ||
117
- yystate() == INSIDE_MULTI_LINE_INTERPOLATED_STRING ;
161
+ yystate() == INSIDE_MULTI_LINE_INTERPOLATED_STRING ||
162
+ yystate() == INSIDE_DEDENTED_INTERPOLATED_STRING ;
118
163
}
119
164
120
165
private boolean shouldProcessBracesForInterpolated() {
@@ -145,6 +190,9 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
145
190
} else if (haveIdInMultilineString) {
146
191
haveIdInMultilineString = false ;
147
192
yybegin(INSIDE_MULTI_LINE_INTERPOLATED_STRING );
193
+ } else if (haveIdInDedentedString) {
194
+ haveIdInDedentedString = false ;
195
+ yybegin(INSIDE_DEDENTED_INTERPOLATED_STRING );
148
196
}
149
197
}
150
198
@@ -158,6 +206,8 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
158
206
typeAdjusted = tINTERPOLATED_RAW_STRING;
159
207
else if (type == tINTERPOLATED_MULTILINE_STRING && isInsideRawInterpolator())
160
208
typeAdjusted = tINTERPOLATED_MULTILINE_RAW_STRING;
209
+ else if (type == tINTERPOLATED_DEDENTED_STRING && isInsideRawInterpolator())
210
+ typeAdjusted = tINTERPOLATED_DEDENTED_RAW_STRING;
161
211
else
162
212
typeAdjusted = type;
163
213
@@ -166,6 +216,11 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
166
216
167
217
@NotNull
168
218
private IElementType processDollarInsideString(boolean isInsideMultiline) {
219
+ return processDollarInsideString(isInsideMultiline, false );
220
+ }
221
+
222
+ @NotNull
223
+ private IElementType processDollarInsideString(boolean isInsideMultiline, boolean isInsideDedented) {
169
224
final IElementType token;
170
225
171
226
// TODO: remove this check; it should always be false, because $$ is handled by the INTERPOLATED_STRING_ESCAPE pattern earlier
@@ -175,7 +230,9 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
175
230
token = tINTERPOLATED_STRING_ESCAPE;
176
231
}
177
232
else {
178
- if (isInsideMultiline) {
233
+ if (isInsideDedented) {
234
+ haveIdInDedentedString = true ;
235
+ } else if (isInsideMultiline) {
179
236
haveIdInMultilineString = true ;
180
237
} else {
181
238
haveIdInString = true ;
@@ -271,12 +328,19 @@ hexDigit = [0-9A-Fa-f]
271
328
CHAR_ESCAPE_SEQUENCE = \\ [^\r\n]
272
329
UNICODE_ESCAPE = \\ u+ {hexDigit}{hexDigit}{hexDigit}{hexDigit} // Scala supports 1. multiple `u` chars after `\` 2. even \u000A ('\n') and \u000D (unlike Java)
273
330
ESCAPE_SEQUENCE = {UNICODE_ESCAPE} | {CHAR_ESCAPE_SEQUENCE}
274
- CHARACTER_LITERAL = "'" ( [^ \\\' \r\n] | {ESCAPE_SEQUENCE} | {OCTAL_ESCAPE_LITERAL} )( "'" | \\ ) | \'\\ u000A\' | "'''" // TODO: \'\\u000A\' is redundunt, remove
331
+ CHARACTER_LITERAL = "'" ( [^ \\\' \r\n] | {ESCAPE_SEQUENCE} | {OCTAL_ESCAPE_LITERAL} )( "'" | \\ ) | \'\\ u000A\' // TODO: \'\\u000A\' is redundunt, remove
275
332
276
333
STRING_BEGIN = \" ( [^ \\\" \r\n] | {CHAR_ESCAPE_SEQUENCE} )*
277
334
STRING_LITERAL = {STRING_BEGIN} \"
278
335
MULTI_LINE_STRING = \"\"\" ( ( \" ( \" )?)? [^ \" ] )* \"\"\" ( \" )* // Multi-line string
279
336
337
// Dedented string literals (Scala 3) - modeled after the MULTI_LINE_STRING pattern.
// For a delimiter of N quotes the content is a sequence of steps, each being
// "zero to N-1 quotes followed by one non-quote character", so a run of N quotes is never
// consumed as content. Every run length from 0 to N-1 must be accepted: the nested optionals
// below add exactly one more optional quote per delimiter size.
DEDENTED_STRING_3 = \'\'\' ( (\'(\')?)? [^\'] )* \'\'\' (\')*
DEDENTED_STRING_4 = \'\'\'\' ( (\'(\'(\')?)?)? [^\'] )* \'\'\'\' (\')*
DEDENTED_STRING_5 = \'\'\'\'\' ( (\'(\'(\'(\')?)?)?)? [^\'] )* \'\'\'\'\' (\')*
DEDENTED_STRING_6 = \'\'\'\'\'\' ( (\'(\'(\'(\'(\')?)?)?)?)? [^\'] )* \'\'\'\'\'\' (\')*
DEDENTED_STRING = {DEDENTED_STRING_6} | {DEDENTED_STRING_5} | {DEDENTED_STRING_4} | {DEDENTED_STRING_3}

343
+
280
344
// //////String Interpolation////////
281
345
INTERPOLATED_STRING_ID = {varid}
282
346
@@ -287,6 +351,9 @@ INTERPOLATED_STRING_PART_NOT_ESCAPED = [^\\\"\r\n\$]
287
351
INTERPOLATED_MULTI_LINE_STRING_BEGIN = \"\"\" {INTERPOLATED_MULTI_LINE_STRING_PART} *
288
352
INTERPOLATED_MULTI_LINE_STRING_PART = (( \" ( \" )?)? [^ \"\$ ] )
289
353
354
// Opening delimiter (three or more quotes) followed by any leading literal content.
INTERPOLATED_DEDENTED_STRING_BEGIN = \'\'\'+ {INTERPOLATED_DEDENTED_STRING_PART}*
// One step of literal content, mirroring INTERPOLATED_MULTI_LINE_STRING_PART: at most two quotes
// followed by a character that is neither a quote nor '$'. '$' must never be consumed here,
// otherwise the longest-match rule would hide "$$", "$id" and "${" from the escape and
// injection rules of the INSIDE_DEDENTED_INTERPOLATED_STRING state.
INTERPOLATED_DEDENTED_STRING_PART = ((\'(\')?)? [^\'\$])

356
+
290
357
// TODO: rename, it's misleading
291
358
INTERPOLATED_STRING_ESCAPE = "$$"
292
359
// INTERPOLATED_STRING_VARIABLE = "$"({identifier})
@@ -324,6 +391,7 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
324
391
%xstate WAIT_FOR_INTERPOLATED_STRING
325
392
%xstate INSIDE_INTERPOLATED_STRING
326
393
%xstate INSIDE_MULTI_LINE_INTERPOLATED_STRING
394
+ %xstate INSIDE_DEDENTED_INTERPOLATED_STRING
327
395
%xstate INJ_COMMON_STATE
328
396
329
397
%%
@@ -344,7 +412,7 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
344
412
{END_OF_LINE_COMMENT} { return process(tLINE_COMMENT); }
345
413
346
414
347
- {INTERPOLATED_STRING_ID} / ( {INTERPOLATED_STRING_BEGIN} | {INTERPOLATED_MULTI_LINE_STRING_BEGIN} ) {
415
+ {INTERPOLATED_STRING_ID} / ( {INTERPOLATED_STRING_BEGIN} | {INTERPOLATED_MULTI_LINE_STRING_BEGIN} | {INTERPOLATED_DEDENTED_STRING_BEGIN} ) {
348
416
yybegin(WAIT_FOR_INTERPOLATED_STRING );
349
417
// TODO: remove this check: looks like it's a dead code,
350
418
// yytext() should only return text that is matched by INTERPOLATED_STRING_ID, which can't end with \"\"
@@ -367,6 +435,13 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
367
435
nestedString. push(new MultilineLevel (lastSeenInterpolator));
368
436
return process(tINTERPOLATED_MULTILINE_STRING);
369
437
}
438
+
439
// Start of a dedented interpolated string: switch to the dedented state and push a level that
// remembers how many quotes opened the string (the leading quotes of the matched text).
{INTERPOLATED_DEDENTED_STRING_BEGIN} {
  yybegin(INSIDE_DEDENTED_INTERPOLATED_STRING);
  nestedString.push(new DedentedLevel(lastSeenInterpolator, countLeadingQuotes(yytext())));
  return process(tINTERPOLATED_DEDENTED_STRING);
}
370
445
}
371
446
372
447
<INJ_COMMON_STATE> {identifier} {
@@ -470,6 +545,61 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
470
545
}
471
546
}
472
547
548
<INSIDE_DEDENTED_INTERPOLATED_STRING> {
  // "$$" is an escaped dollar sign.
  {INTERPOLATED_STRING_ESCAPE} {
    return process(tINTERPOLATED_STRING_ESCAPE);
  }

  // Two quotes directly in front of '$' are plain string content.
  (\'\') / "$" {
    return process(tINTERPOLATED_DEDENTED_STRING);
  }

  {INTERPOLATED_DEDENTED_STRING_PART}+ {
    return process(tINTERPOLATED_DEDENTED_STRING);
  }

  "$" {identifier} {
    return processDollarInsideString(false, true);
  }

  // A run of three or more quotes. Whether it closes the string depends on the delimiter length
  // recorded when the string was opened. This must stay a single rule: a separate
  // "four or more quotes" rule would match the same text, win the tie by being listed first,
  // and prevent strings opened with four or more quotes from ever being closed.
  \'\'\'+ {
    final int quoteCount = yylength();
    // Without a DedentedLevel on top of the stack there is nothing to compare against;
    // treat the run as a closing delimiter, as before.
    final int delimiterLength =
        !nestedString.isEmpty() && nestedString.peek() instanceof DedentedLevel
            ? ((DedentedLevel) nestedString.peek()).delimiterLength
            : quoteCount;

    if (quoteCount == delimiterLength) {
      return processOutsideString();
    }
    if (quoteCount > delimiterLength) {
      // Surplus quotes belong to the content: emit one quote and rescan the rest, so that the
      // last delimiterLength quotes of the run end up closing the string.
      yypushback(quoteCount - 1);
    }
    // Either a surplus quote (see above) or a run too short to close the string: content.
    return process(tINTERPOLATED_DEDENTED_STRING);
  }

  // "${" starts a block injection; the brace itself is lexed in COMMON_STATE.
  "$" / "{" {
    yybegin(COMMON_STATE);
    return process(tINTERPOLATED_STRING_INJECTION);
  }

  // A single quote not followed by another quote is content.
  \' / [^\'] {
    return process(tINTERPOLATED_DEDENTED_STRING);
  }

  [^] {
    return process(tWRONG_STRING);
  }
}

602
+
473
603
474
604
"/**" ( "*" ? [^ \/ ] )* "*/" { // for comments in interpolated strings
475
605
return process(ScalaDocElementTypes . SCALA_DOC_COMMENT );
@@ -486,6 +616,8 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
486
616
// TODO: incomplete strings should be handled the same way with interpolated strings
487
617
// what can be parsed should be parsed as tSTRING,
488
618
// tWRONG_LINE_BREAK_IN_STRING error token should be added at unexpected new line should
619
// A complete dedented string literal is a tDEDENTED_STRING token only when lexing Scala 3.
// NOTE(review): otherwise the whole match is reported as tIDENTIFIER - confirm this is intended.
{DEDENTED_STRING} { return process(isScala3 ? tDEDENTED_STRING : tIDENTIFIER); }
620
+
489
621
{WRONG_STRING} { return process(tWRONG_STRING); }
490
622
491
623
0 commit comments