2828import com .sun .tools .javac .parser .Tokens .TokenKind ;
2929import com .sun .tools .javac .parser .UnicodeReader ;
3030import com .sun .tools .javac .util .Context ;
31+ import java .util .ArrayList ;
32+ import java .util .Collections ;
33+ import java .util .Comparator ;
34+ import java .util .HashSet ;
35+ import java .util .List ;
3136import java .util .Objects ;
3237import java .util .Set ;
3338
@@ -83,22 +88,53 @@ static boolean isStringFragment(TokenKind kind) {
8388 return STRINGFRAGMENT != null && Objects .equals (kind , STRINGFRAGMENT );
8489 }
8590
86- /** Lex the input and return a list of {@link RawTok}s. */
87- public static ImmutableList <RawTok > getTokens (
88- String source , Context context , Set <TokenKind > stopTokens ) {
91+ private static ImmutableList <Token > readAllTokens (
92+ String source , Context context , Set <Integer > nonTerminalStringFragments ) {
8993 if (source == null ) {
9094 return ImmutableList .of ();
9195 }
9296 ScannerFactory fac = ScannerFactory .instance (context );
9397 char [] buffer = (source + EOF_COMMENT ).toCharArray ();
9498 Scanner scanner =
9599 new AccessibleScanner (fac , new CommentSavingTokenizer (fac , buffer , buffer .length ));
100+ List <Token > tokens = new ArrayList <>();
101+ do {
102+ scanner .nextToken ();
103+ tokens .add (scanner .token ());
104+ } while (scanner .token ().kind != TokenKind .EOF );
105+ for (int i = 0 ; i < tokens .size (); i ++) {
106+ if (isStringFragment (tokens .get (i ).kind )) {
107+ int start = i ;
108+ while (isStringFragment (tokens .get (i ).kind )) {
109+ i ++;
110+ }
111+ for (int j = start ; j < i - 1 ; j ++) {
112+ nonTerminalStringFragments .add (tokens .get (j ).pos );
113+ }
114+ }
115+ }
116+ // A string template is tokenized as a series of STRINGFRAGMENT tokens containing the string
117+ // literal values, followed by the tokens for the template arguments. For the formatter, we
118+ // want the stream of tokens to appear in order by their start position.
119+ if (Runtime .version ().feature () >= 21 ) {
120+ Collections .sort (tokens , Comparator .comparingInt (t -> t .pos ));
121+ }
122+ return ImmutableList .copyOf (tokens );
123+ }
124+
125+ /** Lex the input and return a list of {@link RawTok}s. */
126+ public static ImmutableList <RawTok > getTokens (
127+ String source , Context context , Set <TokenKind > stopTokens ) {
128+ if (source == null ) {
129+ return ImmutableList .of ();
130+ }
131+ Set <Integer > nonTerminalStringFragments = new HashSet <>();
132+ ImmutableList <Token > javacTokens = readAllTokens (source , context , nonTerminalStringFragments );
133+
96134 ImmutableList .Builder <RawTok > tokens = ImmutableList .builder ();
97135 int end = source .length ();
98136 int last = 0 ;
99- do {
100- scanner .nextToken ();
101- Token t = scanner .token ();
137+ for (Token t : javacTokens ) {
102138 if (t .comments != null ) {
103139 for (Comment c : Lists .reverse (t .comments )) {
104140 if (last < c .getSourcePos (0 )) {
@@ -118,27 +154,12 @@ public static ImmutableList<RawTok> getTokens(
118154 if (last < t .pos ) {
119155 tokens .add (new RawTok (null , null , last , t .pos ));
120156 }
121- int pos = t .pos ;
122- int endPos = t .endPos ;
123157 if (isStringFragment (t .kind )) {
124- // A string template is tokenized as a series of STRINGFRAGMENT tokens containing the string
125- // literal values, followed by the tokens for the template arguments. For the formatter, we
126- // want the stream of tokens to appear in order by their start position, and also to have
127- // all the content from the original source text (including leading and trailing ", and the
128- // \ escapes from template arguments). This logic processes the token stream from javac to
129- // meet those requirements.
130- while (isStringFragment (t .kind )) {
131- endPos = t .endPos ;
132- scanner .nextToken ();
133- t = scanner .token ();
134- }
135- // Read tokens for the string template arguments, until we read the end of the string
136- // template. The last token in a string template is always a trailing string fragment. Use
137- // lookahead to defer reading the token after the template until the next iteration of the
138- // outer loop.
139- while (scanner .token (/* lookahead= */ 1 ).endPos < endPos ) {
140- scanner .nextToken ();
141- t = scanner .token ();
158+ int endPos = t .endPos ;
159+ int pos = t .pos ;
160+ if (nonTerminalStringFragments .contains (t .pos )) {
161+ // Include the \ escape from \{...} in the preceding string fragment
162+ endPos ++;
142163 }
143164 tokens .add (new RawTok (source .substring (pos , endPos ), t .kind , pos , endPos ));
144165 last = endPos ;
@@ -151,7 +172,7 @@ public static ImmutableList<RawTok> getTokens(
151172 t .endPos ));
152173 last = t .endPos ;
153174 }
154- } while ( scanner . token (). kind != TokenKind . EOF );
175+ }
155176 if (last < end ) {
156177 tokens .add (new RawTok (null , null , last , end ));
157178 }
0 commit comments