|
30 | 30 | state_i)
|
31 | 31 | from _devbuild.gen.value_asdl import (value, value_e, value_t)
|
32 | 32 | from mycpp.mylib import log
|
33 |
| -from core import pyutil |
| 33 | +from core import pyutil, pyos |
34 | 34 | from frontend import consts
|
35 | 35 | from mycpp import mylib
|
36 | 36 | from mycpp.mylib import tagswitch
|
| 37 | +from osh import glob_ |
37 | 38 |
|
38 | 39 | from typing import List, Tuple, Dict, Optional, TYPE_CHECKING, cast
|
39 | 40 | if TYPE_CHECKING:
|
@@ -175,32 +176,35 @@ def Escape(self, s):
|
175 | 176 | sp = self._GetSplitter()
|
176 | 177 | return sp.Escape(s)
|
177 | 178 |
|
def CreateSplitterState(self, ifs=None):
    # type: (Optional[str]) -> IfsSplitterState
    """Create a new incremental splitter state for an IFS value.

    Args:
      ifs: forwarded unchanged to self._GetSplitter(ifs=...).

    Returns:
      A fresh IfsSplitterState constructed from the ifs_whitespace and
      ifs_other attributes of the splitter returned by _GetSplitter().
    """
    sp = self._GetSplitter(ifs=ifs)
    return IfsSplitterState(sp.ifs_whitespace, sp.ifs_other)
def SplitForWordEval(self, s, ifs=None):
    # type: (str, Optional[str]) -> List[str]
    """Split used by the explicit shSplit() function.

    Args:
      s: the string to split.
      ifs: forwarded to self.CreateSplitterState(ifs=...).

    Returns:
      The list of words obtained by pushing `s` as a single fragment,
      with backslash escapes enabled, and then terminating the splitter.
    """
    sp = self.CreateSplitterState(ifs=ifs)
    sp.SetAllowEscape(True)
    sp.PushFragment(s)
    return sp.PushTerminator()
186 | 192 |
|
187 |
| - # Note: pass allow_escape=False so \ isn't special |
188 |
| - #spans = sp.Split(s, False) |
def SplitForRead(self, line, allow_escape, do_split, max_parts):
    # type: (str, bool, bool, int) -> List[str]
    """Split one line into at most `max_parts` parts.

    Args:
      line: the text to split; an empty line yields an empty list.
      allow_escape: whether a backslash escapes the following byte.
      do_split: when False, the splitter is created with ifs='' instead
                of ifs=None.
      max_parts: the splitter's max_split is set to max_parts - 1.

    Returns:
      The list of parts returned by the splitter state's PushTerminator().
    """
    if len(line) == 0:
        return []

    # None: use the default splitter, consulting $IFS
    # ''  : forces IFS='' behavior
    ifs = None if do_split else ''

    sp = self.CreateSplitterState(ifs=ifs)
    sp.SetAllowEscape(allow_escape)
    sp.SetMaxSplit(max_parts - 1)
    sp.PushFragment(line)
    return sp.PushTerminator()
204 | 208 |
|
205 | 209 |
|
206 | 210 | class _BaseSplitter(object):
|
@@ -317,3 +321,129 @@ def Split(self, s, allow_escape):
|
317 | 321 | i += 1
|
318 | 322 |
|
319 | 323 | return spans
|
| 324 | + |
| 325 | + |
class IfsSplitterState(object):
    """Incremental IFS splitter driven by pushed fragments.

    Callers configure the object with the Set*() methods, feed it text with
    PushFragment() (split on IFS) and PushLiteral() (never split), and
    finally call PushTerminator() to obtain the list of words.

    self.state takes these state_i values in this class:
      Start:     no word is in progress (initial state, and the state after
                 an ifs_other delimiter).
      Black:     the last byte or literal was added to the current word.
      DE_White1: IFS whitespace was seen after a word; the word is emitted
                 when the next non-whitespace input (or the terminator)
                 arrives.
      Backslash: the previous byte was an escaping backslash; the next byte
                 is added to the current word without being interpreted.
    """

    def __init__(self, ifs_space, ifs_other):
        # type: (str, str) -> None
        """
        Args:
          ifs_space: bytes treated as IFS whitespace.
          ifs_other: bytes treated as non-whitespace IFS delimiters.
        """
        self.ifs_space = ifs_space
        self.ifs_other = ifs_other
        self.glob_escape = False
        self.allow_escape = False
        self.max_split = -1  # negative means no limit on splits

        self.state = state_i.Start
        self.args = []  # type: List[str]  # generated words
        self.frags = []  # type: List[str]  # str fragments of the current word
        self.char_buff = []  # type: List[int]  # chars in the current fragment
        # chars for max_split space; allocated by SetMaxSplit()
        self.white_buff = None  # type: Optional[List[int]]

    def SetGlobEscape(self, glob_escape):
        # type: (bool) -> None
        """Choose whether split fragments go through GlobEscapeBackslash()."""
        self.glob_escape = glob_escape

    def SetAllowEscape(self, allow_escape):
        # type: (bool) -> None
        """Choose whether a backslash escapes the byte that follows it."""
        self.allow_escape = allow_escape

    def SetMaxSplit(self, max_split):
        # type: (int) -> None
        """Set the split limit; a negative value leaves splitting unlimited.

        When the limit is non-negative, the buffer that records the
        delimiter bytes seen at the final split is allocated if needed.
        """
        self.max_split = max_split
        if max_split >= 0 and self.white_buff is None:
            self.white_buff = []

    def _FlushCharBuff(self):
        # type: () -> None
        """Move the buffered bytes, if any, into self.frags as one string."""

        if len(self.char_buff) >= 1:
            frag = mylib.JoinBytes(self.char_buff)
            if self.glob_escape:
                frag = glob_.GlobEscapeBackslash(frag)
            self.frags.append(frag)
            del self.char_buff[:]

    def _GenerateWord(self):
        # type: () -> None
        """Finish the current word and append it to self.args."""
        self._FlushCharBuff()
        self.args.append(''.join(self.frags))
        del self.frags[:]

        # The delimiter bytes recorded at the final split become the start
        # of the next word, so the remainder keeps them verbatim.
        if self.max_split >= 0 and len(self.white_buff) >= 1:
            self.char_buff.extend(self.white_buff)
            del self.white_buff[:]

    def PushLiteral(self, s):
        # type: (str) -> None
        """
        Args:
          s: word fragment that should be literally added
        """
        if self.state == state_i.DE_White1:
            # Pending whitespace ends the previous word before the literal.
            self._GenerateWord()
        else:
            self._FlushCharBuff()
        # Appended directly to frags: not split, not glob-escaped here.
        self.frags.append(s)
        self.state = state_i.Black

    def PushFragment(self, s):
        # type: (str) -> None
        """
        Args:
          s: word fragment to split
        """
        ifs_space = self.ifs_space
        ifs_other = self.ifs_other
        allow_escape = self.allow_escape
        max_split = self.max_split
        n = len(s)

        for i in xrange(n):
            byte = mylib.ByteAt(s, i)

            if self.state == state_i.Backslash:
                # Escaped byte: fall through and add it to the word as-is.
                pass

            elif max_split >= 0 and len(self.args) == max_split + 1:
                # When max_split is reached, the processing is modified.
                if allow_escape and byte == pyos.BACKSLASH_CH:
                    self.state = state_i.Backslash
                    continue
                elif mylib.ByteInSet(byte, ifs_space):
                    if self.state == state_i.Start:
                        # Buffer the whitespace without starting a word.
                        self.char_buff.append(byte)
                        continue
                # Every other byte falls through and joins the last word.

            elif allow_escape and byte == pyos.BACKSLASH_CH:
                if self.state == state_i.DE_White1:
                    self._GenerateWord()
                self.state = state_i.Backslash
                continue
            elif mylib.ByteInSet(byte, ifs_space):
                # Whitespace in the Start state is skipped.
                if self.state != state_i.Start:
                    if len(self.args) == max_split:
                        self.white_buff.append(byte)
                    self.state = state_i.DE_White1
                continue
            elif mylib.ByteInSet(byte, ifs_other):
                if len(self.args) == max_split:
                    self.white_buff.append(byte)
                self._GenerateWord()
                self.state = state_i.Start
                continue

            if self.state == state_i.DE_White1:
                self._GenerateWord()
            self.char_buff.append(byte)
            self.state = state_i.Black

    def PushTerminator(self):
        # type: () -> List[str]
        """Finish splitting and return the generated words (self.args).

        NOTE(review): if the input ended in the Backslash state, no word is
        generated here, so bytes still buffered for the current word are not
        emitted -- confirm callers never end input on a lone backslash.
        """
        if self.state in (state_i.DE_White1, state_i.Black):
            self._GenerateWord()
        if self.max_split >= 0 and len(self.args) == self.max_split + 2:
            # TODO: is there an algorithm without this "fix up"?
            # One word too many: merge it into the previous word, dropping
            # trailing IFS whitespace.
            last = self.args.pop()
            self.args[-1] = self.args[-1] + last.rstrip(self.ifs_space)
        self.state = state_i.Start
        return self.args
0 commit comments