oils-for-unix
diff --git a/‎builtin/read_osh.py‎
Lines changed: 3 additions & 12 deletions b/‎builtin/read_osh.py‎
Lines changed: 3 additions & 12 deletions
diff --git a/‎mycpp/gc_str.cc‎
Lines changed: 30 additions & 1 deletion b/‎mycpp/gc_str.cc‎
Lines changed: 30 additions & 1 deletion
diff --git a/‎mycpp/gc_str_test.cc‎
Lines changed: 35 additions & 11 deletions b/‎mycpp/gc_str_test.cc‎
Lines changed: 35 additions & 11 deletions
diff --git a/‎osh/glob_.py‎
Lines changed: 0 additions & 2 deletions b/‎osh/glob_.py‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎osh/split.py‎
Lines changed: 147 additions & 17 deletions b/‎osh/split.py‎
Lines changed: 147 additions & 17 deletions
@@ -505,26 +505,17 @@ def _Read(self, arg, names):
             else:
                 delim_byte = pyos.NEWLINE_CH  # read a line
 
-        # Read MORE THAN ONE line for \ line continuation (and not read -r)
-        parts = []  # type: List[mylib.BufWriter]
         chunk, eof = _ReadPortion(delim_byte, mops.BigTruncate(arg.n), not raw,
-                                      self.cmd_ev)
+                                  self.cmd_ev)
 
         # status 1 to terminate loop.  (This is true even though we set
         # variables).
         status = 1 if eof else 0
 
         #log('LINE %r', chunk)
-        if len(chunk) > 0:
-            join_next = False
-            spans = self.splitter.SplitForRead(chunk, not raw, do_split)
-            done, join_next = _AppendParts(chunk, spans, max_results,
-                                           join_next, parts)
+        entries = self.splitter.SplitForRead(chunk, not raw, do_split,
+                                             max_results)
 
-            #log('PARTS %s continued %s', parts, continued)
-            assert done
-
-        entries = [buf.getvalue() for buf in parts]
         num_parts = len(entries)
         if arg.a is not None:
             state.BuiltinSetArray(self.mem, arg.a, entries)
 
@@ -341,6 +341,15 @@ bool OmitChar(int ch, int what) {
   }
 }
 
+// Returns true when ch is equal to one of the bytes stored in chars, and
+// false otherwise (including when chars is empty).
+bool OmitCharMany(int ch, BigStr* chars) {
+  int n = len(chars);
+  int i = 0;
+  while (i < n) {
+    if (chars->data_[i] == ch) {
+      return true;
+    }
+    i++;
+  }
+  return false;
+}
+
 // StripAny is modeled after CPython's do_strip() in stringobject.c, and can
 // implement 6 functions:
 //
@@ -391,7 +400,27 @@ BigStr* BigStr::strip() {
 
 // Used for CommandSub in osh/cmd_exec.py
 BigStr* BigStr::rstrip(BigStr* chars) {
-  DCHECK(len(chars) == 1);
+  // Returns this string with every trailing byte that appears in chars
+  // removed.  Three cases, by the length of chars:
+  //   0 chars:  nothing can match, so this string is returned as-is
+  //   >1 chars: scan from the right and copy the kept prefix into a new string
+  //   1 char:   delegate to StripAny()
+  int num_chars = len(chars);
+  if (num_chars == 0) {
+    return this;
+  }
+
+  // multiple chars, for word splitting
+  if (num_chars > 1) {
+    // Walk back from the end past every byte that is in chars.  j ends up
+    // as the length of the prefix to keep (0 when every byte is stripped).
+    int j = len(this);
+    while (j > 0 && OmitCharMany(data_[j - 1], chars)) {
+      j--;
+    }
+
+    BigStr* result = NewStr(j);
+    memcpy(result->data(), data_, j);
+    return result;
+  }
+
+  // exactly 1 char
   int c = chars->data_[0];
   return StripAny(this, StripWhere::Right, c);
 }
 
@@ -314,11 +314,45 @@ TEST test_str_strip() {
     ASSERT(str_equals(result, StrFromC("hi")));
   }
 
+  ASSERT(str_equals0(" abc", StrFromC(" abc ")->rstrip()));
+  ASSERT(str_equals0(" def", StrFromC(" def")->rstrip()));
+
+  ASSERT(str_equals0("", kEmptyString->rstrip()));
+  ASSERT(str_equals0("", kEmptyString->strip()));
+
+  ASSERT(str_equals0("123", StrFromC(" 123 ")->strip()));
+  ASSERT(str_equals0("123", StrFromC(" 123")->strip()));
+  ASSERT(str_equals0("123", StrFromC("123 ")->strip()));
+
   printf("---------- Done ----------\n");
 
   PASS();
 }
 
+TEST test_rstrip() {
+  // rstrip(chars) where chars holds one, several, or zero characters
+
+  // One char
+  ASSERT(str_equals0(" a", StrFromC(" axx")->rstrip(StrFromC("x"))));
+
+  // Several chars: only the trailing run is removed, the leading space stays
+  ASSERT(str_equals0(" a", StrFromC(" a  ")->rstrip(StrFromC(" \t"))));
+  ASSERT(str_equals0(" a", StrFromC(" axx")->rstrip(StrFromC(" x"))));
+  ASSERT(str_equals0(" a", StrFromC(" a\t\t")->rstrip(StrFromC(" \t"))));
+
+  // Empty string allowed too: nothing is stripped
+  ASSERT(str_equals0(" a\t\t", StrFromC(" a\t\t")->rstrip(kEmptyString)));
+
+  PASS();
+}
+
 TEST test_str_upper_lower() {
   printf("\n");
 
@@ -1338,17 +1372,6 @@ TEST str_methods_test() {
   ASSERT(str_equals0("o", kStrFood->slice(-3, -2)));
   ASSERT(str_equals0("fo", kStrFood->slice(-4, -2)));
 
-  log("strip()");
-  ASSERT(str_equals0(" abc", StrFromC(" abc ")->rstrip()));
-  ASSERT(str_equals0(" def", StrFromC(" def")->rstrip()));
-
-  ASSERT(str_equals0("", kEmptyString->rstrip()));
-  ASSERT(str_equals0("", kEmptyString->strip()));
-
-  ASSERT(str_equals0("123", StrFromC(" 123 ")->strip()));
-  ASSERT(str_equals0("123", StrFromC(" 123")->strip()));
-  ASSERT(str_equals0("123", StrFromC("123 ")->strip()));
-
   BigStr* input = nullptr;
   BigStr* arg = nullptr;
   BigStr* expected = nullptr;
@@ -1522,6 +1545,7 @@ int main(int argc, char** argv) {
   // Members
   RUN_TEST(test_str_find);
   RUN_TEST(test_str_strip);
+  RUN_TEST(test_rstrip);
   RUN_TEST(test_str_upper_lower);
   RUN_TEST(test_str_replace);
   RUN_TEST(test_str_just);
 
@@ -110,8 +110,6 @@ def GlobEscapeBackslash(s):
       \* evaluates to '\*'
       - that is, the \ is preserved literally
     """
-    # XXX--This representation is affected by the known IFS='\' bug, but the
-    # bug will be fixed in the coming PR.
     return s.replace('\\', r'\@')
 
 
 
@@ -30,10 +30,11 @@
                                         state_i)
 from _devbuild.gen.value_asdl import (value, value_e, value_t)
 from mycpp.mylib import log
-from core import pyutil
+from core import pyutil, pyos
 from frontend import consts
 from mycpp import mylib
 from mycpp.mylib import tagswitch
+from osh import glob_
 
 from typing import List, Tuple, Dict, Optional, TYPE_CHECKING, cast
 if TYPE_CHECKING:
@@ -175,32 +176,35 @@ def Escape(self, s):
         sp = self._GetSplitter()
         return sp.Escape(s)
 
+    def CreateSplitterState(self, ifs=None):
+        # type: (Optional[str]) -> IfsSplitterState
+        sp = self._GetSplitter(ifs=ifs)
+        return IfsSplitterState(sp.ifs_whitespace, sp.ifs_other)
+
     def SplitForWordEval(self, s, ifs=None):
         # type: (str, Optional[str]) -> List[str]
-        """Split used by word evaluation.
-
-        Also used by the explicit shSplit() function.
+        """Split used by the explicit shSplit() function.
+
+        Args:
+          s: string to split
+          ifs: forwarded to CreateSplitterState(); defaults to None
+
+        Returns:
+          The list of words produced by splitting s, with backslash escapes
+          enabled (SetAllowEscape(True)).
         """
-        sp = self._GetSplitter(ifs=ifs)
-        spans = sp.Split(s, True)
+        sp = self.CreateSplitterState(ifs=ifs)
+        sp.SetAllowEscape(True)
+        sp.PushFragment(s)
+        return sp.PushTerminator()
 
-        # Note: pass allow_escape=False so \ isn't special
-        #spans = sp.Split(s, False)
+    def SplitForRead(self, line, allow_escape, do_split, max_parts):
+        # type: (str, bool, bool, int) -> List[str]
+        """Split a line for the read builtin and return the parts.
+
+        Args:
+          line: the text to split; an empty line yields []
+          allow_escape: forwarded to SetAllowEscape() to control backslash
+            handling
+          do_split: when False, splitting is done with ifs='' instead of the
+            default splitter
+          max_parts: max_parts - 1 is forwarded to SetMaxSplit(); a value
+            <= 0 therefore gives a negative max_split, which
+            IfsSplitterState treats as "no limit"
+        """
 
-        if 0:
-            for span in spans:
-                log('SPAN %s', span)
-        return _SpansToParts(s, spans)
-
-    def SplitForRead(self, line, allow_escape, do_split):
-        # type: (str, bool, bool) -> List[Span]
+        if len(line) == 0:
+            return []
 
         # None: use the default splitter, consulting $IFS
         # ''  : forces IFS='' behavior
         ifs = None if do_split else ''
 
-        sp = self._GetSplitter(ifs=ifs)
-        return sp.Split(line, allow_escape)
+        sp = self.CreateSplitterState(ifs=ifs)
+        sp.SetAllowEscape(allow_escape)
+        sp.SetMaxSplit(max_parts - 1)
+        sp.PushFragment(line)
+        return sp.PushTerminator()
 
 
 class _BaseSplitter(object):
@@ -317,3 +321,129 @@ def Split(self, s, allow_escape):
             i += 1
 
         return spans
+
+
+class IfsSplitterState(object):
+    """Incremental IFS word splitter.
+
+    Input is fed with PushFragment() (text that is split on IFS) and
+    PushLiteral() (text that is added as-is).  PushTerminator() finishes the
+    pending word and returns the accumulated list of words.
+
+    self.state takes four values in this class:
+      Start:     initial state, and the state after an IFS non-whitespace
+                 delimiter
+      Black:     the last byte was added to the current word
+      DE_White1: IFS whitespace was seen after the current word started
+      Backslash: a backslash was seen (only when allow_escape is set)
+    """
+
+    def __init__(self, ifs_space, ifs_other):
+        # type: (str, str) -> None
+        """
+        Args:
+          ifs_space: bytes tested with mylib.ByteInSet() as IFS whitespace
+          ifs_other: bytes tested with mylib.ByteInSet() as the other IFS
+            delimiters
+        """
+        self.ifs_space = ifs_space
+        self.ifs_other = ifs_other
+        # When set, flushed fragments go through glob_.GlobEscapeBackslash()
+        self.glob_escape = False
+        # When set, a backslash switches to the Backslash state
+        self.allow_escape = False
+        # Negative means no limit: every max_split check requires >= 0 or an
+        # exact match against len(self.args)
+        self.max_split = -1
+
+        self.state = state_i.Start
+        self.args = []  # type: List[str]  # generated words
+        self.frags = []  # type: List[str]  # str fragments of the current word
+        self.char_buff = []  # type: List[int]  # chars in the current fragment
+        self.white_buff = None  # type: Optional[List[int]] # chars for max_split space
+
+    def SetGlobEscape(self, glob_escape):
+        # type: (bool) -> None
+        """Turn glob-escaping of flushed fragments on or off."""
+        self.glob_escape = glob_escape
+
+    def SetAllowEscape(self, allow_escape):
+        # type: (bool) -> None
+        """Turn backslash handling in PushFragment() on or off."""
+        self.allow_escape = allow_escape
+
+    def SetMaxSplit(self, max_split):
+        # type: (int) -> None
+        """Set the split limit; a negative value means no limit.
+
+        white_buff is allocated the first time a non-negative limit is set,
+        because the limited paths append to it.
+        """
+        self.max_split = max_split
+        if max_split >= 0 and self.white_buff is None:
+            self.white_buff = []
+
+    def _FlushCharBuff(self):
+        # type: () -> None
+        """Move the buffered bytes, if any, into frags as one string.
+
+        The string is passed through glob_.GlobEscapeBackslash() first when
+        glob_escape is set.  char_buff is emptied afterwards.
+        """
+
+        if len(self.char_buff) >= 1:
+            frag = mylib.JoinBytes(self.char_buff)
+            if self.glob_escape:
+                frag = glob_.GlobEscapeBackslash(frag)
+            self.frags.append(frag)
+            del self.char_buff[:]
+
+    def _GenerateWord(self):
+        # type: () -> None
+        """Finish the current word and append it to args.
+
+        The word is the concatenation of frags (after flushing char_buff),
+        so it may be the empty string.
+        """
+        self._FlushCharBuff()
+        self.args.append(''.join(self.frags))
+        del self.frags[:]
+
+        # Delimiter bytes saved while len(args) == max_split become the
+        # start of the next word, so the fix-up in PushTerminator() can join
+        # them back onto the previous word.
+        if self.max_split >= 0 and len(self.white_buff) >= 1:
+            self.char_buff.extend(self.white_buff)
+            del self.white_buff[:]
+
+    def PushLiteral(self, s):
+        # type: (str) -> None
+        """
+        Add s to the current word without splitting it.
+
+        If IFS whitespace was pending (DE_White1), the previous word is
+        finished first; otherwise the buffered bytes are flushed so that s
+        is appended after them.  The state becomes Black.
+
+        Args:
+          s: word fragment that should be literally added
+        """
+        if self.state == state_i.DE_White1:
+            self._GenerateWord()
+        else:
+            self._FlushCharBuff()
+        self.frags.append(s)
+        self.state = state_i.Black
+
+    def PushFragment(self, s):
+        # type: (str) -> None
+        """
+        Split s on IFS, one byte at a time, updating the state machine.
+
+        Finished words are appended to self.args.  The unfinished word stays
+        buffered until more input or PushTerminator() arrives.
+
+        Args:
+          s: word fragment to split
+        """
+        ifs_space = self.ifs_space
+        ifs_other = self.ifs_other
+        allow_escape = self.allow_escape
+        max_split = self.max_split
+        n = len(s)
+
+        for i in xrange(n):
+            byte = mylib.ByteAt(s, i)
+
+            if self.state == state_i.Backslash:
+                # Byte after a backslash: skip the delimiter checks and fall
+                # through to the literal append at the bottom of the loop.
+                pass
+
+            elif max_split >= 0 and len(self.args) == max_split + 1:
+                # When max_split is reached, the processing is modified.
+                # Only a backslash and IFS whitespace seen in the Start state
+                # are special here; every other byte falls through to the
+                # literal append below.
+                if allow_escape and byte == pyos.BACKSLASH_CH:
+                    self.state = state_i.Backslash
+                    continue
+                elif mylib.ByteInSet(byte, ifs_space):
+                    if self.state == state_i.Start:
+                        # Kept in the buffer; the state stays Start
+                        self.char_buff.append(byte)
+                        continue
+
+            elif allow_escape and byte == pyos.BACKSLASH_CH:
+                # The backslash itself is not buffered.  Pending whitespace
+                # ends the previous word first.
+                if self.state == state_i.DE_White1:
+                    self._GenerateWord()
+                self.state = state_i.Backslash
+                continue
+            elif mylib.ByteInSet(byte, ifs_space):
+                # IFS whitespace in the Start state is skipped without any
+                # state change.
+                if self.state != state_i.Start:
+                    if len(self.args) == max_split:
+                        # Save the delimiter; see _GenerateWord()
+                        self.white_buff.append(byte)
+                    self.state = state_i.DE_White1
+                continue
+            elif mylib.ByteInSet(byte, ifs_other):
+                if len(self.args) == max_split:
+                    # Save the delimiter; see _GenerateWord()
+                    self.white_buff.append(byte)
+                # Always ends a word, even when nothing is buffered
+                self._GenerateWord()
+                self.state = state_i.Start
+                continue
+
+            # Ordinary (or backslash-escaped) byte: it belongs to a word.
+            # Pending whitespace ends the previous word first.
+            if self.state == state_i.DE_White1:
+                self._GenerateWord()
+            self.char_buff.append(byte)
+            self.state = state_i.Black
+
+    def PushTerminator(self):
+        # type: () -> List[str]
+        """Finish the pending word, if any, and return the list of words.
+
+        The returned list is self.args itself, not a copy.
+
+        NOTE(review): nothing is emitted when the state is Start or
+        Backslash.  For input that ends right after a backslash, the buffered
+        bytes are therefore not turned into a word - confirm this is intended.
+        """
+        if self.state in (state_i.DE_White1, state_i.Black):
+            self._GenerateWord()
+            if self.max_split >= 0 and len(self.args) == self.max_split + 2:
+                # TODO: is there an algorithm without this "fix up"?
+                # One word more than the limit: join the extra word onto the
+                # previous one, without its trailing IFS whitespace.
+                last = self.args.pop()
+                self.args[-1] = self.args[-1] + last.rstrip(self.ifs_space)
+            self.state = state_i.Start
+        return self.args