@@ -154,16 +154,56 @@ echo $pat
154
154
## status: 1
155
155
## stdout-json: ""
156
156
157
- # ### Match NUL byte - failing because of CPython API right now
158
- shopt -s ysh:all
157
+ #### Bytes are denoted \y01 in Eggex char classes (not \x01)
158
+
159
+ # That is, eggex does NOT have MODES like re.UNICODE
160
+ #
161
+ # We UNAMBIGUOUSLY accept
162
+ # - \y01 or \u{1} - these are the same
163
+ # - \yff or \u{ff} - these are DIFFERENT
164
+
165
+ var pat = / [\y01] /
166
+ pp test_ (b'\y01' ~ pat)
167
+ pp test_ ('a' ~ pat)
168
+
169
+ ## STDOUT:
170
+ (Bool) true
171
+ (Bool) false
172
+ ## END
173
+
174
+ #### NUL byte can be expressed in Eggex, but not in ERE
175
+
176
+ $SH << 'EOF'
177
+ pp test_ (b'\y01' ~ / [\y01] /)
178
+ pp test_ (b'\y00' ~ / [\y00] /)
179
+ EOF
180
+ echo status=$?
159
181
160
- # BUG in osh-cpython, literal NUL is not accepted by regex API
182
+ $SH << 'EOF'
183
+ pp test_ (b'\y01' ~ / [\u{1}] /)
184
+ pp test_ (b'\y00' ~ / [\u{0}] /)
185
+ EOF
186
+ echo status=$?
187
+
188
+
189
+ # legacy synonym
190
+
191
+ $SH << 'EOF'
192
+ pp test_ (b'\y01' ~ / [\x01] /)
161
193
pp test_ (b'\y00' ~ / [\x00] /)
194
+ EOF
195
+ echo status=$?
162
196
163
197
## STDOUT:
198
+ (Bool) true
199
+ status=1
200
+ (Bool) true
201
+ status=1
202
+ (Bool) true
203
+ status=1
164
204
## END
165
205
166
- # ### BUG: Can you match high bytes 0x80 0xff, which are not UTF-8?
206
+ #### High bytes 0x80 0xff usually can't be matched - Eggex is UTF-8
167
207
shopt -s ysh:all
168
208
169
209
# ascii works
@@ -186,17 +226,20 @@ pp test_ (b'\yff' ~ / [\xff] /)
186
226
## STDOUT:
187
227
## END
188
228
189
- # ### Bytes are denoted \y01 in Eggex char classes (not \x01)
229
+ #### High bytes 0x80 0xff can be matched with plain ERE and LC_ALL=C
190
230
191
- # That is, eggex does have MODES like re.UNICODE
192
- #
193
- # We UNAMBIGUOUSLY accept
194
- # - \y01 or \u{1} - these are the same
195
- # - \yff or \u{ff} - these are DIFFERENT
231
+ export LC_ALL=C
196
232
197
- var pat = / [\y 01] /
198
- pp test_ (b' \y01' ~ pat)
199
- pp test_ (' a' ~ pat)
233
+ $SH << 'EOF'
234
+ var yes = b'foo \yff'
235
+ var no = b'foo'
236
+
237
+ # POSIX ERE string
238
+ var ere = b'[\yff]'
239
+
240
+ pp test_ (yes ~ ere)
241
+ pp test_ (no ~ ere)
242
+ EOF
200
243
201
244
## STDOUT:
202
245
(Bool) true
0 commit comments