Skip to content

Commit 96c66eb

Browse files
author
Andy C
committed
[libc unicode] Make glob sort order match bash
Call setlocale() with LC_ALL instead of LC_CTYPE. GNU readline calls it with LC_CTYPE.
1 parent dae938f commit 96c66eb

File tree

5 files changed

+28
-21
lines changed

5 files changed

+28
-21
lines changed

bin/oils_for_unix.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,13 @@ def CaperDispatch():
8383

8484
def InitLocale(environ):
8585
# type: (Dict[str, str]) -> None
86-
"""Set GLOBAL libc locale from environment, and CHECK that it's valid.
87-
88-
Note: LC_COLLATE/LC_ALL might be necessary for glob
89-
LANG= is the default, LC_ALL= sets all of them
90-
https://unix.stackexchange.com/questions/576701/what-is-the-difference-between-lang-c-and-lc-all-c
91-
"""
86+
"""Set the GLOBAL libc locale from the env, and CHECK that it's valid."""
9287
try:
93-
locale_name = pylocale.setlocale(pylocale.LC_CTYPE, '')
88+
# Note: LC_ALL (rather than LC_CTYPE) makes glob order match bash
89+
#
90+
# https://unix.stackexchange.com/questions/576701/what-is-the-difference-between-lang-c-and-lc-all-c
91+
# LANG= is the default, LC_ALL= sets all of them
92+
locale_name = pylocale.setlocale(pylocale.LC_ALL, '')
9493

9594
# passing None queries it
9695
#lo = locale.setlocale(locale.LC_CTYPE, None)
@@ -109,8 +108,9 @@ def InitLocale(environ):
109108
#log('codeset %s', codeset)
110109

111110
if not match.IsUtf8Codeset(codeset):
112-
print_stderr("oils warning: codeset %r doesn't look like UTF-8" % codeset)
113-
print_stderr(' Set OILS_LOCALE_OK=1 to remove this message')
111+
print_stderr("oils warning: codeset %r doesn't look like UTF-8" %
112+
codeset)
113+
print_stderr(' OILS_LOCALE_OK=1 removes this message')
114114

115115

116116
# TODO: Hook up valid applets (including these) to completion

cpp/libc.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,14 @@ int sleep_until_error(double seconds);
4040

4141
// pylib/locale_.py
4242
namespace pylocale {
43+
4344
const int CODESET = ::CODESET;
45+
46+
constexpr int lc_all = LC_ALL;
4447
constexpr int lc_ctype = LC_CTYPE;
48+
#undef LC_ALL
4549
#undef LC_CTYPE
50+
const int LC_ALL = lc_all;
4651
const int LC_CTYPE = lc_ctype;
4752

4853
class Error {
@@ -53,6 +58,7 @@ class Error {
5358
};
5459
BigStr* setlocale(int category, BigStr* locale);
5560
BigStr* nl_langinfo(int item);
61+
5662
} // namespace pylocale
5763

5864
#endif // LIBC_H

pylib/pylocale.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import _locale # type: ignore
1010

1111
CODESET = _locale.CODESET # type: int
12+
LC_ALL = _locale.LC_ALL # type: int
1213
LC_CTYPE = _locale.LC_CTYPE # type: int
1314

1415

spec/glob.test.sh

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -259,26 +259,24 @@ __a__
259259

260260
#### Glob ordering respects LC_COLLATE (zsh respects this too)
261261

262-
# bug from test/gold
263-
# mksh ksh osh - hello-test.sh comes first
262+
# test/spec-common.sh sets LC_ALL=C.UTF-8
263+
unset LC_ALL
264+
265+
touch hello hello.py hello_preamble.sh hello-test.sh
266+
echo h*
267+
264268
# bash - hello_preamble.sh comes first
265-
#
266269
# But ord('_') == 95
267270
# ord('-') == 45
268271

269272
# https://serverfault.com/questions/122737/in-bash-are-wildcard-expansions-guaranteed-to-be-in-order
270273

271-
touch hello hello.py hello_preamble.sh hello-test.sh
274+
#LC_COLLATE=C.UTF-8
275+
LC_COLLATE=en_US.UTF-8 # en_US is necessary
272276
echo h*
273277

274-
# Spec tests set LC_ALL=C.UTF_8
275-
unset LC_ALL
276-
277-
# the en_US does it
278-
LC_COLLATE=en_US.UTF-8
279-
#LC_COLLATE=C.UTF-8
278+
LC_COLLATE=en_US.UTF-8 $SH -c 'echo h*'
280279

281-
echo h*
282280

283281
# Doesn't work, probably because
284282
#LC_COLLATE=en_US.UTF-8
@@ -287,11 +285,13 @@ echo h*
287285
## STDOUT:
288286
hello hello-test.sh hello.py hello_preamble.sh
289287
hello hello_preamble.sh hello.py hello-test.sh
288+
hello hello_preamble.sh hello.py hello-test.sh
290289
## END
291290

292291
## N-I dash/mksh/ash STDOUT:
293292
hello hello-test.sh hello.py hello_preamble.sh
294293
hello hello-test.sh hello.py hello_preamble.sh
294+
hello hello-test.sh hello.py hello_preamble.sh
295295
## END
296296

297297

test/gold.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ test-dollar-sq() { _compare $GOLD_DIR/dollar-sq.sh; }
111111
# 2025-06: This is a sort order issue that doesn't show up in CI (related to
112112
# LC_COLLATE)
113113
# I reproduced this issue in spec/glob case #39
114-
TODO-test-word-eval() { _compare $GOLD_DIR/word-eval.sh; }
114+
test-word-eval() { _compare $GOLD_DIR/word-eval.sh; }
115115

116116
test-abuild() {
117117
_compare $GOLD_DIR/abuild.sh is_function is_function

0 commit comments

Comments
 (0)