Skip to content

Commit f309dc6

Browse files
committed
Extractor: use filesystem-canonical path comparison to determine built-in files
On Windows, we're getting e.g. the following mismatches, which could be due to case differences: "Skipped built-in file C:\hostedtoolcache\windows\Python\3.13.7\x64\Lib\multiprocessing\forkserver.py" vs "Extracted file C:\hostedtoolcache\windows\Python\3.13.7\x64\lib\asyncio\streams.py"
1 parent 6692653 commit f309dc6

File tree

1 file changed

+18
-1
lines changed

1 file changed

+18
-1
lines changed

python/extractor/semmle/extractors/builtin_extractor.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import sys
2+
import os
3+
from pathlib import Path
24
from semmle import util
35
from semmle.python.passes.objects import ObjectPass
46
from semmle.extractors.base import BaseExtractor
@@ -17,7 +19,7 @@ def process(self, unit):
1719
# Modules in the standard library (e.g. `os`)
1820
if not self.options.extract_stdlib and \
1921
isinstance(unit, util.FileExtractable) and \
20-
unit.path.startswith(util.STDLIB_PATH):
22+
_is_subpath(unit.path, util.STDLIB_PATH):
2123
return SkippedBuiltin
2224
if not isinstance(unit, util.BuiltinModuleExtractable):
2325
return NotImplemented
@@ -39,3 +41,18 @@ def process(self, unit):
3941

4042
def close(self):
4143
# Intentionally a no-op: the body is a bare `pass`, so calling close() does nothing.
pass
44+
45+
def _is_subpath(path, prefix):
    """Return True if *path* is *prefix* itself or is located beneath it.

    Both arguments are canonicalised with ``Path.resolve()`` before being
    compared as ``Path`` objects, so symlinks and ``..`` segments are
    collapsed and the comparison follows pathlib's platform rules (Windows
    paths compare case-insensitively). A plain ``str.startswith`` check on
    the raw strings is sensitive to such spelling differences.

    Args:
        path: Filesystem path to test.
        prefix: Directory that may contain *path*.

    Returns:
        bool: True when *path* equals *prefix* or lies inside it,
        False otherwise.
    """
    try:
        resolved_path = Path(path).resolve()
        resolved_prefix = Path(prefix).resolve()
    except (OSError, RuntimeError, ValueError):
        # resolve() is non-strict, so a merely missing file does not end up
        # here; this branch only covers paths the OS refuses to resolve
        # (e.g. symlink loops on older Pythons, embedded NUL characters).
        # Fall back to a purely lexical, case-normalised comparison.
        norm_path = os.path.normcase(
            os.path.normpath(os.path.abspath(path)))
        norm_prefix = os.path.normcase(
            os.path.normpath(os.path.abspath(prefix)))
        if norm_path == norm_prefix:
            return True
        # Terminate the prefix with a separator so that "/usr/lib" does not
        # accidentally match "/usr/lib64/...".
        if not norm_prefix.endswith(os.path.sep):
            norm_prefix += os.path.sep
        return norm_path.startswith(norm_prefix)

    if resolved_path == resolved_prefix:
        return True
    return resolved_prefix in resolved_path.parents

0 commit comments

Comments
 (0)