Skip to content

Commit e58e45e

Browse files
Trevor Madgemmaitre314
andauthored
Merge commit from fork
* more relaxed ZipFile impl * lint --------- Co-authored-by: Matthieu Maitre <[email protected]>
1 parent f34d091 commit e58e45e

File tree

7 files changed

+131
-2
lines changed

7 files changed

+131
-2
lines changed

src/picklescan/relaxed_zipfile.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import struct
2+
import zipfile
3+
4+
# More forgiving implementation of zipfile.ZipFile
5+
_FH_SIGNATURE = 0
6+
_FH_FILENAME_LENGTH = 10
7+
_FH_EXTRA_FIELD_LENGTH = 11
8+
9+
structFileHeader = "<4s2B4HL2L2H"
10+
stringFileHeader = b"PK\003\004"
11+
sizeFileHeader = struct.calcsize(structFileHeader)
12+
13+
14+
class RelaxedZipFile(zipfile.ZipFile):
15+
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
16+
# near copy of zipfile.ZipFile.open with
17+
"""Return file-like object for 'name'.
18+
19+
name is a string for the file name within the ZIP file, or a ZipInfo
20+
object.
21+
22+
mode should be 'r' to read a file already in the ZIP file, or 'w' to
23+
write to a file newly added to the archive.
24+
25+
pwd is the password to decrypt files (only used for reading).
26+
27+
When writing, if the file size is not known in advance but may exceed
28+
2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
29+
files. If the size is known in advance, it is best to pass a ZipInfo
30+
instance for name, with zinfo.file_size set.
31+
"""
32+
if mode not in {"r", "w"}:
33+
raise ValueError('open() requires mode "r" or "w"')
34+
if pwd and not isinstance(pwd, bytes):
35+
raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
36+
if pwd and (mode == "w"):
37+
raise ValueError("pwd is only supported for reading files")
38+
if not self.fp:
39+
raise ValueError("Attempt to use ZIP archive that was already closed")
40+
41+
# Make sure we have an info object
42+
if isinstance(name, zipfile.ZipInfo):
43+
# 'name' is already an info object
44+
zinfo = name
45+
elif mode == "w":
46+
zinfo = zipfile.ZipInfo(name)
47+
zinfo.compress_type = self.compression
48+
zinfo._compresslevel = self.compresslevel
49+
else:
50+
# Get info object for name
51+
zinfo = self.getinfo(name)
52+
53+
if mode == "w":
54+
return self._open_to_write(zinfo, force_zip64=force_zip64)
55+
56+
if self._writing:
57+
raise ValueError(
58+
"Can't read from the ZIP file while there "
59+
"is an open writing handle on it. "
60+
"Close the writing handle before trying to read."
61+
)
62+
63+
# Open for reading:
64+
self._fileRefCnt += 1
65+
zef_file = zipfile._SharedFile(
66+
self.fp,
67+
zinfo.header_offset,
68+
self._fpclose,
69+
self._lock,
70+
lambda: self._writing,
71+
)
72+
try:
73+
# Skip the file header:
74+
fheader = zef_file.read(sizeFileHeader)
75+
if len(fheader) != sizeFileHeader:
76+
raise zipfile.BadZipFile("Truncated file header")
77+
fheader = struct.unpack(structFileHeader, fheader)
78+
if fheader[_FH_SIGNATURE] != stringFileHeader:
79+
raise zipfile.BadZipFile("Bad magic number for file header")
80+
81+
zef_file.read(fheader[_FH_FILENAME_LENGTH])
82+
if fheader[_FH_EXTRA_FIELD_LENGTH]:
83+
zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
84+
85+
return zipfile.ZipExtFile(zef_file, mode, zinfo, pwd, True)
86+
except BaseException:
87+
zef_file.close()
88+
raise

src/picklescan/scanner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from typing import IO, List, Optional, Set, Tuple
1212
import urllib.parse
1313
import zipfile
14+
from .relaxed_zipfile import RelaxedZipFile
1415

1516
from .torch import (
1617
get_magic_number,
@@ -375,7 +376,7 @@ def get_magic_bytes_from_zipfile(zip: zipfile.ZipFile, num_bytes=8):
375376
def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult:
376377
result = ScanResult([])
377378

378-
with zipfile.ZipFile(data, "r") as zip:
379+
with RelaxedZipFile(data, "r") as zip:
379380
magic_bytes = get_magic_bytes_from_zipfile(zip)
380381
file_names = zip.namelist()
381382
_log.debug("Files in zip archive %s: %s", file_id, file_names)

tests/data/malicious1_0x1.zip

181 Bytes
Binary file not shown.

tests/data/malicious1_0x20.zip

181 Bytes
Binary file not shown.

tests/data/malicious1_0x40.zip

181 Bytes
Binary file not shown.
165 Bytes
Binary file not shown.

tests/test_scanner.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,22 @@ def initialize_zip_file(path, file_name, data):
249249
zip.writestr(file_name, data)
250250

251251

252+
def initialize_corrupt_zip_file_central_directory(path, file_name, data):
253+
if not os.path.exists(path):
254+
with zipfile.ZipFile(path, "w") as zip:
255+
zip.writestr(file_name, data)
256+
257+
with open(path, "rb") as f:
258+
data = f.read()
259+
260+
# Replace only the first occurrence of "data.pkl" with "datap.kl"
261+
modified_data = data.replace(b"data.pkl", b"datap.kl", 1)
262+
263+
# Write back the modified content
264+
with open(path, "wb") as f:
265+
f.write(modified_data)
266+
267+
252268
def initialize_numpy_files():
253269
import numpy as np
254270

@@ -490,6 +506,12 @@ def initialize_pickle_files():
490506
pickle.dumps(Malicious1(), protocol=4),
491507
)
492508

509+
initialize_corrupt_zip_file_central_directory(
510+
f"{_root_path}/data/malicious1_central_directory.zip",
511+
"data.pkl",
512+
pickle.dumps(Malicious1(), protocol=4),
513+
)
514+
493515
initialize_zip_file(
494516
f"{_root_path}/data/malicious1_wrong_ext.zip",
495517
"data.txt", # Pickle file with a non-standard extension
@@ -646,7 +668,22 @@ def test_scan_file_path():
646668
compare_scan_results(
647669
scan_file_path(f"{_root_path}/data/malicious1.zip"), malicious1
648670
)
649-
compare_scan_results(scan_file_path(f"{_root_path}/data/malicious1.7z"), malicious1)
671+
compare_scan_results(
672+
scan_file_path(f"{_root_path}/data/malicious1_central_directory.zip"),
673+
malicious1,
674+
)
675+
compare_scan_results(
676+
scan_file_path(f"{_root_path}/data/malicious1_0x1.zip"), malicious1
677+
)
678+
compare_scan_results(
679+
scan_file_path(f"{_root_path}/data/malicious1_0x20.zip"), malicious1
680+
)
681+
compare_scan_results(
682+
scan_file_path(f"{_root_path}/data/malicious1_0x40.zip"), malicious1
683+
)
684+
compare_scan_results(
685+
scan_file_path(f"{_root_path}/data/malicious1.7z"), malicious1
686+
)
650687
compare_scan_results(
651688
scan_file_path(f"{_root_path}/data/malicious1_wrong_ext.zip"), malicious1
652689
)
@@ -835,6 +872,9 @@ def test_scan_directory_path():
835872
Global("functools", "partial", SafetyLevel.Dangerous),
836873
Global("pip", "main", SafetyLevel.Dangerous),
837874
Global("builtins", "eval", SafetyLevel.Dangerous),
875+
Global("builtins", "eval", SafetyLevel.Dangerous),
876+
Global("builtins", "eval", SafetyLevel.Dangerous),
877+
Global("builtins", "eval", SafetyLevel.Dangerous),
838878
],
839879
scanned_files=38,
840880
issues_count=39,

0 commit comments

Comments
 (0)