Skip to content

Commit b9d2fc7

Browse files
SNOW-693548 fixing redos attack (#1327)
1 parent 200d88c commit b9d2fc7

File tree

5 files changed

+74
-14
lines changed

5 files changed

+74
-14
lines changed

DESCRIPTION.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
1111
- v2.8.2(November 18,2022)
1212

1313
- Improved performance of OCSP response caching
14+
- Improved performance of regexes used for PUT/GET SQL statement detection
1415

1516
- v2.8.1(October 30,2022)
1617

src/snowflake/connector/_sql_util.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#
2+
# Copyright (c) 2012-2021 Snowflake Computing Inc. All rights reserved.
3+
#
4+
5+
from __future__ import annotations
6+
7+
import re
8+
9+
from .constants import FileTransferType
10+
11+
COMMENT_START_SQL_RE = re.compile(
12+
r"""
13+
^\s*(?:
14+
/\*[\w\W]*?\*/
15+
)""",
16+
re.VERBOSE,
17+
)
18+
19+
PUT_SQL_RE = re.compile(r"^\s*put", flags=re.IGNORECASE)
20+
GET_SQL_RE = re.compile(r"^\s*get", flags=re.IGNORECASE)
21+
22+
23+
def remove_starting_comments(sql: str) -> str:
24+
"""Remove all comments from the start of a SQL statement."""
25+
commentless_sql = sql
26+
while True:
27+
start_comment = COMMENT_START_SQL_RE.match(commentless_sql)
28+
if start_comment is None:
29+
break
30+
commentless_sql = commentless_sql[start_comment.end() :]
31+
return commentless_sql
32+
33+
34+
def get_file_transfer_type(sql: str) -> FileTransferType | None:
35+
"""Decide whether a SQL is a file transfer and return its type.
36+
37+
None is returned if the SQL isn't a file transfer so that this function can be
38+
used in an if-statement.
39+
"""
40+
commentless_sql = remove_starting_comments(sql)
41+
if PUT_SQL_RE.match(commentless_sql):
42+
return FileTransferType.PUT
43+
elif GET_SQL_RE.match(commentless_sql):
44+
return FileTransferType.GET
45+
46+
47+
def is_put_statement(sql: str) -> bool:
48+
return get_file_transfer_type(sql) == FileTransferType.PUT
49+
50+
51+
def is_get_statement(sql: str) -> bool:
52+
return get_file_transfer_type(sql) == FileTransferType.GET

src/snowflake/connector/cursor.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from snowflake.connector.result_set import ResultSet
3333

3434
from . import compat
35+
from ._sql_util import get_file_transfer_type
3536
from .bind_upload_agent import BindUploadAgent, BindUploadError
3637
from .constants import (
3738
FIELD_NAME_TO_ID,
@@ -183,8 +184,6 @@ class SnowflakeCursor:
183184
Calling a function is expensive in Python and most of these getters are unnecessary.
184185
"""
185186

186-
PUT_SQL_RE = re.compile(r"^(?:/\*.*\*/\s*)*put\s+", flags=re.IGNORECASE)
187-
GET_SQL_RE = re.compile(r"^(?:/\*.*\*/\s*)*get\s+", flags=re.IGNORECASE)
188187
INSERT_SQL_RE = re.compile(r"^insert\s+into", flags=re.IGNORECASE)
189188
COMMENT_SQL_RE = re.compile(r"/\*.*\*/")
190189
INSERT_SQL_VALUES_RE = re.compile(
@@ -202,11 +201,7 @@ def get_file_transfer_type(sql: str) -> FileTransferType | None:
202201
None is returned if the SQL isn't a file transfer so that this function can be
203202
used in an if-statement.
204203
"""
205-
if SnowflakeCursor.PUT_SQL_RE.match(sql):
206-
return FileTransferType.PUT
207-
elif SnowflakeCursor.GET_SQL_RE.match(sql):
208-
return FileTransferType.GET
209-
return None
204+
return get_file_transfer_type(sql)
210205

211206
def __init__(
212207
self,
@@ -464,9 +459,7 @@ def _execute_helper(
464459
self._is_file_transfer = _is_put_get
465460
else:
466461
# or detect it.
467-
self._is_file_transfer = self.PUT_SQL_RE.match(
468-
query
469-
) or self.GET_SQL_RE.match(query)
462+
self._is_file_transfer = get_file_transfer_type(query) is not None
470463
logger.debug("is_file_transfer: %s", self._is_file_transfer is not None)
471464

472465
real_timeout = (

src/snowflake/connector/gcs_storage_client.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from logging import getLogger
1111
from typing import TYPE_CHECKING, Any, NamedTuple
1212

13+
from ._sql_util import is_put_statement
1314
from .compat import quote
1415
from .constants import (
1516
FILE_PROTOCOL,
@@ -269,9 +270,7 @@ def _get_local_file_path_from_put_command(self) -> str | None:
269270
The local file path.
270271
"""
271272
command = self._command
272-
if FILE_PROTOCOL not in self._command or not self._cursor.PUT_SQL_RE.match(
273-
command
274-
):
273+
if FILE_PROTOCOL not in self._command or not is_put_statement(command):
275274
return None
276275

277276
file_path_begin_index = command.find(FILE_PROTOCOL)

test/unit/test_cursor.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,24 @@ def __init__(self):
2727
@pytest.mark.parametrize(
2828
"sql,_type",
2929
(
30+
("", None),
31+
("select 1;", None),
3032
("PUT file:///tmp/data/mydata.csv @my_int_stage;", FileTransferType.PUT),
3133
("GET @%mytable file:///tmp/data/;", FileTransferType.GET),
32-
("select 1;", None),
34+
("/**/PUT file:///tmp/data/mydata.csv @my_int_stage;", FileTransferType.PUT),
35+
("/**/ GET @%mytable file:///tmp/data/;", FileTransferType.GET),
36+
pytest.param(
37+
"/**/\n"
38+
+ "\t/*/get\t*/\t/**/\n" * 10000
39+
+ "\t*/get @~/test.csv file:///tmp\n",
40+
None,
41+
id="long_incorrect",
42+
),
43+
pytest.param(
44+
"/**/\n" + "\t/*/put\t*/\t/**/\n" * 10000 + "put file:///tmp/data.csv @~",
45+
FileTransferType.PUT,
46+
id="long_correct",
47+
),
3348
),
3449
)
3550
def test_get_filetransfer_type(sql, _type):

0 commit comments

Comments
 (0)