Skip to content

Commit 534a577

Browse files
Add validation of endpoints for API/ETL and postprocessor hook
1 parent d78e5fb commit 534a577

File tree

4 files changed

+31
-70
lines changed

4 files changed

+31
-70
lines changed

backend/notification_v2/provider/webhook/webhook.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from backend.celery_service import app as celery_app
88
from notification_v2.enums import AuthorizationType
99
from notification_v2.provider.notification_provider import NotificationProvider
10+
from unstract.sdk.adapters.url_validator import URLValidator
1011

1112
logger = logging.getLogger(__name__)
1213

@@ -51,6 +52,13 @@ def validate(self):
5152
"""
5253
if not self.notification.url:
5354
raise ValueError("Webhook URL is required.")
55+
56+
# Validate webhook URL for security
57+
is_valid, error_message = URLValidator.validate_url(self.notification.url)
58+
logger.info(f"Notification url {self.notification_url}")
59+
if not is_valid:
60+
raise ValueError(f"Webhook URL validation failed: {error_message}")
61+
5462
if not self.payload:
5563
raise ValueError("Payload is required.")
5664
return super().validate()

backend/sample.env

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ RUNNER_POLLING_INTERVAL_SECONDS=2
201201
# Examples: 900 (15 min), 1800 (30 min), 3600 (60 min)
202202
MIN_SCHEDULE_INTERVAL_SECONDS=1800
203203

204-
# WHitelisted adapter URLs to allow user to connect to locally hosted adapters.
204+
# Whitelisted adapter URLs to allow users to connect to locally hosted adapters.
205205
# Whitelisting 10.68.0.10 to allow frictionless adapter connection to
206206
# managed Postgres for VectorDB
207207
WHITELISTED_ENDPOINTS="10.68.0.10"

prompt-service/sample.env

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,8 @@ ADAPTER_LLMW_STATUS_RETRIES=5
6464
# Rentroll Service
6565
RENTROLL_SERVICE_HOST=http://unstract-rentroll-service
6666
RENTROLL_SERVICE_PORT=5003
67+
68+
# Whitelisted adapter URLs to allow users to connect to locally hosted adapters.
69+
# Whitelisting 10.68.0.10 to allow frictionless adapter connection to
70+
# managed Postgres for VectorDB
71+
WHITELISTED_ENDPOINTS="10.68.0.10"

prompt-service/src/unstract/prompt_service/services/answer_prompt.py

Lines changed: 17 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
1-
import ipaddress
2-
import socket
31
from logging import Logger
42
from typing import Any
5-
from urllib.parse import urlparse
63

74
from flask import current_app as app
85

@@ -17,6 +14,7 @@
1714
repair_json_with_best_structure,
1815
)
1916
from unstract.prompt_service.utils.log import publish_log
17+
from unstract.sdk.adapters.url_validator import URLValidator
2018
from unstract.sdk.constants import LogLevel
2119
from unstract.sdk.exceptions import RateLimitError as SdkRateLimitError
2220
from unstract.sdk.exceptions import SdkError
@@ -26,58 +24,6 @@
2624
from unstract.sdk.llm import LLM
2725

2826

29-
def _is_safe_public_url(url: str) -> bool:
30-
"""Validate webhook URL for SSRF protection.
31-
32-
Only allows HTTPS and blocks private/loopback/internal addresses.
33-
Resolves all DNS records (A/AAAA) to prevent DNS rebinding attacks.
34-
"""
35-
try:
36-
p = urlparse(url)
37-
if p.scheme not in ("https",): # Only allow HTTPS for security
38-
return False
39-
host = p.hostname or ""
40-
# Block obvious local hosts
41-
if host in ("localhost",):
42-
return False
43-
44-
addrs: set[str] = set()
45-
# If literal IP, validate directly; else resolve all records (A/AAAA)
46-
try:
47-
ipaddress.ip_address(host)
48-
addrs.add(host)
49-
except ValueError:
50-
try:
51-
for family, _type, _proto, _canonname, sockaddr in socket.getaddrinfo(
52-
host, None, type=socket.SOCK_STREAM
53-
):
54-
addr = sockaddr[0]
55-
addrs.add(addr)
56-
except Exception:
57-
return False
58-
59-
if not addrs:
60-
return False
61-
62-
# Validate all resolved addresses
63-
for addr in addrs:
64-
try:
65-
ip = ipaddress.ip_address(addr)
66-
except ValueError:
67-
return False
68-
if (
69-
ip.is_private
70-
or ip.is_loopback
71-
or ip.is_link_local
72-
or ip.is_reserved
73-
or ip.is_multicast
74-
):
75-
return False
76-
return True
77-
except Exception:
78-
return False
79-
80-
8127
class AnswerPromptService:
8228
@staticmethod
8329
def extract_variable(
@@ -342,23 +288,25 @@ def handle_json(
342288
app.logger.warning(
343289
"Postprocessing webhook enabled but URL missing; skipping."
344290
)
345-
elif not _is_safe_public_url(webhook_url):
346-
app.logger.warning(
347-
"Postprocessing webhook URL is not allowed; skipping."
348-
)
349291
else:
350-
try:
351-
processed_data, updated_highlight_data = postprocess_data(
352-
parsed_data,
353-
webhook_enabled=True,
354-
webhook_url=webhook_url,
355-
highlight_data=highlight_data,
356-
timeout=60,
357-
)
358-
except Exception as e:
292+
is_valid, error_message = URLValidator.validate_url(webhook_url)
293+
if not is_valid:
359294
app.logger.warning(
360-
f"Postprocessing webhook failed: {e}. Using unprocessed data."
295+
f"Postprocessing webhook URL validation failed: {error_message}; skipping."
361296
)
297+
else:
298+
try:
299+
processed_data, updated_highlight_data = postprocess_data(
300+
parsed_data,
301+
webhook_enabled=True,
302+
webhook_url=webhook_url,
303+
highlight_data=highlight_data,
304+
timeout=60,
305+
)
306+
except Exception as e:
307+
app.logger.warning(
308+
f"Postprocessing webhook failed: {e}. Using unprocessed data."
309+
)
362310

363311
structured_output[prompt_key] = processed_data
364312

0 commit comments

Comments
 (0)