-
Notifications
You must be signed in to change notification settings - Fork 14
feat: add dataset.create_items_public_url and key_value_store.create_keys_public_url #453
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
c34b073
6402e2b
77384c5
1e8faf6
adbcb4d
c4b4ec7
d1fe218
65b10c3
eec0233
d90ac9e
0ecbed2
1c7c41b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,11 @@ | |
|
||
import asyncio | ||
import base64 | ||
import hashlib | ||
import hmac | ||
import json | ||
import random | ||
import string | ||
import time | ||
from collections.abc import Callable | ||
from http import HTTPStatus | ||
|
@@ -149,3 +152,59 @@ def encode_key_value_store_record_value(value: Any, content_type: str | None = N | |
value = json.dumps(value, ensure_ascii=False, indent=2, allow_nan=False, default=str).encode('utf-8') | ||
|
||
return (value, content_type) | ||
|
||
|
||
# TODO: will be removed once create_hmac_signature is moved to apify_shared.utils
# https://github.com/apify/apify-shared-python/pull/44
# Base62 alphabet; the index of a character is its digit value:
# '0'-'9' -> 0-9, 'a'-'z' -> 10-35, 'A'-'Z' -> 36-61.
CHARSET = ''.join((string.digits, string.ascii_lowercase, string.ascii_uppercase))
|
||
|
||
def encode_base62(num: int) -> str:
    """Encode the given number to base62.

    Digits are taken from the module-level `CHARSET` alphabet, most significant digit first.

    Args:
        num: Integer to encode. Zero encodes to the first alphabet character; a negative
            value never enters the conversion loop and yields an empty string.

    Returns:
        The base62 representation of `num`.
    """
    if num == 0:
        return CHARSET[0]

    # Digits come out least-significant first, so collect them and reverse once at the end.
    digits: list[str] = []
    remaining = num
    while remaining > 0:
        remaining, index = divmod(remaining, 62)
        digits.append(CHARSET[index])
    return ''.join(reversed(digits))
|
||
|
||
def create_hmac_signature(secret_key: str, message: str) -> str:
    """Generate an HMAC-SHA256 signature and encode it using Base62.

    The hexadecimal digest is truncated to its first 30 characters to keep the signature short,
    and the Base62 encoding shortens it further.

    Args:
        secret_key: Secret key used for signing; encoded as UTF-8 before use.
        message: Message to be signed; encoded as UTF-8 before use.

    Returns:
        The Base62 encoded, truncated signature.
    """
    mac = hmac.new(
        key=secret_key.encode('utf-8'),
        msg=message.encode('utf-8'),
        digestmod=hashlib.sha256,
    )

    # Keep only the leading 30 hex characters of the digest.
    truncated_hex = mac.hexdigest()[:30]

    # Re-interpret the hex string as an integer so it can be re-encoded in base62.
    return encode_base62(int(truncated_hex, 16))
|
||
|
||
def create_storage_signature(
    resource_id: str, url_signing_secret_key: str, expires_in_millis: int | None = None, version: int = 0
) -> str:
    """Create a storage signature for a resource, which can be used to generate signed URLs for accessing the resource.

    The signature is created using HMAC with the provided secret key and covers
    the resource ID, expiration time, and version.

    Args:
        resource_id: ID of the resource the signature is bound to.
        url_signing_secret_key: Secret key used to compute the HMAC.
        expires_in_millis: Validity of the signature in milliseconds, counted from the moment of
            this call. If omitted, `None`, or `0`, the signature does not expire (the expiration
            timestamp is encoded as `0`).
        version: Signature format version embedded in the signed message and in the payload.

    Returns:
        URL-safe base64 encoding of the string `{version}.{expires_at}.{signature}`.
    """
    if expires_in_millis:
        # Absolute expiration as a Unix timestamp in milliseconds.
        expires_at = int(time.time() * 1000) + expires_in_millis
    else:
        # 0 marks a signature that never expires.
        expires_at = 0

    message_to_sign = f'{version}.{expires_at}.{resource_id}'
    # Named `signature` rather than `hmac` so the imported `hmac` module is not shadowed.
    signature = create_hmac_signature(url_signing_secret_key, message_to_sign)

    payload = f'{version}.{expires_at}.{signature}'
    return base64.urlsafe_b64encode(payload.encode()).decode('utf-8')
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,12 +3,13 @@ | |
import warnings | ||
from contextlib import asynccontextmanager, contextmanager | ||
from typing import TYPE_CHECKING, Any | ||
from urllib.parse import urlencode, urlparse, urlunparse | ||
|
||
from apify_shared.models import ListPage | ||
from apify_shared.utils import filter_out_none_values_recursively, ignore_docs | ||
|
||
from apify_client._errors import ApifyApiError | ||
from apify_client._utils import catch_not_found_or_throw, pluck_data | ||
from apify_client._utils import catch_not_found_or_throw, create_storage_signature, pluck_data | ||
from apify_client.clients.base import ResourceClient, ResourceClientAsync | ||
|
||
if TYPE_CHECKING: | ||
|
@@ -571,6 +572,67 @@ def get_statistics(self) -> dict | None: | |
|
||
return None | ||
|
||
def create_items_public_url( | ||
self, | ||
*, | ||
offset: int | None = None, | ||
limit: int | None = None, | ||
clean: bool | None = None, | ||
desc: bool | None = None, | ||
fields: list[str] | None = None, | ||
omit: list[str] | None = None, | ||
unwind: list[str] | None = None, | ||
skip_empty: bool | None = None, | ||
skip_hidden: bool | None = None, | ||
flatten: list[str] | None = None, | ||
view: str | None = None, | ||
expires_in_millis: int | None = None, | ||
vdusek marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
) -> str: | ||
"""Generate a URL that can be used to access dataset items. | ||
|
||
If the client has permission to access the dataset's URL signing key, | ||
the URL will include a signature to verify its authenticity. | ||
|
||
You can optionally control how long the signed URL should be valid using the `expires_in_millis` option. | ||
This value sets the expiration duration in milliseconds from the time the URL is generated. | ||
If not provided, the URL will not expire. | ||
|
||
Any other options (like `limit` or `offset`) will be included as query parameters in the URL. | ||
|
||
Returns: | ||
The public dataset items URL. | ||
""" | ||
dataset = self.get() | ||
|
||
request_params = self._params( | ||
offset=offset, | ||
limit=limit, | ||
desc=desc, | ||
clean=clean, | ||
fields=fields, | ||
omit=omit, | ||
unwind=unwind, | ||
skipEmpty=skip_empty, | ||
skipHidden=skip_hidden, | ||
flatten=flatten, | ||
view=view, | ||
) | ||
|
||
if dataset and 'urlSigningSecretKey' in dataset: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm starting here the rest of the function seems identical to the async variant. Now sure if there is a nice way to reuse the code? I guess this is how we do it here in the client? 🤔 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure about this, but this is how every other methods are written. WDYT @janbuchar? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We sure do repeat stuff here, but no need to change that now. Perhaps one day we can generate the whole client lib from an OpenAPI spec. Until then, this will have to do |
||
signature = create_storage_signature( | ||
resource_id=dataset['id'], | ||
url_signing_secret_key=dataset['urlSigningSecretKey'], | ||
expires_in_millis=expires_in_millis, | ||
) | ||
request_params['signature'] = signature | ||
|
||
items_public_url = urlparse(self._url('items')) | ||
filtered_params = {k: v for k, v in request_params.items() if v is not None} | ||
if filtered_params: | ||
items_public_url = items_public_url._replace(query=urlencode(filtered_params)) | ||
|
||
return urlunparse(items_public_url) | ||
|
||
|
||
class DatasetClientAsync(ResourceClientAsync): | ||
"""Async sub-client for manipulating a single dataset.""" | ||
|
@@ -1027,3 +1089,64 @@ async def get_statistics(self) -> dict | None: | |
catch_not_found_or_throw(exc) | ||
|
||
return None | ||
|
||
async def create_items_public_url( | ||
self, | ||
*, | ||
offset: int | None = None, | ||
limit: int | None = None, | ||
clean: bool | None = None, | ||
desc: bool | None = None, | ||
fields: list[str] | None = None, | ||
omit: list[str] | None = None, | ||
unwind: list[str] | None = None, | ||
skip_empty: bool | None = None, | ||
skip_hidden: bool | None = None, | ||
flatten: list[str] | None = None, | ||
view: str | None = None, | ||
expires_in_millis: int | None = None, | ||
) -> str: | ||
"""Generate a URL that can be used to access dataset items. | ||
|
||
If the client has permission to access the dataset's URL signing key, | ||
the URL will include a signature to verify its authenticity. | ||
|
||
You can optionally control how long the signed URL should be valid using the `expires_in_millis` option. | ||
This value sets the expiration duration in milliseconds from the time the URL is generated. | ||
If not provided, the URL will not expire. | ||
|
||
Any other options (like `limit` or `offset`) will be included as query parameters in the URL. | ||
|
||
Returns: | ||
The public dataset items URL. | ||
""" | ||
dataset = await self.get() | ||
|
||
request_params = self._params( | ||
offset=offset, | ||
limit=limit, | ||
desc=desc, | ||
clean=clean, | ||
fields=fields, | ||
omit=omit, | ||
unwind=unwind, | ||
skipEmpty=skip_empty, | ||
skipHidden=skip_hidden, | ||
flatten=flatten, | ||
view=view, | ||
) | ||
|
||
if dataset and 'urlSigningSecretKey' in dataset: | ||
signature = create_storage_signature( | ||
resource_id=dataset['id'], | ||
url_signing_secret_key=dataset['urlSigningSecretKey'], | ||
expires_in_millis=expires_in_millis, | ||
) | ||
request_params['signature'] = signature | ||
|
||
items_public_url = urlparse(self._url('items')) | ||
filtered_params = {k: v for k, v in request_params.items() if v is not None} | ||
if filtered_params: | ||
items_public_url = items_public_url._replace(query=urlencode(filtered_params)) | ||
|
||
return urlunparse(items_public_url) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import secrets | ||
import string | ||
|
||
|
||
def random_string(length: int = 10) -> str:
    """Return a random string of ASCII letters.

    Args:
        length: Number of characters to generate.

    Returns:
        A string of `length` characters, each drawn from `string.ascii_letters` with `secrets.choice`.
    """
    alphabet = string.ascii_letters
    picked = [secrets.choice(alphabet) for _ in range(length)]
    return ''.join(picked)
|
||
|
||
def random_resource_name(resource: str) -> str:
    """Build a test resource name with a random suffix.

    Args:
        resource: Resource label embedded in the name.

    Returns:
        A name of the form `python-client-test-<resource>-<suffix>`, where the suffix is a
        5-character string from `random_string`.
    """
    suffix = random_string(5)
    return f'python-client-test-{resource}-{suffix}'
Uh oh!
There was an error while loading. Please reload this page.