Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions src/apify_client/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

import asyncio
import base64
import hashlib
import hmac
import json
import random
import string
import time
from collections.abc import Callable
from http import HTTPStatus
Expand Down Expand Up @@ -149,3 +152,59 @@ def encode_key_value_store_record_value(value: Any, content_type: str | None = N
value = json.dumps(value, ensure_ascii=False, indent=2, allow_nan=False, default=str).encode('utf-8')

return (value, content_type)


# TODO: will be removed once create_hmac_signature is moved to apify_shared.utils
# https://github.com/apify/apify-shared-python/pull/44
# Base62 alphabet: digits first, then lowercase, then uppercase ASCII letters.
CHARSET = string.digits + string.ascii_letters


def encode_base62(num: int) -> str:
    """Encode a non-negative integer as a Base62 string.

    Args:
        num: The integer to encode. Must be zero or positive.

    Returns:
        The Base62 representation of `num`, most significant digit first.

    Raises:
        ValueError: If `num` is negative (there is no Base62 form for it; previously
            an empty string was returned silently).
    """
    if num < 0:
        raise ValueError(f'Cannot encode a negative number to base62: {num}')

    if num == 0:
        return CHARSET[0]

    base = len(CHARSET)
    # Digits are produced least-significant first, so collect them and reverse once at the end.
    digits = []
    while num > 0:
        num, remainder = divmod(num, base)
        digits.append(CHARSET[remainder])
    return ''.join(reversed(digits))


def create_hmac_signature(secret_key: str, message: str) -> str:
    """Sign a message with HMAC-SHA256 and return a shortened, Base62-encoded signature.

    Only the first 30 hexadecimal characters of the digest are kept. That prefix is read
    as an integer and re-encoded in Base62, which makes the resulting signature shorter.

    Args:
        secret_key (str): Secret key used for signing.
        message (str): Message to be signed.

    Returns:
        str: Base62-encoded, truncated signature.
    """
    # hmac.new() accepts bytes, not strings, hence the explicit UTF-8 encoding of both inputs.
    key_bytes = secret_key.encode('utf-8')
    message_bytes = message.encode('utf-8')

    hex_digest = hmac.new(key_bytes, message_bytes, hashlib.sha256).hexdigest()
    truncated_hex = hex_digest[:30]

    return encode_base62(int(truncated_hex, 16))


def create_storage_signature(
    resource_id: str, url_signing_secret_key: str, expires_in_millis: int | None, version: int = 0
) -> str:
    """Build the signature used in signed URLs for a storage resource.

    The resource ID, the expiration timestamp and the version are signed with HMAC using
    the given secret key. The version, expiration timestamp and that HMAC are then joined
    with dots and base64url-encoded.

    Args:
        resource_id: ID of the resource the signature is created for.
        url_signing_secret_key: Secret key used for the HMAC.
        expires_in_millis: Validity in milliseconds from now. When falsy (`None` or `0`),
            the expiration timestamp is set to `0` and the signature does not expire.
        version: Signature version written into the payload.

    Returns:
        The base64url-encoded payload `<version>.<expires_at>.<hmac>`.
    """
    if expires_in_millis:
        now_millis = int(time.time() * 1000)
        expires_at = now_millis + expires_in_millis
    else:
        # An expiration of 0 marks a signature that never expires.
        expires_at = 0

    signed_part = create_hmac_signature(url_signing_secret_key, f'{version}.{expires_at}.{resource_id}')

    payload = f'{version}.{expires_at}.{signed_part}'.encode()
    return base64.urlsafe_b64encode(payload).decode('utf-8')
125 changes: 124 additions & 1 deletion src/apify_client/clients/resource_clients/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
import warnings
from contextlib import asynccontextmanager, contextmanager
from typing import TYPE_CHECKING, Any
from urllib.parse import urlencode, urlparse, urlunparse

from apify_shared.models import ListPage
from apify_shared.utils import filter_out_none_values_recursively, ignore_docs

from apify_client._errors import ApifyApiError
from apify_client._utils import catch_not_found_or_throw, pluck_data
from apify_client._utils import catch_not_found_or_throw, create_storage_signature, pluck_data
from apify_client.clients.base import ResourceClient, ResourceClientAsync

if TYPE_CHECKING:
Expand Down Expand Up @@ -571,6 +572,67 @@ def get_statistics(self) -> dict | None:

return None

def create_items_public_url(
self,
*,
offset: int | None = None,
limit: int | None = None,
clean: bool | None = None,
desc: bool | None = None,
fields: list[str] | None = None,
omit: list[str] | None = None,
unwind: list[str] | None = None,
skip_empty: bool | None = None,
skip_hidden: bool | None = None,
flatten: list[str] | None = None,
view: str | None = None,
expires_in_millis: int | None = None,
) -> str:
"""Generate a URL that can be used to access dataset items.

If the client has permission to access the dataset's URL signing key,
the URL will include a signature to verify its authenticity.

You can optionally control how long the signed URL should be valid using the `expires_in_millis` option.
This value sets the expiration duration in milliseconds from the time the URL is generated.
If not provided, the URL will not expire.

Any other options (like `limit` or `offset`) will be included as query parameters in the URL.

Returns:
The public dataset items URL.
"""
dataset = self.get()

request_params = self._params(
offset=offset,
limit=limit,
desc=desc,
clean=clean,
fields=fields,
omit=omit,
unwind=unwind,
skipEmpty=skip_empty,
skipHidden=skip_hidden,
flatten=flatten,
view=view,
)

if dataset and 'urlSigningSecretKey' in dataset:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm starting here the rest of the function seems identical to the async variant. Now sure if there is a nice way to reuse the code? I guess this is how we do it here in the client? 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure about this, but this is how every other methods are written. WDYT @janbuchar?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We sure do repeat stuff here, but no need to change that now. Perhaps one day we can generate the whole client lib from an OpenAPI spec. Until then, this will have to do

signature = create_storage_signature(
resource_id=dataset['id'],
url_signing_secret_key=dataset['urlSigningSecretKey'],
expires_in_millis=expires_in_millis,
)
request_params['signature'] = signature

items_public_url = urlparse(self._url('items'))
filtered_params = {k: v for k, v in request_params.items() if v is not None}
if filtered_params:
items_public_url = items_public_url._replace(query=urlencode(filtered_params))

return urlunparse(items_public_url)


class DatasetClientAsync(ResourceClientAsync):
"""Async sub-client for manipulating a single dataset."""
Expand Down Expand Up @@ -1027,3 +1089,64 @@ async def get_statistics(self) -> dict | None:
catch_not_found_or_throw(exc)

return None

async def create_items_public_url(
self,
*,
offset: int | None = None,
limit: int | None = None,
clean: bool | None = None,
desc: bool | None = None,
fields: list[str] | None = None,
omit: list[str] | None = None,
unwind: list[str] | None = None,
skip_empty: bool | None = None,
skip_hidden: bool | None = None,
flatten: list[str] | None = None,
view: str | None = None,
expires_in_millis: int | None = None,
) -> str:
"""Generate a URL that can be used to access dataset items.

If the client has permission to access the dataset's URL signing key,
the URL will include a signature to verify its authenticity.

You can optionally control how long the signed URL should be valid using the `expires_in_millis` option.
This value sets the expiration duration in milliseconds from the time the URL is generated.
If not provided, the URL will not expire.

Any other options (like `limit` or `offset`) will be included as query parameters in the URL.

Returns:
The public dataset items URL.
"""
dataset = await self.get()

request_params = self._params(
offset=offset,
limit=limit,
desc=desc,
clean=clean,
fields=fields,
omit=omit,
unwind=unwind,
skipEmpty=skip_empty,
skipHidden=skip_hidden,
flatten=flatten,
view=view,
)

if dataset and 'urlSigningSecretKey' in dataset:
signature = create_storage_signature(
resource_id=dataset['id'],
url_signing_secret_key=dataset['urlSigningSecretKey'],
expires_in_millis=expires_in_millis,
)
request_params['signature'] = signature

items_public_url = urlparse(self._url('items'))
filtered_params = {k: v for k, v in request_params.items() if v is not None}
if filtered_params:
items_public_url = items_public_url._replace(query=urlencode(filtered_params))

return urlunparse(items_public_url)
105 changes: 104 additions & 1 deletion src/apify_client/clients/resource_clients/key_value_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,17 @@
from contextlib import asynccontextmanager, contextmanager
from http import HTTPStatus
from typing import TYPE_CHECKING, Any
from urllib.parse import urlencode, urlparse, urlunparse

from apify_shared.utils import filter_out_none_values_recursively, ignore_docs, parse_date_fields

from apify_client._errors import ApifyApiError
from apify_client._utils import catch_not_found_or_throw, encode_key_value_store_record_value, pluck_data
from apify_client._utils import (
catch_not_found_or_throw,
create_storage_signature,
encode_key_value_store_record_value,
pluck_data,
)
from apify_client.clients.base import ResourceClient, ResourceClientAsync

if TYPE_CHECKING:
Expand Down Expand Up @@ -287,6 +293,54 @@ def delete_record(self, key: str) -> None:
timeout_secs=_SMALL_TIMEOUT,
)

def create_keys_public_url(
self,
*,
limit: int | None = None,
exclusive_start_key: str | None = None,
collection: str | None = None,
prefix: str | None = None,
expires_in_millis: int | None = None,
) -> str:
"""Generate a URL that can be used to access key-value store keys.

If the client has permission to access the key-value store's URL signing key,
the URL will include a signature to verify its authenticity.

You can optionally control how long the signed URL should be valid using the `expires_in_millis` option.
This value sets the expiration duration in milliseconds from the time the URL is generated.
If not provided, the URL will not expire.

Any other options (like `limit` or `prefix`) will be included as query parameters in the URL.

Returns:
The public key-value store keys URL.
"""
store = self.get()

request_params = self._params(
limit=limit,
exclusive_start_key=exclusive_start_key,
collection=collection,
prefix=prefix,
)

if store and 'urlSigningSecretKey' in store:
signature = create_storage_signature(
resource_id=store['id'],
url_signing_secret_key=store['urlSigningSecretKey'],
expires_in_millis=expires_in_millis,
)
request_params['signature'] = signature

keys_public_url = urlparse(self._url('keys'))

filtered_params = {k: v for k, v in request_params.items() if v is not None}
if filtered_params:
keys_public_url = keys_public_url._replace(query=urlencode(filtered_params))

return urlunparse(keys_public_url)


class KeyValueStoreClientAsync(ResourceClientAsync):
"""Async sub-client for manipulating a single key-value store."""
Expand Down Expand Up @@ -533,3 +587,52 @@ async def delete_record(self, key: str) -> None:
params=self._params(),
timeout_secs=_SMALL_TIMEOUT,
)

async def create_keys_public_url(
self,
*,
limit: int | None = None,
exclusive_start_key: str | None = None,
collection: str | None = None,
prefix: str | None = None,
expires_in_millis: int | None = None,
) -> str:
"""Generate a URL that can be used to access key-value store keys.

If the client has permission to access the key-value store's URL signing key,
the URL will include a signature to verify its authenticity.

You can optionally control how long the signed URL should be valid using the `expires_in_millis` option.
This value sets the expiration duration in milliseconds from the time the URL is generated.
If not provided, the URL will not expire.

Any other options (like `limit` or `prefix`) will be included as query parameters in the URL.

Returns:
The public key-value store keys URL.
"""
store = await self.get()

keys_public_url = urlparse(self._url('keys'))

request_params = self._params(
limit=limit,
exclusive_start_key=exclusive_start_key,
collection=collection,
prefix=prefix,
)

if store and 'urlSigningSecretKey' in store:
signature = create_storage_signature(
resource_id=store['id'],
url_signing_secret_key=store['urlSigningSecretKey'],
expires_in_millis=expires_in_millis,
)
request_params['signature'] = signature

keys_public_url = urlparse(self._url('keys'))
filtered_params = {k: v for k, v in request_params.items() if v is not None}
if filtered_params:
keys_public_url = keys_public_url._replace(query=urlencode(filtered_params))

return urlunparse(keys_public_url)
10 changes: 10 additions & 0 deletions tests/integration/integration_test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import secrets
import string


def random_string(length: int = 10) -> str:
    """Return a string of `length` ASCII letters chosen with the `secrets` module."""
    alphabet = string.ascii_letters
    letters = [secrets.choice(alphabet) for _ in range(length)]
    return ''.join(letters)


def random_resource_name(resource: str) -> str:
    """Return a test resource name: a fixed prefix, the resource kind and a 5-letter random suffix."""
    suffix = random_string(5)
    return f'python-client-test-{resource}-{suffix}'
Loading
Loading