Skip to content

Commit 6eb3fbc

Browse files
committed
Add __restrict__
1 parent 08ff4ff commit 6eb3fbc

File tree

5 files changed, with 7 additions and 7 deletions.

include/ck/utility/amd_buffer_addressing_builtins.hpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ union BufferResource
2020
};
2121

2222
template <typename T>
23-
__device__ int32x4_t make_wave_buffer_resource(T* p_wave, index_t element_space_size)
23+
__device__ int32x4_t make_wave_buffer_resource(T* __restrict__ p_wave, index_t element_space_size)
2424
{
2525
BufferResource<T> wave_buffer_resource;
2626

@@ -35,7 +35,7 @@ __device__ int32x4_t make_wave_buffer_resource(T* p_wave, index_t element_space_
3535
}
3636

3737
template <typename T>
38-
__device__ int32x4_t make_wave_buffer_resource_with_default_range(T* p_wave)
38+
__device__ int32x4_t make_wave_buffer_resource_with_default_range(T* __restrict__ p_wave)
3939
{
4040
BufferResource<T> wave_buffer_resource;
4141

@@ -711,7 +711,7 @@ template <typename T,
711711
index_t N,
712712
AmdBufferCoherenceEnum coherence = AmdBufferCoherenceEnum::DefaultCoherence>
713713
__device__ void amd_buffer_store(const typename vector_type_maker<T, N>::type::type src_thread_data,
714-
T* p_dst_wave,
714+
T* __restrict__ p_dst_wave,
715715
const index_t dst_thread_element_offset,
716716
const bool dst_thread_element_valid,
717717
const index_t dst_element_space_size)

include/ck/utility/c_style_pointer_cast.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ namespace ck {
1212
template <typename PY,
1313
typename PX,
1414
typename enable_if<is_pointer_v<PY> && is_pointer_v<PX>, bool>::type = false>
15-
__host__ __device__ PY c_style_pointer_cast(PX p_x)
15+
__host__ __device__ PY c_style_pointer_cast(PX __restrict__ p_x)
1616
{
1717
#pragma clang diagnostic push
1818
#pragma clang diagnostic ignored "-Wold-style-cast"

include/ck_tile/core/arch/amd_buffer_addressing.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ struct __attribute__((packed)) buffer_resource
3737
uint32_t config;
3838
};
3939

40-
CK_TILE_DEVICE int32x4_t make_wave_buffer_resource(const void* ptr, uint32_t size = 0xffffffff)
40+
CK_TILE_DEVICE int32x4_t make_wave_buffer_resource(const void* __restrict__ ptr, uint32_t size = 0xffffffff)
4141
{
4242
buffer_resource res{ptr, size, CK_TILE_BUFFER_RESOURCE_3RD_DWORD};
4343
int32x4_t r = __builtin_bit_cast(int32x4_t, res);

include/ck_tile/core/arch/amd_buffer_addressing_builtins.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ struct __attribute__((packed)) buffer_resource
2828
uint32_t config;
2929
};
3030

31-
CK_TILE_DEVICE int32x4_t make_wave_buffer_resource(const void* ptr, uint32_t size = 0xffffffff)
31+
CK_TILE_DEVICE int32x4_t make_wave_buffer_resource(const void* __restrict__ ptr, uint32_t size = 0xffffffff)
3232
{
3333
buffer_resource res{ptr, size, CK_TILE_BUFFER_RESOURCE_3RD_DWORD};
3434
int32x4_t r = __builtin_bit_cast(int32x4_t, res);

include/ck_tile/core/tensor/load_tile_transpose.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ CK_TILE_DEVICE auto
403403
load_tile_transpose(const tile_window_with_static_distribution<BottomTensorView_,
404404
WindowLengths_,
405405
TileDistribution_,
406-
NumCoord>& tile_window)
406+
NumCoord>& __restrict__ tile_window)
407407
{
408408
using OutTileDstrEncode = typename OutputTileDistributionTraits<
409409
typename TileDistribution_::DstrEncode,

0 commit comments

Comments (0)