@@ -233,11 +233,18 @@ def compile_cuda_script( # # noqa: C901
233
233
234
234
235
235
def run_program (
236
- args : list [str ], seed : Optional [int ], timeout : int , multi_gpu : bool = False
236
+ args : list [str ],
237
+ seed : Optional [int ],
238
+ timeout : int ,
239
+ multi_gpu : bool = False ,
240
+ extra_env : Optional [dict [str , str ]] = None ,
237
241
) -> RunResult :
238
242
print ("[Running]" )
239
243
# set up a pipe so the tester can communicate its verdict to us
240
244
env = os .environ .copy ()
245
+ if extra_env is not None :
246
+ env .update (extra_env )
247
+
241
248
pipe_read , pipe_write = os .pipe ()
242
249
env ["POPCORN_FD" ] = str (pipe_write )
243
250
if seed is not None :
@@ -344,7 +351,10 @@ def profile_program(
344
351
"--" ,
345
352
] + call
346
353
347
- run_result = run_program (call , seed = seed , timeout = timeout , multi_gpu = multi_gpu )
354
+ run_result = run_program (call , seed = seed , timeout = timeout , multi_gpu = multi_gpu , extra_env = {
355
+ "GPU_DUMP_CODE_OBJECT" : "1" ,
356
+ })
357
+
348
358
profile_result = None
349
359
350
360
if run_result .success :
@@ -362,6 +372,10 @@ def profile_program(
362
372
# keeping the individual traces around.
363
373
trace_path .unlink ()
364
374
375
+ # Also move the code objects to the profiling output directory.
376
+ for code_obj in list (Path .cwd ().glob ("_code_object*.o" )):
377
+ code_obj .rename (output_dir / code_obj .name )
378
+
365
379
profile_result = ProfileResult (
366
380
profiler = 'rocPROF' ,
367
381
download_url = None ,
0 commit comments