Skip to content

Commit ddf6b51

Browse files
OSS-Fuzz Team authored and copybara-github committed
Don't delete /indexes at the beginning of index_build.py
Doing this breaks re-running the snapshotting for some projects when there are no changes, because some OSS-Fuzz build systems won't relink the fuzzer binary in those cases. If we kept /indexes, then index_build.py could simply re-use the previous indexing result even if linking didn't happen again. However, keeping them has two problems: (1) the indexer won't like it if destination files already exist, and (2) we'll pick up stale indexes on a rebuild when there are actual changes. We solve (1) by only deleting the specific index directory when the indexer is about to run. For (2), we try to be a bit smarter about selecting binaries/indexes in index_build.py, to avoid picking up stale indexes with mismatching build IDs. PiperOrigin-RevId: 783745887
1 parent 6a98d95 commit ddf6b51

File tree

2 files changed

+43
-22
lines changed

2 files changed

+43
-22
lines changed

infra/base-images/base-builder/indexer/clang_wrapper.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import os
2727
from pathlib import Path # pylint: disable=g-importing-member
2828
import shlex
29+
import shutil
2930
import subprocess
3031
import sys
3132
import time
@@ -282,8 +283,13 @@ def read_cdb_fragments(cdb_path: Path) -> Any:
282283
def run_indexer(build_id: str, linker_commands: dict[str, Any]):
283284
"""Run the indexer."""
284285
index_dir = INDEXES_PATH / build_id
285-
# TODO: check if this is correct.
286-
index_dir.mkdir(exist_ok=True)
286+
if index_dir.exists():
287+
# A previous indexer already ran for the same build ID. Clear the directory
288+
# so we can re-run the indexer, otherwise we might run into various issues
289+
# (e.g. the indexer doesn't like it when source files already exist).
290+
shutil.rmtree(index_dir)
291+
292+
index_dir.mkdir()
287293

288294
# Use a build-specific compile commands directory, since there could be
289295
# parallel linking happening at the same time.

infra/base-images/base-builder/indexer/index_build.py

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ class BinaryMetadata:
100100
binary_args: list[str]
101101
binary_env: dict[str, str]
102102
build_id: str
103+
build_id_matches: bool
103104
compile_commands: list[dict[str, Any]]
104105
harness_kind: manifest_types.HarnessKind
105106

@@ -228,8 +229,8 @@ def enumerate_build_targets(
228229
logging.info('enumerate_build_targets')
229230
linker_json_paths = list((OUT / 'cdb').glob('*_linker_commands.json'))
230231

231-
targets = []
232232
logging.info('Found %i linker JSON files.', len(linker_json_paths))
233+
binary_to_build_metadata: dict[str, BinaryMetadata] = {}
233234
for linker_json_path in linker_json_paths:
234235
build_id = linker_json_path.name.split('_')[0]
235236
with linker_json_path.open('rt') as f:
@@ -241,7 +242,26 @@ def enumerate_build_targets(
241242
# the binary path and checking the build id should improve the success
242243
# rate.
243244
if (OUT / name).exists():
244-
binary_paths = [binary_path]
245+
# Just because the name matches, doesn't mean it's the right one for
246+
# this linker command.
247+
# Only set this if we haven't already found an exact build ID match.
248+
# We can't always rely on build ID matching, because some builds will
249+
# modify the binary after the linker runs.
250+
if (
251+
name in binary_to_build_metadata
252+
and binary_to_build_metadata[name].build_id_matches
253+
):
254+
continue
255+
256+
binary_to_build_metadata[name] = BinaryMetadata(
257+
name=name,
258+
binary_args=binary_args,
259+
binary_env=binary_env,
260+
compile_commands=data['compile_commands'],
261+
build_id=build_id,
262+
build_id_matches=build_id == get_build_id(binary_path.as_posix()),
263+
harness_kind=harness_kind,
264+
)
245265
else:
246266
logging.info('trying to find %s with build id %s', name, build_id)
247267
binary_paths = find_fuzzer_binaries(OUT, build_id)
@@ -250,21 +270,19 @@ def enumerate_build_targets(
250270
logging.error('could not find %s with build id %s', name, build_id)
251271
continue
252272

253-
for binary_path in binary_paths:
254-
compile_commands = data['compile_commands']
255-
256-
targets.append(
257-
BinaryMetadata(
258-
name=binary_path.name,
259-
binary_args=binary_args,
260-
binary_env=binary_env,
261-
compile_commands=compile_commands,
262-
build_id=build_id,
263-
harness_kind=harness_kind,
264-
)
265-
)
273+
for binary_path in binary_paths:
274+
compile_commands = data['compile_commands']
275+
binary_to_build_metadata[binary_path.name] = BinaryMetadata(
276+
name=binary_path.name,
277+
binary_args=binary_args,
278+
binary_env=binary_env,
279+
compile_commands=compile_commands,
280+
build_id=build_id,
281+
build_id_matches=True,
282+
harness_kind=harness_kind,
283+
)
266284

267-
return targets
285+
return tuple(binary_to_build_metadata.values())
268286

269287

270288
def copy_fuzzing_engine() -> Path:
@@ -658,10 +676,7 @@ def main():
658676
)
659677
args = parser.parse_args()
660678

661-
# Clear existing indexer artifacts.
662-
if INDEXES_PATH.exists():
663-
shutil.rmtree(INDEXES_PATH)
664-
INDEXES_PATH.mkdir()
679+
INDEXES_PATH.mkdir(exist_ok=True)
665680

666681
# Clean up the existing OUT by default, otherwise we may run into various
667682
# build errors.

0 commit comments

Comments (0)