diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py
index 133fc1c2c1..cd2fb12de5 100644
--- a/src/clusterfuzz/_internal/build_management/build_archive.py
+++ b/src/clusterfuzz/_internal/build_management/build_archive.py
@@ -14,6 +14,7 @@
 """Build Archive manager."""

 import abc
+import json
 import os
 from typing import BinaryIO
 from typing import Callable
@@ -219,29 +220,144 @@ def unpack(self,

 class ChromeBuildArchive(DefaultBuildArchive):
   """Handles chrome build archives. This special cases the default behaviour by
-  looking at the content of the `.runtime_deps` file, in order to unpack all the
-  fuzzer dependencies correctly.
-  In case something goes wrong, this defaults to using the default unpacker.
+  looking at the content of the `.runtime_deps` file for each fuzzer target in
+  order to unpack all of its dependencies correctly.
+
+  Expects a manifest file named `clusterfuzz_manifest.json` in the root of the
+  archive to decide which schema version to use when interpreting its contents.
+  The legacy schema is applied to archives with no manifest. Defaults to using
+  the default unpacker in case something goes wrong.
+
+  Under the legacy schema, fuzz targets were assumed to be at the root of the
+  archive while runtime_deps starting with `../../` were remapped to
+  `/src_root/`.
+
+  Given the following runtime_deps:
+
+  my_fuzzer.runtime_deps:
+  ==========
+  ./my_fuzzer
+  my_fuzzer.options
+  my_fuzzer.owners
+  my_fuzzer.runtime_deps
+  ./libbase.so
+  ./libatomic.so
+  ../../.vpython3
+  ../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib
+  /lib/
+
+
+  The legacy schema would expect an archive with the following structure:
+  ==========
+  my_fuzzer
+  my_fuzzer.options
+  my_fuzzer.owners
+  my_fuzzer.runtime_deps
+  libbase.so
+  libatomic.so
+  # etc. for all fuzz targets
+  src_root/
+    .vpython3
+    third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/
+      # all instrumented libs
+    # etc. for other deps
+
+  Schema version 1 does away with `/src_root/` and interprets runtime_deps
+  entries as file paths relative to the runtime_deps file, which lives in the
+  build directory along with fuzz target binaries.
+
+  Expected archive structure with the same runtime_deps:
+  ==========
+  out/build/my_fuzzer
+  out/build/my_fuzzer.options
+  out/build/my_fuzzer.owners
+  out/build/my_fuzzer.runtime_deps
+  out/build/libbase.so
+  out/build/libatomic.so
+  # etc. for all fuzz targets and deps in the build directory
+  .vpython3
+  third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/
+    # all instrumented libs
+  # etc. for other deps
   """

+  def __init__(self,
+               reader: archive.ArchiveReader,
+               default_archive_schema_version: int = 0):
+    """Initializes a `ChromeBuildArchive` with the given reader.
+
+    Arguments:
+      reader: See `DefaultBuildArchive`.
+      default_archive_schema_version: Specifies which version of a build archive
+        to expect if `clusterfuzz_manifest.json` is missing or badly formatted.
+    """
+    super().__init__(reader)
+    # The manifest may not exist for earlier versions of archives. In this
+    # case, default to schema version 0.
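+    # When present, the manifest is expected to be a JSON object with an
+    # `archive_schema_version` integer field.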
+    manifest_path = 'clusterfuzz_manifest.json'
+    if self.file_exists(manifest_path):
+      with self.open(manifest_path) as f:
+        manifest = json.load(f)
+      self._archive_schema_version = manifest.get('archive_schema_version')
+      if self._archive_schema_version is None:
+        logs.warning(
+            'clusterfuzz_manifest.json was incorrectly formatted or missing an '
+            'archive_schema_version field')
+        self._archive_schema_version = default_archive_schema_version
+    else:
+      self._archive_schema_version = default_archive_schema_version
+
   def root_dir(self) -> str:
     if not hasattr(self, '_root_dir'):
       self._root_dir = super().root_dir()  # pylint: disable=attribute-defined-outside-init
     return self._root_dir

-  def to_archive_path(self, path: str) -> str:
-    """Deps are relative to the Chrome root directory. However, there might be
-    a common root directory in the archive, which means we need to make sure
-    the file path is correct.
+  def archive_schema_version(self) -> int:
+    """Returns the schema version number for this archive."""
+    return self._archive_schema_version
+
+  def get_dependency_path(self, path: str, deps_file_path: str) -> str:
+    """Deps are given as paths relative to the deps file where they are listed,
+    so we need to translate them to the corresponding paths relative to the
+    archive root.

     Args:
-      path: the dependency path relative to Chrome's root directory.
+      path: the dependency path relative to the deps file.
+      deps_file_path: the path to the deps file, relative to the archive root.

     Returns:
-      the path relative to the archive.
+      the dependency path relative to the archive root.
     """
-    path = os.path.normpath(path)
+    # Archive schema version 0 represents legacy behavior. For newer archive
+    # versions, runtime_deps that were formerly stored under
+    # {self.root_dir()}/src_root/ are now stored in the root directory, while
+    # the build artifacts formerly stored in the root directory are now stored
+    # in the build directory.
+
+    if self._archive_schema_version > 0:
+      # Assumes the dependency path is relative to the deps file and
+      # transforms it into a full path relative to the archive root. For
+      # example:
+      #
+      # deps_file_path: "/A/B/fuzz_target.runtime_deps"
+      # os.path.dirname(deps_file_path) => "/A/B" (call this DEPS_DIR)
+      # path1: "./my_dep"
+      # path2: "../../C/my_dep2"
+      # path3: "D/my_dep3"
+      #
+      # os.path.join(DEPS_DIR, path1) => "/A/B/./my_dep"
+      # os.path.join(DEPS_DIR, path2) => "/A/B/../../C/my_dep2"
+      # os.path.join(DEPS_DIR, path3) => "/A/B/D/my_dep3"
+      #
+      # os.path.normpath(os.path.join(DEPS_DIR, path1)) => "/A/B/my_dep"
+      # os.path.normpath(os.path.join(DEPS_DIR, path2)) => "/C/my_dep2"
+      # os.path.normpath(os.path.join(DEPS_DIR, path3)) => "/A/B/D/my_dep3"
+      return os.path.normpath(
+          os.path.join(os.path.dirname(deps_file_path), path))
+
+    # Legacy behavior. Remap `../../` to `src_root/`.
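+    # For example, `../../.vpython3` is remapped to `src_root/.vpython3`, as
+    # illustrated in the class docstring above.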
+ path = os.path.normpath(path) if path.startswith('../../'): path = path.replace('../../', 'src_root/') @@ -271,7 +387,7 @@ def _get_common_files(self) -> List[str]: def get_target_dependencies( self, fuzz_target: str) -> List[archive.ArchiveMemberInfo]: - target_path = self.to_archive_path(fuzz_target) + target_path = self.get_path_for_target(fuzz_target) deps_file = f'{target_path}.runtime_deps' if not self.file_exists(deps_file): logs.warning(f'runtime_deps file not found for {target_path}') @@ -280,7 +396,10 @@ def get_target_dependencies( res = [] matchers = [] with self.open(deps_file) as f: - deps = [self.to_archive_path(l.decode()) for l in f.read().splitlines()] + deps = [ + self.get_dependency_path(l.decode(), deps_file) + for l in f.read().splitlines() + ] for dep in deps: # We need to match the file prefixes here, because some of the deps are # globering the whole directory. Same for files, on mac platform, we diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index c56180ea1f..791446f9fa 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -13,6 +13,7 @@ # limitations under the License. """Build archive tests.""" import io +import json import os import tempfile import unittest @@ -152,8 +153,18 @@ def _add_files_to_archive(self, files): name=file, is_dir=False, size_bytes=0, mode=0)) self.mock.open.return_value.list_members.return_value = res - def _generate_possible_fuzzer_dependencies(self, dir_prefix, fuzz_target): - """Generates all possible dependencies for the given target.""" + def _generate_possible_fuzzer_dependencies_legacy(self, dir_prefix, + fuzz_target): + """Generates all possible dependencies for the given target. + + This implementation represents the legacy archive schema prior to version 1 + and should not be used for new tests; we keep it around for backwards + compatibility. + + New tests should use a combination of + `_generate_possible_fuzzer_dependencies()` and + `_resolve_relative_dependency_paths()`. + """ needed_files = [ f'{fuzz_target}', f'{fuzz_target}.exe', @@ -175,6 +186,40 @@ def _generate_possible_fuzzer_dependencies(self, dir_prefix, fuzz_target): ] return [os.path.join(dir_prefix, file) for file in needed_files] + def _generate_possible_fuzzer_dependencies(self, fuzz_target): + """Returns a list of dependencies as file paths relative to + {fuzz_target}.runtime_deps, as they appear in runtime_deps files in real + archives. + """ + return [ + f'./{fuzz_target}', + f'{fuzz_target}.owners', + f'{fuzz_target}.runtime_deps', + f'{fuzz_target}.dSYM/Contents/Resources/DWARF/some_dependency', + './libbase.so', + '../../tools/valgrind/asan/', + '../../third_party/llvm-build/Release+Asserts/bin/llvm-symbolizer', + '../../third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib', + 'third_party/instrumented_libs/binaries/msan-chained-origins-noble-lib/lib/ld-linux-x86-64.so.2', + './libatomic.so', + 'icudtl.dat', + f'bin/run_{fuzz_target}', + '../../testing/location_tags.json', + ] + + def _resolve_relative_dependency_paths(self, deps_paths): + """Returns a list of dependencies as normalized file paths, i.e. with + relative path separators like './' and '../' resolved to their true + directory names. 
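+    For example, './libbase.so' becomes 'out/build/libbase.so' and
+    '../../testing/location_tags.json' becomes 'testing/location_tags.json'.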
+ """ + + # Runtime deps include file paths that begin with ../../ so the build + # directory is assumed to be two levels deep into the file tree. + return [ + os.path.normpath(os.path.join('out/build/', file)) + for file in deps_paths + ] + def _generate_runtime_deps(self, deps): def _mock_open(_): @@ -189,12 +234,19 @@ def _mock_open(_): def _declare_fuzzers(self, fuzzers): self._declared_fuzzers = fuzzers + def _set_archive_schema_version(self, version): + self.build = build_archive.ChromeBuildArchive(self.mock.open.return_value, + version) + @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies(self, dir_prefix): + def test_possible_dependencies_legacy(self, dir_prefix): """Tests that all the necessary dependencies are correctly extracted from - the runtime_deps file.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( + the runtime_deps file, using the legacy archive schema where dependency + paths are interpreted as relative to the archive root and `../../` is + remapped to `src_root/`.""" + deps_files = self._generate_possible_fuzzer_dependencies_legacy( + '', 'my_fuzzer') + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') self._add_files_to_archive(needed_files) self._generate_runtime_deps(deps_files) @@ -204,29 +256,13 @@ def test_possible_dependencies(self, dir_prefix): self.assertCountEqual(to_extract, needed_files) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies_archive_without_normalized_path( + def test_possible_dependencies_deps_without_normalized_path_legacy( self, dir_prefix): """Tests that the chrome build handler correctly handles mixed-up normalized and not normalized path.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( - dir_prefix, 'my_fuzzer') - self._add_files_to_archive(needed_files) - - # we want our runtime_deps to have normalized path so that they do not - # exactly match the archive paths. 
- self._generate_runtime_deps(deps_files) - self._declare_fuzzers(['my_fuzzer']) - to_extract = self.build.get_target_dependencies('my_fuzzer') - to_extract = [f.name for f in to_extract] - self.assertCountEqual(to_extract, needed_files) - - @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_possible_dependencies_deps_without_normalized_path(self, dir_prefix): - """Tests that the chrome build handler correctly handles mixed-up - normalized and not normalized path.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( + deps_files = self._generate_possible_fuzzer_dependencies_legacy( + '', 'my_fuzzer') + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') self._add_files_to_archive([os.path.normpath(f) for f in needed_files]) self._generate_runtime_deps(deps_files) @@ -237,13 +273,14 @@ def test_possible_dependencies_deps_without_normalized_path(self, dir_prefix): [os.path.normpath(f) for f in needed_files]) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_other_fuzzer_not_extracted(self, dir_prefix): + def test_other_fuzzer_not_extracted_legacy(self, dir_prefix): """Tests that the chrome build handler only unpacks dependencies for the requested fuzzer, even if other fuzzers exist in the build.""" - deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer') - needed_files = self._generate_possible_fuzzer_dependencies( + deps_files = self._generate_possible_fuzzer_dependencies_legacy( + '', 'my_fuzzer') + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') - other_fuzzer = self._generate_possible_fuzzer_dependencies( + other_fuzzer = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'other_fuzzer') self._add_files_to_archive(list(set(needed_files + other_fuzzer))) self._generate_runtime_deps(deps_files) @@ -253,10 +290,10 @@ def test_other_fuzzer_not_extracted(self, dir_prefix): self.assertCountEqual(to_extract, needed_files) @parameterized.parameterized.expand(['/b/build/', 'build/', '']) - def test_dsyms_are_correctly_unpacked(self, dir_prefix): + def test_dsyms_are_correctly_unpacked_legacy(self, dir_prefix): """Tests that even if not listed in the runtime deps, dSYMs are correctly unpacked. """ - needed_files = self._generate_possible_fuzzer_dependencies( + needed_files = self._generate_possible_fuzzer_dependencies_legacy( dir_prefix, 'my_fuzzer') self._add_files_to_archive(needed_files) self._generate_runtime_deps(['my_fuzzer']) @@ -265,3 +302,110 @@ def test_dsyms_are_correctly_unpacked(self, dir_prefix): dsym_path = os.path.join( dir_prefix, 'my_fuzzer.dSYM/Contents/Resources/DWARF/some_dependency') self.assertIn(dsym_path, to_extract) + + def test_possible_dependencies(self): + """Tests that all the necessary dependencies are correctly extracted from + the runtime_deps file. + + Under archive schema version 1, dependency paths in `runtime_deps` files + are interpreted as being relative to the file itself, meaning that they must + be normalized to the equivalent path relative to the archive root before + they can be extracted. 
+ """ + self._set_archive_schema_version(1) + deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer') + deps_files = self._resolve_relative_dependency_paths(deps_entries) + self._add_files_to_archive(deps_files) + self._generate_runtime_deps(deps_entries) + self._declare_fuzzers(['my_fuzzer']) + to_extract = self.build.get_target_dependencies('my_fuzzer') + to_extract = [f.name for f in to_extract] + self.assertCountEqual(to_extract, deps_files) + + def test_other_fuzzer_not_extracted(self): + """Tests that the chrome build handler only unpacks dependencies for the + requested fuzzer, even if other fuzzers exist in the build.""" + self._set_archive_schema_version(1) + deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer') + needed_files = self._resolve_relative_dependency_paths(deps_entries) + other_fuzzer = self._resolve_relative_dependency_paths( + self._generate_possible_fuzzer_dependencies('other_fuzzer')) + self._add_files_to_archive(list(set(needed_files + other_fuzzer))) + self._generate_runtime_deps(deps_entries) + self._declare_fuzzers(['my_fuzzer', 'other_fuzzer']) + to_extract = self.build.get_target_dependencies('my_fuzzer') + to_extract = [f.name for f in to_extract] + self.assertCountEqual(to_extract, needed_files) + + def test_dsyms_are_correctly_unpacked(self): + """Tests that even if not listed in the runtime deps, dSYMs are correctly + unpacked.""" + self._set_archive_schema_version(1) + needed_files = self._resolve_relative_dependency_paths( + self._generate_possible_fuzzer_dependencies('my_fuzzer')) + self._add_files_to_archive(needed_files) + self._generate_runtime_deps(['my_fuzzer']) + to_extract = self.build.get_target_dependencies('my_fuzzer') + to_extract = [f.name for f in to_extract] + self.assertIn( + 'out/build/my_fuzzer.dSYM/Contents/Resources/DWARF/some_dependency', + to_extract) + + +class ChromeBuildArchiveManifestTest(unittest.TestCase): + """Test for reading clusterfuzz_manifest.json for Chrome archives.""" + + def setUp(self): + test_helpers.patch(self, [ + 'clusterfuzz._internal.system.archive.ArchiveReader.file_exists', + 'clusterfuzz._internal.system.archive.ArchiveReader', + 'clusterfuzz._internal.system.archive.open', + ]) + self.mock.file_exists.return_value = False + + def _generate_manifest(self, archive_schema_version): + """Mocks open calls so that they return a buffer containing valid JSON for + the given archive schema version.""" + + def _mock_open(_): + buffer = io.BytesIO(b'') + buffer.write( + json.dumps({ + 'archive_schema_version': archive_schema_version + }).encode()) + buffer.seek(0) + return buffer + + self.mock.open.return_value.open.side_effect = _mock_open + + def _generate_invalid_manifest(self): + """Mocks open calls so that they return a buffer containing invalid contents + for clusterfuzz_manifest.json.""" + + def _mock_open(_): + buffer = io.BytesIO(b'') + buffer.write(json.dumps({'my_field': 1}).encode()) + buffer.seek(0) + return buffer + + self.mock.open.return_value.open.side_effect = _mock_open + + def test_manifest_is_correctly_read(self): + """Tests that the manifest is correctly read and used to set the archive + schema version if it exists and that the cases of a missing or invalid + manifest are handled correctly.""" + + # No manifest exists; should default to archive schema version 0 (legacy). + test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.assertEqual(test_archive.archive_schema_version(), 0) + + # Invalid manifest; should default to version 0. 
+ self.mock.file_exists.return_value = True + self._generate_invalid_manifest() + test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.assertEqual(test_archive.archive_schema_version(), 0) + + # Valid manifest. + self._generate_manifest(1) + test_archive = build_archive.ChromeBuildArchive(self.mock.open.return_value) + self.assertEqual(test_archive.archive_schema_version(), 1)
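
As a quick illustration of the schema version 1 path resolution described in
`get_dependency_path` above (a minimal standalone sketch, not part of the
patch; the deps file location and dependency names are hypothetical):

    import os

    def resolve_dep(dep, deps_file_path):
      # Resolve a runtime_deps entry relative to its deps file, as
      # get_dependency_path does when archive_schema_version > 0.
      return os.path.normpath(
          os.path.join(os.path.dirname(deps_file_path), dep))

    deps_file = 'out/build/my_fuzzer.runtime_deps'  # hypothetical location
    assert resolve_dep('./libbase.so', deps_file) == 'out/build/libbase.so'
    assert resolve_dep('../../.vpython3', deps_file) == '.vpython3'
    assert resolve_dep('bin/run_my_fuzzer', deps_file) == 'out/build/bin/run_my_fuzzer'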