From c8ee75bd6331b41bc792bb04a2f2e754529a2ed5 Mon Sep 17 00:00:00 2001 From: Lalatendu Mohanty Date: Fri, 4 Jul 2025 23:36:34 +0530 Subject: [PATCH] Replacing the Python email library parser with packaging.metadata Replaces the Python email library parser with packaging.metadata.Metadata for parsing wheel/package metadata. Fixes: #561 Signed-off-by: Lalatendu Mohanty --- pyproject.toml | 1 - src/fromager/bootstrapper.py | 7 +--- src/fromager/candidate.py | 35 +++++++---------- src/fromager/dependencies.py | 65 +++++++++++++++++++++++++----- tests/test_dependencies.py | 76 ++++++++++++++++++++++++++++++++++++ tests/test_pep658_support.py | 9 +++-- 6 files changed, 153 insertions(+), 40 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2356d2d4..322355de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,6 @@ dependencies = [ "elfdeps>=0.2.0", "license-expression", "packaging", - "pkginfo", "psutil", "pydantic", "pypi_simple", diff --git a/src/fromager/bootstrapper.py b/src/fromager/bootstrapper.py index 2516fdca..9bdf837d 100644 --- a/src/fromager/bootstrapper.py +++ b/src/fromager/bootstrapper.py @@ -12,7 +12,6 @@ import tempfile import typing import zipfile -from email.parser import BytesParser from urllib.parse import urlparse from packaging.requirements import Requirement @@ -1242,10 +1241,8 @@ def _get_version_from_package_metadata( config_settings=pbi.config_settings, ) metadata_filename = source_dir.parent / metadata_dir_base / "METADATA" - with open(metadata_filename, "rb") as f: - p = BytesParser() - metadata = p.parse(f, headersonly=True) - return Version(metadata["Version"]) + metadata = dependencies.parse_metadata(metadata_filename) + return metadata.version def _resolve_prebuilt_with_history( self, diff --git a/src/fromager/candidate.py b/src/fromager/candidate.py index 09a4658e..4c541fe4 100644 --- a/src/fromager/candidate.py +++ b/src/fromager/candidate.py @@ -2,27 +2,19 @@ import datetime import logging import typing -from email.message import EmailMessage, Message -from email.parser import BytesParser from io import BytesIO -from typing import TYPE_CHECKING from zipfile import ZipFile +from packaging.metadata import Metadata from packaging.requirements import Requirement from packaging.utils import BuildTag, canonicalize_name from packaging.version import Version +from . import dependencies from .request_session import session logger = logging.getLogger(__name__) -# fix for runtime errors caused by inheriting classes that are generic in stubs but not runtime -# https://mypy.readthedocs.io/en/latest/runtime_troubles.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime -if TYPE_CHECKING: - Metadata = Message[str, str] -else: - Metadata = Message - @dataclasses.dataclass(frozen=True, order=True, slots=True, repr=False, kw_only=True) class Candidate: @@ -73,11 +65,10 @@ def metadata(self) -> Metadata: return self._metadata def _get_dependencies(self) -> typing.Iterable[Requirement]: - deps = self.metadata.get_all("Requires-Dist", []) + deps = self.metadata.requires_dist or [] extras = self.extras if self.extras else [""] - for d in deps: - r = Requirement(d) + for r in deps: if r.marker is None: yield r else: @@ -95,7 +86,8 @@ def dependencies(self) -> list[Requirement]: @property def requires_python(self) -> str | None: - return self.metadata.get("Requires-Python") + spec = self.metadata.requires_python + return str(spec) if spec is not None else None def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadata: @@ -107,7 +99,7 @@ def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadat metadata_url: Optional URL of the metadata file (PEP 658) Returns: - Parsed metadata as a Message object + Parsed metadata as a Metadata object """ # Try PEP 658 metadata endpoint first if available if metadata_url: @@ -119,8 +111,8 @@ def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadat response.raise_for_status() # Parse metadata directly from the response content - p = BytesParser() - metadata = p.parse(BytesIO(response.content), headersonly=True) + # validate=False for backwards compatibility with previous BytesParser + metadata = dependencies.parse_metadata(response.content, validate=False) logger.debug(f"Successfully retrieved metadata via PEP 658 for {url}") return metadata @@ -136,8 +128,9 @@ def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadat with ZipFile(BytesIO(data)) as z: for n in z.namelist(): if n.endswith(".dist-info/METADATA"): - p = BytesParser() - return p.parse(z.open(n), headersonly=True) + metadata_content = z.read(n) + # validate=False for backwards compatibility with previous BytesParser + return dependencies.parse_metadata(metadata_content, validate=False) - # If we didn't find the metadata, return an empty dict - return EmailMessage() + # If we didn't find the metadata, raise an error + raise ValueError(f"Could not find METADATA file in wheel: {url}") diff --git a/src/fromager/dependencies.py b/src/fromager/dependencies.py index bcc5932f..61a5a2da 100644 --- a/src/fromager/dependencies.py +++ b/src/fromager/dependencies.py @@ -6,8 +6,8 @@ import pathlib import tempfile import typing +import zipfile -import pkginfo import pyproject_hooks import tomlkit from packaging.metadata import Metadata @@ -344,14 +344,23 @@ def default_get_install_dependencies_of_sdist( return set(metadata.requires_dist) -def parse_metadata(metadata_file: pathlib.Path, *, validate: bool = True) -> Metadata: - """Parse a dist-info/METADATA file +def parse_metadata( + metadata_source: pathlib.Path | bytes, *, validate: bool = True +) -> Metadata: + """Parse metadata from a file path or bytes. + + Args: + metadata_source: Path to METADATA file or bytes containing metadata + validate: Whether to validate metadata (default: True) - The default parse mode is 'strict'. It even fails for a mismatch of field - and core metadata version, e.g. a package with metadata 2.2 and - license-expression field (added in 2.4). + Returns: + Parsed Metadata object """ - return Metadata.from_email(metadata_file.read_bytes(), validate=validate) + if isinstance(metadata_source, pathlib.Path): + metadata_bytes = metadata_source.read_bytes() + else: + metadata_bytes = metadata_source + return Metadata.from_email(metadata_bytes, validate=validate) def pep517_metadata_of_sdist( @@ -418,9 +427,23 @@ def validate_dist_name_version( def get_install_dependencies_of_wheel( req: Requirement, wheel_filename: pathlib.Path, requirements_file_dir: pathlib.Path ) -> set[Requirement]: + """Get install dependencies from a wheel file. + + Extracts and parses the METADATA file from the wheel to get the + Requires-Dist entries. + + Args: + req: The requirement being processed + wheel_filename: Path to the wheel file + requirements_file_dir: Directory to write the requirements file + + Returns: + Set of requirements from the wheel's metadata + """ logger.info(f"getting installation dependencies from {wheel_filename}") - wheel = pkginfo.Wheel(str(wheel_filename)) - deps = _filter_requirements(req, wheel.requires_dist) + metadata = _get_metadata_from_wheel(wheel_filename) + requires_dist = metadata.requires_dist or [] + deps = _filter_requirements(req, requires_dist) _write_requirements_file( deps, requirements_file_dir / INSTALL_REQ_FILE_NAME, @@ -428,6 +451,30 @@ def get_install_dependencies_of_wheel( return deps +def _get_metadata_from_wheel( + wheel_filename: pathlib.Path, *, validate: bool = False +) -> Metadata: + """Extract and parse METADATA from a wheel file. + + Args: + wheel_filename: Path to the wheel file + validate: Whether to validate metadata (default: False for backwards + compatibility with the previous pkginfo-based implementation) + + Returns: + Parsed Metadata object + + Raises: + ValueError: If no METADATA file is found in the wheel + """ + with zipfile.ZipFile(wheel_filename) as whl: + for name in whl.namelist(): + if name.endswith(".dist-info/METADATA"): + metadata_content = whl.read(name) + return parse_metadata(metadata_content, validate=validate) + raise ValueError(f"Could not find METADATA file in wheel: {wheel_filename}") + + def get_pyproject_contents(sdist_root_dir: pathlib.Path) -> dict[str, typing.Any]: pyproject_toml_filename = sdist_root_dir / "pyproject.toml" if not os.path.exists(pyproject_toml_filename): diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py index cbc47485..a8312364 100644 --- a/tests/test_dependencies.py +++ b/tests/test_dependencies.py @@ -4,6 +4,7 @@ import shutil import textwrap import typing +import zipfile from unittest.mock import Mock, patch import pytest @@ -308,3 +309,78 @@ def test_validate_dist_name_version( else: with pytest.raises(exc): validate() + + +def test_get_install_dependencies_of_wheel(tmp_path: pathlib.Path) -> None: + """Test extracting install dependencies from a wheel file.""" + # Arrange: Create a minimal wheel file with dependencies + wheel_file = tmp_path / "test_pkg-1.0.0-py3-none-any.whl" + metadata_content = textwrap.dedent( + """\ + Metadata-Version: 2.3 + Name: test_pkg + Version: 1.0.0 + Author-email: Test Author + Requires-Dist: requests>=2.0 + Requires-Dist: urllib3 + Requires-Dist: pytest; extra == "test" + """ + ) + with zipfile.ZipFile(wheel_file, "w") as zf: + zf.writestr("test_pkg/__init__.py", "") + zf.writestr("test_pkg-1.0.0.dist-info/METADATA", metadata_content) + zf.writestr( + "test_pkg-1.0.0.dist-info/WHEEL", + "Wheel-Version: 1.0\nRoot-Is-Purelib: true\nTag: py3-none-any\n", + ) + zf.writestr("test_pkg-1.0.0.dist-info/RECORD", "") + + req = Requirement("test_pkg") + + # Act + result = dependencies.get_install_dependencies_of_wheel(req, wheel_file, tmp_path) + + # Assert: Should get requests and urllib3, but not pytest (extra not requested) + result_names = {r.name for r in result} + assert result_names == {"requests", "urllib3"} + + +def test_get_install_dependencies_of_wheel_no_deps(tmp_path: pathlib.Path) -> None: + """Test extracting dependencies from a wheel with no dependencies.""" + # Arrange: Create a wheel file without dependencies + wheel_file = tmp_path / "simple_pkg-1.0.0-py3-none-any.whl" + metadata_content = textwrap.dedent( + """\ + Metadata-Version: 2.3 + Name: simple_pkg + Version: 1.0.0 + """ + ) + with zipfile.ZipFile(wheel_file, "w") as zf: + zf.writestr("simple_pkg/__init__.py", "") + zf.writestr("simple_pkg-1.0.0.dist-info/METADATA", metadata_content) + zf.writestr( + "simple_pkg-1.0.0.dist-info/WHEEL", + "Wheel-Version: 1.0\nRoot-Is-Purelib: true\nTag: py3-none-any\n", + ) + zf.writestr("simple_pkg-1.0.0.dist-info/RECORD", "") + + req = Requirement("simple_pkg") + + # Act + result = dependencies.get_install_dependencies_of_wheel(req, wheel_file, tmp_path) + + # Assert + assert result == set() + + +def test_get_metadata_from_wheel_missing_metadata(tmp_path: pathlib.Path) -> None: + """Test that missing METADATA file raises ValueError.""" + # Arrange: Create a wheel file without METADATA + wheel_file = tmp_path / "broken_pkg-1.0.0-py3-none-any.whl" + with zipfile.ZipFile(wheel_file, "w") as zf: + zf.writestr("broken_pkg/__init__.py", "") + + # Act & Assert + with pytest.raises(ValueError, match="Could not find METADATA file"): + dependencies._get_metadata_from_wheel(wheel_file) diff --git a/tests/test_pep658_support.py b/tests/test_pep658_support.py index 0bbaf77c..3af992ce 100644 --- a/tests/test_pep658_support.py +++ b/tests/test_pep658_support.py @@ -57,10 +57,11 @@ def test_get_metadata_with_pep658_success(self, mock_session: typing.Any) -> Non metadata = get_metadata_for_wheel(wheel_url, metadata_url) # Verify the metadata was parsed correctly - assert metadata["Name"] == "test-package" - assert metadata["Version"] == "1.0.0" - assert metadata["Summary"] == "A test package" - assert "requests >= 2.0.0" in metadata.get_all("Requires-Dist", []) + assert metadata.name == "test-package" + assert str(metadata.version) == "1.0.0" + assert metadata.summary == "A test package" + assert metadata.requires_dist is not None + assert any(str(req) == "requests>=2.0.0" for req in metadata.requires_dist) # Verify only the metadata URL was called, not the wheel URL mock_session.get.assert_called_once_with(metadata_url)