gmake_python_helpers

Helpers for using Python venv/pip-tools/black/pylama/... with GNU make
git clone https://ccx.te2000.cz/git/gmake_python_helpers

metadata.py (34762B)


      1 from __future__ import annotations
      2 
      3 import email.feedparser
      4 import email.header
      5 import email.message
      6 import email.parser
      7 import email.policy
      8 import pathlib
      9 import sys
     10 import typing
     11 from typing import (
     12     Any,
     13     Callable,
     14     Generic,
     15     Literal,
     16     TypedDict,
     17     cast,
     18 )
     19 
     20 from . import licenses, requirements, specifiers, utils
     21 from . import version as version_module
     22 from .licenses import NormalizedLicenseExpression
     23 
     24 T = typing.TypeVar("T")
     25 
     26 
     27 if sys.version_info >= (3, 11):  # pragma: no cover
     28     ExceptionGroup = ExceptionGroup
     29 else:  # pragma: no cover
     30 
     31     class ExceptionGroup(Exception):
     32         """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.
     33 
     34         If :external:exc:`ExceptionGroup` is already defined by Python itself,
     35         that version is used instead.
     36         """
     37 
     38         message: str
     39         exceptions: list[Exception]
     40 
     41         def __init__(self, message: str, exceptions: list[Exception]) -> None:
     42             self.message = message
     43             self.exceptions = exceptions
     44 
     45         def __repr__(self) -> str:
     46             return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"
     47 
     48 
     49 class InvalidMetadata(ValueError):
     50     """A metadata field contains invalid data."""
     51 
     52     field: str
     53     """The name of the field that contains invalid data."""
     54 
     55     def __init__(self, field: str, message: str) -> None:
     56         self.field = field
     57         super().__init__(message)
     58 
     59 
     60 # The RawMetadata class attempts to make as few assumptions about the underlying
     61 # serialization formats as possible. The idea is that as long as a serialization
     62 # format offers some very basic primitives in *some* way, then we can support
     63 # serializing to and from that format.
     64 class RawMetadata(TypedDict, total=False):
     65     """A dictionary of raw core metadata.
     66 
     67     Each field in core metadata maps to a key of this dictionary (when data is
     68     provided). The key is lower-case and uses underscores instead of dashes,
     69     compared to the equivalent core metadata field name. Any core metadata field
     70     that can be specified multiple times or can hold multiple values in a single
     71     field has a key with a plural name. See :class:`Metadata` whose attributes
     72     match the keys of this dictionary.
     73 
     74     Core metadata fields that can be specified multiple times are stored as a
     75     list or dict depending on which is appropriate for the field. Any fields
     76     which hold multiple values in a single field are stored as a list.
     77 
     78     """
     79 
     80     # Metadata 1.0 - PEP 241
     81     metadata_version: str
     82     name: str
     83     version: str
     84     platforms: list[str]
     85     summary: str
     86     description: str
     87     keywords: list[str]
     88     home_page: str
     89     author: str
     90     author_email: str
     91     license: str
     92 
     93     # Metadata 1.1 - PEP 314
     94     supported_platforms: list[str]
     95     download_url: str
     96     classifiers: list[str]
     97     requires: list[str]
     98     provides: list[str]
     99     obsoletes: list[str]
    100 
    101     # Metadata 1.2 - PEP 345
    102     maintainer: str
    103     maintainer_email: str
    104     requires_dist: list[str]
    105     provides_dist: list[str]
    106     obsoletes_dist: list[str]
    107     requires_python: str
    108     requires_external: list[str]
    109     project_urls: dict[str, str]
    110 
    111     # Metadata 2.0
    112     # PEP 426 attempted to completely revamp the metadata format
    113     # but got stuck without ever being able to build consensus on
    114     # it and ultimately ended up withdrawn.
    115     #
    116     # However, a number of tools had started emitting METADATA with
    117     # `2.0` Metadata-Version, so for historical reasons, this version
    118     # was skipped.
    119 
    120     # Metadata 2.1 - PEP 566
    121     description_content_type: str
    122     provides_extra: list[str]
    123 
    124     # Metadata 2.2 - PEP 643
    125     dynamic: list[str]
    126 
    127     # Metadata 2.3 - PEP 685
    128     # No new fields were added in PEP 685, just some edge cases were
    129     # tightened up to provide better interoperability.
    130 
    131     # Metadata 2.4 - PEP 639
    132     license_expression: str
    133     license_files: list[str]
    134 
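        # Editorial addition, not part of the upstream module: a minimal
        # RawMetadata literal with made-up values, illustrating the lower-cased,
        # underscored keys and the plural, list/dict-valued keys described in
        # the docstring above.
        _EXAMPLE_RAW_METADATA: RawMetadata = {
            "metadata_version": "2.1",
            "name": "example-project",
            "version": "1.0",
            "keywords": ["make", "python", "venv"],
            "project_urls": {"Homepage": "https://example.invalid"},
        }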
    135 
    136 _STRING_FIELDS = {
    137     "author",
    138     "author_email",
    139     "description",
    140     "description_content_type",
    141     "download_url",
    142     "home_page",
    143     "license",
    144     "license_expression",
    145     "maintainer",
    146     "maintainer_email",
    147     "metadata_version",
    148     "name",
    149     "requires_python",
    150     "summary",
    151     "version",
    152 }
    153 
    154 _LIST_FIELDS = {
    155     "classifiers",
    156     "dynamic",
    157     "license_files",
    158     "obsoletes",
    159     "obsoletes_dist",
    160     "platforms",
    161     "provides",
    162     "provides_dist",
    163     "provides_extra",
    164     "requires",
    165     "requires_dist",
    166     "requires_external",
    167     "supported_platforms",
    168 }
    169 
    170 _DICT_FIELDS = {
    171     "project_urls",
    172 }
    173 
    174 
    175 def _parse_keywords(data: str) -> list[str]:
    176     """Split a string of comma-separated keywords into a list of keywords."""
    177     return [k.strip() for k in data.split(",")]
    178 
    179 
    180 def _parse_project_urls(data: list[str]) -> dict[str, str]:
    181     """Parse a list of label/URL string pairings separated by a comma."""
    182     urls = {}
    183     for pair in data:
    184         # Our logic is slightly tricky here as we want to try and do
    185         # *something* reasonable with malformed data.
    186         #
    187         # The main thing that we have to worry about is data that does
    188         # not have a ',' at all to split the label from the value. There
    189         # isn't a singular right answer here, and we will fail validation
    190         # later on (if the caller is validating) so it doesn't *really*
    191         # matter, but since the missing value has to be an empty str
    192         # and our return value is dict[str, str], if we let the key
    193         # be the missing value, then they'd have multiple '' values that
    194         # overwrite each other in an accumulating dict.
    195         #
    196         # The other potential issue is that it's possible to have the
    197         # same label multiple times in the metadata, with no solid "right"
    198         # answer with what to do in that case. As such, we'll do the only
    199         # thing we can, which is treat the field as unparseable and add it
    200         # to our list of unparsed fields.
    201         parts = [p.strip() for p in pair.split(",", 1)]
    202         parts.extend([""] * (max(0, 2 - len(parts))))  # Ensure 2 items
    203 
    204         # TODO: The spec doesn't say anything about whether the keys should
    205         #       be considered case-sensitive or not... logically they should
    206         #       be case-preserving and case-insensitive, but doing that
    207         #       would open up more cases where we might have duplicate
    208         #       entries.
    209         label, url = parts
    210         if label in urls:
    211             # The label already exists in our set of urls, so this field
    212             # is unparseable; raise so the caller can add the whole thing
    213             # to its unparsed data and stop processing it.
    214             raise KeyError("duplicate labels in project urls")
    215         urls[label] = url
    216 
    217     return urls
    218 
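        # Editorial sketch, not part of the upstream module and never called at
        # import time: shows how the two helpers above behave on well-formed and
        # malformed input. The sample strings are invented for illustration.
        def _example_keyword_and_url_parsing() -> None:  # pragma: no cover
            # Keywords are split on commas and stripped of surrounding whitespace.
            assert _parse_keywords("make, python ,venv") == ["make", "python", "venv"]
            # A well-formed Project-URL entry splits on the first comma only.
            assert _parse_project_urls(["Homepage, https://example.invalid/home"]) == {
                "Homepage": "https://example.invalid/home"
            }
            # A missing comma yields an empty URL; a duplicated label raises
            # KeyError, which parse_email() below treats as unparsed data.
            assert _parse_project_urls(["Homepage"]) == {"Homepage": ""}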
    219 
    220 def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
    221     """Get the body of the message."""
    222     # If our source is a str, then our caller has managed encodings for us,
    223     # and we don't need to deal with it.
    224     if isinstance(source, str):
    225         payload = msg.get_payload()
    226         assert isinstance(payload, str)
    227         return payload
    228     # If our source is a bytes, then we're managing the encoding and we need
    229     # to deal with it.
    230     else:
    231         bpayload = msg.get_payload(decode=True)
    232         assert isinstance(bpayload, bytes)
    233         try:
    234             return bpayload.decode("utf8", "strict")
    235         except UnicodeDecodeError as exc:
    236             raise ValueError("payload in an invalid encoding") from exc
    237 
    238 
    239 # The various parse_FORMAT functions here are intended to be as lenient as
    240 # possible in their parsing, while still returning a correctly typed
    241 # RawMetadata.
    242 #
    243 # To aid in this, we also generally want to do as little touching of the
    244 # data as possible, except where there are possibly some historic holdovers
    245 # that make valid data awkward to work with.
    246 #
    247 # While this is a lower level, intermediate format than our ``Metadata``
    248 # class, some light touch ups can make a massive difference in usability.
    249 
    250 # Map METADATA fields to RawMetadata.
    251 _EMAIL_TO_RAW_MAPPING = {
    252     "author": "author",
    253     "author-email": "author_email",
    254     "classifier": "classifiers",
    255     "description": "description",
    256     "description-content-type": "description_content_type",
    257     "download-url": "download_url",
    258     "dynamic": "dynamic",
    259     "home-page": "home_page",
    260     "keywords": "keywords",
    261     "license": "license",
    262     "license-expression": "license_expression",
    263     "license-file": "license_files",
    264     "maintainer": "maintainer",
    265     "maintainer-email": "maintainer_email",
    266     "metadata-version": "metadata_version",
    267     "name": "name",
    268     "obsoletes": "obsoletes",
    269     "obsoletes-dist": "obsoletes_dist",
    270     "platform": "platforms",
    271     "project-url": "project_urls",
    272     "provides": "provides",
    273     "provides-dist": "provides_dist",
    274     "provides-extra": "provides_extra",
    275     "requires": "requires",
    276     "requires-dist": "requires_dist",
    277     "requires-external": "requires_external",
    278     "requires-python": "requires_python",
    279     "summary": "summary",
    280     "supported-platform": "supported_platforms",
    281     "version": "version",
    282 }
    283 _RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
    284 
    285 
    286 def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
    287     """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).
    288 
    289     This function returns a two-item tuple of dicts. The first dict is of
    290     recognized fields from the core metadata specification. Fields that can be
    291     parsed and translated into Python's built-in types are converted
    292     appropriately. All other fields are left as-is. Fields that are allowed to
    293     appear multiple times are stored as lists.
    294 
    295     The second dict contains all other fields from the metadata. This includes
    296     any unrecognized fields. It also includes any fields which are expected to
    297     be parsed into a built-in type but were not formatted appropriately. Finally,
    298     any fields that are expected to appear only once but are repeated are
    299     included in this dict.
    300 
    301     """
    302     raw: dict[str, str | list[str] | dict[str, str]] = {}
    303     unparsed: dict[str, list[str]] = {}
    304 
    305     if isinstance(data, str):
    306         parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
    307     else:
    308         parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
    309 
    310     # We have to wrap parsed.keys() in a set, because in the case of multiple
    311     # values for a key (a list), the key will appear multiple times in the
    312     # list of keys; we then use get_all() below to retrieve every value.
    313     for name in frozenset(parsed.keys()):
    314         # Header names in RFCs are case-insensitive, so we'll normalize to all
    315         # lower case to make comparisons easier.
    316         name = name.lower()
    317 
    318         # We use get_all() here, even for fields that aren't multiple use,
    319         # because otherwise someone could have e.g. two Name fields, and we
    320         # would just silently ignore it rather than doing something about it.
    321         headers = parsed.get_all(name) or []
    322 
    323         # The way the email module works when parsing bytes is that it
    324         # unconditionally decodes the bytes as ascii using the surrogateescape
    325         # handler. When you pull that data back out (such as with get_all()),
    326         # it looks to see if the str has any surrogate escapes, and if it does
    327         # it wraps it in a Header object instead of returning the string.
    328         #
    329         # As such, we'll look for those Header objects, and fix up the encoding.
    330         value = []
    331         # Flag if we have run into any issues processing the headers, thus
    332         # signalling that the data belongs in 'unparsed'.
    333         valid_encoding = True
    334         for h in headers:
    335             # It's unclear if this can return more types than just a Header or
    336             # a str, so we'll just assert here to make sure.
    337             assert isinstance(h, (email.header.Header, str))
    338 
    339             # If it's a header object, we need to do our little dance to get
    340             # the real data out of it. In cases where there is invalid data
    341             # we're going to end up with mojibake, but there's no obvious, good
    342             # way around that without reimplementing parts of the Header object
    343             # ourselves.
    344             #
    345             # That should be fine since, if mojibake happens, this key is
    346             # going into the unparsed dict anyway.
    347             if isinstance(h, email.header.Header):
    348                 # The Header object stores its data as chunks, and each chunk
    349                 # can be independently encoded, so we'll need to check each
    350                 # of them.
    351                 chunks: list[tuple[bytes, str | None]] = []
    352                 for bin, encoding in email.header.decode_header(h):
    353                     try:
    354                         bin.decode("utf8", "strict")
    355                     except UnicodeDecodeError:
    356                         # Enable mojibake.
    357                         encoding = "latin1"
    358                         valid_encoding = False
    359                     else:
    360                         encoding = "utf8"
    361                     chunks.append((bin, encoding))
    362 
    363                 # Turn our chunks back into a Header object, then let that
    364                 # Header object do the right thing to turn them into a
    365                 # string for us.
    366                 value.append(str(email.header.make_header(chunks)))
    367             # This is already a string, so just add it.
    368             else:
    369                 value.append(h)
    370 
    371         # We've processed all of our values to get them into a list of str,
    372         # but we may have mojibake data, in which case this is an unparsed
    373         # field.
    374         if not valid_encoding:
    375             unparsed[name] = value
    376             continue
    377 
    378         raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
    379         if raw_name is None:
    380             # This is a bit of a weird situation: we've encountered a key that
    381             # we don't recognize, so we don't know whether it's meant
    382             # to be a list or not.
    383             #
    384             # Since we can't really tell one way or another, we'll just leave it
    385             # as a list, even though it may be a single item list, because that's
    386             # what makes the most sense for email headers.
    387             unparsed[name] = value
    388             continue
    389 
    390         # If this is one of our string fields, then we'll check to see if our
    391         # value is a list of a single item. If it is then we'll assume that
    392         # it was emitted as a single string, and unwrap the str from inside
    393         # the list.
    394         #
    395         # If it's any other kind of data, then we haven't the faintest clue
    396         # what we should parse it as, and we have to just add it to our list
    397         # of unparsed stuff.
    398         if raw_name in _STRING_FIELDS and len(value) == 1:
    399             raw[raw_name] = value[0]
    400         # If this is one of our list of string fields, then we can just assign
    401         # the value, since email *only* has strings, and our get_all() call
    402         # above ensures that this is a list.
    403         elif raw_name in _LIST_FIELDS:
    404             raw[raw_name] = value
    405         # Special Case: Keywords
    406         # The keywords field is implemented in the metadata spec as a str,
    407         # but it conceptually is a list of strings, and is serialized using
    408         # ", ".join(keywords), so we'll do some light data massaging to turn
    409         # this into what it logically is.
    410         elif raw_name == "keywords" and len(value) == 1:
    411             raw[raw_name] = _parse_keywords(value[0])
    412         # Special Case: Project-URL
    413         # Project-URL is implemented in the metadata spec as a list of
    414         # specially formatted strings that each represent a label and a URL,
    415         # which is fundamentally a mapping; however, the email format doesn't
    416         # support mappings in a sane way, so it was crammed into a list of
    417         # strings instead.
    418         #
    419         # We will do a little light data massaging to turn this into a map as
    420         # it logically should be.
    421         elif raw_name == "project_urls":
    422             try:
    423                 raw[raw_name] = _parse_project_urls(value)
    424             except KeyError:
    425                 unparsed[name] = value
    426         # Nothing that we've done has managed to parse this, so we'll just
    427         # throw it in our unparsed data and move on.
    428         else:
    429             unparsed[name] = value
    430 
    431     # We need to support getting the Description from the message payload in
    432     # addition to getting it from the headers. This does mean, though, there
    433     # is the possibility of it being set both ways, in which case we put both
    434     # in 'unparsed' since we don't know which is right.
    435     try:
    436         payload = _get_payload(parsed, data)
    437     except ValueError:
    438         unparsed.setdefault("description", []).append(
    439             parsed.get_payload(decode=isinstance(data, bytes))  # type: ignore[call-overload]
    440         )
    441     else:
    442         if payload:
    443             # Check to see if we've already got a description; if so, then both
    444             # it and this body move to 'unparsed'.
    445             if "description" in raw:
    446                 description_header = cast(str, raw.pop("description"))
    447                 unparsed.setdefault("description", []).extend(
    448                     [description_header, payload]
    449                 )
    450             elif "description" in unparsed:
    451                 unparsed["description"].append(payload)
    452             else:
    453                 raw["description"] = payload
    454 
    455     # We need to cast our `raw` to a RawMetadata, because a TypedDict only
    456     # supports literal key names, while we're computing our key names
    457     # dynamically; the way this function is implemented, though, ensures our
    458     # `TypedDict` can only end up with valid key names.
    459     return cast(RawMetadata, raw), unparsed
    460 
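        # Editorial sketch, not part of the upstream module and never called at
        # import time: a hedged example of parse_email() on a tiny, invented
        # METADATA document, showing the raw/unparsed split described in the
        # docstring above.
        def _example_parse_email() -> None:  # pragma: no cover
            raw, unparsed = parse_email(
                "Metadata-Version: 2.1\n"
                "Name: example-project\n"
                "Version: 1.0\n"
                "Keywords: make, python\n"
                "Project-URL: Homepage, https://example.invalid\n"
                "Name-Typo: not a real field\n"
                "\n"
                "The body becomes the description.\n"
            )
            assert raw["name"] == "example-project"
            assert raw["keywords"] == ["make", "python"]
            assert raw["project_urls"] == {"Homepage": "https://example.invalid"}
            assert raw["description"].rstrip() == "The body becomes the description."
            # Unrecognized headers end up in the second dict, keyed by their
            # lower-cased names.
            assert unparsed == {"name-typo": ["not a real field"]}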
    461 
    462 _NOT_FOUND = object()
    463 
    464 
    465 # Keep the two values in sync.
    466 _VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
    467 _MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
    468 
    469 _REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
    470 
    471 
    472 class _Validator(Generic[T]):
    473     """Validate a metadata field.
    474 
    475     All _process_*() methods correspond to a core metadata field. The method is
    476     called with the field's raw value. If the raw value is valid it is returned
    477     in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
    478     If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
    479     as appropriate).
    480     """
    481 
    482     name: str
    483     raw_name: str
    484     added: _MetadataVersion
    485 
    486     def __init__(
    487         self,
    488         *,
    489         added: _MetadataVersion = "1.0",
    490     ) -> None:
    491         self.added = added
    492 
    493     def __set_name__(self, _owner: Metadata, name: str) -> None:
    494         self.name = name
    495         self.raw_name = _RAW_TO_EMAIL_MAPPING[name]
    496 
    497     def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
    498         # With Python 3.8+, the caching can be replaced with functools.cached_property().
    499         # No need to check the cache as attribute lookup will resolve into the
    500         # instance's __dict__ before __get__ is called.
    501         cache = instance.__dict__
    502         value = instance._raw.get(self.name)
    503 
    504         # To make the _process_* methods easier, we'll check if the value is None
    505         # and if this field is NOT a required attribute, and if both of those
    506         # things are true, we'll skip the the converter. This will mean that the
    507         # converters never have to deal with the None union.
    508         if self.name in _REQUIRED_ATTRS or value is not None:
    509             try:
    510                 converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
    511             except AttributeError:
    512                 pass
    513             else:
    514                 value = converter(value)
    515 
    516         cache[self.name] = value
    517         try:
    518             del instance._raw[self.name]  # type: ignore[misc]
    519         except KeyError:
    520             pass
    521 
    522         return cast(T, value)
    523 
    524     def _invalid_metadata(
    525         self, msg: str, cause: Exception | None = None
    526     ) -> InvalidMetadata:
    527         exc = InvalidMetadata(
    528             self.raw_name, msg.format_map({"field": repr(self.raw_name)})
    529         )
    530         exc.__cause__ = cause
    531         return exc
    532 
    533     def _process_metadata_version(self, value: str) -> _MetadataVersion:
    534         # Implicitly makes Metadata-Version required.
    535         if value not in _VALID_METADATA_VERSIONS:
    536             raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
    537         return cast(_MetadataVersion, value)
    538 
    539     def _process_name(self, value: str) -> str:
    540         if not value:
    541             raise self._invalid_metadata("{field} is a required field")
    542         # Validate the name as a side-effect.
    543         try:
    544             utils.canonicalize_name(value, validate=True)
    545         except utils.InvalidName as exc:
    546             raise self._invalid_metadata(
    547                 f"{value!r} is invalid for {{field}}", cause=exc
    548             ) from exc
    549         else:
    550             return value
    551 
    552     def _process_version(self, value: str) -> version_module.Version:
    553         if not value:
    554             raise self._invalid_metadata("{field} is a required field")
    555         try:
    556             return version_module.parse(value)
    557         except version_module.InvalidVersion as exc:
    558             raise self._invalid_metadata(
    559                 f"{value!r} is invalid for {{field}}", cause=exc
    560             ) from exc
    561 
    562     def _process_summary(self, value: str) -> str:
    563         """Check the field contains no newlines."""
    564         if "\n" in value:
    565             raise self._invalid_metadata("{field} must be a single line")
    566         return value
    567 
    568     def _process_description_content_type(self, value: str) -> str:
    569         content_types = {"text/plain", "text/x-rst", "text/markdown"}
    570         message = email.message.EmailMessage()
    571         message["content-type"] = value
    572 
    573         content_type, parameters = (
    574             # Defaults to `text/plain` if parsing failed.
    575             message.get_content_type().lower(),
    576             message["content-type"].params,
    577         )
    578         # Check if content-type is valid or defaulted to `text/plain` and thus was
    579         # not parseable.
    580         if content_type not in content_types or content_type not in value.lower():
    581             raise self._invalid_metadata(
    582                 f"{{field}} must be one of {list(content_types)}, not {value!r}"
    583             )
    584 
    585         charset = parameters.get("charset", "UTF-8")
    586         if charset != "UTF-8":
    587             raise self._invalid_metadata(
    588                 f"{{field}} can only specify the UTF-8 charset, not {charset!r}"
    589             )
    590 
    591         markdown_variants = {"GFM", "CommonMark"}
    592         variant = parameters.get("variant", "GFM")  # Use an acceptable default.
    593         if content_type == "text/markdown" and variant not in markdown_variants:
    594             raise self._invalid_metadata(
    595                 f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
    596                 f"not {variant!r}",
    597             )
    598         return value
    599 
    600     def _process_dynamic(self, value: list[str]) -> list[str]:
    601         for dynamic_field in map(str.lower, value):
    602             if dynamic_field in {"name", "version", "metadata-version"}:
    603                 raise self._invalid_metadata(
    604                     f"{dynamic_field!r} is not allowed as a dynamic field"
    605                 )
    606             elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
    607                 raise self._invalid_metadata(
    608                     f"{dynamic_field!r} is not a valid dynamic field"
    609                 )
    610         return list(map(str.lower, value))
    611 
    612     def _process_provides_extra(
    613         self,
    614         value: list[str],
    615     ) -> list[utils.NormalizedName]:
    616         normalized_names = []
    617         try:
    618             for name in value:
    619                 normalized_names.append(utils.canonicalize_name(name, validate=True))
    620         except utils.InvalidName as exc:
    621             raise self._invalid_metadata(
    622                 f"{name!r} is invalid for {{field}}", cause=exc
    623             ) from exc
    624         else:
    625             return normalized_names
    626 
    627     def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
    628         try:
    629             return specifiers.SpecifierSet(value)
    630         except specifiers.InvalidSpecifier as exc:
    631             raise self._invalid_metadata(
    632                 f"{value!r} is invalid for {{field}}", cause=exc
    633             ) from exc
    634 
    635     def _process_requires_dist(
    636         self,
    637         value: list[str],
    638     ) -> list[requirements.Requirement]:
    639         reqs = []
    640         try:
    641             for req in value:
    642                 reqs.append(requirements.Requirement(req))
    643         except requirements.InvalidRequirement as exc:
    644             raise self._invalid_metadata(
    645                 f"{req!r} is invalid for {{field}}", cause=exc
    646             ) from exc
    647         else:
    648             return reqs
    649 
    650     def _process_license_expression(
    651         self, value: str
    652     ) -> NormalizedLicenseExpression | None:
    653         try:
    654             return licenses.canonicalize_license_expression(value)
    655         except ValueError as exc:
    656             raise self._invalid_metadata(
    657                 f"{value!r} is invalid for {{field}}", cause=exc
    658             ) from exc
    659 
    660     def _process_license_files(self, value: list[str]) -> list[str]:
    661         paths = []
    662         for path in value:
    663             if ".." in path:
    664                 raise self._invalid_metadata(
    665                     f"{path!r} is invalid for {{field}}, "
    666                     "parent directory indicators are not allowed"
    667                 )
    668             if "*" in path:
    669                 raise self._invalid_metadata(
    670                     f"{path!r} is invalid for {{field}}, paths must be resolved"
    671                 )
    672             if (
    673                 pathlib.PurePosixPath(path).is_absolute()
    674                 or pathlib.PureWindowsPath(path).is_absolute()
    675             ):
    676                 raise self._invalid_metadata(
    677                     f"{path!r} is invalid for {{field}}, paths must be relative"
    678                 )
    679             if pathlib.PureWindowsPath(path).as_posix() != path:
    680                 raise self._invalid_metadata(
    681                     f"{path!r} is invalid for {{field}}, "
    682                     "paths must use '/' delimiter"
    683                 )
    684             paths.append(path)
    685         return paths
    686 
    687 
    688 class Metadata:
    689     """Representation of distribution metadata.
    690 
    691     Compared to :class:`RawMetadata`, this class provides objects representing
    692     metadata fields instead of only using built-in types. Any invalid metadata
    693     will cause :exc:`InvalidMetadata` to be raised (with a
    694     :py:attr:`~BaseException.__cause__` attribute as appropriate).
    695     """
    696 
    697     _raw: RawMetadata
    698 
    699     @classmethod
    700     def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
    701         """Create an instance from :class:`RawMetadata`.
    702 
    703         If *validate* is true, all metadata will be validated. All exceptions
    704         related to validation will be gathered and raised as an :class:`ExceptionGroup`.
    705         """
    706         ins = cls()
    707         ins._raw = data.copy()  # Mutations occur due to caching enriched values.
    708 
    709         if validate:
    710             exceptions: list[Exception] = []
    711             try:
    712                 metadata_version = ins.metadata_version
    713                 metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
    714             except InvalidMetadata as metadata_version_exc:
    715                 exceptions.append(metadata_version_exc)
    716                 metadata_version = None
    717 
    718             # Make sure to check the fields that are present as well as the
    719             # required fields (so their absence can be reported).
    720             fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
    721             # Remove fields that have already been checked.
    722             fields_to_check -= {"metadata_version"}
    723 
    724             for key in fields_to_check:
    725                 try:
    726                     if metadata_version:
    727                         # Can't use getattr() as that triggers descriptor protocol which
    728                         # will fail due to no value for the instance argument.
    729                         try:
    730                             field_metadata_version = cls.__dict__[key].added
    731                         except KeyError:
    732                             exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
    733                             exceptions.append(exc)
    734                             continue
    735                         field_age = _VALID_METADATA_VERSIONS.index(
    736                             field_metadata_version
    737                         )
    738                         if field_age > metadata_age:
    739                             field = _RAW_TO_EMAIL_MAPPING[key]
    740                             exc = InvalidMetadata(
    741                                 field,
    742                                 f"{field} introduced in metadata version "
    743                                 f"{field_metadata_version}, not {metadata_version}",
    744                             )
    745                             exceptions.append(exc)
    746                             continue
    747                     getattr(ins, key)
    748                 except InvalidMetadata as exc:
    749                     exceptions.append(exc)
    750 
    751             if exceptions:
    752                 raise ExceptionGroup("invalid metadata", exceptions)
    753 
    754         return ins
    755 
    756     @classmethod
    757     def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
    758         """Parse metadata from email headers.
    759 
    760         If *validate* is true, the metadata will be validated. All exceptions
    761         related to validation will be gathered and raised as an :class:`ExceptionGroup`.
    762         """
    763         raw, unparsed = parse_email(data)
    764 
    765         if validate:
    766             exceptions: list[Exception] = []
    767             for unparsed_key in unparsed:
    768                 if unparsed_key in _EMAIL_TO_RAW_MAPPING:
    769                     message = f"{unparsed_key!r} has invalid data"
    770                 else:
    771                     message = f"unrecognized field: {unparsed_key!r}"
    772                 exceptions.append(InvalidMetadata(unparsed_key, message))
    773 
    774             if exceptions:
    775                 raise ExceptionGroup("unparsed", exceptions)
    776 
    777         try:
    778             return cls.from_raw(raw, validate=validate)
    779         except ExceptionGroup as exc_group:
    780             raise ExceptionGroup(
    781                 "invalid or unparsed metadata", exc_group.exceptions
    782             ) from None
    783 
    784     metadata_version: _Validator[_MetadataVersion] = _Validator()
    785     """:external:ref:`core-metadata-metadata-version`
    786     (required; validated to be a valid metadata version)"""
    787     # `name` is not normalized/typed to NormalizedName so as to provide access to
    788     # the original/raw name.
    789     name: _Validator[str] = _Validator()
    790     """:external:ref:`core-metadata-name`
    791     (required; validated using :func:`~packaging.utils.canonicalize_name` and its
    792     *validate* parameter)"""
    793     version: _Validator[version_module.Version] = _Validator()
    794     """:external:ref:`core-metadata-version` (required)"""
    795     dynamic: _Validator[list[str] | None] = _Validator(
    796         added="2.2",
    797     )
    798     """:external:ref:`core-metadata-dynamic`
    799     (validated against core metadata field names and lowercased)"""
    800     platforms: _Validator[list[str] | None] = _Validator()
    801     """:external:ref:`core-metadata-platform`"""
    802     supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
    803     """:external:ref:`core-metadata-supported-platform`"""
    804     summary: _Validator[str | None] = _Validator()
    805     """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
    806     description: _Validator[str | None] = _Validator()  # TODO 2.1: can be in body
    807     """:external:ref:`core-metadata-description`"""
    808     description_content_type: _Validator[str | None] = _Validator(added="2.1")
    809     """:external:ref:`core-metadata-description-content-type` (validated)"""
    810     keywords: _Validator[list[str] | None] = _Validator()
    811     """:external:ref:`core-metadata-keywords`"""
    812     home_page: _Validator[str | None] = _Validator()
    813     """:external:ref:`core-metadata-home-page`"""
    814     download_url: _Validator[str | None] = _Validator(added="1.1")
    815     """:external:ref:`core-metadata-download-url`"""
    816     author: _Validator[str | None] = _Validator()
    817     """:external:ref:`core-metadata-author`"""
    818     author_email: _Validator[str | None] = _Validator()
    819     """:external:ref:`core-metadata-author-email`"""
    820     maintainer: _Validator[str | None] = _Validator(added="1.2")
    821     """:external:ref:`core-metadata-maintainer`"""
    822     maintainer_email: _Validator[str | None] = _Validator(added="1.2")
    823     """:external:ref:`core-metadata-maintainer-email`"""
    824     license: _Validator[str | None] = _Validator()
    825     """:external:ref:`core-metadata-license`"""
    826     license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator(
    827         added="2.4"
    828     )
    829     """:external:ref:`core-metadata-license-expression`"""
    830     license_files: _Validator[list[str] | None] = _Validator(added="2.4")
    831     """:external:ref:`core-metadata-license-file`"""
    832     classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
    833     """:external:ref:`core-metadata-classifier`"""
    834     requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
    835         added="1.2"
    836     )
    837     """:external:ref:`core-metadata-requires-dist`"""
    838     requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
    839         added="1.2"
    840     )
    841     """:external:ref:`core-metadata-requires-python`"""
    842     # Because `Requires-External` allows for non-PEP 440 version specifiers, we
    843     # don't do any processing on the values.
    844     requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
    845     """:external:ref:`core-metadata-requires-external`"""
    846     project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
    847     """:external:ref:`core-metadata-project-url`"""
    848     # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
    849     # regardless of metadata version.
    850     provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
    851         added="2.1",
    852     )
    853     """:external:ref:`core-metadata-provides-extra`"""
    854     provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
    855     """:external:ref:`core-metadata-provides-dist`"""
    856     obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
    857     """:external:ref:`core-metadata-obsoletes-dist`"""
    858     requires: _Validator[list[str] | None] = _Validator(added="1.1")
    859     """``Requires`` (deprecated)"""
    860     provides: _Validator[list[str] | None] = _Validator(added="1.1")
    861     """``Provides`` (deprecated)"""
    862     obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
    863     """``Obsoletes`` (deprecated)"""
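        # Editorial sketch, not part of the upstream module and never called at
        # import time: a hedged example of the high-level Metadata API on an
        # invented METADATA document. The enriched attribute types come from the
        # _Validator descriptors defined above.
        def _example_metadata() -> None:  # pragma: no cover
            text = (
                "Metadata-Version: 2.1\n"
                "Name: example-project\n"
                "Version: 1.0\n"
                "Requires-Python: >=3.8\n"
                "Requires-Dist: requests>=2.0\n"
            )
            meta = Metadata.from_email(text)
            assert str(meta.version) == "1.0"  # version_module.Version
            assert "3.9" in meta.requires_python  # specifiers.SpecifierSet
            assert meta.requires_dist[0].name == "requests"  # requirements.Requirement

            # Invalid or missing required fields are reported together as an
            # ExceptionGroup of InvalidMetadata exceptions when validate=True.
            try:
                Metadata.from_email("Metadata-Version: 2.1\nName: example\n")
            except ExceptionGroup as group:
                assert any(isinstance(exc, InvalidMetadata) for exc in group.exceptions)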