metadata.py (34762B)
"""Parsing and validation primitives for Python core metadata (``METADATA``)."""

from __future__ import annotations

import email.feedparser
import email.header
import email.message
import email.parser
import email.policy
import pathlib
import sys
import typing
from typing import (
    Any,
    Callable,
    Generic,
    Literal,
    TypedDict,
    cast,
)

from . import licenses, requirements, specifiers, utils
from . import version as version_module
from .licenses import NormalizedLicenseExpression

T = typing.TypeVar("T")


if sys.version_info >= (3, 11):  # pragma: no cover
    ExceptionGroup = ExceptionGroup
else:  # pragma: no cover

    class ExceptionGroup(Exception):
        """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.

        If :external:exc:`ExceptionGroup` is already defined by Python itself,
        that version is used instead.
        """

        message: str
        exceptions: list[Exception]

        def __init__(self, message: str, exceptions: list[Exception]) -> None:
            self.message = message
            self.exceptions = exceptions

        def __repr__(self) -> str:
            return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"


class InvalidMetadata(ValueError):
    """A metadata field contains invalid data."""

    field: str
    """The name of the field that contains invalid data."""

    def __init__(self, field: str, message: str) -> None:
        self.field = field
        super().__init__(message)


# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# format offers some very basic primitives in *some* way then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
    """A dictionary of raw core metadata.

    Each field in core metadata maps to a key of this dictionary (when data is
    provided). The key is lower-case and underscores are used instead of dashes
    compared to the equivalent core metadata field. Any core metadata field that
    can be specified multiple times or can hold multiple values in a single
    field have a key with a plural name. See :class:`Metadata` whose attributes
    match the keys of this dictionary.

    Core metadata fields that can be specified multiple times are stored as a
    list or dict depending on which is appropriate for the field. Any fields
    which hold multiple values in a single field are stored as a list.

    """

    # Metadata 1.0 - PEP 241
    metadata_version: str
    name: str
    version: str
    platforms: list[str]
    summary: str
    description: str
    keywords: list[str]
    home_page: str
    author: str
    author_email: str
    license: str

    # Metadata 1.1 - PEP 314
    supported_platforms: list[str]
    download_url: str
    classifiers: list[str]
    requires: list[str]
    provides: list[str]
    obsoletes: list[str]

    # Metadata 1.2 - PEP 345
    maintainer: str
    maintainer_email: str
    requires_dist: list[str]
    provides_dist: list[str]
    obsoletes_dist: list[str]
    requires_python: str
    requires_external: list[str]
    project_urls: dict[str, str]

    # Metadata 2.0
    # PEP 426 attempted to completely revamp the metadata format
    # but got stuck without ever being able to build consensus on
    # it and ultimately ended up withdrawn.
    #
    # However, a number of tools had started emitting METADATA with
    # `2.0` Metadata-Version, so for historical reasons, this version
    # was skipped.

    # Metadata 2.1 - PEP 566
    description_content_type: str
    provides_extra: list[str]

    # Metadata 2.2 - PEP 643
    dynamic: list[str]

    # Metadata 2.3 - PEP 685
    # No new fields were added in PEP 685, just some edge cases were
    # tightened up to provide better interoperability.

    # Metadata 2.4 - PEP 639
    license_expression: str
    license_files: list[str]


# Fields that hold a single string value in core metadata.
_STRING_FIELDS = {
    "author",
    "author_email",
    "description",
    "description_content_type",
    "download_url",
    "home_page",
    "license",
    "license_expression",
    "maintainer",
    "maintainer_email",
    "metadata_version",
    "name",
    "requires_python",
    "summary",
    "version",
}

# Fields that may be specified multiple times and so hold a list of strings.
_LIST_FIELDS = {
    "classifiers",
    "dynamic",
    "license_files",
    "obsoletes",
    "obsoletes_dist",
    "platforms",
    "provides",
    "provides_dist",
    "provides_extra",
    "requires",
    "requires_dist",
    "requires_external",
    "supported_platforms",
}

# Fields that are logically a mapping (label -> value).
_DICT_FIELDS = {
    "project_urls",
}


def _parse_keywords(data: str) -> list[str]:
    """Split a string of comma-separated keywords into a list of keywords."""
    return [k.strip() for k in data.split(",")]


def _parse_project_urls(data: list[str]) -> dict[str, str]:
    """Parse a list of label/URL string pairings separated by a comma.

    Raises :exc:`KeyError` when the same label occurs more than once, so the
    caller can treat the whole field as unparseable.
    """
    urls = {}
    for pair in data:
        # Our logic is slightly tricky here as we want to try and do
        # *something* reasonable with malformed data.
        #
        # The main thing that we have to worry about, is data that does
        # not have a ',' at all to split the label from the value. There
        # isn't a singular right answer here, and we will fail validation
        # later on (if the caller is validating) so it doesn't *really*
        # matter, but since the missing value has to be an empty str
        # and our return value is dict[str, str], if we let the key
        # be the missing value, then they'd have multiple '' values that
        # overwrite each other in an accumulating dict.
        #
        # The other potential issue is that it's possible to have the
        # same label multiple times in the metadata, with no solid "right"
        # answer with what to do in that case. As such, we'll do the only
        # thing we can, which is treat the field as unparseable and add it
        # to our list of unparsed fields.
        parts = [p.strip() for p in pair.split(",", 1)]
        parts.extend([""] * (max(0, 2 - len(parts))))  # Ensure 2 items

        # TODO: The spec doesn't say anything about if the keys should be
        #       considered case sensitive or not... logically they should
        #       be case-preserving and case-insensitive, but doing that
        #       would open up more cases where we might have duplicate
        #       entries.
        label, url = parts
        if label in urls:
            # The label already exists in our set of urls, so this field
            # is unparseable, and we can just add the whole thing to our
            # unparseable data and stop processing it.
            raise KeyError("duplicate labels in project urls")
        urls[label] = url

    return urls


def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
    """Get the body of the message.

    Raises :exc:`ValueError` when *source* is bytes and the payload is not
    valid UTF-8.
    """
    # If our source is a str, then our caller has managed encodings for us,
    # and we don't need to deal with it.
    if isinstance(source, str):
        payload = msg.get_payload()
        assert isinstance(payload, str)
        return payload
    # If our source is a bytes, then we're managing the encoding and we need
    # to deal with it.
    else:
        bpayload = msg.get_payload(decode=True)
        assert isinstance(bpayload, bytes)
        try:
            return bpayload.decode("utf8", "strict")
        except UnicodeDecodeError as exc:
            raise ValueError("payload in an invalid encoding") from exc


# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.

# Map METADATA fields to RawMetadata.
_EMAIL_TO_RAW_MAPPING = {
    "author": "author",
    "author-email": "author_email",
    "classifier": "classifiers",
    "description": "description",
    "description-content-type": "description_content_type",
    "download-url": "download_url",
    "dynamic": "dynamic",
    "home-page": "home_page",
    "keywords": "keywords",
    "license": "license",
    "license-expression": "license_expression",
    "license-file": "license_files",
    "maintainer": "maintainer",
    "maintainer-email": "maintainer_email",
    "metadata-version": "metadata_version",
    "name": "name",
    "obsoletes": "obsoletes",
    "obsoletes-dist": "obsoletes_dist",
    "platform": "platforms",
    "project-url": "project_urls",
    "provides": "provides",
    "provides-dist": "provides_dist",
    "provides-extra": "provides_extra",
    "requires": "requires",
    "requires-dist": "requires_dist",
    "requires-external": "requires_external",
    "requires-python": "requires_python",
    "summary": "summary",
    "supported-platform": "supported_platforms",
    "version": "version",
}
_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}


def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
    """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).

    This function returns a two-item tuple of dicts. The first dict is of
    recognized fields from the core metadata specification. Fields that can be
    parsed and translated into Python's built-in types are converted
    appropriately. All other fields are left as-is. Fields that are allowed to
    appear multiple times are stored as lists.

    The second dict contains all other fields from the metadata. This includes
    any unrecognized fields. It also includes any fields which are expected to
    be parsed into a built-in type but were not formatted appropriately. Finally,
    any fields that are expected to appear only once but are repeated are
    included in this dict.

    """
    raw: dict[str, str | list[str] | dict[str, str]] = {}
    unparsed: dict[str, list[str]] = {}

    if isinstance(data, str):
        parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
    else:
        parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)

    # We have to wrap parsed.keys() in a set, because in the case of multiple
    # values for a key (a list), the key will appear multiple times in the
    # list of keys, but we're avoiding that by using get_all().
    for name in frozenset(parsed.keys()):
        # Header names in RFC are case insensitive, so we'll normalize to all
        # lower case to make comparisons easier.
        name = name.lower()

        # We use get_all() here, even for fields that aren't multiple use,
        # because otherwise someone could have e.g. two Name fields, and we
        # would just silently ignore it rather than doing something about it.
        headers = parsed.get_all(name) or []

        # The way the email module works when parsing bytes is that it
        # unconditionally decodes the bytes as ascii using the surrogateescape
        # handler. When you pull that data back out (such as with get_all() ),
        # it looks to see if the str has any surrogate escapes, and if it does
        # it wraps it in a Header object instead of returning the string.
        #
        # As such, we'll look for those Header objects, and fix up the encoding.
        value = []
        # Flag if we have run into any issues processing the headers, thus
        # signalling that the data belongs in 'unparsed'.
        valid_encoding = True
        for h in headers:
            # It's unclear if this can return more types than just a Header or
            # a str, so we'll just assert here to make sure.
            assert isinstance(h, (email.header.Header, str))

            # If it's a header object, we need to do our little dance to get
            # the real data out of it. In cases where there is invalid data
            # we're going to end up with mojibake, but there's no obvious, good
            # way around that without reimplementing parts of the Header object
            # ourselves.
            #
            # That should be fine since, if mojibake happens, this key is
            # going into the unparsed dict anyways.
            if isinstance(h, email.header.Header):
                # The Header object stores its data as chunks, and each chunk
                # can be independently encoded, so we'll need to check each
                # of them.
                chunks: list[tuple[bytes, str | None]] = []
                for bin, encoding in email.header.decode_header(h):
                    try:
                        bin.decode("utf8", "strict")
                    except UnicodeDecodeError:
                        # Enable mojibake.
                        encoding = "latin1"
                        valid_encoding = False
                    else:
                        encoding = "utf8"
                    chunks.append((bin, encoding))

                # Turn our chunks back into a Header object, then let that
                # Header object do the right thing to turn them into a
                # string for us.
                value.append(str(email.header.make_header(chunks)))
            # This is already a string, so just add it.
            else:
                value.append(h)

        # We've processed all of our values to get them into a list of str,
        # but we may have mojibake data, in which case this is an unparsed
        # field.
        if not valid_encoding:
            unparsed[name] = value
            continue

        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
        if raw_name is None:
            # This is a bit of a weird situation, we've encountered a key that
            # we don't know what it means, so we don't know whether it's meant
            # to be a list or not.
            #
            # Since we can't really tell one way or another, we'll just leave it
            # as a list, even though it may be a single item list, because that's
            # what makes the most sense for email headers.
            unparsed[name] = value
            continue

        # If this is one of our string fields, then we'll check to see if our
        # value is a list of a single item. If it is then we'll assume that
        # it was emitted as a single string, and unwrap the str from inside
        # the list.
        #
        # If it's any other kind of data, then we haven't the faintest clue
        # what we should parse it as, and we have to just add it to our list
        # of unparsed stuff.
        if raw_name in _STRING_FIELDS and len(value) == 1:
            raw[raw_name] = value[0]
        # If this is one of our list of string fields, then we can just assign
        # the value, since email *only* has strings, and our get_all() call
        # above ensures that this is a list.
        elif raw_name in _LIST_FIELDS:
            raw[raw_name] = value
        # Special Case: Keywords
        # The keywords field is implemented in the metadata spec as a str,
        # but it conceptually is a list of strings, and is serialized using
        # ", ".join(keywords), so we'll do some light data massaging to turn
        # this into what it logically is.
        elif raw_name == "keywords" and len(value) == 1:
            raw[raw_name] = _parse_keywords(value[0])
        # Special Case: Project-URL
        # The project urls is implemented in the metadata spec as a list of
        # specially-formatted strings that represent a key and a value, which
        # is fundamentally a mapping, however the email format doesn't support
        # mappings in a sane way, so it was crammed into a list of strings
        # instead.
        #
        # We will do a little light data massaging to turn this into a map as
        # it logically should be.
        elif raw_name == "project_urls":
            try:
                raw[raw_name] = _parse_project_urls(value)
            except KeyError:
                # Duplicate labels: treat the whole field as unparseable.
                unparsed[name] = value
        # Nothing that we've done has managed to parse this, so it'll just
        # throw it in our unparseable data and move on.
        else:
            unparsed[name] = value

    # We need to support getting the Description from the message payload in
    # addition to getting it from the headers. This does mean, though, there
    # is the possibility of it being set both ways, in which case we put both
    # in 'unparsed' since we don't know which is right.
    try:
        payload = _get_payload(parsed, data)
    except ValueError:
        unparsed.setdefault("description", []).append(
            parsed.get_payload(decode=isinstance(data, bytes))  # type: ignore[call-overload]
        )
    else:
        if payload:
            # Check to see if we've already got a description, if so then both
            # it, and this body move to unparseable.
            if "description" in raw:
                description_header = cast(str, raw.pop("description"))
                unparsed.setdefault("description", []).extend(
                    [description_header, payload]
                )
            elif "description" in unparsed:
                unparsed["description"].append(payload)
            else:
                raw["description"] = payload

    # We need to cast our `raw` to a RawMetadata, because a TypedDict only
    # supports literal key names, but we're computing our key names on purpose;
    # the way this function is implemented, our `TypedDict` can only end up
    # with valid key names.
    return cast(RawMetadata, raw), unparsed


_NOT_FOUND = object()


# Keep the two values in sync.
_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]

_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])


class _Validator(Generic[T]):
    """Validate a metadata field.

    All _process_*() methods correspond to a core metadata field. The method is
    called with the field's raw value. If the raw value is valid it is returned
    in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
    If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
    as appropriate).
    """

    name: str
    raw_name: str
    added: _MetadataVersion

    def __init__(
        self,
        *,
        added: _MetadataVersion = "1.0",
    ) -> None:
        self.added = added

    def __set_name__(self, _owner: Metadata, name: str) -> None:
        self.name = name
        self.raw_name = _RAW_TO_EMAIL_MAPPING[name]

    def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
        # With Python 3.8, the caching can be replaced with functools.cached_property().
        # No need to check the cache as attribute lookup will resolve into the
        # instance's __dict__ before __get__ is called.
        cache = instance.__dict__
        value = instance._raw.get(self.name)

        # To make the _process_* methods easier, we'll check if the value is None
        # and if this field is NOT a required attribute, and if both of those
        # things are true, we'll skip the converter. This will mean that the
        # converters never have to deal with the None union.
        if self.name in _REQUIRED_ATTRS or value is not None:
            try:
                converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
            except AttributeError:
                pass
            else:
                value = converter(value)

        cache[self.name] = value
        try:
            del instance._raw[self.name]  # type: ignore[misc]
        except KeyError:
            pass

        return cast(T, value)

    def _invalid_metadata(
        self, msg: str, cause: Exception | None = None
    ) -> InvalidMetadata:
        """Build an InvalidMetadata for this field, expanding ``{field}`` in *msg*."""
        exc = InvalidMetadata(
            self.raw_name, msg.format_map({"field": repr(self.raw_name)})
        )
        exc.__cause__ = cause
        return exc

    def _process_metadata_version(self, value: str) -> _MetadataVersion:
        # Implicitly makes Metadata-Version required.
        if value not in _VALID_METADATA_VERSIONS:
            raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
        return cast(_MetadataVersion, value)

    def _process_name(self, value: str) -> str:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        # Validate the name as a side-effect.
        try:
            utils.canonicalize_name(value, validate=True)
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return value

    def _process_version(self, value: str) -> version_module.Version:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        try:
            return version_module.parse(value)
        except version_module.InvalidVersion as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_summary(self, value: str) -> str:
        """Check the field contains no newlines."""
        if "\n" in value:
            raise self._invalid_metadata("{field} must be a single line")
        return value

    def _process_description_content_type(self, value: str) -> str:
        content_types = {"text/plain", "text/x-rst", "text/markdown"}
        message = email.message.EmailMessage()
        message["content-type"] = value

        content_type, parameters = (
            # Defaults to `text/plain` if parsing failed.
            message.get_content_type().lower(),
            message["content-type"].params,
        )
        # Check if content-type is valid or defaulted to `text/plain` and thus was
        # not parseable.
        if content_type not in content_types or content_type not in value.lower():
            raise self._invalid_metadata(
                f"{{field}} must be one of {list(content_types)}, not {value!r}"
            )

        charset = parameters.get("charset", "UTF-8")
        if charset != "UTF-8":
            # BUGFIX: this previously interpolated `list(charset)`, which split
            # the charset string into individual characters in the message.
            raise self._invalid_metadata(
                f"{{field}} can only specify the UTF-8 charset, not {charset!r}"
            )

        markdown_variants = {"GFM", "CommonMark"}
        variant = parameters.get("variant", "GFM")  # Use an acceptable default.
        if content_type == "text/markdown" and variant not in markdown_variants:
            raise self._invalid_metadata(
                f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
                f"not {variant!r}",
            )
        return value

    def _process_dynamic(self, value: list[str]) -> list[str]:
        # Lowercase once; the same list is validated and returned.
        lowered = [dynamic_field.lower() for dynamic_field in value]
        for dynamic_field in lowered:
            if dynamic_field in {"name", "version", "metadata-version"}:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not allowed as a dynamic field"
                )
            elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not a valid dynamic field"
                )
        return lowered

    def _process_provides_extra(
        self,
        value: list[str],
    ) -> list[utils.NormalizedName]:
        normalized_names = []
        try:
            for name in value:
                normalized_names.append(utils.canonicalize_name(name, validate=True))
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{name!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return normalized_names

    def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
        try:
            return specifiers.SpecifierSet(value)
        except specifiers.InvalidSpecifier as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_requires_dist(
        self,
        value: list[str],
    ) -> list[requirements.Requirement]:
        reqs = []
        try:
            for req in value:
                reqs.append(requirements.Requirement(req))
        except requirements.InvalidRequirement as exc:
            raise self._invalid_metadata(
                f"{req!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return reqs

    def _process_license_expression(
        self, value: str
    ) -> NormalizedLicenseExpression | None:
        try:
            return licenses.canonicalize_license_expression(value)
        except ValueError as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_license_files(self, value: list[str]) -> list[str]:
        paths = []
        for path in value:
            if ".." in path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, "
                    "parent directory indicators are not allowed"
                )
            if "*" in path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, paths must be resolved"
                )
            if (
                pathlib.PurePosixPath(path).is_absolute()
                or pathlib.PureWindowsPath(path).is_absolute()
            ):
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, paths must be relative"
                )
            if pathlib.PureWindowsPath(path).as_posix() != path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, "
                    "paths must use '/' delimiter"
                )
            paths.append(path)
        return paths


class Metadata:
    """Representation of distribution metadata.

    Compared to :class:`RawMetadata`, this class provides objects representing
    metadata fields instead of only using built-in types. Any invalid metadata
    will cause :exc:`InvalidMetadata` to be raised (with a
    :py:attr:`~BaseException.__cause__` attribute as appropriate).
    """

    _raw: RawMetadata

    @classmethod
    def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
        """Create an instance from :class:`RawMetadata`.

        If *validate* is true, all metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        ins = cls()
        ins._raw = data.copy()  # Mutations occur due to caching enriched values.

        if validate:
            exceptions: list[Exception] = []
            try:
                metadata_version = ins.metadata_version
                metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
            except InvalidMetadata as metadata_version_exc:
                exceptions.append(metadata_version_exc)
                metadata_version = None

            # Make sure to check for the fields that are present, the required
            # fields (so their absence can be reported).
            fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
            # Remove fields that have already been checked.
            fields_to_check -= {"metadata_version"}

            for key in fields_to_check:
                try:
                    if metadata_version:
                        # Can't use getattr() as that triggers descriptor protocol which
                        # will fail due to no value for the instance argument.
                        try:
                            field_metadata_version = cls.__dict__[key].added
                        except KeyError:
                            exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
                            exceptions.append(exc)
                            continue
                        field_age = _VALID_METADATA_VERSIONS.index(
                            field_metadata_version
                        )
                        if field_age > metadata_age:
                            field = _RAW_TO_EMAIL_MAPPING[key]
                            exc = InvalidMetadata(
                                field,
                                f"{field} introduced in metadata version "
                                f"{field_metadata_version}, not {metadata_version}",
                            )
                            exceptions.append(exc)
                            continue
                    getattr(ins, key)
                except InvalidMetadata as exc:
                    exceptions.append(exc)

            if exceptions:
                raise ExceptionGroup("invalid metadata", exceptions)

        return ins

    @classmethod
    def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
        """Parse metadata from email headers.

        If *validate* is true, the metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        raw, unparsed = parse_email(data)

        if validate:
            exceptions: list[Exception] = []
            for unparsed_key in unparsed:
                if unparsed_key in _EMAIL_TO_RAW_MAPPING:
                    message = f"{unparsed_key!r} has invalid data"
                else:
                    message = f"unrecognized field: {unparsed_key!r}"
                exceptions.append(InvalidMetadata(unparsed_key, message))

            if exceptions:
                raise ExceptionGroup("unparsed", exceptions)

        try:
            return cls.from_raw(raw, validate=validate)
        except ExceptionGroup as exc_group:
            raise ExceptionGroup(
                "invalid or unparsed metadata", exc_group.exceptions
            ) from None

    metadata_version: _Validator[_MetadataVersion] = _Validator()
    """:external:ref:`core-metadata-metadata-version`
    (required; validated to be a valid metadata version)"""
    # `name` is not normalized/typed to NormalizedName so as to provide access to
    # the original/raw name.
    name: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-name`
    (required; validated using :func:`~packaging.utils.canonicalize_name` and its
    *validate* parameter)"""
    version: _Validator[version_module.Version] = _Validator()
    """:external:ref:`core-metadata-version` (required)"""
    dynamic: _Validator[list[str] | None] = _Validator(
        added="2.2",
    )
    """:external:ref:`core-metadata-dynamic`
    (validated against core metadata field names and lowercased)"""
    platforms: _Validator[list[str] | None] = _Validator()
    """:external:ref:`core-metadata-platform`"""
    supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-supported-platform`"""
    summary: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
    description: _Validator[str | None] = _Validator()  # TODO 2.1: can be in body
    """:external:ref:`core-metadata-description`"""
    description_content_type: _Validator[str | None] = _Validator(added="2.1")
    """:external:ref:`core-metadata-description-content-type` (validated)"""
    keywords: _Validator[list[str] | None] = _Validator()
    """:external:ref:`core-metadata-keywords`"""
    home_page: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-home-page`"""
    download_url: _Validator[str | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-download-url`"""
    author: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-author`"""
    author_email: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-author-email`"""
    maintainer: _Validator[str | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer`"""
    maintainer_email: _Validator[str | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer-email`"""
    license: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-license`"""
    license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator(
        added="2.4"
    )
    """:external:ref:`core-metadata-license-expression`"""
    license_files: _Validator[list[str] | None] = _Validator(added="2.4")
    """:external:ref:`core-metadata-license-file`"""
    classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-classifier`"""
    requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-dist`"""
    requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-python`"""
    # Because `Requires-External` allows for non-PEP 440 version specifiers, we
    # don't do any processing on the values.
    requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-requires-external`"""
    project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-project-url`"""
    # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
    # regardless of metadata version.
    provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
        added="2.1",
    )
    """:external:ref:`core-metadata-provides-extra`"""
    provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-provides-dist`"""
    obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-obsoletes-dist`"""
    requires: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Requires`` (deprecated)"""
    provides: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Provides`` (deprecated)"""
    obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Obsoletes`` (deprecated)"""