# Viewing file: sources.py (8.43 KB)
import logging import mimetypes import os from collections import defaultdict from typing import Callable, Dict, Iterable, List, Optional, Tuple
from pip._vendor.packaging.utils import ( InvalidSdistFilename, InvalidWheelFilename, canonicalize_name, parse_sdist_filename, parse_wheel_filename, )
from pip._internal.models.candidate import InstallationCandidate from pip._internal.models.link import Link from pip._internal.utils.urls import path_to_url, url_to_path from pip._internal.vcs import is_url
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Type aliases used in the signatures of the link sources in this module.
# Iterable of installation candidates.
FoundCandidates = Iterable[InstallationCandidate]
# Iterable of links.
FoundLinks = Iterable[Link]
# Callable that takes a page link and returns the candidates found for it.
CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
# Callable that takes a link and returns a bool.
PageValidator = Callable[[Link], bool]
class LinkSource:
    """Interface shared by every kind of link source.

    The members defined here only raise ``NotImplementedError``;
    subclasses are expected to override all of them.
    """

    @property
    def link(self) -> Optional[Link]:
        """Return the underlying link, or ``None`` when there isn't one."""
        raise NotImplementedError

    def page_candidates(self) -> FoundCandidates:
        """Return candidates found by parsing an archive listing HTML file."""
        raise NotImplementedError

    def file_links(self) -> FoundLinks:
        """Return links found by specifying archives directly."""
        raise NotImplementedError
def _is_html_file(file_url: str) -> bool: return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"
class _FlatDirectoryToUrls:
    """Scans a directory once and caches the results.

    Two collections are filled by the same scan:

    * ``page_candidates``: URLs of the entries guessed to be HTML files.
    * ``project_name_to_urls``: URLs of the entries whose file name parses
      as a wheel or an sdist, grouped by the project name taken from that
      file name.

    The scan is lazy: it runs the first time either property is read.
    """

    def __init__(self, path: str) -> None:
        """Remember *path*; the directory is not read until first access."""
        self._path = path
        self._page_candidates: List[str] = []
        self._project_name_to_urls: Dict[str, List[str]] = defaultdict(list)
        self._scanned_directory = False

    def _scan_directory(self) -> None:
        """Scans directory once and populates both page_candidates
        and project_name_to_urls at the same time
        """
        # Use the iterator as a context manager so the directory handle is
        # released deterministically, even if the loop exits on an error.
        with os.scandir(self._path) as entries:
            for entry in entries:
                url = path_to_url(entry.path)
                if _is_html_file(url):
                    self._page_candidates.append(url)
                    continue

                # File must have a valid wheel or sdist name,
                # otherwise not worth considering as a package
                try:
                    project_filename = parse_wheel_filename(entry.name)[0]
                except InvalidWheelFilename:
                    try:
                        project_filename = parse_sdist_filename(entry.name)[0]
                    except InvalidSdistFilename:
                        continue

                self._project_name_to_urls[project_filename].append(url)
        # Only mark the scan as done once the whole directory was processed.
        self._scanned_directory = True

    @property
    def page_candidates(self) -> List[str]:
        """URLs of the HTML files in the directory (scans on first use)."""
        if not self._scanned_directory:
            self._scan_directory()

        return self._page_candidates

    @property
    def project_name_to_urls(self) -> Dict[str, List[str]]:
        """Mapping of project name to archive URLs (scans on first use)."""
        if not self._scanned_directory:
            self._scan_directory()

        return self._project_name_to_urls
class _FlatDirectorySource(LinkSource):
    """Link source specified by ``--find-links=<path-to-dir>``.

    This looks at the content of the directory, and returns:

    * ``page_candidates``: Links listed on each HTML file in the directory.
    * ``file_links``: Archives in the directory.
    """

    # Directory scans shared by all instances, keyed by the directory path
    # exactly as it was passed to ``__init__``.
    _paths_to_urls: Dict[str, _FlatDirectoryToUrls] = {}

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        path: str,
        project_name: str,
    ) -> None:
        """Bind the source to *path* and to the canonicalized *project_name*.

        :param candidates_from_page: Callable turning a page link into
            installation candidates.
        :param path: Directory to look into.
        :param project_name: Project whose archives ``file_links`` yields.
        """
        self._candidates_from_page = candidates_from_page
        self._project_name = canonicalize_name(project_name)

        # Get existing instance of _FlatDirectoryToUrls if it exists
        cached = self._paths_to_urls.get(path)
        if cached is None:
            cached = _FlatDirectoryToUrls(path=path)
            self._paths_to_urls[path] = cached
        self._path_to_urls = cached

    @property
    def link(self) -> Optional[Link]:
        """Always ``None``: a flat directory has no single underlying link."""
        return None

    def page_candidates(self) -> FoundCandidates:
        """Yield the candidates listed on every HTML file in the directory."""
        for url in self._path_to_urls.page_candidates:
            yield from self._candidates_from_page(Link(url))

    def file_links(self) -> FoundLinks:
        """Yield a link for every archive recorded under this project name."""
        # The mapping is a defaultdict shared between instances: use ``get``
        # so that looking up a project with no archives does not insert an
        # empty entry into the shared cache.
        urls = self._path_to_urls.project_name_to_urls.get(
            self._project_name, ()
        )
        for url in urls:
            yield Link(url)
class _LocalFileSource(LinkSource):
    """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

    A supplied URL must use the ``file:`` scheme; a supplied path is turned
    into a URL before it gets here. Exactly one of the two methods yields
    anything, depending on whether the link is guessed to be HTML:

    * ``page_candidates``: Links listed on the file, when it is HTML.
    * ``file_links``: The file itself, when it is not HTML.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        """Store the page parser callable and the link to the local file."""
        self._candidates_from_page = candidates_from_page
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """The link this source was built from."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates parsed from the file if it is an HTML file."""
        if _is_html_file(self._link.url):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself unless it points to an HTML file."""
        if not _is_html_file(self._link.url):
            yield self._link
class _RemoteFileSource(LinkSource):
    """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

    This returns:

    * ``page_candidates``: Links listed on the page, provided the page
      validator accepts the link.
    * ``file_links``: The link itself, unconditionally.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        page_validator: PageValidator,
        link: Link,
    ) -> None:
        """Store the page parser, the page validator and the remote link."""
        self._candidates_from_page = candidates_from_page
        self._page_validator = page_validator
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """The link this source was built from."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates parsed from the link if the validator accepts it."""
        if self._page_validator(self._link):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself."""
        yield self._link
class _IndexDirectorySource(LinkSource):
    """``--[extra-]index-url=<path-to-directory>``.

    Handled the same way as a remote URL: the link is passed straight to
    ``candidates_from_page``, which contains the logic for this case
    (appending ``index.html`` to the link).
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        """Store the page parser callable and the link to the directory."""
        self._candidates_from_page = candidates_from_page
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """The link this source was built from."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield whatever ``candidates_from_page`` finds for the link."""
        yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Return an empty tuple: this source never provides file links."""
        return ()
def build_source(
    location: str,
    *,
    candidates_from_page: CandidatesFromPage,
    page_validator: PageValidator,
    expand_dir: bool,
    cache_link_parsing: bool,
    project_name: str,
) -> Tuple[Optional[str], Optional[LinkSource]]:
    """Build the link source matching *location*.

    *location* is classified, in this order, as an existing local path, a
    ``file:`` URL, or any other URL accepted by ``is_url``.

    :return: A ``(url, source)`` pair:

        * ``(None, None)`` when *location* matches none of the above
          (a warning is logged).
        * ``(url, None)`` when it resolves to a local path that is neither
          a file nor a directory (a warning is logged).
        * ``(url, source)`` otherwise. The source is remote when there is
          no local path. For a directory it is a flat-directory source
          when *expand_dir* is true and an index-directory source when it
          is false. For a regular file it is a local-file source.
    """
    resolved_url: Optional[str] = None
    local_path: Optional[str] = None

    if os.path.exists(location):  # Is a local path.
        resolved_url = path_to_url(location)
        local_path = location
    elif location.startswith("file:"):  # A file: URL.
        resolved_url = location
        local_path = url_to_path(location)
    elif is_url(location):
        resolved_url = location

    if resolved_url is None:
        logger.warning(
            "Location '%s' is ignored: "
            "it is either a non-existing path or lacks a specific scheme.",
            location,
        )
        return (None, None)

    # No local path: the location can only be fetched remotely.
    if local_path is None:
        remote_source: LinkSource = _RemoteFileSource(
            candidates_from_page=candidates_from_page,
            page_validator=page_validator,
            link=Link(resolved_url, cache_link_parsing=cache_link_parsing),
        )
        return (resolved_url, remote_source)

    if os.path.isdir(local_path):
        if expand_dir:
            return (
                resolved_url,
                _FlatDirectorySource(
                    candidates_from_page=candidates_from_page,
                    path=local_path,
                    project_name=project_name,
                ),
            )
        return (
            resolved_url,
            _IndexDirectorySource(
                candidates_from_page=candidates_from_page,
                link=Link(resolved_url, cache_link_parsing=cache_link_parsing),
            ),
        )

    if os.path.isfile(local_path):
        return (
            resolved_url,
            _LocalFileSource(
                candidates_from_page=candidates_from_page,
                link=Link(resolved_url, cache_link_parsing=cache_link_parsing),
            ),
        )

    logger.warning(
        "Location '%s' is ignored: it is neither a file nor a directory.",
        location,
    )
    return (resolved_url, None)
|