# Source code for ocebuild.filesystem.archives

## @file
# Copyright (c) 2023, The OCE Build Authors. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
##
"""Methods for handling and extracting archive formats."""

from contextlib import contextmanager
from shutil import rmtree, unpack_archive
from tempfile import mkdtemp, NamedTemporaryFile
from urllib.request import Request

from typing import Generator, Union

from .cache import UNPACK_DIR

from ocebuild.parsers.regex import re_match
from ocebuild.sources import request

#NOTE: This import was remapped from 'third_party' to 'ocebuild.third_party'.
from ocebuild.third_party.cpython.pathlib import Path


def _archive_suffix(url: Union[str, Request],
                    filename: Union[str, None]) -> str:
    """Builds the temporary-file suffix used to detect the archive format.

    `shutil.unpack_archive` picks the unpacker from the end of the file name,
    so the suffix must end with the archive's real extension (including
    multi-part ones such as `.tar.gz`) and must not contain path separators.

    Args:
      url: URL (or prepared `Request`) the archive was fetched from.
      filename: File name advertised by the server, if any.

    Returns:
      A suffix of the form `-<basename>`.
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import list.
    from urllib.parse import urlsplit

    if not filename:
        # A `Request` is not a string; read its URL explicitly.
        location = url.full_url if isinstance(url, Request) else str(url)
        # Only the path component names the file; the host, query string and
        # fragment would otherwise leak dots and slashes into the suffix.
        filename = urlsplit(location).path.rstrip('/')

    # Drop any directory part (POSIX or Windows separators) so the suffix can
    # never point outside of the temporary directory.
    basename = filename.replace('\\', '/').rsplit('/', maxsplit=1)[-1]
    return f'-{basename}'


@contextmanager
def extract_archive(url: Union[str, Request],
                    persist: bool=False
                    ) -> Generator[Path, str, None]:
    """Extracts an archive from a URL and yields a temporary extraction directory.

    Args:
      url: URL of the archive file, or a prepared `Request` for it.
      persist: Flag to disable cleanup of the temporary directory.

    Yields:
      tmp_dir (Path): Path to the temporary directory holding the extracted files.

    Example:
      >>> with extract_archive('https://example.com/foo.zip') as tmp_dir:
      ...   print(tmp_dir)
      # -> "/tmp/xxxxxx"
    """
    tmp_dir = mkdtemp(dir=UNPACK_DIR)
    try:
        #TODO: If github file url, test `raw.githubusercontent` redirect,
        #      otherwise parse and extract from an archive url.
        with request(url) as response:
            # Extract filename from request headers. The capture group excludes
            # quotes and semicolons so a quoted value does not keep its
            # trailing `"` (which would hide the archive extension).
            # An absent header is passed as '' rather than None.
            disposition = response.headers.get('Content-Disposition') or ''
            filename = re_match(pattern=r'^attachment; filename="?([^";]+)"?;?$',
                                string=disposition,
                                group=1)

            # Write archive to a temporary file.
            with NamedTemporaryFile(suffix=_archive_suffix(url, filename),
                                    dir=UNPACK_DIR) as tmp_file:
                tmp_file.write(response.read())
                # Make sure every byte is on disk before it is re-read by name.
                tmp_file.flush()
                # Extract the archive to the temporary directory.
                unpack_archive(tmp_file.name, tmp_dir)

        # Yield only after the response and the temporary archive are closed,
        # so neither is held open for the lifetime of the caller's block.
        yield Path(tmp_dir)
    finally:
        # Cleanup after context exits
        if not persist:
            rmtree(tmp_dir)
# Public API of this module.
__all__ = [
    # Functions (1)
    "extract_archive",
]