# Source code for ocebuild.filesystem.archives

## @file
# Copyright (c) 2023, The OCE Build Authors. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
##
"""Methods for handling and extracting archive formats."""

from contextlib import contextmanager
from shutil import rmtree, unpack_archive
from tempfile import mkdtemp, NamedTemporaryFile
from urllib.request import Request

from typing import Generator, Union

from .cache import UNPACK_DIR

from ocebuild.parsers.regex import re_match
from ocebuild.sources import request

#NOTE: This import was remapped from 'third_party' to 'ocebuild.third_party'.
from ocebuild.third_party.cpython.pathlib import Path


@contextmanager
def extract_archive(url: Union[str, Request],
                    persist: bool=False
                    ) -> Generator[Path, None, None]:
  """Downloads an archive from a URL and yields its extraction directory.

  The archive is written to a temporary file inside `UNPACK_DIR`, unpacked into
  a fresh temporary directory (also inside `UNPACK_DIR`), and the temporary
  file is removed again. `shutil.unpack_archive` detects the archive format
  from the temporary file's name, which is derived from the response's
  `Content-Disposition` header or, failing that, from the URL path.

  Args:
    url: URL of the archive file, or a prepared `urllib` request for it.
    persist: Flag to disable cleanup of the temporary directory.

  Yields:
    tmp_dir (Path): Path to the temporary extraction directory.

  Raises:
    Any exception raised while requesting or unpacking the archive is
    propagated; the temporary directory is removed first unless `persist`
    is set.

  Example:
    >>> with extract_archive('https://example.com/foo.zip') as tmp_dir:
    ...   print(tmp_dir)
    # -> "/tmp/xxxxxx"
  """
  # Function-scope imports keep the additional dependencies local to this block.
  from os import remove
  from urllib.parse import urlparse

  # A `Request` carries its URL in `full_url`; the fallback below needs a str.
  url_string = url.full_url if isinstance(url, Request) else url

  tmp_dir = mkdtemp(dir=UNPACK_DIR)
  try:
    #TODO: If github file url, test `raw.githubusercontent` redirect,
    #      otherwise parse and extract from an archive url.
    with request(url) as response:
      # Extract filename from request headers. The header may be absent, so an
      # empty string is matched instead of `None`.
      disposition = response.headers.get('Content-Disposition') or ''
      # The lazy group keeps an optional closing quote or trailing semicolon
      # out of the captured filename (a greedy group would swallow them).
      filename = re_match(pattern=r'^attachment; filename="?(.*?)"?;?$',
                          string=disposition,
                          group=1)
      # Fall back to the last component of the URL path (ignoring any query
      # string or fragment). Only the final component is kept so the suffix
      # never contains a directory separator.
      archive_name = Path(filename or urlparse(url_string).path).name
      # Write archive to a temporary file. The file is created with
      # `delete=False` and closed before unpacking because an open named
      # temporary file cannot be reopened by name on every platform.
      tmp_file = NamedTemporaryFile(suffix=f'-{archive_name}',
                                    dir=UNPACK_DIR,
                                    delete=False)
      try:
        with tmp_file:
          tmp_file.write(response.read())
        # Extract the archive to the temporary directory.
        unpack_archive(tmp_file.name, tmp_dir)
      finally:
        # The downloaded archive is only needed for extraction.
        remove(tmp_file.name)
    # Yield the temporary directory.
    yield Path(tmp_dir)
  finally:
    # Cleanup after context exits
    if not persist:
      rmtree(tmp_dir)


# Public API of this module: functions (1).
__all__ = ["extract_archive"]