From 7309ec687ef09f71250033fb0d95d349ba420831 Mon Sep 17 00:00:00 2001
From: Jan Lindemann
Date: Fri, 24 Apr 2026 16:27:53 +0200
Subject: [PATCH] lib.TarIo.extract(): Return list of extracted files

Make TarIo.extract return the list of files that were actually
extracted.

To collect that list in one place, extract() now reads and filters the
source tar itself and hands the filtered blob to the _extract()
implementations, which no longer take a path_filter.

Signed-off-by: Jan Lindemann
---
 src/python/jw/pkg/lib/TarIo.py | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/src/python/jw/pkg/lib/TarIo.py b/src/python/jw/pkg/lib/TarIo.py
index 7a553786..671bf8fd 100644
--- a/src/python/jw/pkg/lib/TarIo.py
+++ b/src/python/jw/pkg/lib/TarIo.py
@@ -20,7 +20,7 @@ class TarIo(CopyContext):
     def _match(self, path: str, path_filter: list[str]) -> bool:
         return path in path_filter
 
-    def _filter_tar_file(self, blob: bytes, path_filter: list[str]|None=None) -> bytes:
+    def _filter_tar_file(self, blob: bytes, path_filter: list[str]|None=None, matched: list[str]|None=None) -> bytes:
         ret = io.BytesIO()
         with tarfile.open(fileobj=ret, mode='w') as tf_out:
             tf_in = TarFile(fileobj=io.BytesIO(blob))
@@ -28,13 +28,19 @@ class TarIo(CopyContext):
                 if path_filter is not None and not self._match(info.name, path_filter):
                     continue
                 log(DEBUG, f'Adding {info.name}')
+                if matched is not None:
+                    matched.append(info.name)
                 buf = tf_in.extractfile(info)
                 tf_out.addfile(info, buf)
         return ret.getvalue()
 
-    async def _read_filtered(self, path, path_filter: list[str]|None=None) -> bytes:
-        blob = (await self.src.get(path)).stdout
-        return self._filter_tar_file(blob, path_filter)
+    async def _read_filtered(self, path, path_filter: list[str]|None=None, matched: list[str]|None=None) -> bytes:
+        try:
+            blob = (await self.src.get(path)).stdout
+        except Exception as e:
+            log(ERR, f'Failed to read tar file "{path}" ({e})')
+            raise
+        return self._filter_tar_file(blob, path_filter, matched=matched)
 
     def _add(self, tf: TarFile, path: str, st: StatResult, contents: bytes) -> None:
         file_obj = io.BytesIO(contents)
@@ -55,11 +61,14 @@ class TarIo(CopyContext):
             self._add(tf, path, st, contents)
 
     @abc.abstractmethod
-    async def _extract(self, root: str|None=None, path_filter: list[str]|None=None) -> None:
+    async def _extract(self, blob: bytes, root: str|None=None) -> None:
         raise NotImplementedError()
 
-    async def extract(self, root: str|None=None, path_filter: list[str]|None=None) -> None:
-        return await self._extract(root=root, path_filter=path_filter)
+    async def extract(self, root: str|None=None, path_filter: list[str]|None=None) -> list[str]:
+        ret: list[str] = []
+        filtered = await self._read_filtered(self.src.root, path_filter, matched=ret)
+        await self._extract(blob=filtered, root=root)
+        return ret
 
     @classmethod
     def create(cls, *args, type: str=None, **kwargs):
@@ -70,8 +79,8 @@ class TarIo(CopyContext):
 
 class TarIoTarFile(TarIo):
 
-    async def _extract(self, root: str|None=None, path_filter: list[str]|None=None) -> None:
-        tf = TarFile(fileobj=io.BytesIO(await self._read_filtered(self.src.root, path_filter)))
+    async def _extract(self, blob: bytes, root: str|None=None) -> None:
+        tf = TarFile(fileobj=io.BytesIO(blob))
         for info in tf.getmembers():
             log(DEBUG, f'Extracting {info.name}')
             path = root + '/' + info.name if root else info.name
@@ -92,9 +101,9 @@ class TarIoTarFile(TarIo):
 
 class TarIoTarExec(TarIo):
 
-    async def _extract(self, root: str|None=None, path_filter: list[str]|None=None) -> None:
+    async def _extract(self, blob: bytes, root: str|None=None) -> None:
         cmd = ['tar']
         if root is not None:
             cmd += ['-C', root]
         cmd += ['-x', '-f', '-']
-        await self.dst.run(cmd, cmd_input=await self._read_filtered(self.src.root, path_filter))
+        await self.dst.run(cmd, cmd_input=blob)