lib.TarIo.extract(): Return list of extracted files

Make TarIo.extract return the list of files that were actually extracted.

Signed-off-by: Jan Lindemann <jan@janware.com>
This commit is contained in:
Jan Lindemann 2026-04-24 16:27:53 +02:00
commit 7309ec687e
Signed by: Jan Lindemann
GPG key ID: 3750640C9E25DD61

View file

@ -20,7 +20,7 @@ class TarIo(CopyContext):
def _match(self, path: str, path_filter: list[str]) -> bool:
return path in path_filter
def _filter_tar_file(self, blob: bytes, path_filter: list[str]|None=None) -> bytes:
def _filter_tar_file(self, blob: bytes, path_filter: list[str]|None=None, matched: list[str]|None=None) -> bytes:
ret = io.BytesIO()
with tarfile.open(fileobj=ret, mode='w') as tf_out:
tf_in = TarFile(fileobj=io.BytesIO(blob))
@ -28,13 +28,20 @@ class TarIo(CopyContext):
if path_filter is not None and not self._match(info.name, path_filter):
continue
log(DEBUG, f'Adding {info.name}')
if matched is not None:
matched.append(info.name)
buf = tf_in.extractfile(info)
tf_out.addfile(info, buf)
return ret.getvalue()
async def _read_filtered(self, path, path_filter: list[str]|None=None) -> bytes:
blob = (await self.src.get(path)).stdout
return self._filter_tar_file(blob, path_filter)
async def _read_filtered(self, path, path_filter: list[str]|None=None, matched: list[str]|None=None) -> bytes:
    """Fetch the tar archive at ``path`` from the source and filter it.

    The archive bytes are the ``stdout`` attribute of the result of
    ``self.src.get(path)``; they are passed through ``_filter_tar_file``.

    Args:
        path: Location handed to ``self.src.get``.
        path_filter: Forwarded to ``_filter_tar_file``.
        matched: Optional list forwarded to ``_filter_tar_file`` as
            ``matched``.

    Returns:
        The bytes returned by ``_filter_tar_file``.

    Raises:
        Exception: Whatever the fetch raised, re-raised unchanged after
            being logged.
    """
    try:
        blob = (await self.src.get(path)).stdout
    except Exception as e:
        # Log for context, then propagate. No breakpoint() here: it would
        # drop a non-interactive run into a debugger prompt.
        log(ERR, f'Failed to read tar file "{path}" ({str(e)})')
        raise
    return self._filter_tar_file(blob, path_filter, matched=matched)
def _add(self, tf: TarFile, path: str, st: StatResult, contents: bytes) -> None:
file_obj = io.BytesIO(contents)
@ -55,11 +62,14 @@ class TarIo(CopyContext):
self._add(tf, path, st, contents)
@abc.abstractmethod
async def _extract(self, root: str|None=None, path_filter: list[str]|None=None) -> None:
# Backend hook: unpack the tar archive bytes in blob, relative to root when one is given.
# Declared abstract, so concrete subclasses must override it; this base version only raises.
async def _extract(self, blob: bytes, root: str|None=None) -> None:
raise NotImplementedError()
async def extract(self, root: str|None=None, path_filter: list[str]|None=None) -> None:
return await self._extract(root=root, path_filter=path_filter)
async def extract(self, root: str|None=None, path_filter: list[str]|None=None) -> list[str]:
    """Filter the source archive, extract it, and return the collected names.

    A fresh list is handed to ``_read_filtered`` as ``matched`` together
    with ``self.src.root`` and ``path_filter``; the filtered archive it
    returns is then passed to ``_extract`` along with ``root``.

    Returns:
        The list that was passed to ``_read_filtered`` as ``matched``.
    """
    names: list[str] = []
    archive = await self._read_filtered(self.src.root, path_filter, matched=names)
    await self._extract(blob=archive, root=root)
    return names
@classmethod
def create(cls, *args, type: str=None, **kwargs):
@ -70,8 +80,8 @@ class TarIo(CopyContext):
class TarIoTarFile(TarIo):
async def _extract(self, root: str|None=None, path_filter: list[str]|None=None) -> None:
tf = TarFile(fileobj=io.BytesIO(await self._read_filtered(self.src.root, path_filter)))
async def _extract(self, blob: bytes, root: str|None=None) -> None:
    """Walk the members of the tar archive held in ``blob``.

    Args:
        blob: Raw bytes of the tar archive to read.
        root: Optional prefix; when truthy, each member's path becomes
            ``root + '/' + name``, otherwise the member name is used as is.
    """
    # Wrap the archive bytes themselves. Passing the builtin type ``bytes``
    # instead of ``blob`` makes io.BytesIO raise TypeError.
    tf = TarFile(fileobj=io.BytesIO(blob))
    for info in tf.getmembers():
        log(DEBUG, f'Extracting {info.name}')
        path = root + '/' + info.name if root else info.name
@ -92,9 +102,9 @@ class TarIoTarFile(TarIo):
class TarIoTarExec(TarIo):
async def _extract(self, root: str|None=None, path_filter: list[str]|None=None) -> None:
# Extract by handing the archive bytes in blob to a tar command run through self.dst.
# The command is `tar [-C root] -x -f -`; the "-C root" pair is added only when root is not None.
async def _extract(self, blob: bytes, root: str|None=None) -> None:
cmd = ['tar']
if root is not None:
cmd += ['-C', root]
cmd += ['-x', '-f', '-']
# NOTE(review): the next line looks like the pre-change call and the one after it its replacement — confirm against the diff.
await self.dst.run(cmd, cmd_input=await self._read_filtered(self.src.root, path_filter))
# blob is passed as cmd_input; presumably this becomes the command's stdin, which `-f -` reads — confirm in dst.run.
await self.dst.run(cmd, cmd_input=blob)