Package zeroinstall :: Package zerostore :: Module unpack
[frames | no frames]

Source Code for Module zeroinstall.zerostore.unpack

  1  """Unpacking archives of various formats.""" 
  2   
  3  # Copyright (C) 2009, Thomas Leonard 
  4  # See the README file for details, or visit http://0install.net. 
  5   
  6  from zeroinstall import _ 
  7  import os, subprocess 
  8  import shutil 
  9  import glob 
 10  import traceback 
 11  from tempfile import mkdtemp, mkstemp 
 12  import re 
 13  from logging import debug, warn 
 14  import errno 
 15  from zeroinstall import SafeException 
 16  from zeroinstall.support import find_in_path, ro_rmtree 
 17   
 18  _cpio_version = None 
19 -def _get_cpio_version():
20 global _cpio_version 21 if _cpio_version is None: 22 _cpio_version = os.popen('cpio --version 2>&1').next() 23 debug(_("cpio version = %s"), _cpio_version) 24 return _cpio_version
25
def _gnu_cpio():
    """Return True if the cpio version string contains '(GNU cpio)'."""
    version_line = _get_cpio_version()
    is_gnu = '(GNU cpio)' in version_line
    debug(_("Is GNU cpio = %s"), is_gnu)
    return is_gnu
# Cached first line of `tar --version`; filled in lazily by _get_tar_version().
_tar_version = None


def _get_tar_version():
    """Return the first line printed by 'tar --version' (stripped).

    The command is run at most once; the result is cached in the module-level
    '_tar_version'. stderr is merged into stdout by the shell redirection. If
    the command prints nothing at all, the empty string is cached.
    """
    global _tar_version
    if _tar_version is None:
        pipe = os.popen('tar --version 2>&1')
        try:
            # The builtin next() works on Python 2.6+ and 3 (the .next() method
            # is Python 2 only); the default avoids StopIteration on no output.
            _tar_version = next(iter(pipe), '').strip()
        finally:
            pipe.close()
        debug(_("tar version = %s"), _tar_version)
    return _tar_version
38
def _gnu_tar():
    """Return True if the tar version string contains '(GNU tar)'."""
    version_line = _get_tar_version()
    is_gnu = '(GNU tar)' in version_line
    debug(_("Is GNU tar = %s"), is_gnu)
    return is_gnu
43
def recent_gnu_tar():
    """Return True if tar is GNU tar with a version number greater than 1.13.92.

    Returns False when tar is not GNU tar, or when no version number can be
    found in its version string (a warning is logged in the latter case).
    @deprecated: should be private"""
    is_recent = False
    if _gnu_tar():
        match = re.search(r'\)\s*(\d+(\.\d+)*)', _get_tar_version())
        if match:
            # Build a real list: on Python 3, map() returns an iterator, which
            # cannot be ordered against a list.
            version = [int(part) for part in match.group(1).split('.')]
            is_recent = version > [1, 13, 92]
        else:
            warn(_("Failed to extract GNU tar version number"))
    debug(_("Recent GNU tar = %s"), is_recent)
    return is_recent
# Path to the 'pola-run' sandboxing helper, or None for "no sandbox".
# Disabled, as Plash does not currently support fchmod(2).
_pola_run = None
#_pola_run = find_in_path('pola-run')
#if _pola_run:
#    info('Found pola-run: %s', _pola_run)
#else:
#    info('pola-run not found; archive extraction will not be sandboxed')
def type_from_url(url):
    """Guess the MIME type for this resource based on its URL.

    The match is case-insensitive and looks only at the end of the URL.
    Returns None if we don't know what it is."""
    # (suffix, MIME type) pairs; no suffix here is a suffix of another one,
    # so the order of the table does not affect the result.
    suffix_types = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),
        ('.tar.xz', 'application/x-xz-compressed-tar'),
        ('.tbz', 'application/x-bzip-compressed-tar'),
        ('.tbz2', 'application/x-bzip-compressed-tar'),
        ('.tgz', 'application/x-compressed-tar'),
        ('.tlz', 'application/x-lzma-compressed-tar'),
        ('.txz', 'application/x-xz-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
        ('.dmg', 'application/x-apple-diskimage'),
        ('.gem', 'application/x-ruby-gem'),
    )
    url = url.lower()
    for suffix, mime_type in suffix_types:
        if url.endswith(suffix):
            return mime_type
    return None
84
def check_type_ok(mime_type):
    """Verify that the helper program needed for archives of 'mime_type' is installed.

    Types that need no external helper check return silently.
    @raise SafeException: if the needed software is not available, or the
    type is not one this module knows about"""
    assert mime_type

    # These types have no prerequisite to check here (bzip2 can fall back to
    # Python's built-in tar.bz2 support; lzma support can be fetched through
    # Zero Install).
    if mime_type in ('application/x-bzip-compressed-tar',
                     'application/x-lzma-compressed-tar',
                     'application/x-compressed-tar',
                     'application/x-tar',
                     'application/x-ruby-gem'):
        return

    if mime_type == 'application/x-rpm':
        if not find_in_path('rpm2cpio'):
            raise SafeException(_("This package looks like an RPM, but you don't have the rpm2cpio command "
                    "I need to extract it. Install the 'rpm' package first (this works even if "
                    "you're on a non-RPM-based distribution such as Debian)."))
        return

    if mime_type == 'application/x-deb':
        if not find_in_path('ar'):
            raise SafeException(_("This package looks like a Debian package, but you don't have the 'ar' command "
                    "I need to extract it. Install the package containing it (sometimes called 'binutils') "
                    "first. This works even if you're on a non-Debian-based distribution such as Red Hat)."))
        return

    if mime_type == 'application/zip':
        if not find_in_path('unzip'):
            raise SafeException(_("This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
                    "I need to extract it. Install the package containing it first."))
        return

    if mime_type == 'application/vnd.ms-cab-compressed':
        if not find_in_path('cabextract'):
            raise SafeException(_("This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
                    "I need to extract it. Install the package containing it first."))
        return

    if mime_type == 'application/x-apple-diskimage':
        if not find_in_path('hdiutil'):
            raise SafeException(_("This package looks like a Apple Disk Image, but you don't have the 'hdiutil' command "
                    "I need to extract it."))
        return

    if mime_type == 'application/x-xz-compressed-tar':
        if not find_in_path('unxz'):
            raise SafeException(_("This package looks like a xz-compressed package, but you don't have the 'unxz' command "
                    "I need to extract it. Install the package containing it (it's probably called 'xz-utils') "
                    "first."))
        return

    # Anything else is a type we cannot handle at all.
    from zeroinstall import version
    raise SafeException(_("Unsupported archive type '%(type)s' (for injector version %(version)s)") % {'type': mime_type, 'version': version})
125
def _exec_maybe_sandboxed(writable, prog, *args):
    """Replace the current process with 'prog', sandboxed when pola-run is configured.

    With a sandbox, only the 'writable' directory (if given) is writable.
    If no sandbox is available, run without a sandbox."""
    prog_path = find_in_path(prog)
    if not prog_path:
        raise Exception(_("'%s' not found in $PATH") % prog)

    if _pola_run is None:
        # No sandbox: exec the program directly.
        os.execlp(prog_path, prog_path, *args)

    # We have pola-shell :-)
    sandbox_args = ['--prog', prog_path, '-f', '/']
    for arg in args:
        sandbox_args.extend(['-a', arg])
    if writable:
        sandbox_args.extend(['-fw', writable])
    os.execl(_pola_run, _pola_run, *sandbox_args)
140
def unpack_archive_over(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Like unpack_archive, except that we unpack to a temporary directory first and
    then move things over, checking that we're not following symlinks at each stage.
    Use this when you want to unpack an archive into a directory which already has
    stuff in it.

    The temporary directory is created inside 'destdir' and is always removed
    again, whether or not unpacking succeeds.
    @raise SafeException: if a directory or file would be unpacked over a
    symlink, or a directory over a non-directory
    @note: Since 0.49, the leading "extract" component is removed (unlike unpack_archive).
    @since: 0.28"""
    import stat
    # Check the argument before creating the temporary directory, so that a
    # failed assertion does not leave the directory behind.
    assert extract is None or os.sep not in extract, extract
    tmpdir = mkdtemp(dir = destdir)
    try:
        mtimes = []

        unpack_archive(url, data, tmpdir, extract, type, start_offset)

        if extract is None:
            srcdir = tmpdir
        else:
            srcdir = os.path.join(tmpdir, extract)
            assert not os.path.islink(srcdir)

        stem_len = len(srcdir)
        for root, dirs, files in os.walk(srcdir):
            relative_root = root[stem_len + 1:] or '.'
            target_root = os.path.join(destdir, relative_root)
            try:
                info = os.lstat(target_root)
            except OSError as ex:
                if ex.errno != errno.ENOENT:
                    raise   # Some odd error.
                # Doesn't exist. OK.
                os.mkdir(target_root)
            else:
                if stat.S_ISLNK(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over symlink "%s"!') % relative_root)
                elif not stat.S_ISDIR(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over non-directory "%s"!') % relative_root)
            # 'root' already starts with srcdir; joining it onto srcdir again
            # would give a wrong path whenever srcdir is relative.
            mtimes.append((relative_root, os.lstat(root).st_mtime))

            for s in dirs:  # Symlinks are counted as directories
                src = os.path.join(srcdir, relative_root, s)
                if os.path.islink(src):
                    # Move the link itself, like a file (os.walk does not descend into it).
                    files.append(s)

            for f in files:
                src = os.path.join(srcdir, relative_root, f)
                dest = os.path.join(destdir, relative_root, f)
                if os.path.islink(dest):
                    raise SafeException(_('Attempt to unpack file over symlink "%s"!') %
                            os.path.join(relative_root, f))
                os.rename(src, dest)

        # Restore directory mtimes last, since moving files in changes them.
        # The first entry is the top-level directory, which is skipped.
        for path, mtime in mtimes[1:]:
            os.utime(os.path.join(destdir, path), (mtime, mtime))
    finally:
        ro_rmtree(tmpdir)
197
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack the archive in stream 'data' into the directory 'destdir'.

    If 'extract' is given, only that sub-directory of the archive is extracted
    (i.e. destdir/extract will exist afterwards). When 'type' is None, the
    format is guessed from the name in 'url'.
    @raise SafeException: if the type cannot be guessed or is not supported"""
    if type is None:
        type = type_from_url(url)
    if type is None:
        raise SafeException(_("Unknown extension (and no MIME type given) in '%s'") % url)

    # Tar-based formats differ only in the compression handed to extract_tar.
    tar_compression = {
        'application/x-tar': None,
        'application/x-bzip-compressed-tar': 'bzip2',
        'application/x-lzma-compressed-tar': 'lzma',
        'application/x-xz-compressed-tar': 'xz',
        'application/x-compressed-tar': 'gzip',
    }
    if type in tar_compression:
        extract_tar(data, destdir, extract, tar_compression[type], start_offset)
        return

    # All other extractors share the (stream, destdir, extract, start_offset) signature.
    extractors = {
        'application/x-deb': extract_deb,
        'application/x-rpm': extract_rpm,
        'application/zip': extract_zip,
        'application/vnd.ms-cab-compressed': extract_cab,
        'application/x-apple-diskimage': extract_dmg,
        'application/x-ruby-gem': extract_gem,
    }
    extractor = extractors.get(type)
    if extractor is None:
        raise SafeException(_('Unknown MIME type "%(type)s" for "%(url)s"') % {'type': type, 'url': url})
    extractor(data, destdir, extract, start_offset)
228
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the data archive of the Debian package in 'stream' into 'destdir'.

    The package is first copied to destdir/archive.deb (ar can't read from
    stdin); the 'data.tar*' member is then pulled out with 'ar' and handed to
    extract_tar. Both intermediate files are removed again.
    @raise SafeException: if 'extract' is given (not supported for Debs), if
    the archive has no recognised data.tar member, or if extraction fails"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Debs'))

    # Compression of each data member name we know how to handle.
    member_compression = {
        'data.tar': None,
        'data.tar.gz': 'gzip',
        'data.tar.bz2': 'bzip2',
        'data.tar.lzma': 'lzma',
        'data.tar.xz': 'xz',
    }

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    # Binary mode: the package is not text.
    with open(deb_copy_name, 'wb') as deb_copy:
        shutil.copyfileobj(stream, deb_copy)

    try:
        p = subprocess.Popen(('ar', 't', 'archive.deb'), stdout=subprocess.PIPE, cwd=destdir, universal_newlines=True)
        o = p.communicate()[0]
        for line in o.split('\n'):
            if line in member_compression:
                data_tar = line
                data_compression = member_compression[line]
                break
        else:
            # The loop finished without finding any data member.
            raise SafeException(_("File is not a Debian package."))

        _extract(stream, destdir, ('ar', 'x', 'archive.deb', data_tar))
    finally:
        # Remove the copy even when listing or extracting failed.
        os.unlink(deb_copy_name)

    data_name = os.path.join(destdir, data_tar)
    data_stream = open(data_name, 'rb')
    try:
        # Unlink straight away so the tarball does not stay in destdir;
        # the open handle keeps the data readable.
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, data_compression)
    finally:
        data_stream.close()
265
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack the RPM in 'stream' into 'destdir'.

    A forked child runs rpm2cpio with 'stream' (positioned at 'start_offset')
    as stdin and a temporary file as stdout; the resulting cpio archive is then
    unpacked with cpio. Afterwards the mtime of every directory under 'destdir'
    is set to 0. The temporary file is always removed.
    @raise SafeException: if 'extract' is given (not supported for RPMs), or
    if rpm2cpio or cpio fails"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for RPMs'))
    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child = os.fork()
        if child == 0:
            # In the child: it must never return into the caller's code, so
            # every exception is reported and the process exits with status 1.
            # (A successful exec never reaches the 'finally'.)
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, 0)
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    traceback.print_exc()
            finally:
                os._exit(1)
        pid, status = os.waitpid(child, 0)
        assert pid == child
        if status != 0:
            raise SafeException(_("rpm2cpio failed; can't unpack RPM archive; exit code %d") % status)
        os.close(fd)
        fd = None

        args = ['cpio', '-mid']
        if _gnu_cpio():
            args.append('--quiet')

        # Binary mode, and close the file as soon as cpio has finished with it.
        with open(cpiopath, 'rb') as cpio_stream:
            _extract(cpio_stream, destdir, args)
        # Set the mtime of every directory under 'tmp' to 0, since cpio doesn't
        # preserve directory mtimes.
        for root, dirs, files in os.walk(destdir):
            os.utime(root, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
303
def extract_gem(stream, destdir, extract = None, start_offset = 0):
    """Unpack the Ruby gem in 'stream' into 'destdir'.

    The gem is treated as an uncompressed tar archive containing
    'data.tar.gz': that member is extracted to a temporary directory inside
    'destdir' and then itself unpacked (honouring 'extract') into 'destdir'.
    The temporary directory is always removed.
    @since: 0.53"""
    payload = 'data.tar.gz'
    payload_stream = None
    tmpdir = mkdtemp(dir = destdir)
    try:
        # Pass start_offset on: extract_tar seeks the stream itself, so seeking
        # here and relying on its default of 0 would discard the offset.
        extract_tar(stream, destdir=tmpdir, extract=payload, decompress=None, start_offset=start_offset)
        # Binary mode: the payload is gzip data, not text.
        payload_stream = open(os.path.join(tmpdir, payload), 'rb')
        extract_tar(payload_stream, destdir=destdir, extract=extract, decompress='gzip')
    finally:
        if payload_stream:
            payload_stream.close()
        ro_rmtree(tmpdir)
318
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack the Microsoft Cabinet archive in 'stream' into 'destdir' using cabextract.

    The archive is first copied to destdir/archive.cab (cabextract can't read
    from stdin); the copy is removed again afterwards, even on failure.
    @raise SafeException: if 'extract' is given (not supported for Cabinet
    files), or if cabextract fails
    @since: 0.24"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Cabinet files'))

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    # Binary mode: the archive is not text.
    with open(cab_copy_name, 'wb') as cab_copy:
        shutil.copyfileobj(stream, cab_copy)

    try:
        _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    finally:
        os.unlink(cab_copy_name)
333
def extract_dmg(stream, destdir, extract, start_offset = 0):
    """Unpack the Apple Disk Image in 'stream' into 'destdir' using hdiutil.

    The image is first copied to destdir/archive.dmg (hdiutil can't read from
    stdin), attached at a temporary mount point, and its top-level entries
    matched by '*' are copied into 'destdir' with 'cp -pR'. The image is
    detached and the copy removed even if copying fails.
    @raise SafeException: if 'extract' is given (not supported for DMGs)
    @raise subprocess.CalledProcessError: if hdiutil or cp fails
    @since: 0.46"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for DMGs'))

    stream.seek(start_offset)
    # hdiutil can't read from stdin, so make a copy...
    dmg_copy_name = os.path.join(destdir, 'archive.dmg')
    # Binary mode: the image is not text.
    with open(dmg_copy_name, 'wb') as dmg_copy:
        shutil.copyfileobj(stream, dmg_copy)

    try:
        mountpoint = mkdtemp(prefix='archive')
        subprocess.check_call(["hdiutil", "attach", "-quiet", "-mountpoint", mountpoint, "-nobrowse", dmg_copy_name])
        try:
            subprocess.check_call(["cp", "-pR"] + glob.glob("%s/*" % mountpoint) + [destdir])
        finally:
            # Never leave the image mounted, even when the copy failed.
            subprocess.check_call(["hdiutil", "detach", "-quiet", mountpoint])
        os.rmdir(mountpoint)
    finally:
        os.unlink(dmg_copy_name)
352
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack the zip archive in 'stream' into 'destdir' using unzip.

    The archive is first copied to destdir/archive.zip (unzip can't read from
    stdin); the copy is removed again afterwards, even on failure. If
    'extract' is given, only members matching 'extract/*' are unpacked.
    @raise SafeException: if 'extract' contains characters outside the
    accepted set, or if unzip fails"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to zip
        if not re.match(r'^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    # Binary mode: the archive is not text.
    with open(zip_copy_name, 'wb') as zip_copy:
        shutil.copyfileobj(stream, zip_copy)

    args = ['unzip', '-q', '-o', 'archive.zip']

    if extract:
        args.append(extract + '/*')

    try:
        _extract(stream, destdir, args)
    finally:
        os.unlink(zip_copy_name)
374
def _find_decompressor(prog):
    """Return the path of decompression program 'prog' (e.g. 'unxz').

    Falls back to the path of a file named '_<prog>' in this module's
    directory when 'prog' is not found in $PATH."""
    path = find_in_path(prog)
    if not path:
        path = os.path.abspath(os.path.join(os.path.dirname(__file__), '_' + prog))
    return path


def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack the tar archive in 'stream' (starting at 'start_offset') into 'destdir'.

    'decompress' is None, 'bzip2', 'gzip', 'lzma' or 'xz'. If 'extract' is
    given, only that member (and anything below it) is unpacked. GNU tar is
    used when available; otherwise Python's tarfile module is used, with lzma
    and xz data first decompressed to a temporary file by an external program.
    @raise SafeException: if 'extract' contains characters outside the
    accepted set, if extraction fails, or if 'extract' matches nothing (the
    last check is only made on the tarfile path)"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar
        if not re.match(r'^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    assert decompress in [None, 'bzip2', 'gzip', 'lzma', 'xz']

    if _gnu_tar():
        ext_cmd = ['tar']
        if decompress == 'bzip2':
            ext_cmd.append('--bzip2')
        elif decompress == 'gzip':
            ext_cmd.append('-z')
        elif decompress == 'lzma':
            ext_cmd.append('--use-compress-program=' + _find_decompressor('unlzma'))
        elif decompress == 'xz':
            ext_cmd.append('--use-compress-program=' + _find_decompressor('unxz'))

        if recent_gnu_tar():
            ext_cmd.extend(('-x', '--no-same-owner', '--no-same-permissions'))
        else:
            ext_cmd.extend(('xf', '-'))

        if extract:
            ext_cmd.append(extract)

        _extract(stream, destdir, ext_cmd, start_offset)
        return

    # Since we don't have GNU tar, use python's tarfile module. This will probably
    # be a lot slower and we do not support lzma and xz; however, it is portable.
    # (lzma and xz are handled by first uncompressing stream to a temporary file.
    # this is simple to do, but less efficient than piping through the program)
    import tarfile
    import tempfile

    temp = None
    try:
        if decompress is None:
            rmode = 'r|'
        elif decompress == 'bzip2':
            rmode = 'r|bz2'
        elif decompress == 'gzip':
            rmode = 'r|gz'
        elif decompress in ('lzma', 'xz'):
            decompressor = _find_decompressor({'lzma': 'unlzma', 'xz': 'unxz'}[decompress])
            temp = tempfile.NamedTemporaryFile(suffix='.tar')
            # start_offset refers to the compressed input, so apply it before
            # decompressing; the decompressed tar data then starts at 0.
            stream.seek(start_offset)
            subprocess.check_call([decompressor], stdin=stream, stdout=temp)
            rmode = 'r|'
            stream = temp
            start_offset = 0
        else:
            # Only reachable when assertions are disabled.
            raise SafeException(_('GNU tar unavailable; unsupported compression format: %s') % decompress)

        stream.seek(start_offset)
        # Python 2.5.1 crashes if name is None; see Python bug #1706850
        tar = tarfile.open(name = '', mode = rmode, fileobj = stream)
        try:
            # Read the current umask (it can only be read by setting it).
            current_umask = os.umask(0)
            os.umask(current_umask)

            uid = gid = None
            try:
                uid = os.geteuid()
                gid = os.getegid()
            except (AttributeError, OSError):
                # e.g. platforms without geteuid/getegid
                debug(_("Can't get uid/gid"))

            def chmod_extract(tarinfo):
                # If any X bit is set, they all must be
                if tarinfo.mode & 0o111:
                    tarinfo.mode |= 0o111

                # Everyone gets read and write (subject to the umask)
                # No special bits are allowed.
                tarinfo.mode = ((tarinfo.mode | 0o666) & ~current_umask) & 0o777

                # Don't change owner, even if run as root. Root's uid/gid is 0,
                # so test against None rather than for truth. The names are
                # cleared too, because tarfile looks those up in preference to
                # the numeric ids when it chowns.
                if uid is not None:
                    tarinfo.uid = uid
                    tarinfo.uname = ''
                if gid is not None:
                    tarinfo.gid = gid
                    tarinfo.gname = ''
                tar.extract(tarinfo, destdir)

            extracted_anything = False
            ext_dirs = []

            for tarinfo in tar:
                if extract is None or \
                   tarinfo.name.startswith(extract + '/') or \
                   tarinfo.name == extract:
                    if tarinfo.isdir():
                        ext_dirs.append(tarinfo)

                    chmod_extract(tarinfo)
                    extracted_anything = True

            # Due to a bug in tarfile (python versions < 2.5), we have to manually
            # set the mtime of each directory that we extract after extracting everything.

            for tarinfo in ext_dirs:
                dirname = os.path.join(destdir, tarinfo.name)
                os.utime(dirname, (tarinfo.mtime, tarinfo.mtime))
        finally:
            tar.close()
    finally:
        if temp is not None:
            # Closing a NamedTemporaryFile also deletes it.
            temp.close()

    if extract and not extracted_anything:
        raise SafeException(_('Unable to find specified file = %s in archive') % extract)
def _extract(stream, destdir, command, start_offset = 0):
    """Run 'command' in a child process with 'destdir' as its working directory.

    'stream', seeked to 'start_offset', is the child's stdin. The child's
    stderr is captured and included in the error message on failure.
    @raise SafeException: if the command exits with a non-zero status"""
    stream.seek(start_offset)

    # Some zip archives are missing timezone information; force consistent results
    environment = dict(os.environ, TZ = 'GMT')

    # TODO: use pola-run if available, once it supports fchmod
    process = subprocess.Popen(command,
                               cwd = destdir,
                               stdin = stream,
                               stderr = subprocess.PIPE,
                               env = environment)

    # stdout is not piped, so only the stderr half of the result is of interest.
    errors = process.communicate()[1]
    exit_status = process.returncode

    if exit_status == 0:
        return
    details = {'command': command, 'status': exit_status, 'err': errors.strip()}
    raise SafeException(_('Failed to extract archive (using %(command)s); exit code %(status)d:\n%(err)s') % details)
518