Package zeroinstall :: Package zerostore :: Module manifest
[frames] | no frames]

Source Code for Module zeroinstall.zerostore.manifest

  1   
  2  """Processing of implementation manifests. 
  3   
  4  A manifest is a string representing a directory tree, with the property 
  5  that two trees will generate identical manifest strings if and only if: 
  6   
  7   - They have exactly the same set of files, directories and symlinks. 
  8   - For each pair of corresponding directories in the two sets: 
  9     - The mtimes are the same (OldSHA1 only). 
 10   - For each pair of corresponding files in the two sets: 
 11     - The size, executable flag and mtime are the same. 
 12     - The contents have matching secure hash values. 
 13   - For each pair of corresponding symlinks in the two sets: 
 14     - The size is the same (the mtime is not recorded for symlinks). 
 15     - The targets have matching secure hash values. 
 16   
 17  The manifest is typically processed with a secure hash itself. So, the idea is that 
 18  any significant change to the contents of the tree will change the secure hash value 
 19  of the manifest. 
 20   
 21  A top-level ".manifest" file is ignored. 
 22  """ 
 23   
 24  # Copyright (C) 2009, Thomas Leonard 
 25  # See the README file for details, or visit http://0install.net. 
 26   
 27   
 28  import os, stat 
 29  from zeroinstall import SafeException, _ 
 30  from zeroinstall.zerostore import BadDigest 
 31   
 32  try: 
 33          import hashlib 
 34          sha1_new = hashlib.sha1 
 35  except: 
 36          import sha 
 37          sha1_new = sha.new 
 38          hashlib = None 
 39   
class Algorithm:
    """Abstract base class for algorithms.
    An algorithm knows how to generate a manifest from a directory tree.
    @ivar rating: how much we like this algorithm (higher is better)
    @type rating: int
    """

    def generate_manifest(self, root):
        """Returns an iterator that yields each line of the manifest for the directory
        tree rooted at 'root'.
        @param root: path of the directory tree to describe
        @raise NotImplementedError: always; subclasses must override this method."""
        # NotImplementedError is a subclass of Exception, so existing callers
        # that catch Exception keep working.
        raise NotImplementedError('Abstract')

    def new_digest(self):
        """Create a new digest. Call update() on the returned object to digest the data.
        Call getID() to turn it into a full ID string.
        @raise NotImplementedError: always; subclasses must override this method."""
        raise NotImplementedError('Abstract')

    def getID(self, digest):
        """Convert a digest (from new_digest) to a full ID.
        @param digest: a digest object previously returned by L{new_digest}
        @raise NotImplementedError: always; subclasses must override this method."""
        raise NotImplementedError('Abstract')
59
class OldSHA1(Algorithm):
    """@deprecated: Injector versions before 0.20 only supported this algorithm."""

    rating = 10

    def generate_manifest(self, root):
        """Yield the manifest lines for the tree rooted at 'root'.
        Lines have one of these forms:
         - "D mtime /path" for each directory other than the root
         - "F digest mtime size leaf" for a regular file
         - "X digest mtime size leaf" for a regular file with any execute bit set
         - "S digest size leaf" for a symlink (the digest is of the link target)
        A top-level '.manifest' entry is skipped.
        @param root: path of the directory tree to describe
        @raise BadDigest: if a name contains a newline
        @raise SafeException: if an entry is not a file, directory or symlink"""
        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest("Newline in filename '%s'" % sub)
            assert sub.startswith('/')

            if sub == '/.manifest': return

            # 'sub' is always Unix-style; convert it to a native path below root.
            full = os.path.join(root, sub[1:].replace('/', os.sep))
            info = os.lstat(full)

            m = info.st_mode
            if stat.S_ISDIR(m):
                if sub != '/':
                    yield "D %s %s" % (int(info.st_mtime), sub)
                items = os.listdir(full)
                items.sort()
                subdir = sub
                if not subdir.endswith('/'):
                    subdir += '/'
                for x in items:
                    for y in recurse(subdir + x):
                        yield y
                return

            assert sub[1:]
            leaf = os.path.basename(sub[1:])
            if stat.S_ISREG(m):
                # Hash the raw bytes (binary mode) and close the file promptly.
                with open(full, 'rb') as stream:
                    d = sha1_new(stream.read()).hexdigest()
                if m & 0o111:
                    yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                else:
                    yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
            elif stat.S_ISLNK(m):
                target = os.readlink(full)
                d = sha1_new(target).hexdigest()
                # Note: Can't use utime on symlinks, so skip mtime
                # Note: eCryptfs may report length as zero, so count ourselves instead
                yield "S %s %s %s" % (d, len(target), leaf)
            else:
                raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                        full)
        for x in recurse('/'): yield x

    def new_digest(self):
        """Return a new, empty SHA-1 digest object."""
        return sha1_new()

    def getID(self, digest):
        """Return the full ID for 'digest', in the form 'sha1=<hex digest>'."""
        return 'sha1=' + digest.hexdigest()
117
def get_algorithm(name):
    """Return the registered L{Algorithm} called 'name'.
    @param name: key to look up in the module-level 'algorithms' mapping
    @raise BadDigest: if no algorithm is registered under that name."""
    if name in algorithms:
        return algorithms[name]
    raise BadDigest(_("Unknown algorithm '%s'") % name)
125
def generate_manifest(root, alg = 'sha1'):
    """@deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead.
    @param root: directory tree to describe
    @param alg: name of the algorithm to use
    @return: whatever the chosen algorithm's generate_manifest returns for 'root'"""
    chosen = get_algorithm(alg)
    return chosen.generate_manifest(root)
129
def add_manifest_file(dir, digest_or_alg):
    """Writes a .manifest file into 'dir', and returns the digest.
    You should call fixup_permissions before this to ensure that the permissions are correct.
    On exit, dir itself has mode 555. Subdirectories are not changed.
    @param dir: root of the implementation
    @param digest_or_alg: should be an instance of Algorithm. Passing a digest
    here is deprecated.
    @return: the digest object, updated with the full manifest text
    @raise SafeException: if 'dir' already contains a .manifest entry"""
    mfile = os.path.join(dir, '.manifest')
    if os.path.islink(mfile) or os.path.exists(mfile):
        raise SafeException(_("Directory '%s' already contains a .manifest file!") % dir)
    if isinstance(digest_or_alg, Algorithm):
        alg = digest_or_alg
        digest = alg.new_digest()
    else:
        # Deprecated calling convention: a digest object implies the old 'sha1' algorithm.
        digest = digest_or_alg
        alg = get_algorithm('sha1')
    # Join once rather than growing a string line by line.
    manifest = ''.join([line + '\n' for line in alg.generate_manifest(dir)])
    digest.update(manifest)

    # The directory only needs to be writable while the file is created.
    os.chmod(dir, 0o755)
    try:
        stream = open(mfile, 'wb')
    finally:
        # Restore the read-only mode even if the file could not be created.
        os.chmod(dir, 0o555)
    with stream:
        stream.write(manifest)
    os.chmod(mfile, 0o444)
    return digest
158
def splitID(id):
    """Split an ID of the form 'alg=value' at its first '='.
    @param id: the ID string to split
    @return: a tuple (alg, value), where 'alg' is the L{Algorithm} registered under
    the text before the '=' and 'value' is the text after it
    @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
    alg_name, separator, value = id.partition('=')
    if not separator:
        raise BadDigest(_("Digest '%s' is not in the form 'algorithm=value'") % id)
    return (get_algorithm(alg_name), value)
167
def copy_with_verify(src, dest, mode, alg, required_digest):
    """Copy path src to dest, checking that the contents give the right digest.
    dest must not exist. New file is created with a mode of 'mode & ~umask'.
    If the digest does not match, dest is removed again before raising.
    @param src: source filename
    @type src: str
    @param dest: target filename
    @type dest: str
    @param mode: target mode
    @type mode: int
    @param alg: algorithm to generate digest
    @type alg: L{Algorithm}
    @param required_digest: expected digest value
    @type required_digest: str
    @raise BadDigest: the contents of the file don't match required_digest"""
    # Binary mode: the copy and the digest must see the exact bytes on disk.
    src_obj = open(src, 'rb')
    try:
        # O_EXCL makes the call fail if dest already exists.
        # O_BINARY only exists on some platforms (e.g. Windows); elsewhere it is 0.
        flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, 'O_BINARY', 0)
        dest_fd = os.open(dest, flags, mode)
        try:
            digest = alg.new_digest()
            while True:
                data = src_obj.read(256)
                if not data: break
                digest.update(data)
                # os.write may write fewer bytes than requested; loop until done.
                while data:
                    written = os.write(dest_fd, data)
                    assert written >= 0
                    data = data[written:]
        finally:
            os.close(dest_fd)
    finally:
        # Closed on every path, including when os.open() fails.
        src_obj.close()
    actual = digest.hexdigest()
    if actual == required_digest: return
    os.unlink(dest)
    raise BadDigest(_("Copy failed: file '%(src)s' has wrong digest (may have been tampered with)\n"
            "Expected: %(required_digest)s\n"
            "Actual: %(actual_digest)s") % {'src': src, 'required_digest': required_digest, 'actual_digest': actual})
203
def verify(root, required_digest = None):
    """Ensure that directory 'root' generates the given digest.
    For a non-error return:
     - The required digest (by default, the name of 'root') must be in the form "alg=value"
     - The calculated digest of the contents must match it.
     - There must be a .manifest file, and its digest must also match.
    @param root: the directory to check
    @param required_digest: the expected ID; if None, the basename of 'root' is used
    @raise BadDigest: if verification fails. The exception's 'detail' attribute
    describes which of the three digests disagree."""
    if required_digest is None:
        required_digest = os.path.basename(root)
    alg = splitID(required_digest)[0]

    # Digest of the manifest generated from the directory's current contents.
    digest = alg.new_digest()
    lines = []
    for line in alg.generate_manifest(root):
        line += '\n'
        digest.update(line)
        lines.append(line)
    actual_digest = alg.getID(digest)

    # Digest of the manifest recorded on disk, if there is one.
    manifest_file = os.path.join(root, '.manifest')
    if os.path.isfile(manifest_file):
        digest = alg.new_digest()
        with open(manifest_file, 'rb') as stream:
            digest.update(stream.read())
        manifest_digest = alg.getID(digest)
    else:
        manifest_digest = None

    if required_digest == actual_digest == manifest_digest:
        return

    error = BadDigest(_("Cached item does NOT verify."))

    error.detail = _(" Expected: %(required_digest)s\n"
             " Actual: %(actual_digest)s\n"
             ".manifest digest: %(manifest_digest)s\n\n") \
             % {'required_digest': required_digest, 'actual_digest': actual_digest, 'manifest_digest': manifest_digest or _('No .manifest file')}

    if manifest_digest is None:
        error.detail += _("No .manifest, so no further details available.")
    elif manifest_digest == actual_digest:
        error.detail += _("The .manifest file matches the actual contents. Very strange!")
    elif manifest_digest == required_digest:
        import difflib
        with open(manifest_file, 'rb') as stream:
            recorded = stream.readlines()
        diff = difflib.unified_diff(recorded, lines,
                        'Recorded', 'Actual')
        error.detail += _("The .manifest file matches the directory name.\n" \
                "The contents of the directory have changed:\n") + \
                ''.join(diff)
    elif required_digest == actual_digest:
        error.detail += _("The directory contents are correct, but the .manifest file is wrong!")
    else:
        error.detail += _("The .manifest file matches neither of the other digests. Odd.")
    raise error

# XXX: Be more careful about the source tree changing under us. In particular, what happens if:
# - A regular file suddenly turns into a symlink?
# - We find a device file (users can hard-link them if on the same device)
def copy_tree_with_verify(source, target, manifest_data, required_digest):
    """Copy directory source to be a subdirectory of target if it matches the required_digest.
    manifest_data is normally source/.manifest. source and manifest_data are not trusted
    (will typically be under the control of another user).
    The copy is first done to a temporary directory in target, then renamed to the final name
    only if correct. Therefore, an invalid 'target/required_digest' will never exist.
    A successful return means that target/required_digest now exists (whether we created it or not).
    @raise BadDigest: if the digest of manifest_data is not required_digest
    @raise SafeException: if the algorithm is the old 'sha1', if entries listed in the
    manifest are missing from source, or if the finished copy has the wrong digest"""
    import tempfile
    from logging import info

    alg = splitID(required_digest)[0]
    if isinstance(alg, OldSHA1):
        raise SafeException(_("Sorry, the 'sha1' algorithm does not support copying."))

    hasher = alg.new_digest()
    hasher.update(manifest_data)
    manifest_digest = alg.getID(hasher)
    if manifest_digest != required_digest:
        raise BadDigest(_("Manifest has been tampered with!\n"
                "Manifest digest: %(actual_digest)s\n"
                "Directory name : %(required_digest)s")
                % {'actual_digest': manifest_digest, 'required_digest': required_digest})

    target_impl = os.path.join(target, required_digest)
    if os.path.isdir(target_impl):
        info(_("Target directory '%s' already exists"), target_impl)
        return

    # The manifest has the digest we were asked for, so it describes the tree
    # we want. Work out which entries have to be copied.
    remaining = _parse_manifest(manifest_data)

    staging = tempfile.mkdtemp(prefix = 'tmp-copy-', dir = target)
    try:
        _copy_files(alg, remaining, source, staging)

        # _copy_files removes every entry it handled; leftovers were never found.
        if remaining:
            missing = '\n- '.join(remaining.keys())
            raise SafeException(_('Copy failed; files missing from source:') + '\n- ' + missing)

        # Make directories read-only (files are already RO)
        for parent, subdirs, filenames in os.walk(staging):
            for name in subdirs:
                subdir = os.path.join(parent, name)
                os.chmod(subdir, os.stat(subdir).st_mode & 0o555)

        # Check that the copy is correct
        actual_digest = alg.getID(add_manifest_file(staging, alg))
        if actual_digest != required_digest:
            raise SafeException(_("Copy failed; double-check of target gave the wrong digest.\n"
                          "Unless the target was modified during the copy, this is a BUG\n"
                          "in 0store and should be reported.\n"
                          "Expected: %(required_digest)s\n"
                          "Actual: %(actual_digest)s") % {'required_digest': required_digest, 'actual_digest': actual_digest})
        try:
            os.chmod(staging, 0o755)        # need write permission to rename on MacOS X
            os.rename(staging, target_impl)
            os.chmod(target_impl, 0o555)
            staging = None          # renamed into place: nothing left to clean up
        except OSError:
            if not os.path.isdir(target_impl):
                raise
            # else someone else installed it already - return success
    finally:
        if staging is not None:
            info(_("Deleting tmpdir '%s'") % staging)
            from zeroinstall.support import ro_rmtree
            ro_rmtree(staging)
333
def _parse_manifest(manifest_data):
    """Turn the text of a manifest into a lookup table.
    Parsing stops at the first empty line. A 'D' line sets the directory that
    the following entries are relative to; a line starting with 'S' must have
    four space-separated fields, and any other line must have five.
    @param manifest_data: the contents of the manifest file
    @type manifest_data: str
    @return: a mapping from paths to information about that path (all of the
    line's fields except the trailing name)
    @rtype: {str: tuple}
    @raise BadDigest: if a line is malformed or a path is listed twice"""
    entries = {}
    current_dir = ''
    for line in manifest_data.split('\n'):
        if not line:
            break
        kind = line[0]
        if kind == 'D':
            fields = line.split(' ', 1)
            if len(fields) != 2:
                raise BadDigest(_("Bad line '%s'") % line)
            if not fields[1].startswith('/'):
                raise BadDigest(_("Not absolute: '%s'") % line)
            # Store directory paths without the leading '/'.
            current_dir = path = fields[1][1:]
        else:
            expected_fields = 4 if kind == 'S' else 5
            # The name is the last field and may itself contain spaces.
            fields = line.split(' ', expected_fields - 1)
            if len(fields) != expected_fields:
                raise BadDigest(_("Bad line '%s'") % line)
            path = os.path.join(current_dir, fields[-1])
        if path in entries:
            raise BadDigest(_('Duplicate entry "%s"') % line)
        entries[path] = fields[:-1]
    return entries
363
def _copy_files(alg, wanted, source, target):
    """Scan for files under 'source'. For each one:
    If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
    then copy it into 'target'.
    If it's not in wanted, warn and skip it.
    On exit, wanted contains only files that were not found.
    @param alg: algorithm used to scan 'source' and to digest file contents
    @param wanted: mapping from path to required details, as returned by
    L{_parse_manifest}; entries are removed as they are processed
    @param source: directory to copy from
    @param target: existing directory to copy into
    @raise BadDigest: if an item has the wrong type, or a file's contents have the wrong digest
    @raise SafeException: if a size or symlink target doesn't match the manifest"""
    # logging.warn is a deprecated alias; use warning().
    import logging
    current_dir = ''
    for line in alg.generate_manifest(source):
        if line[0] == 'D':
            kind, name = line.split(' ', 1)
            assert name.startswith('/')
            current_dir = name[1:]
            path = current_dir
        elif line[0] == 'S':
            kind, actual_digest, actual_size, name = line.split(' ', 3)
            path = os.path.join(current_dir, name)
        else:
            assert line[0] in 'XF'
            kind, actual_digest, actual_mtime, actual_size, name = line.split(' ', 4)
            path = os.path.join(current_dir, name)
        try:
            required_details = wanted.pop(path)
        except KeyError:
            logging.warning(_("Skipping file not in manifest: '%s'"), path)
            continue
        if required_details[0] != kind:
            raise BadDigest(_("Item '%s' has wrong type!") % path)
        if kind == 'D':
            os.mkdir(os.path.join(target, path))
        elif kind in 'XF':
            required_type, required_digest, required_mtime, required_size = required_details
            # Sizes are compared as the strings that appear in the manifest lines.
            if required_size != actual_size:
                raise SafeException(_("File '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
                              "%(required_size)s according to manifest)") %
                              {'path': path, 'actual_size': actual_size, 'required_size': required_size})
            required_mtime = int(required_mtime)
            dest_path = os.path.join(target, path)
            if kind == 'X':
                mode = 0o555
            else:
                mode = 0o444
            copy_with_verify(os.path.join(source, path),
                     dest_path,
                     mode,
                     alg,
                     required_digest)
            # The mtime is not compared with the source; the copy is given the manifest's value.
            os.utime(dest_path, (required_mtime, required_mtime))
        elif kind == 'S':
            required_type, required_digest, required_size = required_details
            if required_size != actual_size:
                raise SafeException(_("Symlink '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
                              "%(required_size)s according to manifest)") %
                              {'path': path, 'actual_size': actual_size, 'required_size': required_size})
            # Re-read the link and check its digest before recreating it.
            symlink_target = os.readlink(os.path.join(source, path))
            symlink_digest = alg.new_digest()
            symlink_digest.update(symlink_target)
            if symlink_digest.hexdigest() != required_digest:
                raise SafeException(_("Symlink '%(path)s' has wrong target (digest should be "
                              "%(digest)s according to manifest)") % {'path': path, 'digest': required_digest})
            dest_path = os.path.join(target, path)
            os.symlink(symlink_target, dest_path)
        else:
            raise SafeException(_("Unknown manifest type %(type)s for '%(path)s'") % {'type': kind, 'path': path})
428
class HashLibAlgorithm(Algorithm):
    """An L{Algorithm} whose digests come from a named hash constructor.
    @ivar name: the prefix used in IDs produced by L{getID}
    @ivar rating: how much we like this algorithm (higher is better)"""
    new_digest = None       # Constructor for digest objects

    def __init__(self, name, rating):
        """@param name: 'sha1' (which uses the module's SHA-1 constructor and produces
        IDs starting 'sha1new='), or the name of a constructor in hashlib
        @param rating: how much we like this algorithm (higher is better)"""
        if name == 'sha1':
            self.new_digest = sha1_new
            self.name = 'sha1new'
        else:
            self.new_digest = getattr(hashlib, name)
            self.name = name
        self.rating = rating

    def generate_manifest(self, root):
        """Yield the manifest lines for the tree rooted at 'root'.
        For each directory, a "D /path" line is emitted (except for the root itself),
        followed by its files and symlinks in sorted order, followed by its
        subdirectories. Entry lines have one of these forms:
         - "F digest mtime size leaf" for a regular file
         - "X digest mtime size leaf" for a regular file with any execute bit set
         - "S digest size leaf" for a symlink (the digest is of the link target)
        @param root: path of the directory tree to describe
        @raise BadDigest: if a name contains a newline
        @raise SafeException: if an entry is not a file, directory or symlink"""
        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest(_("Newline in filename '%s'") % sub)
            assert sub.startswith('/')

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)
            new_digest = self.new_digest

            m = info.st_mode
            if not stat.S_ISDIR(m): raise Exception(_('Not a directory: "%s"') % full)
            if sub != '/':
                yield "D %s" % sub
            items = os.listdir(full)
            items.sort()
            dirs = []
            for leaf in items:
                path = os.path.join(root, sub[1:], leaf)
                info = os.lstat(path)
                m = info.st_mode

                if stat.S_ISREG(m):
                    # NOTE(review): this skips a regular file named '.manifest' in
                    # every directory, not only at the top level - confirm intent.
                    if leaf == '.manifest': continue

                    # Hash the raw bytes (binary mode) and close the file promptly.
                    with open(path, 'rb') as stream:
                        d = new_digest(stream.read()).hexdigest()
                    if m & 0o111:
                        yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                    else:
                        yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                elif stat.S_ISLNK(m):
                    target = os.readlink(path)
                    d = new_digest(target).hexdigest()
                    # Note: Can't use utime on symlinks, so skip mtime
                    # Note: eCryptfs may report length as zero, so count ourselves instead
                    yield "S %s %s %s" % (d, len(target), leaf)
                elif stat.S_ISDIR(m):
                    # Subdirectories are processed after all of this directory's entries.
                    dirs.append(leaf)
                else:
                    raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                            path)

            if not sub.endswith('/'):
                sub += '/'
            for x in dirs:
                # Note: "sub" is always Unix style. Don't use os.path.join here.
                for y in recurse(sub + x): yield y
            return

        for x in recurse('/'): yield x

    def getID(self, digest):
        """Return the full ID for 'digest', in the form '<name>=<hex digest>'."""
        return self.name + '=' + digest.hexdigest()

# Registry of the supported algorithms, keyed by the name used as the prefix
# of a digest ID. get_algorithm() looks names up here.
algorithms = dict(
    sha1 = OldSHA1(),
    sha1new = HashLibAlgorithm('sha1', 50),
)

# sha256 is only offered when the hashlib module could be imported.
if hashlib is not None:
    algorithms.update(sha256 = HashLibAlgorithm('sha256', 80))

def fixup_permissions(root):
    """Normalise the permissions of root and everything below it:
     - Anything with an X bit set becomes mode 555.
     - Everything else becomes mode 444.
    Symlinks are left untouched.
    @param root: top of the directory tree to process
    @raise Exception: if there are unsafe special bits set (setuid, etc)."""

    for dirpath, subdirs, filenames in os.walk(root):
        # '.' stands for the directory being visited, so directories get
        # fixed as the walk reaches them.
        for name in ['.'] + filenames:
            target = os.path.join(dirpath, name)

            st_mode = os.lstat(target).st_mode
            if not stat.S_ISLNK(st_mode):
                perms = stat.S_IMODE(st_mode)
                if perms & ~0o777:
                    raise Exception(_("Unsafe mode: extracted file '%(filename)s' had special bits set in mode '%(mode)s'") % {'filename': full_name(target), 'mode': oct(perms)} if False else _("Unsafe mode: extracted file '%(filename)s' had special bits set in mode '%(mode)s'") % {'filename': target, 'mode': oct(perms)})
                os.chmod(target, 0o555 if perms & 0o111 else 0o444)
526