In iOS 4.0 Apple has redesigned the backup process.
iTunes used to store a list of filenames associated with backup files in the Manifest.plist file, but in iOS 4.0 it has moved this information to a Manifest.mbdb file.
You can see an example of this file by making a backup with your iOS 4.0 devices and looking in your ~/Library/Application Support/MobileSync/Backup folder (Look inside the subfolders with the most recent date)
Here's a screenshot of what the file looks like in a text editor:
(source: supercrazyawesome.com)
How do I parse this into a Cocoa application so that I can update my (free) iPhone Backup Extractor app (http://supercrazyawesome.com) for iOS 4.0?
For those looking for a Java implementation of an MBDB file reader, there are several out there:
"iPhone Analyzer" project (very clean code): http://sourceforge.net/p/iphoneanalyzer/code/HEAD/tree/trunk/library/src/main/java/com/crypticbit/ipa/io/parser/manifest/Mbdb.java
"iPhone Stalker" project: https://code.google.com/p/iphonestalker/source/browse/trunk/src/iphonestalker/util/io/MBDBReader.java
This python script is awesome.
Here's my Ruby version of it (with minor improvements and search capabilities), for iOS 5.
# encoding: utf-8
require 'fileutils'
require 'digest/sha1'
# Parses a Manifest.mbdb backup manifest into a list of file records and
# offers listing, searching and copying helpers on top of it.
#
# Each record is a Hash keyed by symbols (:domain, :filename, :linktarget,
# :mode, :type, :userid, :groupid, :mtime, :atime, :ctime, :filelen,
# :properties, :fileID, ...). :fileID is the SHA-1 hex digest of
# "<domain>-<filename>".
class ManifestParser
  # Length-prefixed string fields, in the order they appear in a record.
  STRING_FIELDS = [:domain, :filename, :linktarget, :datahash, :unknown1].freeze

  # Fixed-width big-endian integer fields that follow the 2-byte mode,
  # as [name, size in bytes], in the order they appear in a record.
  INT_FIELDS = [
    [:unknown2, 4], [:unknown3, 4], [:userid, 4], [:groupid, 4],
    [:mtime, 4], [:atime, 4], [:ctime, 4], [:filelen, 8],
    [:flag, 1], [:numprops, 1]
  ].freeze

  # Permission bit masks paired with the letter shown when the bit is set.
  PERMISSION_FLAGS = [[0x4, 'r'], [0x2, 'w'], [0x1, 'x']].freeze

  # * mbdb_filename Path of the Manifest.mbdb file to parse.
  # * verbose When true, to_s/to_file emit a long, ls-like line per record.
  #
  # Raises if the file does not start with the 'mbdb' magic or is truncated.
  def initialize(mbdb_filename, verbose = false)
    @verbose = verbose
    process_mbdb_file(mbdb_filename)
  end

  # Returns the number of records in the manifest.
  def record_number
    @mbdb.size
  end

  # Returns one string with a newline-terminated line per record.
  def to_s
    @mbdb.map { |v| "#{fileinfo_str(v)}\n" }.join
  end

  # Writes one line per record to +filename+ (overwriting it).
  def to_file(filename)
    File.open(filename, 'w') do |f|
      @mbdb.each { |v| f.puts fileinfo_str(v) }
    end
  end

  # Copy the backup files to their real path/name ("<domain>/<filename>",
  # relative to the current directory). The source of each copy is the
  # record's fileID, also resolved relative to the current directory.
  # * domain_match Can be a regexp to restrict the files to copy.
  # * filename_match Can be a regexp to restrict the files to copy.
  #
  # NOTE(review): the destination path is built straight from manifest
  # contents; a filename containing "../" would escape the domain directory.
  # Confirm the manifest is trusted before running this.
  def rename_files(domain_match = nil, filename_match = nil)
    @mbdb.each do |v|
      next unless v[:type] == '-' # Only regular files are copied.
      next unless domain_match.nil? || v[:domain] =~ domain_match
      next unless filename_match.nil? || v[:filename] =~ filename_match

      dst = "#{v[:domain]}/#{v[:filename]}"
      puts "Creating: #{dst}"
      FileUtils.mkdir_p(File.dirname(dst))
      FileUtils.cp(v[:fileID], dst)
    end
  end

  # Returns the records whose "<domain>::<filename>" matches the given regexp.
  def search(regexp)
    @mbdb.select { |v| "#{v[:domain]}::#{v[:filename]}" =~ regexp }
  end

  private

  # Retrieve an integer (big-endian) and new offset from the current offset.
  # Raises if the data ends before +intsize+ bytes could be read.
  def getint(data, offset, intsize)
    value = 0
    intsize.times do
      byte = data.getbyte(offset)
      raise "Truncated MBDB file: unexpected end of data at offset #{offset}" if byte.nil?

      value = (value << 8) + byte
      offset += 1
    end
    [value, offset]
  end

  # Retrieve a string and new offset from the current offset into the data.
  # A length field of 0xFFFF denotes a blank string.
  def getstring(data, offset)
    return ['', offset + 2] if data.getbyte(offset) == 0xFF && data.getbyte(offset + 1) == 0xFF

    length, offset = getint(data, offset, 2) # 2-byte length
    value = data[offset...(offset + length)]
    if value.nil? || value.bytesize < length
      raise "Truncated MBDB file: string of #{length} bytes at offset #{offset} runs past the end"
    end

    [value, offset + length]
  end

  # Reads +filename+ in binary mode and fills @mbdb with one Hash per record.
  def process_mbdb_file(filename)
    @mbdb = []
    data = File.binread(filename)
    puts "MBDB file read. Size: #{data.size}"
    raise 'This does not look like an MBDB file' if data[0...4] != 'mbdb'

    offset = 4
    offset += 2 # value x05 x00, not sure what this is
    while offset < data.size
      fileinfo, offset = read_record(data, offset)
      @mbdb << fileinfo
    end
    @mbdb
  end

  # Parses the record starting at +offset+; returns [record, next offset].
  def read_record(data, offset)
    fileinfo = { start_offset: offset }
    STRING_FIELDS.each do |field|
      fileinfo[field], offset = getstring(data, offset)
    end
    fileinfo[:mode], offset = getint(data, offset, 2)
    fileinfo[:type] = file_type(fileinfo[:mode])
    INT_FIELDS.each do |field, size|
      fileinfo[field], offset = getint(data, offset, size)
    end
    fileinfo[:properties] = {}
    fileinfo[:numprops].times do
      propname, offset = getstring(data, offset)
      propval, offset = getstring(data, offset)
      fileinfo[:properties][propname] = propval
    end
    # Compute the ID of the file.
    fullpath = fileinfo[:domain] + '-' + fileinfo[:filename]
    fileinfo[:fileID] = Digest::SHA1.hexdigest(fullpath)
    [fileinfo, offset]
  end

  # Maps the type bits of a mode to 'l' (symlink), '-' (file), 'd' (dir)
  # or '?' when the bits match none of those.
  def file_type(mode)
    case mode & 0xE000
    when 0xA000 then 'l'
    when 0x8000 then '-'
    when 0x4000 then 'd'
    else '?'
    end
  end

  # Renders the low nine permission bits of +val+ as e.g. "rw-r--r--".
  def modestr(val)
    [6, 3, 0].map { |shift| permission_triplet(val >> shift) }.join
  end

  # Renders the low three bits of +bits+ as "rwx"-style flags.
  # The result of & is compared against zero explicitly because 0 is truthy
  # in Ruby, so a bare `(bits & mask) ? ... : ...` would always pick the flag.
  def permission_triplet(bits)
    PERMISSION_FLAGS.map { |mask, flag| (bits & mask).zero? ? '-' : flag }.join
  end

  # Formats one record: a short "(fileID)domain::filename" form, or, when
  # verbose, a long line with type, permissions, owner ids, size and times,
  # followed by the symlink target (symlinks only) and any properties.
  def fileinfo_str(f)
    return "(#{f[:fileID]})#{f[:domain]}::#{f[:filename]}" unless @verbose

    fields = [
      f[:type], modestr(f[:mode]), f[:userid], f[:groupid], f[:filelen],
      f[:mtime], f[:atime], f[:ctime], f[:fileID], f[:domain], f[:filename]
    ]
    info = format('%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s', *fields)
    info += ' -> ' + f[:linktarget] if f[:type] == 'l' # Symlink destination
    f[:properties].each do |k, v|
      info += " #{k}=#{v.inspect}"
    end
    info
  end
end
# When run directly (not required), parse ./Manifest.mbdb in verbose mode
# and dump one line per record into ./filenames.txt.
if $PROGRAM_NAME == __FILE__
  parser = ManifestParser.new('Manifest.mbdb', true)
  parser.to_file('filenames.txt')
end
You can find information and a little description of the MBDB/MBDX format here:
http://code.google.com/p/iphonebackupbrowser/
This is my application to browse the backup files. I have tried to document the format of the new files that come with iTunes 9.2.
In iOS 5, the Manifest.mbdx file was eliminated. For the purpose of this article, it was redundant anyway, because the domain and path are in Manifest.mbdb and the ID hash can be generated with SHA1.
Here is my update of galloglass's code so it works with backups of iOS 5 devices. The only changes are elimination of process_mbdx_file() and addition of a few lines in process_mbdb_file().
Tested with backups of an iPhone 4S and an iPad 1, both with plenty of apps and files.
#!/usr/bin/env python
import sys
import hashlib
# Maps a record's start offset in Manifest.mbdb to the SHA-1 hex digest of
# "<domain>-<filename>". Filled in by process_mbdb_file() and read by the
# main script to set each record's 'fileID'.
mbdx = {}
def getint(data, offset, intsize):
    """Read a big-endian unsigned integer of ``intsize`` characters.

    ``data`` is indexed one character at a time and each character is
    converted with ``ord``. Returns ``(value, new_offset)`` where
    ``new_offset`` points just past the integer.
    """
    result = 0
    for _ in range(intsize):
        result = (result << 8) + ord(data[offset])
        offset += 1
    return result, offset
def getstring(data, offset):
    """Read a length-prefixed string starting at ``offset``.

    A 2-byte big-endian length precedes the string; two 0xFF characters in
    place of the length denote a blank string. Returns
    ``(string, new_offset)``.
    """
    blank_marker = chr(0xFF)
    if data[offset] == blank_marker and data[offset + 1] == blank_marker:
        return '', offset + 2  # Blank string
    size, start = getint(data, offset, 2)  # 2-byte length
    end = start + size
    return data[start:end], end
def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file into a dict of file records.

    Returns a dict mapping each record's start offset in the file to a dict
    of its fields ('domain', 'filename', 'linktarget', 'mode', 'userid',
    'groupid', 'mtime', 'atime', 'ctime', 'filelen', 'properties', ...).

    Side effect: for every record, stores the SHA-1 hex digest of
    "<domain>-<filename>" in the module-level ``mbdx`` dict under the same
    start offset.

    Raises Exception if the file does not start with "mbdb".
    """
    mbdb = {}  # Map offset of info in this file => file info
    # The manifest is binary data: open it in binary mode so no newline
    # translation or early end-of-file handling is applied on platforms that
    # distinguish text from binary, and close the handle deterministically.
    with open(filename, 'rb') as manifest:
        data = manifest.read()
    if data[0:4] != "mbdb":
        raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is
    while offset < len(data):
        fileinfo = {}
        fileinfo['start_offset'] = offset
        for field in ('domain', 'filename', 'linktarget', 'datahash', 'unknown1'):
            fileinfo[field], offset = getstring(data, offset)
        for field, size in (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                            ('userid', 4), ('groupid', 4), ('mtime', 4),
                            ('atime', 4), ('ctime', 4), ('filelen', 8),
                            ('flag', 1), ('numprops', 1)):
            fileinfo[field], offset = getint(data, offset, size)
        fileinfo['properties'] = {}
        for ii in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            fileinfo['properties'][propname] = propval
        mbdb[fileinfo['start_offset']] = fileinfo
        # The backup file ID is the SHA-1 of "<domain>-<filename>".
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        id = hashlib.sha1(fullpath)
        mbdx[fileinfo['start_offset']] = id.hexdigest()
    return mbdb
def modestr(val):
    """Render the low nine permission bits of ``val`` as e.g. 'rw-r--r--'.

    The bits are read as three groups (bits 8-6, 5-3 and 2-0), each shown
    as read/write/execute flags with '-' for an unset bit.
    """
    flags = ((0x4, 'r'), (0x2, 'w'), (0x1, 'x'))

    def triplet(bits):
        return ''.join(letter if bits & mask else '-' for mask, letter in flags)

    return ''.join(triplet(val >> shift) for shift in (6, 3, 0))
def fileinfo_str(f, verbose=False):
    """Format one file record as a single line of text.

    With ``verbose`` false, returns "(fileID)domain::filename". With
    ``verbose`` true, returns an ls-like line: type character, permission
    string, user id, group id, size, mtime, atime, ctime, then the short
    form, followed by " -> target" for symlinks and " name=value" for each
    extra property.

    A mode whose type bits are not symlink/file/dir is shown as '?' and
    reported on stderr.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    kinds = {0xA000: 'l',   # symlink
             0x8000: '-',   # file
             0x4000: 'd'}   # dir
    kind = kinds.get(f['mode'] & 0xE000)
    if kind is None:
        # sys.stderr.write is used instead of the "print >>" statement so the
        # function parses on both Python 2 and Python 3.
        sys.stderr.write("Unknown file type %04x for %s\n" % (f['mode'], fileinfo_str(f, False)))
        kind = '?'  # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
             f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename']))
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info
# When true, the main script prints the long, ls-like form of every record.
verbose = True

if __name__ == '__main__':
    # Parse ./Manifest.mbdb; process_mbdb_file also fills the global mbdx
    # with the file ID of every record.
    mbdb = process_mbdb_file("Manifest.mbdb")
    # Walk the records by ascending start offset so the listing follows the
    # order of the manifest instead of arbitrary dict order.
    for offset in sorted(mbdb):
        fileinfo = mbdb[offset]
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n" % fileinfo_str(fileinfo))
        # Parenthesised single-argument print works as a statement on
        # Python 2 and as a function call on Python 3.
        print(fileinfo_str(fileinfo, verbose))
I liked galloglass's code, and I changed the main function so that it shows a sorted list of total size by application:
# When true, every record is printed in its long, ls-like form.
verbose = True

if __name__ == '__main__':
    # NOTE(review): process_mbdx_file is not defined in the scripts shown
    # here; presumably it comes from the original script this replaces.
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    sizes = {}
    for offset, fileinfo in mbdb.items():
        if offset not in mbdx:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s" % fileinfo_str(fileinfo) + "\n")
        else:
            fileinfo['fileID'] = mbdx[offset]
        print(fileinfo_str(fileinfo, verbose))
        # Only regular files count towards a domain's total size.
        if (fileinfo['mode'] & 0xE000) != 0x8000:
            continue
        domain = fileinfo['domain']
        sizes[domain] = sizes.get(domain, 0) + fileinfo['filelen']
    # Smallest total first, largest last.
    for domain, total in sorted(sizes.items(), key=lambda entry: entry[1]):
        print("%-60s %11d (%dMB)" % (domain, total, total // 1024 // 1024))
That way you can figure out what application is eating all that space.
I finished my work on this stuff - that is, iOS 4 + iTunes 9.2 update of my backup decoder library for Python - http://www.iki.fi/fingon/iphonebackupdb.py
It does what I need, little documentation, but feel free to copy ideas from there ;-)
(Seems to work fine with my backups at least.)
Thanks to galloglass's answer. The code works great with Python 2.7. There is only one thing I want to mention: when reading the Manifest.mbdb file, you should open it in binary mode. Otherwise, not all of the content is read.
I also made some minor changes to make the code work with Python 3.4. Here is the code.
#!/usr/bin/env python
import sys
import hashlib
# Maps a record's start offset in Manifest.mbdb to the SHA-1 hex digest
# computed for it by process_mbdb_file(); the main script reads it to set
# each record's 'fileID'.
mbdx = {}
def getint(data, offset, intsize):
    """Read a big-endian unsigned integer of ``intsize`` bytes.

    ``data`` is indexed one element at a time and each element is added as
    a number, so it is expected to be a ``bytes``-like object. Returns
    ``(value, new_offset)`` where ``new_offset`` points just past the
    integer.
    """
    total = 0
    for _ in range(intsize):
        total = (total << 8) + data[offset]
        offset += 1
    return total, offset
def getstring(data, offset):
    """Read a length-prefixed string starting at ``offset``.

    A 2-byte big-endian length precedes the string; two 0xFF bytes in place
    of the length denote a blank string. The bytes are decoded as latin-1,
    so every byte maps to exactly one character. Returns
    ``(string, new_offset)``.
    """
    if data[offset] == 0xFF and data[offset + 1] == 0xFF:
        return '', offset + 2  # Blank string
    size, start = getint(data, offset, 2)  # 2-byte length
    end = start + size
    raw = data[start:end]
    return raw.decode('latin-1'), end
def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file into a dict of file records.

    Returns a dict mapping each record's start offset in the file to a dict
    of its fields ('domain', 'filename', 'linktarget', 'mode', 'userid',
    'groupid', 'mtime', 'atime', 'ctime', 'filelen', 'properties', ...).

    Side effect: for every record, stores the SHA-1 hex digest of
    "<domain>-<filename>" in the module-level ``mbdx`` dict under the same
    start offset.

    Raises Exception if the file does not start with b"mbdb".
    """
    mbdb = {}  # Map offset of info in this file => file info
    # 'b' is needed to read all content at once; the with-block closes the file.
    with open(filename, 'rb') as manifest:
        data = manifest.read()
    # Compare raw bytes: decoding an arbitrary 4-byte header could itself fail
    # with UnicodeDecodeError before the intended error is raised.
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for field in ('domain', 'filename', 'linktarget', 'datahash', 'unknown1'):
            fileinfo[field], offset = getstring(data, offset)
        for field, size in (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                            ('userid', 4), ('groupid', 4), ('mtime', 4),
                            ('atime', 4), ('ctime', 4), ('filelen', 8),
                            ('flag', 1), ('numprops', 1)):
            fileinfo[field], offset = getint(data, offset, size)
        fileinfo['properties'] = {}
        for ii in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            fileinfo['properties'][propname] = propval
        mbdb[fileinfo['start_offset']] = fileinfo
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        # getstring decodes with latin-1, so encoding with latin-1 restores
        # the exact bytes stored in the manifest. The default UTF-8 encoding
        # would produce different bytes, and so a different digest, for any
        # path containing non-ASCII bytes.
        id = hashlib.sha1(fullpath.encode('latin-1'))
        mbdx[fileinfo['start_offset']] = id.hexdigest()
    return mbdb
def modestr(val):
    """Render the low nine permission bits of ``val`` as e.g. 'rw-r--r--'.

    Three groups of three bits (bits 8-6, 5-3, 2-0) are each rendered as
    read/write/execute flags, with '-' standing in for an unset bit.
    """
    def mode(bits):
        letters = []
        for mask, letter in ((0x4, 'r'), (0x2, 'w'), (0x1, 'x')):
            letters.append(letter if bits & mask else '-')
        return ''.join(letters)

    groups = [mode(val >> shift) for shift in (6, 3, 0)]
    return ''.join(groups)
def fileinfo_str(f, verbose=False):
    """Format one file record as a single line of text.

    With ``verbose`` false, returns "(fileID)domain::filename". With
    ``verbose`` true, returns an ls-like line: type character, permission
    string, user id, group id, size, mtime, atime, ctime, then the short
    form, followed by " -> target" for symlinks and " name=value" for each
    extra property.

    A mode whose type bits are not symlink/file/dir is shown as '?' and
    reported on stderr.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    type_bits = f['mode'] & 0xE000
    if type_bits == 0xA000:
        kind = 'l'  # symlink
    elif type_bits == 0x8000:
        kind = '-'  # file
    elif type_bits == 0x4000:
        kind = 'd'  # dir
    else:
        # print() with file= replaces the Python 2 "print >> stream" form,
        # which raises TypeError when evaluated on Python 3.
        print("Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False)),
              file=sys.stderr)
        kind = '?'  # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
             f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename']))
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info
# When true, the main script prints the long, ls-like form of every record.
verbose = True

if __name__ == '__main__':
    # Parse ./Manifest.mbdb; process_mbdb_file also fills the global mbdx
    # with the file ID of every record.
    mbdb = process_mbdb_file(
        r"Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            # print() with file= replaces the Python 2 "print >> stream"
            # form, which raises TypeError when evaluated on Python 3.
            print("No fileID found for %s" % fileinfo_str(fileinfo), file=sys.stderr)
        print(fileinfo_str(fileinfo, verbose))
Source: Stackoverflow.com