This file is indexed.

/usr/share/pyshared/VMDKstream.py is in vmdk-stream-converter 0.2-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

#!/usr/bin/env python
# encoding: utf-8

# Copyright (C) 2011 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA  02110-1301, USA.  A copy of the GNU General Public License is
# also available at http://www.gnu.org/copyleft/gpl.html.
#
# First cut module to convert raw disk images into stream-optimized VMDK files
#
# See the "Specification Document" referenced in Wikipedia for more details:
#
# http://en.wikipedia.org/wiki/VMDK
#
# Divergence from spec noted below
#
# The stream-optimized format is required for importing images via the vSphere
# SOAP API
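#
# Output layout produced by convert_to_stream(), in write order: sparse header
# (one sector), embedded text descriptor, zero padding up to the 128-sector
# overhead, then one grain marker plus zlib-compressed data per non-zero
# 128-sector grain, a grain-table marker and table after every 512 grains,
# the grain directory (with its marker), a footer marker, a footer (the header
# repeated with the real grain-directory offset), and an end-of-stream marker.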

import struct
import sys
import os
import math
import string
import zlib

class VMDKStreamException(Exception):
    def __init__(self, msg):
        self.msg = msg
    def __str__(self):
        return self.msg

# Header Constants
MAGIC_NUMBER = 0x564D444B # 'V' 'M' 'D' 'K'

# Marker Constants
MARKER_EOS = 0 # end of stream
MARKER_GT = 1 # grain table
MARKER_GD = 2 # grain directory
MARKER_FOOTER = 3 # footer (repeat of header with final info)

# Other Constants
SECTOR_SIZE = 512

# Descriptor Template
image_descriptor_template='''# Description file created by VMDK stream converter
version=1
# Believe this is random
CID=7e5b80a7
# Indicates no parent
parentCID=ffffffff
createType="streamOptimized"

# Extent description
RDONLY #SECTORS# SPARSE "call-me-stream.vmdk"

# The Disk Data Base
#DDB

ddb.adapterType = "lsilogic"
# #SECTORS# / 63 / 255 rounded up
ddb.geometry.cylinders = "#CYLINDERS#"
ddb.geometry.heads = "255"
ddb.geometry.sectors = "63"
# Believe this is random
ddb.longContentID = "8f15b3d0009d9a3f456ff7b28d324d2a"
ddb.virtualHWVersion = "7"'''


def create_sparse_header(inFileSectors, descriptorSize,
                         gdOffset = 0xFFFFFFFFFFFFFFFF):
    # While theoretically variable, we set these based on current VMWare
    # convention
    grainSize = 128
    numGTEsPerGT = 512
    overHead = 128
    formatVersion = 3 # NOTE: Conflicts with VMWare docs - determined by trial/error
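    # With these values, one grain holds 128 * 512 = 64 KiB of raw data and a
    # 512-entry grain table therefore maps 32 MiB of the virtual disk.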

    descriptorOffset = 1

    # The following are always fixed in the "stream-optimized" format we are
    # creating
    compressAlgorithm = 1
    flags = 0x30001
    rgdOffset = 0

    # We are building from scratch so an unclean shutdown is not possible
    uncleanShutdown = 0

    # Build the struct
    header_list = [ MAGIC_NUMBER, formatVersion, flags, inFileSectors,
                    grainSize, descriptorOffset, descriptorSize, numGTEsPerGT,
                    rgdOffset, gdOffset, overHead, uncleanShutdown,
                    '\n', ' ', '\r', '\n', compressAlgorithm ]
    for i in range(433):
        header_list.append(0)
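    # Field order matches the SparseExtentHeader layout in the VMDK spec:
    # magicNumber, version, flags, capacity, grainSize, descriptorOffset,
    # descriptorSize, numGTEsPerGT, rgdOffset, gdOffset, overHead,
    # uncleanShutdown, four end-of-line detection characters,
    # compressAlgorithm, and 433 pad bytes (512 bytes total, one sector).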
    header_struct = "=IIIQQQQIQQQBccccH433B"
    return struct.pack(header_struct, *header_list)

def create_marker(numSectors, size, marker_type):
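    # A metadata marker fills one 512-byte sector: a 64-bit value (the sector
    # count of the metadata that follows, or 0 for the end-of-stream marker),
    # a 32-bit size (0 for metadata markers), the 32-bit marker type, and
    # 496 pad bytes.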
    marker_list = [ numSectors, size, marker_type ]
    for i in range(496):
        marker_list.append(0)
    marker_struct = "=QII496B"
    return struct.pack(marker_struct, *marker_list)

def create_grain_marker(location, size):
    # The grain marker is special in that the data follows immediately after it
    # without a pad
    return struct.pack("=QI", location, size)

def divro(num, den):
    # Divide always rounding up and returning an integer
    # Is there some nicer way to do this?
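    # (an all-integer alternative would be (num + den - 1) // den)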
    return int(math.ceil((1.0*num)/(1.0*den)))

def pad_to_sector(stringlen):
    # create a pad that, when concatenated onto a string of length stringlen,
    # makes it an integer number of sectors
    # return the pad and the length of input string + pad, in sectors
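    # e.g. a 700-byte descriptor gets 324 pad bytes and spans 2 sectors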
    pad = ""
    if stringlen % SECTOR_SIZE:
        # This does need padding
        for i in range(SECTOR_SIZE - (stringlen % SECTOR_SIZE)):
            pad += '\0'
    finallen = (stringlen + len(pad))/SECTOR_SIZE
    return pad, finallen

def sector_pointer(file_object):
    # return the file pointer position in sectors
    # raise an exception if not sector aligned
    file_location = file_object.tell()
    if file_location % SECTOR_SIZE:
        raise VMDKStreamException("Asked for a sector pointer on a file whose r/w pointer is not sector aligned")
    else:
        return file_location / SECTOR_SIZE


def write_grain_table(outfile, grain_table, gtes_per_gt = 512):
    # Write grain_table to outfile including header
    # return the sector on which the table starts

    zero_grain_table = [ ]
    for i in range(gtes_per_gt):
        zero_grain_table.append(0)

    if grain_table == zero_grain_table:
        # We don't need to write this and can put zeros in the directory
        return 0
    else:
        grain_table_marker = create_marker(numSectors = (gtes_per_gt * 4) / SECTOR_SIZE,
                                           size = 0, marker_type = MARKER_GT)
        outfile.write(grain_table_marker)
        table_location = sector_pointer(outfile)
        outfile.write(struct.pack("%dI" % gtes_per_gt, *grain_table))
        return table_location

def debug_print(message):
    #print message
    pass

def convert_to_stream(infilename, outfilename):
    debug_print("DEBUG: opening %s to write to %s" % (infilename, outfilename))

    infileSize = os.path.getsize(infilename)
    infileSectors = divro(infileSize, 512)
    debug_print("DEBUG: input file is (%s) bytes - (%s) sectors long" % (infileSize, infileSectors))

    # Fixed by convention
    # TODO: Make variable here and in header function
    grainSectors=128
    totalGrains=divro(infileSectors, grainSectors)
    debug_print("DEBUG: total grains will be (%s)" % (totalGrains))

    # Fixed by convention
    # TODO: Make variable here and in header function
    numGTEsPerGT = 512
    totalGrainTables=divro(totalGrains, numGTEsPerGT)
    debug_print("DEBUG: total Grain Tables needed will be (%s)" % (totalGrainTables))

    grainDirectorySectors=divro(totalGrainTables*4, SECTOR_SIZE)
    debug_print("DEBUG: sectors in Grain Directory will be (%s)" % (grainDirectorySectors))

    grainDirectoryEntries=grainDirectorySectors*128
    debug_print("DEBUG: Number of entries in Grain Directory - (%s)" % (grainDirectoryEntries))

    infileCylinders=divro(infileSectors, (63*255))
    debug_print("DEBUG: Cylinders (%s)" % infileCylinders)

    # Populate descriptor
    tmpl = image_descriptor_template
    tmpl = string.replace(tmpl, "#SECTORS#", str(infileSectors))
    tmpl = string.replace(tmpl, "#CYLINDERS#", str(infileCylinders))
    image_descriptor = tmpl

    image_descriptor_pad, desc_sectors = pad_to_sector(len(image_descriptor))
    debug_print("DEBUG: Descriptor takes up (%s) sectors" % desc_sectors)
    image_descriptor += image_descriptor_pad

    image_header = create_sparse_header(inFileSectors = infileSectors,
                                        descriptorSize = desc_sectors)

    outfile = open(outfilename, "wb")
    outfile.write(image_header)
    outfile.write(image_descriptor)

    # Fixed by convention
    # TODO: Make variable here and in header function
    overHead = 128

    # Pad the output file to fill the overHead
    for i in range((overHead-sector_pointer(outfile)) * SECTOR_SIZE):
        outfile.write('\0')

    # grainDirectory - list of integers representing the global level 0 grain
    # directory
    grainDirectory = [ ]

    # currentGrainTable - list that can grow to numGTEsPerGT integers
    # representing the active grain table
    currentGrainTable = [ ]

    # For slightly more efficient comparison
    grainSize = grainSectors * SECTOR_SIZE
    zeroChunk = ""
    for i in range(grainSize):
        zeroChunk += '\0'

    # We are ready to start reading
    infile = open(infilename, "rb")

    try:
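        # Main loop: read the input one grain (64 KiB) at a time.  An all-zero
        # grain is recorded as 0 in the grain table; any other grain is written
        # as a grain marker plus zlib-compressed data, padded to a sector
        # boundary.  Every 512 entries the grain table is flushed and its
        # location is recorded in the grain directory.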
        inputSectorPointer = sector_pointer(infile)
        inChunk = infile.read(grainSize)
        while inChunk != "":
            if inChunk == zeroChunk:
                # All zeros - no need to create a grain - just mark zero in GTE
                currentGrainTable.append(0)
            else:
                # Create a compressed grain
                currentGrainTable.append(sector_pointer(outfile))
                compChunk = zlib.compress(inChunk)
                grain_marker = create_grain_marker(inputSectorPointer,
                                                   len(compChunk))
                grainPad, writeSectors = pad_to_sector(len(compChunk) + len(grain_marker))
                outfile.write(grain_marker)
                outfile.write(compChunk)
                outfile.write(grainPad)

            if len(currentGrainTable) == numGTEsPerGT:
                # Table is full
                table_location =  write_grain_table(outfile, currentGrainTable,
                                                    gtes_per_gt = numGTEsPerGT)
                # function does zero check so we don't have to
                grainDirectory.append(table_location)
                currentGrainTable = [ ]
            # do not update pointer unless we read a full grain last time
            # incomplete grain read indicates EOF and may result in non-sector
            # alignment
            if len(inChunk) == grainSize:
                inputSectorPointer = sector_pointer(infile)
            # read the next chunk
            inChunk = infile.read(grainSize)
    finally:
        # Write out the final grain table if needed
        if len(currentGrainTable):
            debug_print("Partial grain table present - padding and adding it to dir")
            for i in range(numGTEsPerGT-len(currentGrainTable)):
                currentGrainTable.append(0)
            table_location = write_grain_table(outfile, currentGrainTable,
                                               gtes_per_gt = numGTEsPerGT)
            grainDirectory.append(table_location)
        else:
            debug_print("Current grain table is empty so we need not write it out")

        # pad out grain directory then write it
        for i in range(grainDirectoryEntries - totalGrainTables):
            grainDirectory.append(0)
        grain_directory_marker = create_marker(grainDirectorySectors, 0,
                                               MARKER_GD)
        outfile.write(grain_directory_marker)
        gdLocation = sector_pointer(outfile)
        grainDirectoryStruct = "%dI" % grainDirectoryEntries
        debug_print("Grain directory length (%d)" % (len(grainDirectory)))
        debug_print("Grain directory: ")
        debug_print(grainDirectory)
        outfile.write(struct.pack(grainDirectoryStruct, *grainDirectory))

        # footer marker
        outfile.write(create_marker(1, 0, MARKER_FOOTER))

        # footer
        footer = create_sparse_header(inFileSectors = infileSectors,
                                      descriptorSize = desc_sectors,
                                      gdOffset = gdLocation)
        outfile.write(footer)

        # EOS marker
        outfile.write(create_marker(0, 0, MARKER_EOS))
        outfile.close()
        infile.close()

if __name__ == '__main__':
    convert_to_stream(sys.argv[1], sys.argv[2])
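
Besides being run directly as a script (the __main__ block above passes two
command-line arguments straight to convert_to_stream), the module can be
imported and called from other Python code. A minimal sketch, assuming the
module is importable and using placeholder file names:

import VMDKstream
VMDKstream.convert_to_stream("disk.raw", "disk-stream.vmdk")

The equivalent command-line invocation would be:

python VMDKstream.py disk.raw disk-stream.vmdk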