This file is indexed.

/usr/lib/python2.7/dist-packages/cvs2svn_lib/generate_blobs.py is in cvs2svn 2.4.0-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
#!/usr/bin/env python -u
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2009-2010 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""Generate git blobs directly from RCS files.

Usage: generate_blobs.py BLOBFILE

To standard input should be written a series of pickles, each of which
contains the following tuple:

(RCSFILE, {CVS_REV : MARK, ...})

indicating which RCS file to read, which CVS revisions should be
written to the blob file, and which marks to give each of the blobs.

Since the tuples are read from stdin, either the calling program has
to write to this program's stdin in binary mode and ensure that this
program's standard input is opened in binary mode (e.g., using
Python's '-u' option) or both can be in text mode *provided* that
pickle protocol 0 is used.

The program does most of its work in RAM, keeping at most one revision
fulltext and one revision deltatext (plus perhaps one or two copies as
scratch space) in memory at a time.  But there are times when the
fulltext of a revision is needed multiple times, for example when
multiple branches sprout from the revision.  In these cases, the
fulltext is written to disk.  If the fulltext is also needed for the
blobfile, then the copy in the blobfile is read again when it is
needed.  If the fulltext is not needed in the blobfile, then it is
written to a temporary file created with Python's tempfile module."""

import sys
import os
import tempfile
import cPickle as pickle

# Put the parent of the directory containing this script at the front
# of the module search path (presumably so that the cvs2svn_lib
# package imported below is found when this file is run as a script):
sys.path.insert(0, os.path.dirname(os.path.dirname(sys.argv[0])))

from cvs2svn_lib.rcsparser import Sink
from cvs2svn_lib.rcsparser import parse
from cvs2svn_lib.rcs_stream import RCSStream


def read_marks():
  """Read 'REV MARK' lines from standard input.

  Each line of sys.stdin must consist of exactly two
  whitespace-separated fields: a CVS revision number (e.g., 1.2.3.4)
  and the mark to associate with it.  Return the resulting map
  {rev : mark}.  A line with any other number of fields raises
  ValueError."""

  rev_to_mark = {}
  for line in sys.stdin:
    # split() without arguments already discards leading and trailing
    # whitespace, including the line terminator.
    rev, mark = line.split()
    rev_to_mark[rev] = mark

  return rev_to_mark


class RevRecord(object):
  """Bookkeeping for a single CVS revision.

  Attributes:

    rev -- the revision number of this record.

    mark -- the mark under which the revision should be written as a
        blob, or None if no blob is (or is any longer) required.

    base -- the rev whose fulltext is the base for this one's delta,
        or None if no base has been recorded.

    refs -- the set of other revs that use this one as their base
        text.

    fulltext -- a tuple (f, offset, length) telling where the
        fulltext of this revision has been stored, or None if it has
        not been stored anywhere yet."""

  def __init__(self, rev, mark=None):
    self.rev = rev
    self.mark = mark
    self.base = None
    self.refs = set()
    self.fulltext = None

  def is_needed(self):
    """Return True iff a blob is pending or other revs depend on us."""

    if self.mark is not None:
      return True
    return bool(self.refs)

  def is_written(self):
    """Return True iff the fulltext has been stored in some file."""

    return not (self.fulltext is None)

  def write_blob(self, f, text):
    """Append TEXT to F as a blob labelled with self.mark.

    Remember where the payload (not the blob header) starts, so that
    read_fulltext() can retrieve it later, and then forget the mark."""

    size = len(text)
    header = ''.join([
        'blob\n',
        'mark :%s\n' % (self.mark,),
        'data %d\n' % (size,),
        ])

    f.seek(0, 2)
    f.write(header)
    start = f.tell()
    f.write(text)
    f.write('\n')

    self.fulltext = (f, start, size)

    # The blob has been emitted, so the mark has served its purpose.
    # Dropping it may cause is_needed() to turn False.
    self.mark = None

  def write(self, f, text):
    """Append the bare TEXT to F and remember where it was put."""

    f.seek(0, 2)
    start = f.tell()
    f.write(text)
    self.fulltext = (f, start, len(text))

  def read_fulltext(self):
    """Return the fulltext from wherever it was stored earlier."""

    location = self.fulltext
    assert location is not None
    (source, start, size) = location
    source.seek(start)
    return source.read(size)

  def __str__(self):
    stored = self.fulltext is not None
    if self.mark is None:
      return '%s: %r, %s' % (self.rev, self.refs, stored)
    return '%s (%r): %r, %s' % (
        self.rev, self.mark, self.refs, stored,
        )


class WriteBlobSink(Sink):
  """Sink that reconstructs revision fulltexts and writes blobs.

  An instance is handed to parse() together with one RCS file.  The
  methods define_revision(), tree_completed(), set_revision_info()
  and parse_completed() are the Sink callbacks implemented here
  (presumably invoked by the parser in that order -- confirm against
  cvs2svn_lib.rcsparser).

  For every revision that has a mark, the fulltext is written to
  BLOBFILE as a blob.  Fulltexts that are needed again later but that
  do not go into the blobfile are parked in a temporary file."""

  def __init__(self, blobfile, marks):
    """Initialize the sink.

    BLOBFILE is the file object to which blobs are appended; it is
    also read back via RevRecord.read_fulltext(), so it must be
    seekable and readable.  MARKS is a map {rev : mark} naming the
    revisions that should be written as blobs."""

    self.blobfile = blobfile

    # A map {rev : RevRecord} for all of the revisions whose fulltext
    # will still be needed:
    self.revrecs = {}

    # The revisions that need marks will definitely be needed, so
    # create records for them now (the rest will be filled in while
    # reading the RCS file):
    for (rev, mark) in marks.items():
      self.revrecs[rev] = RevRecord(rev, mark)

    # The RevRecord of the last fulltext that has been reconstructed,
    # if it still is_needed():
    self.last_revrec = None
    # An RCSStream holding the fulltext of last_revrec:
    self.last_rcsstream = None

    # A file to temporarily hold the fulltexts of revisions for which
    # no blobs are needed:
    self.fulltext_file = tempfile.TemporaryFile()

  def __getitem__(self, rev):
    """Return the RevRecord for REV, creating it if necessary.

    A record created here has no mark and is registered in
    self.revrecs."""

    try:
      return self.revrecs[rev]
    except KeyError:
      revrec = RevRecord(rev)
      self.revrecs[rev] = revrec
      return revrec

  def define_revision(self, rev, timestamp, author, state, branches, next):
    """Record the delta dependencies of revision REV.

    NEXT (if not None) and every entry of BRANCHES are registered as
    revisions whose deltas are based on REV.  TIMESTAMP, AUTHOR and
    STATE are not used."""

    revrec = self[rev]

    if next is not None:
      revrec.refs.add(next)

    revrec.refs.update(branches)

    for dependent_rev in revrec.refs:
      dependent_revrec = self[dependent_rev]
      # Each revision can have only one base:
      assert dependent_revrec.base is None
      dependent_revrec.base = rev

  def tree_completed(self):
    """Remove unneeded RevRecords.

    Remove the RevRecords for any revisions whose fulltext will not be
    needed (neither as blob output nor as the base of another needed
    revision)."""

    revrecs_to_remove = [
        revrec
        for revrec in self.revrecs.itervalues()
        if not revrec.is_needed()
        ]
    while revrecs_to_remove:
      revrec = revrecs_to_remove.pop()
      del self.revrecs[revrec.rev]
      if revrec.base is None:
        # This revision is not a delta against anything (e.g., it is
        # the revision whose fulltext is stored directly in the RCS
        # file), so there is no base record to unlink it from.
        # Looking up self[None] here would register a bogus record
        # and the refs.remove() below would raise KeyError.
        continue
      base_revrec = self[revrec.base]
      base_revrec.refs.remove(revrec.rev)
      # Dropping this reference may have made the base unneeded too:
      if not base_revrec.is_needed():
        revrecs_to_remove.append(base_revrec)

  def set_revision_info(self, rev, log, text):
    """Process the text stored in the RCS file for revision REV.

    TEXT is treated as the fulltext if REV has no base revision;
    otherwise it is applied as a diff to the fulltext of the base
    revision.  If REV has a mark, the resulting fulltext is written
    to the blobfile.  Revisions without a record in self.revrecs are
    ignored.  LOG is not used."""

    revrec = self.revrecs.get(rev)

    if revrec is None:
      # Nobody needs this revision (see tree_completed()).
      return

    base_rev = revrec.base
    if base_rev is None:
      # This must be the last revision on trunk, for which the
      # fulltext is stored directly in the RCS file:
      assert self.last_revrec is None
      if revrec.mark is not None:
        revrec.write_blob(self.blobfile, text)
      if revrec.is_needed():
        self.last_revrec = revrec
        self.last_rcsstream = RCSStream(text)
    elif self.last_revrec is not None and base_rev == self.last_revrec.rev:
      # Our base revision is stored in self.last_rcsstream.
      self.last_revrec.refs.remove(rev)
      if self.last_revrec.is_needed():
        # The base is about to be overwritten in place by
        # apply_diff(), but other revisions still depend on it, so
        # save a copy unless one already exists on disk:
        if not self.last_revrec.is_written():
          self.last_revrec.write(
              self.fulltext_file, self.last_rcsstream.get_text()
              )
      self.last_rcsstream.apply_diff(text)
      if revrec.mark is not None:
        revrec.write_blob(self.blobfile, self.last_rcsstream.get_text())
      if revrec.is_needed():
        self.last_revrec = revrec
      else:
        self.last_revrec = None
        self.last_rcsstream = None
    else:
      # Our base revision is not stored in self.last_rcsstream; it
      # will have to be obtained from elsewhere.

      # Store the old last_rcsstream if necessary:
      if self.last_revrec is not None:
        if not self.last_revrec.is_written():
          self.last_revrec.write(
              self.fulltext_file, self.last_rcsstream.get_text()
              )
        self.last_revrec = None
        self.last_rcsstream = None

      # Read the base fulltext back from wherever it was stored (the
      # blobfile or the temporary file):
      base_revrec = self[base_rev]
      rcsstream = RCSStream(base_revrec.read_fulltext())
      base_revrec.refs.remove(rev)
      rcsstream.apply_diff(text)
      if revrec.mark is not None:
        revrec.write_blob(self.blobfile, rcsstream.get_text())
      if revrec.is_needed():
        self.last_revrec = revrec
        self.last_rcsstream = rcsstream
      # Drop the local reference so that the stream can be freed
      # promptly if it was not retained above:
      del rcsstream

  def parse_completed(self):
    """Release the temporary fulltext file."""

    self.fulltext_file.close()


def main(args):
  """Write blobs for the RCS files listed on standard input.

  ARGS must be a list containing exactly one item, the name of the
  blobfile to create (any other length raises ValueError).  A series
  of pickled tuples (RCSFILE, {CVS_REV : MARK, ...}) is read from
  sys.stdin until end of file; each RCSFILE is parsed and the
  requested revisions are appended to the blobfile.

  The blobfile and each RCS file are closed even if processing
  fails."""

  [blobfilename] = args
  # The blobfile is opened for reading as well as writing because
  # fulltexts stored in it are read back when they are needed again.
  blobfile = open(blobfilename, 'w+b')
  try:
    while True:
      try:
        # NOTE: unpickling can execute arbitrary code, so standard
        # input must only ever be fed by a trusted parent process.
        (rcsfile, marks) = pickle.load(sys.stdin)
      except EOFError:
        break
      rcs_f = open(rcsfile, 'rb')
      try:
        parse(rcs_f, WriteBlobSink(blobfile, marks))
      finally:
        rcs_f.close()
  finally:
    blobfile.close()


# When run as a script, pass the command-line arguments (without the
# program name) to main():
if __name__ == '__main__':
  main(sys.argv[1:])