/usr/share/pyshared/Codeville/upgrade.py is in codeville 0.8.0-2.
This file is owned by root:root, with mode 0o644.
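A quick way to double-check the ownership and permissions on an installed system (a minimal sketch; it assumes the package is installed at this default path and is run with Python 2, matching the syntax of the file below):

    import os
    import stat

    st = os.stat('/usr/share/pyshared/Codeville/upgrade.py')
    print oct(stat.S_IMODE(st.st_mode))   # expected: 0644
    print st.st_uid, st.st_gid            # expected: 0 0 (root:root)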
The actual contents of the file can be viewed below.
import binascii
from Codeville.bencode import bdecode, bencode
from Codeville.client_helpers import create_handle, gen_diff
from Codeville.DFS import DFS
from Codeville.history import sync_history, write_changeset
from Codeville.history import roothandle, rootnode
from Codeville.history import read_diff, write_diff, write_index
from Codeville.history import handle_contents_at_point
from Codeville.history import handle_name_at_point
from Codeville.history import HistoryError
from Codeville.old.history import handle_contents_at_point as old_handle_contents_at_point
from Codeville.old.history import handle_name_at_point as old_handle_name_at_point
import copy
import hashlib
from sys import stdout
import zlib
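
# UpgradeRepository carries the state for converting a repository from the
# old history format to the new one: the old->new changeset mapping
# (point_map), the old->new file handle mapping (handle_map), and the file
# handles found in the old static db (all_old_handles).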
class UpgradeRepository:
    def __init__(self, old_repo, new_repo, txn):
        self.point_map = {}
        self.handle_map = {}
        self.all_old_handles = {}

        self.old_repo = old_repo
        self.new_repo = new_repo
        self.txn = txn
        return

    def sort_history(self, handle_list):
        history_dfs = DFS(self._history_deps, [self.old_repo])
        for point in handle_list:
            history_dfs.search(point)
        return history_dfs.result()
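
    # DFS callback: a changeset's dependencies are its precursors, reversed in
    # place so they are visited in the opposite order to how they were recorded.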
    def _history_deps(node, args):
        co = args[0]

        cset = bdecode(co.lcrepo.get(node))
        cset['precursors'].reverse()

        return cset['precursors']

    _history_deps = staticmethod(_history_deps)

    def sort_names(self, handles):
        name_dfs = DFS(self._name_deps, [handles])
        for old_handle in handles.keys():
            name_dfs.search(old_handle)
        return name_dfs.result()
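
    # DFS callback: a handle with a 'parent' entry depends on that parent if
    # the parent also has a 'name' entry in the same changeset, so parent
    # directories are ordered before their children.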
    def _name_deps(node, args):
        handles = args[0]

        if handles.has_key(node) and handles[node].has_key('parent'):
            parent = handles[node]['parent']
            if handles.has_key(parent) and handles[parent].has_key('name'):
                return [parent]

        return []

    _name_deps = staticmethod(_name_deps)
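
    # Collects handles whose dag entry at this point has more than one
    # precursor and no 'handle' key, i.e. merges that were resolved
    # automatically in the old repository and so never appear explicitly in
    # the old changeset.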
    def clean_merges(self, UR, dagdb, point):
        clean_merges = {}
        handles = []
        for handle in UR.all_old_handles.keys():
            if not dagdb.has_key(handle + point):
                continue

            hinfo = bdecode(dagdb.get(handle + point))

            if hinfo.has_key('handle'):
                continue

            if len(hinfo['precursors']) <= 1:
                continue

            clean_merges[handle] = 1
            handles.append(handle)

        return clean_merges, handles
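
# Walks every changeset reachable from 'changes' in topological order (root
# first) and converts each one into the new repository, printing a progress
# dot per changeset.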
def upgrade(old_repo, new_repo, changes, txn):
    UR = UpgradeRepository(old_repo, new_repo, txn)

    for old_handle in old_repo.staticdb.keys():
        hinfo = bdecode(old_repo.staticdb.get(old_handle))
        if hinfo['type'] == 'file':
            UR.all_old_handles[old_handle] = hinfo

    # sort the history
    ordering = UR.sort_history(changes)

    # sort again for better dag construction
    ordering.reverse()
    ordering = UR.sort_history(ordering)

    assert rootnode == ordering[0]

    print "%d changesets to convert" % (len(ordering), )

    for point in ordering:
        new_point = convert_cset(UR, point)

        stdout.write('.')
        stdout.flush()

    return UR
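
# Converts a single changeset: remaps its precursors and handles through the
# maps built so far, regenerates file diffs against the new repository, and
# writes the new changeset, diffs and indices under a freshly computed point
# name.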
def convert_cset(UR, point):
    indices = {}

    old_cset = bdecode(UR.old_repo.lcrepo.get(point))

    new_cset = {}
    new_cset['precursors'] = [UR.point_map[pre] for pre in old_cset['precursors']]

    if old_cset.has_key('time'):
        new_cset['time'] = old_cset['time']

    if old_cset.has_key('user'):
        new_cset['user'] = old_cset['user']

    # some heuristics for comments and whether this was a server change
    clean_merge = True
    force_new_cset = False

    if old_cset.has_key('comment'):
        clean_merge = False
        new_cset['comment'] = old_cset['comment'].rstrip()
        if len(new_cset['comment']):
            new_cset['comment'] = new_cset['comment'] + '\n'

    elif point == rootnode:
        pass

    elif old_cset['handles'] != {} or len(old_cset['precursors']) != 2:
        clean_merge = False
        new_cset['comment'] = '--- comment inserted by cdvupgrade ---\n'

    # sort the handles
    handle_list = UR.sort_names(old_cset['handles'])

    # find implicit clean content merges
    clean_merges, hl = UR.clean_merges(UR, UR.old_repo.contents.dagdb, point)
    handle_list.extend(hl)

    # find implicit clean name merges
    clean_nmerges, hl = UR.clean_merges(UR, UR.old_repo.names.dagdb, point)
    handle_list.extend(hl)

    new_cset['handles'] = handles = {}
    for old_handle in handle_list:
        old_hinfo = None
        try:
            old_hinfo = old_cset['handles'][old_handle]
        except KeyError:
            old_hinfo = {}

        # not much has changed
        new_hinfo = copy.copy(old_hinfo)

        new_handle = None
        if UR.handle_map.has_key(old_handle):
            new_handle = UR.handle_map[old_handle]

        # make name changes explicit
        if clean_nmerges.has_key(old_handle):
            name = old_handle_name_at_point(UR.old_repo, old_handle, point, None)
            new_hinfo['parent'] = name['parent']
            new_hinfo['name'] = name['name']

        # fixup the parent pointers
        if old_hinfo.has_key('parent'):
            new_hinfo['parent'] = UR.handle_map[old_hinfo['parent']]

        if old_hinfo.has_key('hash') or clean_merges.has_key(old_handle):
            # figure out what the file is supposed to look like now
            lines = old_handle_contents_at_point(UR.old_repo, old_handle, point, None)['lines']

            # if the file is being added, there are no precursors
            precursors = []
            if new_handle is not None and not old_hinfo.has_key('add'):
                precursors = new_cset['precursors']

            # generate the diff against the new repo
            dinfo = gen_diff(UR.new_repo, new_handle, precursors, lines, UR.txn)
            if old_hinfo.has_key('add'):
                dinfo['add'] = 1
                assert dinfo['matches'] == []

            if dinfo is not None:
                diff = bencode(dinfo)
                new_hinfo['hash'] = hashlib.sha1(diff).digest()

                # if this used to be a clean merge, we have to replace it
                if not old_cset.has_key(old_handle) or not old_cset[old_handle].has_key('hash'):
                    force_new_cset = True

            elif new_hinfo.has_key('hash'):
                del new_hinfo['hash']

            # sanity check
            if new_handle is None:
                assert old_hinfo.has_key('add')
                assert old_hinfo['add']['type'] == 'file'

            # if the file is new, we have to create the handle before writing
            # the diff
            if old_hinfo.has_key('add'):
                nhandle = create_handle(new_cset['precursors'], new_hinfo)
                assert new_handle is None or new_handle == nhandle
                new_handle = nhandle
                UR.handle_map[old_handle] = new_handle

            # write out the new diff
            if new_hinfo.has_key('hash'):
                zdiff = zlib.compress(diff, 6)
                indices[new_handle] = write_diff(UR.new_repo, new_handle, zdiff, UR.txn)

        elif old_hinfo.has_key('add'):
            assert old_hinfo['add']['type'] == 'dir'

            nhandle = create_handle(new_cset['precursors'], new_hinfo)
            assert new_handle is None or new_handle == nhandle
            new_handle = nhandle
            UR.handle_map[old_handle] = new_handle

        if new_hinfo != {}:
            handles[new_handle] = new_hinfo

    # if it used to be a clean merge, preserve the line of clean merge heads
    index_point = None
    if clean_merge and force_new_cset:
        forced_cset = new_cset

        forced_cset['comment'] = '--- change created by cdvupgrade ---\n'

        bforced_cset = bencode(forced_cset)
        forced_point = hashlib.sha1(bforced_cset).digest()
        UR.new_repo.lcrepo.put(forced_point, bforced_cset, txn=UR.txn)

        index_point = forced_point

        new_cset = {'precursors': [forced_cset['precursors'][0], forced_point],
                    'user': forced_cset['user'],
                    'time': forced_cset['time'],
                    'handles': {}}

    # calculate the new point name and write it out
    bnew_cset = bencode(new_cset)
    new_point = hashlib.sha1(bnew_cset).digest()
    UR.new_repo.lcrepo.put(new_point, bnew_cset, txn=UR.txn)

    UR.point_map[point] = new_point

    if index_point is None:
        index_point = new_point

    # now that we know the new point name, write out the indices
    for new_handle, index in indices.items():
        write_index(UR.new_repo, index_point, new_handle, index, UR.txn)

    # diff generation depends on history syncing
    named, modified = sync_history(UR.new_repo, new_point, UR.txn)

    for new_handle in modified:
        handle_contents_at_point(UR.new_repo, new_handle, new_point, UR.txn)

    return new_point