/usr/share/pyshared/bzrlib/vf_search.py is in python-bzrlib 2.5.0-2ubuntu2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 | # Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Searching in versioned file repositories."""
from __future__ import absolute_import
from bzrlib import (
debug,
revision,
trace,
)
from bzrlib.graph import (
DictParentsProvider,
Graph,
invert_parent_map,
)
class AbstractSearchResult(object):
"""The result of a search, describing a set of keys.
Search results are typically used as the 'fetch_spec' parameter when
fetching revisions.
:seealso: AbstractSearch
"""
def get_recipe(self):
"""Return a recipe that can be used to replay this search.
The recipe allows reconstruction of the same results at a later date.
:return: A tuple of `(search_kind_str, *details)`. The details vary by
kind of search result.
"""
raise NotImplementedError(self.get_recipe)
def get_network_struct(self):
"""Return a tuple that can be transmitted via the HPSS protocol."""
raise NotImplementedError(self.get_network_struct)
def get_keys(self):
"""Return the keys found in this search.
:return: A set of keys.
"""
raise NotImplementedError(self.get_keys)
def is_empty(self):
"""Return false if the search lists 1 or more revisions."""
raise NotImplementedError(self.is_empty)
def refine(self, seen, referenced):
"""Create a new search by refining this search.
:param seen: Revisions that have been satisfied.
:param referenced: Revision references observed while satisfying some
of this search.
:return: A search result.
"""
raise NotImplementedError(self.refine)
class AbstractSearch(object):
"""A search that can be executed, producing a search result.
:seealso: AbstractSearchResult
"""
def execute(self):
"""Construct a network-ready search result from this search description.
This may take some time to search repositories, etc.
:return: A search result (an object that implements
AbstractSearchResult's API).
"""
raise NotImplementedError(self.execute)
class SearchResult(AbstractSearchResult):
"""The result of a breadth first search.
A SearchResult provides the ability to reconstruct the search or access a
set of the keys the search found.
"""
def __init__(self, start_keys, exclude_keys, key_count, keys):
"""Create a SearchResult.
:param start_keys: The keys the search started at.
:param exclude_keys: The keys the search excludes.
:param key_count: The total number of keys (from start to but not
including exclude).
:param keys: The keys the search found. Note that in future we may get
a SearchResult from a smart server, in which case the keys list is
not necessarily immediately available.
"""
self._recipe = ('search', start_keys, exclude_keys, key_count)
self._keys = frozenset(keys)
def __repr__(self):
kind, start_keys, exclude_keys, key_count = self._recipe
if len(start_keys) > 5:
start_keys_repr = repr(list(start_keys)[:5])[:-1] + ', ...]'
else:
start_keys_repr = repr(start_keys)
if len(exclude_keys) > 5:
exclude_keys_repr = repr(list(exclude_keys)[:5])[:-1] + ', ...]'
else:
exclude_keys_repr = repr(exclude_keys)
return '<%s %s:(%s, %s, %d)>' % (self.__class__.__name__,
kind, start_keys_repr, exclude_keys_repr, key_count)
def get_recipe(self):
"""Return a recipe that can be used to replay this search.
The recipe allows reconstruction of the same results at a later date
without knowing all the found keys. The essential elements are a list
of keys to start and to stop at. In order to give reproducible
results when ghosts are encountered by a search they are automatically
added to the exclude list (or else ghost filling may alter the
results).
:return: A tuple ('search', start_keys_set, exclude_keys_set,
revision_count). To recreate the results of this search, create a
breadth first searcher on the same graph starting at start_keys.
Then call next() (or next_with_ghosts()) repeatedly, and on every
result, call stop_searching_any on any keys from the exclude_keys
set. The revision_count value acts as a trivial cross-check - the
found revisions of the new search should have as many elements as
revision_count. If it does not, then additional revisions have been
ghosted since the search was executed the first time and the second
time.
"""
return self._recipe
def get_network_struct(self):
start_keys = ' '.join(self._recipe[1])
stop_keys = ' '.join(self._recipe[2])
count = str(self._recipe[3])
return (self._recipe[0], '\n'.join((start_keys, stop_keys, count)))
def get_keys(self):
"""Return the keys found in this search.
:return: A set of keys.
"""
return self._keys
def is_empty(self):
"""Return false if the search lists 1 or more revisions."""
return self._recipe[3] == 0
def refine(self, seen, referenced):
"""Create a new search by refining this search.
:param seen: Revisions that have been satisfied.
:param referenced: Revision references observed while satisfying some
of this search.
"""
start = self._recipe[1]
exclude = self._recipe[2]
count = self._recipe[3]
keys = self.get_keys()
# New heads = referenced + old heads - seen things - exclude
pending_refs = set(referenced)
pending_refs.update(start)
pending_refs.difference_update(seen)
pending_refs.difference_update(exclude)
# New exclude = old exclude + satisfied heads
seen_heads = start.intersection(seen)
exclude.update(seen_heads)
# keys gets seen removed
keys = keys - seen
# length is reduced by len(seen)
count -= len(seen)
return SearchResult(pending_refs, exclude, count, keys)
class PendingAncestryResult(AbstractSearchResult):
"""A search result that will reconstruct the ancestry for some graph heads.
Unlike SearchResult, this doesn't hold the complete search result in
memory, it just holds a description of how to generate it.
"""
def __init__(self, heads, repo):
"""Constructor.
:param heads: an iterable of graph heads.
:param repo: a repository to use to generate the ancestry for the given
heads.
"""
self.heads = frozenset(heads)
self.repo = repo
def __repr__(self):
if len(self.heads) > 5:
heads_repr = repr(list(self.heads)[:5])[:-1]
heads_repr += ', <%d more>...]' % (len(self.heads) - 5,)
else:
heads_repr = repr(self.heads)
return '<%s heads:%s repo:%r>' % (
self.__class__.__name__, heads_repr, self.repo)
def get_recipe(self):
"""Return a recipe that can be used to replay this search.
The recipe allows reconstruction of the same results at a later date.
:seealso SearchResult.get_recipe:
:return: A tuple ('proxy-search', start_keys_set, set(), -1)
To recreate this result, create a PendingAncestryResult with the
start_keys_set.
"""
return ('proxy-search', self.heads, set(), -1)
def get_network_struct(self):
parts = ['ancestry-of']
parts.extend(self.heads)
return parts
def get_keys(self):
"""See SearchResult.get_keys.
Returns all the keys for the ancestry of the heads, excluding
NULL_REVISION.
"""
return self._get_keys(self.repo.get_graph())
def _get_keys(self, graph):
NULL_REVISION = revision.NULL_REVISION
keys = [key for (key, parents) in graph.iter_ancestry(self.heads)
if key != NULL_REVISION and parents is not None]
return keys
def is_empty(self):
"""Return false if the search lists 1 or more revisions."""
if revision.NULL_REVISION in self.heads:
return len(self.heads) == 1
else:
return len(self.heads) == 0
def refine(self, seen, referenced):
"""Create a new search by refining this search.
:param seen: Revisions that have been satisfied.
:param referenced: Revision references observed while satisfying some
of this search.
"""
referenced = self.heads.union(referenced)
return PendingAncestryResult(referenced - seen, self.repo)
class EmptySearchResult(AbstractSearchResult):
"""An empty search result."""
def is_empty(self):
return True
class EverythingResult(AbstractSearchResult):
"""A search result that simply requests everything in the repository."""
def __init__(self, repo):
self._repo = repo
def __repr__(self):
return '%s(%r)' % (self.__class__.__name__, self._repo)
def get_recipe(self):
raise NotImplementedError(self.get_recipe)
def get_network_struct(self):
return ('everything',)
def get_keys(self):
if 'evil' in debug.debug_flags:
from bzrlib import remote
if isinstance(self._repo, remote.RemoteRepository):
# warn developers (not users) not to do this
trace.mutter_callsite(
2, "EverythingResult(RemoteRepository).get_keys() is slow.")
return self._repo.all_revision_ids()
def is_empty(self):
# It's ok for this to wrongly return False: the worst that can happen
# is that RemoteStreamSource will initiate a get_stream on an empty
# repository. And almost all repositories are non-empty.
return False
def refine(self, seen, referenced):
heads = set(self._repo.all_revision_ids())
heads.difference_update(seen)
heads.update(referenced)
return PendingAncestryResult(heads, self._repo)
class EverythingNotInOther(AbstractSearch):
"""Find all revisions in that are in one repo but not the other."""
def __init__(self, to_repo, from_repo, find_ghosts=False):
self.to_repo = to_repo
self.from_repo = from_repo
self.find_ghosts = find_ghosts
def execute(self):
return self.to_repo.search_missing_revision_ids(
self.from_repo, find_ghosts=self.find_ghosts)
class NotInOtherForRevs(AbstractSearch):
"""Find all revisions missing in one repo for a some specific heads."""
def __init__(self, to_repo, from_repo, required_ids, if_present_ids=None,
find_ghosts=False, limit=None):
"""Constructor.
:param required_ids: revision IDs of heads that must be found, or else
the search will fail with NoSuchRevision. All revisions in their
ancestry not already in the other repository will be included in
the search result.
:param if_present_ids: revision IDs of heads that may be absent in the
source repository. If present, then their ancestry not already
found in other will be included in the search result.
:param limit: maximum number of revisions to fetch
"""
self.to_repo = to_repo
self.from_repo = from_repo
self.find_ghosts = find_ghosts
self.required_ids = required_ids
self.if_present_ids = if_present_ids
self.limit = limit
def __repr__(self):
if len(self.required_ids) > 5:
reqd_revs_repr = repr(list(self.required_ids)[:5])[:-1] + ', ...]'
else:
reqd_revs_repr = repr(self.required_ids)
if self.if_present_ids and len(self.if_present_ids) > 5:
ifp_revs_repr = repr(list(self.if_present_ids)[:5])[:-1] + ', ...]'
else:
ifp_revs_repr = repr(self.if_present_ids)
return ("<%s from:%r to:%r find_ghosts:%r req'd:%r if-present:%r"
"limit:%r>") % (
self.__class__.__name__, self.from_repo, self.to_repo,
self.find_ghosts, reqd_revs_repr, ifp_revs_repr,
self.limit)
def execute(self):
return self.to_repo.search_missing_revision_ids(
self.from_repo, revision_ids=self.required_ids,
if_present_ids=self.if_present_ids, find_ghosts=self.find_ghosts,
limit=self.limit)
def search_result_from_parent_map(parent_map, missing_keys):
"""Transform a parent_map into SearchResult information."""
if not parent_map:
# parent_map is empty or None, simple search result
return [], [], 0
# start_set is all the keys in the cache
start_set = set(parent_map)
# result set is all the references to keys in the cache
result_parents = set()
for parents in parent_map.itervalues():
result_parents.update(parents)
stop_keys = result_parents.difference(start_set)
# We don't need to send ghosts back to the server as a position to
# stop either.
stop_keys.difference_update(missing_keys)
key_count = len(parent_map)
if (revision.NULL_REVISION in result_parents
and revision.NULL_REVISION in missing_keys):
# If we pruned NULL_REVISION from the stop_keys because it's also
# in our cache of "missing" keys we need to increment our key count
# by 1, because the reconsitituted SearchResult on the server will
# still consider NULL_REVISION to be an included key.
key_count += 1
included_keys = start_set.intersection(result_parents)
start_set.difference_update(included_keys)
return start_set, stop_keys, key_count
def _run_search(parent_map, heads, exclude_keys):
"""Given a parent map, run a _BreadthFirstSearcher on it.
Start at heads, walk until you hit exclude_keys. As a further improvement,
watch for any heads that you encounter while walking, which means they were
not heads of the search.
This is mostly used to generate a succinct recipe for how to walk through
most of parent_map.
:return: (_BreadthFirstSearcher, set(heads_encountered_by_walking))
"""
g = Graph(DictParentsProvider(parent_map))
s = g._make_breadth_first_searcher(heads)
found_heads = set()
while True:
try:
next_revs = s.next()
except StopIteration:
break
for parents in s._current_parents.itervalues():
f_heads = heads.intersection(parents)
if f_heads:
found_heads.update(f_heads)
stop_keys = exclude_keys.intersection(next_revs)
if stop_keys:
s.stop_searching_any(stop_keys)
for parents in s._current_parents.itervalues():
f_heads = heads.intersection(parents)
if f_heads:
found_heads.update(f_heads)
return s, found_heads
def _find_possible_heads(parent_map, tip_keys, depth):
"""Walk backwards (towards children) through the parent_map.
This finds 'heads' that will hopefully succinctly describe our search
graph.
"""
child_map = invert_parent_map(parent_map)
heads = set()
current_roots = tip_keys
walked = set(current_roots)
while current_roots and depth > 0:
depth -= 1
children = set()
children_update = children.update
for p in current_roots:
# Is it better to pre- or post- filter the children?
try:
children_update(child_map[p])
except KeyError:
heads.add(p)
# If we've seen a key before, we don't want to walk it again. Note that
# 'children' stays relatively small while 'walked' grows large. So
# don't use 'difference_update' here which has to walk all of 'walked'.
# '.difference' is smart enough to walk only children and compare it to
# walked.
children = children.difference(walked)
walked.update(children)
current_roots = children
if current_roots:
# We walked to the end of depth, so these are the new tips.
heads.update(current_roots)
return heads
def limited_search_result_from_parent_map(parent_map, missing_keys, tip_keys,
depth):
"""Transform a parent_map that is searching 'tip_keys' into an
approximate SearchResult.
We should be able to generate a SearchResult from a given set of starting
keys, that covers a subset of parent_map that has the last step pointing at
tip_keys. This is to handle the case that really-long-searches shouldn't be
started from scratch on each get_parent_map request, but we *do* want to
filter out some of the keys that we've already seen, so we don't get
information that we already know about on every request.
The server will validate the search (that starting at start_keys and
stopping at stop_keys yields the exact key_count), so we have to be careful
to give an exact recipe.
Basic algorithm is:
1) Invert parent_map to get child_map (todo: have it cached and pass it
in)
2) Starting at tip_keys, walk towards children for 'depth' steps.
3) At that point, we have the 'start' keys.
4) Start walking parent_map from 'start' keys, counting how many keys
are seen, and generating stop_keys for anything that would walk
outside of the parent_map.
:param parent_map: A map from {child_id: (parent_ids,)}
:param missing_keys: parent_ids that we know are unavailable
:param tip_keys: the revision_ids that we are searching
:param depth: How far back to walk.
"""
if not parent_map:
# No search to send, because we haven't done any searching yet.
return [], [], 0
heads = _find_possible_heads(parent_map, tip_keys, depth)
s, found_heads = _run_search(parent_map, heads, set(tip_keys))
start_keys, exclude_keys, keys = s.get_state()
if found_heads:
# Anything in found_heads are redundant start_keys, we hit them while
# walking, so we can exclude them from the start list.
start_keys = set(start_keys).difference(found_heads)
return start_keys, exclude_keys, len(keys)
|