/usr/share/pyshared/pandas/core/algorithms.py is in python-pandas 0.7.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""
Generic data algorithms. This module is experimental at the moment and is not
intended for public consumption.
"""
import numpy as np
from pandas.core.series import Series
import pandas.core.common as com
import pandas._tseries as lib
def match(values, index):
    """
    Look up each element of `values` in a hash table built from `index`.

    The hash table class and the casting routine are picked from the dtype
    of `index`: float64 first, then int64, otherwise generic Python objects.
    The actual work is delegated to `_match_generic`.

    Parameters
    ----------
    values : array-like
        Elements to look up.
    index : array-like
        Elements whose locations are stored in the hash table; its dtype
        drives the choice of hash table.

    Returns
    -------
    match : ndarray
        Whatever the hash table's ``lookup`` returns for `values`.
    """
    # Only the (table, caster) pair differs between dtypes, so select it
    # first and make a single call below.
    if com.is_float_dtype(index):
        table_type, caster = lib.Float64HashTable, com._ensure_float64
    elif com.is_integer_dtype(index):
        table_type, caster = lib.Int64HashTable, com._ensure_int64
    else:
        table_type, caster = lib.PyObjectHashTable, com._ensure_object
    return _match_generic(values, index, table_type, caster)
def _get_hash_table_and_cast(values):
    """
    Pick the hash table class that fits the dtype of `values` and cast them.

    Float input is checked first, then integer input; anything else falls
    back to the generic Python-object table.

    Returns
    -------
    klass, values : tuple
        The selected hash table class and `values` after being passed
        through the corresponding ``com._ensure_*`` routine.
    """
    if com.is_float_dtype(values):
        return lib.Float64HashTable, com._ensure_float64(values)
    if com.is_integer_dtype(values):
        return lib.Int64HashTable, com._ensure_int64(values)
    return lib.PyObjectHashTable, com._ensure_object(values)
def count(values, uniques=None):
    """
    Count occurrences of each distinct element of `values`.

    Parameters
    ----------
    values : array-like
        Data to tabulate; its dtype selects the hash table implementation
        (float64, then int64, otherwise Python objects).
    uniques : optional
        Not supported yet; must be left as None.

    Returns
    -------
    Series
        The result of `_count_generic`: counts indexed by the unique values.

    Raises
    ------
    NotImplementedError
        If `uniques` is not None.
    """
    # Guard clause: counting against a caller-supplied set of uniques is
    # not implemented.
    if uniques is not None:
        raise NotImplementedError

    if com.is_float_dtype(values):
        table_type, caster = lib.Float64HashTable, com._ensure_float64
    elif com.is_integer_dtype(values):
        table_type, caster = lib.Int64HashTable, com._ensure_int64
    else:
        table_type, caster = lib.PyObjectHashTable, com._ensure_object
    return _count_generic(values, table_type, caster)
def _count_generic(values, table_type, type_caster):
    """
    Tabulate `values` with the given hash table type.

    Parameters
    ----------
    values : array-like
        Data to tabulate; passed through `type_caster` first.
    table_type : callable
        Called with ``len(values)`` to build a table that exposes a
        ``factorize`` method returning ``(uniques, labels, counts)``.
    type_caster : callable
        Converts `values` to the representation the table expects.

    Returns
    -------
    Series
        ``counts`` indexed by ``uniques``; the labels are discarded.
    """
    casted = type_caster(values)
    hash_table = table_type(len(casted))
    # factorize yields three items; only the uniques and their counts are
    # needed for the result.
    distinct, _labels, tallies = hash_table.factorize(casted)
    return Series(tallies, index=distinct)
def _match_generic(values, index, table_type, type_caster):
    """
    Build a hash table over `index` and look `values` up in it.

    Parameters
    ----------
    values : array-like
        Elements to look up; passed through `type_caster` first.
    index : array-like
        Elements whose locations are recorded; passed through `type_caster`.
    table_type : callable
        Called with ``len(index)`` to build a table exposing
        ``map_locations`` and ``lookup``.
    type_caster : callable
        Converts both inputs to the representation the table expects.

    Returns
    -------
    The value returned by the table's ``lookup`` for the cast `values`.
    """
    targets = type_caster(values)
    keys = type_caster(index)
    # Size the table from the keys it is about to hold, then record the
    # location of each key before querying.
    lookup_table = table_type(len(keys))
    lookup_table.map_locations(keys)
    return lookup_table.lookup(targets)
def factorize(values, sort=False, order=None, na_sentinel=-1):
    """
    Encode input values as an enumerated type or categorical variable

    Parameters
    ----------
    values : sequence
        Data to encode. It is cast and paired with a hash table class by
        `_get_hash_table_and_cast`.
    sort : boolean, default False
        If True (and there is at least one count), reorder the uniques by
        ``uniques.argsort()`` and remap labels and counts to match.
    order :
        Currently unused by this function.
    na_sentinel : int, default -1
        Forwarded to the hash table's ``get_labels``; presumably the label
        given to missing values (confirm against the hash table
        implementation). It should be a value that cannot collide with a
        real label, i.e. negative.

    Returns
    -------
    labels : ndarray
        Integer label of each input element, as produced by ``get_labels``
        (remapped when `sort` is True).
    uniques : ndarray
        The distinct values collected by ``get_labels``.
    counts : ndarray
        Per-unique counts returned by ``get_labels``, aligned with `uniques`.
    """
    hash_klass, values = _get_hash_table_and_cast(values)

    uniques = []
    table = hash_klass(len(values))
    labels, counts = table.get_labels(values, uniques, 0, na_sentinel)

    uniques = com._asarray_tuplesafe(uniques)
    if sort and len(counts) > 0:
        sorter = uniques.argsort()
        # reverse_indexer[old_label] -> position of that unique after sorting
        reverse_indexer = np.empty(len(sorter), dtype=np.int32)
        reverse_indexer.put(sorter, np.arange(len(sorter)))

        # Sentinel (and any other negative) labels are not valid positions in
        # reverse_indexer. Point them at slot 0 for the take so it can neither
        # wrap around nor raise, then restore the caller's sentinel rather
        # than a hard-coded -1 so sorted and unsorted output agree.
        mask = (labels < 0) | (labels == na_sentinel)
        labels = reverse_indexer.take(np.where(mask, 0, labels))
        np.putmask(labels, mask, na_sentinel)

        uniques = uniques.take(sorter)
        counts = counts.take(sorter)

    return labels, uniques, counts
def unique(values):
    """
    Placeholder for a unique-values routine; not implemented yet.

    `values` is currently ignored and None is returned.
    """
    return None
|