/usr/share/pyshared/carquinyol/optimizer.py is in python-carquinyol-0.88 0.88.0-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# Copyright (C) 2008, One Laptop Per Child
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
import errno
import hashlib
import logging
import os
import subprocess

import gobject

from carquinyol import layoutmanager
class Optimizer(object):
"""Optimizes disk space usage by detecting duplicates and sharing storage.
"""
def __init__(self, file_store, metadata_store):
self._file_store = file_store
self._metadata_store = metadata_store
self._enqueue_checksum_id = None
def optimize(self, uid):
"""Add an entry to a queue of entries to be checked for duplicates.
"""
if not os.path.exists(self._file_store.get_file_path(uid)):
return
queue_path = layoutmanager.get_instance().get_queue_path()
open(os.path.join(queue_path, uid), 'w').close()
logging.debug('optimize %r', os.path.join(queue_path, uid))
if self._enqueue_checksum_id is None:
self._enqueue_checksum_id = \
gobject.idle_add(self._process_entry_cb,
priority=gobject.PRIORITY_LOW)
def remove(self, uid):
"""Remove any structures left from space optimization
"""
checksum = self._metadata_store.get_property(uid, 'checksum')
if checksum is None:
return
checksums_dir = layoutmanager.get_instance().get_checksums_dir()
checksum_path = os.path.join(checksums_dir, checksum)
checksum_entry_path = os.path.join(checksum_path, uid)
if os.path.exists(checksum_entry_path):
logging.debug('remove %r', checksum_entry_path)
os.remove(checksum_entry_path)
if os.path.exists(checksum_path):
try:
os.rmdir(checksum_path)
logging.debug('removed %r', checksum_path)
except OSError, e:
if e.errno != errno.ENOTEMPTY:
raise
def _identical_file_already_exists(self, checksum):
"""Check if we already have files with this checksum.
"""
checksums_dir = layoutmanager.get_instance().get_checksums_dir()
checksum_path = os.path.join(checksums_dir, checksum)
return os.path.exists(checksum_path)
def _get_uid_from_checksum(self, checksum):
"""Get an existing entry which file matches checksum.
"""
checksums_dir = layoutmanager.get_instance().get_checksums_dir()
checksum_path = os.path.join(checksums_dir, checksum)
first_uid = os.listdir(checksum_path)[0]
return first_uid
def _create_checksum_dir(self, checksum):
"""Create directory that tracks files with this same checksum.
"""
checksums_dir = layoutmanager.get_instance().get_checksums_dir()
checksum_path = os.path.join(checksums_dir, checksum)
logging.debug('create dir %r', checksum_path)
os.mkdir(checksum_path)
def _add_checksum_entry(self, uid, checksum):
"""Create a file in the checksum dir with the uid of the entry
"""
checksums_dir = layoutmanager.get_instance().get_checksums_dir()
checksum_path = os.path.join(checksums_dir, checksum)
logging.debug('touch %r', os.path.join(checksum_path, uid))
open(os.path.join(checksum_path, uid), 'w').close()
def _already_linked(self, uid, checksum):
"""Check if this entry's file is already a hard link to the checksums
dir.
"""
checksums_dir = layoutmanager.get_instance().get_checksums_dir()
checksum_path = os.path.join(checksums_dir, checksum)
return os.path.exists(os.path.join(checksum_path, uid))
def _process_entry_cb(self):
"""Process one item in the checksums queue by calculating its checksum,
checking if there exist already an identical file, and in that case
substituting its file with a hard link to that pre-existing file.
"""
queue_path = layoutmanager.get_instance().get_queue_path()
queue = os.listdir(queue_path)
if queue:
uid = queue[0]
logging.debug('_process_entry_cb processing %r', uid)
file_in_entry_path = self._file_store.get_file_path(uid)
if not os.path.exists(file_in_entry_path):
logging.info('non-existent entry in queue: %r', uid)
else:
checksum = self._calculate_md5sum(file_in_entry_path)
self._metadata_store.set_property(uid, 'checksum', checksum)
if self._identical_file_already_exists(checksum):
if not self._already_linked(uid, checksum):
existing_entry_uid = \
self._get_uid_from_checksum(checksum)
self._file_store.hard_link_entry(uid,
existing_entry_uid)
self._add_checksum_entry(uid, checksum)
else:
self._create_checksum_dir(checksum)
self._add_checksum_entry(uid, checksum)
os.remove(os.path.join(queue_path, uid))
if len(queue) <= 1:
self._enqueue_checksum_id = None
return False
else:
return True
def _calculate_md5sum(self, path):
"""Calculate the md5 checksum of a given file.
"""
popen = subprocess.Popen(['md5sum', path], stdout=subprocess.PIPE)
stdout, stderr_ = popen.communicate()
return stdout.split(' ', 1)[0]
|