/usr/lib/python3/dist-packages/screed/createscreed.py is in python3-screed 0.9-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
from __future__ import absolute_import
from . import DBConstants
import os
import sqlite3
import itertools
def create_db(filepath, fields, rcrditer):
    """
    Create a screed database at the given filepath.

    filepath -- path of the database file. DBConstants.fileExtension is
        appended when the path does not already end with it, and any
        existing file at the resulting path is removed first.
    fields -- sequence of (fieldname, role) pairs specifying the names and
        relative order of attributes in a record. It is iterated several
        times, measured with len() and indexed, so it must be a real
        sequence (e.g. a tuple), not a one-shot iterator.
    rcrditer -- iterator returning records over a sequence dataset. Each
        record is looked up by fieldname (records are in dictionary form).

    Returns None. Exceptions raised by sqlite3 or while consuming rcrditer
    propagate to the caller; the database connection is closed first.
    """
    if not filepath.endswith(DBConstants.fileExtension):
        filepath += DBConstants.fileExtension

    if os.path.exists(filepath):  # Remove existing files
        os.unlink(filepath)

    con = sqlite3.connect(filepath)
    # Everything below runs under try/finally so the connection is closed
    # even when a record is malformed or an SQL statement fails; otherwise
    # the handle (opened in EXCLUSIVE locking mode) would be leaked.
    try:
        cur = con.cursor()

        # Sqlite PRAGMA settings for speed
        cur.execute("PRAGMA synchronous='OFF'")
        cur.execute("PRAGMA locking_mode=EXCLUSIVE")

        # Create the admin table
        cur.execute('CREATE TABLE %s (%s INTEGER PRIMARY KEY, '
                    '%s TEXT, %s TEXT)' % (DBConstants._SCREEDADMIN,
                                           DBConstants._PRIMARY_KEY,
                                           DBConstants._FIELDNAME,
                                           DBConstants._ROLENAME))
        admin_insert = 'INSERT INTO %s (%s, %s) VALUES (?, ?)' % \
            (DBConstants._SCREEDADMIN, DBConstants._FIELDNAME,
             DBConstants._ROLENAME)

        # Put the primary key in as an attribute
        cur.execute(admin_insert, (DBConstants._PRIMARY_KEY,
                                   DBConstants._PRIMARY_KEY_ROLE))
        for attribute, role in fields:
            cur.execute(admin_insert, (attribute, role))

        # Create the dictionary table: one TEXT column per field.
        # Column names are SQL identifiers and cannot be bound as '?'
        # parameters, so they are interpolated into the statement text.
        column_defs = ','.join(['%s TEXT' % field for field, role in fields])
        cur.execute('CREATE TABLE %s (%s INTEGER PRIMARY KEY, %s)' %
                    (DBConstants._DICT_TABLE, DBConstants._PRIMARY_KEY,
                     column_defs))

        # Build the record INSERT statement: one '?' placeholder per field
        qmarks = ','.join(['?'] * len(fields))
        column_names = ','.join([fieldname for fieldname, role in fields])
        record_insert = 'INSERT INTO %s (%s) VALUES (%s)' % \
            (DBConstants._DICT_TABLE, column_names, qmarks)

        # Pull data from the iterator and store in database
        # Committing in batches seems faster than a single call to
        # executemany
        batch_size = 10000
        data = (tuple(record[fieldname] for fieldname, role in fields)
                for record in rcrditer)
        while True:
            batch = list(itertools.islice(data, batch_size))
            if not batch:
                break
            cur.executemany(record_insert, batch)
            con.commit()

        # Attribute to index: the first field whose role is the indexed
        # text key, defaulting to the first field when none has that role
        queryby = next(
            (fieldname for fieldname, role in fields
             if role == DBConstants._INDEXED_TEXT_KEY),
            fields[0][0])

        # Make the index on the 'queryby' attribute
        cur.execute('CREATE UNIQUE INDEX %sidx ON %s(%s)' %
                    (queryby, DBConstants._DICT_TABLE, queryby))
        con.commit()
    finally:
        con.close()
|