This file is indexed.

/usr/lib/python3/dist-packages/screed/createscreed.py is in python3-screed 1.0-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# Copyright (c) 2016, The Regents of the University of California.

from __future__ import absolute_import

import argparse
import itertools
import os
try:
    import sqlite3
except ImportError:
    pass
import itertools
import sys

from . import DBConstants, fasta, fastq, openscreed


def create_db(filepath, fields, rcrditer):
    """
    Creates a screed database in the given filepath.

    filepath: path of the database file. DBConstants.fileExtension is
        appended when the path does not already end with it, and any
        file already present at the resulting path is removed first.
    fields: sequence of (fieldname, role) pairs specifying the names and
        relative order of attributes in a record.
    rcrditer: iterator returning records over a sequence dataset.
        Records yielded are in dictionary form, i.e. each one is looked
        up as record[fieldname] for every fieldname in fields.

    Raises Exception when the sqlite3 module could not be imported.
    Errors raised by sqlite3 while building the database propagate to
    the caller; the connection is closed before they do.
    """
    try:
        sqlite3
    except NameError:
        raise Exception("error: sqlite3 is needed for this functionality" +
                        " but is not installed.")

    if not filepath.endswith(DBConstants.fileExtension):
        filepath += DBConstants.fileExtension

    if os.path.exists(filepath):  # Remove existing files
        os.unlink(filepath)

    con = sqlite3.connect(filepath)
    # The connection runs with locking_mode=EXCLUSIVE, so it must be closed
    # on every path: otherwise a failure part-way through (for example a
    # duplicate value when building the unique index) would leave the file
    # locked until the connection object happens to be garbage collected.
    try:
        cur = con.cursor()

        # Sqlite PRAGMA settings for speed
        cur.execute("PRAGMA synchronous='OFF'")
        cur.execute("PRAGMA locking_mode=EXCLUSIVE")

        # Create the admin table
        cur.execute('CREATE TABLE %s (%s INTEGER PRIMARY KEY, '
                    '%s TEXT, %s TEXT)' % (DBConstants._SCREEDADMIN,
                                           DBConstants._PRIMARY_KEY,
                                           DBConstants._FIELDNAME,
                                           DBConstants._ROLENAME))
        query = 'INSERT INTO %s (%s, %s) VALUES (?, ?)' % \
                (DBConstants._SCREEDADMIN, DBConstants._FIELDNAME,
                 DBConstants._ROLENAME)

        # Put the primary key in as an attribute
        cur.execute(query, (DBConstants._PRIMARY_KEY,
                            DBConstants._PRIMARY_KEY_ROLE))
        for attribute, role in fields:
            cur.execute(query, (attribute, role))

        # Setup the dictionary table creation field substring
        fieldsub = ','.join(['%s TEXT' % field for field, role in fields])

        # Create the dictionary table
        cur.execute('CREATE TABLE %s (%s INTEGER PRIMARY KEY, %s)' %
                    (DBConstants._DICT_TABLE, DBConstants._PRIMARY_KEY,
                     fieldsub))

        # Setup the 'qmarks' sqlite substring: one placeholder per field
        qmarks = ','.join(['?' for _ in fields])

        # Setup the sql substring for inserting fields into database
        fieldsub = ','.join([fieldname for fieldname, role in fields])

        query = 'INSERT INTO %s (%s) VALUES (%s)' %\
                (DBConstants._DICT_TABLE, fieldsub, qmarks)
        # Pull data from the iterator and store in database.
        # Rows are handed to executemany in batches of 10000 so the whole
        # dataset is never held in memory; everything is committed once
        # after the last batch.
        data = (tuple(record[fieldname] for fieldname, role in fields)
                for record in rcrditer)
        while True:
            batch = list(itertools.islice(data, 10000))
            if not batch:
                break
            cur.executemany(query, batch)
        con.commit()

        # Attribute to index
        queryby = fields[0][0]  # Defaults to the first field
        for fieldname, role in fields:
            if role == DBConstants._INDEXED_TEXT_KEY:
                queryby = fieldname
                break

        # Make the index on the 'queryby' attribute
        cur.execute('CREATE UNIQUE INDEX %sidx ON %s(%s)' %
                    (queryby, DBConstants._DICT_TABLE, queryby))

        con.commit()
    finally:
        con.close()


def make_db(filename):
    """
    Builds a screed database from the sequence file `filename`.

    The file is opened through openscreed.Open with description parsing
    turned on. The name of the parser function that reader exposes as
    `iter_fn` selects either the FASTQ or the FASTA field layout, which
    is then passed to create_db together with the reader itself.
    """
    records = openscreed.Open(filename, parse_description=True)

    # Field layouts keyed by the name of the matching parser function
    layouts = dict([
        (fastq.fastq_iter.__name__, fastq.FieldTypes),
        (fasta.fasta_iter.__name__, fasta.FieldTypes),
    ])
    parser_name = records.iter_fn.__name__

    # Create the screed db
    create_db(filename, layouts[parser_name], records)


def main(args):
    """
    Command-line entry point: builds a screed database from one file.

    args: list of command-line arguments without the program name; a
        single positional `filename` is expected.

    Prints the path of the database that was written and then exits the
    interpreter with status 0 (raises SystemExit).
    """
    parser = argparse.ArgumentParser(description="A shell interface to the "
                                     "screed database writing function")
    parser.add_argument('filename')
    args = parser.parse_args(args)

    make_db(args.filename)

    # create_db only appends the extension when the path does not already
    # end with it, so apply the same rule here; unconditionally appending
    # would report a doubled extension for such filenames.
    dbpath = args.filename
    if not dbpath.endswith(DBConstants.fileExtension):
        dbpath += DBConstants.fileExtension
    print("Database saved in {}".format(dbpath))

    # Use sys.exit: the bare exit() helper is injected by the site module
    # and is not guaranteed to exist (e.g. when running with python -S).
    sys.exit(0)


if __name__ == "__main__":
    # Run as a script: hand everything after the program name to main()
    cli_args = sys.argv[1:]
    main(cli_args)