/usr/share/pyshared/cogent/db/ncbi.py

#!/usr/bin/env python

"""To automate batch functions provided by EUtils

    (http://www.ncbi..nih.gov/entrez/eutils) 
    search and fetch for sets of sequence information
"""
from urllib import urlopen, urlretrieve
from xml.dom.minidom import parseString
from cogent.db.util import UrlGetter, expand_slice
from time import sleep
from StringIO import StringIO
from cogent.parse.record_finder import DelimitedRecordFinder, never_ignore
from string import strip

__author__ = "Mike Robeson"
__copyright__ = "Copyright 2007-2011, The Cogent Project"
__credits__ = ["Mike Robeson", "Rob Knight", "Zongzhi Liu"]
__license__ = "GPL"
__version__ = "1.5.1"
__maintainer__ = "Mike Robeson"
__email__ = "mike.robeson@colorado.edu"
__status__ = "Production"

class QueryNotFoundError(Exception): pass

#eutils_base='http://eutils.ncbi.nlm.nih.gov/entrez/eutils'
eutils_base='http://www.ncbi.nlm.nih.gov/entrez/eutils'

#EUtils requires a tool and and email address
default_tool_string = 'PyCogent'
default_email_address = 'Michael.Robeson@colorado.edu'

#databases last updated 7/22/05
valid_databases=dict.fromkeys(["pubmed", "protein", "nucleotide", "structure",\
    "genome", "books", "cancerchromosomes", "cdd", "domains", "gene", \
    "genomeprj", "gensat", "geo", "gds", "homologene", "journals", "mesh",\
    "ncbisearch", "nlmcatalog", "omim", "pmc", "popset", "probe", "pcassay",\
    "pccompound", "pcsubstance", "snp", "taxonomy", "unigene", "unists"])

#rettypes last updated 7/22/05
#somehow, I don't think we'll be writing parsers for all these...
#WARNING BY RK 4/13/09: THESE RETTYPES ARE HIGHLY MISLEADING AND NO LONGER 
#WORK. See this URL for the list of "official" rettypes, which is highly
#incomplete and has some important omissions (e.g. rettype 'gi' is missing
#but is the "official" replacement for 'GiList'):
# http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchseq_help.html
#In particular, use gb or gp for GenBank or GenPept, use gi for GiList,
#use fasta for FASTA, and several other changes.
#Until we get a complete accounting of what all the changes are, treat the
#rettypes below with extreme caution and experiment in the interpreter.
rettypes = {}
rettypes['pubmed']='DocSum Brief Abstract Citation MEDLINE XML uilist ExternalLink ASN1 pubmed_pubmed pubmed_pubmed_refs pubmed_books_refs pubmed_cancerchromosomes pubmed_cdd pubmed_domains pubmed_gds pubmed_gene pubmed_gene_rif pubmed_genome pubmed_genomeprj pubmed_gensat pubmed_geo pubmed_homologene pubmed_nucleotide pubmed_omim pubmed_pcassay pubmed_pccompound pubmed_pccompound_mesh pubmed_pcsubstance pubmed_pcsubstance_mesh pubmed_pmc pubmed_pmc_refs pubmed_popset pubmed_probe pubmed_protein pubmed_snp pubmed_structure pubmed_unigene pubmed_unists'

rettypes['protein']='DocSum ASN1 FASTA XML GenPept GiList graph fasta_xml igp_xml gpc_xml ExternalLink protein_protein protein_cdd protein_domains protein_gene protein_genome protein_genomeprj protein_homologene protein_nucleotide protein_nucleotide_mgc protein_omim protein_pcassay protein_pccompound protein_pcsubstance protein_pmc protein_popset protein_pubmed protein_snp protein_snp_genegenotype protein_structure protein_taxonomy protein_unigene'

rettypes['nucleotide']='DocSum ASN1 FASTA XML GenBank GiList graph fasta_xml gb_xml gbc_xml ExternalLink nucleotide_comp_nucleotide nucleotide_nucleotide nucleotide_nucleotide_comp nucleotide_nucleotide_mrna nucleotide_comp_genome nucleotide_gene nucleotide_genome nucleotide_genome_samespecies nucleotide_gensat nucleotide_geo nucleotide_homologene nucleotide_mrna_genome nucleotide_omim nucleotide_pcassay nucleotide_pccompound nucleotide_pcsubstance nucleotide_pmc nucleotide_popset nucleotide_probe nucleotide_protein nucleotide_pubmed nucleotide_snp nucleotide_snp_genegenotype nucleotide_structure nucleotide_taxonomy nucleotide_unigene nucleotide_unists'

rettypes['structure']='DocSum Brief Structure Summary uilist ExternalLink structure_domains structure_genome structure_nucleotide structure_omim structure_pcassay structure_pccompound structure_pcsubstance structure_pmc structure_protein structure_pubmed structure_snp structure_taxonomy'

rettypes['genome']='DocSum ASN1 GenBank XML ExternalLink genome_genomeprj genome_nucleotide genome_nucleotide_comp genome_nucleotide_mrna genome_nucleotide_samespecies genome_omim genome_pmc genome_protein genome_pubmed genome_structure genome_taxonomy'

rettypes['books']='DocSum Brief Books books_gene books_omim books_pmc_refs books_pubmed_refs'

rettypes['cancerchromosomes']='DocSum SkyCghDetails SkyCghCommon SkyCghCommonVerbose cancerchromosomes_cancerchromosomes_casecell cancerchromosomes_cancerchromosomes_cellcase cancerchromosomes_cancerchromosomes_cytocgh cancerchromosomes_cancerchromosomes_cytoclincgh cancerchromosomes_cancerchromosomes_cytoclinsky cancerchromosomes_cancerchromosomes_cytodiagcgh cancerchromosomes_cancerchromosomes_cytodiagsky cancerchromosomes_cancerchromosomes_cytosky cancerchromosomes_cancerchromosomes_diag cancerchromosomes_cancerchromosomes_textual cancerchromosomes_pmc cancerchromosomes_pubmed'

rettypes['cdd']='DocSum Brief uilist cdd_cdd_fused cdd_cdd_related cdd_gene cdd_homologene cdd_pmc cdd_protein cdd_pubmed cdd_taxonomy'

rettypes['domains']='DocSum Brief uilist domains_domains_new domains_pmc domains_protein domains_pubmed domains_structure domains_taxonomy'

rettypes['gene']='Default DocSum Brief ASN.1 XML Graphics gene_table uilist ExternalLink gene_books gene_cdd gene_gensat gene_geo gene_homologene gene_nucleotide gene_nucleotide_mgc gene_omim gene_pmc gene_probe gene_protein gene_pubmed gene_pubmed_rif gene_snp gene_snp_genegenotype gene_taxonomy gene_unigene gene_unists'

rettypes['genomeprj']='DocSum Brief Overview genomeprj_genomeprj genomeprj_genome genomeprj_nucleotide genomeprj_nucleotide_mrna genomeprj_nucleotide_organella genomeprj_nucleotide_wgs genomeprj_pmc genomeprj_popset genomeprj_protein genomeprj_pubmed genomeprj_taxonomy'

rettypes['gensat']='Group Detail DocSum Brief gensat_gensat gensat_gene gensat_geo gensat_nucleotide gensat_pmc gensat_pubmed gensat_taxonomy gensat_unigene'

rettypes['geo']='DocSum Brief ExternalLink geo_geo_homologs geo_geo_prof geo_geo_seq geo_gds geo_gene geo_gensat geo_homologene geo_nucleotide geo_omim geo_pmc geo_pubmed geo_taxonomy geo_unigene'

rettypes['gds']='DocSum Brief gds_gds gds_geo gds_pmc gds_pubmed gds_taxonomy'

rettypes['homologene']='DocSum Brief HomoloGene AlignmentScores MultipleAlignment ASN1 XML FASTA homologene_homologene homologene_cdd homologene_gene homologene_geo homologene_nucleotide homologene_omim homologene_pmc homologene_protein homologene_pubmed homologene_snp homologene_snp_genegenotype homologene_taxonomy homologene_unigene'

rettypes['journals']='DocSum full journals_PubMed journals_Protein journals_Nucleotide journals_Genome journals_Popset journals_PMC journals_nlmcatalog'

rettypes['mesh']='Full DocSum Brief mesh_PubMed'

rettypes['ncbisearch']='DocSum Brief Home+Page+View ncbisearch_ncbisearch'

rettypes['nlmcatalog']='Brief DocSum XML Expanded Full Subject ExternalLink'

rettypes['omim']='DocSum Detailed Synopsis Variants ASN1 XML ExternalLink omim_omim omim_books omim_gene omim_genome omim_geo omim_homologene omim_nucleotide omim_pmc omim_protein omim_pubmed omim_snp omim_snp_genegenotype omim_structure omim_unigene omim_unists'

rettypes['pmc']='DocSum Brief XML TxTree pmc_books_refs pmc_cancerchromosomes pmc_cdd pmc_domains pmc_gds pmc_gene pmc_genome pmc_genomeprj pmc_gensat pmc_geo pmc_homologene pmc_nucleotide pmc_omim pmc_pccompound pmc_pcsubstance pmc_popset pmc_protein pmc_pubmed pmc_refs_pubmed pmc_snp pmc_structure pmc_taxonomy pmc_unists'

rettypes['popset']='DocSum PS ASN1 XML GiList ExternalLink TxTree popset_genomeprj popset_nucleotide popset_protein popset_pubmed popset_taxonomy'

rettypes['probe']='DocSum Brief ASN1 XML Probe probe_probe probe_gene probe_nucleotide probe_pubmed probe_taxonomy'

rettypes['pcassay']='DocSum Brief uilist pcassay_nucleotide pcassay_pccompound pcassay_pccompound_active pcassay_pccompound_inactive pcassay_pcsubstance pcassay_pcsubstance_active pcassay_pcsubstance_inactive pcassay_protein pcassay_pubmed pcassay_structure'

rettypes['pccompound']='Brief DocSum PROP SYNONYMS pc_fetch pccompound_pccompound_pulldown pccompound_pccompound_sameanytautomer_pulldown pccompound_pccompound_sameconnectivity_pulldown pccompound_pccompound_sameisotopic_pulldown pccompound_pccompound_samestereochem_pulldown pccompound_nucleotide pccompound_pcassay pccompound_pcassay_active pccompound_pcassay_inactive pccompound_pcsubstance pccompound_pmc pccompound_protein pccompound_pubmed pccompound_pubmed_mesh pccompound_structure'

rettypes['pcsubstance']='Brief DocSum PROP SYNONYMS pc_fetch IDLIST pcsubstance_pcsubstance_pulldown pcsubstance_pcsubstance_same_pulldown pcsubstance_pcsubstance_sameanytautomer_pulldown pcsubstance_pcsubstance_sameconnectivity_pulldow pcsubstance_pcsubstance_sameisotopic_pulldown pcsubstance_pcsubstance_samestereochem_pulldown pcsubstance_mesh pcsubstance_nucleotide pcsubstance_pcassay pcsubstance_pcassay_active pcsubstance_pcassay_inactive pcsubstance_pccompound pcsubstance_pmc pcsubstance_protein pcsubstance_pubmed pcsubstance_pubmed_mesh pcsubstance_structure'

rettypes['snp']='DocSum Brief FLT ASN1 XML FASTA RSR ssexemplar CHR FREQXML GENB GEN GENXML DocSet Batch uilist GbExp ExternalLink MergeStatus snp_snp_genegenotype snp_gene snp_homologene snp_nucleotide snp_omim snp_pmc snp_protein snp_pubmed snp_structure snp_taxonomy snp_unigene snp_unists'

rettypes['taxonomy']='DocSum Brief TxUidList TxInfo XML TxTree ExternalLink taxonomy_protein taxonomy_nucleotide taxonomy_structure taxonomy_genome taxonomy_gene taxonomy_cdd taxonomy_domains taxonomy_gds taxonomy_genomeprj taxonomy_gensat taxonomy_homologene taxonomy_pmc taxonomy_popset taxonomy_probe taxonomy_pubmed taxonomy_snp taxonomy_unigene taxonomy_unists'

rettypes['unigene']='DocSum Brief ExternalLink unigene_unigene unigene_unigene_expression unigene_unigene_homologous unigene_gene unigene_gensat unigene_geo unigene_homologene unigene_nucleotide unigene_nucleotide_mgc unigene_omim unigene_protein unigene_pubmed unigene_snp unigene_snp_genegenotype unigene_taxonomy unigene_unists'

rettypes['unists']='DocSum Brief ExternalLink unists_gene unists_nucleotide unists_omim unists_pmc unists_pubmed unists_snp unists_taxonomy unists_unigene'

#convert into dict of known rettypes for efficient lookups -- don't want to
#scan list every time.
for key, val in rettypes.items():
    rettypes[key] = dict.fromkeys(val.split())

class ESearch(UrlGetter):
    """Performs an ESearch, getting a list of ids from an arbitrary query."""
    PrintedFields = dict.fromkeys(['db', 'usehistory', 'term', 'retmax', 
        'retstart', 'tool', 'email'])
    Defaults = {'db':'nucleotide','usehistory':'y', 'retmax':1000, 
        'tool':default_tool_string, 'email':default_email_address}
    BaseUrl = eutils_base+'/esearch.fcgi?'

class EFetch(UrlGetter):
    """Retrieves a list of primary ids.

    WARNING: retmax (the maximum return value) is only 3 by default, so you
    will only get 3 records (this is for testing purposes). You will probably
    want to increase for real searches.
    """
    PrintedFields = dict.fromkeys(['db', 'rettype', 'retmode', 'query_key',\
        'WebEnv', 'retmax', 'retstart', 'id', 'tool', 'email'])
    Defaults = {'retmode':'text','rettype':'fasta','db':'nucleotide',\
            'retstart':0, 'retmax':100, 'tool':default_tool_string, \
            'email':default_email_address}
    BaseUrl = eutils_base+'/efetch.fcgi?'

class ELink(UrlGetter):
    """Retrieves a list of ids from one db that link to another db."""
    PrintedFields = dict.fromkeys(['db', 'id', 'reldate', 'mindate', 'maxdate',
        'datetype', 'term', 'retmode', 'db', 'dbfrom', 'WebEnv', 'query_key',
        'holding', 'cmd', 'tool', 'email'])
    Defaults = {'tool':default_tool_string, 'email':default_email_address}
    BaseUrl = eutils_base + '/elink.fcgi?'

class ESearchResult(object):
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)
    def __str__(self):
        return str(self.__dict__)

def id_list_constructor(id_list_node):
    """Takes an id_list xml node and converts it into list of ids as strings"""
    return [str_constructor(n) for n in id_list_node.childNodes \
        if n.nodeType != n.TEXT_NODE]

def int_constructor(node):
    """Makes an int out of node's first textnode child."""
    return int(node.firstChild.data)

def str_constructor(node):
    """Makes an str out of node's first textnode child."""
    return str(node.firstChild.data)

#the following are the only keys we explicitly handle now:
#(note difference in capitalization from parameters passed in)
esearch_constructors = {'Count':int_constructor, 'RetMax':int_constructor,\
    'RetStart':int_constructor, 'QueryKey':int_constructor, \
    'WebEnv':str_constructor, 'IdList':id_list_constructor}

def ESearchResultParser(result_as_string):
    """Parses an ESearch result. Returns ESearchResult object."""
    if '414 Request-URI Too Large' in result_as_string:
        raise ValueError, "Tried to pass too large an URI:\n" + result_as_string
    doc = parseString(result_as_string)
    #assume one query result -- may need to fix
    query = doc.childNodes[-1]
    result = {}
    for n in query.childNodes:
        #skip top-level text nodes
        if n.nodeType==n.TEXT_NODE:
            continue
        name = str(n.tagName)   #who cares about unicode anyway...
        if name in esearch_constructors:
            result[name] = esearch_constructors[name](n)
        else:   #just keep the data if we don't know what it is
            result[name] = n.toxml()
    return ESearchResult(**result)

def ELinkResultParser(text):
    """Gets the linked ids out of a single ELink result.

    Does not use the XML parser because of problems with long results.
    Only handles cases where there is a single set of links between
    databases.
    """
    result = []
    in_links = False
    for line in text.splitlines():
        if '<LinkName>' in line:
            in_links = True
        elif in_links and ('<Id>' in line):
            try:
                #expect line of form <Id>xxxx</Id>: want xxxx
                result.append(line.split('>', 1)[1].rsplit('<', 1)[0])
            except (IndexError, TypeError):
                pass
        elif '</LinkSetDb>' in line:    #end of block
            break
    return result


class EUtils(object):
    """Retrieves records from NCBI using EUtils."""
    def __init__(self, filename=None, wait=0.5, retmax=100, DEBUG=False, max_recs=None, **kwargs):
        self.__dict__.update(kwargs)
        self.filename = filename
        self.wait = wait
        self.retstart = 0  # was originally set to 1
        self.DEBUG = DEBUG
        self.retmax = retmax
        self.max_recs = max_recs
        #adjust retmax if max_recs is set: no point getting more records
        if max_recs is not None and max_recs < retmax:
            self.retmax = max_recs
    
    def __getitem__(self, query):
        """Gets an query from NCBI. Assumes lists are lists of accessions.
        
        Returns a handle to the result (either in memory or file on disk).
        WARNING: result is not guaranteed to contain any data.
        """
        #check if it's a slice
        if isinstance(query, slice):
            query = expand_slice(query)
        #check if it's a list -- if so, delimit with ' '
        if isinstance(query, list) or isinstance(query,tuple):
            query = ' '.join(map(str, query))
        self.term = query
        #wrap search in loop in case more records than retmax
        search_query = ESearch(**self.__dict__)
        search_query.retmax = 0 #don't want the ids, just want to post search
        if self.DEBUG:
            print 'SEARCH QUERY:'
            print str(search_query)
        cookie = search_query.read()
        if self.DEBUG:
            print 'COOKIE:'
            print `cookie`
        search_result = ESearchResultParser(cookie)
        if self.DEBUG:
            print 'SEARCH RESULT:'
            print search_result
        try:
            self.query_key = search_result.QueryKey
        except AttributeError:
            raise QueryNotFoundError, \
            "Query %s returned no results.\nURL was:\n%s" % \
            (repr(query),str(search_query))
        self.WebEnv = search_result.WebEnv
        count = search_result.Count

        #wrap the fetch in a loop so we get all the results
        fetch_query = EFetch(**self.__dict__)
        curr_rec = 0
        #check if we need to get additional ids

        if self.max_recs:    #cut off at max_recs if set
            count = min(count, self.max_recs)
            retmax = min(self.retmax, self.max_recs)
        else:
            retmax = self.retmax

        #figure out where to put the data
        if self.filename:
            result = open(self.filename, 'w')
        else:
            result = StringIO()
        
        while curr_rec < count:
            #do the fetch
            if count - curr_rec < self.retmax:
                fetch_query.retmax = count - curr_rec
            fetch_query.retstart = curr_rec
            if self.DEBUG:
                print 'FETCH QUERY'
                print 'CURR REC:', curr_rec, 'COUNT:', count
                print str(fetch_query)
            #return the result of the fetch
            curr = fetch_query.read()
            result.write(curr)
            if not curr.endswith('\n'):
                result.write('\n')
            curr_rec += retmax
            sleep(self.wait)
        #clean up after retrieval
        if self.filename:
            result.close()
            return open(self.filename, 'r')
        else:
            result.seek(0)
            return result

#The following are convenience wrappers for some of the above functionality

def get_primary_ids(term, retmax=100, max_recs=None, **kwargs):
    """Gets primary ids from query."""
    search_result = None
    records_got = 0
    if max_recs:
        retmax = min(retmax, max_recs)
    search_query = ESearch(term=term, retmax=retmax, **kwargs)
    while 1:
        cookie = search_query.read()
        if search_result is None:
            search_result = ESearchResultParser(cookie)
        else:
            search_result.IdList.extend(ESearchResultParser(cookie).IdList)
        #set the query key and WebEnv
        search_query.query_key = search_result.QueryKey
        search_query.WebEnv = search_result.WebEnv

        #if more results than retmax, keep adding results
        if max_recs:
            recs_to_get = min(max_recs, search_result.Count)
        else:
            recs_to_get = search_result.Count
        records_got += retmax
        if records_got >= recs_to_get:
            break
        elif recs_to_get - records_got < retmax:
            search_query.retmax = recs_to_get - records_got
        search_query.retstart = records_got
    return search_result.IdList

def ids_to_taxon_ids(ids, db='nucleotide'):
    """Converts primary ids to taxon ids"""
    link = ELink(id=' '.join(ids), db='taxonomy', dbfrom=db, DEBUG=True)
    return ELinkResultParser(link.read())

def get_between_tags(line):
    """"Returns portion of line between xml tags."""
    return line.split('>', 1)[1].rsplit('<', 1)[0]

def taxon_lineage_extractor(lines):
    """Extracts lineage from taxonomy record lines, not incl. species."""
    for line in lines:
        if '<Lineage>' in line:
            #expect line of form <Lineage>xxxx</Lineage> where xxxx semicolon-
            #delimited
            between_tags = line.split('>', 1)[1].rsplit('<', 1)[0]
            yield map(strip, between_tags.split(';'))

taxon_record_finder = DelimitedRecordFinder('</Taxon>', constructor=None, 
    strict=False)

def get_taxid_name_lineage(rec):
    """Returns taxon id, name, and lineage from single xml taxon record."""
    tax_tag = '  <TaxId>'
    name_tag = '  <ScientificName>'
    lineage_tag = '  <Lineage>'
    taxid = name = lineage = None
    for line in rec:
        if line.startswith(tax_tag):
            taxid = get_between_tags(line)
        elif line.startswith(name_tag):
            name = get_between_tags(line)
        elif line.startswith(lineage_tag):
            lineage = map(strip, get_between_tags(line).split(';'))
    return taxid, name, lineage

def get_taxa_names_lineages(lines):
    """Extracts taxon, name and lineage from each entry in an XML record."""
    empty_result = (None, None, None)
    for rec in taxon_record_finder(lines):
        curr = get_taxid_name_lineage(rec)
        if curr != empty_result:
            yield curr


def taxon_ids_to_names_and_lineages(ids, retmax=1000):
    """Yields taxon id, name and lineage for a set of taxon ids."""
    e = EUtils(db='taxonomy', rettype='TxInfo', retmode='xml', retmax=retmax,
        DEBUG=False)
    ids = fix_taxon_ids(ids)
    result = e[ids].read().splitlines()
    #print result
    return get_taxa_names_lineages(result)

def taxon_ids_to_lineages(ids, retmax=1000):
    """Returns full taxonomy (excluding species) from set of taxon ids.
    
    WARNING: Resulting lineages aren't in the same order as input. Use
    taxon_ids_to_name_and_lineage if you need the names and/or lineages 
    associated with the specific ids.
    """
    ids = fix_taxon_ids(ids)
    e = EUtils(db='taxonomy', rettype='TxInfo', retmode='xml', retmax=retmax,
        DEBUG=False)
    result = e[ids].read().splitlines()
    #print result
    return taxon_lineage_extractor(result)

def taxon_ids_to_names(ids, retmax=1000):
    """Returns names (e.g. species) from set of taxon ids.

    WARNING: Resulting lineages aren't in the same order as input. Use
    taxon_ids_to_name_and_lineage if you need the names and/or lineages 
    associated with the specific ids.
    """
    e = EUtils(db='taxonomy', rettype='brief', retmode='text', retmax=retmax,
        DEBUG=False)
    transformed_ids = fix_taxon_ids(ids)
    return e[transformed_ids].read().splitlines()

def fix_taxon_ids(ids):
    """Fixes list of taxonomy ids by adding [taxid] to each.

    Need to add taxid field restriction to each id because NCBI broke taxon
    id search around 3/07 and has no plans to fix it.
    """
    if isinstance(ids, str):
        if not ids.endswith('[taxid]'):
            ids += '[taxid]'
        transformed_ids = ids
    else:
        transformed_ids = []
        for i in ids:
            if not i.endswith('[taxid]'):
                i = i.strip() + '[taxid]'
                transformed_ids.append(i)
    transformed_ids = ' OR '.join(transformed_ids)
    return transformed_ids

def get_unique_lineages(query, db='protein'):
    """Gets the unique lineages directly from a query."""
    return set(map(tuple, taxon_ids_to_lineages(ids_to_taxon_ids(
        get_primary_ids(query,db=db),db=db))))

def get_unique_taxa(query, db='protein'):
    """Gets the unique lineages directly from a query."""
    return set(taxon_ids_to_names(ids_to_taxon_ids(get_primary_ids(query,db=db),db=db)))

if __name__ == '__main__':
    from sys import argv, exit
    
    if len(argv) < 5:
        print "Syntax: python ncbi.py db rettype retmax query."
        exit()
    db = argv[1]
    rettype = argv[2]
    retmax = int(argv[3])
    query = ' '.join(argv[4:])
    print 'Query: ', query
    e = EUtils(db=db,rettype=rettype,retmax=retmax, DEBUG=True)
    print e[query].read()
python-cogent 1.5.1-2 / usr / share / pyshared / cogent / db / ncbi.py