def get_missing_entries(databank_name):
    """
    List the entries the parent databank has present but this databank lacks.

    A parent entry is reported as missing when this databank has no record
    for its pdbid, or when the record it has lacks a 'filepath' or an
    'mtime' field.

    :param databank_name: name of the databank to inspect.
    :return: list of entry dicts: the stored record for incomplete entries,
             or a new {'pdbid', 'databank_name'} dict for unrecorded ones.
             Empty when the databank has no parent.
    :raises Exception: if no databank with that name is found.
    """
    databank = storage.find_one('databanks', {'name': databank_name})
    if not databank:
        raise Exception("no such databank: " + databank_name)

    # Needs a parent to determine what's missing
    if 'parent_name' not in databank:
        return []

    # Every record of this databank, indexed by pdbid for quick lookup.
    own_records = entries_by_pdbid(
        storage.find('entries', {'databank_name': databank_name}))

    missing = []
    for parent_entry in get_present_entries(databank['parent_name']):
        pdbid = parent_entry['pdbid']

        if pdbid not in own_records:
            # Nothing recorded at all: report a bare stub.
            missing.append({'pdbid': pdbid, 'databank_name': databank_name})
            continue

        record = own_records[pdbid]
        if not ('filepath' in record and 'mtime' in record):
            # Recorded, but without both file fields.
            missing.append(record)

    return missing
def get_unannotated_entries(databank_name):
    """
    List parent entries for which this databank has neither file nor comment.

    :param databank_name: name of the databank to inspect.
    :return: list of {'pdbid', 'databank_name'} dicts, one per parent entry
             with a 'filepath' whose pdbid has no record here carrying a
             'filepath' or a 'comment'. Empty when the databank has no parent.
    :raises Exception: if no databank with that name is found.
    """
    databank = storage.find_one('databanks', {'name': databank_name})
    if not databank:
        raise Exception("no such databank: " + databank_name)

    # Needs a parent to determine what's missing
    if 'parent_name' not in databank:
        return []

    # Records of this databank that are accounted for: present or annotated.
    accounted_query = {
        'databank_name': databank_name,
        '$or': [
            {'filepath': {'$exists': True}},
            {'comment': {'$exists': True}},
        ],
    }
    accounted = entries_by_pdbid(storage.find('entries', accounted_query))

    # Only the pdbid of each present parent entry is needed.
    parent_query = {
        'databank_name': databank['parent_name'],
        'filepath': {'$exists': True},
    }
    parent_entries = storage.find('entries', parent_query, {'pdbid': 1})

    return [
        {'pdbid': parent_entry['pdbid'], 'databank_name': databank_name}
        for parent_entry in parent_entries
        if parent_entry['pdbid'] not in accounted
    ]
def count_summary(databank_name):
    """
    Count the entries of a databank per category.

    :param databank_name: name of the databank to summarise.
    :return: dict with the integer counts 'present', 'missing', 'annotated',
             'unannotated', 'obsolete' and 'valid'.
    :raises Exception: if no databank with that name is found.
    """
    databank = storage.find_one('databanks', {'name': databank_name})
    if not databank:
        raise Exception("no such databank: " + databank_name)

    # Only the pdbid field is requested from storage.
    pdbid_only = {'pdbid': 1, '_id': 0}

    present_pdbids = Set()
    present_query = {'databank_name': databank_name,
                     'filepath': {'$exists': True}}
    for entry in storage.find('entries', present_query, pdbid_only):
        present_pdbids.add(entry['pdbid'])
    n_present = len(present_pdbids)

    if 'parent_name' not in databank:
        # no parent, so nothing is missing or obsolete
        return {
            'present': n_present,
            'missing': 0,
            'valid': n_present,
            'obsolete': 0,
            'annotated': 0,
            'unannotated': 0,
        }

    parent_name = databank['parent_name']
    parent_entries = storage.find(
        'entries',
        {'databank_name': parent_name, 'filepath': {'$exists': True}},
        pdbid_only)
    comment_entries = storage.find(
        'entries',
        {'databank_name': databank_name, 'comment': {'$exists': True}},
        pdbid_only)

    # Missing: the parent has the file but this databank does not.
    parent_pdbids = Set()
    missing_pdbids = Set()
    for entry in parent_entries:
        pdbid = entry['pdbid']
        parent_pdbids.add(pdbid)
        if pdbid not in present_pdbids:
            missing_pdbids.add(pdbid)

    n_missing = len(missing_pdbids)
    n_annotated = sum(1 for entry in comment_entries
                      if entry['pdbid'] in missing_pdbids)
    # Obsolete: present here, but the parent has no file for that pdbid.
    n_obsolete = sum(1 for pdbid in present_pdbids
                     if pdbid not in parent_pdbids)

    return {
        'present': n_present,
        'missing': n_missing,
        'annotated': n_annotated,
        # missing = annotated + unannotated
        'unannotated': n_missing - n_annotated,
        'obsolete': n_obsolete,
        'valid': n_present - n_obsolete,
    }
def databanks(name=None):
    """
    Render the databank page for one databank or for all of them.

    :param name: name of a single databank to show; when None, every
                 databank in storage is passed to the template.
    :return: the rendered 'databank/DatabankPage.html' template.
    """
    if name is None:
        selected = storage.find('databanks', {})
    else:
        # NOTE(review): the lookup result is passed on unchecked, so an
        # unknown name hands the template a list holding whatever find_one
        # returns for "not found" - confirm the template copes with that.
        selected = [storage.find_one('databanks', {'name': name})]

    return render_template('databank/DatabankPage.html',
                           db_tree=db_tree,
                           nav_disabled='databanks',
                           databanks=selected)
def get_parent(databank_name, pdbid):
    """
    Look up the entry with the given pdbid in the parent databank.

    :param databank_name: name of the child databank.
    :param pdbid: pdb id to look up in the parent databank.
    :return: whatever storage.find_one yields for the parent's entry, or
             None when the databank has no parent.
    :raises Exception: if the databank, or the parent it names, is not found.
    """
    databank = storage.find_one('databanks', {'name': databank_name})
    if not databank:
        raise Exception("no such databank: " + databank_name)

    if 'parent_name' not in databank:
        return None

    parent_name = databank['parent_name']
    # The named parent must itself be a known databank.
    if not storage.find_one('databanks', {'name': parent_name}):
        raise Exception("no such databank: " + parent_name)

    entry_query = {'databank_name': parent_name, 'pdbid': pdbid}
    return storage.find_one('entries', entry_query)
def get_parent_name(databank_name):
    """
    Return the name of a databank's parent.

    :param databank_name: name of the databank to look up.
    :return: the databank's 'parent_name' value, or '' when it has none.
    :raises Exception: if no databank with that name is found.
    """
    record = storage.find_one('databanks', {'name': databank_name})
    if not record:
        raise Exception("no such databank: " + databank_name)

    return record['parent_name'] if 'parent_name' in record else ''
def get_parent_name(databank_name):
    """
    Return the name of a databank's parent.

    :param databank_name: name of the databank to look up.
    :return: the databank's 'parent_name' value, or '' when it has none.
    :raises Exception: if no databank with that name is found.
    """
    record = storage.find_one('databanks', {'name': databank_name})
    if not record:
        raise Exception("no such databank: " + databank_name)

    # A databank without a parent is reported with an empty name.
    if 'parent_name' not in record:
        return ''
    return record['parent_name']
def entries():
    """
    Render the entries page for the selection given in the query string.

    Reads the 'collection', 'databank' and 'comment' request arguments and
    selects entries by, in order of precedence: databank + collection,
    databank + comment text, or comment text alone. Selected entries with a
    'filepath' (when the databank is known) are listed as files; otherwise
    entries with a 'comment' are grouped by comment text.

    :return: the rendered 'entries/EntriesPage.html' template.
    """
    collection = request.args.get('collection')
    databank_name = request.args.get('databank')
    comment_text = request.args.get('comment')

    _log.info("request for entries %s %s %s" % (collection, databank_name,
                                                comment_text))

    title = 'No entries selected'
    selected = []
    if databank_name and collection:
        selected = get_entries_from_collection(databank_name, collection)
        title = "%s %s" % (databank_name, collection)
    elif databank_name and comment_text:
        selected = get_entries_with_comment(databank_name, comment_text)
        title = comment_text
    elif comment_text:
        selected = get_all_entries_with_comment(comment_text)
        title = comment_text

    databank = storage.find_one('databanks', {'name': databank_name})

    files = []
    comments = {}
    for entry in selected:
        if databank and 'filepath' in entry:
            files.append({
                'name': os.path.basename(entry['filepath']),
                'url': get_file_link(databank, entry['pdbid']),
            })
        elif 'comment' in entry:
            # Group "databank,pdbid" labels under their comment text.
            comments.setdefault(entry['comment'], []).append(
                '%s,%s' % (entry['databank_name'], entry['pdbid']))

    comment_tree = comments_to_tree(comments)

    return render_template('entries/EntriesPage.html',
                           db_tree=db_tree,
                           nav_disabled='entries',
                           collection=collection,
                           databank_name=databank_name,
                           comment=comment_text,
                           title=title,
                           entries=selected,
                           files=files,
                           comment_tree=comment_tree)
def get_entries_of_interest(databank_name, check_pdbids=None):
    """
    Collect the entries of a databank that need to be looked at.

    :param databank_name: name of the databank.
    :param check_pdbids: optional iterable of pdb ids to restrict to; when
                         None, the databank's missing entries are used.
    :return: for explicit pdb ids, a list with one entry per id: the stored
             record, or a new {'pdbid', 'databank_name'} dict when the lookup
             gives None. Otherwise the result of get_missing_entries.
    """
    if check_pdbids is None:
        return get_missing_entries(databank_name)

    selected = []
    for pdbid in check_pdbids:
        record = storage.find_one(
            'entries', {'databank_name': databank_name, 'pdbid': pdbid})
        if record is None:
            # No stored record for this id: use a stub instead.
            record = {'pdbid': pdbid, 'databank_name': databank_name}
        selected.append(record)
    return selected
def databanks(name=None):
    """
    Render the databank page for one databank or for all of them.

    :param name: name of a single databank to show; when None, every
                 databank in storage is passed to the template.
    :return: the rendered 'databank/DatabankPage.html' template.
    """
    if name is None:
        selected = storage.find('databanks', {})
    else:
        # NOTE(review): the lookup result is passed on unchecked, so an
        # unknown name hands the template a list holding whatever find_one
        # returns for "not found" - confirm the template copes with that.
        selected = [storage.find_one('databanks', {'name': name})]

    return render_template('databank/DatabankPage.html',
                           db_tree=db_tree,
                           nav_disabled='databanks',
                           databanks=selected)
def get_parent(databank_name, pdbid):
    """
    Look up the entry with the given pdbid in the parent databank.

    :param databank_name: name of the child databank.
    :param pdbid: pdb id to look up in the parent databank.
    :return: whatever storage.find_one yields for the parent's entry, or
             None when the databank has no parent.
    :raises Exception: if the databank, or the parent it names, is not found.
    """
    child = storage.find_one('databanks', {'name': databank_name})
    if not child:
        raise Exception("no such databank: " + databank_name)

    if 'parent_name' not in child:
        return None

    parent_name = child['parent_name']
    # The named parent must itself be a known databank.
    parent = storage.find_one('databanks', {'name': parent_name})
    if not parent:
        raise Exception("no such databank: " + parent_name)

    return storage.find_one(
        'entries', {'databank_name': parent_name, 'pdbid': pdbid})
def update_entry(entry):
    """
    Store an entry, replacing the existing record for the same key if any.

    :param entry: entry dict; must contain 'databank_name' and 'pdbid',
                  which together identify the record.
    """
    key = {
        'databank_name': entry['databank_name'],
        'pdbid': entry['pdbid'],
    }

    if not storage.find_one('entries', key):
        # Nothing stored under this key yet.
        storage.insert('entries', entry)
        return

    storage.update('entries', key, entry)
def annotations(databank_name, pdbid):
    """
    Request all annotations for a given entry.

    :param databank_name: Name of the whynot databank.
    :param pdbid: pdb id of the entry.
    :return: a text/plain response with all the comments in it; the body is
             empty when the entry is not found or carries no comment.
    """
    entry = storage.find_one('entries', {'pdbid': pdbid,
                                         'databank_name': databank_name})

    comment = ''
    # An entry may exist without a 'comment' field (e.g. one that only has
    # a file), so check for the key instead of indexing blindly.
    if entry and 'comment' in entry:
        comment = entry['comment']

    return Response(comment, mimetype='text/plain')
def get_valid_entries(databank_name):
    """
    List the present entries of a databank that the parent also has present.

    :param databank_name: name of the databank to inspect.
    :return: list of present entries whose pdbid is also present in the
             parent; for a databank without a parent, the result of
             get_present_entries unchanged.
    :raises Exception: if no databank with that name is found.
    """
    databank = storage.find_one('databanks', {'name': databank_name})
    if not databank:
        raise Exception("no such databank: " + databank_name)

    # Without a parent every present entry is valid.
    if 'parent_name' not in databank:
        return get_present_entries(databank_name)

    in_parent = entries_by_pdbid(get_present_entries(databank['parent_name']))
    return [entry for entry in get_present_entries(databank_name)
            if entry['pdbid'] in in_parent]
def get_obsolete_entries(databank_name):
    """
    List the present entries of a databank that the parent does not have.

    :param databank_name: name of the databank to inspect.
    :return: list of present entries whose pdbid is not among the parent's
             present entries; empty when the databank has no parent.
    :raises Exception: if no databank with that name is found.
    """
    databank = storage.find_one('databanks', {'name': databank_name})
    if not databank:
        raise Exception("no such databank: " + databank_name)

    # Without a parent nothing can be obsolete.
    if 'parent_name' not in databank:
        return []

    in_parent = entries_by_pdbid(get_present_entries(databank['parent_name']))
    return [entry for entry in get_present_entries(databank_name)
            if entry['pdbid'] not in in_parent]
def get_valid_entries(databank_name):
    """
    List the present entries of a databank that the parent also has present.

    :param databank_name: name of the databank to inspect.
    :return: list of present entries whose pdbid is also present in the
             parent; for a databank without a parent, the result of
             get_present_entries unchanged.
    :raises Exception: if no databank with that name is found.
    """
    record = storage.find_one('databanks', {'name': databank_name})
    if not record:
        raise Exception("no such databank: " + databank_name)

    # Without a parent every present entry is valid.
    if 'parent_name' not in record:
        return get_present_entries(databank_name)

    parent_by_pdbid = entries_by_pdbid(
        get_present_entries(record['parent_name']))
    return [present for present in get_present_entries(databank_name)
            if present['pdbid'] in parent_by_pdbid]
def get_obsolete_entries(databank_name):
    """
    List the present entries of a databank that the parent does not have.

    :param databank_name: name of the databank to inspect.
    :return: list of present entries whose pdbid is not among the parent's
             present entries; empty when the databank has no parent.
    :raises Exception: if no databank with that name is found.
    """
    record = storage.find_one('databanks', {'name': databank_name})
    if not record:
        raise Exception("no such databank: " + databank_name)

    # Without a parent nothing can be obsolete.
    if 'parent_name' not in record:
        return []

    parent_by_pdbid = entries_by_pdbid(
        get_present_entries(record['parent_name']))
    return [present for present in get_present_entries(databank_name)
            if present['pdbid'] not in parent_by_pdbid]
def entries():
    """
    Render the entries page for the selection given in the query string.

    Reads the 'collection', 'databank' and 'comment' request arguments and
    selects entries by, in order of precedence: databank + collection,
    databank + comment text, or comment text alone. Selected entries with a
    'filepath' (when the databank is known) are listed as files; otherwise
    entries with a 'comment' are grouped by comment text.

    :return: the rendered 'entries/EntriesPage.html' template.
    """
    collection = request.args.get('collection')
    databank_name = request.args.get('databank')
    comment_text = request.args.get('comment')

    _log.info("request for entries %s %s %s" %(collection, databank_name, comment_text))

    title = 'No entries selected'
    selected = []
    if databank_name and collection:
        selected = get_entries_from_collection(databank_name, collection)
        title = "%s %s" %(databank_name, collection)
    elif databank_name and comment_text:
        selected = get_entries_with_comment(databank_name, comment_text)
        title = comment_text
    elif comment_text:
        selected = get_all_entries_with_comment(comment_text)
        title = comment_text

    databank = storage.find_one('databanks', {'name': databank_name})

    files = []
    comments = {}
    for entry in selected:
        if databank and 'filepath' in entry:
            files.append({
                'name': os.path.basename(entry['filepath']),
                'url': get_file_link(databank, entry['pdbid']),
            })
        elif 'comment' in entry:
            # Group "databank,pdbid" labels under their comment text.
            comments.setdefault(entry['comment'], []).append(
                '%s,%s' %(entry['databank_name'], entry['pdbid']))

    comment_tree = comments_to_tree(comments)

    return render_template('entries/EntriesPage.html',
                           db_tree=db_tree,
                           nav_disabled='entries',
                           collection=collection,
                           databank_name=databank_name,
                           comment=comment_text,
                           title=title,
                           entries=selected,
                           files=files,
                           comment_tree=comment_tree)
def annotations(databank_name, pdbid):
    """
    Request all annotations for a given entry.

    :param databank_name: Name of the whynot databank.
    :param pdbid: pdb id of the entry.
    :return: a text/plain response with all the comments in it; the body is
             empty when the entry is not found or carries no comment.
    """
    entry = storage.find_one('entries', {
        'pdbid': pdbid,
        'databank_name': databank_name
    })

    comment = ''
    # An entry may exist without a 'comment' field (e.g. one that only has
    # a file), so check for the key instead of indexing blindly.
    if entry and 'comment' in entry:
        comment = entry['comment']

    return Response(comment, mimetype='text/plain')
if entry['pdbid'] in present_entries_bypdbid: continue if entry['pdbid'] in record_pdbids: storage.update('entries', {'databank_name':databank['name'], 'pdbid':entry['pdbid']}, entry) else: storage.insert('entries', entry) if not len(sys.argv) == 3: print 'Usage: %s [databank name] [source]' % sys.argv[0] sys.exit(0) databank_name = sys.argv [1] source = sys.argv [2] databank = storage.find_one ('databanks', {'name':databank_name, 'crawltype':{'$in':[LINE,FILE]}}) if not databank: raise Exception ('not found or unknown crawl type: ' + databank_name) # On urls, we can only use the line crawler for now. if source.startswith ('http://') or source.startswith ('ftp://') or os.path.isfile (source): lines = get_lines (source) remove_changed (databank, lines) crawl_lines (databank, source, lines) elif os.path.isdir(source): files = get_pathnames (source)
def count_summary(databank_name):
    """
    Count the entries of a databank per category.

    :param databank_name: name of the databank to summarise.
    :return: dict with the integer counts 'present', 'missing', 'annotated',
             'unannotated', 'obsolete' and 'valid'.
    :raises Exception: if no databank with that name is found.
    """
    databank = storage.find_one('databanks', {'name': databank_name})
    if not databank:
        raise Exception("no such databank: " + databank_name)

    # Only the pdbid field is requested from storage.
    pdbid_only = {'pdbid': 1, '_id': 0}

    have_file = Set()
    own_file_query = {
        'databank_name': databank_name,
        'filepath': {'$exists': True},
    }
    for entry in storage.find('entries', own_file_query, pdbid_only):
        have_file.add(entry['pdbid'])
    present_total = len(have_file)

    if 'parent_name' not in databank:
        # no parent, so nothing is missing or obsolete
        return {
            'present': present_total,
            'missing': 0,
            'valid': present_total,
            'obsolete': 0,
            'annotated': 0,
            'unannotated': 0,
        }

    parent_file_query = {
        'databank_name': databank['parent_name'],
        'filepath': {'$exists': True},
    }
    own_comment_query = {
        'databank_name': databank_name,
        'comment': {'$exists': True},
    }
    parent_entries = storage.find('entries', parent_file_query, pdbid_only)
    comment_entries = storage.find('entries', own_comment_query, pdbid_only)

    # Missing: the parent has the file but this databank does not.
    in_parent = Set()
    lacking = Set()
    for entry in parent_entries:
        pdbid = entry['pdbid']
        in_parent.add(pdbid)
        if pdbid not in have_file:
            lacking.add(pdbid)

    missing_total = len(lacking)
    annotated_total = sum(1 for entry in comment_entries
                          if entry['pdbid'] in lacking)
    # Obsolete: present here, but the parent has no file for that pdbid.
    obsolete_total = sum(1 for pdbid in have_file if pdbid not in in_parent)

    return {
        'present': present_total,
        'missing': missing_total,
        'annotated': annotated_total,
        # missing = annotated + unannotated
        'unannotated': missing_total - annotated_total,
        'obsolete': obsolete_total,
        'valid': present_total - obsolete_total,
    }
def get_entry(databank_name, pdbid):
    """
    Fetch a single entry record from storage.

    :param databank_name: name of the databank the entry belongs to.
    :param pdbid: pdb id of the entry.
    :return: the result of storage.find_one for that databank/pdbid pair.
    """
    entry_key = {'databank_name': databank_name, 'pdbid': pdbid}
    return storage.find_one('entries', entry_key)
'pdbid': entry['pdbid'] }, entry) else: storage.insert('entries', entry) if not len(sys.argv) == 3: print 'Usage: %s [databank name] [source]' % sys.argv[0] sys.exit(0) databank_name = sys.argv[1] source = sys.argv[2] databank = storage.find_one('databanks', { 'name': databank_name, 'crawltype': { '$in': [LINE, FILE] } }) if not databank: raise Exception('not found or unknown crawl type: ' + databank_name) # On urls, we can only use the line crawler for now. if source.startswith('http://') or source.startswith( 'ftp://') or os.path.isfile(source): lines = get_lines(source) remove_changed(databank, lines) crawl_lines(databank, source, lines) elif os.path.isdir(source):
def get_entry(databank_name, pdbid):
    """
    Fetch a single entry record from storage.

    :param databank_name: name of the databank the entry belongs to.
    :param pdbid: pdb id of the entry.
    :return: the result of storage.find_one for that databank/pdbid pair.
    """
    query = dict(databank_name=databank_name, pdbid=pdbid)
    found = storage.find_one('entries', query)
    return found