def generate_intensity():
    """Generates an intensity data file related to some user action.

    Clear: clear the old intensity mapping
    Cluster: intensity mapping associated with a topic cluster
    Word: intensity mapping associated with a particular word
    Study: intensity mapping associated with a study cluster"""

    clicked_on = request.args.get("options")

    if clicked_on == "clear":
        intensities_by_location = {}

    elif clicked_on == "cluster" or clicked_on == "word":
        if clicked_on == "cluster":
            cluster = request.args.get("cluster")
            word = TermCluster.get_words_in_cluster(cluster)
        else:
            word = request.args.get("word")

        studies = StudyTerm.get_by_word(word)

        # Create a dictionary of {pmid: frequency} values
        frequencies_by_pmid, max_intensity = organize_frequencies_by_study(studies)
        pmids = frequencies_by_pmid.keys()

        # Get the activations for the keys of the dictionary
        activations = Activation.get_activations_from_studies(pmids)

        # Assemble the final dictionary of {location: intensity} values,
        # scaling each value as we go
        intensities_by_location = scale_frequencies_by_loc(activations, max_intensity,
                                                           frequencies_by_pmid)

    elif clicked_on == "study":
        pmid = request.args.get("pmid")
        study = Study.get_study_by_pmid(pmid)

        # Look for cluster-mate studies
        cluster_mates = study.get_cluster_mates()

        # Get (location, study count) tuples from db
        activations = Activation.get_location_count_from_studies(cluster_mates)

        # Scale study counts in preparation for intensity mapping
        intensities_by_location = scale_study_counts(activations)

    print "Found intensities: ", intensities_by_location

    # Assemble the intensity map
    intensity_vals = generate_intensity_map(intensities_by_location)

    return intensity_vals
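
# organize_frequencies_by_study() and the other scaling helpers used above are
# defined elsewhere in the project. A minimal sketch of what the first one
# might look like, assuming `studies` is a list of StudyTerm rows that expose
# .pmid and .frequency attributes (both assumptions; the real helper may differ):
def organize_frequencies_by_study(studies):
    """Collapses StudyTerm rows into {pmid: frequency} and tracks the maximum."""
    frequencies_by_pmid = {}
    for study_term in studies:
        # Sum frequencies if the same pmid appears more than once
        frequencies_by_pmid[study_term.pmid] = (frequencies_by_pmid.get(study_term.pmid, 0)
                                                + study_term.frequency)
    max_intensity = max(frequencies_by_pmid.values()) if frequencies_by_pmid else 0
    return frequencies_by_pmid, max_intensity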
def generate_citations(radius=3):
    """Returns a list of text citations associated with some location,
    word or topic (cluster)."""

    clicked_on = request.args.get("options")

    if clicked_on == "location":
        x_coord = float(request.args.get("xcoord"))
        y_coord = float(request.args.get("ycoord"))
        z_coord = float(request.args.get("zcoord"))
        pmids = Activation.get_pmids_from_xyz(x_coord, y_coord, z_coord, radius)

    elif clicked_on == "word":
        word = request.args.get("word")

        # Get the pmids for a word
        pmids = StudyTerm.get_pmid_by_term(word)

    elif clicked_on == "cluster":
        cluster = request.args.get("cluster")

        # Get the words for a cluster
        # Then get the top studies for the words
        words = TermCluster.get_words_in_cluster(cluster)
        pmids = StudyTerm.get_pmid_by_term(words)

    elif clicked_on == "study":
        pmid = request.args.get("pmid")
        study = Study.get_study_by_pmid(pmid)

        # Look for cluster-mate studies
        pmids = study.get_cluster_mates()

    citations = Study.get_references(pmids)

    return jsonify(citations)
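
# Activation.get_pmids_from_xyz() is a model classmethod defined elsewhere. A
# rough standalone sketch of the same idea, under a hypothetical helper name,
# assuming Activation joins to Location via location_id and that Location
# stores x_coord/y_coord/z_coord (assumptions; the real query may use a true
# spherical distance rather than the cube approximation shown here):
def pmids_near_point(x, y, z, radius):
    """Returns distinct pmids whose peaks fall within +/- radius mm of (x, y, z)."""
    rows = (db.session.query(Activation.pmid)
            .join(Location, Activation.location_id == Location.location_id)
            .filter(Location.x_coord.between(x - radius, x + radius),
                    Location.y_coord.between(y - radius, y + radius),
                    Location.z_coord.between(z - radius, z + radius))
            .distinct()
            .all())
    return [row.pmid for row in rows]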
def load_studies():
    """Load data from database.txt into Location, Activation, Study tables."""

    # Delete all rows in existing tables, so if we need to run this a second
    # time, we won't add duplicates
    Location.query.delete()
    Study.query.delete()
    Activation.query.delete()

    skip = True
    count_studies = 0

    # Parse txt file and convert to appropriate data types for seeding
    for row in open("seed_data/database.txt"):

        # Skip the header of the txt file
        if skip:
            skip = False
            continue

        # Stop after the first 5000 rows for now
        # if count_studies > 5000:
        #     break

        row = row.rstrip().split("\t")

        # Information to go into Study, if applicable:
        pmid = int(row[0])
        doi = row[1]
        title = row[9]
        authors = row[10]
        year = int(row[11])
        journal = row[12].rstrip()

        # Information to go into Location, if applicable
        x = float(row[2])
        y = float(row[3])
        z = float(row[4])
        space = row[5]

        # Check whether PMID is already in Study; if not, add it to db.
        study_obj = Study.get_study_by_pmid(pmid)
        if study_obj is None:
            study_to_add = Study(pmid=pmid, doi=doi, title=title,
                                 authors=authors, year=year, journal=journal)
            db.session.add(study_to_add)
            db.session.commit()

        # Check whether xyz is already in Location; if not, add it to db and
        # retrieve its location ID (an autoincrementing primary key).
        # If xyz is already in Location, retrieve its location_id.
        location_obj = Location.check_by_xyz_space(x, y, z, space)
        if location_obj is None:
            location_to_add = Location(x_coord=x, y_coord=y, z_coord=z, space=space)
            db.session.add(location_to_add)
            db.session.commit()
            loc_id = Location.check_by_xyz_space(x, y, z, space).location_id
        else:
            loc_id = location_obj.location_id

        # Add activation to db, using location_id identified/generated above
        activation_to_add = Activation(pmid=pmid, location_id=loc_id)
        db.session.add(activation_to_add)
        db.session.commit()

        # Print where we are and increment counter
        print "Database.txt seeding row ", count_studies
        count_studies += 1
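
# Location.check_by_xyz_space() is a classmethod that lives with the Location
# model. A standalone sketch of the lookup it appears to perform, under a
# hypothetical name and assuming Flask-SQLAlchemy's Model.query is available
# (assumptions; the real method may differ):
def find_location(x, y, z, space):
    """Returns the matching Location row, or None if the point is new."""
    return Location.query.filter_by(x_coord=x, y_coord=y, z_coord=z,
                                    space=space).first()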
def load_studies():
    """Loads data from database.txt into Location, Activation, Study tables.

    File format:
    PMID \t doi \t x \t y \t z \t space \t peak_id \t table_id \t table_num
    \t title \t authors \t year \t journal

    Source: Neurosynth database.txt file"""

    skip = True
    count_studies = 0
    database = open("seed_data/database.txt")

    # Parse txt file and convert to appropriate data types for seeding
    for row in database:

        # Skip the header of the txt file
        if skip:
            skip = False
            continue

        # Stop after the first 5000 rows for now
        # if count_studies > 5000:
        #     break

        row = row.rstrip().split('\t')

        # Information to go into Study, if applicable:
        pmid = int(row[0])
        doi = row[1]
        title = row[9]
        authors = row[10]
        year = int(row[11])
        journal = row[12].rstrip()

        # Information to go into Location, if applicable
        x = float(row[2])
        y = float(row[3])
        z = float(row[4])

        # Check whether PMID is already in Study; if not, add it to db.
        study_obj = Study.get_study_by_pmid(pmid)
        if study_obj is None:
            study_to_add = Study(pmid=pmid, doi=doi, title=title,
                                 authors=authors, year=year, journal=journal)
            db.session.add(study_to_add)
            db.session.commit()

        # Check whether xyz is already in Location; if not, add it to db and
        # retrieve its location ID (an autoincrementing primary key).
        # If xyz is already in Location, get its location_id.
        location_obj = Location.check_by_xyz(x, y, z)
        if location_obj is None:
            location_to_add = Location(x_coord=x, y_coord=y, z_coord=z)
            db.session.add(location_to_add)
            db.session.commit()
            loc_id = Location.check_by_xyz(x, y, z).location_id
        else:
            loc_id = location_obj.location_id

        # Add activation to db, using location_id identified/generated above
        activation_to_add = Activation(pmid=pmid, location_id=loc_id)
        db.session.add(activation_to_add)
        db.session.commit()

        # Print where we are and increment counter
        print "Database.txt seeding row ", count_studies
        count_studies += 1

    database.close()
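
# In both loaders, a freshly inserted Location is queried again just to read
# its primary key. SQLAlchemy populates autoincrement keys on the instance
# after a flush or commit, so that second query can usually be avoided. A
# hedged sketch of that pattern, using a hypothetical helper name:
def get_or_create_location(x, y, z):
    """Returns the location_id for (x, y, z), inserting a new row if needed."""
    location_obj = Location.check_by_xyz(x, y, z)
    if location_obj is None:
        location_obj = Location(x_coord=x, y_coord=y, z_coord=z)
        db.session.add(location_obj)
        db.session.commit()  # the autoincrement primary key is populated here
    return location_obj.location_id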
def generate_d3(radius=3):
    """Returns JSON with xyz at the root node.

    Test with parameters: 40, -45, -25 (fusiform face area)"""

    clicked_on = request.args.get("options")

    if clicked_on == "location":
        x_coord = float(request.args.get("xcoord"))
        y_coord = float(request.args.get("ycoord"))
        z_coord = float(request.args.get("zcoord"))
        pmids = Activation.get_pmids_from_xyz(x_coord, y_coord, z_coord, radius)
        scale = 70000

    elif clicked_on == "study":
        pmid = request.args.get("pmid")
        study = Study.get_study_by_pmid(pmid)
        pmids = study.get_cluster_mates()
        scale = 30000

    # Get [(wd, freq), ...] and [wd1, wd2, ...] for the most frequent words
    terms_for_dict, words = StudyTerm.get_terms_by_pmid(pmids)

    # Optional: transform the terms

    # Get the top clusters
    top_clusters = TermCluster.get_top_clusters(words)

    # Get the cluster-word associations
    associations = TermCluster.get_word_cluster_pairs(top_clusters, words)

    # Make the root node:
    root_dict = {"name": "", "children": []}

    # Build the terminal nodes (leaves) first using (wd, freq) tuples
    # Output: {word: {'name': word, 'size': freq}, word2: ...}
    leaves = {}
    for (word, freq) in terms_for_dict:
        if word not in leaves:
            leaves[word] = {"name": word, "size": freq * scale}
        else:
            leaves[word]["size"] += freq * scale

    # Embed the leaves in the clusters:
    # Output: {cluster_id: {'name': ID, 'children': [...]}, ...}
    clusters = {}
    for (cluster_id, word) in associations:
        if cluster_id not in clusters:
            clusters[cluster_id] = {"name": cluster_id, "children": [leaves[word]]}
        else:
            clusters[cluster_id]["children"].append(leaves[word])

    # Put the clusters in the root dictionary
    # Output: {'name': root, 'children': [{'name': id, 'children': []}, ...]}
    for cluster in top_clusters:
        root_dict["children"].append(clusters[cluster])

    return jsonify(root_dict)
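
# For reference, generate_d3() builds a d3 flare-style hierarchy: a root node
# whose children are clusters, whose children are word leaves. A tiny
# illustrative example of the returned shape (ids and sizes are made up):
EXAMPLE_D3_TREE = {
    "name": "",
    "children": [
        {"name": 17,  # cluster id
         "children": [
             {"name": "face", "size": 2100},
             {"name": "fusiform", "size": 1400},
         ]},
    ],
}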