Example #1
def generate_intensity():
    """Generates an intensity data file related to some user action.

    Clear: clear the old intensity mapping
    Cluster: intensity mapping associated with a topic cluster
    Word: intensity mapping associated with a particular word
    Study: intensity mapping associated with a study cluster"""

    clicked_on = request.args.get("options")

    # Default to an empty mapping so an unrecognized option falls through
    # to a blank intensity map instead of a NameError below
    intensities_by_location = {}

    if clicked_on == "clear":

        intensities_by_location = {}

    elif clicked_on in ("cluster", "word"):

        if clicked_on == "cluster":

            cluster = request.args.get("cluster")
            word = TermCluster.get_words_in_cluster(cluster)

        else:

            word = request.args.get("word")

        studies = StudyTerm.get_by_word(word)

        # Create a dictionary of {pmid: frequency} values
        frequencies_by_pmid, max_intensity = organize_frequencies_by_study(studies)
        pmids = frequencies_by_pmid.keys()

        # Get the activations for the keys of the dictionary
        activations = Activation.get_activations_from_studies(pmids)

        # Assemble the final dictionary of {location:intensity} values, scaling
        # each value as we go
        intensities_by_location = scale_frequencies_by_loc(activations, max_intensity, frequencies_by_pmid)

    elif clicked_on == "study":

        pmid = request.args.get("pmid")
        study = Study.get_study_by_pmid(pmid)

        # Look for cluster-mate studies
        cluster_mates = study.get_cluster_mates()

        # Get (location, study count) tuples from db
        activations = Activation.get_location_count_from_studies(cluster_mates)

        # Scale study counts in preparation for intensity mapping
        intensities_by_location = scale_study_counts(activations)

        print "Found intensities: ", intensities_by_location

    # Assemble the intensity map
    intensity_vals = generate_intensity_map(intensities_by_location)

    return intensity_vals
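
The view above leans on helpers that are not shown. A minimal sketch of what organize_frequencies_by_study() might look like, assuming each StudyTerm row exposes .pmid and .frequency attributes (both attribute names are assumptions, not confirmed by the snippet):

def organize_frequencies_by_study(studies):
    """Sketch: collapse StudyTerm rows into {pmid: summed frequency}
    and return that mapping plus the largest frequency seen."""
    frequencies_by_pmid = {}
    for study in studies:
        # Sum, in case the same pmid matched more than one word
        frequencies_by_pmid[study.pmid] = (
            frequencies_by_pmid.get(study.pmid, 0) + study.frequency)
    max_intensity = max(frequencies_by_pmid.values()) if frequencies_by_pmid else 0
    return frequencies_by_pmid, max_intensity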
Example #2
def generate_citations(radius=3):
    """Returns a list of text citations associated with some location, word
    or topic (cluster)."""

    clicked_on = request.args.get("options")

    # Default so an unrecognized option returns an empty citation list
    # rather than raising a NameError at Study.get_references(pmids)
    pmids = []

    if clicked_on == "location":
        x_coord = float(request.args.get("xcoord"))
        y_coord = float(request.args.get("ycoord"))
        z_coord = float(request.args.get("zcoord"))

        pmids = Activation.get_pmids_from_xyz(x_coord, y_coord, z_coord, radius)

    elif clicked_on == "word":
        word = request.args.get("word")

        # Get the pmids for a word
        pmids = StudyTerm.get_pmid_by_term(word)

    elif clicked_on == "cluster":
        cluster = request.args.get("cluster")

        # Get the words for a cluster
        # Then get the top studies for the words
        words = TermCluster.get_words_in_cluster(cluster)
        pmids = StudyTerm.get_pmid_by_term(words)

    elif clicked_on == "study":

        pmid = request.args.get("pmid")
        study = Study.get_study_by_pmid(pmid)

        # Look for cluster-mate studies
        pmids = study.get_cluster_mates()

    citations = Study.get_references(pmids)

    return jsonify(citations)
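
Both views read request.args and return jsonify(...), so they appear to be Flask handlers. A minimal sketch of how generate_citations() might be registered and exercised; the "/citations" route path is an assumption:

from flask import Flask

app = Flask(__name__)
app.add_url_rule("/citations", view_func=generate_citations)

# e.g. GET /citations?options=word&word=memory
# e.g. GET /citations?options=location&xcoord=40&ycoord=-45&zcoord=-25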
Example #3
def load_studies():
    """Load data from database.txt into Location, Activation, Study tables."""

    # Delete all rows in existing tables, so if we need to run this a second time,
    # we won't add duplicates
    Location.query.delete()
    Study.query.delete()
    Activation.query.delete()

    skip = True
    count_studies = 0

    # Parse txt file and convert to appropriate data types for seeding
    for row in open("seed_data/database.txt"):

        # Skip the header of the txt file
        if skip:
            skip = False
            continue

        # Stop after the first 5000 rows for now
        # if count_studies > 5000:
        #     break

        row = row.rstrip().split("\t")

        # Information to go into Study, if applicable:
        pmid = int(row[0])
        doi = row[1]
        title = row[9]
        authors = row[10]
        year = int(row[11])
        journal = row[12].rstrip()

        # Information to go into Location, if applicable
        x = float(row[2])
        y = float(row[3])
        z = float(row[4])
        space = row[5]

        # Check whether PMID is already in Study; if not, add it to db.
        study_obj = Study.get_study_by_pmid(pmid)

        if study_obj is None:
            study_to_add = Study(pmid=pmid, doi=doi, title=title, authors=authors, year=year, journal=journal)
            db.session.add(study_to_add)
            db.session.commit()

        # Check whether xyz is already in Location; if not, add it to db and
        # retrieve its location ID (an autoincrementing primary key).
        # If xyz already in Location, retrieve its location_id.
        location_obj = Location.check_by_xyz_space(x, y, z, space)

        if location_obj is None:
            location_to_add = Location(x_coord=x, y_coord=y, z_coord=z, space=space)
            db.session.add(location_to_add)
            db.session.commit()
            loc_id = Location.check_by_xyz_space(x, y, z, space).location_id
        else:
            loc_id = location_obj.location_id

        # Add activation to db, using location_id identified/generated above
        activation_to_add = Activation(pmid=pmid, location_id=loc_id)
        db.session.add(activation_to_add)
        db.session.commit()

        # Print where we are and increment counter
        print "Database.txt seeding row ", count_studies
        count_studies += 1
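
One performance note: the loop commits after every insert. If Study.get_study_by_pmid() and Location.check_by_xyz_space() query through the same db.session (so SQLAlchemy's autoflush can see pending rows), the commits can be batched. A sketch under that assumption, with an arbitrary batch size:

def seed_activations(rows, batch_size=1000):
    """Sketch: insert Activation rows, committing per batch rather
    than per row. rows is an iterable of (pmid, location_id) pairs."""
    for count, (pmid, loc_id) in enumerate(rows, start=1):
        db.session.add(Activation(pmid=pmid, location_id=loc_id))
        if count % batch_size == 0:
            db.session.commit()
    db.session.commit()  # commit the final partial batch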
Example #4
def load_studies():
    """Loads data from database.txt into Location, Activation, Study tables.

    File format:    PMID \t doi \t x \t y \t z \t space \t peak_id \t table_id 
                    \t table_num \t title \t authors \t year \t journal \t

    Source: Neurosynth database.txt file"""

    skip = True
    count_studies = 0
    database = open("seed_data/database.txt")

    # Parse txt file and convert to appropriate data types for seeding
    for row in database:

        # Skip the header of the txt file
        if skip:
            skip = False
            continue

        # Stop after the first 5000 rows for now
        # if count_studies > 5000:
        #     break

        row = row.rstrip().split('\t')

        # Information to go into Study, if applicable:
        pmid = int(row[0])
        doi = row[1]
        title = row[9]
        authors = row[10]
        year = int(row[11])
        journal = row[12].rstrip()

        # Information to go into Location, if applicable
        x = float(row[2])
        y = float(row[3])
        z = float(row[4])

        # Check whether PMID is already in Study; if not, add it to db.
        study_obj = Study.get_study_by_pmid(pmid)

        if study_obj is None:
            study_to_add = Study(pmid=pmid, doi=doi, title=title,
                                 authors=authors, year=year, journal=journal)
            db.session.add(study_to_add)
            db.session.commit()

        # Check whether xyz is already in Location; if not, add it to db and
        # retrieve its location ID (an autoincrementing primary key).
        # If xyz already in Location, get its location_id.
        location_obj = Location.check_by_xyz(x, y, z)

        if location_obj is None:
            location_to_add = Location(x_coord=x, y_coord=y, z_coord=z)
            db.session.add(location_to_add)
            db.session.commit()
            loc_id = Location.check_by_xyz(x, y, z).location_id
        else:
            loc_id = location_obj.location_id

        # Add activation to db, using location_id identified/generated above
        activation_to_add = Activation(pmid=pmid, location_id=loc_id)
        db.session.add(activation_to_add)
        db.session.commit()

        # Print where we are and increment counter
        print "Database.txt seeding row ", count_studies
        count_studies += 1

    database.close()
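
The docstring's column layout can also be parsed with the csv module, which handles the tab-splitting, the header skip, and file closing in one place. A sketch of the same parse; only the columns the loader actually uses are unpacked:

import csv

with open("seed_data/database.txt") as database:
    reader = csv.reader(database, delimiter="\t")
    next(reader)  # skip the header row
    for row in reader:
        pmid, doi = int(row[0]), row[1]
        x, y, z, space = float(row[2]), float(row[3]), float(row[4]), row[5]
        title, authors = row[9], row[10]
        year, journal = int(row[11]), row[12].rstrip()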
Example #5
def generate_d3(radius=3):
    """ Returns JSON with xyz at the root node.

    Test with parameters: 40, -45, -25    (Fusiform face area)
    """

    clicked_on = request.args.get("options")

    # Note: only "location" and "study" are handled here; any other
    # option would leave pmids and scale unbound below

    if clicked_on == "location":

        x_coord = float(request.args.get("xcoord"))
        y_coord = float(request.args.get("ycoord"))
        z_coord = float(request.args.get("zcoord"))

        pmids = Activation.get_pmids_from_xyz(x_coord, y_coord, z_coord, radius)
        scale = 70000
        # Get [(wd, freq), ...] and [wd1, wd2] for most frequent words

    elif clicked_on == "study":

        pmid = request.args.get("pmid")
        study = Study.get_study_by_pmid(pmid)
        pmids = study.get_cluster_mates()
        scale = 30000

    terms_for_dict, words = StudyTerm.get_terms_by_pmid(pmids)
    # Optional: transform the terms
    # Get the top clusters
    top_clusters = TermCluster.get_top_clusters(words)
    # Get the cluster-word associations
    associations = TermCluster.get_word_cluster_pairs(top_clusters, words)

    # Make the root node:
    root_dict = {"name": "", "children": []}

    # Build the terminal nodes (leaves) first using (wd, freq) tuples
    # Output: {word: {'name': word, 'size': freq}, word2: ... }
    leaves = {}

    for (word, freq) in terms_for_dict:
        if word not in leaves:
            leaves[word] = {"name": word, "size": freq * scale}
        else:
            leaves[word]["size"] += freq * scale

    # Embed the leaves in the clusters:
    # Output: {cluster_id: {'name': ID, 'children': [...]}, ... }
    clusters = {}

    for (cluster_id, word) in associations:
        if cluster_id not in clusters:
            clusters[cluster_id] = {"name": cluster_id, "children": [leaves[word]]}
        else:
            clusters[cluster_id]["children"].append(leaves[word])

    # Put the clusters in the root dictionary
    # Output: {'name': '', 'children': [{'name': id, 'children': []}, ...]}
    for cluster in top_clusters:
        root_dict["children"].append(clusters[cluster])

    return jsonify(root_dict)
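
For reference, the tree this returns has the shape d3 hierarchy layouts expect: a nameless root, one child per top cluster, and (word, size) leaves. All values below are invented for illustration:

{
    "name": "",
    "children": [
        {
            "name": 31,                            # a cluster_id
            "children": [
                {"name": "face", "size": 2100},    # freq * scale
                {"name": "fusiform", "size": 1400},
            ],
        },
    ],
}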