Exemplo n.º 1
0
def load_clusters():
    """Load info from topics.txt file into Cluster, TermCluster tables"""

    # Delete whatever's in the db already
    Cluster.query.delete()
    TermCluster.query.delete()

    count_clusters = 0

    for row in open("topics.csv"):

        row = row.rstrip().split(",")

        # Parse the txt into the appropriate data types for seeding
        cluster = int(row[1][-3:])
        word = row[3].strip()

        # Check if word is in our list of key terms. If it is, add to
        # TermCluster table to allow for lookup later (see seed.py for TODO)

        if Term.check_for_term(word) is True:
            term_cluster_to_add = TermCluster(word=word, cluster_id=cluster)
            db.session.add(term_cluster_to_add)
            db.session.commit()

        # Check if a cluster is in our list of clusters. If it's not, add it.
        if Cluster.check_for_cluster(cluster) is False:
            cluster_to_add = Cluster(cluster_id=cluster)
            db.session.add(cluster_to_add)
            db.session.commit()

        # Print where we are and increment counter
        print "Topics.txt seeding row", count_clusters

        count_clusters += 1
Exemplo n.º 2
0
def load_studies_terms():
    """Loads info from studies_terms.txt into StudyTerm & Term tables.

    File format: R ID \t pmid \t word \t frequency

    Source: Neurosynth features.txt, transformed in R to long format."""


    print "Studies_terms.txt seeding"

    # Delete all rows in existing tables, so if we need to run this a second time,
    # we won't be trying to add duplicate users
    StudyTerm.query.delete()
    Term.query.delete()

    skip = True
    count_studies_terms = 0
    studies_terms = open("seed_data/studies_terms.txt")

    for row in studies_terms:
        # Skip the first line of the file
        if skip:
            skip = False
            continue

        # Stop after 5000 lines
        # if count_studies_terms > 5000:
        #     break

        # Parse txt file and convert to appropriate data types for seeding
        row = row.rstrip().split('\t')

        # If the term starts with "X", it is not a word but a number, e.g. "X01"
        # These don't make sense to track, so skip these rows.
        if row[2].startswith('\"X'):
            continue

        # Skip the lines indicating that a term did not appear anywhere
        # in the article (frequency of 0)
        if float(row[3]) == 0.0:
            continue

        pmid = int(row[1])
        word = row[2].strip('\"').replace(".", " ")
        freq = float(row[3])

        # Check if the word is already in Term; if not, add it
        if Term.check_for_term(word) is False:
            word_to_add = Term(word=word)
            db.session.add(word_to_add)

        # Add the row to the studies_terms table
        studies_terms_to_add = StudyTerm(word=word, pmid=pmid, frequency=freq)
        db.session.add(studies_terms_to_add)
        db.session.commit()

        # Print where we are and increment counter
        print "studies_terms.txt seeding row ", count_studies_terms
        count_studies_terms += 1
Exemplo n.º 3
0
def create_term(id):
    """Create a Term in the collection `id` from the submitted form.

    When the form validates, a Term owned by the current user is added,
    committed and refreshed. Otherwise an error message is flashed. In both
    cases the response is a redirect to the "view_collection" endpoint for
    the same collection id.
    """
    form = forms.NewTermForm(request.form)

    if form.validate():
        new_term = Term(
            term=form.term.data,
            definition=form.definition.data,
            user_id=current_user.id,
            collection_id=id,
        )
        session = model.session
        session.add(new_term)
        session.commit()
        # Reload the row's state from the database after the commit
        session.refresh(new_term)
    else:
        flash("Error, all fields are required")

    return redirect(url_for("view_collection", id=id))
Exemplo n.º 4
0
def addNodes(nodes):
    """Create or update Node, NodeRevision and TermNode records for node ids.

    For each id in `nodes`, getNode(id) supplies a (node, revision, tags)
    triple. The node and revision mappings are re-keyed in place to the
    keyword names passed to Node / NodeRevision, and each tag mapping is
    re-keyed for TermNode.

    Returns the list of ids that were processed, in input order.
    """
    added = []
    for id in nodes:
        node,revision,tags = getNode(id)
        
        # Copy the source keys (vid/uid/nid) to the keyword names used below
        node['version'] = node['vid']
        node['user'] = node['uid']
        node['id'] = node['nid']
        revision['user'] = revision['uid']
        revision['id'] = revision['vid']
        revision['node'] = revision['nid']
        # Drop the source keys (and node 'status') so that only the renamed
        # keys are passed on as keyword arguments
        del(node['status'])
        del(node['uid'])
        del(node['nid'])
        del(node['vid'])
        del(revision['uid'])
        del(revision['nid'])
        del(revision['vid'])
        # Update the Node if it already exists, otherwise create it.
        # 'id' is removed only after Node.get succeeds, so the create path
        # still receives the id.
        try:
            dbnode = Node.get(node['id']) 
            del(node['id'])
            dbnode.set(**node)
        except SQLObjectNotFound:
            dbnode = Node(**node)

        # Same update-or-create handling for the revision
        try:
            dbnoder = NodeRevision.get(revision['id'])
            del(revision['id'])
            dbnoder.set(**revision)
        except SQLObjectNotFound:
            dbnoder = NodeRevision(**revision)
        # Link the node to each of its terms through TermNode
        for tag in tags:
            tag['node'] = tag['nid']
            tag['term'] = tag['tid']
            del(tag['nid'])
            del(tag['tid'])
            # Make sure the Term exists; addTags is asked to add it when it
            # is missing, and its first result is used
            try:
                dbtag = Term.get(tag['term'])
            except SQLObjectNotFound:
                dbtag = addTags([tag['term']])[0]
            # Only create the TermNode link when none exists for this pair
            terms = list(TermNode.selectBy(node=dbnode,term=dbtag))
            if not terms:
                term = TermNode(**tag)
            else:
                term = terms[0]

        print "Node %d made!" % id
        added.append(id)
    return added
Exemplo n.º 5
0
def load_clusters():
    """Load info from topics.txt file into Cluster, TermCluster tables

    File format: R row id,Topic XXX,R column ID,word

        where XXX represents a number between 0-400
        R ids can be discarded during seeding 

    Source: topic clustering data from Neurosynth, converted to long format
    in R prior to seeding. 
    Notes: the words tracked in this clustering are not in perfect
    alignment with those tracked in studies_terms.txt. Approximately 2000 of the 
    terms in studies_terms have a topical cluster, the remaining ~1000 do not.
    This number could be improved by stemming. Many of the words not tracked
    in clusters are multi-word phrases."""

    # Delete whatever's in the db already
    Cluster.query.delete()
    TermCluster.query.delete()

    count_clusters = 0
    topics_fileobj = open('seed_data/topics.csv')

    for row in topics_fileobj:

        row = row.rstrip().split(',')

        # Parse the txt into the appropriate data types for seeding
        cluster = int(row[1][-3:])
        word = row[3].strip()

        # Check if word is in our list of key terms. If it is, add to
        # TermCluster table to allow for lookup later (see model.py for TODO)

        if Term.check_for_term(word) is True:
            term_cluster_to_add = TermCluster(word=word, cluster_id=cluster)
            db.session.add(term_cluster_to_add)
            db.session.commit()

        # Check if a cluster is in our list of clusters. If it's not, add it.
        if Cluster.check_for_cluster(cluster) is False:
            cluster_to_add = Cluster(cluster_id=cluster)
            db.session.add(cluster_to_add)
            db.session.commit()

        # Print where we are and increment counter
        print "Topics.txt seeding row", count_clusters

        count_clusters += 1

    topics_fileobj.close()
Exemplo n.º 6
0
def syncNodes():
    print "syncing nodes"
    tags_checked = []
    users_checked = []
    nodes_checked = []
    tags_new = []
    users_new = []
    nodes_new = []
    
    for node in getNewNodes():
        tid = node['tid']
        nid = node['nid']
        changed = node['changed']
        uid = node['uid']

        if tid not in tags_checked:
            try:
                term = Term.get(tid)
            except SQLObjectNotFound:
                tags_new.append(tid)
            tags_checked.append(tid)

        if uid not in users_checked:
            try:
                node = User.get(uid)
            except SQLObjectNotFound:
                users_new.append(uid)
            users_checked.append(uid)

        if nid not in nodes_checked:
            try:
                node = Node.get(nid)
                if node.changed < changed:
                    nodes_new.append(nid)                    
            except SQLObjectNotFound:
                nodes_new.append(nid)
            nodes_checked.append(nid)
    
    
    hub.begin()    
    # add new db information
    addTags(tags_new)
    addUsers(users_new)
    nodes = addNodes(nodes_new)
    setWeight()
    hub.commit()
    
    return nodes
def display_term_graph():
    """Render the poetic terms information page (terms.html).

    A random window of SHOW_NUMBER term ids is chosen and its data is
    fetched with Term.get_term_data. The template also receives a dict
    mapping every term to its term_id.
    """

    # NOTE(review): the highest term id is hard-coded -- confirm it matches
    # the data in the db.
    last_term_id = 49

    start_at = randint(1, last_term_id - SHOW_NUMBER)
    term_data = Term.get_term_data(start_at=start_at,
                                   stop_before=start_at + SHOW_NUMBER)

    all_terms = {
        term: term_id
        for term, term_id in db.session.query(Term.term, Term.term_id).all()
    }

    return render_template("terms.html", term_data=term_data, all_terms=all_terms)
Exemplo n.º 8
0
def add_term():
    """Add a term to the db from the posted form and confirm it as JSON.

    Reads 'term', 'user_id' and 'parent_id' from the request form, stores a
    new Term and returns a JSON object with a confirmation message, the term
    name and the parent id.
    """
    form = request.form
    term = str(form.get('term'))
    user_id = int(form.get('user_id'))
    parent_id = int(form.get('parent_id'))

    # Store the new term under its parent
    db.session.add(Term(term=term, user_id=user_id, parent_id=parent_id))
    db.session.commit()

    return jsonify({
        'message': "'%s' has been added as a term!" % term,
        "term_name": term,
        "parent_id": parent_id,
    })
Exemplo n.º 9
0
def add_category():
    """Add a category to the db and return its details as JSON.

    Reads 'category' and 'user_id' from the request form and stores the
    category as a Term row with no parent_id.

    Returns a JSON object with "category_name", "user_id" and "category_id",
    where "category_id" is the id of the row that was just created.
    """

    category = str(request.form.get('category'))
    user_id = int(request.form.get('user_id'))

    # Add the new category to the db:
    new_category = Term(term=category,
                        user_id=user_id)
    db.session.add(new_category)
    db.session.commit()

    # Take the id from the object that was just inserted. Looking the row up
    # again by name could return a different, older Term sharing that name.
    category_id = new_category.id

    results_dict = {"category_name" : category,
                    "user_id" : user_id,
                    "category_id" : category_id}

    return jsonify(results_dict)
Exemplo n.º 10
0
 def iter_options():
     """Yield a (term, value) pair for each parsable <option> in `select`.

     Options whose text Term.parse turns into a falsy result are skipped.
     """
     for option in select.findAll('option'):
         parsed = Term.parse(option.text)
         if not parsed:
             continue
         yield (parsed, dict(option.attrs)['value'])
Exemplo n.º 11
0
 def iter_options():
     """Generate (term, value) tuples from the option tags under `select`.

     An option is emitted only when Term.parse returns a truthy result for
     its text; the value comes from the option's "value" attribute.
     """
     for opt in select.findAll("option"):
         parsed_term = Term.parse(opt.text)
         if not parsed_term:
             continue
         option_attrs = dict(opt.attrs)
         yield (parsed_term, option_attrs["value"])
Exemplo n.º 12
0
def setWeight():
    """Call `_get_weight(True)` on every Term returned by Term.select().

    NOTE(review): the True argument presumably forces the weight to be
    recalculated -- confirm against Term._get_weight.
    """
    for term in Term.select():
        term._get_weight(True)
Exemplo n.º 13
0
def term_type(value):
    """Convert `value` with Term.parse, raising an argparse error on failure.

    Returns the parsed term. Raises argparse.ArgumentTypeError when
    Term.parse returns None.
    """
    parsed = Term.parse(value)
    if parsed is not None:
        return parsed
    raise argparse.ArgumentTypeError("invalid format")
Exemplo n.º 14
0
def retrieve_words():
    """Return the result of Term.get_all() as JSON under the "words" key.

    Used for autocomplete functionality.
    """
    return jsonify({"words": Term.get_all()})