def list(self, filetype="html"): c.nodes = Session.query(Node).all() entity_q = Session.query(Node) entity_q = entity_q.limit(request.params.get("limit", None)) c.query = request.params.get("q", "") c.sep = request.params.get("sep", "") if request.params.get("sep_filter", False): entity_q = entity_q.filter(Entity.sep_dir != "") if c.sep: entity_q = entity_q.filter(Entity.sep_dir == c.sep) if c.query: o = or_(Entity.label.like(c.query + "%"), Entity.label.like("% " + c.query + "%")) entity_q = entity_q.filter(o).order_by(func.length(Entity.label)) if filetype == "json": response.content_type = "application/json" response.headers["Access-Control-Allow-Origin"] = "*" c.entities = entity_q.all() if request.params.get("redirect", False) and len(c.entities) == 1: h.redirect( h.url(controller=self._controller, action="view", filetype=filetype, id=c.entities[0].ID), code=302 ) else: return render("{type}/{type}-list.".format(type=self._controller) + filetype)
def list(self, filetype='html'):
    """List nodes, with optional label search and SEP filters.

    Request parameters: ``q`` (label substring match), ``sep`` (exact
    sep_dir), ``sep_filter`` (require non-empty sep_dir), ``limit`` and
    ``redirect`` (302 to the view page on a unique match).
    """
    c.nodes = Session.query(Node).all()

    entity_q = Session.query(Node)

    c.query = request.params.get('q', '')
    c.sep = request.params.get('sep', '')

    if request.params.get('sep_filter', False):
        entity_q = entity_q.filter(Entity.sep_dir != '')
    if c.sep:
        entity_q = entity_q.filter(Entity.sep_dir == c.sep)
    if c.query:
        # match at start of label or at a word boundary; shortest first
        o = or_(Entity.label.like(c.query + '%'),
                Entity.label.like('% ' + c.query + '%'))
        entity_q = entity_q.filter(o).order_by(func.length(Entity.label))

    if filetype == 'json':
        response.content_type = 'application/json'
        response.headers['Access-Control-Allow-Origin'] = '*'

    # BUGFIX: LIMIT must be the last thing applied to the query —
    # SQLAlchemy rejects filter() after limit(), and even if it did not,
    # limiting first would truncate the results before filtering.
    entity_q = entity_q.limit(request.params.get('limit', None))

    c.entities = entity_q.all()

    if request.params.get('redirect', False) and len(c.entities) == 1:
        h.redirect(h.url(controller=self._controller, action='view',
                         filetype=filetype, id=c.entities[0].ID),
                   code=302)
    else:
        return render('{type}/{type}-list.'.format(type=self._controller)
                      + filetype)
def update_partial_graph(entity_type, occurrences):
    """
    Takes an entity type and a SQL filename and only updates part of the graph.
    For use with single article statistical information.
    """
    # NOTE(review): unimplemented stub — everything after this raise is
    # unreachable scaffolding sketching the intended algorithm.
    raise NotImplementedError

    # Import SQL statements
    # Map the entity type to its edge table name and edge model class.
    # ('type' shadows the builtin here; left untouched in this doc pass.)
    if entity_type == Idea:
        table = "idea_graph_edges"
        type = IdeaGraphEdge
    elif entity_type == Thinker:
        table = "thinker_graph_edges"
        type = ThinkerGraphEdge
    else:
        table = "idea_thinker_graph_edges"
        type = IdeaThinkerGraphEdge

    edges = Session.query(type)

    # filter edges query to only the key term
    for ante, occurs in occurrences.iteritems():
        for cons, occurs_in in occurs.iteritems():
            # select the proper edge from result set
            # if edge does not exist, create it and add to session
            # update edge
            # NOTE(review): 'edge' is never bound — this is pseudocode.
            edge.occurs_in = occurs_in

    # commit changes
    Session.commit()
def _thinker_evaluate(self, evaltype=None, id=None, id2=None, uid=None,
                      username=None, degree=1, maxdegree=1):
    """
    Private method to handle generic evaluations. See ``teacher_of`` and
    ``has_influenced`` for use.

    Reads id2/uid/username/degree overrides from the request parameters,
    fetches (or creates) the evaluation, sets its degree, and flushes.
    Responds 400 when the degree parameter is not an integer.
    """
    id2 = request.params.get('id2', id2)
    uid = request.params.get('uid', uid)
    username = request.params.get('username', username)

    evaluation = self._get_evaluation(evaltype, id, id2, uid, username)

    try:
        evaluation.degree = int(request.params.get('degree', degree))
    except (TypeError, ValueError):
        # BUGFIX: int() raises ValueError for non-numeric strings (e.g.
        # degree=abc); previously only TypeError was caught, so a
        # malformed parameter produced a 500 instead of a 400.
        abort(400)

    # Create and commit evaluation
    Session.flush()

    # Issue an HTTP success
    response.status_int = 200
    return "OK"
def create(self):
    """Create a new SchoolOfThought from request parameters.

    Admin-only: 401 when not logged in, 403 when not admin.  Requires a
    'label' parameter; only 'sep_dir' and 'wiki' may accompany it (any
    other key is a 400).  Responds 302 pointing at the new object's view
    page.
    """
    if not h.auth.is_logged_in():
        abort(401)
    if not h.auth.is_admin():
        abort(403)

    valid_params = ["sep_dir", "wiki"]
    params = request.params.mixed()
    if '_method' in params:
        del params['_method']
    if 'label' in params:
        label = params['label']
        del params['label']
    else:
        abort(400)
    for k in params.keys():
        if k not in valid_params:
            abort(400)

    # BUGFIX: the original passed the undefined name 'name' here, which
    # raised a NameError on every request; the extracted 'label' is the
    # intended positional argument.
    school_of_thought = SchoolOfThought(label, **params)
    Session.add(school_of_thought)
    Session.flush()

    # Issue an HTTP success
    response.status_int = 302
    response.headers['location'] = h.url(controller='school_of_thought',
                                         action='view',
                                         id=school_of_thought.ID)
    return "Moved temporarily"
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    """Mine every SEP article for term occurrences and write them to a file.

    Selects the terms for *entity_type*, detaches from the database (the
    worker pool must not inherit an open connection), then processes all
    articles in parallel and concatenates their occurrence lines into
    *output_filename*.
    """
    terms = select_terms(entity_type)

    Session.expunge_all()
    Session.close()

    articles = Session.query(Entity.sep_dir).filter(Entity.sep_dir != None)
    articles = articles.filter(Entity.sep_dir != '')
    articles = articles.distinct().all()
    articles = [a[0] for a in articles]

    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root)
            for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    # serial processing, kept for debugging:
    # doc_lines = [process_article(title, terms, entity_type, None,
    #                              corpus_root) for title in articles]

    # write graph output to file
    # BUGFIX: print statement replaced with the single-argument function
    # form, which behaves identically on Python 2 and is Python 3 safe.
    print(output_filename)
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
def update_graph(entity_type, sql_filename):
    """
    Performs a complete update of the database graph information, adding
    jweight, entropy and occurrence data from the sql file generated by
    complete_mining. This will remove ALL previous graph data.
    """
    # Map the entity type to the edge table it populates.
    if entity_type == Idea:
        table = "idea_graph_edges"
    elif entity_type == Thinker:
        table = "thinker_graph_edges"
    else:
        table = "idea_thinker_graph_edges"

    connection = Session.connection()

    print("deleting old graph information ...")
    # (unused 'filename' key removed from the interpolation dict)
    connection.execute("""
    TRUNCATE TABLE %(table)s;
    """ % {'table': table})

    print("inserting new graph information")
    # NOTE(review): sql_filename is interpolated directly into the SQL —
    # LOAD DATA INFILE cannot be parameterized, so callers must never
    # pass an untrusted filename here.
    connection.execute("""
    SET foreign_key_checks=0;
    LOCK TABLES %(table)s WRITE;
    LOAD DATA INFILE '%(filename)s'
    INTO TABLE %(table)s
    FIELDS TERMINATED BY '::'
    (ante_id, cons_id, confidence, jweight, weight, occurs_in);
    UNLOCK TABLES;
    SET foreign_key_checks=1;
    """ % {'filename': sql_filename, 'table': table})
    Session.close()
def evaluate(self, id=None):
    """Render the evaluation page for idea *id* (requires login, else 401).

    Related ideas the current user has already evaluated are excluded
    via an anti-join against IdeaEvaluation; results are paginated ten
    per page.
    """
    if not h.auth.is_logged_in():
        abort(401)

    c.idea = h.fetch_obj(Idea, id, new_id=True)
    node_q = Session.query(Node).filter_by(concept_id=id)
    c.node = node_q.first()

    if request.environ.get('REMOTE_USER', False):
        user = h.get_user(request.environ['REMOTE_USER'])
        # subquery: ids of related ideas this user has already evaluated
        sq = Session.query(IdeaEvaluation.cons_id)
        sq = sq.filter(IdeaEvaluation.ante == c.idea)
        sq = sq.filter(IdeaEvaluation.uid == user.ID)
        sq = sq.subquery()
        # outer join + NULL test keeps only not-yet-evaluated ideas
        to_evaluate = c.idea.related.outerjoin(
            (sq, Idea.ID == sq.c.cons_id))
        to_evaluate = to_evaluate.filter(sq.c.cons_id == None)
    else:
        # defensive fallback; normally unreachable behind the login check
        to_evaluate = c.idea.related

    c.paginator = paginate.Page(to_evaluate,
                                page=int(request.params.get('page', 1)),
                                items_per_page=10,
                                controller='idea',
                                action='edit',
                                id=id)
    response.headers['Access-Control-Allow-Origin'] = '*'
    return render('idea/idea-edit.html')
def process_articles(entity_type=Entity, output_filename='output-all.txt',
                     corpus_root='corpus/'):
    """Mine every SEP article for term occurrences and write them to a file.

    Selects the terms for *entity_type*, detaches from the database (the
    worker pool must not inherit an open connection), then processes all
    articles in parallel and concatenates their occurrence lines into
    *output_filename*.
    """
    terms = select_terms(entity_type)

    Session.expunge_all()
    Session.close()

    articles = Session.query(Entity.sep_dir).filter(Entity.sep_dir != None)
    articles = articles.filter(Entity.sep_dir != '')
    articles = articles.distinct().all()
    articles = [a[0] for a in articles]

    # parallel processing of articles
    p = Pool()
    args = [(title, terms, entity_type, None, corpus_root)
            for title in articles]
    doc_lines = p.map(process_wrapper, args)
    p.close()

    # serial processing, kept for debugging:
    # doc_lines = [process_article(title, terms, entity_type, None,
    #                              corpus_root) for title in articles]

    # write graph output to file
    # BUGFIX: print statement replaced with the single-argument function
    # form, which behaves identically on Python 2 and is Python 3 safe.
    print(output_filename)
    with open(output_filename, 'w') as f:
        for lines in doc_lines:
            f.writelines(lines)
def _delete_evaluation(self, evaltype, id, id2, uid=None, username=None):
    """Withdraw one component of a user's evaluation of the (id, id2) pair.

    Marks the given *evaltype* attribute as -1 (unset) rather than
    deleting the row.  401 when not logged in or when the requester is
    neither the owner nor an admin; 404 when the evaluation is absent.
    """
    if not h.auth.is_logged_in():
        abort(401)

    id2 = request.params.get('id2', id2)
    uid = request.params.get('uid', uid)
    username = request.params.get('username', username)

    evaluation = self._get_evaluation(id, id2, uid, username,
                                      autoCreate=False)
    if not evaluation:
        abort(404)

    current_uid = h.get_user(request.environ['REMOTE_USER']).ID
    # BUGFIX: was 'or', which aborted for everyone except an admin
    # editing their *own* evaluation; the intent (matching the sibling
    # delete handler) is to allow the owner OR an admin.
    if evaluation.uid != current_uid and not h.auth.is_admin():
        abort(401)

    setattr(evaluation, evaltype, -1)

    # Delete evaluation if this eliminates both settings; the new db
    # schema will eliminate this need.
    #if evaluation.generality == -1 and evaluation.relatedness == -1:
    #    h.delete_obj(evaluation)

    Session.flush()
    Session.commit()

    response.status_int = 200
    return "OK"
def evaluate(self, id=None):
    """Show the evaluation screen for an idea, hiding already-rated ideas.

    Requires an authenticated user (401 otherwise).  Related ideas the
    current user has evaluated are excluded via an anti-join subquery;
    results are paginated ten per page.
    """
    if not h.auth.is_logged_in():
        abort(401)

    c.idea = h.fetch_obj(Idea, id, new_id=True)
    c.node = Session.query(Node).filter_by(concept_id=id).first()

    if request.environ.get('REMOTE_USER', False):
        user = h.get_user(request.environ['REMOTE_USER'])
        # ids of related ideas this user has already rated
        rated = (Session.query(IdeaEvaluation.cons_id)
                 .filter(IdeaEvaluation.ante == c.idea)
                 .filter(IdeaEvaluation.uid == user.ID)
                 .subquery())
        # anti-join: keep only ideas with no matching rating row
        pending = c.idea.related.outerjoin((rated, Idea.ID == rated.c.cons_id))
        pending = pending.filter(rated.c.cons_id == None)
    else:
        pending = c.idea.related

    c.paginator = paginate.Page(
        pending,
        page=int(request.params.get('page', 1)),
        items_per_page=10,
        controller='idea',
        action='edit',
        id=id
    )
    response.headers['Access-Control-Allow-Origin'] = '*'
    return render('idea/idea-edit.html')
def _get_evaluation(self, id, id2, uid=None, username=None, autoCreate=True):
    """Fetch (or lazily create) a user's evaluation of the idea pair (id, id2).

    The evaluating user is resolved in order of precedence: explicit
    *uid*, then *username* (403 when unknown), then the authenticated
    REMOTE_USER.  Both ids must name real Ideas.
    """
    # validate both idea ids up front (404 on failure)
    h.fetch_obj(Idea, id, new_id=True)
    h.fetch_obj(Idea, id2, new_id=True)

    # Resolve the evaluating user's id.
    if uid:
        uid = h.fetch_obj(User, uid).ID
    elif username:
        user = h.get_user(username)
        if not user:
            abort(403)
        uid = user.ID
    else:
        uid = h.get_user(request.environ['REMOTE_USER']).ID

    evaluation = (Session.query(IdeaEvaluation)
                  .filter_by(ante_id=id, cons_id=id2, uid=uid)
                  .first())

    # build a fresh evaluation when permitted and none exists yet
    if autoCreate and not evaluation:
        evaluation = IdeaEvaluation(id, id2, uid)
        Session.add(evaluation)

    return evaluation
def update_graph(entity_type, sql_filename):
    """Replace ALL graph-edge data for *entity_type* from *sql_filename*.

    Truncates the matching edge table and bulk-loads the '::'-delimited
    data file produced by the mining pipeline.
    """
    # Map the entity type to the edge table it populates.
    if entity_type == Idea:
        table = "idea_graph_edges"
    elif entity_type == Thinker:
        table = "thinker_graph_edges"
    else:
        table = "idea_thinker_graph_edges"

    connection = Session.connection()

    print("deleting old graph information ...")
    # (unused 'filename' key removed from the interpolation dict)
    connection.execute("""
    TRUNCATE TABLE %(table)s;
    """ % {'table': table})

    print("inserting new graph information")
    # NOTE(review): sql_filename is interpolated directly into the SQL —
    # LOAD DATA INFILE cannot be parameterized, so callers must never
    # pass an untrusted filename here.
    connection.execute("""
    SET foreign_key_checks=0;
    LOCK TABLES %(table)s WRITE;
    LOAD DATA INFILE '%(filename)s'
    INTO TABLE %(table)s
    FIELDS TERMINATED BY '::'
    (ante_id, cons_id, confidence, jweight, weight, occurs_in);
    UNLOCK TABLES;
    SET foreign_key_checks=1;
    """ % {'filename': sql_filename, 'table': table})
    Session.close()
def submit_changes(self):
    '''
    Validates the submitted profile-edit form and commits the changes.
    Restricted to ``POST`` requests.  If successful, redirects to the
    result action to prevent resubmission.
    '''
    if not h.auth.is_logged_in():
        abort(401)

    c.user = h.get_user(request.environ['REMOTE_USER'])

    # only replace the password when a non-empty one was submitted
    new_password = self.form_result['password']
    if new_password != '':
        c.user.set_password(new_password)

    # TODO: Enable area editing
    #c.user.first_area_id=self.form_result['first_area'],
    #user.first_area_level=self.form_result['first_area_level'],
    #if self.form_result['second_area']:
    #    c.user.second_area_id=self.form_result['second_area'],
    #    c.user.second_area_level=self.form_result['second_area_level']

    c.user.fullname = self.form_result['fullname']

    Session.flush()
    Session.commit()
    h.redirect(h.url(controller='account', action='profile',
                     message='edited'))
def submit_changes(self):
    '''
    This function validates the submitted profile edit form and commits the
    changes. Restricted to ``POST`` requests. If successful, redirects to
    the result action to prevent resubmission.
    '''
    if not h.auth.is_logged_in():
        abort(401)

    c.user = h.get_user(request.environ['REMOTE_USER'])

    # an empty password field means "keep the current password"
    if self.form_result['password'] != '':
        c.user.set_password(self.form_result['password'])

    # TODO: Enable area editing
    #c.user.first_area_id=self.form_result['first_area'],
    #user.first_area_level=self.form_result['first_area_level'],
    #if self.form_result['second_area']:
    #    c.user.second_area_id=self.form_result['second_area'],
    #    c.user.second_area_level=self.form_result['second_area_level']

    c.user.fullname = self.form_result['fullname']

    Session.flush()
    Session.commit()
    # redirect-after-POST to avoid duplicate submissions
    h.redirect(
        h.url(controller='account', action='profile', message='edited'))
def _reset(self, username=None):
    """Reset a user's password and e-mail the new one to them.

    Falls back to the authenticated REMOTE_USER when no username is
    given (401 when neither is available, 400 for an unknown user).
    Redirects to the reset_result page on success.
    """
    username = username or request.environ.get('REMOTE_USER', False)
    if not username:
        abort(401)

    try:
        user = h.get_user(username)
    except Exception:
        # BUGFIX: was a bare 'except:', which also swallowed
        # SystemExit/KeyboardInterrupt; narrowed to Exception.
        abort(400)

    new_password = user.reset_password()
    msg = Message("*****@*****.**", user.email,
                  "InPhO password reset")
    msg.plain = """%(name)s,

your password at the Indiana Philosophy Ontology (InPhO) has been changed to:

Username: %(uname)s
Password: %(passwd)s

The Indiana Philosophy Ontology (InPhO) Team
[email protected]
""" % {
        'passwd': new_password,
        'uname': user.username,
        'name': user.fullname or user.username or ''
    }
    msg.send()

    Session.commit()
    h.redirect(h.url(controller='account', action='reset_result'))
def _reset(self, username=None):
    """Reset a user's password and e-mail the new one to them.

    Falls back to the authenticated REMOTE_USER when no username is
    given (401 when neither is available, 400 for an unknown user).
    Redirects to the reset_result page on success.
    """
    username = username or request.environ.get('REMOTE_USER', False)
    if not username:
        abort(401)

    try:
        user = h.get_user(username)
    except Exception:
        # BUGFIX: was a bare 'except:', which also swallowed
        # SystemExit/KeyboardInterrupt; narrowed to Exception.
        abort(400)

    new_password = user.reset_password()
    msg = Message("*****@*****.**", user.email,
                  "InPhO password reset")
    msg.plain = """%(name)s,

your password at the Indiana Philosophy Ontology (InPhO) has been changed to:

Username: %(uname)s
Password: %(passwd)s

The Indiana Philosophy Ontology (InPhO) Team
[email protected]
""" % {'passwd': new_password,
       'uname': user.username,
       'name': user.fullname or user.username or ''}
    msg.send()

    Session.commit()
    h.redirect(h.url(controller='account', action='reset_result'))
def list(self, filetype='html'): entity_q = Session.query(self._type) #TODO: Remove the following line when Nodes are eliminated entity_q = entity_q.filter(Entity.typeID != 2) c.missing_entity = 0 # get the list of entities #c.entities = entity_q.all() c.nodes = Session.query(Node).filter(Node.parent_id == None) c.nodes = c.nodes.order_by("name").all() c.query = request.params.get('q', '') c.query = c.query.strip() c.sep = request.params.get('sep', '') c.wiki = request.params.get('wiki', '') if request.params.get('sep_filter', False): entity_q = entity_q.filter(Entity.sep_dir != '') if c.sep: entity_q = entity_q.filter(Entity.sep_dir == c.sep) if c.wiki: entity_q = entity_q.filter(Entity.wiki == c.wiki) if c.query: o = or_(Entity.label.like(c.query + '%'), Entity.label.like('% ' + c.query + '%'), Entity.label.like('%-' + c.query + '%')) entity_q = entity_q.filter(o).order_by(func.length(Entity.label)) c.total = entity_q.count() # limit must be the last thing applied to the query entity_q = entity_q.limit(request.params.get('limit', None)) c.entities = entity_q.all() if filetype == 'json': response.content_type = 'application/json' if request.params.get('redirect', False) and len(c.entities) == 1: h.redirect(h.url(controller=self._controller, action='view', filetype=filetype, id=c.entities[0].ID), code=302) else: #if there are no results, show the related SEP results if not c.entities: c.entities = self.missing_entity_search(c.query) if c.entities: c.missing_entity = 1 #raise Exception #render the page return render('{type}/{type}-list.'.format(type=self._controller) + filetype)
def __call__(self, environ, start_response): """Invoke the Controller""" # WSGIController.__call__ dispatches to the Controller method # the request is routed to. This routing information is # available in environ['pylons.routes_dict'] try: return WSGIController.__call__(self, environ, start_response) finally: Session.remove()
def list(self, filetype="html"): entity_q = Session.query(self._type) # TODO: Remove the following line when Nodes are eliminated entity_q = entity_q.filter(Entity.typeID != 2) c.missing_entity = 0 # get the list of entities # c.entities = entity_q.all() c.nodes = Session.query(Node).filter(Node.parent_id == None) c.nodes = c.nodes.order_by("name").all() c.query = request.params.get("q", "") c.query = c.query.strip() c.sep = request.params.get("sep", "") c.wiki = request.params.get("wiki", "") if request.params.get("sep_filter", False): entity_q = entity_q.filter(Entity.sep_dir != "") if c.sep: entity_q = entity_q.filter(Entity.sep_dir == c.sep) if c.wiki: entity_q = entity_q.filter(Entity.wiki == c.wiki) if c.query: o = or_( Entity.label.like(c.query + "%"), Entity.label.like("% " + c.query + "%"), Entity.label.like("%-" + c.query + "%"), ) entity_q = entity_q.filter(o).order_by(func.length(Entity.label)) c.total = entity_q.count() # limit must be the last thing applied to the query entity_q = entity_q.limit(request.params.get("limit", None)) c.entities = entity_q.all() if filetype == "json": response.content_type = "application/json" if request.params.get("redirect", False) and len(c.entities) == 1: h.redirect( h.url(controller=self._controller, action="view", filetype=filetype, id=c.entities[0].ID), code=302 ) else: # if there are no results, show the related SEP results if not c.entities: c.entities = self.missing_entity_search(c.query) if c.entities: c.missing_entity = 1 # raise Exception # render the page return render("{type}/{type}-list.".format(type=self._controller) + filetype)
def graph_all(self, filetype='html', limit=False):
    """Render the full idea graph: every idea plus the strongest edges
    (three times as many edges as ideas, by descending jweight)."""
    c.sep_filter = request.params.get('sep_filter', False)

    c.ideas = Session.query(Idea).all()

    edge_q = Session.query(IdeaGraphEdge)
    edge_q = edge_q.order_by(IdeaGraphEdge.jweight.desc())
    edge_q = edge_q.limit(3 * len(c.ideas))
    c.edges = edge_q.all()

    return render('idea/graph_all.' + filetype)
def _delete_date(self, id, id2):
    """Detach date *id2* from entity *id* if attached; always answers OK."""
    c.entity = h.fetch_obj(Entity, id, new_id=True)

    # resolve the date object being removed
    date = self._get_date(id, id2)

    if date in c.entity.dates:
        position = c.entity.dates.index(date)
        Session.delete(c.entity.dates[position])
        Session.commit()

    return "OK"
def filter_apriori_input(occur_filename, output_filename, entity_type=Idea,
                         doc_terms=None):
    """Convert an occurrence file into apriori input restricted to the
    terms of *entity_type*, writing the result to *output_filename*."""
    # select the relevant terms, then detach from the database so no
    # connection is held during the file processing
    terms = select_terms(entity_type)
    Session.expunge_all()
    Session.close()

    prepared = dm.prepare_apriori_input(occur_filename, terms, doc_terms)

    with open(output_filename, 'w') as out:
        out.writelines(prepared)
def _delete_unary(self, type, id, id2=None):
    """Detach a unary attribute (as described by unary_vars[type]) from
    thinker *id*; *id2* names the attribute object."""
    thinker = h.fetch_obj(Thinker, id)
    id2 = request.params.get('id2', id2)

    meta = unary_vars[type]
    target = h.fetch_obj(meta['object'], id2)

    collection = getattr(thinker, meta['property'])
    if target in collection:
        collection.remove(target)
        Session.commit()

    response.status_int = 200
    return "OK"
def _get_anon_evaluation(self, id, id2, ip, autoCreate=True):
    """Fetch the anonymous (per-IP) evaluation of the idea pair (id, id2),
    creating one when absent and *autoCreate* is true.
    """
    # validate both ids refer to real Ideas (404 otherwise)
    idea1 = h.fetch_obj(Idea, id, new_id=True)
    idea2 = h.fetch_obj(Idea, id2, new_id=True)

    evaluation_q = Session.query(AnonIdeaEvaluation)
    evaluation = evaluation_q.filter_by(ante_id=id, cons_id=id2,
                                        ip=ip).first()

    # if an evaluation does not yet exist, create one
    if autoCreate and not evaluation:
        evaluation = AnonIdeaEvaluation(id, id2, ip)
        Session.add(evaluation)

    return evaluation
def searchpatterns(self, id):
    """Attach a new search pattern to entity *id*; 400 when the request
    carries no 'pattern' parameter, duplicates are ignored."""
    c.entity = h.fetch_obj(Entity, id, new_id=True)

    new_pattern = request.params.get('pattern', None)
    if new_pattern is None:
        abort(400)

    if new_pattern not in c.entity.searchpatterns:
        c.entity.searchpatterns.append(unicode(new_pattern))
        Session.commit()

    return "OK"
def searchpatterns(self, id): c.entity = h.fetch_obj(Entity, id, new_id=True) # add a new search pattern pattern = request.params.get("pattern", None) if pattern is None: abort(400) if pattern not in c.entity.searchpatterns: c.entity.searchpatterns.append(unicode(pattern)) Session.commit() return "OK"
def update(self, id=None):
    """Update journal *id*: handle the URL parameter here, then defer the
    whitelisted scalar fields to the generic entity update."""
    terms = ['label', 'sep_dir', 'last_accessed', 'language',
             'openAccess', 'active', 'student', 'ISSN']

    URL = request.params.get('URL', None)
    if URL is not None:
        journal = h.fetch_obj(Journal, id)
        if URL in ('none', 'None'):
            # the literal strings 'none'/'None' clear the URL
            journal.URL = None
        else:
            journal.URL = unquote(URL)
            journal.check_url()
        Session.commit()

    super(JournalController, self).update(id, terms)
def _get_anon_evaluation(self, id, id2, ip, autoCreate=True):
    """Look up the anonymous (per-IP) evaluation of the idea pair,
    creating one when absent and *autoCreate* is set."""
    # both ids must name real Ideas (404 otherwise)
    h.fetch_obj(Idea, id, new_id=True)
    h.fetch_obj(Idea, id2, new_id=True)

    evaluation = (Session.query(AnonIdeaEvaluation)
                  .filter_by(ante_id=id, cons_id=id2, ip=ip)
                  .first())

    if not evaluation and autoCreate:
        evaluation = AnonIdeaEvaluation(id, id2, ip)
        Session.add(evaluation)

    return evaluation
def triple(self, id): c.entity = h.fetch_obj(Thinker, id) #parese the triple triple = request.params.get('triple').split() subject_t = triple[0] predicate_t = triple[1] objectURLComponents = triple[2].split('/') #parse triple for last check = "no teacher or student" #lastComponentIndex = objectURLComponents.__len__()-1 object_t = objectURLComponents[-1] #- subject is the same as the id #- predicate is from the list and will be used in a if/elif/elif/elif/elif ... to see what database to add it to if "dbpedia.org" in objectURLComponents: object_t_label = object_t.replace("_", " ") obj = Thinker(object_t_label) obj.wiki = object_t elif "inpho.cogs.indiana.edu" in objectURLComponents: obj = h.fetch_obj(Thinker, object_t) '''if(inpho): obj = h.fetch_obj(Thinker, object_t) # returns the SQLAlchemy object elif(dbpedia) obj = Thinker(object_t) # returns the SQLAlchemy object ''' if predicate_t == 'ns1:influenced': c.entity.influenced.append(obj) elif predicate_t == 'ns1:influenced_by': c.entity.influenced_by.append(obj) elif predicate_t == 'ns1:student': c.entity.students.append(obj) elif predicate_t == 'ns1:teacher': c.entity.teachers.append(obj) ''' elif predicate == 'profession': elif predicate == 'birth_date': elif predicate == 'death_date': else predicate == 'nationality': ''' Session.commit() subject_to_display = subject_t.split("/")[len(subject_t.split("/")) - 1] predicate_to_display = predicate_t.split(":")[1] object_to_display = object_t return "OK : " + subject_to_display + " " + predicate_to_display + " " + object_to_display
def _inpho_token_generator(document):
    """Tokenize *document*, yielding each raw word plus an "inpho:<id>"
    marker for every InPhO search pattern matching at that position.

    Single-word patterns yield one marker on an exact match; multi-word
    patterns match when the remainder of the text continues with the
    rest of the pattern.
    """
    # keep intra-word hyphens, but fold newlines into spaces
    if PUNC_TABLE.get(ord('-')):
        del PUNC_TABLE[ord('-')]
    PUNC_TABLE[ord('\n')] = ord(' ')

    # normalize: lowercase, rejoin hyphenated line breaks, strip punctuation
    rest = document.lower()
    rest = rehyph(rest)
    rest = strip_punc_word(rest)

    query = Session.query(Searchpattern)

    MIN_LEN = 6
    # Cache every distinct pattern shorter than MIN_LEN so the common
    # case (a short word with no pattern) avoids a database round-trip.
    short_patterns = Session.query(Searchpattern.searchpattern)
    short_patterns = short_patterns.filter(
        func.length(Searchpattern.searchpattern) < MIN_LEN)
    short_patterns = short_patterns.distinct().all()
    short_patterns = set(w[0] for w in short_patterns)

    while rest:
        if u' ' not in rest:
            # final word: emit it and finish
            yield rest
            return

        first, rest = rest.split(u' ', 1)
        rest = rest.strip()

        # always yield the raw string
        yield first

        # check if we can simply skip the short patterns
        if len(first) < MIN_LEN and first not in short_patterns:
            continue

        # search the database for keywords starting with this word
        patterns = query.filter(
            Searchpattern.searchpattern.like(first + u' %')).all()

        exact_match = query.filter(
            Searchpattern.searchpattern == first).first()
        if exact_match is not None:
            patterns.append(exact_match)

        for p in patterns:
            # check if multi-phrase starts match in the rest of the phrase.
            if u' ' in p.searchpattern:
                first_pattern_word, longpattern = \
                    p.searchpattern.split(u' ', 1)
                if first == first_pattern_word and \
                        (rest == longpattern
                         or rest.startswith(longpattern + u' ')):
                    yield u"inpho:{}".format(p.entity.ID)
            elif first == p.searchpattern:
                yield u"inpho:{}".format(p.entity.ID)
def queries(self, id):
    """Register a new search query string on journal *id*; 400 when the
    'pattern' parameter is absent, duplicates are ignored."""
    c.entity = h.fetch_obj(Journal, id, new_id=True)

    raw = request.params.get('pattern', None)
    if raw is None:
        abort(400)

    query_text = unicode(raw)
    if query_text not in c.entity.queries:
        c.entity.queries.append(unicode(query_text))
        Session.commit()

    return "OK"
def triple(self, id): c.entity = h.fetch_obj(Thinker, id) #parese the triple triple = request.params.get('triple').split() subject_t = triple[0] predicate_t = triple[1] objectURLComponents = triple[2].split('/')#parse triple for last check = "no teacher or student" #lastComponentIndex = objectURLComponents.__len__()-1 object_t = objectURLComponents[-1] #- subject is the same as the id #- predicate is from the list and will be used in a if/elif/elif/elif/elif ... to see what database to add it to if "dbpedia.org" in objectURLComponents: object_t_label = object_t.replace("_"," ") obj = Thinker(object_t_label) obj.wiki = object_t elif "inpho.cogs.indiana.edu" in objectURLComponents: obj = h.fetch_obj(Thinker, object_t) '''if(inpho): obj = h.fetch_obj(Thinker, object_t) # returns the SQLAlchemy object elif(dbpedia) obj = Thinker(object_t) # returns the SQLAlchemy object ''' if predicate_t == 'ns1:influenced': c.entity.influenced.append(obj) elif predicate_t == 'ns1:influenced_by': c.entity.influenced_by.append(obj) elif predicate_t =='ns1:student': c.entity.students.append(obj) elif predicate_t == 'ns1:teacher': c.entity.teachers.append(obj) ''' elif predicate == 'profession': elif predicate == 'birth_date': elif predicate == 'death_date': else predicate == 'nationality': ''' Session.commit() subject_to_display=subject_t.split("/")[len(subject_t.split("/"))-1] predicate_to_display=predicate_t.split(":")[1] object_to_display=object_t return "OK : "+subject_to_display+" "+predicate_to_display+" "+object_to_display
def process_article(article, terms=None, entity_type=Idea,
                    output_filename=None, corpus_root='corpus/'):
    """
    Processes a single article for apriori input.

    Returns the occurrence lines for *article*, or an empty list (with a
    warning logged) when the article file is missing.
    """
    if terms is None:
        terms = select_terms(entity_type)

    lines = []

    filename = article_path(article)

    # NOTE(review): this query's result is never used below — possibly a
    # session warm-up or leftover; confirm before removing.
    article_terms = Session.query(entity_type)
    article_terms = article_terms.filter(entity_type.sep_dir == article)
    article_terms = article_terms.all()

    if filename and os.path.isfile(filename):
        logging.info("processing: %s %s" % (article, filename))
        doc = extract_article_body(filename)
        lines = dm.occurrences(doc, terms, title=article,
                               remove_overlap=False,
                               format_for_file=True,
                               output_filename=output_filename)
    else:
        logging.warning("BAD SEP_DIR: %s" % article)

    return lines
def new_entries():
    """
    Returns a list of all entries which do not have a corresponding
    InPhO Entity.
    """
    # entries already present in the database
    rows = Session.query(Entity.sep_dir).filter(Entity.sep_dir != '').all()
    known = set(row[0] for row in rows)

    # path to the SEP database's master list of entries
    entries = os.path.join(config.get('corpus', 'db_path'), 'entries.txt')

    # build the list of entries we have not seen before
    fresh = []
    with open(entries) as f:
        for line in f:
            sep_dir = line.split('::', 1)[0]
            try:
                if sep_dir not in known and copy_edit(sep_dir):
                    # published entry not yet in the database
                    fresh.append(sep_dir)
            except IOError:
                # IOErrors indicate potential entries without logs; skip
                continue

    # drop the SEP's placeholder 'sample' entry when present
    if 'sample' in fresh:
        fresh.remove('sample')

    return fresh
def missing_entity_search(self, query):
    """Query the SEP full-text search service for *query* and return the
    matching InPhO entities, ordered as the SEP results were.

    Returns an empty list when the remote search yields nothing.
    """
    query = quote_plus(query)
    url = ("http://plato.stanford.edu/cgi-bin/search/xmlSearcher.py?query="
           + query)
    results = multi_get([url])[0][1]

    # IDIOM FIX: the original shadowed the builtins 'dict', 'iter' and
    # 'json' and indexed with range(len(...)); renamed locals and
    # direct iteration preserve the behavior (one Location per Item,
    # last one wins; Items without a Location contribute nothing).
    OS_NS = "{http://a9.com/-/spec/opensearch/1.1/}"
    locations = []
    if results:
        root = ET.ElementTree(ET.fromstring(results)).getroot()
        for item in root.getiterator(OS_NS + "Item"):
            record = {}
            for location in item.getiterator(OS_NS + "Location"):
                record["Location"] = location.text
            if record:
                locations.append(record["Location"])

    entities = Session.query(Entity).filter(
        Entity.sep_dir.in_(locations)).all()
    # preserve the remote service's ranking
    entities.sort(key=lambda entity: locations.index(entity.sep_dir))
    return entities
def select_terms(entity_type=Idea):
    """Return all entities of *entity_type* with their search patterns
    eagerly loaded, excluding Nodes (typeID 2) and Journals (typeID 4)."""
    term_q = (Session.query(entity_type)
              .options(subqueryload('_spatterns'))
              # do not process Nodes or Journals
              .filter(and_(Entity.typeID != 2, Entity.typeID != 4)))
    return term_q.all()
def data_integrity(self, filetype="html", redirect=False): if not h.auth.is_logged_in(): abort(401) if not h.auth.is_admin(): abort(403) idea_q = Session.query(Idea) c.ideas = list(idea_q) # Missing searchstring c.missing_string = [ idea for idea in c.ideas if not getattr(idea, 'searchstring') ] # Missing searchpattern c.missing_pattern = [ idea for idea in c.ideas if not getattr(idea, 'searchpattern') ] # Missing sep_dir c.missing_sep_dir = [ idea for idea in c.ideas if not getattr(idea, 'sep_dir') ] # Duplicates c.duplicate = [] c.sorted_ideas = sorted(c.ideas, key=lambda idea: idea.label) for i in range(len(c.sorted_ideas) - 1): if c.sorted_ideas[i].label == c.sorted_ideas[i + 1].label: c.duplicate.append(c.sorted_ideas[i]) c.duplicate.append(c.sorted_ideas[i + 1]) return render('idea/data_integrity.%s' % filetype)
def _delete_evaluation(self, evaltype, id, id2, uid=None, username=None):
    """Delete an evaluation of the (id, id2) relation.

    When the targeted user evaluation does not exist and the requester
    is an admin, ALL evaluation facts for the relation are wiped
    instead.  404 when nothing matches; 401 when the requester is
    neither the evaluation's owner nor an admin.
    """
    # BUGFIX: require a login up front; previously an anonymous request
    # could fall through to the REMOTE_USER lookup below and crash with
    # a KeyError (HTTP 500) instead of a clean 401.
    if not h.auth.is_logged_in():
        abort(401)

    id2 = request.params.get('id2', id2)
    uid = request.params.get('uid', uid)
    username = request.params.get('username', username)

    # look for a specific user's feedback
    evaluation = self._get_evaluation(evaltype, id, id2, uid, username,
                                      autoCreate=False)

    # if that feedback does not exist, unleash the nuclear option and
    # delete ALL evaluation facts for this relation, wiping it from the
    # database.
    if h.auth.is_admin() and not evaluation:
        eval_q = Session.query(evaltype)
        eval_q = eval_q.filter_by(ante_id=id, cons_id=id2)
        evals = eval_q.all()

        # wipe them out. all of them.
        for evaluation in evals:
            h.delete_obj(evaluation)

        # return ok, with how many were deleted
        response.status_int = 200
        return "OK %d" % len(evals)
    elif not evaluation:
        abort(404)

    # only the owner or an admin may delete an individual evaluation
    current_uid = h.get_user(request.environ['REMOTE_USER']).ID
    if evaluation.uid != current_uid and not h.auth.is_admin():
        abort(401)

    h.delete_obj(evaluation)
    response.status_int = 200
    return "OK"
def missing_entity_search(self, query):
    """Search the SEP full-text service for *query* and return matching
    InPhO entities in the order the SEP ranked them."""
    encoded = quote_plus(query)
    url = ('http://plato.stanford.edu/cgi-bin/search/xmlSearcher.py?query='
           + encoded)
    results = multi_get([url])[0][1]

    OS_NS = '{http://a9.com/-/spec/opensearch/1.1/}'
    locations = []
    if results:
        root = ET.ElementTree(ET.fromstring(results)).getroot()
        # one Location per Item (last one wins); Items without a
        # Location contribute nothing
        for item in root.getiterator(OS_NS + 'Item'):
            record = {}
            for location in item.getiterator(OS_NS + 'Location'):
                record['Location'] = location.text
            for value in record.values():
                locations.append(value)

    entities = Session.query(Entity).filter(
        Entity.sep_dir.in_(locations)).all()
    # keep the remote service's ranking
    entities.sort(key=lambda entity: locations.index(entity.sep_dir))
    return entities
def _delete_queries(self, id):
    """Remove a search query string from journal *id*; 400 when the
    'pattern' parameter is absent."""
    c.entity = h.fetch_obj(Journal, id, new_id=True)

    query_text = request.params.get('pattern', None)
    if query_text is None:
        abort(400)

    # rudimentary input sanitization
    query_text = query_text.strip()

    if query_text in c.entity.queries:
        c.entity.queries.remove(query_text)
        Session.commit()

    return "OK"
def _list_property(self, property, id, filetype='html', limit=False,
                   sep_filter=False, type='idea'):
    """Render a paginated list of one of an idea's relation collections.

    *property* is the attribute name of the collection on the Idea;
    limit/start/sep_filter may be overridden by request parameters.
    """
    c.idea = h.fetch_obj(Idea, id)

    limit = int(request.params.get('limit', limit))
    start = int(request.params.get('start', 0))
    sep_filter = request.params.get('sep_filter', sep_filter)

    # rebind 'property' from the attribute name to the attribute itself
    # (shadows the builtin; kept as-is in this documentation pass)
    property = getattr(c.idea, property)

    if sep_filter:
        property = property.filter(Entity.sep_dir != '')

    # TODO: Fix hacky workaround for the AppenderQuery vs. Relationship
    # property issue - upgrading SQLAlchemy may fix this by allowing us to
    # use len() in a smart way.
    try:
        # dynamic relationship: count via SQL
        c.total = property.count()
    except TypeError:
        # plain collection: count in Python
        c.total = len(property)

    if limit:
        property = property[start:start + limit]

    c.entities = property
    c.nodes = Session.query(Node).filter(
        Node.parent_id == None).order_by("name").all()
    return render('%s/%s-list.%s' % (type, type, filetype))
def abbrs(self, id):
    """Attach a new abbreviation to journal *id*; 400 when the 'pattern'
    parameter is absent, duplicates are ignored."""
    c.entity = h.fetch_obj(Journal, id, new_id=True)

    abbr = request.params.get('pattern', None)
    if abbr is None:
        abort(400)

    # rudimentary input sanitization
    abbr = abbr.strip()

    if abbr not in c.entity.abbrs:
        c.entity.abbrs.append(unicode(abbr))
        Session.commit()

    return "OK"
def select_terms(entity_type=Idea):
    """Return all entities of *entity_type* with their search patterns
    eagerly loaded (subqueryload), excluding Nodes and Journals.
    """
    # process entities
    ideas = Session.query(entity_type)
    ideas = ideas.options(subqueryload('_spatterns'))
    # do not process Nodes (typeID 2) or Journals (typeID 4)
    ideas = ideas.filter(and_(Entity.typeID != 2, Entity.typeID != 4))
    return ideas.all()
def new_entries(): """ Returns a list of all entries which do not have a corresponding InPhO Entity. """ # get list of all entries in database sep_dirs = Session.query(Entity.sep_dir).filter(Entity.sep_dir != '').all() sep_dirs = [row[0] for row in sep_dirs] # get list of all entries in the SEP database entries = os.path.join(config.get('corpus', 'db_path'), 'entries.txt') # build list of new entries new_sep_dirs = [] with open(entries) as f: for line in f: sep_dir = line.split('::', 1)[0] try: if sep_dir not in sep_dirs and copy_edit(sep_dir): # published entry not in database, add to list of entries new_sep_dirs.append(sep_dir) except IOError: # skip IOErrors, as these indicate potential entries w/o logs continue # remove the sample entry try: new_sep_dirs.remove('sample') except ValueError: pass return new_sep_dirs
def process_article(article, terms=None, entity_type=Idea,
                    output_filename=None, corpus_root='corpus/'):
    """Mine one SEP article for term occurrences (apriori input lines).

    Returns the occurrence lines, or an empty list — with a warning
    logged — when the article's file cannot be found.
    """
    if terms is None:
        terms = select_terms(entity_type)

    filename = article_path(article)

    # side effect retained from the original: query this article's terms
    # (result unused below)
    article_terms = (Session.query(entity_type)
                     .filter(entity_type.sep_dir == article)
                     .all())

    occurrence_lines = []
    if filename and os.path.isfile(filename):
        logging.info("processing: %s %s" % (article, filename))
        doc = extract_article_body(filename)
        occurrence_lines = dm.occurrences(doc, terms, title=article,
                                          remove_overlap=False,
                                          format_for_file=True,
                                          output_filename=output_filename)
    else:
        logging.warning("BAD SEP_DIR: %s" % article)

    return occurrence_lines
def update(self, id=None):
    """Update journal *id*.

    Handles the 'URL' request parameter here — the literal strings
    'none'/'None' clear it, anything else is unquoted and its
    reachability re-checked — then delegates the whitelisted scalar
    fields to the generic entity update.
    """
    terms = [
        'label', 'sep_dir', 'last_accessed', 'language', 'openAccess',
        'active', 'student', 'ISSN'
    ]

    URL = request.params.get('URL', None)
    if URL is not None:
        journal = h.fetch_obj(Journal, id)
        if URL == 'none' or URL == 'None':
            journal.URL = None
        else:
            journal.URL = unquote(URL)
            journal.check_url()
        Session.commit()

    super(JournalController, self).update(id, terms)
def get_subgraph(ids, thresh=None):
    """Fetch all idea-graph edges whose endpoints both lie in *ids*,
    strongest (highest jweight) first; optionally drop edges whose
    jweight is not above *thresh*."""
    edges = (Session.query(IdeaGraphEdge)
             .order_by(IdeaGraphEdge.jweight.desc())
             .filter(IdeaGraphEdge.cons_id.in_(ids))
             .filter(IdeaGraphEdge.ante_id.in_(ids)))
    if thresh:
        edges = edges.filter(IdeaGraphEdge.jweight > thresh)
    return edges.all()