def get(self, category):
    """Render the paper-list page for one category (or all papers).

    Maps the URL fragment *category* onto an ndb query and a display name,
    fetches up to 99 matching papers, and renders ``PaperList.html``.
    """
    logging.info("Now in PaperList get.")
    if category == 'resources':
        CatName = 'Learning Resources'
        q = Papers.query(Papers.Category == 'Learning Resources').order(
            Papers.Rank, -Papers.CreatedDate)
    elif category == 'platform':
        CatName = 'Learning Platform'
        q = Papers.query(Papers.Category == 'Learning Platform').order(
            Papers.Rank, -Papers.CreatedDate)
    elif category == 'learners':
        CatName = 'Learners and Programs'
        q = Papers.query(Papers.Category == 'Winning Students').order(
            Papers.Rank, -Papers.CreatedDate)
    elif category == 'misc':
        # Everything that is not feedback and not one of the named categories.
        CatName = 'Miscellaneous'
        q = Papers.query(
            Papers.Category != 'Feedback',
            Papers.Category != 'Learning Resources',
            Papers.Category != 'Learning Platform',
            Papers.Category != 'Winning Students',
        ).order(Papers.Category, Papers.Rank, -Papers.CreatedDate)
    else:
        CatName = 'All'
        q = Papers.query().order(Papers.Category, -Papers.CreatedDate)
    papers = q.fetch(99)
    Havepapers = bool(papers)
    currentuser = users.get_current_user()
    # Signed-in users get a logout link; anonymous visitors get a login link.
    logout = users.create_logout_url('/pagecontents') if currentuser else None
    login = None if currentuser else users.create_login_url('/pagecontents/create')
    self.render_template('PaperList.html', {
        'papers': papers,
        'Havepapers': Havepapers,
        'cat': category,
        'CatName': CatName,
        'currentuser': currentuser,
        'login': login,
        'logout': logout,
    })
def post(self):
    """Create a new paper from the submitted form fields and redirect
    back to the list page of the category given in the ``cat`` field.
    """
    author = users.get_current_user()
    cat = self.request.get('cat')
    paper = Papers(
        Title=self.request.get('Title'),
        Category=self.request.get('Category'),
        Text=self.request.get('Text'),
        Type=self.request.get('Type'),
        Status=self.request.get('Status'),
        CreatedBy=author,
        StatusBy=author,
    )
    paper.put()
    return self.redirect('/papers/' + cat)
def extract_citations():
    """Extract citations from the papers, based on the title of other papers.

    A paper B is recorded as cited by paper A when B's title occurs verbatim
    inside A's body text.
    """
    # Only multi-word titles are usable as search keys; a single word would
    # match far too often inside unrelated paper bodies.
    titles = {
        p.title.strip(): p.id
        for p in Papers.select(Papers)
        if len(p.title.strip().split(' ')) > 1
    }
    for paper in Papers.select():
        citations = [
            cited_id
            for title, cited_id in titles.items()
            if title in paper.paper_text and cited_id != paper.id
        ]
        for citation in citations:
            create_citation(paper.id, citation)
        print("Paper {paper_id}".format(paper_id=paper.id))
        print(citations)
def get(self):
    """Render the feedback list page with up to 999 feedback entries.

    Builds a dedicated jinja2 environment (autoescape off, templates loaded
    from TEMPLATE_DIR), registers the ``AccessOK`` filter before the template
    is compiled, and writes the rendered page to the response.
    """
    logging.info("Now in FeedbackList get.")
    # NOTE: the original also set an unused local ``CatName = 'All'``; it was
    # never passed to the template, so it has been removed.
    q = Papers.query(Papers.Category == 'Feedback').order(-Papers.CreatedDate)
    papers = q.fetch(999)
    Havepapers = bool(papers)
    currentuser = users.get_current_user()
    logout = users.create_logout_url('/feedback') if currentuser else None
    login = None if currentuser else users.create_login_url('/feedback')
    template_values = {
        'papers': papers,
        'Havepapers': Havepapers,
        'currentuser': currentuser,
        'cat': 'Feedback',
        'login': login,
        'logout': logout,
    }
    jinja_environment = jinja2.Environment(
        autoescape=False,
        loader=jinja2.FileSystemLoader(TEMPLATE_DIR),
    )
    # Filters must be registered before get_template(): jinja2 resolves
    # filter names when the template is compiled.
    jinja_environment.filters['AccessOK'] = AccessOK
    template = jinja_environment.get_template('FeedbackList.html')
    self.response.out.write(template.render(template_values))
def get(self):
    """Render the intro page with the ten most recent non-feedback papers."""
    q = Papers.query(Papers.Category != 'Feedback').order(
        Papers.Category, -Papers.CreatedDate)
    papers = q.fetch(10)
    Havepapers = bool(papers)
    currentuser = users.get_current_user()
    logout = users.create_logout_url('/') if currentuser else None
    login = None if currentuser else users.create_login_url('/')
    template_values = {
        'content1': 'No content yet.',
        'papers': papers,
        'Havepapers': Havepapers,
        'currentuser': currentuser,
        'login': login,
        'logout': logout,
    }
    # FIX: register the custom filters BEFORE compiling the template.
    # Jinja2 resolves filter names at template compile time, so calling
    # get_template() first raises TemplateAssertionError ("no filter named
    # ...") for any template that uses AccessOK / AccessOKNew.  The sibling
    # FeedbackList handler already registers its filter first.
    jinja_environment.filters['AccessOK'] = AccessOK
    jinja_environment.filters['AccessOKNew'] = AccessOKNew
    template = jinja_environment.get_template('TransIntro.html')
    self.response.out.write(template.render(template_values))
def get_documents() -> str:
    """Return the body text of the document selected via the option parser.

    FIX: the original docstring ("list of corpus of bodies") and the
    ``-> list`` annotation were wrong — the function fetches exactly one
    paper (the one whose id equals ``options.document``) and returns its
    ``paper_text`` string.

    Returns:
        The ``paper_text`` of the selected paper.

    Raises:
        Papers.DoesNotExist: if no paper has the requested id
            (peewee's ``.get()`` raises when the query is empty).
    """
    paper = Papers.select().where(Papers.id == options.document).get()
    return paper.paper_text
def post(self):
    """Store a submitted feedback entry and redirect to the feedback page.

    Category and Status are fixed ('Feedback' / 'Published'); the remaining
    fields come from the request form.
    """
    author = users.get_current_user()
    entry = Papers(
        Title=self.request.get('Title'),
        Category='Feedback',
        Text=self.request.get('Text'),
        Type=self.request.get('Type'),
        Status='Published',
        CreatedBy=author,
        StatusBy=author,
    )
    # Trace every field of the new entry before persisting it.
    logging.info('QQQ: FeedbackPost_Title: %s' % entry.Title)
    logging.info('QQQ: FeedbackPost_Category: %s' % entry.Category)
    logging.info('QQQ: FeedbackPost_Text: %s' % entry.Text)
    logging.info('QQQ: FeedbackPost_Type: %s' % entry.Type)
    logging.info('QQQ: FeedbackPost_Status: %s' % entry.Status)
    entry.put()
    return self.redirect('/feedback')
def main():
    """Scrape citation counts for up to LIMIT papers (starting at BEGIN),
    sleeping between requests, then dump the counts to CSV.
    """
    papers = Papers.select().limit(LIMIT)
    n_citations = []
    for index, paper in enumerate(papers[BEGIN:]):
        print(paper.id - 1)
        count = scrape_citation_count(paper)
        n_citations.append(count)
        if count == -1:
            # -1 is the scraper's failure/end sentinel — stop early.
            print("Finished after {} iterations".format(index))
            break
        time.sleep(2)  # throttle requests to the remote service
    citations_to_csv(n_citations)
    print(n_citations)
def main():
    """Train (or load) an LDA model, derive topic labels, and label all papers.

    Both the model/dictionary and the topic labels are cached on disk so
    repeated runs skip the expensive steps.
    """
    if not os.path.isfile('ldamodel.pkl'):
        # Train on a random sample of 200 paper bodies, then cache the
        # model and its dictionary.
        papers = [p.paper_text
                  for p in Papers.select().order_by(fn.Random()).limit(200)]
        ldamodel, dictionary = train_classifier(papers, 20)
        # FIX: the original passed bare open(...) handles to pickle and never
        # closed them; use context managers (the labels.txt branch below
        # already did).
        with open('ldamodel.pkl', 'wb') as f:
            pickle.dump(ldamodel, f)
        with open('dictionary.pkl', 'wb') as f:
            pickle.dump(dictionary, f)
    else:
        with open('ldamodel.pkl', 'rb') as f:
            ldamodel = pickle.load(f)
        with open('dictionary.pkl', 'rb') as f:
            dictionary = pickle.load(f)
    if not os.path.isfile('labels.txt'):
        topic_labels = extract_topics(ldamodel)
        with open('labels.txt', 'w') as f:
            f.write("\n".join(topic_labels))
    else:
        with open('labels.txt', 'r') as f:
            topic_labels = f.read().splitlines()
    create_database_labels(topic_labels)
    print(topic_labels)
    label_documents(ldamodel, topic_labels, dictionary)
def label_documents(model: LdaModel, topic_labels: list,
                    dictionary: corpora.Dictionary):
    """Label every paper in the database, including papers that were not in
    the training set.

    Args:
        model: the trained LDA model.
        topic_labels: labels for the model's topics, indexed by topic id.
        dictionary: the dictionary used when the training data was scanned.
    """
    for paper in Papers.select():
        bow = dictionary.doc2bow(clean(paper.paper_text))
        topic_scores = model[bow]
        # Keep only topics that score above the uniform baseline.
        threshold = 1 / len(topic_scores)
        labels = []
        for topic_id, score in topic_scores:
            if score > threshold:
                Papers_labels.get_or_create(paper_id=paper.id,
                                            label_id=topic_id)
                labels.append(topic_labels[topic_id])
        print(paper.title, labels)
def main():
    """Cluster the first 100 papers by pairwise distance over their features."""
    labels = []
    data = []
    # Single pass over the query: collect titles (cluster labels) and bodies.
    for paper in Papers.select().limit(100):
        labels.append(paper.title)
        data.append(paper.paper_text)
    features = extract_features(data)
    distance_matrix = create_distance_matrix(features)
    cluster(distance_matrix, labels)
def main():
    """Print the lines following the last "References" heading of the first
    ten papers.
    """
    # Case-tolerant "Reference(s)" heading; compiled once outside the loop.
    ref_heading = re.compile('[Rr][Ee][Ff][Ee][Rr][Ee][Nn][Cc][Ee]([Ss])?')
    for paper in Papers.select().limit(10):
        # Everything after the final heading match is the citation section.
        tail = ref_heading.split(paper.paper_text)[-1]
        print(tail.split('\n'))