def my_projects(self):
    """Render the dashboard in 'my projects' mode for the logged-in user.

    Populates template globals (c.*) with the projects the user leads,
    the projects they merely participate in, and per-project prediction
    status flags, then renders the dashboard template.
    """
    person = request.environ.get('repoze.who.identity')['user']
    c.person = person

    # Get user object from db.
    user = controller_globals._get_user_from_email(person.email)

    # Set user's show preference defaults in case they weren't set
    # (these columns may be NULL for accounts created before the fields
    # existed).  NOTE(review): was `== True or == False`; `is not None`
    # is equivalent for bool/0/1 values and matches my_work()'s check.
    if user.show_journal is not None:
        c.show_journal = user.show_journal
    else:
        user.show_journal = True
        # BUGFIX: previously c.show_journal was left unset on this path,
        # leaving the template without the variable.
        c.show_journal = True
    if user.show_authors is not None:
        c.show_authors = user.show_authors
    else:
        user.show_authors = True
        c.show_authors = True  # BUGFIX: was never set on this path
    if user.show_keywords is not None:
        c.show_keywords = user.show_keywords
    else:
        user.show_keywords = True
        c.show_keywords = True  # BUGFIX: was never set on this path

    c.leading_projects = user.leader_of_projects
    leading_project_ids = [proj.id for proj in c.leading_projects]
    # projects the user participates in but does not lead
    c.participating_projects = [
        p for p in user.member_of_projects
        if p.id not in leading_project_ids
    ]
    c.review_ids_to_names_d = self._get_review_ids_to_names_d(
        c.participating_projects)

    statuses_q = Session.query(model.PredictionsStatus)
    c.statuses = {}
    c.do_we_have_a_maybe = {}
    for project_id in leading_project_ids:
        predictions_for_review = statuses_q.filter(
            model.PredictionsStatus.project_id == project_id).all()
        # True iff a status row exists and says predictions are available
        c.statuses[project_id] = bool(
            predictions_for_review and
            predictions_for_review[0].predictions_exist)
        c.do_we_have_a_maybe[project_id] = False

    # Flag projects that have locked priorities
    c.projects_w_locked_priorities = self._get_projects_w_locked_priorities(
        leading_project_ids)

    c.my_work = False
    c.my_projects = True
    return render('/accounts/dashboard.mako')
def parse_csv_header(header, project):
    """Resolve each username column of a label-CSV header.

    Column 0 is the citation id; every remaining column names a labeler.
    Returns the parallel lists (users, assignments) for those columns.
    """
    users, assignments = [], []
    for uname in header[1:]:
        matched_user = Session.query(model.User).filter_by(
            username=uname).first()
        matched_assignment = Session.query(model.Assignment).filter_by(
            project_id=project.id).filter_by(
            user_id=matched_user.id).first()
        users.append(matched_user)
        assignments.append(matched_assignment)
    return users, assignments
def my_projects(self):
    """Render the dashboard in 'my projects' mode for the logged-in user."""
    person = request.environ.get('repoze.who.identity')['user']
    c.person = person

    # Get user object from db.
    user = controller_globals._get_user_from_email(person.email)

    # Set user's show preference defaults in case they weren't set.
    # NOTE(review): was `== True or == False`; `is not None` is equivalent
    # for bool/0/1 columns and matches the check used in my_work().
    if user.show_journal is not None:
        c.show_journal = user.show_journal
    else:
        user.show_journal = True
        # BUGFIX: c.show_journal was previously left unset on this path.
        c.show_journal = True
    if user.show_authors is not None:
        c.show_authors = user.show_authors
    else:
        user.show_authors = True
        c.show_authors = True  # BUGFIX: was never set on this path
    if user.show_keywords is not None:
        c.show_keywords = user.show_keywords
    else:
        user.show_keywords = True
        c.show_keywords = True  # BUGFIX: was never set on this path

    c.leading_projects = user.leader_of_projects
    leading_project_ids = [proj.id for proj in c.leading_projects]
    c.participating_projects = [p for p in user.member_of_projects
                                if p.id not in leading_project_ids]
    c.review_ids_to_names_d = self._get_review_ids_to_names_d(
        c.participating_projects)

    statuses_q = Session.query(model.PredictionsStatus)
    c.statuses = {}
    c.do_we_have_a_maybe = {}
    for project_id in leading_project_ids:
        predictions_for_review = statuses_q.filter(
            model.PredictionsStatus.project_id == project_id).all()
        c.statuses[project_id] = bool(
            predictions_for_review and
            predictions_for_review[0].predictions_exist)
        c.do_we_have_a_maybe[project_id] = False

    # Flag projects that have locked priorities
    c.projects_w_locked_priorities = self._get_projects_w_locked_priorities(
        leading_project_ids)

    c.my_work = False
    c.my_projects = True
    return render('/accounts/dashboard.mako')
def _build_tags_dict(self):
    """Populate self.citation_to_tags_dict (citation id -> tag texts)."""
    tag_rows = (Session.query(model.Tag, model.TagType)
                .filter(model.Tag.citation_id.in_(self.all_citations))
                .join(model.TagType,
                      model.Tag.tag_id == model.TagType.id)
                .all())
    # make sure every citation has an entry, even if it carries no tags
    for cid in self.all_citations:
        self.citation_to_tags_dict.setdefault(cid, [])
    for tag_row, type_row in tag_rows:
        self.citation_to_tags_dict[tag_row.citation_id].append(type_row.text)
def _get_username_from_id(self, id):
    """Map a user id to a username, memoizing lookups in self.user_dict.

    The CONSENSUS_USER pseudo-id always maps to "consensus".
    """
    if id == CONSENSUS_USER:
        return "consensus"
    try:
        return self.user_dict[id]
    except KeyError:
        record = Session.query(model.User).filter(model.User.id == id).one()
        self.user_dict[id] = record.username
        return record.username
def _get_users_labels_for_assignment(self, project_id, user_id, assignment_id):
    """Returns a user's list of labels across a specific project"""
    return (Session.query(model.Label)
            .filter_by(project_id=project_id,
                       user_id=user_id,
                       assignment_id=assignment_id)
            .all())
def _get_username_from_id(self, id):
    """Resolve *id* to a username; the consensus pseudo-user is special-cased.

    Results are cached in self.user_dict to avoid repeated queries.
    """
    if id == CONSENSUS_USER:
        return "consensus"
    if id not in self.user_dict:
        matched = Session.query(model.User).filter(model.User.id == id).one()
        self.user_dict[id] = matched.username
    return self.user_dict[id]
def _get_notes_for_citation(self, citation_id, user_id):
    """Return this user's Note on the citation, or None if there is none."""
    matching_notes = Session.query(model.Note).filter(and_(
        model.Note.citation_id == citation_id,
        model.Note.creator_id == user_id)).all()
    # callers only ever want a single note; take the first match
    return matching_notes[0] if matching_notes else None
def _row_unique(row, project_id, user_id):
    # Look up whether a (term, label) pair from a parsed CSV row already
    # exists as a LabeledFeature for this user/project.
    # NOTE(review): on the success path this function falls off the end and
    # implicitly returns None -- the `labeledfeatures` result is never used.
    # Presumably a `return len(labeledfeatures) == 0, ...`-style check was
    # intended or lost; confirm against callers.
    try:
        labeledfeatures = Session.query(model.LabeledFeature).\
            filter_by(term = row[0]).\
            filter_by(label = row[1]).\
            filter_by(project_id = project_id).\
            filter_by(user_id = user_id).all()
    # Python 2 except syntax; raised when the row has fewer than 2 columns.
    except IndexError, e:
        return False, 2
def _build_tags_dict(self):
    """Fill self.citation_to_tags_dict with each citation's tag texts."""
    joined = Session.query(model.Tag, model.TagType).filter(
        model.Tag.citation_id.in_(self.all_citations)).join(
        model.TagType, model.Tag.tag_id == model.TagType.id).all()
    # seed an empty list for every citation we are exporting
    for cid in self.all_citations:
        if cid not in self.citation_to_tags_dict:
            self.citation_to_tags_dict[cid] = []
    # each row is a (Tag, TagType) pair
    for pair in joined:
        self.citation_to_tags_dict[pair[0].citation_id].append(pair[1].text)
def _build_notes_dict(self):
    """Fill self.citation_to_notes_dict: citation id -> {username: Note}."""
    all_notes = Session.query(model.Note).filter(
        model.Note.citation_id.in_(self.all_citations)).all()
    # seed every (citation, labeler) cell with None first
    for cid in self.all_citations:
        if cid not in self.citation_to_notes_dict:
            self.citation_to_notes_dict[cid] = {}
        for labeler in self.all_labelers:
            self.citation_to_notes_dict[cid][labeler.username] = None
    # then drop each note into its author's cell
    for note in all_notes:
        author = self._get_username_from_id(note.creator_id)
        self.citation_to_notes_dict[note.citation_id][author] = note
def _get_all_priorities_locked_by_this_user(self, all_assignments):
    """Collect the Priority rows locked by each assignment's user/project.

    Returns a de-duplicated list of model.Priority objects.
    """
    locked_priorities = []
    for a in all_assignments:
        priorities = Session.query(model.Priority).\
            filter_by(project_id=a.project_id).\
            filter_by(locked_by=a.user_id).all()
        # extend directly -- the old `[p for p in priorities]` was a no-op copy
        locked_priorities.extend(priorities)
    return list(set(locked_priorities))
def import_csv_with_labels(project_id, csv_file_location):
    """Import a label CSV into the given project.

    The header row names the labelers (see parse_csv_header); each data
    row carries one citation's labels.  Commits once at the end.
    """
    project = Session.query(model.Project).filter_by(id=project_id).first()
    with open(csv_file_location) as csv_file:
        reader = csv.reader(csv_file, delimiter=',', quotechar='"')
        users, assignments = parse_csv_header(next(reader), project)
        for data_row in reader:
            parse_csv_row(data_row, users, assignments, project)
    Session.commit()
def _get_tag_types_for_review(self, review_id, only_for_user_id=None):
    """Return the text of each TagType in a review.

    When only_for_user_id is given, restrict to tag types that user created.
    """
    tag_q = Session.query(model.TagType)
    if only_for_user_id:
        matched = tag_q.filter(and_(
            model.TagType.project_id == review_id,
            model.TagType.creator_id == only_for_user_id)).all()
    else:
        matched = tag_q.filter(model.TagType.project_id == review_id).all()
    return [t.text for t in matched]
def _build_notes_dict(self):
    """Build the citation -> {labeler username: Note or None} mapping."""
    note_rows = Session.query(model.Note).filter(
        model.Note.citation_id.in_(self.all_citations)).all()
    for citation_id in self.all_citations:
        # initialize the per-citation dict on first sight...
        if citation_id not in self.citation_to_notes_dict:
            self.citation_to_notes_dict[citation_id] = {}
        # ...and give every labeler a None placeholder
        for labeler in self.all_labelers:
            self.citation_to_notes_dict[citation_id][labeler.username] = None
    for note in note_rows:
        writer = self._get_username_from_id(note.creator_id)
        self.citation_to_notes_dict[note.citation_id][writer] = note
def _project_has_locked_priorities(self, project_id):
    """Returns True if project has any locked priorities, else False

    Integer -> Boolean
    """
    priorities_q = Session.query(model.Priority).\
        filter_by(project_id=project_id).\
        filter_by(is_out=1)
    # first() returns None when no row matches, so no if/else is needed
    return priorities_q.first() is not None
def _get_tag_types_for_citation(self, citation_id, objects=False):
    """Return the TagType objects (or their texts) for a citation's tags.

    BUGFIX: the original re-bound `tags = []` after fetching the citation's
    tags, shadowing the query result; the loop then iterated the empty list
    and the function always returned [].  The result list is now distinct
    from the fetched tags.
    """
    tags = self._get_tags_for_citation(citation_id)
    # now map those types to names
    tag_type_q = Session.query(model.TagType)
    tag_types = []
    for tag in tags:
        tag_obj = tag_type_q.filter(model.TagType.id == tag.tag_id).one()
        if objects:
            tag_types.append(tag_obj)
        else:
            tag_types.append(tag_obj.text)
    return tag_types
def _get_tags_for_citation(self, citation_id, texts_only=True,
                           only_for_user_id=None):
    """Return a citation's Tag rows, optionally restricted to one creator.

    When texts_only is True (the default), return the tag texts instead
    of the Tag objects.
    """
    tag_q = Session.query(model.Tag)
    if only_for_user_id:
        # restrict to tags this particular user put on the study
        matched_tags = tag_q.filter(and_(
            model.Tag.citation_id == citation_id,
            model.Tag.creator_id == only_for_user_id)).all()
    else:
        # every tag on the citation, regardless of creator
        matched_tags = tag_q.filter(
            model.Tag.citation_id == citation_id).all()
    if texts_only:
        return self._tag_ids_to_texts([t.tag_id for t in matched_tags])
    return matched_tags
def gen_token_to_reset_pwd(self, user):
    """Create, persist and return a unique password-reset token for *user*.

    The token is stored in the ResetPassword table keyed by the user's
    email address; it must not collide with any outstanding token.
    """
    # generate a random token for the user to reset their password; stick
    # it in the database.  Use the OS entropy source (SystemRandom) rather
    # than the default PRNG, since this token is security-sensitive.
    rng = random.SystemRandom()
    alphabet = string.ascii_uppercase + string.digits
    make_token = lambda N: ''.join(rng.choice(alphabet) for x in range(N))

    reset_pwd_q = Session.query(model.ResetPassword)
    existing_tokens = [entry.token for entry in reset_pwd_q.all()]

    token_length = 10
    cur_token = make_token(token_length)
    # BUGFIX: the retry path called the undefined name `make_code`, which
    # raised a NameError whenever a token collision occurred.
    while cur_token in existing_tokens:
        cur_token = make_token(token_length)

    reset = model.ResetPassword()
    reset.token = cur_token
    reset.user_email = user.email
    Session.add(reset)
    Session.commit()
    return cur_token
def confirm_password_reset(self, id):
    # Handle the link from a password-reset email: *id* is the token that
    # gen_token_to_reset_pwd() generated and mailed out.
    token = str(id)
    reset_pwd_q = Session.query(model.ResetPassword)
    # we pull all in case they've tried to reset their pwd a few times
    # by the way, these should time-expire...
    matches = reset_pwd_q.filter(model.ResetPassword.token == token).all()
    if len(matches) == 0:
        return """
            Hrmm... It looks like you're trying to reset your password, but I can't match the provided token. Please go back to the email that was sent to you and make sure you've copied the URL correctly.
        """
    # all matches share the same email; use the first to find the account
    user = controller_globals._get_user_from_email(matches[0].user_email)
    # consume every outstanding token for this user
    for match in matches:
        Session.delete(match)
    # NOTE(review): the password is set to the token itself and then echoed
    # back in the response -- security-sensitive; consider a proper
    # "choose a new password" form instead.
    user._set_password(token)
    Session.commit()
    return '''
        ok! your password has been set to %s (you can change it once you've logged in).\n <a href="%s">log in here</a>.''' % (token, url('/', qualified=True))
def parse_csv_row(row, users, assignments, project):
    # One CSV data row: column 0 is the source (pubmed) id of a citation,
    # each remaining column holds one label value per user from the header
    # (parallel to the *users*/*assignments* lists).
    source_id = row[0]
    citation = Session.query(model.Citation).filter_by(
        project_id=project.id).filter_by(pmid=source_id).first()
    for index, label_val in enumerate(row[1:]):
        # empty cell means this user did not label the citation
        if label_val:
            user = users[index]
            assignment = assignments[index]
            label = model.Label()
            label.project_id = project.id
            label.study_id = citation.id
            label.assignment_id = assignment.id
            label.user_id = user.id
            # actual labeling time is unknown for imported labels
            label.labeling_time = 1
            label.first_labeled = datetime.datetime.utcnow()
            label.label_last_updated = datetime.datetime.utcnow()
            label.label = label_val
            model.Session.add(label)
            # Python 2 print statement (added objects are committed by the caller)
            print "Created label " + label_val + " by user " + user.username + " for " + str(citation.id)
def my_work(self):
    """Render the dashboard in 'my work' mode: the logged-in user's
    outstanding and finished assignments."""
    person = request.environ.get('repoze.who.identity')['user']
    c.person = person
    user = controller_globals._get_user_from_email(c.person.email)
    if not user:
        log.error('''\
            Hum...fetching user from the database returned False. We need to investigate.
            Go remove the catch all in controller_globals.py, method _get_user_from_email()
            to see which OperationalError is being raised
            ''')
    # If somehow the user's citation settings variables don't get initialized yet,
    # then the following 3 if-else blocks should take care of it in order to avoid
    # any errors due to the values of the variables being null:
    c.show_journal = user.show_journal if not user.show_journal is None else True
    # NOTE(review): unlike the journal line above, these two else-branches
    # update the user object but never set c.show_authors / c.show_keywords
    # -- confirm the template tolerates the missing attributes.
    if (user.show_authors == True or user.show_authors == False):
        c.show_authors = user.show_authors
    else:
        user.show_authors = True
    if (user.show_keywords == True or user.show_keywords == False):
        c.show_keywords = user.show_keywords
    else:
        user.show_keywords = True

    # pull all assignments for this person
    assignment_q = Session.query(model.Assignment)
    all_assignments = assignment_q.filter(
        model.Assignment.user_id == person.id).all()

    # This process is incredibly slow. Take it out for now and find out
    # why the .done and .done_so_far field on assignment is off sometimes.
    #self._set_assignment_done_status(all_assignments)

    # release any priority locks this user is still holding
    self._clear_this_user_locks(all_assignments)

    # Build assignment completion status dictionary
    c.d_completion_status = self._get_assignment_completion_status(
        all_assignments)

    c.outstanding_assignments = [a for a in all_assignments if not a.done]

    # if there's an initial assignment, we'll only show that.
    assignment_types = [assignment.assignment_type for assignment in \
                        c.outstanding_assignments]

    #####
    # for any review that has an initial assignment, we will show
    # *only* that assignment, thereby forcining participants to
    # finish initial assignments before moving on to other
    # assignments. fix for issue #5.
    ####

    # which reviews have (outstanding) initial assigments?
    reviews_with_initial_assignments = []
    for assignment in c.outstanding_assignments:
        if assignment.assignment_type == "initial":
            reviews_with_initial_assignments.append(assignment.project_id)

    # now remove other (non-initial) assignments for reviews
    # that have an initial assignment
    filtered_assignments = [assignment for assignment in c.outstanding_assignments if \
        assignment.project_id not in reviews_with_initial_assignments or \
        assignment.assignment_type == "initial"]
    c.outstanding_assignments = filtered_assignments

    c.finished_assignments = [a for a in all_assignments if a.done]

    project_q = Session.query(model.Project)
    c.participating_projects = user.member_of_projects
    c.review_ids_to_names_d = self._get_review_ids_to_names_d(
        c.participating_projects)

    # tell the (shared) dashboard template which mode we are in
    c.my_work = True
    c.my_projects = False
    return render('/accounts/dashboard.mako')
def _get_username_from_id(self, id):
    """Uncached id -> username lookup; CONSENSUS_USER maps to 'consensus'."""
    if id == CONSENSUS_USER:
        return "consensus"
    matching_user = Session.query(model.User).filter(
        model.User.id == id).one()
    return matching_user.username
def _get_citations_for_review(self, review_id):
    """All Citation rows belonging to the given review/project."""
    return Session.query(model.Citation).filter(
        model.Citation.project_id == review_id).all()
def _get_participants_for_review(self, project_id):
    """Return the member users of the project with id *project_id*."""
    matching_project = Session.query(model.Project).filter(
        model.Project.id == project_id).one()
    return matching_project.members
def write_labels(self):
    """Export this project's citations and labels to a CSV write buffer,
    then dump it to ~/labels_<project_id>.csv."""
    # get fields
    review_q = Session.query(model.Project)
    review = review_q.filter(model.Project.id == self.project_id).one()
    all_labelers = self._get_participants_for_review(self.project_id)

    ## some helpers
    none_to_str = lambda x: "" if x is None else x
    zero_to_none = lambda x: "none" if x==0 else x

    fields_to_export = self.fields

    # map citation ids to dictionaries that, in turn, map
    # usernames to labels
    citation_to_lbls_dict = {}
    all_citations = [cit.id for cit in self._get_citations_for_review(self.project_id)]
    citations_labeled_dict = {}
    for cit in all_citations:
        citations_labeled_dict[cit]=False
    # likewise, for notes
    citation_to_notes_dict = {}

    # we filter the citations list (potentially)
    citations_to_export = []

    # for efficiency reasons, we keep track of whether we need
    # create a new empty dictionary for the current citation
    last_citation_id = None
    labeler_names = ["consensus"] # always export the consensus

    # first collect labels for all citations that pass our
    # filtering criteria.  The query yields (Citation, Label) pairs
    # ordered by citation id, so rows for one citation are contiguous.
    for citation, label in Session.query(\
        model.Citation, model.Label).filter(model.Citation.id==model.Label.study_id).\
        filter(model.Label.project_id==self.project_id).order_by(model.Citation.id).all():
        # the above gives you all labeled citations for this review
        # i.e., citations that have at least one label
        citations_labeled_dict[citation.id]=True
        if self.lbl_filter_f(label):
            cur_citation_id = citation.id
            if last_citation_id != cur_citation_id:
                citation_to_lbls_dict[citation.id] = {}
                citation_to_notes_dict[cur_citation_id] = {}
                citations_to_export.append(citation)

            # NOTE that we are assuming unique user names per-review
            labeler = self._get_username_from_id(label.user_id)
            if not labeler in labeler_names:
                labeler_names.append(labeler)
            citation_to_lbls_dict[cur_citation_id][labeler] = label.label
            last_citation_id = cur_citation_id

            # note that this will only contain entries for reviews that have
            # been labeled! i.e., notes made on unlabeled citations are not
            # reflected here.
            citation_to_notes_dict[cur_citation_id][labeler] = \
                self._get_notes_for_citation(cur_citation_id, label.user_id)

    # we automatically export all labeler's labels
    for labeler in labeler_names:
        fields_to_export.append(labeler)

    # finally, export notes (if asked)
    notes_fields = ["general", "population", "intervention/comparator", "outcome"]
    if "notes" in fields_to_export:
        fields_to_export.remove("notes")
        # we append all labelers notes
        for labeler in labeler_names:
            if labeler != "consensus":
                for notes_field in notes_fields:
                    fields_to_export.append("%s notes (%s)" % (notes_field, labeler))

    # header row first, then one CSV line per exported citation
    self.write_buffer = [",".join(fields_to_export)]
    for citation in citations_to_export:
        cur_line = []
        for field in fields_to_export:
            if field == "(internal) id":
                cur_line.append("%s" % citation.id)
            elif field == "(source) id":
                cur_line.append("%s" % citation.refman)
            elif field == "pubmed id":
                cur_line.append("%s" % zero_to_none(citation.pmid))
            elif field == "abstract":
                # double quotes inside fields are flattened to single quotes
                cur_line.append('"%s"' % none_to_str(citation.abstract).replace('"', "'"))
            elif field == "title":
                cur_line.append('"%s"' % citation.title.replace('"', "'"))
            elif field == "keywords":
                cur_line.append('"%s"' % citation.keywords.replace('"', "'"))
            elif field == "journal":
                cur_line.append('"%s"' % none_to_str(citation.journal))
            elif field == "authors":
                cur_line.append('"%s"' % "".join(citation.authors))
            elif field == "tags":
                cur_tags = self._get_tags_for_citation(citation.id)
                cur_line.append('"%s"' % ",".join(cur_tags))
            elif field in labeler_names:
                # "o" = no label from this labeler
                cur_labeler = field
                cur_lbl = "o"
                cit_lbl_d = citation_to_lbls_dict[citation.id]
                if cur_labeler in cit_lbl_d:
                    cur_lbl = str(cit_lbl_d[cur_labeler])
                # create a consensus label automagically in cases where
                # there is unanimous agreement
                elif cur_labeler == "consensus":
                    if len(set(cit_lbl_d.values()))==1:
                        if len(cit_lbl_d) > 1:
                            # if at least two people agree (and none disagree), set the
                            # consensus label to reflect this
                            # (Python 2: dict.values() is a list, so [0] works)
                            cur_lbl = str(cit_lbl_d.values()[0])
                        else:
                            # then only one person has labeled it --
                            # consensus is kind of silly
                            cur_lbl = "o"
                    else:
                        # no consensus!
                        cur_lbl = "x"
                cur_line.append(cur_lbl)
            elif "notes" in field:
                # notes field
                # this is kind of hacky -- we first parse out the labeler
                # name from the column header string assembled above and
                # then get a user id from this.
                get_labeler_name_from_str = lambda x: x.split("(")[1].split(")")[0]
                cur_labeler = get_labeler_name_from_str(field)
                # @TODO not sure what we should do in consensus case...
                if cur_labeler == "consensus":
                    cur_line.append("")
                else:
                    cur_note = None
                    cur_notes_d = citation_to_notes_dict[citation.id]
                    if cur_labeler in cur_notes_d:
                        cur_note = cur_notes_d[cur_labeler]
                    if cur_note is None:
                        cur_line.append("")
                    else:
                        notes_field = field
                        if "general" in notes_field:
                            cur_line.append("\"%s\"" % cur_note.general.replace('"', "'"))
                        elif "population" in notes_field:
                            cur_line.append("\"%s\"" % cur_note.population.replace('"', "'"))
                        elif "outcome" in notes_field:
                            cur_line.append("\"%s\"" % cur_note.outcome.replace('"', "'"))
                        else:
                            # intervention/comparator
                            cur_line.append("\"%s\"" % cur_note.ic.replace('"', "'"))
        self.write_buffer.append(",".join(cur_line))

    # exporting *all* (including unlabeled!) citations, per Ethan's request
    #-- may want to make this optional
    self.write_buffer.append("citations that are not yet labeled by anyone")
    # jj 2014-08-20: Request to include citation information even for those citations that have not
    # been labeled yet.
    unlabeled_citation_ids = [cit for cit in citations_labeled_dict if not citations_labeled_dict[cit]]
    unlabeled_citations = Session.query(model.Citation).filter(model.Citation.id.in_(unlabeled_citation_ids)).all()
    for citation in unlabeled_citations:
        cur_line = []
        # only the citation-metadata fields can be filled in here; label
        # and notes columns are simply absent for unlabeled citations
        for field in fields_to_export:
            if field == "(internal) id":
                cur_line.append("%s" % citation.id)
            elif field == "(source) id":
                cur_line.append("%s" % citation.refman)
            elif field == "pubmed id":
                cur_line.append("%s" % zero_to_none(citation.pmid))
            elif field == "abstract":
                cur_line.append('"%s"' % none_to_str(citation.abstract).replace('"', "'"))
            elif field == "title":
                cur_line.append('"%s"' % citation.title.replace('"', "'"))
            elif field == "keywords":
                cur_line.append('"%s"' % citation.keywords.replace('"', "'"))
            elif field == "journal":
                cur_line.append('"%s"' % none_to_str(citation.journal))
            elif field == "authors":
                cur_line.append('"%s"' % "".join(citation.authors))
        self.write_buffer.append(",".join(cur_line))

    # NOTE(review): open() does not expand "~"; this likely writes a file
    # literally named "~/labels_<id>.csv" in the CWD -- confirm intent
    # (os.path.expanduser would target the home directory).
    path_to_export = "~/labels_%s.csv" % self.project_id
    try:
        fout = open(path_to_export, 'w')
    except IOError:
        os.makedirs(os.path.dirname(path_to_export))
        fout = open(path_to_export, 'w')
    lbls_str = "\n".join(self.write_buffer)
    lbls_str = lbls_str.encode("utf-8", "ignore")
    fout.write(lbls_str)
    fout.close()
    print "finished" + str(self.project_id)
    return
def __init__(self, id, lbl_filter_f = None):
    """
    id => id of the abstrackr project
    filter => filter function for labels
    """
    # Builds an ElementTree (self.root) describing the whole project:
    # members, citations, and each citation's keywords/authors/tags/
    # labels/notes.  Labels failing lbl_filter_f are skipped.
    self.project_id = id
    # default to exporting everything
    if lbl_filter_f is None:
        lbl_filter_f = lambda label: True

    # get fields
    conf = appconfig('config:development.ini', relative_to='.')
    #load_environment(conf.global_conf, conf.local_conf)

    review_q = Session.query(model.Project)
    review = review_q.filter(model.Project.id == id).one()

    """
    create XML root and add meta information
    -Birol
    """
    self.root = ET.Element('project')
    et_project_name = ET.SubElement(self.root, 'name')
    et_project_name.text = review.name
    et_project_id = ET.SubElement(self.root, 'id')
    et_project_id.text = str(review.id)

    labeler_dict = {}
    member_list = review.members
    leader_list = review.leaders
    # leaders are exported alongside ordinary members
    et_project_member_list = ET.SubElement(self.root, 'member_list')
    for user in member_list + leader_list:
        et_project_member = ET.SubElement(et_project_member_list, 'member')
        et_project_member_id = ET.SubElement(et_project_member, 'id')
        et_project_member_id.text = str(user.id)
        et_project_member_username = ET.SubElement(et_project_member, 'username')
        et_project_member_username.text = user.username
        et_project_member_email = ET.SubElement(et_project_member, 'email')
        et_project_member_email.text = user.email

    """
    create an ET subelement to hold the citations
    -Birol
    """
    et_citation_list = ET.SubElement(self.root, "citation_list")
    citations_to_export = Session.query(model.Citation).filter_by(project_id = id).all()
    for citation in citations_to_export:
        """
        create ET subelement for each citation, then add the relevant fields
        -Birol
        """
        ## some helpers
        none_to_str = lambda x: "" if x is None else x
        zero_to_none = lambda x: "none" if x==0 else str(x)

        et_citation = ET.SubElement(et_citation_list, "citation")
        et_citation_internal = ET.SubElement(et_citation, "internal_id")
        et_citation_internal.text = zero_to_none(citation.id)
        et_citation_source = ET.SubElement(et_citation, "source_id")
        et_citation_source.text = zero_to_none(citation.refman)
        et_citation_pubmed = ET.SubElement(et_citation, "pubmed_id")
        et_citation_pubmed.text = zero_to_none(citation.pmid)

        """
        We replace double quotes with single quotes here, so we do not have to do it later.
        -Birol
        """
        et_citation_abstract = ET.SubElement(et_citation, "abstract")
        et_citation_abstract.text = none_to_str(citation.abstract).replace('"', "'")
        et_citation_title = ET.SubElement(et_citation, "title")
        et_citation_title.text = citation.title.replace('"', "'")

        """
        Not sure if all the keywords are separated by commas. If not, this would cause problems.
        -Birol
        """
        kw_list = citation.keywords.replace('"', "'").split(',')
        et_citation_keyword_list = ET.SubElement(et_citation, "keyword_list")
        for kw in kw_list:
            if kw == "":
                continue
            et_citation_keyword = ET.SubElement(et_citation_keyword_list, "keyword")
            et_citation_keyword.text = kw

        et_citation_journal = ET.SubElement(et_citation, "journal")
        et_citation_journal.text = none_to_str(citation.journal)

        """
        Also not sure if all the authors are separated by " and ". If not, there will be suffering.
        -Birol
        """
        auth_list = citation.authors.split(' and ')
        et_citation_author_list = ET.SubElement(et_citation, "author_list")
        for auth in auth_list:
            if auth == "":
                continue
            et_citation_author = ET.SubElement(et_citation_author_list, "author")
            et_citation_author.text = auth

        tag_list = Session.query(model.TagType).join(model.Tag, model.Tag.tag_id == model.TagType.id).filter_by(citation_id=citation.id).all()
        et_citation_tag_list = ET.SubElement(et_citation, "tag_list")
        for tag in tag_list:
            et_citation_tag = ET.SubElement(et_citation_tag_list, "tag")
            et_citation_tag.text = tag.text

        # every label on this citation, filtered through lbl_filter_f
        label_list = Session.query(model.Label).filter_by(study_id=citation.id).all()
        et_citation_label_list = ET.SubElement(et_citation, "label_list")
        for label in label_list:
            if not lbl_filter_f(label):
                continue
            et_citation_label = ET.SubElement(et_citation_label_list, "label")
            et_citation_label_labeler = ET.SubElement(et_citation_label, "labeler")
            et_citation_label_labeler.text = str(label.user_id)
            et_citation_label_decision = ET.SubElement(et_citation_label, "decision")
            et_citation_label_decision.text = str(label.label)

        notes_list = Session.query(model.Note).filter_by(citation_id = citation.id).all()
        et_citation_notes_list = ET.SubElement(et_citation, "notes_list")
        for note in notes_list:
            et_citation_note = ET.SubElement(et_citation_notes_list, "note")
            et_citation_note_creator = ET.SubElement(et_citation_note, "user")
            et_citation_note_creator.text = str(note.creator_id)
            et_citation_note_general = ET.SubElement(et_citation_note, "general")
            et_citation_note_general.text = note.general
            et_citation_note_population = ET.SubElement(et_citation_note, "population")
            et_citation_note_population.text = note.population
            # NOTE(review): the element is tagged "ec" but holds the
            # intervention/comparator ("ic") note -- confirm consumers
            # expect this tag name before changing it.
            et_citation_note_ic = ET.SubElement(et_citation_note, "ec")
            et_citation_note_ic.text = note.ic
            et_citation_note_outcome = ET.SubElement(et_citation_note, "outcome")
            et_citation_note_outcome.text = note.outcome
    return
def _create_reviews(p_id, iter_size, which_iter):
    """Run active-learning replay experiments against project *p_id*.

    For each iteration: clone the project's citations into a fresh Project,
    seed it with a random sample of k_init labeled citations, then loop --
    make predictions, export them to a CSV, and replay labels for the
    k_inc highest-ranked still-unlabeled citations -- until every citation
    is labeled.  DESTRUCTIVE on first run: deletes all citations/labels
    belonging to other projects (guarded by a lock file).
    """
    lock_file_path = join(dirname(abspath(__file__)), '_delete_lock.lck')
    if not isfile(lock_file_path):
        # first run only: strip the db down to project p_id, then drop a
        # lock file so this never happens again
        Session.query(
            model.Citation).filter(model.Citation.project_id != p_id).delete()
        Session.query(
            model.Label).filter(model.Label.project_id != p_id).delete()
        Session.commit()
        open(lock_file_path, 'w+').close()
    # hard-coded experiment parameters: owner user id, initial sample
    # size, and per-round increment
    u_id = 2629
    k_init = 400
    c_count = len(
        Session.query(model.Citation).filter_by(project_id=p_id).all())
    k_inc = 100
    for itercount in range(iter_size * which_iter,
                           iter_size * which_iter + iter_size):
        ### THIS is the code for one run of the experiment
        ## labeled citation counter
        labeled_citation_counter = 0
        labels = Session.query(model.Label).filter_by(project_id=p_id).all()
        user = Session.query(model.User).filter_by(id=u_id).first()
        citations = Session.query(
            model.Citation).filter_by(project_id=p_id).all()
        print len(citations)
        c_count = len(citations)
        # r_sample maps sampled (source) citation ids to their labels
        r_sample = defaultdict(list)
        sample_indexes = sample(range(c_count), k_init)
        C_r = []
        for ii in sample_indexes:
            C_r.append(citations[ii])
        for cc in C_r:
            for ll in Session.query(model.Label).filter_by(
                    project_id=p_id).filter_by(study_id=cc.id).all():
                r_sample[ll.study_id].append(ll)
        # fresh project that will receive the cloned citations
        new_review = model.Project()
        new_review.leaders.append(user)
        new_review.initial_round_size = 0
        new_review.tag_privacy = True
        Session.add(new_review)
        Session.flush()
        # state_dict: new citation id -> 0 unlabeled / 1 seed / 2 replayed
        # citation_dict: new citation id -> source citation id
        state_dict = defaultdict(int)
        citation_dict = {}
        for c in citations:
            citation = model.Citation()
            citation.project_id = new_review.id
            citation.title = c.title
            citation.abstract = c.abstract
            citation.keywords = c.keywords
            citation.refman = c.refman
            model.Session.add(citation)
            Session.flush()
            citation_dict[citation.id] = c.id
            if c.id in r_sample:
                # citation was in the seed sample: copy its labels over
                labeled_citation_counter += 1
                state_dict[citation.id] = 1
                for t in r_sample[c.id]:
                    label = model.Label()
                    label.project_id = new_review.id
                    label.study_id = citation.id
                    label.label = t.label
                    model.Session.add(label)
        print new_review.id
        Session.commit()
        ## i is a counter for the current increment
        i = 0
        while True:
            ## we want to change the increment size if there are a certain number of citations is labeled
            #if labeled_citation_counter > 15000:
            #    k_inc = 2000
            #elif labeled_citation_counter > 5000:
            #    k_inc = 1000
            #else:
            #    k_inc = 500
            r_sample = defaultdict(list)
            print "EXPERIMENT NO: " + str(itercount)
            make_predictions(new_review.id)
            ######################## here's where I record the results
            preds_for_review = Session.query(model.Prediction).filter(
                model.Prediction.project_id == new_review.id).all()
            path_to_preds_out = os.path.join(
                "_exports",
                "predictions_%d_%d_of_%d.csv" % (p_id, i, itercount))
            with open(path_to_preds_out, 'w+') as fout:
                csv_out = csv.writer(fout)
                preds_file_headers = [
                    "citation_id", "refman", "title",
                    "predicted p of being relevant",
                    "'hard' screening prediction*", "state"
                ]
                csv_out.writerow(preds_file_headers)
                sorted_preds = sorted(preds_for_review,
                                      key=lambda x: x.predicted_probability,
                                      reverse=True)
                for pred in sorted_preds:
                    citation = Session.query(model.Citation).filter(
                        model.Citation.id == pred.study_id).first()
                    #citation = self._get_citation_from_id(pred.study_id)
                    citation_title = citation.title.encode('ascii', 'ignore')
                    row_str = [
                        citation.id, citation.refman, citation_title,
                        pred.predicted_probability, pred.prediction,
                        state_dict[citation.id]
                    ]
                    csv_out.writerow(row_str)
            ######################### ---------------------------
            i += 1
            if labeled_citation_counter >= c_count:
                break
            # P_a: up to k_inc highest-probability, still-unlabeled predictions
            P_a = []
            for pa in Session.query(model.Prediction).filter_by(
                    project_id=new_review.id).order_by(
                        model.Prediction.predicted_probability.desc()).all():
                if state_dict[pa.study_id] == 0:
                    P_a.append(pa)
                if len(P_a) == k_inc:
                    break
            if len(P_a) == 0:
                # no predictions available: fall back to any unlabeled citations
                print "~~~NO PREDS!!!"
                ccc = [
                    label for label in Session.query(model.Citation.id).filter_by(
                        project_id=new_review.id).filter(
                            ~model.Citation.labels.any()).limit(k_inc)
                ]
                print len(ccc)
                for cc in ccc:
                    labeled_citation_counter += 1
                    state_dict[cc.id] = 1
                    for ll in Session.query(model.Label).filter_by(
                            study_id=citation_dict[cc.id]).all():
                        label = model.Label()
                        label.project_id = new_review.id
                        label.study_id = cc.id
                        label.label = ll.label
                        model.Session.add(label)
            else:
                # replay the source project's labels for the selected citations
                for pp in P_a:
                    labeled_citation_counter += 1
                    state_dict[pp.study_id] = 2
                    for ll in Session.query(
                            model.Label).filter_by(project_id=p_id).filter_by(
                                study_id=citation_dict[pp.study_id]).all():
                        label = model.Label()
                        label.project_id = new_review.id
                        label.study_id = pp.study_id
                        label.label = ll.label
                        model.Session.add(label)
            Session.commit()
            print len(
                Session.query(
                    model.Label).filter_by(project_id=new_review.id).all())
    return
def write_labels(self):
    """Export all labels for this project as a CSV file.

    Builds ``self.write_buffer`` (one comma-joined string per row), writes it
    to ``<STATIC_FILES_PATH>/exports/labels_<project_id>.csv`` and returns the
    public URL of the written file.

    Columns come from ``self.fields`` plus one column per labeler (a
    synthesized "consensus" column is always included) and, when requested,
    per-labeler notes columns.  After the labeled citations, a section of
    rows for citations nobody has labeled yet is appended.
    """
    # get fields
    review_q = Session.query(model.Project)
    # NOTE(review): `review` is never used below; the .one() call's only
    # effect is to raise if project_id doesn't identify exactly one project
    # -- confirm whether that validation is intentional.
    review = review_q.filter(model.Project.id == self.project_id).one()
    self.all_labelers = self._get_participants_for_review(self.project_id)

    ## some helpers
    none_to_str = lambda x: "" if x is None else x
    zero_to_none = lambda x: "none" if x==0 else x

    # NOTE(review): this aliases self.fields -- the labeler/notes column
    # names appended below mutate the attribute in place.  Presumably a
    # fresh exporter is constructed per export; verify against callers.
    fields_to_export = self.fields

    # map citation ids to dictionaries that, in turn, map
    # usernames to labels
    citation_to_lbls_dict = {}
    self.all_citations = [cit.id for cit in self._get_citations_for_review(self.project_id)]
    # citation id -> "has at least one label"; drives the unlabeled dump below
    citations_labeled_dict = {}
    for cit in self.all_citations:
        citations_labeled_dict[cit]=False

    # likewise, for notes
    # citation_to_notes_dict = {}
    if "notes" in fields_to_export:
        self._build_notes_dict()
    if "tags" in fields_to_export:
        self._build_tags_dict()

    # we filter the citations list (potentially)
    citations_to_export = []
    # for efficiency reasons, we keep track of whether we need
    # create a new empty dictionary for the current citation
    last_citation_id = None
    labeler_names = ["consensus"] # always export the consensus

    # first collect labels for all citations that pass our
    # filtering criteria.  Rows are ordered by citation id so that all
    # labels for one citation arrive consecutively.
    for citation, label in Session.query(\
                model.Citation, model.Label).filter(model.Citation.id==model.Label.study_id).\
                filter(model.Label.project_id==self.project_id).order_by(model.Citation.id).all():
        # the above gives you all labeled citations for this review
        # i.e., citations that have at least one label
        citations_labeled_dict[citation.id]=True
        if self.lbl_filter_f(label):
            cur_citation_id = citation.id
            if last_citation_id != cur_citation_id:
                citation_to_lbls_dict[citation.id] = {}
                # citation_to_notes_dict[cur_citation_id] = {}
                citations_to_export.append(citation)

            # NOTE that we are assuming unique user names per-review
            labeler = self._get_username_from_id(label.user_id)
            if not labeler in labeler_names:
                labeler_names.append(labeler)

            citation_to_lbls_dict[cur_citation_id][labeler] = label.label
            last_citation_id = cur_citation_id
            # note that this will only contain entries for reviews that have
            # been labeled! i.e., notes made on unlabeled citations are not
            # reflected here.
            # if "notes" in fields_to_export
            #     citation_to_notes_dict[cur_citation_id][labeler] = \
            #         self._get_notes_for_citation(cur_citation_id, label.user_id)

    # we automatically export all labeler's labels
    for labeler in labeler_names:
        fields_to_export.append(labeler)

    # finally, export notes (if asked)
    notes_fields = ["general", "population", "intervention/comparator", "outcome"]
    if "notes" in fields_to_export:
        fields_to_export.remove("notes")
        # we append all labelers notes
        for labeler in labeler_names:
            if labeler != "consensus":
                for notes_field in notes_fields:
                    fields_to_export.append("%s notes (%s)" % (notes_field, labeler))

    # header row first, then one row per exported (labeled) citation
    self.write_buffer = [",".join(fields_to_export)]
    for citation in citations_to_export:
        cur_line = []
        for field in fields_to_export:
            if field == "(internal) id":
                cur_line.append("%s" % citation.id)
            elif field == "(source) id":
                cur_line.append("%s" % citation.refman)
            elif field == "pubmed id":
                cur_line.append("%s" % zero_to_none(citation.pmid))
            elif field == "abstract":
                # free-text fields are wrapped in double quotes; any embedded
                # double quotes are flattened to single quotes
                cur_line.append('"%s"' % none_to_str(citation.abstract).replace('"', "'"))
            elif field == "title":
                cur_line.append('"%s"' % citation.title.replace('"', "'"))
            elif field == "keywords":
                cur_line.append('"%s"' % citation.keywords.replace('"', "'"))
            elif field == "journal":
                cur_line.append('"%s"' % none_to_str(citation.journal))
            elif field == "authors":
                cur_line.append('"%s"' % "".join(citation.authors))
            elif field == "tags":
                #cur_tags = self._get_tags_for_citation(citation.id)
                cur_tags = self.citation_to_tags_dict[citation.id]
                cur_line.append('"%s"' % ",".join(cur_tags))
            elif field in labeler_names:
                cur_labeler = field
                cur_lbl = "o"  # "o" == no label from this labeler
                cit_lbl_d = citation_to_lbls_dict[citation.id]
                if cur_labeler in cit_lbl_d:
                    cur_lbl = str(cit_lbl_d[cur_labeler])
                # create a consensus label automagically in cases where
                # there is unanimous agreement
                elif cur_labeler == "consensus":
                    if len(set(cit_lbl_d.values()))==1:
                        if len(cit_lbl_d) > 1:
                            # if at least two people agree (and none disagree), set the
                            # consensus label to reflect this
                            cur_lbl = str(cit_lbl_d.values()[0])
                        else:
                            # then only one person has labeled it --
                            # consensus is kind of silly
                            cur_lbl = "o"
                    else:
                        # no consensus!
                        cur_lbl = "x"
                cur_line.append(cur_lbl)
            elif "notes" in field:
                # notes field
                # this is kind of hacky -- we first parse out the labeler
                # name from the column header string assembled above and
                # then get a user id from this.
                get_labeler_name_from_str = lambda x: x.split("(")[1].split(")")[0]
                cur_labeler = get_labeler_name_from_str(field)
                # @TODO not sure what we should do in consensus case...
                if cur_labeler == "consensus":
                    cur_line.append("")
                else:
                    cur_note = None
                    cur_notes_d = self.citation_to_notes_dict[citation.id]
                    if cur_labeler in cur_notes_d:
                        cur_note = cur_notes_d[cur_labeler]
                    if cur_note is None:
                        cur_line.append("")
                    else:
                        # dispatch on the column header assembled above
                        notes_field = field
                        if "general" in notes_field:
                            cur_line.append("\"%s\"" % cur_note.general.replace('"', "'"))
                        elif "population" in notes_field:
                            cur_line.append("\"%s\"" % cur_note.population.replace('"', "'"))
                        elif "outcome" in notes_field:
                            cur_line.append("\"%s\"" % cur_note.outcome.replace('"', "'"))
                        else:
                            # intervention/comparator
                            cur_line.append("\"%s\"" % cur_note.ic.replace('"', "'"))
        self.write_buffer.append(",".join(cur_line))

    # exporting *all* (including unlabeled!) citations, per Ethan's request
    #-- may want to make this optional
    self.write_buffer.append("citations that are not yet labeled by anyone")
    # jj 2014-08-20: Request to include citation information even for those citations that have not
    # been labeled yet.
    unlabeled_citation_ids = [cit for cit in citations_labeled_dict if not citations_labeled_dict[cit]]
    unlabeled_citations = Session.query(model.Citation).filter(model.Citation.id.in_(unlabeled_citation_ids)).all()
    for citation in unlabeled_citations:
        cur_line = []
        for field in fields_to_export:
            # only bibliographic fields are emitted for unlabeled citations;
            # labeler/notes columns are simply skipped, so these rows are
            # shorter than the header row.
            if field == "(internal) id":
                cur_line.append("%s" % citation.id)
            elif field == "(source) id":
                cur_line.append("%s" % citation.refman)
            elif field == "pubmed id":
                cur_line.append("%s" % zero_to_none(citation.pmid))
            elif field == "abstract":
                cur_line.append('"%s"' % none_to_str(citation.abstract).replace('"', "'"))
            elif field == "title":
                cur_line.append('"%s"' % citation.title.replace('"', "'"))
            elif field == "keywords":
                cur_line.append('"%s"' % citation.keywords.replace('"', "'"))
            elif field == "journal":
                cur_line.append('"%s"' % none_to_str(citation.journal))
            elif field == "authors":
                cur_line.append('"%s"' % "".join(citation.authors))
        self.write_buffer.append(",".join(cur_line))

    # write the buffer out; create the exports directory on first use
    path_to_export = os.path.join(STATIC_FILES_PATH, "exports", "labels_%s.csv" % self.project_id)
    try:
        fout = open(path_to_export, 'w')
    except IOError:
        os.makedirs(os.path.dirname(path_to_export))
        fout = open(path_to_export, 'w')
    lbls_str = "\n".join(self.write_buffer)
    # drop characters that can't be utf-8 encoded rather than failing the export
    lbls_str = lbls_str.encode("utf-8", "ignore")
    fout.write(lbls_str)
    fout.close()
    return "%sexports/labels_%s.csv" % (url('/', qualified=True), self.project_id)
# One-off repair script: re-attach refman ids that were dropped when a
# project (id 219) was imported.  Matches citations by exact title against
# a tab-delimited export file and reports how many matched.
import csv  # bug fix: csv.DictReader was used below without importing csv

from pylons import config
from abstrackr.config.environment import load_environment
from abstrackr.model.meta import Session
import abstrackr.model as model
from sqlalchemy import and_

# NOTE(review): `appconfig` comes from paste.deploy (from paste.deploy
# import appconfig) -- confirm it is imported elsewhere in this file.
conf = appconfig('config:production.ini', relative_to='.')
load_environment(conf.global_conf, conf.local_conf)

### This is fixing Emily's project. The refman ids weren't saved when she imported the project
PROJECT_ID = 219
FILE_PATH = './Abstraktr_Update_Lit_Review_11.12.13.txt'

citations_q = Session.query(model.Citation)

found = 0
not_found = 0
with open(FILE_PATH, 'r') as f:
    reader = csv.DictReader(f, delimiter='\t')
    for row in reader:
        #print(row['id'], row['title'], row['abstract'])
        citation = citations_q.filter_by(title=row['title'], project_id=PROJECT_ID).first()
        if not citation:
            print('could not find title matching with %s' % row['title'])
            not_found += 1
        else:
            found += 1

# bug fix: the counters were computed but never reported; summarize so the
# operator can sanity-check the match rate.
print('matched %d citations; %d titles not found' % (found, not_found))
def _text_for_tag(self, tag_id):
    """Return the display text of the TagType row with the given id.

    ``.one()`` raises if `tag_id` does not identify exactly one row.
    """
    matching_tag = (
        Session.query(model.TagType)
        .filter(model.TagType.id == tag_id)
        .one()
    )
    return matching_tag.text
def my_work(self):
    """Render the "my work" dashboard tab for the logged-in user.

    Loads the user's assignments, splits them into outstanding/finished
    (showing only 'initial' assignments for reviews that have one), and
    returns the rendered dashboard template.  All state is passed to the
    template via the Pylons context object ``c``.
    """
    person = request.environ.get('repoze.who.identity')['user']
    c.person = person

    user = controller_globals._get_user_from_email(c.person.email)
    if not user:
        log.error('''\
Hum...fetching user from the database returned False. We need to investigate.
Go remove the catch all in controller_globals.py, method _get_user_from_email()
to see which OperationalError is being raised
''')

    # Citation-display preferences may still be NULL for accounts created
    # before these columns existed; default them to True.
    # Bug fix: the old code set user.show_authors/user.show_keywords in the
    # NULL case but forgot to assign c.show_authors/c.show_keywords, leaving
    # the template context unset on that path.
    c.show_journal = user.show_journal if user.show_journal is not None else True
    if user.show_authors is not None:
        c.show_authors = user.show_authors
    else:
        user.show_authors = True
        c.show_authors = True
    if user.show_keywords is not None:
        c.show_keywords = user.show_keywords
    else:
        user.show_keywords = True
        c.show_keywords = True

    # pull all assignments for this person
    assignment_q = Session.query(model.Assignment)
    all_assignments = assignment_q.filter(model.Assignment.user_id == person.id).all()

    # This process is incredibly slow. Take it out for now and find out
    # why the .done and .done_so_far field on assignment is off sometimes.
    #self._set_assignment_done_status(all_assignments)
    self._clear_this_user_locks(all_assignments)

    # Build assignment completion status dictionary
    c.d_completion_status = self._get_assignment_completion_status(all_assignments)
    c.outstanding_assignments = [a for a in all_assignments if not a.done]

    #####
    # for any review that has an outstanding initial assignment, we show
    # *only* that assignment, thereby forcing participants to finish
    # initial assignments before moving on to other assignments.
    # fix for issue #5.
    ####
    reviews_with_initial_assignments = [
        assignment.project_id for assignment in c.outstanding_assignments
        if assignment.assignment_type == "initial"]

    # now remove other (non-initial) assignments for reviews
    # that have an initial assignment
    c.outstanding_assignments = [
        assignment for assignment in c.outstanding_assignments
        if assignment.project_id not in reviews_with_initial_assignments
        or assignment.assignment_type == "initial"]

    c.finished_assignments = [a for a in all_assignments if a.done]

    c.participating_projects = user.member_of_projects
    c.review_ids_to_names_d = self._get_review_ids_to_names_d(c.participating_projects)

    c.my_work = True
    c.my_projects = False
    return render('/accounts/dashboard.mako')