def sync_document_terms(db_session: SessionLocal, project: Project):
    """Performs term extraction from known documents."""
    p = plugin_service.get_active_instance(
        db_session=db_session, plugin_type="storage", project_id=project.id
    )

    if not p:
        log.debug("Tried to sync document terms but couldn't find any active storage plugins.")
        return

    terms = term_service.get_all(db_session=db_session, project_id=project.id).all()
    log.debug(f"Fetched {len(terms)} terms from database.")

    term_strings = [t.text.lower() for t in terms if t.discoverable]
    phrases = build_term_vocab(term_strings)
    matcher = build_phrase_matcher("dispatch-term", phrases)

    documents = get_all(db_session=db_session)
    for doc in documents:
        log.debug(f"Processing document. Name: {doc.name}")

        try:
            if "sheet" in doc.resource_type:
                mime_type = "text/csv"
            else:
                mime_type = "text/plain"

            doc_text = p.instance.get(doc.resource_id, mime_type)
            extracted_terms = list(set(extract_terms_from_text(doc_text, matcher)))

            matched_terms = (
                db_session.query(Term)
                .filter(func.upper(Term.text).in_([func.upper(t) for t in extracted_terms]))
                .all()
            )

            log.debug(
                f"Extracted the following terms from {doc.weblink}. Terms: {extracted_terms}"
            )

            if matched_terms:
                doc.terms = matched_terms
                db_session.commit()
        except Exception as e:
            # even if one document fails we don't want them to all fail
            log.exception(e)
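
# The matcher helpers used above aren't defined in this module. A minimal
# sketch of how they could be built on spaCy's PhraseMatcher follows; the
# names mirror the calls above, but the spaCy-based bodies are an illustrative
# assumption, not necessarily this project's implementation.
import spacy
from spacy.matcher import PhraseMatcher

# A blank pipeline is enough here: phrase matching only needs the tokenizer.
_nlp = spacy.blank("en")


def build_term_vocab(terms):
    """Tokenizes each term string into a pattern Doc for the matcher."""
    return [_nlp.make_doc(term) for term in terms]


def build_phrase_matcher(name, phrases):
    """Builds a matcher keyed on lowercased token text (attr="LOWER"), so the
    lowercased vocabulary above still matches 'AWS' or 'Aws' in documents."""
    matcher = PhraseMatcher(_nlp.vocab, attr="LOWER")
    matcher.add(name, phrases)
    return matcher


def extract_terms_from_text(text, matcher):
    """Yields every vocabulary phrase found in the text."""
    doc = _nlp.make_doc(text)
    for _match_id, start, end in matcher(doc):
        yield doc[start:end].text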
def auto_tagger(db_session: SessionLocal, project: Project):
    """Attempts to take existing tags and associate them with incidents."""
    tags = tag_service.get_all(db_session=db_session, project_id=project.id).all()
    log.debug(f"Fetched {len(tags)} tags from database.")

    tag_strings = [t.name.lower() for t in tags if t.discoverable]
    phrases = build_term_vocab(tag_strings)
    matcher = build_phrase_matcher("dispatch-tag", phrases)

    # All incidents below belong to the same project, so we can fetch the
    # storage plugin once instead of per incident.
    plugin = plugin_service.get_active_instance(
        db_session=db_session, project_id=project.id, plugin_type="storage"
    )

    if not plugin:
        log.debug("Tried to auto-tag incidents but couldn't find any active storage plugins.")
        return

    for incident in get_all(db_session=db_session, project_id=project.id).all():
        log.debug(f"Processing incident. Name: {incident.name}")

        doc = incident.incident_document
        if doc:
            try:
                mime_type = "text/plain"
                text = plugin.instance.get(doc.resource_id, mime_type)
            except Exception as e:
                log.debug(f"Failed to get document. Reason: {e}")
                log.exception(e)
                continue

            extracted_tags = list(set(extract_terms_from_text(text, matcher)))

            matched_tags = (
                db_session.query(Tag)
                .filter(func.upper(Tag.name).in_([func.upper(t) for t in extracted_tags]))
                .all()
            )

            incident.tags.extend(matched_tags)
            db_session.commit()

            log.debug(
                f"Associating tags with incident. Incident: {incident.name}, Tags: {extracted_tags}"
            )
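
# Both jobs above map extracted strings back to rows with the same
# case-insensitive comparison: SQL UPPER() is applied to both sides of the
# membership test, so an extracted "aws" matches a stored "AWS". A
# self-contained sketch of that pattern (the model and data here are
# stand-ins for illustration, not Dispatch's models):
from sqlalchemy import Column, Integer, String, create_engine, func
from sqlalchemy.orm import Session, declarative_base

_Base = declarative_base()


class _SketchTag(_Base):
    __tablename__ = "sketch_tag"
    id = Column(Integer, primary_key=True)
    name = Column(String)


def _demo_case_insensitive_match():
    engine = create_engine("sqlite://")
    _Base.metadata.create_all(engine)
    with Session(engine) as session:
        session.add_all([_SketchTag(name="AWS"), _SketchTag(name="Cassandra")])
        extracted = ["aws", "kafka"]
        # UPPER() on both sides makes the membership test case-insensitive.
        matched = (
            session.query(_SketchTag)
            .filter(func.upper(_SketchTag.name).in_([t.upper() for t in extracted]))
            .all()
        )
        return [t.name for t in matched]  # -> ["AWS"]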
def list_tasks(client: Any, file_id: str):
    """Returns all tasks in file."""
    doc = get_file(client, file_id)
    document_meta = {"document": {"id": file_id, "name": doc["name"]}}

    all_comments = list_comments(client, file_id)
    task_comments = filter_comments(all_comments)

    # This is a dirty hack: Google doesn't return email addresses for comment
    # authors (the API docs conflict on this), so we match authors to known
    # individuals by display name instead.
    # https://developers.google.com/drive/api/v2/reference/comments#resource
    from dispatch.database.core import SessionLocal
    from dispatch.individual.models import IndividualContact

    db_session = SessionLocal()

    tasks = []
    try:
        for t in task_comments:
            status = get_task_status(t)
            assignees = [{"individual": {"email": x}} for x in get_assignees(t["content"])]
            description = t.get("quotedFileContent", {}).get("value", "")
            tickets = get_tickets(t["replies"])

            task_meta = {
                "task": {
                    "resource_id": t["id"],
                    "description": description,
                    "created_at": t["createdTime"],
                    "assignees": assignees,
                    "tickets": tickets,
                    "weblink": f'https://docs.google.com/a/{GOOGLE_DOMAIN}/document/d/{file_id}/edit?disco={t["id"]}',
                }
            }

            owner = (
                db_session.query(IndividualContact)
                .filter(IndividualContact.name == t["author"]["displayName"])
                .first()
            )
            if owner:
                task_meta["task"].update({"owner": {"individual": {"email": owner.email}}})

            task_meta["task"].update(status)
            tasks.append({**document_meta, **task_meta})
    finally:
        db_session.close()

    return tasks
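
# The Drive helpers used by list_tasks (get_file, list_comments, etc.) live
# elsewhere. For reference, a minimal sketch of how list_comments can page
# comments out of the Drive v3 API; the body is an assumption based on the
# public API, not necessarily this project's implementation.
def list_comments(client, file_id):
    """Fetches all comments on a file, following nextPageToken pagination.

    Assumes `client` is a googleapiclient Drive v3 service object; Drive's
    comments.list call requires an explicit `fields` selector.
    """
    comments = []
    page_token = None
    while True:
        response = (
            client.comments()
            .list(fileId=file_id, fields="*", pageSize=100, pageToken=page_token)
            .execute()
        )
        comments.extend(response.get("comments", []))
        page_token = response.get("nextPageToken")
        if not page_token:
            return comments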