def save_recording(self):
    """Persist the finished recording into the current project.

    Stores the screen/camera media paths, converts every user mark and
    (optionally) every detected slide transition into model.Annotation
    objects, saves the project JSON, and notifies the user.
    """
    self.setEnabled(False)
    import model
    self.project.main_media = self.screen_file
    self.project.secondary_media = self.cam_file
    # Bug fix: the counter was initialized to 1 and never incremented,
    # so every annotation was named "mark_1". enumerate numbers them.
    for index, mark in enumerate(self.marks, start=1):
        timestamp = mark[1] - self.start_recording_time
        ann = model.Annotation("mark_" + str(index), mark[0])
        # QTime.addSecs expects an int; total_seconds() returns a float.
        ann.annotation_time = QtCore.QTime(0, 0, 0).addSecs(
            int(timestamp.total_seconds()))
        self.project.add_annotation(ann)
    if self.ui.ckb_screen.isChecked():
        from transitiondetection import TransitionDetector
        detector = TransitionDetector()
        pois = detector.detect_transitions(self.screen_file)
        # Same counter fix for slide-transition annotations.
        for index, (begin_time, _) in enumerate(pois, start=1):
            ann = model.Annotation("slide_" + str(index),
                                   "SLIDE_TRANSITION")
            ann.annotation_time = QtCore.QTime(0, 0, 0).addSecs(begin_time)
            self.project.add_annotation(ann)
    self.save_json_project()
    self.setEnabled(True)
    QtGui.QMessageBox.information(self, "Project-mm-2015",
                                  "Processamento Finalizado!")
def parse_babelfy(fn, cl, resp, cpsob, text, redo_ents):
    """
    See L{parse}
    @note: only accepts annotations that have a DBpedia page
    """
    if not resp:
        return {}
    results = {}
    min_conf = cfg.MinConfs.vals[cfg.mywscheme][cl.name][cfg.myevmode]
    for item in json.loads(resp):
        score = item["score"]
        if cfg.use_confidence and float(score) < min_conf:
            continue
        # skip annotations without a DBpedia page
        dbp_url = item["DBpediaURL"]
        if dbp_url is None or dbp_url == "":
            continue
        frag = item["charFragment"]
        start, end = frag["start"], frag["end"] + 1
        link = dbp_url.replace(cfg.DBPRESPREF, "")
        surface = text[start:end]
        key = CorpusMgr.create_mention_key(fn, start, end)
        cpsob.add_entity_to_corpus(link, cl.name, item, redo_ents=redo_ents)
        cpsob.add_mention_to_corpus(key, surface)
        annotation = md.Annotation(cpsob.mentions[key], cpsob.entities[link])
        annotation.fmention = utils.Utils.norm_mention(surface)
        annotation.confidence = score
        annotation.service = cl.name
        results[(start, end)] = annotation
    return results
def parse_tagme(fn, cl, resp, cpsob, redo_ents):
    """ See L{parse} """
    if not resp:
        return {}
    results = {}
    min_conf = cfg.MinConfs.vals[cfg.mywscheme][cl.name][cfg.myevmode]
    for an in resp["annotations"]:
        rho = float(an["rho"])
        if cfg.use_confidence and rho < min_conf:
            continue
        # "title" may be absent; skip those annotations entirely
        try:
            link = myutils.norm_label(an["title"])
            cpsob.add_entity_to_corpus(link, cl.name, an,
                                       redo_ents=redo_ents)
        except KeyError:
            continue
        surface = an["spot"]
        start, end = an["start"], an["end"]
        key = CorpusMgr.create_mention_key(fn, start, end)
        cpsob.add_mention_to_corpus(key, surface)
        annotation = md.Annotation(cpsob.mentions[key], cpsob.entities[link])
        annotation.fmention = utils.Utils.norm_mention(surface)
        annotation.confidence = rho
        annotation.service = cl.name
        results[(start, end)] = annotation
    return results
def parse_aida(fn, cl, resp, cpsob, redo_ents):
    """
    See L{parse}
    @type resp: json
    """
    if not resp:
        return {}
    anresp = {}
    entlist = resp["allEntities"]
    for ent in entlist:
        # confidence lives in resp["entityMetadata"], indexed by entity name
        meta = resp[u"entityMetadata"][ent]
        confidence = float(meta["importance"])
        if (cfg.use_confidence and confidence <
                cfg.MinConfs.vals[cfg.mywscheme][cl.name][cfg.myevmode]):
            continue
        # resp["mentions"] contains all the info stored here but confidence.
        # Robustness fix: the original indexed [0] into a filtered list and
        # raised IndexError when no mention pointed at the entity; skip such
        # entities instead.
        annot = next((ann for ann in resp["mentions"]
                      if "bestEntity" in ann
                      and ann["bestEntity"]["kbIdentifier"] == ent), None)
        if annot is None:
            continue
        start = int(annot["offset"])
        end = int(annot["offset"] + annot["length"])
        surface = annot["name"]
        link = myutils.norm_label(meta["readableRepr"])
        mtnkey = CorpusMgr.create_mention_key(fn, start, end)
        cpsob.add_entity_to_corpus(link, cl.name, ent, redo_ents=redo_ents)
        cpsob.add_mention_to_corpus(mtnkey, surface)
        anresp[(start, end)] = md.Annotation(cpsob.mentions[mtnkey],
                                             cpsob.entities[link])
        anresp[(start, end)].fmention = utils.Utils.norm_mention(surface)
        anresp[(start, end)].confidence = confidence
        anresp[(start, end)].service = cl.name
    return anresp
def parse_wminer(fn, cl, resp, cpsob, redo_ents):
    """ See L{parse} """
    if not resp:
        return {}
    parsed = {}
    jso = resp.json()
    min_conf = cfg.MinConfs.vals[cfg.mywscheme][cl.name][cfg.myevmode]
    for spot in jso['spots']:
        score = float(spot['score'])
        if cfg.use_confidence and score < min_conf:
            continue
        begin, stop = int(spot['start']), int(spot['end'])
        link = myutils.norm_label(spot['wikiname'])
        surface = spot['mention']
        key = CorpusMgr.create_mention_key(fn, begin, stop)
        cpsob.add_entity_to_corpus(link, cl.name, spot, redo_ents=redo_ents)
        cpsob.add_mention_to_corpus(key, surface)
        annotation = md.Annotation(cpsob.mentions[key], cpsob.entities[link])
        annotation.fmention = utils.Utils.norm_mention(surface)
        annotation.confidence = score
        annotation.service = cl.name
        parsed[(begin, stop)] = annotation
    return parsed
def normalize(cls, data: dict):
    """Build a model.Document from the current markups-dictionary format."""
    doc = model.Document()
    doc.current_id = data['currentId']
    doc.reference_view = data['referenceView']
    doc.ontology = data['ontology']
    doc.stepSize = data['stepSize']
    doc.camera_position = tuple(data['cameraPosition'])
    doc.camera_view_up = tuple(data['cameraViewUp'])
    for idx, entry in enumerate(data['markups'], start=1):
        markup = entry['markup']
        ann = model.Annotation()
        ann.name = f'Annotation {idx}'
        ann.orientation = entry['orientation']
        ann.representation_type = entry['representationType']
        ann.thickness = entry['thickness']
        ann.markup_type = markup['type']
        ann.coordinate_system = markup['coordinateSystem']
        # units are optional in this format
        if 'coordinateUnits' in markup:
            ann.coordinate_units = markup['coordinateUnits']
        ann.points.extend(tuple(cp['position'])
                          for cp in markup['controlPoints'])
        doc.annotations.append(ann)
    return doc
def normalize(cls, data: dict):
    """Convert the legacy single-markup format into a model.Document."""
    doc = model.Document()
    # this format only supports one markup; document-wide values come
    # from the currently selected one.
    doc.current_id = 0
    for idx, markup in enumerate(data['Markups']):
        if not markup['Selected']:
            continue
        doc.current_id = idx
        doc.reference_view = markup['ReferenceView']
        doc.ontology = markup['Ontology']
        doc.stepSize = markup['StepSize']
        doc.camera_position = tuple(markup['CameraPosition'])
        doc.camera_view_up = tuple(markup['CameraViewUp'])
        break
    for markup in data['Markups']:
        ann = model.Annotation()
        ann.name = markup['Label']
        ann.markup_type = 'ClosedCurve'
        ann.coordinate_system = 'LPS'
        ann.points = [(-pt['x'], -pt['y'], pt['z'])  # RAS → LPS conversion
                      for pt in markup['Points']]
        ann.thickness = markup['Thickness']
        ann.orientation = markup['SplineOrientation']
        ann.representation_type = markup['RepresentationType']
        doc.annotations.append(ann)
    return doc
def upload():
    """Define the upload route.

    Validates the submitted form, and if the PDB is not yet stored,
    saves the file, computes dssp and pross annotations, commits them,
    and redirects to the per-PDB results page.
    """
    form = UploadForm()
    if form.validate_on_submit():
        # check if the file already exists in the db:
        # try to get by PK and receive None if it does not exist.
        # e.g. filename = pdb1234.pdb -> PDBid = 1234 (last 4 chars of stem)
        PDBid = form.pdb_file.data.filename.split('.')[0][-4:]
        current_pdb = model.PDBFile.query.get(PDBid)
        if not current_pdb:
            # insert the data contained in the pdb into the db:
            # save the uploaded file to disk first
            filename = pdb_set.save(storage=form.pdb_file.data)
            path = pdb_set.path(filename)
            # compute annotations with both methods
            current_pdb = model.PDBFile(path)
            dssp_data = model.Annotation(pdb_id=current_pdb.id,
                                         method="dssp",
                                         result=annot.dsspAnnot(path))
            current_pdb.annotations.append(dssp_data)
            pross_data = model.Annotation(pdb_id=current_pdb.id,
                                          method="pross",
                                          result=annot.prossAnnot(path))
            current_pdb.annotations.append(pross_data)
            # add all annotations into the db (cascaded via current_pdb)
            db.session.add(current_pdb)
            db.session.commit()
            flask.flash("The pdb was added in the database")
        else:
            flask.flash("This pdb was already in the database")
        return flask.redirect(
            flask.url_for('resultsForOnePDB', PDBid=current_pdb.id,
                          unit=form.angle_unit.data))
    return flask.render_template('upload.html', form=form)
def normalize(cls, data: dict):
    """Convert the legacy multi-markup format into a model.Document."""
    doc = model.Document()
    # These keys must be present even though their values are unused:
    # conversion must still fail on other formats so that format
    # inference works correctly.
    for required in ("DefaultCameraPosition", "DefaultCameraViewUp",
                     "DefaultOntology", "DefaultReferenceView",
                     "DefaultRepresentationType", "DefaultSplineOrientation",
                     "DefaultStepSize", "DefaultThickness"):
        _ = data[required]
    # set document-wide values based on the currently selected markup.
    doc.current_id = 0
    for idx, markup in enumerate(data['Markups']):
        if not markup['Selected']:
            continue
        doc.current_id = idx
        doc.reference_view = markup['ReferenceView']
        doc.ontology = markup['Ontology']
        doc.stepSize = markup['StepSize']
        doc.camera_position = tuple(markup['CameraPosition'])
        doc.camera_view_up = tuple(markup['CameraViewUp'])
        break
    # copy markup-specific values
    for markup in data['Markups']:
        ann = model.Annotation()
        ann.name = markup['Label']
        ann.markup_type = 'ClosedCurve'
        ann.coordinate_system = 'LPS'
        ann.points = [(-pt['x'], -pt['y'], pt['z'])  # RAS → LPS conversion
                      for pt in markup['Points']]
        ann.thickness = markup['Thickness']
        ann.orientation = markup['SplineOrientation']
        ann.representation_type = markup['RepresentationType']
        doc.annotations.append(ann)
    return doc
def parse_spotstat(fn, cl, resp, cpsob, redo_ents): """ See L{parse} """ if not resp: return {} anresp = {} jso = resp.json() # annotations are in 'Resources' element of the response if "Resources" not in jso: return {} for an in jso["Resources"]: if (cfg.use_confidence and float(an["@similarityScore"]) < cfg.MinConfs.vals[cfg.mywscheme][cl.name][cfg.myevmode]): continue try: start = an["@offset"] end = int(an["@offset"]) + len(unicode(an["@surfaceForm"])) surface = an["@surfaceForm"] except KeyError: print "!! KeyError for annot: {}".format(repr(an)) continue try: # unquote takes and gives str, act accordingly # http://stackoverflow.com/questions/5139249 link = urllib.unquote( (an["@URI"].replace(cl.cfg.DBPRESPREF, u"")).encode("utf8")).decode("utf8") cpsob.add_entity_to_corpus(link, cl.name, an, redo_ents=redo_ents) except KeyError: continue mtnkey = CorpusMgr.create_mention_key(fn, start, end) cpsob.add_mention_to_corpus(mtnkey, surface) anresp[(start, end)] = md.Annotation(cpsob.mentions[mtnkey], cpsob.entities[link]) anresp[(start, end)].fmention = \ utils.Utils.norm_mention(surface) anresp[(start, end)].confidence = float(an["@similarityScore"]) anresp[(start, end)].service = cl.name return anresp
def parse_wminer_remote(fn, cl, resp, cpsob, redo_ents):
    """
    See L{parse}
    @deprecated
    @note: use L{parse_wminer} instead
    """
    if not resp:
        return {}
    posi2topic = {}
    # clean up response: payload is XML; malformed payloads are skipped
    # silently rather than raising
    try:
        tree = etree.fromstring(resp)
    except etree.XMLSyntaxError:
        return {}
    # the service echoes the submitted source text back in the request
    # element; mention surfaces are sliced out of it by offset below
    srctext = tree.xpath("//request/param[@name='source']")[0].text
    #TODO: Run deduplication here? (or at least give option?)
    for topic in tree.xpath("//detectedTopic"):
        # confidence filter uses the topic-level weight
        if (cfg.use_confidence and float(topic.attrib["weight"]) <
                cfg.MinConfs.vals[cfg.mywscheme][cl.name][cfg.myevmode]):
            continue
        # one topic may be referenced at several positions in the text
        for ref in topic.xpath("references//reference"):
            start = int(ref.attrib["start"])
            end = int(ref.attrib["end"])
            link = myutils.norm_label(topic.attrib["title"])
            surface = srctext[start:end]
            mtnkey = CorpusMgr.create_mention_key(fn, start, end)
            cpsob.add_entity_to_corpus(link, cl.name, ref,
                                       redo_ents=redo_ents)
            cpsob.add_mention_to_corpus(mtnkey, surface)
            posi2topic[(start, end)] = md.Annotation(cpsob.mentions[mtnkey],
                                                     cpsob.entities[link])
            posi2topic[(start, end)].fmention = \
                utils.Utils.norm_mention(surface)
            posi2topic[(start, end)].confidence = \
                float(topic.attrib["weight"])
            posi2topic[(start, end)].service = cl.name
    return posi2topic
def parse_raida(fn, cl, resp, cpsob, redo_ents):
    """
    See L{parse}
    @type resp: json
    """
    if not resp:
        return {}
    out = {}
    # access kept from the original: raises KeyError if the response has
    # no "allEntities" element, even though the value itself is unused
    entlist = resp["allEntities"]
    for mention in resp["mentions"]:
        if "bestEntity" not in mention:
            continue
        best = mention["bestEntity"]
        link = myutils.norm_label(
            best["kbIdentifier"].replace(cfg.AIDA_KBPREFIX, ""),
            svc=cl.name)
        confidence = float(best["disambiguationScore"])
        if (cfg.use_confidence and confidence <
                cfg.MinConfs.vals[cfg.mywscheme][cl.name][cfg.myevmode]):
            continue
        start = int(mention["offset"])
        end = start + int(mention["length"])
        surface = mention["name"]
        key = CorpusMgr.create_mention_key(fn, start, end)
        cpsob.add_entity_to_corpus(link, cl.name, mention,
                                   redo_ents=redo_ents)
        cpsob.add_mention_to_corpus(key, surface)
        annotation = md.Annotation(cpsob.mentions[key], cpsob.entities[link])
        annotation.fmention = utils.Utils.norm_mention(surface)
        annotation.confidence = confidence
        annotation.service = cl.name
        out[(start, end)] = annotation
    return out
def read_file(self, svc, cpsob, runid, ipt=None, oneoutforall=True,
              has_snbr=True, has_normcat=True):
    """
    Read a single annotation file. If no path is given, assumes that
    it's a file containing annots for a corpus, and figures out the path
    from the other keyword arguments. Otherwise reads the path given.
    @param svc: service for annotations
    @param cpsob: L{model.Corpus} obj
    @param runid: run-id for the annotations to read
    @param ipt: file name
    @param oneoutforall: if True, means that input contains annots for
    a directory
    @param has_snbr: if True, one of the last two columns is the sent nbr
    @param has_normcat: if True, one of the last two columns is the
    normalized categ
    @return: dict of {doc-key: {(start, end): md.Annotation}}
    """
    assert not (ipt is None and oneoutforall is False)
    # derive the file name from corpus/service/run-id, or use the path
    # given by the caller
    if ipt is None and oneoutforall:
        fn = "".join(("_".join((cpsob.name, svc, "all", runid)), ".txt"))
        ffn = os.path.join(self.cfg.outdir, fn)
    else:
        fn = ipt
        ffn = ipt
    annots = {}
    with codecs.open(ffn, "r", "utf8") as inf:
        try:
            line = inf.readline()
        except UnicodeDecodeError:
            print "UnicodeDecodeError. Skipping: {}".format(ffn)
            return annots
        while line:
            # skip header
            if line.startswith("doc\tmtn\tstart\tend\t"):
                line = inf.readline()
                continue
            sl = line.strip().split("\t")
            # sh shifts the column indices: per-corpus files carry the
            # doc name in column 0, per-file inputs do not
            if oneoutforall:
                ke = sl[0]
                sh = 0
            else:
                ke = os.path.basename(ffn)
                sh = 1
            # empty file name
            # NOTE(review): this 'continue' does not read the next line
            # first; if ke were ever empty the loop would never advance —
            # confirm whether this branch is actually reachable
            if not ke:
                continue
            annots.setdefault(ke, {})
            try:
                # confidence is in column 6-sh; drop rows below threshold
                if (self.cfg.use_confidence and float(sl[6-sh]) <
                        self.cfg.MinConfs.vals[self.cfg.mywscheme]\
                        [svc][self.cfg.myevmode]):
                    line = inf.readline()
                    continue
                start, end, link = int(sl[2 - sh]), int(sl[3 - sh]), sl[4 - sh]
                annots[ke].setdefault((start, end), {})
                mtnkey = self.cpsmgr.create_mention_key(ke, start, end)
                cpsob.add_entity_to_corpus(link, svc)
                cpsob.add_mention_to_corpus(mtnkey, sl[1 - sh])
                entity = cpsob.entities[link]
                mention = cpsob.mentions[mtnkey]
                annots[ke][(start, end)] = md.Annotation(mention, entity)
                annots[ke][(start, end)].fmention = sl[1 - sh]
                annots[ke][(start, end)].service = sl[5 - sh]
                annots[ke][(start, end)].confidence = \
                    float(sl[6-sh])
                # the sentence number and normalized category occupy the
                # last two columns; which is which depends on the flags
                try:
                    if has_snbr:
                        if has_normcat:
                            annots[ke][(start, end)].snbr = int(sl[-2])
                            annots[ke][(start, end)].normcat = sl[-1]
                        else:
                            annots[ke][(start, end)].snbr = int(sl[-1])
                    if has_normcat:
                        if has_snbr:
                            annots[ke][(start, end)].snbr = int(sl[-2])
                            annots[ke][(start, end)].normcat = sl[-1]
                        else:
                            annots[ke][(start, end)].normcat = sl[-1]
                except ValueError:
                    # malformed trailing columns: keep the annotation,
                    # just without snbr/normcat
                    if self.cfg.DBG:
                        print "ValueError: {}, {}".format(ffn, repr(sl))
                    pass
            except IndexError:
                # row with too few columns: report and skip it
                print "IndexError: {}, [{}]".format(ffn, line.strip())
                line = inf.readline()
                continue
            line = inf.readline()
    return annots
# Batch-import every PDB file found under data/ into the database,
# computing dssp and pross annotations for files not yet stored.
onlyfiles = [f for f in listdir('data/') if isfile(join('data/', f))]
for fname in onlyfiles:
    # check if the file already exists in the db:
    # try to get by PK and receive None if it does not exist.
    # e.g. filename = pdb1234.pdb -> PDBid = 1234 (last 4 chars of stem)
    PDBid = fname.split('.')[0][-4:]
    current_pdb = model.PDBFile.query.get(PDBid)
    if not current_pdb:
        # insert the data contained in the pdb into the db:
        path = join('data/', fname)
        # compute annotations with both methods
        current_pdb = model.PDBFile(path)
        dssp_data = model.Annotation(pdb_id=current_pdb.id, method="dssp",
                                     result=annot.dsspAnnot(path))
        current_pdb.annotations.append(dssp_data)
        pross_data = model.Annotation(pdb_id=current_pdb.id, method="pross",
                                      result=annot.prossAnnot(path))
        current_pdb.annotations.append(pross_data)
        # add all annotations into the db (cascaded via current_pdb)
        db.session.add(current_pdb)
        db.session.commit()