Exemplo n.º 1
0
    def save_recording(self):
        """Store the recorded media and their annotations in the project.

        Sets ``self.screen_file`` and ``self.cam_file`` as the project's main
        and secondary media, adds one annotation per entry of ``self.marks``
        and, when the ``ckb_screen`` checkbox is checked, one annotation per
        transition returned by ``TransitionDetector.detect_transitions``.
        The project is then written with ``save_json_project`` and a message
        box reports completion.

        The widget is disabled while this runs and is re-enabled afterwards,
        also when one of the steps raises.
        """
        self.setEnabled(False)
        try:
            import model
            self.project.main_media = self.screen_file
            self.project.secondary_media = self.cam_file

            # enumerate() numbers the marks so each annotation gets its own
            # name ("mark_1", "mark_2", ...).
            for index, mark in enumerate(self.marks, start=1):
                # presumably a datetime.timedelta: it must offer
                # total_seconds() -- confirm against the code filling marks
                elapsed = mark[1] - self.start_recording_time
                ann = model.Annotation("mark_" + str(index), mark[0])
                # addSecs() works on whole seconds; truncate explicitly
                # instead of handing it the float from total_seconds().
                ann.annotation_time = QtCore.QTime(0, 0, 0).addSecs(
                    int(elapsed.total_seconds()))
                self.project.add_annotation(ann)

            if self.ui.ckb_screen.isChecked():
                from transitiondetection import TransitionDetector
                detector = TransitionDetector()
                pois = detector.detect_transitions(self.screen_file)

                # Each item is a pair; only its first element is used.
                for index, (begin_time, _) in enumerate(pois, start=1):
                    ann = model.Annotation("slide_" + str(index),
                                           "SLIDE_TRANSITION")
                    ann.annotation_time = QtCore.QTime(0, 0,
                                                       0).addSecs(begin_time)
                    self.project.add_annotation(ann)

            self.save_json_project()
        finally:
            # Never leave the widget disabled, whatever happened above.
            self.setEnabled(True)

        QtGui.QMessageBox.information(self, "Project-mm-2015",
                                      "Processamento Finalizado!")
Exemplo n.º 2
0
    def parse_babelfy(fn, cl, resp, cpsob, text, redo_ents):
        """
        Parse a Babelfy response into annotations keyed by character span.
        See L{parse}
        @param fn: file name, used to create the mention keys
        @param cl: client object; its C{name} is used as the service name
        @param resp: JSON string with the response (decoded with json.loads)
        @param cpsob: corpus object that receives the entities and mentions
        @param text: annotated text; mention strings are sliced out of it
        @param redo_ents: forwarded to C{cpsob.add_entity_to_corpus}
        @return: dict mapping C{(start, end)} to L{md.Annotation}; empty
            when C{resp} is empty
        @note: only accepts annotations that have a DBpedia page
        """
        if not resp:
            return {}
        anresp = {}
        for res in json.loads(resp):
            if (cfg.use_confidence and float(res["score"]) <
                    cfg.MinConfs.vals[cfg.mywscheme][cl.name][cfg.myevmode]):
                continue
            dbp_url = res["DBpediaURL"]
            if dbp_url in (None, ""):
                continue
            start = res["charFragment"]["start"]
            # +1 so that end can be used as the exclusive bound of the slice
            end = res["charFragment"]["end"] + 1
            link = dbp_url.replace(cfg.DBPRESPREF, "")
            mention = text[start:end]
            # Stored as float, like the value compared with the threshold
            # above and like the confidence set by the other parsers.
            confidence = float(res["score"])
            mtnkey = CorpusMgr.create_mention_key(fn, start, end)

            cpsob.add_entity_to_corpus(link, cl.name, res, redo_ents=redo_ents)
            cpsob.add_mention_to_corpus(mtnkey, mention)

            annotation = md.Annotation(cpsob.mentions[mtnkey],
                                       cpsob.entities[link])
            annotation.fmention = utils.Utils.norm_mention(mention)
            annotation.confidence = confidence
            annotation.service = cl.name
            anresp[(start, end)] = annotation
        return anresp
Exemplo n.º 3
0
    def parse_tagme(fn, cl, resp, cpsob, redo_ents):
        """
        Convert the C{"annotations"} list of a response into annotations
        keyed by C{(start, end)}.
        See L{parse}
        @param fn: file name, used to create the mention keys
        @param cl: client object; its C{name} is used as the service name
        @param resp: mapping with an C{"annotations"} list
        @param cpsob: corpus object that receives the entities and mentions
        @param redo_ents: forwarded to C{cpsob.add_entity_to_corpus}
        @return: dict mapping C{(start, end)} to L{md.Annotation}; empty
            when C{resp} is empty
        """
        if not resp:
            return {}
        by_span = {}
        for item in resp["annotations"]:
            if cfg.use_confidence:
                min_conf = cfg.MinConfs.vals[cfg.mywscheme][cl.name][
                    cfg.myevmode]
                if float(item["rho"]) < min_conf:
                    continue
            # A KeyError while normalising or registering the entity makes
            # the annotation be skipped.
            try:
                link = myutils.norm_label(item["title"])
                cpsob.add_entity_to_corpus(link,
                                           cl.name,
                                           item,
                                           redo_ents=redo_ents)
            except KeyError:
                continue
            spot = item["spot"]
            span = (item["start"], item["end"])
            mtnkey = CorpusMgr.create_mention_key(fn, span[0], span[1])

            cpsob.add_mention_to_corpus(mtnkey, spot)
            annotation = md.Annotation(cpsob.mentions[mtnkey],
                                       cpsob.entities[link])
            by_span[span] = annotation
            annotation.fmention = utils.Utils.norm_mention(spot)
            annotation.confidence = float(item["rho"])
            annotation.service = cl.name
        return by_span
Exemplo n.º 4
0
 def parse_aida(fn, cl, resp, cpsob, redo_ents):
     """
     Build one annotation per entity listed in C{resp["allEntities"]}.
     See L{parse}
     @param fn: file name, used to create the mention keys
     @param cl: client object; its C{name} is used as the service name
     @type resp: json
     @param cpsob: corpus object that receives the entities and mentions
     @param redo_ents: forwarded to C{cpsob.add_entity_to_corpus}
     @return: dict mapping C{(start, end)} to L{md.Annotation}; empty when
         C{resp} is empty
     @note: for each entity only the first mention that has it as best
         entity is used; an entity without such a mention is skipped
     """
     if not resp:
         return {}
     anresp = {}
     for ent in resp["allEntities"]:
         # confidence is in resp["entityMetadata"], indexed by entity name
         metadata = resp[u"entityMetadata"][ent]
         confidence = float(metadata["importance"])
         if (cfg.use_confidence and confidence <
                 cfg.MinConfs.vals[cfg.mywscheme][cl.name][cfg.myevmode]):
             continue
         # resp["mentions"] holds everything that is stored but the
         # confidence. Take the first matching mention; None (instead of an
         # IndexError) when the entity is not the best entity of any mention.
         annot = next((ann for ann in resp["mentions"]
                       if "bestEntity" in ann
                       and ann["bestEntity"]["kbIdentifier"] == ent), None)
         if annot is None:
             continue
         # Convert each value on its own so that string values are added as
         # numbers rather than concatenated.
         start = int(annot["offset"])
         end = start + int(annot["length"])
         surface = annot["name"]
         link = myutils.norm_label(metadata["readableRepr"])
         mtnkey = CorpusMgr.create_mention_key(fn, start, end)
         cpsob.add_entity_to_corpus(link, cl.name, ent, redo_ents=redo_ents)
         cpsob.add_mention_to_corpus(mtnkey, surface)
         annotation = md.Annotation(cpsob.mentions[mtnkey],
                                    cpsob.entities[link])
         annotation.fmention = utils.Utils.norm_mention(surface)
         annotation.confidence = confidence
         annotation.service = cl.name
         anresp[(start, end)] = annotation
     return anresp
Exemplo n.º 5
0
    def parse_wminer(fn, cl, resp, cpsob, redo_ents):
        """
        Convert the C{'spots'} of a response into annotations keyed by
        C{(start, end)}.
        See L{parse}
        @param fn: file name, used to create the mention keys
        @param cl: client object; its C{name} is used as the service name
        @param resp: response object; C{resp.json()} must contain C{'spots'}
        @param cpsob: corpus object that receives the entities and mentions
        @param redo_ents: forwarded to C{cpsob.add_entity_to_corpus}
        @return: dict mapping C{(start, end)} to L{md.Annotation}; empty
            when C{resp} is falsy
        """
        if not resp:
            return {}
        by_span = {}
        payload = resp.json()
        for spot in payload['spots']:
            if cfg.use_confidence:
                min_conf = cfg.MinConfs.vals[cfg.mywscheme][cl.name][
                    cfg.myevmode]
                if float(spot['score']) < min_conf:
                    continue
            span = (int(spot['start']), int(spot['end']))
            link = myutils.norm_label(spot['wikiname'])
            mention_text = spot['mention']
            mtnkey = CorpusMgr.create_mention_key(fn, span[0], span[1])
            cpsob.add_entity_to_corpus(link,
                                       cl.name,
                                       spot,
                                       redo_ents=redo_ents)
            cpsob.add_mention_to_corpus(mtnkey, mention_text)

            annotation = md.Annotation(cpsob.mentions[mtnkey],
                                       cpsob.entities[link])
            by_span[span] = annotation
            annotation.fmention = utils.Utils.norm_mention(mention_text)
            annotation.confidence = float(spot['score'])
            annotation.service = cl.name
        return by_span
Exemplo n.º 6
0
    def normalize(cls, data: dict):
        """Build a ``model.Document`` from a dictionary with a 'markups' list.

        Document-wide values are copied from the top-level keys. Every entry
        of ``data['markups']`` becomes one ``model.Annotation`` named
        ``'Annotation <n>'`` (numbered from 1), whose points are the
        positions of the entry's control points.

        Raises ``KeyError`` when a required key is missing; only
        'coordinateUnits' is optional.
        """
        document = model.Document()
        document.current_id = data['currentId']
        document.reference_view = data['referenceView']
        document.ontology = data['ontology']
        document.stepSize = data['stepSize']
        document.camera_position = tuple(data['cameraPosition'])
        document.camera_view_up = tuple(data['cameraViewUp'])

        number = 0
        for entry in data['markups']:
            number += 1
            markup = entry['markup']

            annotation = model.Annotation()
            annotation.name = f'Annotation {number}'

            # values stored next to the markup
            annotation.orientation = entry['orientation']
            annotation.representation_type = entry['representationType']
            annotation.thickness = entry['thickness']

            # values stored inside the markup itself
            annotation.markup_type = markup['type']
            annotation.coordinate_system = markup['coordinateSystem']
            if 'coordinateUnits' in markup:
                annotation.coordinate_units = markup['coordinateUnits']

            annotation.points.extend(
                tuple(control['position'])
                for control in markup['controlPoints'])

            document.annotations.append(annotation)

        return document
Exemplo n.º 7
0
    def normalize(cls, data: dict):
        """Build a ``model.Document`` from a dictionary with a 'Markups' list.

        Document-wide values are taken from the first markup whose
        'Selected' value is truthy; when there is none, only ``current_id``
        is set (to 0). Every markup becomes a 'ClosedCurve'
        ``model.Annotation`` in the 'LPS' coordinate system.

        Raises ``KeyError`` when a required key is missing.
        """
        document = model.Document()
        markups = data['Markups']

        # Document-wide values come from the first selected markup.
        document.current_id = 0
        for position, markup in enumerate(markups):
            if not markup['Selected']:
                continue

            document.current_id = position
            document.reference_view = markup['ReferenceView']
            document.ontology = markup['Ontology']
            document.stepSize = markup['StepSize']
            document.camera_position = tuple(markup['CameraPosition'])
            document.camera_view_up = tuple(markup['CameraViewUp'])
            break

        # One annotation per markup.
        for markup in markups:
            annotation = model.Annotation()
            annotation.name = markup['Label']
            annotation.markup_type = 'ClosedCurve'
            annotation.coordinate_system = 'LPS'

            # RAS → LPS conversion: x and y change sign, z is kept
            converted = []
            for point in markup['Points']:
                converted.append((-point['x'], -point['y'], point['z']))
            annotation.points = converted

            annotation.thickness = markup['Thickness']
            annotation.orientation = markup['SplineOrientation']
            annotation.representation_type = markup['RepresentationType']

            document.annotations.append(annotation)

        return document
Exemplo n.º 8
0
def upload():
    """Handle the PDB upload form.

    When the submitted form validates, the PDB identifier is derived from
    the uploaded file name. If no ``model.PDBFile`` is stored under that
    key, the file is saved, its "dssp" and "pross" annotations are computed
    and everything is committed. The user is then redirected to the
    ``resultsForOnePDB`` view. When the form does not validate, the upload
    page is rendered.
    """
    form = UploadForm()
    if not form.validate_on_submit():
        return flask.render_template('upload.html', form=form)

    # The identifier is the last four characters of the file name up to its
    # first dot, e.g. "pdb1234.pdb" -> "1234".
    uploaded = form.pdb_file.data
    pdb_code = uploaded.filename.split('.')[0][-4:]

    # Lookup by primary key; a falsy result means "not stored yet".
    current_pdb = model.PDBFile.query.get(pdb_code)
    if current_pdb:
        flask.flash("This pdb was already in the database")
    else:
        # Save the uploaded file and find out where it was stored.
        stored_name = pdb_set.save(storage=uploaded)
        path = pdb_set.path(stored_name)

        # Create the record and compute both annotations for it.
        current_pdb = model.PDBFile(path)
        dssp_annotation = model.Annotation(pdb_id=current_pdb.id,
                                           method="dssp",
                                           result=annot.dsspAnnot(path))
        current_pdb.annotations.append(dssp_annotation)
        pross_annotation = model.Annotation(pdb_id=current_pdb.id,
                                            method="pross",
                                            result=annot.prossAnnot(path))
        current_pdb.annotations.append(pross_annotation)

        # Adding the record commits it together with its annotations.
        db.session.add(current_pdb)
        db.session.commit()

        flask.flash("The pdb was added in the database")

    target = flask.url_for('resultsForOnePDB',
                           PDBid=current_pdb.id,
                           unit=form.angle_unit.data)
    return flask.redirect(target)
Exemplo n.º 9
0
    def normalize(cls, data: dict):
        """Build a ``model.Document`` from a dictionary with 'Default*' keys.

        The eight 'Default*' keys must be present although their values are
        not used: a missing one raises ``KeyError``. Document-wide values
        are taken from the first markup whose 'Selected' value is truthy;
        when there is none, only ``current_id`` is set (to 0). Every markup
        becomes a 'ClosedCurve' ``model.Annotation`` in the 'LPS' coordinate
        system.
        """
        document = model.Document()

        # Expect these to be present, even though they are not needed: the
        # lookup fails on a missing key so that inference works correctly.
        required_keys = (
            "DefaultCameraPosition",
            "DefaultCameraViewUp",
            "DefaultOntology",
            "DefaultReferenceView",
            "DefaultRepresentationType",
            "DefaultSplineOrientation",
            "DefaultStepSize",
            "DefaultThickness",
        )
        for key in required_keys:
            data[key]

        markups = data['Markups']

        # Document-wide values come from the first selected markup.
        document.current_id = 0
        for position, markup in enumerate(markups):
            if not markup['Selected']:
                continue

            document.current_id = position
            document.reference_view = markup['ReferenceView']
            document.ontology = markup['Ontology']
            document.stepSize = markup['StepSize']
            document.camera_position = tuple(markup['CameraPosition'])
            document.camera_view_up = tuple(markup['CameraViewUp'])
            break

        # Markup-specific values: one annotation per markup.
        for markup in markups:
            annotation = model.Annotation()
            annotation.name = markup['Label']
            annotation.markup_type = 'ClosedCurve'
            annotation.coordinate_system = 'LPS'

            # RAS → LPS conversion: x and y change sign, z is kept
            converted = []
            for point in markup['Points']:
                converted.append((-point['x'], -point['y'], point['z']))
            annotation.points = converted

            annotation.thickness = markup['Thickness']
            annotation.orientation = markup['SplineOrientation']
            annotation.representation_type = markup['RepresentationType']

            document.annotations.append(annotation)

        return document
Exemplo n.º 10
0
 def parse_spotstat(fn, cl, resp, cpsob, redo_ents):
     """
     Convert the C{"Resources"} of a response into annotations keyed by
     C{(start, end)}.
     See L{parse}
     @param fn: file name, used to create the mention keys
     @param cl: client object; its C{name} is used as the service name and
         its C{cfg.DBPRESPREF} is the prefix removed from the URIs
     @param resp: response object; annotations are read from C{resp.json()}
     @param cpsob: corpus object that receives the entities and mentions
     @param redo_ents: forwarded to C{cpsob.add_entity_to_corpus}
     @return: dict mapping C{(start, end)} to L{md.Annotation}; empty when
         C{resp} is falsy or has no C{"Resources"} element
     """
     if not resp:
         return {}
     anresp = {}
     jso = resp.json()
     # annotations are in 'Resources' element of the response
     if "Resources" not in jso:
         return {}
     for an in jso["Resources"]:
         if (cfg.use_confidence and float(an["@similarityScore"]) <
                 cfg.MinConfs.vals[cfg.mywscheme][cl.name][cfg.myevmode]):
             continue
         try:
             # Both ends of the span are integers, so that the key has the
             # same type as the keys created by the other parsers.
             start = int(an["@offset"])
             surface = an["@surfaceForm"]
         except KeyError:
             print("!! KeyError for annot: {}".format(repr(an)))
             continue
         end = start + len(unicode(surface))
         try:
             # unquote takes and gives str, act accordingly
             # http://stackoverflow.com/questions/5139249
             link = urllib.unquote(
                 (an["@URI"].replace(cl.cfg.DBPRESPREF,
                                     u"")).encode("utf8")).decode("utf8")
             cpsob.add_entity_to_corpus(link,
                                        cl.name,
                                        an,
                                        redo_ents=redo_ents)
         except KeyError:
             continue
         mtnkey = CorpusMgr.create_mention_key(fn, start, end)
         cpsob.add_mention_to_corpus(mtnkey, surface)
         annotation = md.Annotation(cpsob.mentions[mtnkey],
                                    cpsob.entities[link])
         annotation.fmention = utils.Utils.norm_mention(surface)
         annotation.confidence = float(an["@similarityScore"])
         annotation.service = cl.name
         anresp[(start, end)] = annotation
     return anresp
Exemplo n.º 11
0
    def parse_wminer_remote(fn, cl, resp, cpsob, redo_ents):
        """
        Convert the C{detectedTopic} elements of an XML response into
        annotations, one per C{reference} of each topic.
        See L{parse}
        @deprecated
        @note: use L{parse_wminer} instead
        @param fn: file name, used to create the mention keys
        @param cl: client object; its C{name} is used as the service name
        @param resp: XML text of the response
        @param cpsob: corpus object that receives the entities and mentions
        @param redo_ents: forwarded to C{cpsob.add_entity_to_corpus}
        @return: dict mapping C{(start, end)} to L{md.Annotation}; empty
            when C{resp} is empty or is not well-formed XML
        """
        if not resp:
            return {}
        try:
            root = etree.fromstring(resp)
        except etree.XMLSyntaxError:
            return {}
        by_span = {}
        # mention strings are cut out of the source text echoed back in the
        # request parameters
        source_nodes = root.xpath("//request/param[@name='source']")
        srctext = source_nodes[0].text
        #TODO: Run deduplication here? (or at least give option?)
        for topic in root.xpath("//detectedTopic"):
            if cfg.use_confidence:
                min_conf = cfg.MinConfs.vals[cfg.mywscheme][cl.name][
                    cfg.myevmode]
                if float(topic.attrib["weight"]) < min_conf:
                    continue
            for ref in topic.xpath("references//reference"):
                span = (int(ref.attrib["start"]), int(ref.attrib["end"]))
                link = myutils.norm_label(topic.attrib["title"])
                surface = srctext[span[0]:span[1]]
                mtnkey = CorpusMgr.create_mention_key(fn, span[0], span[1])

                cpsob.add_entity_to_corpus(link,
                                           cl.name,
                                           ref,
                                           redo_ents=redo_ents)
                cpsob.add_mention_to_corpus(mtnkey, surface)

                annotation = md.Annotation(cpsob.mentions[mtnkey],
                                           cpsob.entities[link])
                by_span[span] = annotation
                annotation.fmention = utils.Utils.norm_mention(surface)
                annotation.confidence = float(topic.attrib["weight"])
                annotation.service = cl.name
        return by_span
Exemplo n.º 12
0
    def parse_raida(fn, cl, resp, cpsob, redo_ents):
        """
        Build one annotation per entry of C{resp["mentions"]} that has a
        C{"bestEntity"}.
        See L{parse}
        @param fn: file name, used to create the mention keys
        @param cl: client object; its C{name} is used as the service name
        @type resp: json
        @param cpsob: corpus object that receives the entities and mentions
        @param redo_ents: forwarded to C{cpsob.add_entity_to_corpus}
        @return: dict mapping C{(start, end)} to L{md.Annotation}; empty
            when C{resp} is empty
        """
        if not resp:
            return {}
        by_span = {}
        # The result of this lookup is not used below, but a response
        # without the key fails here with a KeyError.
        resp["allEntities"]
        for mention in resp["mentions"]:
            if "bestEntity" not in mention:
                continue
            best = mention["bestEntity"]
            link = myutils.norm_label(
                best["kbIdentifier"].replace(cfg.AIDA_KBPREFIX, ""),
                svc=cl.name)
            score = float(best["disambiguationScore"])
            if cfg.use_confidence:
                min_conf = cfg.MinConfs.vals[cfg.mywscheme][cl.name][
                    cfg.myevmode]
                if score < min_conf:
                    continue
            first = int(mention["offset"])
            span = (first, int(mention["offset"]) + int(mention["length"]))
            surface = mention["name"]
            mtnkey = CorpusMgr.create_mention_key(fn, span[0], span[1])

            cpsob.add_entity_to_corpus(link,
                                       cl.name,
                                       mention,
                                       redo_ents=redo_ents)
            cpsob.add_mention_to_corpus(mtnkey, surface)
            annotation = md.Annotation(cpsob.mentions[mtnkey],
                                       cpsob.entities[link])
            by_span[span] = annotation
            annotation.fmention = utils.Utils.norm_mention(surface)
            annotation.confidence = score
            annotation.service = cl.name
        return by_span
Exemplo n.º 13
0
 def read_file(self,
               svc,
               cpsob,
               runid,
               ipt=None,
               oneoutforall=True,
               has_snbr=True,
               has_normcat=True):
     """
     Read a single annotation file. If no path is given, assumes that it's
     a file containing annots for a corpus, and figures out the path from
     the other keyword arguments. Otherwise reads the path given.
     Lines whose document name is empty (e.g. blank lines) and the header
     line are skipped; lines with too few columns are reported and skipped.
     @param svc: service for annotations
     @param cpsob: L{model.Corpus} obj
     @param runid: run-id for the annotations to read
     @param ipt: file name
     @param oneoutforall: if True, means that input contains annots for a directory
     @param has_snbr: if True, one of the last two columns is the sent nbr
     @param has_normcat: if True, one of the last two columns is the normalized categ
     @return: dict mapping a document name to a dict that maps
         C{(start, end)} to L{md.Annotation}
     """
     assert not (ipt is None and oneoutforall is False)
     if ipt is None and oneoutforall:
         fn = "".join(("_".join((cpsob.name, svc, "all", runid)), ".txt"))
         ffn = os.path.join(self.cfg.outdir, fn)
     else:
         ffn = ipt
     # Without a document-name column every column index moves one to the
     # left, and all annotations are filed under the name of the file read.
     if oneoutforall:
         sh = 0
         file_key = None
     else:
         sh = 1
         file_key = os.path.basename(ffn)
     annots = {}
     with codecs.open(ffn, "r", "utf8") as inf:
         try:
             line = inf.readline()
         except UnicodeDecodeError:
             print("UnicodeDecodeError. Skipping: {}".format(ffn))
             return annots
         while line:
             # skip header
             if line.startswith("doc\tmtn\tstart\tend\t"):
                 line = inf.readline()
                 continue
             sl = line.strip().split("\t")
             ke = sl[0] if oneoutforall else file_key
             # empty file name
             if not ke:
                 # Move on to the next line before skipping: without this
                 # the loop would look at the same line forever.
                 line = inf.readline()
                 continue
             annots.setdefault(ke, {})
             try:
                 if (self.cfg.use_confidence and float(sl[6 - sh]) <
                         self.cfg.MinConfs.vals[self.cfg.mywscheme][svc][
                             self.cfg.myevmode]):
                     line = inf.readline()
                     continue
                 start, end = int(sl[2 - sh]), int(sl[3 - sh])
                 link = sl[4 - sh]
                 mtnkey = self.cpsmgr.create_mention_key(ke, start, end)
                 cpsob.add_entity_to_corpus(link, svc)
                 cpsob.add_mention_to_corpus(mtnkey, sl[1 - sh])
                 entity = cpsob.entities[link]
                 mention = cpsob.mentions[mtnkey]
                 annotation = md.Annotation(mention, entity)
                 annots[ke][(start, end)] = annotation
                 annotation.fmention = sl[1 - sh]
                 annotation.service = sl[5 - sh]
                 annotation.confidence = float(sl[6 - sh])
                 try:
                     # With both columns present the sentence number comes
                     # first; a single extra column is the last one.
                     if has_snbr and has_normcat:
                         annotation.snbr = int(sl[-2])
                         annotation.normcat = sl[-1]
                     elif has_snbr:
                         annotation.snbr = int(sl[-1])
                     elif has_normcat:
                         annotation.normcat = sl[-1]
                 except ValueError:
                     # The annotation is kept without the extra values.
                     if self.cfg.DBG:
                         print("ValueError: {}, {}".format(ffn, repr(sl)))
             except IndexError:
                 # Too few columns: report the line and go on.
                 print("IndexError: {}, [{}]".format(ffn, line.strip()))
             line = inf.readline()
     return annots
Exemplo n.º 14
0
# Names of the regular files located directly in 'data/' (directories and
# other non-file entries are left out).
onlyfiles = [f for f in listdir('data/') if isfile(join('data/', f))]

# Import every PDB file that is not stored yet, together with its "dssp" and
# "pross" annotations. One commit is made per file added.
for fname in onlyfiles:

    # Check whether the file already exists in the db: query by primary key;
    # a falsy result is treated as "not stored yet".
    PDBid = fname.split('.')[0][-4:]
    # The key is the last four characters of the name before its first dot,
    # e.g. filename = pdb1234.pdb
    #         PDBid = 1234

    current_pdb = model.PDBFile.query.get(PDBid)
    if not current_pdb:
        # Insert the data contained in the pdb file into the db:

        path = join('data/', fname)

        # Create the record and compute one annotation per method.
        current_pdb = model.PDBFile(path)
        dssp_data = model.Annotation(pdb_id=current_pdb.id,
                                     method="dssp",
                                     result=annot.dsspAnnot(path))
        current_pdb.annotations.append(dssp_data)
        pross_data = model.Annotation(pdb_id=current_pdb.id,
                                      method="pross",
                                      result=annot.prossAnnot(path))
        current_pdb.annotations.append(pross_data)

        # Add the record (with the annotations appended above) to the
        # session and commit it.
        db.session.add(current_pdb)
        db.session.commit()