예제 #1
1
 def upload_task(self, passage):
     """Upload ``passage`` together with a tokenization and an annotation task.

     The passage text is stored first. A tokenization task is then created
     and submitted for it, and finally an annotation task whose parent is the
     tokenization task is created and submitted.

     :param passage: passage to upload; its ``ID`` is used as both the
         manager comment and the user comment of the tasks
     :return: whatever ``self.submit_annotation_task`` returns
     """
     text = to_text(passage, sentences=False)[0]
     passage_out = self.create_passage(text=text,
                                       type="PUBLIC",
                                       source=self.source)
     task_in = {
         "type": "TOKENIZATION",
         "status": "SUBMITTED",
         "project": self.project,
         "user": self.user,
         "passage": passage_out,
         "manager_comment": passage.ID,
         "user_comment": passage.ID,
         "parent": None,
         "is_demo": False,
         "is_active": True,
     }

     # Tokenization: create the task, then submit it with the token data.
     tok_task_out = self.create_tokenization_task(**task_in)
     tok_submission = dict(tok_task_out)
     tok_submission.update(to_json(passage, return_dict=True, tok_task=True))
     tok_user_task_out = self.submit_tokenization_task(**tok_submission)

     # Annotation: same settings, but typed as annotation and parented to
     # the tokenization task created above.
     annotation_in = dict(task_in, parent=tok_task_out, type="ANNOTATION")
     ann_submission = self.create_annotation_task(**annotation_in)
     categories = self.layer["categories"]
     ann_submission.update(
         to_json(passage,
                 return_dict=True,
                 tok_task=tok_user_task_out,
                 all_categories=categories))
     return self.submit_annotation_task(**ann_submission)
예제 #2
0
 def upload_task(self, passage, log=None, submit=True, ids=None, upload=True):
     """Upload (or locally prepare) ``passage`` with its tokenization and annotation tasks.

     :param passage: passage to process; ``passage.ID`` keys into ``ids`` and
         is used as the task comments and the passage external ID
     :param log: optional writable file object; when truthy, a tab-separated
         line with the passage ID and the passage/task ``"id"`` values is
         printed to it and flushed
     :param submit: forwarded as ``submit=`` to the final ``self.submit_task``
     :param ids: optional mapping from ``passage.ID`` to a
         ``(passage_id, tok_id, ann_id)`` triple; when truthy, the existing
         passage and tasks are fetched instead of being created
     :param upload: when false, no create/submit call is made and the
         locally built dictionaries are used in place of the responses
     :return: the result of the final ``self.submit_task`` when ``upload``
         is true, otherwise the locally built annotation task dictionary
     """
     if ids:
         passage_id, tok_id, ann_id = ids[passage.ID]
         passage_out = self.get_passage(passage_id)
         tok_task_out = self.get_user_task(tok_id)
         tok_user_task_out = tok_task_out
         ann_user_task_in = self.get_user_task(ann_id)
     else:
         if upload:
             passage_out = self.create_passage(
                 text=to_text(passage, sentences=False)[0],
                 type="PUBLIC",
                 source=self.source,
                 external_id=passage.ID)
         else:
             passage_out = passage
         task_in = {
             "type": "TOKENIZATION",
             "status": "ONGOING",
             "project": self.project,
             "user": self.user,
             "passage": passage_out,
             "manager_comment": passage.ID,
             "user_comment": passage.ID,
             "parent": None,
             "is_demo": False,
             "is_active": True,
         }
         if upload:
             tok_task_out = self.create_task(**task_in)
         else:
             # Without uploading, the request dict itself stands in for the
             # response (same object, not a copy).
             tok_task_out = task_in
         tok_user_task_in = dict(tok_task_out)
         tok_user_task_in.update(
             to_json(passage, return_dict=True, tok_task=True))
         if upload:
             tok_user_task_out = self.submit_task(**tok_user_task_in)
         else:
             tok_user_task_out = tok_user_task_in
         # Reuse the same settings for the annotation task, parented to the
         # tokenization task.
         task_in.update(parent=tok_task_out, type="ANNOTATION")
         if upload:
             ann_user_task_in = self.create_task(**task_in)
         else:
             ann_user_task_in = task_in

     categories = self.layer["categories"]
     ann_user_task_in.update(
         to_json(passage,
                 return_dict=True,
                 tok_task=tok_user_task_out,
                 all_categories=categories))
     if upload:
         ann_user_task_out = self.submit_task(**ann_user_task_in,
                                              submit=submit)
     else:
         ann_user_task_out = ann_user_task_in

     if log:
         # NOTE(review): with upload=False the task dict built above has no
         # "id" key of its own and passage_out is the passage object, so
         # logging in that mode presumably depends on to_json supplying
         # "id" -- confirm before combining log with upload=False.
         row = (passage.ID,
                passage_out["id"],
                tok_task_out["id"],
                ann_user_task_out["id"])
         print(*row, file=log, sep="\t", flush=True)
     return ann_user_task_out
예제 #3
0
 def upload_task(self, passage):
     """Create a passage from ``passage`` and submit its tokenization and annotation tasks.

     :param passage: passage to upload; ``passage.ID`` becomes the manager
         and user comment of both tasks
     :return: the result of ``self.submit_annotation_task``
     """
     passage_text = to_text(passage, sentences=False)[0]
     passage_out = self.create_passage(text=passage_text, type="PUBLIC", source=self.source)
     task_in = {
         "type": "TOKENIZATION", "status": "SUBMITTED",
         "project": self.project, "user": self.user,
         "passage": passage_out,
         "manager_comment": passage.ID, "user_comment": passage.ID,
         "parent": None, "is_demo": False, "is_active": True,
     }

     # Step 1: tokenization task, submitted with the passage's tokens.
     tok_task_out = self.create_tokenization_task(**task_in)
     tok_payload = dict(tok_task_out)
     tok_json = to_json(passage, return_dict=True, tok_task=True)
     tok_payload.update(tok_json)
     tok_user_task_out = self.submit_tokenization_task(**tok_payload)

     # Step 2: annotation task, child of the tokenization task.
     task_in["parent"] = tok_task_out
     task_in["type"] = "ANNOTATION"
     ann_payload = self.create_annotation_task(**task_in)
     ann_json = to_json(passage, return_dict=True, tok_task=tok_user_task_out,
                        all_categories=self.layer["categories"])
     ann_payload.update(ann_json)
     return self.submit_annotation_task(**ann_payload)
예제 #4
0
 def upload_passage(self, external_id, tokens):
     """Upload a pre-tokenized passage and create its tasks.

     Creates the passage from the space-joined tokens, creates and submits a
     tokenization task for it, then creates (without submitting) an
     annotation task assigned to ``self.annotation_user``.

     :param external_id: non-empty string identifying the passage; stored as
         the passage's external ID and in the manager comments
     :param tokens: non-empty sequence of token strings
     :raises AssertionError: if ``external_id`` or ``tokens`` is empty
     """
     # Wrap tokens in a one-tuple: "%s" % tokens would raise TypeError
     # instead of the intended message when tokens is a multi-element tuple.
     assert external_id, "Missing external ID for passage %s" % (tokens,)
     assert tokens, "Empty passage %s" % external_id
     passage_out = self.create_passage(text=" ".join(tokens),
                                       external_id=external_id,
                                       type="PUBLIC",
                                       source=self.source)
     task_in = dict(type="TOKENIZATION",
                    status="SUBMITTED",
                    project=self.project,
                    user=self.user,
                    passage=passage_out,
                    manager_comment="External ID: " + external_id,
                    user_comment="",
                    parent=None,
                    is_demo=False,
                    is_active=True)
     tok_task_out = self.create_tokenization_task(**task_in)
     tok_user_task_in = dict(tok_task_out)
     # Build a passage object from the tokens so it can be serialized for
     # the tokenization submission.
     passage = list(from_text(tokens, tokenized=True))[0]
     tok_user_task_in.update(
         to_json(passage, return_dict=True, tok_task=True))
     self.submit_tokenization_task(**tok_user_task_in)
     # The annotation task is only created here, with status NOT_STARTED.
     task_in = dict(type="ANNOTATION",
                    status="NOT_STARTED",
                    project=self.project,
                    user=self.annotation_user,
                    passage=tok_task_out["passage"],
                    manager_comment="External ID: " + external_id,
                    user_comment="",
                    parent=tok_task_out,
                    is_demo=False,
                    is_active=True)
     self.create_annotation_task(**task_in)
     print("Uploaded passage " + external_id + " successfully")
예제 #5
0
def main(args):
    """Write each input passage as a JSON file in the output directory.

    :param args: object with ``outdir`` (output directory, created if
        missing), ``filenames`` (passed to the passage loader) and
        ``verbose`` (when truthy, each written path is reported)
    """
    os.makedirs(args.outdir, exist_ok=True)
    for passage in get_passages_with_progress_bar(args.filenames):
        target = os.path.join(args.outdir, passage.ID + ".json")
        with open(target, "w", encoding="utf-8") as out:
            # to_json yields lines; emit them newline-joined with a
            # trailing newline.
            out.write("\n".join(convert.to_json(passage)))
            out.write("\n")
        if not args.verbose:
            continue
        # Print through external_write_mode, as the original does.
        with external_write_mode():
            print("Wrote '%s'" % target)
예제 #6
0
    def upload_streussel_passage_file(self, filenames, log=None, **kwargs):
        """Upload every passage file named in a list file, with a tokenization task each.

        Each listed file is read line by line: blank lines are ignored, lines
        starting with ``#`` are treated as headers, and all other lines are
        joined with single spaces to form the passage text. A header must
        have exactly four whitespace-separated fields with ``sent_id`` as the
        second; the external ID is then the second ``-``-separated part of
        the fourth field. Malformed headers are reported on stderr.

        :param filenames: path of a text file listing one passage file per line
        :param log: optional path of a log file to (over)write with
            tab-separated ``name, passage id, tokenization task id`` lines
        :param kwargs: ignored
        """
        del kwargs
        log_h = open(log, "w", encoding="utf-8") if log else None
        try:
            with open(filenames) as f_all:
                for filename in f_all:
                    filename = filename.strip()
                    if not filename:
                        # A blank line in the list is not a file to open.
                        continue
                    text_lines = []
                    external_id = "None given"
                    with open(filename, encoding="utf-8") as f:
                        for line in f:
                            line = line.strip()
                            if not line:
                                continue
                            if line.startswith("#"):
                                fields = line.split()
                                if len(fields) != 4 or fields[1] != "sent_id":
                                    print("FORMAT ERROR in " + filename,
                                          file=sys.stderr)
                                else:
                                    external_id = fields[3].split("-")[1]
                            else:
                                text_lines.append(line)
                    # Lines are already stripped and non-empty, so joining
                    # them yields the same text as concatenating and
                    # stripping.
                    passage_text = " ".join(text_lines)
                    passage_out = self.create_passage(
                        text=passage_text,
                        external_id=external_id,
                        type="PUBLIC",
                        source=self.source)
                    task_in = dict(
                        type="TOKENIZATION",
                        status="SUBMITTED",
                        project=self.project,
                        user=self.user,
                        passage=passage_out,
                        manager_comment="External ID: " + external_id,
                        user_comment="",
                        parent=None,
                        is_demo=False,
                        is_active=True)
                    tok_task_out = self.create_task(**task_in)
                    tok_user_task_in = dict(tok_task_out)

                    passage = list(from_text(passage_text.split(),
                                             tokenized=True))[0]
                    tok_user_task_in.update(
                        to_json(passage, return_dict=True, tok_task=True))

                    self.submit_task(**tok_user_task_in)
                    print("Uploaded passage " + filename + " successfully.",
                          file=sys.stderr)
                    if log_h is not None:
                        # Log the part before the last dot; fall back to the
                        # whole name when there is no dot, rather than
                        # failing after the upload already happened.
                        name_parts = filename.split(".")
                        log_name = (name_parts[-2]
                                    if len(name_parts) > 1 else filename)
                        print(log_name,
                              passage_out["id"],
                              tok_task_out["id"],
                              file=log_h,
                              sep="\t")
        finally:
            # Close the log even if reading or uploading raised.
            if log_h is not None:
                log_h.close()
예제 #7
0
 def upload_passage(self, external_id, tokens):
     """Upload a pre-tokenized passage, submit its tokenization and create an annotation task.

     :param external_id: non-empty string identifying the passage; stored as
         the passage's external ID, in the manager comments, and as the
         annotation task's user comment
     :param tokens: non-empty sequence of token strings; joined with spaces
         to form the passage text
     :raises AssertionError: if ``external_id`` or ``tokens`` is empty
     """
     # Wrap tokens in a one-tuple: "%s" % tokens would raise TypeError
     # instead of the intended message when tokens is a multi-element tuple.
     assert external_id, "Missing external ID for passage %s" % (tokens,)
     assert tokens, "Empty passage %s" % external_id
     passage_out = self.create_passage(text=" ".join(tokens), external_id=external_id, type="PUBLIC",
                                       source=self.source)
     task_in = dict(type="TOKENIZATION", status="SUBMITTED", project=self.project, user=self.user,
                    passage=passage_out, manager_comment="External ID: "+external_id,
                    user_comment="", parent=None, is_demo=False, is_active=True)
     tok_task_out = self.create_task(**task_in)
     tok_user_task_in = dict(tok_task_out)
     # Build a passage object from the tokens so it can be serialized for
     # the tokenization submission.
     passage = list(from_text(tokens, tokenized=True))[0]
     tok_user_task_in.update(to_json(passage, return_dict=True, tok_task=True))
     self.submit_task(**tok_user_task_in)
     # The annotation task is only created here, with status NOT_STARTED.
     task_in = dict(type="ANNOTATION", status="NOT_STARTED", project=self.project, user=self.annotation_user,
                    passage=tok_task_out["passage"], manager_comment="External ID: "+external_id,
                    user_comment=external_id, parent=tok_task_out, is_demo=False, is_active=True)
     self.create_task(**task_in)
     print("Uploaded passage "+external_id+" successfully")
예제 #8
0
 def tokenize_and_upload(self, filename, log=None, lang=None, **kwargs):
     """Read passages from a text file and upload each with its tasks.

     For every ``(passage, text)`` pair produced by ``from_text`` the
     passage is created, a tokenization task is created and submitted, and
     an annotation task parented to it is created.

     :param filename: path of the UTF-8 text file to read; its base name
         without extension (spaces replaced by ``_``) is passed to
         ``from_text`` as ``passage_id``
     :param log: optional path of a log file to (over)write with
         tab-separated ``passage ID, passage id, tokenization task id,
         annotation task id`` lines
     :param lang: forwarded to ``from_text``
     :param kwargs: ignored
     """
     del kwargs
     log_h = open(log, "w", encoding="utf-8") if log else None
     try:
         prefix = os.path.splitext(os.path.basename(filename))[0].replace(
             " ", "_")
         with open(filename, encoding="utf-8") as f:
             for passage, text in from_text(f,
                                            passage_id=prefix,
                                            lang=lang,
                                            return_text=True):
                 passage_out = self.create_passage(text=text,
                                                   type="PUBLIC",
                                                   source=self.source)
                 task_in = dict(type="TOKENIZATION",
                                status="SUBMITTED",
                                project=self.project,
                                user=self.user,
                                passage=passage_out,
                                manager_comment=passage.ID,
                                user_comment="",
                                parent=None,
                                is_demo=False,
                                is_active=True)
                 tok_task_out = self.create_task(**task_in)
                 tok_user_task_in = dict(tok_task_out)
                 tok_user_task_in.update(
                     to_json(passage, return_dict=True, tok_task=True))
                 self.submit_task(**tok_user_task_in)
                 # Reuse the same settings for the annotation task,
                 # parented to the tokenization task.
                 task_in.update(parent=tok_task_out, type="ANNOTATION")
                 ann_user_task_out = self.create_task(**task_in)
                 print("Uploaded passage " + filename + " successfully.",
                       file=sys.stderr)
                 if log_h is not None:
                     print(passage.ID,
                           passage_out["id"],
                           tok_task_out["id"],
                           ann_user_task_out["id"],
                           file=log_h,
                           sep="\t",
                           flush=True)
     finally:
         # Close the log even if reading or uploading raised.
         if log_h is not None:
             log_h.close()
예제 #9
0
    def upload_streussel_passage_file(self, filenames, log=None, **kwargs):
        """Upload each passage file listed in ``filenames`` and submit a tokenization task for it.

        In every listed file, blank lines are skipped, lines starting with
        ``#`` are headers, and the remaining lines are joined with single
        spaces into the passage text. A valid header has exactly four
        whitespace-separated fields, the second being ``sent_id``; the
        external ID is the second ``-``-separated part of the fourth field.
        Other headers are reported on stderr as format errors.

        :param filenames: path of a text file listing one passage file per line
        :param log: optional path of a log file to (over)write with
            tab-separated ``name, passage id, tokenization task id`` lines
        :param kwargs: ignored
        """
        del kwargs
        log_h = open(log, "w", encoding="utf-8") if log else None
        try:
            with open(filenames) as f_all:
                for filename in f_all:
                    filename = filename.strip()
                    if not filename:
                        # A blank line in the list is not a file to open.
                        continue
                    text_lines = []
                    external_id = "None given"
                    with open(filename, encoding="utf-8") as f:
                        for line in f:
                            line = line.strip()
                            if not line:
                                continue
                            if line.startswith("#"):
                                fields = line.split()
                                if len(fields) != 4 or fields[1] != "sent_id":
                                    print("FORMAT ERROR in " + filename, file=sys.stderr)
                                else:
                                    external_id = fields[3].split("-")[1]
                            else:
                                text_lines.append(line)
                    # Lines are already stripped and non-empty, so joining
                    # them yields the same text as concatenating and
                    # stripping.
                    passage_text = " ".join(text_lines)
                    passage_out = self.create_passage(text=passage_text, external_id=external_id, type="PUBLIC",
                                                      source=self.source)
                    task_in = dict(type="TOKENIZATION", status="SUBMITTED", project=self.project,
                                   user=self.user, passage=passage_out, manager_comment="External ID: " + external_id,
                                   user_comment="", parent=None, is_demo=False, is_active=True)
                    tok_task_out = self.create_task(**task_in)
                    tok_user_task_in = dict(tok_task_out)

                    passage = list(from_text(passage_text.split(), tokenized=True))[0]
                    tok_user_task_in.update(to_json(passage, return_dict=True, tok_task=True))

                    self.submit_task(**tok_user_task_in)
                    print("Uploaded passage " + filename + " successfully.", file=sys.stderr)
                    if log_h is not None:
                        # Log the part before the last dot; fall back to the
                        # whole name when there is no dot, rather than
                        # failing after the upload already happened.
                        name_parts = filename.split(".")
                        log_name = name_parts[-2] if len(name_parts) > 1 else filename
                        print(log_name, passage_out["id"], tok_task_out["id"], file=log_h, sep="\t")
        finally:
            # Close the log even if reading or uploading raised.
            if log_h is not None:
                log_h.close()