Пример #1
0
def train_bot(args):
    """Interactively label one Reddit submission and store it in the corpus.

    Fetches the submission identified by ``args.id``, prints its title and
    self-text, prompts on stdin for a category, and saves a ``model.Corpus``
    row holding the title, text and the entered category.

    Args:
        args: Object exposing an ``id`` attribute with the submission id
            (presumably parsed command-line arguments -- confirm with caller).
    """
    reddit = get_reddit_client()
    message = reddit.get_submission(submission_id=args.id)

    print(message.title)
    print('----------')
    print(message.selftext)
    print('')
    message_type = input('Enter category: ')

    # connect_to_database returns a session factory; calling it yields a
    # session (assumed SQLAlchemy-style, i.e. it provides close()).
    Session = connect_to_database(DATABASE_URI)
    session = Session()
    try:
        session.add(
            model.Corpus(title=message.title,
                         text=message.selftext,
                         category=message_type))
        session.commit()
    finally:
        # Always release the session, even when the commit fails, so the
        # underlying connection is not leaked.
        session.close()
Пример #2
0
 def collect_annotations_for_service(self, infi, svc, collected=None):
     """
     Read the annotations of one service from a results file.

     The annotations are read with C{self.ar.read_file} against a fresh
     C{md.Corpus} built from C{self.cfg}, and stored under the key C{svc}.
     @param infi: filename for results file (passed to read_file as C{ipt})
     @param svc: service for annotations (svc name needed to combine annots)
     @param collected: dict to store annots; when given it is updated in
     place and returned, when None a new one-entry dict is created
     @return: dict with annotations for all services collected so far
     """
     # Parenthesised single-argument form: prints the same text under the
     # Python 2 print statement and is also valid as a Python 3 call.
     print("- Reading annots for service [{}], {}".format(
         svc, time.asctime(time.localtime())))
     cps = md.Corpus(self.cfg)
     # for now filenames are indeed provided by the client to this class
     annots = self.ar.read_file(svc, cps, "", ipt=infi)
     if collected is None:
         return {svc: annots}
     collected[svc] = annots
     return collected
Пример #3
0
def train_bot(args, by_id):
    """Interactively label Reddit submissions and store them in the corpus.

    For every selected submission the title and self-text are printed and a
    category is requested on stdin. An empty answer skips the submission;
    otherwise a ``model.Corpus`` row is added and committed immediately, so
    labels entered so far are kept if the run is interrupted.

    Args:
        args: Object exposing ``id`` (used when ``by_id`` is true) and
            ``limit`` (used otherwise) -- presumably parsed command-line
            arguments; confirm with caller.
        by_id: When true, label only the submission ``args.id``; otherwise
            label up to ``args.limit`` new submissions from ``SUBREDDIT``.
    """
    reddit = get_reddit_client()
    if by_id:
        messages = [reddit.get_submission(submission_id=args.id)]
    else:
        messages = reddit.get_subreddit(SUBREDDIT).get_new(limit=args.limit)

    # Set up the database once for the whole run instead of reconnecting for
    # every message. The session is assumed to be SQLAlchemy-style (close()).
    Session = connect_to_database(DATABASE_URI)
    session = Session()
    try:
        for message in messages:
            print(message.title)
            print('----------')
            print(message.selftext)
            print('')
            message_type = input('Enter category: ')
            if not message_type:
                # Empty answer means "do not label this one".
                continue
            session.add(model.Corpus(title=message.title,
                                     text=message.selftext,
                                     category=message_type))
            # Commit per message so earlier labels survive a later failure.
            session.commit()
    finally:
        # Release the session even if fetching, prompting or committing fails.
        session.close()
Пример #4
0
                    out.write("".join(("\t".join(ll), "\n")))
                    wtn_ll += 1
                    if wtn_ll % cfg.written_progress == 0:
                        print "Written {} lines, {}".format(
                            wtn_ll, time.asctime(time.localtime()))


# TEST
if __name__ == "__main__":
    ar = clients.AnnotationReader(cfg)
    cc = CooccurrenceMgr()
    print "Tests with individual files"
    svc2anns = {}
    svc2edges = {}
    svc2edgecounts = {}
    mycorpus = md.Corpus(cfg)
    if False:
        for svc in [s for s in cfg.activate if cfg.activate[s]["general"]]:
            print svc
            # read_file fine cos contains annots for whole corpus (whole run)
            #annots = ar.read_file(svc, mycorpus, "064", has_snbr=True,
            annots = ar.read_file(svc,
                                  mycorpus,
                                  "361",
                                  has_snbr=True,
                                  has_normcat=False)
            svc2anns[svc] = annots
            svc2edges[svc] = cc.create_entity_edges_from_annotation_objs(
                svc2anns[svc])
            svc2edgecounts[svc] = cc.count_edges(svc2edges[svc])
            cc.write_edge_dict_as_tsv(svc2edgecounts[svc],