def train_bot(args):
    """Interactively label one Reddit submission and store it as a sample.

    Fetches the submission identified by ``args.id``, prints its title and
    self-text, prompts on stdin for a category and persists the result as a
    ``model.Corpus`` row.

    Args:
        args: Object exposing an ``id`` attribute with the submission id
            (presumably an argparse namespace -- confirm against the caller).

    Raises:
        Exception: Anything raised while adding or committing the row is
            re-raised unchanged after the transaction has been rolled back.
    """
    reddit = get_reddit_client()
    message = reddit.get_submission(submission_id=args.id)

    print(message.title)
    print('----------')
    print(message.selftext)
    print('')
    message_type = input('Enter category: ')

    # NOTE(review): assumes connect_to_database() returns a SQLAlchemy-style
    # session factory whose sessions offer rollback() and close() -- confirm.
    Session = connect_to_database(DATABASE_URI)
    session = Session()
    try:
        session.add(
            model.Corpus(title=message.title,
                         text=message.selftext,
                         category=message_type))
        session.commit()
    except Exception:
        # Do not leave a half-finished transaction open on the connection.
        session.rollback()
        raise
    finally:
        # Always release the session; the original never closed it.
        session.close()
def collect_annotations_for_service(self, infi, svc, collected=None):
    """
    Read the annotations of one service from a results file and store them
    under the service name.

    A progress line with the service name and the current local time is
    printed before the file is read.

    @param infi: filename of the results file (handed to the reader as ipt)
    @param svc: service the annotations belong to; also used as the dict key
    @param collected: dict of annotations gathered so far; it is updated in
    place when given, otherwise a new dict is created
    @return: dict mapping service name to its annotations (the very object
    passed as collected when one was supplied)
    """
    timestamp = time.asctime(time.localtime())
    # A single parenthesised argument prints the same text under the
    # Python 2 print statement used elsewhere in this file.
    print("- Reading annots for service [{}], {}".format(svc, timestamp))
    corpus = md.Corpus(self.cfg)
    # for now filenames are indeed provided by the client to this class
    annots = self.ar.read_file(svc, corpus, "", ipt=infi)
    if collected is None:
        return {svc: annots}
    collected.update({svc: annots})
    return collected
def train_bot(args, by_id):
    """Interactively label Reddit submissions and store them as samples.

    Each submission's title and self-text are printed and a category is
    requested on stdin. An empty answer skips the submission; any other
    answer is stored as a ``model.Corpus`` row and committed immediately.

    Args:
        args: Object exposing ``id`` (used when ``by_id`` is true) and
            ``limit`` (used otherwise); presumably an argparse namespace --
            confirm against the caller.
        by_id: When true, label only the submission ``args.id``; otherwise
            label the newest ``args.limit`` submissions of ``SUBREDDIT``.

    Raises:
        Exception: Anything raised while adding or committing a row is
            re-raised unchanged after that transaction has been rolled back.
    """
    reddit = get_reddit_client()
    if by_id:
        messages = [reddit.get_submission(submission_id=args.id)]
    else:
        messages = reddit.get_subreddit(SUBREDDIT).get_new(limit=args.limit)

    # Build the session factory once instead of once per labelled message.
    # NOTE(review): assumes connect_to_database() returns a SQLAlchemy-style
    # session factory whose sessions offer rollback() and close() -- confirm.
    Session = connect_to_database(DATABASE_URI)

    for message in messages:
        print(message.title)
        print('----------')
        print(message.selftext)
        print('')
        message_type = input('Enter category: ')
        if message_type == '':
            # An empty answer means "do not use this submission".
            continue

        # One short-lived session per sample keeps each commit independent.
        session = Session()
        try:
            session.add(model.Corpus(title=message.title,
                                     text=message.selftext,
                                     category=message_type))
            session.commit()
        except Exception:
            # Do not leave a half-finished transaction open on the connection.
            session.rollback()
            raise
        finally:
            # Always release the session; the original leaked one per message.
            session.close()
out.write("".join(("\t".join(ll), "\n"))) wtn_ll += 1 if wtn_ll % cfg.written_progress == 0: print "Written {} lines, {}".format( wtn_ll, time.asctime(time.localtime())) # TEST if __name__ == "__main__": ar = clients.AnnotationReader(cfg) cc = CooccurrenceMgr() print "Tests with individual files" svc2anns = {} svc2edges = {} svc2edgecounts = {} mycorpus = md.Corpus(cfg) if False: for svc in [s for s in cfg.activate if cfg.activate[s]["general"]]: print svc # read_file fine cos contains annots for whole corpus (whole run) #annots = ar.read_file(svc, mycorpus, "064", has_snbr=True, annots = ar.read_file(svc, mycorpus, "361", has_snbr=True, has_normcat=False) svc2anns[svc] = annots svc2edges[svc] = cc.create_entity_edges_from_annotation_objs( svc2anns[svc]) svc2edgecounts[svc] = cc.count_edges(svc2edges[svc]) cc.write_edge_dict_as_tsv(svc2edgecounts[svc],