def main():
    """Run the ``classifier`` command-line interface.

    Sub-commands:
        build
            Calls ``classifier_builder.builder_main(save=True)``.
        classify
            Loads a list of tweets from the JSON file given as the
            ``tweets`` argument, classifies their text and writes the
            resulting labels (converted to ``bool``) as a JSON list to
            ``./tweet_label.json`` in the current working directory.
    """
    arg_parser = ArgumentParser(prog="classifier")
    subparsers = arg_parser.add_subparsers(
        dest="command", metavar="command", required=True
    )
    # "build" takes no arguments, so the returned sub-parser is not kept.
    subparsers.add_parser("build", help="Build classifier")
    classify_parser = subparsers.add_parser("classify", help="Classify tweets")
    classify_parser.add_argument(
        "tweets",
        help="File path of tweet data saved as json format",
    )
    args = arg_parser.parse_args()

    if args.command == "build":
        classifier_builder.builder_main(save=True)
    elif args.command == "classify":
        # Read as UTF-8 explicitly: the platform default encoding (e.g.
        # cp1252 on Windows) can fail on non-ASCII tweet text.
        with Path(args.tweets).open(encoding="utf-8") as f:
            tweets = json.load(f)

        # When a tweet carries a "retweeted_status" entry, classify the text
        # stored under that entry instead of the tweet's own "full_text".
        tweet_texts = [
            tweet["retweeted_status"]["full_text"]
            if "retweeted_status" in tweet
            else tweet["full_text"]
            for tweet in tweets
        ]

        labels = classify.classify(
            CLASSIFIER_PATH / "model.pkl",
            Estimator.RANDOMFOREST,
            CLASSIFIER_PATH / "features.json",
            tweet_texts,
        )

        # json.dump escapes non-ASCII by default, so the output is plain
        # ASCII; the explicit encoding just keeps the two opens symmetrical.
        with Path("./tweet_label.json").open("w", encoding="utf-8") as f:
            json.dump([bool(label) for label in labels], f)
def get_email(user):
    """Fetch a user's INBOX over IMAP and store unseen messages in ``db``.

    Credentials are read from ``db`` under ``user:<user>:login`` and
    ``user:<user>:password``.  Every fetched message whose id is not already
    present in the sorted set ``mail:<user>:inbox`` is serialised to JSON,
    added to that sorted set (scored by message id) and its id is added to
    the set ``mail:<user>:<category>``.

    The IMAP session is always logged out once login has succeeded, even if
    processing a message raises.

    :param user: user identifier used to build the ``db`` keys.
    """
    server = IMAPClient(HOST, use_uid=True, ssl=True)
    username = db.get("user:%s:login" % user)
    password = db.get("user:%s:password" % user)
    server.login(username, password)
    try:
        server.select_folder('INBOX', readonly=True)
        messages = server.search(['NOT DELETED', 'SINCE 15-May-2014'])
        response = server.fetch(messages, ['RFC822', 'FLAGS'])

        inbox_key = "mail:%s:inbox" % user  # loop-invariant
        # .items() behaves the same as .iteritems() here and also exists on
        # Python 3 dictionaries.
        for msgid, data in response.items():
            # check for duplicates
            duplicate = db.zrangebyscore(inbox_key, msgid, msgid)
            if duplicate:
                continue

            emailUTF8 = data['RFC822'].encode('utf-8')
            msg = parser.parsestr(emailUTF8)
            body = extract_body(msg).encode('utf-8')

            # NOTE(review): if `msg` is an email.message.Message, item
            # assignment appends a header rather than replacing one, and the
            # `email` dict below still reads the original 'Subject'/'To'
            # headers, not these defaults -- confirm this is intended.
            msg['message'] = body
            subject = msg['Subject']
            if subject is None or subject.encode('utf-8') == "".encode('utf-8'):
                msg['subject'] = 'NoSubj'
            else:
                msg['subject'] = subject
            recipient = msg['To']
            msg['to'] = 'NoTo' if recipient is None else recipient

            plain = {'plain_body': extract_body_text(msg).encode('utf-8'),
                     'subject': msg['subject']}

            # TODO set unread
            email = {'id': msgid,
                     'from': msg['From'],
                     'to': msg['To'],
                     'subject': msg['Subject'],
                     'date': msg['Date'],
                     'cc': msg['CC'],
                     'read': False,
                     'message': body,
                     'categorized': False,
                     'summary': shorten(plain),
                     'archived': False}

            # Only run the classifier when the stored flag is exactly "true";
            # otherwise the message gets category 1.
            trained = db.get("user:%s:trained" % user)
            if trained == "true":
                email['category'] = int(classify(msg, user))
            else:
                email['category'] = 1

            emailJSON = json.dumps(email, sort_keys=True, indent=4,
                                   separators=(',', ': '))
            db.zadd(inbox_key, emailJSON, msgid)
            db.sadd("mail:%s:%s" % (user, email['category']), msgid)
    finally:
        # Previously skipped whenever anything above raised, leaving the
        # IMAP session open.
        server.logout()
def new_analysis_view(self, request):
    """
    DESCRIPTION:
    Serve the "new analysis" page and process its submissions.

    Any non-POST request renders 'new_analysis.html' with an unbound
    EvaluationForm.  A POST builds the form from ``request.data``; if it
    does not validate, the same template is rendered with the bound form.
    If it validates, the form is saved, the resulting object gets the
    requesting user, the submitted image and the output of ``classify`` on
    that image, is saved again, and the client is redirected to
    '/evaluations/my_analysis_view'.
    """
    if request.method != 'POST':
        # Not a submission: show an empty form.
        return render(request, 'new_analysis.html', {'form': EvaluationForm()})

    submitted = EvaluationForm(request.data)
    if not submitted.is_valid():
        # Re-render with the bound form that failed validation.
        return render(request, 'new_analysis.html', {'form': submitted})

    evaluation = submitted.save()
    evaluation.user = request.user
    evaluation.image = submitted.data.get('image')
    # The image is opened from a path built by prefixing '.' to its URL.
    picture = Image.open('.' + evaluation.image.url)
    evaluation.results = classify(picture)
    evaluation.save()
    return redirect('/evaluations/my_analysis_view')
def initialize_cmudict():
    """Load the pickled dictionary at ``CMUDICT_FNAME`` into ``CMUDICT``.

    The result is stored in the module-level global ``CMUDICT``; nothing is
    returned.
    """
    global CMUDICT
    # Pickle streams are binary data, so the file must be opened in 'rb'.
    # NOTE: unpickling executes arbitrary code -- CMUDICT_FNAME must point
    # at a trusted file.
    with open(CMUDICT_FNAME, 'rb') as f:
        CMUDICT = pickle.load(f)


if __name__ == "__main__":
    hostname = socket.gethostname()
    print(hostname)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind((HOST, PORT))
    initialize_cmudict()
    # classify something just to force the classifier to initialize
    phones = pronun.classify('a')
    print(phones)
    # Put the socket into listening mode once, not on every loop pass.
    s.listen(1)
    while True:
        conn, addr = s.accept()
        try:
            # Serve this client until it closes its end (recv returns empty).
            while True:
                data = conn.recv(1024)
                if not data:
                    break
                print(data)
                if data in CMUDICT:
                    # Take the first entry stored for the word and remove
                    # every digit from each of its items.
                    phones = CMUDICT[data][0]
                    phones = [re.sub(r'\d', '', p) for p in phones]
        finally:
            # Release the client socket before accepting the next one.
            conn.close()
def initialize_cmudict():
    """Populate the global ``CMUDICT`` from the pickle at ``CMUDICT_FNAME``.

    Returns nothing; the unpickled object is bound to the module-level
    name ``CMUDICT``.
    """
    global CMUDICT
    # Binary mode is required for pickle data; text mode can corrupt it.
    # NOTE: only unpickle files you trust -- pickle.load can run code.
    with open(CMUDICT_FNAME, 'rb') as dict_file:
        CMUDICT = pickle.load(dict_file)


if __name__ == "__main__":
    hostname = socket.gethostname()
    print(hostname)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind((HOST, PORT))
    initialize_cmudict()
    # classify something just to force the classifier to initialize
    phones = pronun.classify('a')
    print(phones)
    # listen() only needs to be called once, before the accept loop.
    s.listen(1)
    while True:
        conn, addr = s.accept()
        try:
            while True:
                data = conn.recv(1024)
                if not data:
                    # Empty read: the peer closed the connection.
                    break
                print(data)
                if data in CMUDICT:
                    # First entry for the word, with all digits removed
                    # from each item.
                    phones = CMUDICT[data][0]
                    phones = [re.sub(r'\d', '', p) for p in phones]
        finally:
            # Close the finished connection instead of leaving it to the
            # garbage collector.
            conn.close()