Пример #1
0
def main():
    """Command-line entry point for the ``classifier`` program.

    Two sub-commands are accepted:

    * ``build`` -- calls ``classifier_builder.builder_main(save=True)``.
    * ``classify <tweets>`` -- loads the JSON file at ``<tweets>``, passes
      the text of every tweet to ``classify.classify`` together with
      ``model.pkl`` and ``features.json`` from ``CLASSIFIER_PATH``, and
      writes one boolean per tweet to ``./tweet_label.json``.

    The tweets file is expected to hold an iterable of mappings that carry
    a ``full_text`` key, optionally nested under ``retweeted_status``.
    """
    arg_parser = ArgumentParser(prog="classifier")
    subparsers = arg_parser.add_subparsers(
        dest="command", metavar="command", required=True
    )
    # "build" takes no arguments, so the returned sub-parser is not kept.
    subparsers.add_parser("build", help="Build classifier")
    classify_parser = subparsers.add_parser("classify", help="Classify tweets")
    classify_parser.add_argument(
        "tweets", help="File path of tweet data saved as json format",
    )

    args = arg_parser.parse_args()

    if args.command == "build":
        classifier_builder.builder_main(save=True)
    elif args.command == "classify":
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default locale encoding, and release the file as soon as it is
        # parsed.
        with Path(args.tweets).open(encoding="utf-8") as f:
            tweets = json.load(f)

        # When a tweet carries "retweeted_status", the text is taken from
        # that nested object rather than from the tweet itself.
        tweet_texts = [
            tweet["retweeted_status"]["full_text"]
            if "retweeted_status" in tweet
            else tweet["full_text"]
            for tweet in tweets
        ]

        labels = classify.classify(
            CLASSIFIER_PATH / "model.pkl",
            Estimator.RANDOMFOREST,
            CLASSIFIER_PATH / "features.json",
            tweet_texts,
        )
        with Path("./tweet_label.json").open("w", encoding="utf-8") as f:
            json.dump([bool(label) for label in labels], f)
Пример #2
0
def get_email(user):
    """Fetch the user's INBOX over IMAP and store new messages in ``db``.

    Logs in to ``HOST`` with the credentials read from
    ``user:<user>:login`` and ``user:<user>:password``, selects INBOX
    read-only and fetches every message matching ``NOT DELETED`` and
    ``SINCE 15-May-2014``.  A message is skipped when a by-score lookup of
    its id in ``mail:<user>:inbox`` returns anything.  Every other message
    is serialised to JSON and added to ``mail:<user>:inbox`` together with
    its id, and the id is added to ``mail:<user>:<category>``.

    The category is ``int(classify(msg, user))`` when
    ``user:<user>:trained`` equals ``"true"``; otherwise it is ``1``.

    The IMAP session is logged out even if processing raises.

    :param user: identifier interpolated into the ``db`` key names.
    """
    server = IMAPClient(HOST, use_uid=True, ssl=True)
    username = db.get("user:%s:login" % user)
    password = db.get("user:%s:password" % user)
    server.login(username, password)
    # From here on the session is authenticated; guarantee logout() so an
    # error while fetching or storing does not leave the connection open.
    try:
        server.select_folder('INBOX', readonly=True)
        messages = server.search(['NOT DELETED','SINCE 15-May-2014'])
        response = server.fetch(messages, ['RFC822', 'FLAGS'])
        inbox_key = "mail:%s:inbox" % user
        # NOTE: dict.iteritems() exists only on Python 2.
        for msgid, data in response.iteritems():
            # check for duplicates
            duplicate = db.zrangebyscore(inbox_key, msgid, msgid)
            if duplicate:
                continue
            # NOTE(review): presumably data['RFC822'] is a byte string; on
            # Python 2, .encode() on non-ASCII bytes would raise -- confirm.
            emailUTF8 = data['RFC822'].encode('utf-8')
            msg = parser.parsestr(emailUTF8)
            body = extract_body(msg).encode('utf-8')
            # NOTE(review): presumably msg is an email.message.Message, where
            # item assignment adds a header rather than replacing one and
            # lookups ignore case -- confirm.  The later msg['Subject'] and
            # msg['To'] reads rely on these assignments for their fallbacks.
            msg['message'] = body
            msg['subject'] = ('NoSubj' if (msg['Subject'] is None or msg['Subject'].encode('utf-8') == "".encode('utf-8'))  else msg['Subject'])
            msg['to'] = ('NoTo' if (msg['To'] is None) else msg['To'])
            plain = {'plain_body': extract_body_text(msg).encode('utf-8'), 'subject': msg['subject']}
            # TODO set unread
            email = {'id': msgid, 'from': msg['From'], 'to': msg['To'], 'subject': msg['Subject'],
                    'date': msg['Date'], 'cc': msg['CC'], 'read': False,
                    'message': body, 'categorized': False, 'summary': shorten(plain), 'archived': False}

            # Read inside the loop so a change to the flag made while
            # messages are being processed is still picked up.
            trained = db.get("user:%s:trained" % user)
            if trained == "true":
                email['category'] = int(classify(msg, user))
            else:
                email['category'] = 1
            emailJSON = json.dumps(email, sort_keys=True, indent=4, separators=(',', ': '))
            db.zadd(inbox_key, emailJSON, msgid)
            db.sadd("mail:%s:%s" % (user, email['category']), msgid)
    finally:
        server.logout()
Пример #3
0
 def new_analysis_view(self, request):
     """
     DESCRIPTION:
     Show the new analysis page and process its form.

     Any request other than POST, and a POST whose form does not validate,
     renders 'new_analysis.html' with the form in the context.  A valid
     POST saves the form, attaches the requesting user and the submitted
     image, stores the result of classify() on the opened image, saves
     again and redirects to '/evaluations/my_analysis_view'.
     """
     template = 'new_analysis.html'

     # Not a submission: show an unbound form.
     if request.method != 'POST':
         return render(request, template, {'form': EvaluationForm()})

     form = EvaluationForm(request.data)
     # Invalid submission: show the bound form again.
     if not form.is_valid():
         return render(request, template, {'form': form})

     evaluation = form.save()
     evaluation.user = request.user
     evaluation.image = form.data.get('image')
     # The image URL is turned into a path relative to the working directory.
     picture = Image.open('.' + evaluation.image.url)
     evaluation.results = classify(picture)
     evaluation.save()
     return redirect('/evaluations/my_analysis_view')
Пример #4
0
def initialize_cmudict():
    """Load the pickle at ``CMUDICT_FNAME`` into the global ``CMUDICT``.

    NOTE: ``pickle.load`` can execute arbitrary code contained in the file,
    so ``CMUDICT_FNAME`` must only ever point at a trusted file.
    """
    global CMUDICT

    # Pickle data is binary: text mode can corrupt it through newline
    # translation and is rejected outright by pickle on Python 3.
    with open(CMUDICT_FNAME, "rb") as f:
        CMUDICT = pickle.load(f)

# Script entry point: binds a TCP socket to (HOST, PORT), calls
# initialize_cmudict(), then looks up in CMUDICT whatever each connected
# client sends.
# NOTE: the `print x` statements below are Python 2 syntax.
if __name__ == "__main__":
    hostname = socket.gethostname()
    print hostname
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # HOST and PORT are not defined inside this block.
    s.bind((HOST, PORT))

    initialize_cmudict()
    # classify something just to force the classifier to initialize
    phones = pronun.classify('a')
    print phones

    # Outer loop: accept one client connection after another, forever.
    while True:
        # NOTE(review): listen() is re-issued before every accept(); it is
        # normally called once before the loop -- confirm this is intended.
        s.listen(1)
        conn, addr = s.accept()

        # Inner loop: read from the current client, up to 1024 bytes a time.
        while True:
            data = conn.recv(1024)
            # An empty read ends this client's loop and returns to accept().
            if not data: break
            print data

            # The received data is used verbatim as the lookup key.
            if data in CMUDICT:
                # Take the first entry stored for this key (presumably the
                # first pronunciation -- confirm against the pickle's layout).
                phones = CMUDICT[data][0]
                # Strip every digit from each element (presumably stress
                # markers -- confirm).
                # NOTE(review): '\d' is not a raw string; r'\d' would be the
                # conventional spelling for a regex.
                phones = [re.sub('\d', '', p) for p in phones]
Пример #5
0
def initialize_cmudict():
    """Populate the global ``CMUDICT`` from the pickle at ``CMUDICT_FNAME``.

    NOTE: unpickling can run arbitrary code embedded in the file; the path
    in ``CMUDICT_FNAME`` must refer to a trusted file only.
    """
    global CMUDICT

    # Open in binary mode: pickle streams are bytes, and text mode may alter
    # them via newline translation (and fails on Python 3).
    with open(CMUDICT_FNAME, "rb") as f:
        CMUDICT = pickle.load(f)

# Script entry point: opens a TCP socket bound to (HOST, PORT), calls
# initialize_cmudict(), and then checks the data received from each client
# against CMUDICT.
# NOTE: `print x` is Python 2 statement syntax.
if __name__ == "__main__":
    hostname = socket.gethostname(); print hostname
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # HOST and PORT come from outside this block.
    s.bind((HOST, PORT))
    
    initialize_cmudict()
    # classify something just to force the classifier to initialize
    phones = pronun.classify('a')
    print phones
    
    # Each pass of the outer loop accepts and serves a single connection.
    while True:
        # NOTE(review): listen() runs again on every pass; calling it once
        # before the loop is the usual arrangement -- confirm intent.
        s.listen(1)
        conn, addr = s.accept()
        
        # Read from the accepted connection in chunks of at most 1024 bytes.
        while True:
            data = conn.recv(1024)
            # Empty data stops reading and goes back to accept().
            if not data: break
            print data
            
            # The raw received data is the dictionary key.
            if data in CMUDICT:
                # Use the first entry stored under the key (presumably the
                # first pronunciation -- confirm).
                phones = CMUDICT[data][0]
                # Delete all digits from each element (presumably stress
                # markers -- confirm).
                # NOTE(review): '\d' is written without the r prefix.
                phones = [re.sub('\d','',p) for p in phones]