def identify():
    '''
    Identify the content sent in the body of the current request.

    The body (either HTML or plain text) is read in full, handed to WLI's
    identifiers, and the outcome is returned to the client serialized as
    a JSON string.
    '''
    body = request.body.read()
    identified = wli.identify(body)
    return json.dumps(identified)
def retrieve_page_and_identify(url):
    '''
    Have WLI download a page and identify its content.

    `url` is given without a scheme: 'http://' is always prepended, so
    only HTTP is supported. The downloaded page is run through WLI's
    identifiers and the outcome is returned to the client serialized as
    a JSON string.
    '''
    target = 'http://' + url
    page = wli.retrieve_page(target)
    identified = wli.identify(page)
    return json.dumps(identified)
user = users_c.fetchone() #print "Retrieving tweets for user %d..." % user['id'] tweets_c = db.cursor() tweets_c.execute("SELECT id, user_id, text FROM tweets WHERE user_id=%d" % user['id']) tweets = tweets_c.fetchall() n_tweets = len(tweets) n_ita = 0 # if a user has less than 3 tweets, no record is inserted if n_tweets >= 3: #print 'Running CLD on %d tweets...' % n_tweets for i, tweet in enumerate(tweets): wli_result = wli.identify(tweet['text'].encode('utf-8')) # print tweet['text'].encode('utf-8') # mongo.wli.twittersfera.save({ # 'id': tweet['id'], # 'user_id': tweet['user_id'], # 'text': tweet['text'], # 'wli': wli_result # }) if wli_result['chromium_cld']['status'] != 'ok': #print wli_result['chromium_cld'] continue if wli_result['chromium_cld']['best'] == 'ita': n_ita += 1