def parse_keyuser(self, response):
    item = WeibospiderItem()
    analyzer = Analyzer()
    total_pq = analyzer.get_html(response.body, 'script:contains("feed_content wbcon")')
    item['keyword_uid'] = analyzer.get_keyuser(total_pq)
    item['keyword'] = response.meta['keyword']
    return item
def update_music_data(self):
    analyzer = Analyzer()
    music_list = self.banshee.get_tracks()

    # delete previously analyzed songs no longer existing in Banshee
    # (iterate over a snapshot of the keys so entries can be deleted while looping)
    for mp3 in list(self.music_shelve):
        if mp3 not in music_list:
            del self.music_shelve[mp3]
            self.music_shelve.sync()

    song_count = len(music_list)
    progress = Progress("Analyzing Songs", song_count)

    # calculate and save features of new songs
    for mp3 in music_list:
        if mp3 not in self.music_shelve:
            features = analyzer.compute_features(mp3)
            if analyzer.valid_features(features):
                self.music_shelve[mp3] = features
                self.music_shelve.sync()
        progress.display()

    # convert music data to array
    self.music_data = np.array(self.music_shelve.values())
def classification_preprocess_all_datasets():
    """
    Preprocesses all datasets to be ready for classification task.
    This will include stemming, word correction, lower-casing,
    hashtag removal, special char removal.
    """
    for i in range(0, len(utils.annotated_datasets)):
        tweetlines = utils.get_dataset(utils.annotated_datasets[i])
        tweets = []
        for line in tweetlines:
            if len(line) > 1:
                tweets.append(tweet.to_tweet(line))

        # tweets = lower_case(tweets)
        tweets = remove_hastags_and_users(tweets)
        tweets = count_emoticons(tweets)
        tweets = replace_links(tweets)
        tweets = remove_specialchars(tweets)
        tweets = correct_words(tweets)
        tweets = stem(tweets)
        tweets = tokenize(tweets)
        tweets = pos_tag(tweets)
        tweets = count_exclamations(tweets)

        analyzer = Analyzer(utils.annotated_datasets[i], tweets)
        stats = analyzer.analyze()
        print stats

        # store tweets in pickles...
        print "Storing pickles..."
        utils.store_pickles(tweets, utils.annotated_datasets[i][24:len(utils.annotated_datasets[i]) - 4])
def atuser_uid_parser(self, response):
    item = WeibospiderItem()
    analyzer = Analyzer()
    friendcircle = FriendCircle()
    total_pq = analyzer.get_html(response.body, 'script:contains("W_face_radius")')
    uid = friendcircle.get_user_uid(total_pq)
    self.atuser_dict[response.meta['atuser_nickname']] = uid
def analyze(self):
    for i, line in enumerate(self.segment):
        if i == 0:
            self.vicar.name = line
        else:
            analyzer = Analyzer(line, self.vicar)
            analyzer.analyze()
def test_pipe_path_winxp(p):
    a = Analyzer()
    p.return_value = osversion(5, 1)
    assert a.get_pipe_path("foo") == "\\\\.\\PIPE\\foo"

    p.return_value = osversion(6, 1)
    assert a.get_pipe_path("foo") == "\\??\\PIPE\\foo"
def parse_secondload(self, response):
    item = response.meta['item']
    analyzer = Analyzer()
    total_pq = analyzer.get_mainhtml(response.body)
    item['content'] = analyzer.get_content(total_pq)
    item['time'] = analyzer.get_time(total_pq)
    item['atuser'], item['repostuser'] = analyzer.get_atuser_repostuser(total_pq)
    return item
def main():
    analyzer = Analyzer()
    response = requests.get("http://cryptopals.com/static/challenge-data/4.txt")
    content = response.content.split('\n')
    for line in content:
        decoded_str = line.decode("hex")
        analyzer.incremental_brute(decoded_str)
    analyzer.getCurrent()
def parse_load(self, response):
    item = WeibospiderItem()
    analyzer = Analyzer()
    friendcircle = FriendCircle()
    total_pq = analyzer.get_mainhtml(response.body)
    item['uid'] = response.meta['uid']
    item['content'] = analyzer.get_content(total_pq)
    item['time'], item['timestamp'] = analyzer.get_time(total_pq)
    atuser_info, item['repost_user'] = analyzer.get_atuser_repostuser(total_pq)
    yield item
def run(self):
    # info = urllib2.urlopen(self.url).info()
    html_name, url_name = scanner([self.url], "000")
    a = Analyzer(html_name, url_name)
    print self.url
    self.model.mydata = self.model.mydata + [(self.url, a.getAds()[1], a.getAds()[0], a.getUniqueVisitors(), "0")]
    self.model.emit(SIGNAL("layoutChanged()"))
def main(path):
    files = [join(path, f) for f in listdir(path)
             if isfile(join(path, f)) and fnmatch(f, '*.zip')]
    reports = []
    for filename in files:
        print >> sys.stderr, "Processing report %s" % filename
        rep = report.Report(filename)
        reports.append(rep)
    an = Analyzer(reports)
    an.run()
def parse_total_page(self, response):
    '''Get the total number of search-result pages to crawl.'''
    analyzer = Analyzer()
    total_pq = analyzer.get_html(response.body, 'script:contains("W_pages")')
    keyword_analyzer = keyword_info_analyzer()
    total_pages = keyword_analyzer.get_totalpages(total_pq)  # total number of search-result pages to crawl
    for page in range(1):  # change this to total_pages to crawl all pages
        search_url = response.meta['search_url'] + str(page + 1)
        yield Request(url=search_url,
                      meta={'cookiejar': response.meta['cookiejar'], 'keyword': response.meta['keyword']},
                      callback=self.parse_keyword_info)
def parse_thirdload(self, response):
    item = WeibospiderItem()
    analyzer = Analyzer()
    total_pq = analyzer.get_mainhtml(response.body)
    item['uid'] = response.meta['uid']
    item['content'] = analyzer.get_content(total_pq)
    item['time'] = analyzer.get_time(total_pq)
    item['atuser'], item['repostuser'] = analyzer.get_atuser_repostuser(total_pq)
    return item
def parse_keyword_info(self, response):
    '''Extract the search-result information.'''
    item = WeibospiderItem()
    analyzer = Analyzer()
    total_pq = analyzer.get_html(response.body, 'script:contains("feed_content wbcon")')
    keyword_analyzer = keyword_info_analyzer()
    (item['keyword_uid'], item['keyword_alias'],
     item['keyword_content'], item['keyword_publish_time']) = keyword_analyzer.get_keyword_info(total_pq)
    item['keyword'] = response.meta['keyword']
    return item
def parse_atuser_uid(self, response):
    item = WeibospiderItem()
    analyzer = Analyzer()
    friendcircle = FriendCircle()
    item['atuser_nickname'] = response.meta['atuser_nickname']
    total_pq = analyzer.get_html(response.body, 'script:contains("W_face_radius")')
    atuser_uid = friendcircle.get_user_uid2(item['atuser_nickname'], total_pq)
    item['atuser_uid'] = atuser_uid
    item['uid'] = response.meta['uid']
    yield item
def parse_atuser_uid(self, response):
    '''Resolve the uid of the corresponding @-mentioned user.'''
    item = WeibospiderItem()
    analyzer = Analyzer()
    friendcircle = FriendCircle()
    atuser_nickname = response.meta['atuser_nickname']
    total_pq = analyzer.get_html(response.body, 'script:contains("W_face_radius")')
    # uid = friendcircle.get_user_uid(total_pq)
    atuser_uid = friendcircle.get_user_uid2(atuser_nickname, total_pq)  # look up the @-user's uid by nickname
    self.atuser_dict[atuser_nickname] = atuser_uid
def get_userurl(self, response):
    analyzer = Analyzer()
    total_pq = analyzer.get_html(response.body, 'script:contains("PCD_person_info")')
    user_property = analyzer.get_userproperty(total_pq)
    if user_property == 'icon_verify_co_v':  # this is an official (public) account
        public_userinfo_url = analyzer.get_public_userinfohref(total_pq)
        yield Request(url=public_userinfo_url,
                      meta={'cookiejar': response.meta['cookiejar'],
                            'uid': response.meta['uid'],
                            'user_property': user_property},
                      callback=self.parse_public_userinfo)
    else:  # this is a personal account
        userinfo_url = analyzer.get_userinfohref(total_pq)
        yield Request(url=userinfo_url,
                      meta={'cookiejar': response.meta['cookiejar'],
                            'uid': response.meta['uid'],
                            'user_property': user_property},
                      callback=self.parse_userinfo)
def analyze_tweets(twitter_string):
    analyzer = Analyzer(["han", "hon", "den", "det", "denna", "denne", "hen"])
    read = ReadJSON()
    total_tweets = 0
    for line_array in read.read_string(twitter_string):
        analyzer.analyze_tweet(line_array)
        total_tweets += 1
    arr = analyzer.count_array
    arr["total"] = total_tweets
    return arr
def parse(path, f=None):
    p = Parser(path=path)
    p.parse_file()
    a = Analyzer(parser=p)
    a.analyze()
    j = Packer(analyzer=a)
    if f is None:
        return j.pack()
    else:
        j.pack(f=f)
def parse_total_page(self, response):
    '''Get the total number of search-result pages to crawl.'''
    analyzer = Analyzer()
    total_pq = analyzer.get_html(response.body, 'script:contains("W_pages")')
    keyword_analyzer = keyword_info_analyzer()
    total_pages = keyword_analyzer.get_totalpages(total_pq)  # total number of search-result pages to crawl
    logger.info("the total_pages is: %d", total_pages)
    for page in range(1):  # TODO: change this to total_pages to crawl all pages
        search_url = response.meta['search_url'] + str(page + 1)
        yield Request(url=search_url,
                      cookies=random.choice(COOKIES),
                      meta={'keyword': response.meta['keyword']},
                      callback=self.parse_keyword_info)
def pickup(self):
    # create the browser we will use to navigate wmtake.ru
    b = Browser()
    # create the analyzer we will use to recognize the captcha
    a = Analyzer(self.site, self.symsize, self.charset)
    a.load()
    b.show()

    log.debug('LOADING PAGE WITH WM BONUS')
    b.get('http://wmtake.ru/m.base/bonus.php')

    while True:
        log.debug('SAVING CAPTCHA')
        captcha = b.js('$("#scode-pic img")[0].src')
        #b.save(captcha, '/home/polzuka/inspirado/captcha/wmtake/%02d.gif' % i)

        log.debug('CAPTCHA TRANSFORMING')
        try:
            t = Transformer('orig', b.image(captcha))
            t.resizeby('resize', t['orig'], 2, 2)
            t.grayscale('grayscale', t['resize'], 2)
            t.binarize('binarize', t['grayscale'], 150, CV_THRESH_BINARY_INV)
            t.contourSplit('breaksplit', t['binarize'], 0.001)
            if len(t.symbols) != self.symqty:
                raise Exception
        except Exception, e:
            log.debug(e)
            log.debug(colorize('INCORRECT SYMBOL NUMBER', RED))
            log.debug('LOADING PAGE WITH WM BONUS')
            b.get('http://wmtake.ru/m.base/bonus.php')
            continue

        t.normolize('origsplit', 'breaksplit', self.symsize)
        symbols = t.slice('origsplit')

        log.debug('RECOGNITION CAPTCHA')
        code = a.captcha(symbols)
        log.debug('ANALYZE RESULT: %s' % colorize(code))
        del t
        print code

        log.debug('FILLING FIELDS')
        b.js("$('#scode').val('%s')" % code)
        b.js("$('#purse').val('R%s')" % self.purse)
        b.js("$('div.news_box div.bn p').click()")
        b.sleep(10)

        if not b.js("$('#mess-exec:visible').length"):
            log.debug('FINISH')
            break

        log.debug('INCORRECT CAPTCHA RECOGNITION')
        log.debug('LOADING PAGE WITH WM BONUS')
        b.js("$('#mess-exec p').click()")
def processFile(path):
    try:
        p = Parser(path=path)
        p.parseFile()
        a = Analyzer(parser=p)
        a.analyze()
        j = Packer(analyzer=a)
        return j.pack()
    except:
        print path
        exit(1)
class Manager:
    """ Actual object to manage everything """

    # reference to the entire data storage
    __cache = None
    # reference to object managing articles access and storage
    __articles = None
    # reference to the object doing the analysis
    __analyzer = None

    def __init__(self, config):
        """
        Prepare the entire system's objects
        config - the configuration object from the click library
        """
        yaml_config = yaml.load(config.obj["config"])
        config.obj["config"].close()
        self.__cache = Cache(db_file=config.obj["database"])
        self.__articles = Articles(key=yaml_config["api_key"], cache=self.__cache)
        self.__analyzer = Analyzer()

    def perform_search(self, phrase, training_size=1000):
        """
        Perform the actual search either to cache or ny times
        phrase - the phrase to search by
        training_size - the amount of articles to use and fetch
        returns the list of found articles
        """
        return self.__articles.perform_search(phrase, training_size)

    def analyze_results(self, article_list):
        """
        Perform the regression analysis on the results and print them out
        to the command line
        """
        print("Using %i articles" % (len(article_list),))
        self.__analyzer.process_data(article_list)

    def predict_result(self, date):
        """
        Make a prediction on a date
        date - a datetime to make a prediction on
        return a tuple of the type of article created from the analyzer
        """
        return self.__analyzer.predict(date)
def picup(self):
    browser.show()
    browser.get('http://wmstream.ru/')
    print 'GET PAGE'
    browser.sleep(1000)

    print 'GET CAPTCHA'
    captcha = browser.js('$("#wmbonus_form_captcha img")[0].src')
    browser.sleep(1000)
    t.load('orig', browser.image(captcha))
    t.show()
    browser.save(captcha, '/home/polzuka/inspirado/symbols/first')

    t.resizeby('resize', t['orig'], 4, 4)
    t.grayscale('grayscale', t['resize'], 2)
    t.binarize('binarize', t['grayscale'], 200, CV_THRESH_BINARY)

    radius = 3
    kernel = cvCreateStructuringElementEx(radius * 2 + 1, radius * 2 + 1, radius, radius, CV_SHAPE_ELLIPSE)
    t.morphology('morphology', t['binarize'], 1, 1, kernel)

    try:
        t.breakSplit('breaksplit', t['morphology'], 0.2)
    except TransformError:
        print 'ololo'

    t.normolize('origsplit', 'breaksplit', 20, 30)
    sl = t.slice('origsplit')

    a = Analyzer(20, 30, '0123456789')
    a.load('/home/polzuka/inspirado/fann.data')
    code = a.captcha(sl)
    print code

    print 'GET CLICK'
    browser.js('$("#frm_vallet").mousedown()')
    browser.js('$("#frm_vallet").mouseup()')
    browser.js('$("#frm_vallet").click()')
    browser.sleep(1000)
    browser.js('$("#frm_vallet").val("%s")' % data['purse'])

    browser.js('$("#frm_captcha").mousedown()')
    browser.js('$("#frm_captcha").mouseup()')
    browser.js('$("#frm_captcha").click()')
    for i in xrange(5):
        browser.js('$("#frm_captcha").keydown()')
        browser.js('$("#frm_captcha").keyup()')
        browser.js('$("#frm_captcha").keypress()')
    browser.js('$("#frm_captcha").val("%s")' % code)
    print 2
    browser.js('$("#btn_bonus").click()')
    print 3
def parse_load(self, response):
    item = WeibospiderItem()  # collect the user's weibo content information
    analyzer = Analyzer()
    friendcircle = FriendCircle()
    total_pq = analyzer.get_html(response.body, 'script:contains("WB_feed WB_feed_v3")')
    item['uid'] = response.meta['uid']
    item['content'] = analyzer.get_content(total_pq)
    item['time'], item['timestamp'] = analyzer.get_time(total_pq)
    weibo_analyzer = weibocontent_analyzer()
    item['repost_nums'], item['comment_nums'], item['like_nums'] = weibo_analyzer.get_weibo_relative_args(total_pq)
    yield item
def brute(ciphertext):
    analyzer = Analyzer()
    highestText = ""
    highestValue = 0
    keys = [chr(i) for i in xrange(0, 0x100)]
    for key in keys:
        message = xor(ciphertext, key)
        value = analyzer.analyze(message)
        if value > highestValue:
            highestValue = value
            highestText = message
    print "[Score = %f] %s" % (highestValue, highestText)
def __init__(self, master):
    self.master = master

    east_group = LabelFrame(master, text='东部')
    east_group.grid(row=0, column=0, padx=5, pady=5)
    west_group = LabelFrame(master, text='西部')
    west_group.grid(row=1, column=0, padx=5, pady=5)

    # Eastern Conference standings
    east_ranking = LabelFrame(master, text='东部排名')
    east_ranking.grid(row=0, column=1, rowspan=2, padx=5, pady=5, sticky=N)
    self.east_ranking_list = self.creat_teams_ranking_list(east_ranking)

    # Western Conference standings
    west_ranking = LabelFrame(master, text='西部排名')
    west_ranking.grid(row=0, column=2, rowspan=2, padx=5, pady=5, sticky=N)
    self.west_ranking_list = self.creat_teams_ranking_list(west_ranking)

    # Eastern Conference divisions
    atlantic_group = LabelFrame(east_group, text='大西洋区')
    atlantic_group.grid(row=0, column=0, padx=5, pady=5)
    central_group = LabelFrame(east_group, text='中部区')
    central_group.grid(row=0, column=1, padx=5, pady=5)
    southeast_group = LabelFrame(east_group, text='东南区')
    southeast_group.grid(row=0, column=2, padx=5, pady=5)

    # Western Conference divisions
    pacific_group = LabelFrame(west_group, text='太平洋区')
    pacific_group.grid(row=1, column=0, padx=5, pady=5)
    southwest_group = LabelFrame(west_group, text='西南区')
    southwest_group.grid(row=1, column=1, padx=5, pady=5)
    northwest_group = LabelFrame(west_group, text='西北区')
    northwest_group.grid(row=1, column=2, padx=5, pady=5)

    spider = Spider()
    index_data = spider.load_teams_index()
    teams_ranking_data = spider.load_teams_ranking()
    analyzer = Analyzer()
    teams_data = analyzer.analyze_teams_data(index_data)
    self.teams_ranking = analyzer.analyze_teams_ranking(teams_ranking_data)
    self.load_teams_ranking()
    self.teams_logo = utils.load_teams_logos()
    self.load_group(atlantic_group, teams_data[0:5])
    self.load_group(pacific_group, teams_data[5:10])
    self.load_group(central_group, teams_data[10:15])
    self.load_group(southwest_group, teams_data[15:20])
    self.load_group(southeast_group, teams_data[20:25])
    self.load_group(northwest_group, teams_data[25:30])
def parse_userinfo(self, response):
    '''Parse the profile information of a personal (non-official) account.'''
    item = WeibospiderItem()
    analyzer = Analyzer()
    try:
        total_pq1 = analyzer.get_html(response.body, 'script:contains("pf_photo")')
        item['image_urls'] = analyzer.get_userphoto_url(total_pq1)
        total_pq2 = analyzer.get_html(response.body, 'script:contains("PCD_text_b")')
        item['userinfo'] = analyzer.get_userinfo(total_pq2)
    except Exception, e:
        item['userinfo'] = {}.fromkeys(('昵称:'.decode('utf-8'), '所在地:'.decode('utf-8'), '性别:'.decode('utf-8'),
                                        '博客:'.decode('utf-8'), '个性域名:'.decode('utf-8'), '简介:'.decode('utf-8'),
                                        '生日:'.decode('utf-8'), '注册时间:'.decode('utf-8')), '')
        item['image_urls'] = None
def parse_thirdload(self, response):
    item = response.meta['item']
    #print 'UUUUUUUUUUUUUUUUUUUUUUUUU',response.meta['item'],'OOOOOOOOOOOOOOOOOOO',item['userinfo'],"PPPPPPPPPPPPPPPPPPPPP"
    item['uid'] = response.meta['uid']
    item['followuidlist'] = response.meta['followlist']
    #item['userinfo'] = response.meta['userinfo']
    #print '{{{{{{{{{{{{{{{{{{{{{{{',response.meta['userinfo']
    analyzer = Analyzer()
    total_pq = analyzer.get_mainhtml(response.body)
    item['content'] = analyzer.get_content(total_pq)
    item['time'] = analyzer.get_time(total_pq)
    item['atuser'], item['repostuser'] = analyzer.get_atuser_repostuser(total_pq)
    return item
def parse_public_userinfo(self, response):
    '''Parse the profile information of an official (public) account.'''
    item = WeibospiderItem()
    analyzer = Analyzer()
    try:
        total_pq1 = analyzer.get_html(response.body, 'script:contains("pf_photo")')
        item['image_urls'] = analyzer.get_userphoto_url(total_pq1)
        total_pq2 = analyzer.get_html(response.body, 'script:contains("PCD_text_b")')
        item['userinfo'] = analyzer.get_public_userinfo(total_pq2)
    except Exception, e:
        item['userinfo'] = {}.fromkeys(('联系人:'.decode('utf-8'), '电话:'.decode('utf-8'),
                                        '>邮箱:'.decode('utf-8'), '友情链接:'.decode('utf-8')), '')
        item['image_urls'] = None
def test_analyze_connect(self):
    print("test_check_connect:")
    analyze = Analyzer()
    import random
    words = ["あー", "えー", "あ", "え", "の", "てs", "@", "tue"]
    words_count = {}
    for word in words:
        words_count[word] = 0
    send_message = ""
    num = 0
    while num < 20:
        random_word = random.sample(words, 2)
        words_count[random_word[0]] += 1
        words_count[random_word[1]] += 1
        send_message = send_message + random_word[0] + " " + random_word[1] + " "
        checked = analyze.analyze_connect(send_message)
        num += 1
    print(send_message)
    print(checked)
    for word in checked.keys():
        self.assertEqual(words_count[word], checked[word])
def clear_background_for_image2(source, target, colors, verbose=True):
    """Clear background of source image. Result is a black and white image
    in png format. White color corresponds to foreground pixels, black color
    to background pixels."""
    global errors

    sourceTime = os.path.getmtime(source)
    targetTime = os.path.getmtime(target) if os.path.exists(target) else 0
    if sourceTime <= targetTime:
        print "%s is up to date" % (source)
        return

    print "%s -> %s" % (source, target)
    img = Image.open(source)

    # compute mean color
    try:
        analyzer = Analyzer(img, colors, verbose)
        pixels, groups, indexes = analyzer.filter_background2(img, 20)
        analyzer.save_pixels(target, pixels)
    except AnalyzerError as e:
        print "Error: %s: %s" % (source, str(e))
        errors = errors + 1
def main():
    # ensure proper usage
    if len(sys.argv) != 2:
        sys.exit("Usage: ./smile word")

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # analyze word
    score = analyzer.analyze(sys.argv[1])
    if score > 0.0:
        print(colored(":)", "green"))
    elif score < 0.0:
        print(colored(":(", "red"))
    else:
        print(colored(":|", "yellow"))
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)
    if tweets == None:
        sys.exit("Couldn't get user timeline: Invalid twitter handle")

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    positive_counter = 0
    negative_counter = 0
    neutral_counter = 0
    for i in tweets:
        sentiment = analyzer.analyze(i)
        if sentiment > 0:
            positive_counter += 1
        elif sentiment < 0:
            negative_counter += 1
        else:
            neutral_counter += 1

    positive, negative, neutral = positive_counter, negative_counter, neutral_counter

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)

    # redirect to error page if no user found/tweets protected
    if tweets is None:
        return redirect(url_for("notweets", screen_name=screen_name))

    positives = os.path.join(sys.path[0], 'positive-words.txt')
    negatives = os.path.join(sys.path[0], 'negative-words.txt')
    analyzer = Analyzer(positives, negatives)

    positive, negative, neutral = 0.0, 0.0, 0.0
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score < 0:
            negative += 1
        elif score > 0:
            positive += 1
        else:
            neutral += 1

    # Find percentages of sentiments - using len(tweets) should give correct results even if user tweets < 100
    positive = (positive / len(tweets)) * 100
    neutral = (neutral / len(tweets)) * 100
    negative = (negative / len(tweets)) * 100

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)

    # error checking
    if tweets == None:
        sys.exit("Couldn't reach user timeline")

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # initial values
    positive, negative, neutral = 0.0, 0.0, 0.0

    # get score of tweet and add accordingly
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name)

    # analyze each tweet and get sentiments frequency
    if tweets == None:
        positive, negative, neutral = 0.0, 0.0, 0.0
    else:
        positive, negative, neutral = 0.0, 0.0, 0.0
        if len(tweets) != 0:
            for i in tweets:
                s = analyzer.analyze(i)
                if s > 0:
                    positive += 1
                elif s < 0:
                    negative += 1
                else:
                    neutral += 1
            positive = positive / len(tweets)
            negative = negative / len(tweets)
            neutral = neutral / len(tweets)

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def main():
    """ Main function of packet scanner when running from cmdline. """
    #ps = PackageScanner()
    #packages = ps.getInstalledPackages()
    #print(packages)
    #ps.saveScanResults()

    an = Analyzer()
    an.loadFromFile(config.PKG_SCAN_DIR / config.PKG_SCAN_FILE)
    #an.loadFromPackageCont(packages)
    an.analyze()
    an.saveAnalysisResults()
def file_upload():
    print('DEBUG: uploading file...')
    if request.method == 'POST':
        result = {}

        # Check username existence
        if 'username' not in request.form:
            flash("No username")
            result['result'] = 'False'
            return jsonify(result)
        username = request.form['username']
        print("DEBUG: username = ", username)

        # Check file existence
        if 'file' not in request.files:
            flash('No file part')
            result['result'] = 'False'
            return jsonify(result)
        file = request.files['file']
        if file.filename == '':
            flash('No selected file')
            result['result'] = 'False'
            return jsonify(result)

        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            clear_frames(username)
            get_frames(username, path)
            tester = Analyzer(username)
            is_me = False
            is_lively = tester.detect_liveness(path)
            if tester.identify() > THRESHOLD:
                is_me = True
            if is_me and is_lively:
                result['result'] = 'True'
            else:
                result['result'] = 'False'
            return jsonify(result)
        # return redirect(request.url)
    return render_template('upload.html')
def main():
    utils.print_header("MUSIC BACKUP PROGRAM")
    configs = Configs.from_json("config.json")

    analyzer = Analyzer(configs)
    dst_files = analyzer.get_backup_files()
    src_files = analyzer.get_source_files()
    analyzer.compare_directories()

    summary = ""
    if analyzer.files_to_backup > 0 and configs.backup_enabled:
        utils.print_header("COPYING TO BACKUP")
        print("Starting copying process...\n")
        copier = Copier(configs.source_path, configs.backup_path,
                        src_files, dst_files, analyzer.files_to_backup)
        backed_up_count = copier.copy()
        summary += "Backed up a total of {} files!".format(backed_up_count)

    if analyzer.files_to_backcopy > 0 and configs.backcopy_enabled:
        utils.print_header("COPYING TO LOCAL")
        print("Starting copying process...")
        copier = Copier(configs.backup_path, configs.backcopy_path,
                        dst_files, src_files, analyzer.files_to_backcopy)
        backcopied_count = copier.copy()
        summary += "Copied a total of {} files to your local!".format(backcopied_count)

    if summary and (configs.backcopy_enabled or configs.backup_enabled):
        utils.print_header("SUMMARY")
        print(summary)

    print("\nComplete!")
    return
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, count=100)

    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")
    analyzer = Analyzer(positives, negatives)

    total_tweets = len(tweets)
    positive = 0.0
    negative = 0.0
    neutral = total_tweets
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0:
            positive += 1
            neutral -= 1
        elif score < 0:
            negative += 1
            neutral -= 1

    # make positive, negative, and neutral into percentages
    positive = (positive / total_tweets) * 100
    negative = (negative / total_tweets) * 100
    neutral = (neutral / total_tweets) * 100

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)

    # instantiate analyzer
    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")
    analyzing = Analyzer(positives, negatives)

    # TODO
    positive, negative, neutral = 0.0, 0.0, 0.00
    if tweets == None:
        print("Enter different user, no tweets")
    else:
        for tweet in tweets:
            sentsum = 0
            for word in tweet.split():
                sentsum += analyzing.analyze(word)
            if sentsum > 0.0:
                positive += 1.0
            elif sentsum < 0.0:
                negative += 1.0
            else:
                neutral += 1.0

    positive = positive / 100.00
    negative = negative / 100.00
    neutral = neutral / 100.00

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name)

    # return to index if screen_name doesn't exist
    if tweets == None:
        return redirect(url_for("index"))

    # TODO
    positive, negative, neutral = 0.0, 0.0, 0.0

    # analyze tweets
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name)
    if not tweets:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")
    classics = os.path.join(sys.path[0], "classics-words.txt")
    arthistory = os.path.join(sys.path[0], "arthistory-words.txt")
    tech = os.path.join(sys.path[0], "tech-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives, classics, arthistory, tech)

    # initialise positive, negative, neutral counters
    positive, negative, neutral = 0.0, 0.0, 0.0

    # loop through tweets list to analyse it, adding to counter
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def parse_based_follownum(self, response):
    item = WeibospiderItem()
    analyzer = Analyzer()
    total_follow_pq = analyzer.get_childfollowhtml(response.body)
    follow_page_num = analyzer.get_relation_pagenum(total_follow_pq)
    if follow_page_num != "" and int(follow_page_num) >= 5:
        for page in range(5, 0, -1):
            GetWeibopage.relation_data['page'] = page
            follow_url = getinfo.get_follow_mainurl(response.meta['uid']) + WeiboSpider.getweibopage.get_relation_paramurl()
            yield Request(url=follow_url,
                          meta={'cookiejar': response.meta['cookiejar'],
                                'uid': response.meta['uid']},
                          callback=self.parse_follow)
    elif follow_page_num == "":
        follow_url = 'http://weibo.com/%s/follow?page=1' % response.meta['uid']
        yield Request(url=follow_url,
                      meta={'cookiejar': 1,
                            'uid': response.meta['uid']},
                      callback=self.parse_follow)
    else:
        for page in range(int(follow_page_num), 0, -1):
            GetWeibopage.relation_data['page'] = page
            follow_url = getinfo.get_follow_mainurl(response.meta['uid']) + WeiboSpider.getweibopage.get_relation_paramurl()
            yield Request(url=follow_url,
                          meta={'cookiejar': response.meta['cookiejar'],
                                'uid': response.meta['uid']},
                          callback=self.parse_follow)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)
    if tweets is None:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # set up counters of positive, negative and neutral tweets
    positive = 0
    negative = 0
    neutral = 0

    # iterate over each tweet
    for tweet in tweets:
        # analyze each word of tweets, sum up the total score and add it to the correct sentiment tally
        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name (the default is "", so check for an empty string rather than None)
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")
    analyzer = Analyzer(positives, negatives)

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)
    if tweets == None:
        return redirect(url_for("index"))

    red_score = 0.0
    yellow_score = 0.0
    green_score = 0.0

    # analyze every tweet and score it
    for t in tweets:
        fscore = analyzer.analyze(t)
        if fscore > 0:
            green_score += 1.0
        elif fscore < 0:
            red_score += 1.0
        else:
            yellow_score += 1.0

    # generate chart
    chart = helpers.chart(green_score, red_score, yellow_score)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)

    # back to index if no screen_name
    if tweets == None:
        return render_template("index.html")

    # instantiate Analyzer object
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")
    analyzer = Analyzer(positives, negatives)

    # prepare for check
    positive = negative = neutral = 0
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0:
            positive += 1
        elif score < 0:
            negative += 1
        else:
            neutral += 1
    #positive, negative, neutral = 0.0, 0.0, 100.0

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def init_analyzer(self, example_count, first_run=False):
    if first_run is True:
        analyzer = Analyzer(example_count)
        # analyzer.init_analyzer(example_count)
        pickle_out_analyzer = open('saved/analyzer.pickle', 'wb')
        pickle.dump(analyzer, pickle_out_analyzer)
        pickle_out_analyzer.close()
    else:
        pickle_in_analyzer = open('saved/analyzer.pickle', 'rb')
        analyzer = pickle.load(pickle_in_analyzer)
    self.analyzer = analyzer
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "").lstrip("@")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, count=100)
    if tweets == None:
        sys.exit("Tweets not found")

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # analyse tweets and count positives, negatives and neutrals
    counter_positives, counter_negatives, counter_neutrals = 0.0, 0.0, 0.0
    for line in tweets:
        score = analyzer.analyze(line)
        if score > 0.0:
            counter_positives += 1
        elif score < 0.0:
            counter_negatives += 1
        else:
            counter_neutrals += 1
    total_tweets = counter_positives + counter_negatives + counter_neutrals

    # generate a chart that accurately depicts those sentiments as percentages
    positive = 100.0 * counter_positives / total_tweets
    negative = 100.0 * counter_negatives / total_tweets
    neutral = 100.0 * counter_neutrals / total_tweets

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name")
    if not screen_name:
        return redirect(url_for("index"))
    screen_name = re.sub(r'@?', '', screen_name)

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)
    if tweets == None:
        return redirect(url_for("index"))

    # instantiate analyzer
    analyzer = Analyzer()

    # TODO
    positive, negative, neutral = 0.0, 0.0, 0.0
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1

    allTweets = positive + negative + neutral
    positive = (positive / allTweets) * 100
    negative = (negative / allTweets) * 100
    neutral = 100 - positive - negative

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # get screen_name's 50 most recent tweets
    tweets = helpers.get_user_timeline(screen_name, 50)

    # redirect to the index if there are no tweets
    if tweets == None:
        return redirect(url_for("index"))

    # define variables for counting
    positive, negative, neutral = 0.0, 0.0, 100.0

    # analyze words in tweet and increment counting variables
    for word in tweets:
        score = analyzer.analyze(word)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # get screen_name's tweets
    analyzer = Analyzer(positives, negatives)
    para, value, positive, negative, neutral = analyzer.analyze(screen_name, 100)

    # TODO
    # positive, negative, neutral = 0.0, 0.0, 100.0

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name)

    # render index template if get_user_timeline returned None
    if tweets == None:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # init variables to keep track of tweets
    positive, negative, neutral = 0.0, 0.0, 0.0

    # instantiate Analyzer for single tweet (from Analayzer.py)
    singleTweetAnalyzer = Analyzer(positives, negatives)

    # loop through every tweet
    for tweet in tweets:
        # analyze tweet
        score = singleTweetAnalyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name)
    if tweets == None:
        return redirect(url_for("index"))

    # absolute paths
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")
    analyzer = Analyzer(positives, negatives)

    positive, negative, neutral = 0.0, 0.0, 0.0
    if len(tweets) != 0:
        for tweet in tweets:
            score = analyzer.analyze(tweet)
            if score > 0:
                positive += 1
            elif score < 0:
                negative += 1
            else:
                neutral += 1
    else:
        neutral = 100.0

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # get screen_name's most recent 100 tweets
    tweets = helpers.get_user_timeline(screen_name, 100)

    # return to index if screen_name doesn't exist
    if tweets == None:
        return redirect(url_for("index"))

    # create positive, negative and neutral count
    positive, negative, neutral = 0, 0, 0

    # analyze each tweet & increase corresponding sentiment count
    for tweet in tweets:
        score = analyzer.analyze(tweet)
        if score > 0.0:
            positive += 1
        elif score < 0.0:
            negative += 1
        else:
            neutral += 1

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def search():
    # validate screen_name
    screen_name = request.args.get("screen_name", "")
    if not screen_name:
        return redirect(url_for("index"))

    # get screen_name's tweets
    tweets = helpers.get_user_timeline(screen_name, 100)
    if tweets == None:
        return redirect(url_for("index"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    positive, negative, neutral = 0.0, 0.0, 100.0
    for tweet in tweets:
        # analyze tweets using previously written function
        score = analyzer.analyze(tweet)
        # add to positive score if tweet score is positive
        if score > 0:
            positive += 1
            neutral -= 1
        # same with negative
        elif score < 0:
            negative += 1
            neutral -= 1
        # same with neutral
        """else:
            neutral -= 1"""

    # generate chart
    chart = helpers.chart(positive, negative, neutral)

    # render results
    return render_template("search.html", chart=chart, screen_name=screen_name)
def __init__(self):
    Analyzer.__init__(self)
    # definition of the (instance) parsing graph
    self.graph = {
        0:  # initial state
            ([(None, None, -1001, None),                                   # T10.0.0 EOSeq -> missing number error
              (' ', None, 0, None),                                        # T10.0.1 white spaces -> keep state
              ('0b', None, 1, None),                                       # T10.0.2 binary prefix -> go to 1
              ('0x', None, 2, None),                                       # T10.0.3 hexadecimal prefix -> go to 2
              ('0', None, 3, None),                                        # T10.0.4 possibly octal prefix -> go to 3
              ('-', None, 4, None),                                        # T10.0.5 negative decimal prefix -> go to 4
              ('+', None, 6, None),                                        # T10.0.6 positive decimal prefix -> go to 6
              (self.dec_range_detector, None, 5, None)],                   # T10.0.7 decimal digit -> go to 5
             -1001),                                                       # T10.0.8 missing number error
        1:  # parsing next binary digits
            ([(None, None, -1002, None),                                   # T10.1.0 EOSeq -> malformed number error
              (self.bin_number_analyzer, None, 1000, self.check_limits)],  # T10.1.1
             -1002),                                                       # T10.1.2
        2:  # parsing next hexadecimal digits
            ([(None, None, -1005, None),                                   # T10.2.0 EOSeq -> malformed number error
              (self.hex_number_analyzer, None, 1000, self.check_limits)],  # T10.2.1
             -1005),                                                       # T10.2.2
        3:  # parsing next octal digits
            ([(None, None, 1000, self.insert_zero),                        # T10.3.0 EOSeq -> keep first octal digit
              (self.oct_number_analyzer, None, 1000, self.check_limits)],  # T10.3.1
             -1003),                                                       # T10.3.2
        4:  # parsing next decimal digits (negative number)
            ([(None, None, -1004, None),                                   # T10.4.0 EOSeq -> malformed number error
              (self.dec_number_analyzer, None, 1000, self.invert_number)], # T10.4.1 decimal digit -> invert value
             -1004),                                                       # T10.4.2 malformed number error
        5:  # parsing next decimal digits
            ([(None, None, 1000, None),                                    # T10.5.0 EOSeq -> NEVER followed (T10.0.6 don't advance pos)
              (self.dec_number_analyzer, None, 1000, self.check_limits)],  # T10.5.1
             -1004),                                                       # T10.5.2 malformed number error
        6:  # parsing next decimal digits
            ([(None, None, -1004, None),                                   # T10.6.0 EOSeq -> malformed number error
              (self.dec_number_analyzer, None, 1000, self.check_limits)],  # T10.6.1
             -1004),                                                       # T10.6.2 malformed number error
    }
def run(self):
    logger.info('agent starting skyline %s' % skyline_app)
    Analyzer(getpid()).start()

    # @added 20201105 - Feature #3830: metrics_manager
    # Start a metrics_manager process. This process is responsible for
    # managing all the metrics lists, Redis sets and hash keys outside and
    # independently from the analysis process/es.
    logger.info('agent starting skyline metrics_manager')
    Metrics_Manager(getpid()).start()

    while 1:
        sleep(100)
def main():
    # ensure proper usage
    if len(sys.argv) != 2:
        sys.exit("Usage: ./tweets word")

    tweets = helpers.get_user_timeline(sys.argv[1].lstrip("@"))

    # absolute paths to lists
    positives = os.path.join(sys.path[0], "positive-words.txt")
    negatives = os.path.join(sys.path[0], "negative-words.txt")

    # instantiate analyzer
    analyzer = Analyzer(positives, negatives)

    # analyze each tweet
    for i in tweets:
        score = analyzer.analyze(i)
        if score > 0.0:
            print(colored(str(score) + " " + i, "green"))
        elif score < 0.0:
            print(colored(str(score) + " " + i, "red"))
        else:
            print(colored(str(score) + " " + i, "yellow"))