def writeToFile(text):
    with open(args.output, "a") as myfile:
        myfile.write(text.encode("utf-8"))

def isReserved(word):
    return word in reservedKeywords

#GoodKeywords = ["محترم","جميل","محترمة"]
# seed list of positive/subjective Arabic keywords used to bootstrap the lexicon
GoodKeywords = ["حصري","حلو","طيب","رائع","عادي","خلوق","مختلف","مميز","سهل","لطيف",
                "سعيد","سلس","بسيط","الحمد","نعم","خاص","كويس","متألق","خفيف","راقي",
                "متواضع","يسر","راح","جميل","محترم","رايق","محترمة","مؤدب","حلوة","ممتع",
                "جديد","مبدع","فايق","متميز","حبوب"]
#GoodKeywords = ["و","انت","يا","ا"]

grap = TweetGrapper()
iteration = 0
while True:
    newGoodKeywords = []
    for w in GoodKeywords:
        # search for the exact phrase '<seed> و' (seed word + the conjunction "and")
        searchString = "\"" + w + " و \""
        result = grap.search([searchString], None)
        if len(result) > 0:
            # find 1-grams that follow the seed word in the matched tweets
            for tweet in result:
                r = tweet.clean(True)
                searchString = w + " و "
                pos = r.find(searchString.decode("utf-8"))
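                # A minimal sketch of how the truncated step above might
                # continue: take the first token after "<seed> و " as a
                # candidate 1-gram, skip reserved words, and queue it for the
                # next bootstrapping pass. The slicing, token split, and the
                # merge into GoodKeywords below are assumptions, not the
                # original code.
                if pos >= 0:
                    rest = r[pos + len(searchString.decode("utf-8")):]
                    tokens = rest.split()
                    # keep the first following word if it is not a reserved/stop word
                    if len(tokens) > 0 and not isReserved(tokens[0]):
                        if tokens[0] not in newGoodKeywords:
                            newGoodKeywords.append(tokens[0])
    # merge the harvested words into the seed list for the next pass (assumed)
    GoodKeywords = GoodKeywords + newGoodKeywords
    iteration += 1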
# fragment: body of the writeTweet callback (the enclosing def and the
# `separator` string are defined above this excerpt)
    if args.output is not None:
        with open(args.output, "a") as myfile:
            tweetText = tweetText + separator
            myfile.write(tweetText.encode('utf-8'))
    else:
        print tweetText

keywords = []
# reading keywords from input file, one keyword per line
with open(args.input) as f:
    kws = f.read().split("\n")
    keywords = [kw.strip() for kw in kws if len(kw) > 0]

grap = TweetGrapper()

#Search Mode
#------------------
if "search" == args.mode.lower():
    print "Activating search mode"
    if args.location is not None and args.lang is not None:
        grap.search(keywords, writeTweet, args.location, args.lang)
    else:
        grap.search(keywords, writeTweet)
#STREAM Mode
#------------------
elif "stream" == args.mode.lower():
    print "Activating stream mode"
    if args.location is not None and args.lang is not None:
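        # The excerpt is cut off here. A minimal sketch of the likely
        # continuation, mirroring the search branch above; streamloop is the
        # streaming entry point used elsewhere in this repo, but passing
        # location/lang to it here is an assumption, not the original code.
        grap.streamloop(keywords, writeTweet, args.location, args.lang)
    else:
        grap.streamloop(keywords, writeTweet)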
# for i in l:
#     print i.clean().encode("utf-8")

# parser = argparse.ArgumentParser(description='tool to extract a set of subjective words and idioms depending on a set of patterns written in a config file')
# parser.add_argument('-c','--config', help='Input config file name', required=True)
# parser.add_argument('-i','--input', help='Input tweets files to extract subjective words from', required=True)
# parser.add_argument('-o','--output', help='Output file name - print in console if not specified', required=False)
# parser.add_argument('-uf','--uniqandfilter', help='filter extracted lexicon words and save them to clean_uniq_output file with counts', required=False, action="store_true")
# parser.add_argument('-sl','--seedlexicon', help='Input classified lexicon file name', required=False)
# args = parser.parse_args()

grap = TweetGrapper()

def do(tweet):
    # stream callback: print the tweet id and its plain text, tab-separated
    print str(tweet.id) + "\t" + tweet.simpleText()

#grap.streamloop(["السيسي","مصر"],do)
# grap.streamloop(["مصر","مرسي","السيسي","مبارك","الأخوان","\"30 يونيو\"","\"25 يناي\"","#انتخبوا_العرص","عسكر"],do)
grap.streamloop(["فودافون","موبينيل"], do)

# config = Config(args.config)
# matcher = PatternMatcher(args.input, config)
# print config.Patterns
@author: hadyelsahar
'''

import argparse
from Classes.Tweet import *
from TweetGrapper.TweetGrapper import *
from PatternMatcher import *

# t = Tweet(u"اصلها \n\n لو عرفت تخليك بني ادم محترم و عارف ربنا .. اوعي تسيبها !", language="ar", searchKeyword="محترم")
# print t.clean()
# print t.clean()
# print t.cleanText

grap = TweetGrapper()
# quick test: search for "محترم" ("respectful") and print each tweet id with its cleaned text
l = grap.search("محترم")
for i in l:
    print str(i.id) + "\t" + i.clean().encode("utf-8")

# parser = argparse.ArgumentParser(description='tool to extract a set of subjective words and idioms depending on a set of patterns written in a config file')
# parser.add_argument('-c','--config', help='Input config file name', required=True)
# parser.add_argument('-i','--input', help='Input tweets files to extract subjective words from', required=True)
# parser.add_argument('-o','--output', help='Output file name - print in console if not specified', required=False)
# parser.add_argument('-uf','--uniqandfilter', help='filter extracted lexicon words and save them to clean_uniq_output file with counts', required=False, action="store_true")
# parser.add_argument('-sl','--seedlexicon', help='Input classified lexicon file name', required=False)
# args = parser.parse_args()
# if args.uniqandfilter is True and args.seedlexicon is None:
#     parser.error('must specify seedlexicon when choosing [-uf] option')