Example #1
def writeToFile(text):
    with open(args.output, "a") as myfile:
        myfile.write(text.encode("utf-8"))


def isReserved(word):
    return word in reservedKeywords


#GoodKeywords = ["محترم","جميل","محترمة"]
GoodKeywords = [
    "حصري", "حلو", "طيب", "رائع", "عادي", "خلوق", "مختلف", "مميز", "سهل",
    "لطيف", "سعيد", "سلس", "بسيط", "الحمد", "نعم", "خاص", "كويس", "متألق",
    "خفيف", "راقي", "متواضع", "يسر", "راح", "جميل", "محترم", "رايق", "محترمة",
    "مؤدب", "حلوة", "ممتع", "جديد", "مبدع", "فايق", "متميز", "حبوب"
]

#GoodKeywords = ["و","انت","يا","ا"]

grap = TweetGrapper()

iteration = 0

while True:

    newGoodKeywords = []
    for w in GoodKeywords:
        # search for the quoted phrase '<keyword> و' (i.e. '<keyword> and')
        searchString = "\"" + w + " و \""
        result = grap.search([searchString], None)
        if len(result) > 0:
            # find the 1-gram that follows the matched phrase in each tweet
            for tweet in result:
                r = tweet.clean(True)
                searchString = w + " و "
                pos = r.find(searchString.decode("utf-8"))
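The loop above is cut off right after locating the pattern in the cleaned tweet. The continuation is not in the source; below is a minimal sketch of the likely next step, extracting the single word that follows "<keyword> و " as a new candidate keyword. extract_next_word is a hypothetical helper, not part of the original code.

# -*- coding: utf-8 -*-

def extract_next_word(cleaned, seed):
    # Hypothetical helper: return the first word after '<seed> و ' in a
    # cleaned (unicode) tweet, or None when the phrase is absent.
    pattern = (seed + " و ").decode("utf-8")
    pos = cleaned.find(pattern)
    if pos == -1:
        return None
    rest = cleaned[pos + len(pattern):].split()
    return rest[0] if rest else None

# Possible use inside the loop above (illustrative only):
# candidate = extract_next_word(r, w)
# if candidate is not None and not isReserved(candidate):
#     newGoodKeywords.append(candidate)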
Example #2
    if args.output is not None:
        with open(args.output, "a") as myfile:
            tweetText = tweetText + separator
            myfile.write(tweetText.encode('utf-8'))
    else:
        print tweetText


keywords = []
# reading keywords from input file
with open(args.input) as f:
    kws = f.read().split("\n")
    keywords = [kw.strip() for kw in kws if len(kw) > 0]

grap = TweetGrapper()

#Search Mode
#------------------
if "search" == args.mode.lower():
    print "Activating search mode"
    if args.location is not None and args.lang is not None:
        grap.search(keywords, writeTweet, args.location, args.lang)
    else:
        grap.search(keywords, writeTweet)

#STREAM Mode
#------------------
elif "stream" == args.mode.lower():
    print "Activating stream mode"
    if args.location is not None and args.lang is not None:
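None of these snippets shows the args object being built; a commented-out argparse block appears in the later examples. Below is a plausible reconstruction, inferred from the fields the code reads (args.mode, args.input, args.output, args.location, args.lang); the short flag names and help texts are assumptions, not the original definitions.

import argparse

# Assumed CLI; only the long option names are grounded in the args.* fields above.
parser = argparse.ArgumentParser(description='grab tweets by keyword, in search or stream mode')
parser.add_argument('-m', '--mode', help='"search" or "stream"', required=True)
parser.add_argument('-i', '--input', help='input file, one keyword per line', required=True)
parser.add_argument('-o', '--output', help='output file; print to console if omitted', required=False)
parser.add_argument('-loc', '--location', help='location filter (used together with --lang)', required=False)
parser.add_argument('-l', '--lang', help='language filter, e.g. "ar"', required=False)
args = parser.parse_args()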
Example #3
# for i in l:
#     print i.clean().encode("utf-8")


# parser = argparse.ArgumentParser(description='tool to extract a set of subjective words and idioms based on a set of patterns written in a config file')
# parser.add_argument('-c','--config', help='Input Config file name',required=True)
# parser.add_argument('-i','--input', help='Input Tweets files to Extract subjective words from',required=True)
# parser.add_argument('-o','--output',help='Output file name - print in console if not specified', required= True)
# parser.add_argument('-uf','--uniqandfilter',help='filter extracted lexicon words and save them to clean_uniq_output file with counts', required= False , action="store_true")
# parser.add_argument('-sl','--seedlexicon', help='Input classified lexicon file name',required=False)
# args = parser.parse_args()


grap = TweetGrapper()

def do(tweet):
    # print each incoming tweet as "id<TAB>text"
    print str(tweet.id) + "\t" + tweet.simpleText()

#grap.streamloop(["السيسي","مصر"],do)
# grap.streamloop(["مصر","مرسي","السيسي","مبارك","الأخوان","\"30 يونيو\"","\"25 يناي\"","#انتخبوا_العرص","عسكر"],do)
grap.streamloop(["فودافون", "موبينيل"], do)


# config = Config(args.config)
# matcher = PatternMatcher(args.input, config)

# print config.Patterns
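The do callback above just prints each tweet; the only thing the snippet establishes about streamloop callbacks is that they receive a single tweet object. Here is a sketch of an alternative callback that appends tweets to a file instead, mirroring the writeTweet pattern from the search-mode scripts; the default file name is an assumption.

# -*- coding: utf-8 -*-

def save(tweet, path="stream_output.txt"):  # 'path' default is assumed
    # append one "id<TAB>text" line per tweet, UTF-8 encoded
    line = str(tweet.id) + "\t" + tweet.simpleText() + "\n"
    with open(path, "a") as f:
        f.write(line.encode("utf-8"))

# grap.streamloop(["فودافون", "موبينيل"], save)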
Example #4
@author: hadyelsahar 
'''

import argparse

from Classes.Tweet import *
from TweetGrapper.TweetGrapper import *
from PatternMatcher import *

# t = Tweet(u"اصلها \n\n لو عرفت تخليك بني ادم محترم و عارف ربنا .. اوعي تسيبها !",language="ar",searchKeyword="محترم")

# print t.clean()
# print t.clean()
# print t.cleanText

grap = TweetGrapper()
l = grap.search("محترم")

for i in l:
    print str(i.id) + "\t" + i.clean().encode("utf-8")

# parser = argparse.ArgumentParser(description='tool to extract a set of subjective words and idioms based on a set of patterns written in a config file')
# parser.add_argument('-c','--config', help='Input Config file name',required=True)
# parser.add_argument('-i','--input', help='Input Tweets files to Extract subjective words from',required=True)
# parser.add_argument('-o','--output',help='Output file name - print in console if not specified', required= True)
# parser.add_argument('-uf','--uniqandfilter',help='filter extracted lexicon words and save them to clean_uniq_output file with counts', required= False , action="store_true")
# parser.add_argument('-sl','--seedlexicon', help='Input classified lexicon file name',required=False)
# args = parser.parse_args()

# if args.uniqandfilter is True and args.seedlexicon is None:
#   parser.error('must specify seedlexicon when choosing [-uf] option')
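The commented-out tail above sketches a dependent-flag check: -uf is only valid together with -sl. As a standalone, runnable version of that argparse pattern (flag names taken from the comments above):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-uf', '--uniqandfilter', required=False, action="store_true")
parser.add_argument('-sl', '--seedlexicon', required=False)
args = parser.parse_args()

# reject -uf without -sl, exactly as the commented-out check above does
if args.uniqandfilter is True and args.seedlexicon is None:
    parser.error('must specify seedlexicon when choosing [-uf] option')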