def pre_process(self):
    name = self.name
    file_list = self.file_list
    try:
        # Split text into a list of words
        list_ = []
        x = 0
        while x < len(file_list):
            num_tweets = Functions.num_of_tweets(file_list[x])
            time_stamp = Functions.time_stamp(file_list[x])
            target_doc = open(file_list[x], 'r')
            print("FILE: ", target_doc.name)
            res = []
            for lines in target_doc:
                word_list = []
                line = lines.lower()
                word = Functions.preprocess(line)
                for i in word:
                    if i not in punctuation:  # punctuation assumed to be string.punctuation
                        word_list.append(i)
                        # For testing purposes
                        list_.append(i)
                emo = EAC(name, word_list, file_list[x], time_stamp, num_tweets, res)
                emo.emotion_analysis()
            target_doc.close()
            x += 1
        return list_
    except BaseException as e:
        print("Pre_process error: ", e)
def pre_process(self, file_list):
    try:
        # Split text into a list of words
        list_ = []
        x = 0
        while x < len(file_list):
            num_tweets = Functions.num_of_tweets(file_list[x])
            time_stamp = Functions.time_stamp(file_list[x])
            target_doc = open(file_list[x], 'r')  # open once; the original opened the same file twice
            for lines in target_doc:
                word_list = []
                line = lines.lower()
                word = Functions.preprocess(line)
                for i in word:
                    if i not in punctuation:
                        word_list.append(i)
                        # For testing purposes
                        list_.append(i)
                self.emotion_analysis(word_list, file_list[x], time_stamp, num_tweets)
            target_doc.close()
            x += 1
        print(list_)
        return list_
    except BaseException as e:
        print("Pre_process error: ", e)
def create_csv(self):
    name = self.name
    try:
        file_list = Functions.open_folder(name, 'C:/Users/MOYIN/Desktop/Flask/WebSc/result/' + name + '/')
        test_file = open('C:/Users/MOYIN/Desktop/Flask/WebSc/result/' + name + '/Csv_data/' + name + ".csv", "w", newline='')
        f = csv.writer(test_file)
        emotion_list = ["time", "price", "amusement", "interest", "pride", "joy", "pleasure",
                        "relief", "compassion", "admiration", "contentment", "love",
                        "disappointment", "regret", "sadness", "shame", "guilt", "hate",
                        "contempt", "disgust", "fear", "anger"]
        # Headers (written once, so no counter is needed)
        f.writerow(emotion_list)
        for file in file_list:
            emotion_result = []
            load = open(file, "r")
            loaded = json.load(load)
            emotion_result.append(loaded)
            for x in emotion_result:
                row = []
                for item in x:
                    # add price and time first
                    if "time" in item:
                        row.append(x["time"])
                        d = datetime.strptime(x["time"], '%d-%m-%y')
                        month_ = d.strftime('%m').lstrip("0")
                        year_ = d.strftime('%Y')
                        day_ = d.strftime('%d')
                        comp = self.name.lstrip("$")
                        price = Functions.get_price(comp, int(year_), int(month_), int(day_))
                        row.append(price)
                for item in x:
                    # add emotions
                    if "emotions" in item:
                        # for all emotion strengths
                        for emo in x["emotions"]:
                            for i in emotion_list:
                                if i != "time" and i != "price":  # '==' / '!=', not 'is': string identity is unreliable
                                    if i in emo:
                                        row.append(emo[i])
                                    else:
                                        row.append(0)
                print(row)
                f.writerow(row)
        print("CSV file generated : ")
        test_file.close()  # flush buffered rows before handing back the name
        return test_file.name
    except BaseException as e:
        print("Create csv error: ", e)
def __init__(self, raw_library_name, sha_1):
    self.raw_library_name = raw_library_name
    self.sha_1 = sha_1
    # split the raw file name once instead of calling clean_name twice
    cleaned = f.clean_name(raw_library_name, v.list_of_suffixes)
    self.pure_name = cleaned[0]
    self.version = cleaned[1]
    self.clean_package_name = self.pure_name + '.' + self.version
    self.is_package = self.raw_library_name.endswith(v.package_suffix)
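# A hypothetical stand-in for f.clean_name (the project's implementation is not
# shown here): split a raw file name such as 'project-1.1.8.nupkg' into a
# (pure name, version) pair after stripping a known suffix. Illustration only.
import re

def clean_name_sketch(raw_name, suffixes):
    for suffix in suffixes:
        if raw_name.endswith(suffix):
            raw_name = raw_name[:-len(suffix)]
            break
    match = re.search(r'[.-](\d+(?:\.\d+)*)$', raw_name)
    if match:
        return raw_name[:match.start()], match.group(1)
    return raw_name, ''

assert clean_name_sketch('project-1.1.8.nupkg', ['.nupkg', '.dll', '.exe']) == ('project', '1.1.8')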
def menu_sum(expenses):
    """
    Launch the menu that corresponds to the 'sum' command
    :param expenses: The list of expenses to sum
    """
    categories = Functions.get_all_categories(expenses)
    category = Parser.choose(categories, "Please choose a category:")
    Functions.sum_category(expenses, [category])
def menu_list(expenses):
    """
    Launch the menu that corresponds to the 'list' command
    :param expenses: The list of expenses to print out
    """
    printer = Functions.list_elements
    categories = ['All categories'] + Functions.get_all_categories(expenses)
    category = Parser.choose(categories, "Please choose a category:")
    if category == 'All categories':  # '==', not 'is': string identity is unreliable
        printer(expenses, [])
        return
    constraining_options = ['No constraints', 'Place a constraint']
    constraint_choice = Parser.choose(constraining_options, "Please choose a constraint:")
    if constraint_choice == 'No constraints':
        printer(expenses, [category])
        return
    operators = ['<', '>', '=']
    operator = Parser.choose(operators, "Please choose an operator: ")
    comparison_element = Parser.get_input_of_type(int, "Please choose a number to compare to: ")
    printer(expenses, [category, operator, comparison_element])
def create_graph(self):
    name = self.name
    try:
        groups_file = open(self.groups, "r")  # avoid rebinding the attribute name to a file handle
        load_groups = json.load(groups_file)
        G = nx.Graph()
        file_list = Functions.open_folder(name, 'C:/Users/MOYIN/Desktop/Flask/WebSc/result/' + name + '/')
        print(file_list)
        for file in file_list:
            load = open(file, "r")
            result_json = json.load(load)
            G.add_node("Emotion", x=500, y=400, fixed=True)
            for wd in result_json['emotions']:
                for word in wd:
                    G.add_node(word, group=load_groups[word])
                    G.add_edge("Emotion", word, value=wd[word])
            d = json_graph.node_link_data(G)
            filex = open("C:/Users/MOYIN/Desktop/Flask/static/Companies/" + name + "/" + os.path.basename(file), 'w')
            json.dump(d, filex)
            print("Graph files created: ", filex.name)
        return True
    except BaseException as e:
        print("Graph creation error : ", e)
def correlation_csv(self):
    try:
        name = self.name
        emotion_list = ["amusement", "interest", "pride", "joy", "pleasure", "relief",
                        "compassion", "admiration", "contentment", "love", "disappointment",
                        "regret", "sadness", "shame", "guilt", "hate", "contempt", "disgust",
                        "fear", "anger"]
        # mode "w+" already truncates, so no explicit truncate() call is needed
        test_file = open("C:/Users/MOYIN/Desktop/Flask/static/Companies/" + name + "/" + name + "_CT.csv", "w+", newline='')
        f = csv.writer(test_file)
        # Headers (written once, so no counter is needed)
        f.writerow(["emotion", "correlation"])
        for i in emotion_list:
            row = [i, Functions.correlation(i, name)]
            print(row)
            f.writerow(row)
        print("correlation table created")
        print("----------------------------------------------------------------")
        test_file.close()
        return test_file.name
    except BaseException as e:
        print("Correlation error : ", e)
def create_graph(name):
    groups = open("Emotion_Groups.json", "r")
    connected = open("connected.json", "r")
    load_groups = json.load(groups)
    load_connect = json.load(connected)  # loaded but not used below
    G = nx.Graph()
    file_list = Functions.open_folder('result\\' + name + '\\')
    for file in file_list:
        load = open(file, "r")
        result_json = json.load(load)
        print(os.path.basename(file))
        G.add_node("Emotion", x=500, y=400, fixed=True)
        for wd in result_json['emotions']:
            for word in wd:
                G.add_node(word, group=load_groups[word])
                G.add_edge("Emotion", word, value=wd[word])
        d = json_graph.node_link_data(G)
        # write to a fresh handle instead of rebinding the loop variable `file`
        out_file = open("result\\" + name + "\\Force_layout\\" + os.path.basename(file), 'w')
        json.dump(d, out_file)
        print(d)
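# A tiny standalone demonstration of the node-link export used by both
# create_graph variants above: build a hub-and-spoke graph and serialize it
# for a d3-style force layout. Assumes networkx is installed.
import json
import networkx as nx
from networkx.readwrite import json_graph

demo = nx.Graph()
demo.add_node("Emotion", x=500, y=400, fixed=True)  # fixed central hub
demo.add_node("joy", group=1)                       # one emotion node
demo.add_edge("Emotion", "joy", value=3)            # edge value = emotion strength

print(json.dumps(json_graph.node_link_data(demo)))  # {'nodes': [...], 'links': [...]}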
def menu_max(expenses):
    """
    Launch the menu that corresponds to the 'maxday' command
    :param expenses: The list of expenses to look through
    """
    options = [
        "Get the day with the most expenses",
        "Get the maximum expense in a day"
    ]
    user_choice = Parser.choose(options, "Please choose what max you want:")
    if user_choice == options[0]:  # '==', not 'is', for string comparison
        Functions.max_day(expenses, [])
    elif user_choice == options[1]:
        day = Parser.get_input_of_type(int, "Please pick a day:")
        Functions.max_per_day(expenses, [day])
def get_needed_downloads(self):
    package_versions = self.get_versions()
    if self.version in package_versions:
        needed = [self.version]
    # a trailing '.0' sometimes differs between feeds; re-enable to try the
    # shaved form before falling back to all versions:
    # elif f.shave_suffix(self.version, '.0') in package_versions:
    #     needed = [f.shave_suffix(self.version, '.0')]
    else:
        # no exact match found: download every published version
        needed = list(package_versions)
    return needed
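# A minimal sketch of the '.0'-shaving idea referenced above. The real project
# uses f.shave_suffix; this stand-in is an assumption about its behaviour.
def shave_suffix_sketch(version, suffix):
    """Drop a trailing suffix such as '.0' so '1.2.0' can also match '1.2'."""
    return version[:-len(suffix)] if version.endswith(suffix) else version

assert shave_suffix_sketch('1.2.0', '.0') == '1.2'
assert shave_suffix_sketch('1.2', '.0') == '1.2'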
def data_collection(self):
    try:
        name = self.name
        path = "C:/Users/MOYIN/Desktop/Flask/WebSc/Tracker/" + name
        # get all files in folder
        file_list = Functions.open_folder(name, path)
        # check if files exist
        if file_list:
            self.pre_process(file_list)
        else:
            # no data yet: fetch it, then retry and return the retry's result
            Functions.get_data(name, path)
            return self.data_collection()
        return file_list
    except BaseException as e:
        print("data collection error: ", e)
def create_csv(name):
    file_list = Functions.open_folder('result\\' + name + '\\')
    test_file = open('result\\' + name + '\\Csv_data\\' + name + ".csv", "w", newline='')
    f = csv.writer(test_file)
    emotion_list = ["time", "price", "amusement", "interest", "pride", "joy", "pleasure",
                    "relief", "compassion", "admiration", "contentment", "love",
                    "disappointment", "regret", "sadness", "shame", "guilt", "hate",
                    "contempt", "disgust", "fear", "anger"]
    # Headers (written once, so no counter is needed)
    f.writerow(emotion_list)
    for file in file_list:
        # reset per file so earlier files are not re-written on every iteration
        emotion_result = []
        load = open(file, "r")
        loaded = json.load(load)
        emotion_result.append(loaded)
        for x in emotion_result:
            row = []
            for item in x:
                # add price and time first
                if "time" in item:
                    row.append(x["time"])
                    d = datetime.strptime(x["time"], '%d-%m-%y')
                    month_ = d.strftime('%m').lstrip("0")
                    year_ = d.strftime('%Y')
                    day_ = d.strftime('%d')
                    comp = name.lstrip("$")
                    price = CsvGenerate.get_price(comp, int(year_), int(month_), int(day_))
                    row.append(price)
            for item in x:
                # add emotions
                if "emotions" in item:
                    # for all emotion strengths
                    for emo in x["emotions"]:
                        for i in emotion_list:
                            if i != "time" and i != "price":  # '!=', not 'is not'
                                if i in emo:
                                    row.append(emo[i])
                                else:
                                    row.append(0)
            f.writerow(row)
    test_file.close()
    print("done")
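# A standalone check of the '%d-%m-%y' timestamp format parsed above. Once
# strptime has run, the components are available as integers directly, which
# is why the lstrip("0") step is only needed for the string forms.
from datetime import datetime

d = datetime.strptime("19-02-16", '%d-%m-%y')
assert (d.day, d.month, d.year) == (19, 2, 2016)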
def menu_sort(expenses):
    """
    Launch the menu that corresponds to the 'sort' command
    :param expenses: The list of expenses to sort
    """
    options = ['Day', 'Category']
    criterion = Parser.choose(options, "Sort by:")
    if criterion == 'Day':  # '==', not 'is': string identity is unreliable
        valid_days = Functions.get_all_days(expenses)  # all the unique entries for days
        day = Parser.get_input_of_type(int, "Please choose a day:")
        if day not in valid_days:
            print("There are no entries for day {}".format(str(day)))
            return
        Functions.sort_by(expenses, [day])
    elif criterion == 'Category':
        valid_categories = Functions.get_all_categories(expenses)  # all the unique entries for categories
        category = Parser.choose(valid_categories, "Please pick a category:")
        Functions.sort_by(expenses, [category])
def menu_filter(expenses):
    """
    Launch the menu that corresponds to the 'filter' command
    :param expenses: The list of expenses to filter
    """
    categories = Functions.get_all_categories(expenses)
    category = Parser.choose(categories, "Please choose a category:")
    restrictions = ['No restrictions', 'Add restriction']
    restriction = Parser.choose(restrictions, "Please choose any further restriction:")
    if restriction == 'No restrictions':  # '==', not 'is', for string comparison
        Functions.filter(expenses, [category])
    elif restriction == 'Add restriction':
        operators = ['<', '>', '=']
        operator = Parser.choose(operators, "Please pick an operator:")
        value = Parser.get_input_of_type(int, "Please pick a value to compare to:")
        Functions.filter(expenses, [category, operator, value])
def alpha_words(name):
    # open dictionary
    source = 'Emotion\\'
    json_pattern = os.path.join(source, '*.json')
    file_list = glob.glob(json_pattern)
    # for each dictionary
    for file_emo in file_list:
        load_file = json.load(open(file_emo, 'r'))
        # read in any json file that comes in, using glob for filename pattern matching
        source = 'Tracker\\' + name
        json_pattern = os.path.join(source, '*.json')
        tracker_files = glob.glob(json_pattern)  # separate name, so file_list is not rebound mid-loop
        for file in tracker_files:
            # open each file in the file list
            target_doc = open(file, 'r')
            Functions.num_of_tweets(file)
            res = []
            # for each line in the document
            for lines in target_doc:
                word_list = []
                line = lines.lower()
                word = Functions.preprocess(line)
                for i in word:
                    # append to word list to be sorted
                    word_list.append(i)
                # sort each line in alphabetic order
                word_list.sort()
                # check if the dictionary word is in the list
                dict_words = load_file["words"][0]
                if len(dict_words) == 0:
                    id_ = load_file["id"]
                    wrd = 0
                    di_ct = {id_: wrd}
                    res.append(di_ct)
                else:
                    for X in dict_words:
                        if Functions.binary_search(word_list, X) is True:
                            id_ = load_file["id"]
                            wrd = dict_words[X]
                            di_ct = {id_: wrd}
                            res.append(di_ct)
                        else:
                            id_ = load_file["id"]
                            wrd = 0
                            di_ct = {id_: wrd}
                            res.append(di_ct)
            Functions.counting(res, file, "19-02-2016", 200, name)
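# A hypothetical stand-in for Functions.binary_search (the project's own
# implementation is not shown): membership test on a sorted word list using
# the stdlib bisect module, matching how alpha_words sorts each line first.
import bisect

def binary_search_sketch(sorted_words, target):
    i = bisect.bisect_left(sorted_words, target)
    return i < len(sorted_words) and sorted_words[i] == target

assert binary_search_sketch(["anger", "fear", "joy"], "fear") is True
assert binary_search_sketch(["anger", "fear", "joy"], "hope") is False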
def command_interface():
    """
    The main user interface.
    """
    os.system('clear')
    expenses = Expense.initialize_list()  # Initial list with sample values
    expenses_history = [copy.deepcopy(expenses)]
    options = {
        'add': Functions.add,
        'insert': Functions.insert,
        'remove': Functions.remove,
        'list': Functions.list_elements,
        'sum': Functions.sum_category,
        'max': Functions.max_per_day,
        'maxday': Functions.max_day,
        'sort': Functions.sort_by,
        'filter': Functions.filter,
        'undo': None,
        'exit': menu_exit
    }
    while True:
        print("\n MAIN MENU \n\n"
              " Please input a command \n\n")
        user_input = input("--> ")
        os.system('clear')
        operands = user_input.split()
        if not operands:  # guard against empty input before indexing
            continue
        if operands[0] not in options:
            print("Invalid command '" + str(operands[0]) + "'")
        elif operands[0] == 'undo':
            # treat undo separately because of the history change
            expenses = Functions.undo(expenses_history)
        else:
            options[operands[0]](expenses, operands[1:])
        # only append a copy of the current expenses if there was a change
        if expenses != expenses_history[-1]:
            expenses_history.append(copy.deepcopy(expenses))
def emotion_measure(name):
    # read in any json file that comes in, using glob for filename pattern matching
    source = 'Tracker\\' + name
    json_pattern = os.path.join(source, '*.json')
    file_list = glob.glob(json_pattern)
    for file in file_list:
        print(file)
        target_doc = open(file, 'r')
        res = []
        time_stamp = Functions.time_stamp(file)
        num_tweets = Functions.num_of_tweets(file)  # the original called this twice; once is enough
        for lines in target_doc:
            line = lines.lower()
            word = twitterstreamV2.preprocess(line)
            # reload the emotion dictionaries for this line
            emo_pattern = os.path.join('Emotion\\', '*.json')
            emo_files = glob.glob(emo_pattern)  # separate name, so file_list is not shadowed
            for file_emo in emo_files:
                emo_doc = open(file_emo, 'r')
                load_file = json.load(emo_doc)
                for wd in word:
                    # checks if the word exists in the dictionary and records its strength
                    dict_words = load_file["words"][0]
                    if wd in dict_words:
                        id_ = load_file["id"]
                        wrd = dict_words[wd]
                        di_ct = {id_: wrd}
                        res.append(di_ct)
                    else:
                        id_ = load_file["id"]
                        wrd = 0
                        di_ct = {id_: wrd}
                        res.append(di_ct)
        target_doc.close()
        Functions.counting(res, file, time_stamp, num_tweets, name)
def menu_interface():
    """
    The main menu-based user interface
    """
    os.system('clear')
    expenses = Expense.initialize_list()
    expenses_history = [copy.deepcopy(expenses)]
    menu_options = [
        "Add an element to the list",
        "Insert an element to the list",
        "List elements",
        "Sort elements",
        "Filter out certain elements",
        "Get maximum",
        "Sum elements",
        "Undo",
        "Exit"
    ]
    functions = {
        menu_options[0]: menu_add,
        menu_options[1]: menu_insert,
        menu_options[2]: menu_list,
        menu_options[3]: menu_sort,
        menu_options[4]: menu_filter,
        menu_options[5]: menu_max,
        menu_options[6]: menu_sum,
        menu_options[7]: None,  # Undo is handled inline below
        menu_options[8]: menu_exit
    }
    while True:
        function_to_call = functions[Parser.choose(
            menu_options, "Please choose one of the following options:")]
        os.system('clear')
        if function_to_call is None:
            expenses = Functions.undo(expenses_history)
            continue
        function_to_call(expenses)
        if expenses != expenses_history[-1]:  # only snapshot when something changed
            expenses_history.append(copy.deepcopy(expenses))
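# A minimal standalone sketch of the snapshot-based undo shared by both
# interfaces above: deep-copy the list after every change and restore the
# previous snapshot on undo.
import copy

state = []
history = [copy.deepcopy(state)]

state.append({'day': 3, 'category': 'food', 'amount': 10})
history.append(copy.deepcopy(state))   # snapshot after a mutating command

if len(history) > 1:                   # undo: discard the newest snapshot
    history.pop()
    state = copy.deepcopy(history[-1])
assert state == []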
def on_demand_process():
    input_data = input(
        "\n\n" + "=" * 20 +
        "\nThe On Demand Mode:\n"
        "Type the name of the project, the version (if you have one) and if it's a Nuget Package or a dll/exe\n"
        "If the package in the version exists it will be downloaded, if no version is given all versions will be downloaded\n"
        "Examples: project-1.1.8.nupkg - project.2.2.0.dll - project-1.3.2.exe - project\n"
        "Input:\n")
    start = time.time()
    inst = c.Library(input_data, "No Hash")
    needed_versions = inst.get_needed_downloads()
    if not needed_versions:
        print("This Package has no versions in Nuget")
    else:
        print(inst.clean_package_name, needed_versions)
        for version in needed_versions:
            f.download_package(inst.pure_name, version)
            downloaded_package_name = inst.pure_name + '.' + version
            if inst.is_package:
                # calculate the .nupkg file's hash
                print(downloaded_package_name,
                      f.hash_calculator(v.default_path_to + downloaded_package_name + v.package_suffix))
            else:
                # extract and calculate, plus the .nupkg hash
                f.extract_package(downloaded_package_name + v.package_suffix)
                print(downloaded_package_name,
                      f.hash_calculate_directory(downloaded_package_name),
                      "Package Hash-" + f.hash_calculator(v.default_path_to + downloaded_package_name + v.package_suffix))
    end = time.time()
    print('\nExecution time: ' + str(end - start))
    on_demand_process()  # loop by recursion; note this grows the call stack on long sessions
def emotion_analysis(self):
    try:
        name = self.name
        word_list = self.word_list
        num_tweets = self.num_tweets
        res = self.res
        file = self.file
        time_stamp = self.time_stamp
        # open dictionary and order words alphabetically
        dict_file = self.dictionary
        dictionary = open(dict_file, "r")
        di_ct = json.load(dictionary)
        ordered = Functions.word_order(di_ct)
        # obtain bigrams for detecting negation
        grams = list(bigrams(word_list))
        negative_flag = False
        intensifier_flag = False
        intensi = []
        negate = []
        # check for negation or intensifier words
        for i in grams:
            for n in negation_list:
                if n in i:
                    negate.append(i)
                    negative_flag = True
            for n in intensifier_list:
                if n in i:
                    intensi.append(i)
                    intensifier_flag = True
        for x in word_list:
            if Functions.binary_search(self, ordered, x) is True:
                # skip words that occur in a negated bigram (the original
                # placeholder loop here collected them but did nothing)
                if negative_flag is True and any(x in i for i in negate):
                    continue
                wrd = 0
                if di_ct[x]:
                    i_d = di_ct[x][1]
                    if intensifier_flag is True:
                        for i in intensi:
                            if x in i:
                                # add the intensifier's weight to the base score
                                score = intensifier_list[i[0]]
                                wrd = di_ct[x][0] + score
                                intensifier_flag = False
                            else:
                                wrd = di_ct[x][0]
                    else:
                        wrd = di_ct[x][0]
                    new_d = {i_d: abs(wrd)}
                    res.append(new_d)
            # future development: also score stemmed forms of words not found
            # verbatim (see the stemmed-word branch in the other
            # emotion_analysis variant)
        Functions.reduce(self, res, file, time_stamp, num_tweets, name)
        return res
    except BaseException as e:
        print("emotion analysis error: ", e)
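# A self-contained sketch of the bigram-based negation detection used above.
# negation_list here is a stand-in; the project defines its own list.
from nltk import bigrams

negation_list = ["not", "no", "never"]
words = "this is not good".split()

negated = [pair for pair in bigrams(words)
           if any(n in pair for n in negation_list)]
assert ("not", "good") in negated  # 'good' sits inside a negated bigram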
def emotion_analysis(self, word_list, file, time_stamp, num_tweets):
    try:
        name = self.name
        # open dictionary and order words alphabetically
        dict_file = self.dictionary
        dictionary = open(dict_file, "r")
        di_ct = json.load(dictionary)
        ordered = Functions.word_order(di_ct)
        # obtain bigrams for detecting negation
        grams = list(bigrams(word_list))
        negative_flag = False
        intensifier_flag = False
        res = []  # assumed local; the original relied on an outer-scope name
        print(grams)
        intensi = []
        negate = []
        for i in grams:
            for n in negation_list:
                if n in i:
                    negate.append(i)
            for n in intensifier_list:
                if n in i:
                    intensi.append(i)
        print("this is x :", intensi)
        print(negate)
        print("________________________")
        for x in word_list:
            # Check for any negative words or intensifiers
            for i in grams:
                if x in i:
                    # check for negation
                    for n in negation_list:
                        if n in i:
                            negative_flag = True
                    # check for intensifier
                    for n in intensifier_list:
                        if n in i:
                            intensifier_flag = True
            # check if the word or the stem of the word is in the dictionary
            if Functions.binary_search(self, ordered, x) is True:
                if negative_flag is True:
                    count = 1
                    negation_counter.append(count)
                else:
                    if di_ct[x]:
                        i_d = di_ct[x][1]
                        if intensifier_flag is True:
                            wrd = di_ct[x][0] + 1
                            intensifier_flag = False
                        else:
                            wrd = di_ct[x][0]
                        new_d = {i_d: abs(wrd)}
                        res.append(new_d)
            elif Functions.binary_search(self, ordered, stemmer.stem(x)) is True:
                x_stem = stemmer.stem(x)
                # handles negation and intensifier occurrence
                if negative_flag is True:
                    pass
                else:
                    if di_ct[x_stem]:
                        i_d = di_ct[x_stem][1]
                        if intensifier_flag is True:
                            wrd = di_ct[x_stem][0] + 1
                            intensifier_flag = False
                        else:
                            wrd = di_ct[x_stem][0]
                        new_d = {i_d: abs(wrd)}
                        res.append(new_d)
        dictionary.close()
        Functions.counting(self, res, file, time_stamp, num_tweets, name)
        return res
    except BaseException as e:
        print("emotion analysis error: ", e)
import json
from Modules import Functions
from nltk.stem.snowball import SnowballStemmer
# Word and NOUN are used below but were never imported in the source; they are
# assumed to come from TextBlob:
from textblob import Word
from textblob.wordnet import NOUN

'''
word = Word("good").get_synsets(pos=NOUN)
chosen = word[1].lemma_names()
print(chosen)
'''

stemmer = SnowballStemmer("english")
dict_file = "C:/Users/MOYIN/Desktop/Flask/WebSc/Emotion_Dictionary_ORG.json"
dictionary = open(dict_file, "r")
di_ct = json.load(dictionary)
ordered = Functions.word_order(di_ct)
words = ordered

# source: https://gist.github.com/cdtavijit/431135aa6da53d47bc72
def synonym_finder(specific_word, synonymList):
    word = Word(specific_word)
    for i, j in enumerate(word.synsets):
        # print("Synonyms:", ", ".join(j.lemma_names()))
        for x in range(len(j.lemma_names())):
            # the body was truncated in the source; appending each lemma name
            # to synonymList is a plausible completion based on the cited gist
            synonymList.append(j.lemma_names()[x])
inst = c.Library(file_hash_tuple[0], str(file_hash_tuple[1]))  # create a "Library" class instance
message = "Suspected"
needed_versions = inst.get_needed_downloads()
print(inst.pure_name + ': ', needed_versions)
if not needed_versions:
    # the package has no versions in Nuget
    message = "Package not found in Nuget"
    for package in hash_results:
        if inst.sha_1.lower() in hash_results[package]:
            message = "Match found in: " + package
    results.append(message)
    print(message)
    continue  # to the next (file, hash) tuple
for version in needed_versions:
    found_match_flag = False
    f.download_package(inst.pure_name, version)
    downloaded_package_name = inst.pure_name + '.' + version
    print("\n" + downloaded_package_name + ' ------ Downloaded. Hashes:')
    if downloaded_package_name not in hash_results:
        if inst.is_package:
            # calculate the .nupkg file's hash
            hash_results[downloaded_package_name] = [
                f.hash_calculator(v.default_path_to + downloaded_package_name + v.package_suffix)
            ]
        else:
            # extract and calculate
            f.extract_package(downloaded_package_name + v.package_suffix)
            hash_results[downloaded_package_name] = f.hash_calculate_directory(
                downloaded_package_name) + [
                    f.hash_calculator(v.default_path_to +
# ----------------------------------------------------------------------
import difflib
from Modules import Functions as f
from Modules import Parameters as p

ZIPMatch = 0
CityMatch = 0
StreetMatch = 0
NoMatch = 0
ApproxMatch = 0
bestscore = 0
MatchData = [[]]

# ---------- Loop through all companies ----------
for company in p.companylist:
    # ---------- Get SAP data ----------
    SAPData = f.retrieveSAPdata(company)
    SAPData.pop(0)
    f.printmatrix(SAPData, 2)
    if p.ExportSAPToFile == 1:
        f.exportSAPtocsv(SAPData, company)
    # ---------- Get MKT data ----------
    MktData = f.retrievecompanydata(company)
    MktData.pop(0)
    f.printmatrix(MktData, 2)
    # ---------- Get match data ----------
    # Go through each record of the Mkt array and find the closest match in the SAP data
    # Loop through mkt data
    ZIPMatch = 0
    CityMatch = 0