def calculate_centroid(inverted_index: InvertedIndex.InvertedIndex, docs):
    result = list()
    size = len(inverted_index.index_array)
    index_array = inverted_index.index_array
    docs_score = ScoreCalculator.docs_score_calculator(index_array, docs)
    for i in range(len(docs_score)):
        weights_sum = 0
        # Sum the weight component of each entry, skipping entry 0
        for j in range(1, len(docs_score[i])):
            weights_sum += docs_score[i][j][1]
        result.append(weights_sum / size)
    return result, docs_score
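
# For context: calculate_centroid averages the summed term weights over the
# size of the index, giving one centroid component per index entry. A minimal
# sketch of the textbook version of the same idea (component-wise mean of
# document weight vectors); the toy weights below are made up.
def centroid(doc_vectors):
    """Component-wise mean of equal-length tf-idf weight vectors."""
    n = len(doc_vectors)
    return [sum(component) / n for component in zip(*doc_vectors)]

doc_vectors = [
    [0.0, 1.2, 0.4],  # weights of doc 1 over a 3-term vocabulary
    [0.6, 0.0, 0.8],  # doc 2
    [0.3, 0.9, 0.0],  # doc 3
]
print(centroid(doc_vectors))  # ~[0.3, 0.7, 0.4]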
def score_calculator(invertedIndex: InvertedIndex.InvertedIndex, docs, query, k):
    query_doc = Document.Document(query, 1)
    query_score = ScoreCalculator.query_score_calculator(query_doc, docs)
    query_words = sorted(query_doc.words)

    # Collect the index entry of every query word that exists in the index
    index_res = []
    for i in range(len(query_words)):
        loc = invertedIndex.find_word(query_words[i])
        if loc == -1:
            print("word " + query_words[i] + " can't be found in List!")
        else:
            index_res.append(invertedIndex.index_array[loc])

    docs_score = docs_score_calculator(index_res, docs)
    max_heap = CosinusCalculator.cosinus_max_heap_creator(query_score, docs_score)

    # Pop at most k of the highest cosine scores off the max-heap
    res = []
    if len(max_heap.heap) > 1 and len(max_heap.heap) > k:
        for i in range(k):
            res.append(max_heap.pop())
    elif 1 < len(max_heap.heap) < k + 1:
        for i in range(len(max_heap.heap)):
            res.append(max_heap.pop())
    return res
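
# The tail of score_calculator is a top-k selection from a max-heap of cosine
# scores. A self-contained sketch of the same pattern with the standard-library
# heapq (a min-heap, hence the negated scores); the scores below are made up.
import heapq

def top_k(scores, k):
    """Return the k highest-scoring (doc_id, score) pairs."""
    heap = [(-score, doc_id) for doc_id, score in scores]
    heapq.heapify(heap)
    return [(doc_id, -neg) for neg, doc_id in
            (heapq.heappop(heap) for _ in range(min(k, len(heap))))]

print(top_k([(1, 0.12), (2, 0.87), (3, 0.45), (4, 0.66)], 2))
# [(2, 0.87), (4, 0.66)]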
import ScoreCalculator

# "hoge" is a Japanese placeholder name, like "foo"
sc = ScoreCalculator.ScoreCalculator("hoge", "hoge")
score = sc.get_best_music_list()
import MySQLdb  # HOST, USER, PASSWORD, DB_NAME and the helper modules are defined elsewhere


def get_ipscore(log_file_name):
    # Database Connection
    database = MySQLdb.connect(HOST, USER, PASSWORD, DB_NAME)
    cursor = database.cursor()

    # Get system constants from DB
    N, Ni, Nu, Ns, t, L, h = ScoreCalculator.get_system_constants(cursor)

    # Open test log file and read line by line
    with open(log_file_name, "r") as log_file:
        for line in log_file:
            # Define all score values
            K = 0            # static score
            S = 0            # source score
            F = 0            # familiarity score
            T = 0            # traffic score
            V = 0            # variation score
            P = 0            # overall score
            tns = 0          # tns value
            tnd = 0          # tnd value
            n_days = 0       # number of days value
            out_log_id = -1  # log id

            ##
            ## 1 ## Read current log and get attributes
            ##

            # Check line attributes and size
            attributes = line.split(',')
            size = len(attributes)
            if size == LogParser.LINE_SIZE and attributes[8].strip() \
                    and '=,' not in line and '=""' not in line:
                # Get attributes of the current log
                date_time, date, time = LogParser.get_datetime(attributes)
                source_ip = LogParser.get_source(attributes)
                dest_ip, dest_port = LogParser.get_destination(attributes)
                action, service = LogParser.get_action_service(attributes)

                # Parse time of the current log
                current_time = str(date_time.time()).split(':')
                hour = current_time[0].strip()
                minute = current_time[1].strip()
                second = current_time[2].strip()
                time_interval = hour + minute[0] + '0'

                # Set time interval of the current log
                if int(minute[0]) == 5:
                    if int(hour) == 23:
                        time_interval += '_' + hour + str(59)
                    elif int(hour) < 9:
                        time_interval += '_0' + str(int(hour) + 1) + '00'
                    elif int(hour) >= 9:
                        time_interval += '_' + str(int(hour) + 1) + '00'
                else:
                    time_interval += '_' + hour + str(int(minute[0]) + 1) + '0'
                #print date_time, source_ip, dest_ip, dest_port, action, service

                ##
                ## 2 ## Check time of current log and decide to calculate score
                ##      If the log is in the first 10 minutes, insert it into
                ##      runtime_tmp table and training data
                ##      Else, calculate a score for current log and do other insertions
                ##

                # Define default run time table values
                dec_action = 'booting'
                score = -1
                debug = 'booting'

                # Check time of current log to know whether it is in the first 10 min
                if time_interval != '0000_0010':
                    # Calculate static score
                    K = ScoreCalculator.calculate_static_score(
                        cursor, K, source_ip, dest_ip, dest_port, service)
                    # Calculate source score
                    S = ScoreCalculator.calculate_source_score(
                        cursor, S, service, dest_port,
                        (str(date) + ' ' + str(time)), Ni, Nu, L)
                    # Calculate familiarity score
                    out_log_id, F = ScoreCalculator.calculate_familiarity_score(
                        cursor, service, F, out_log_id, date, source_ip,
                        dest_port, h, L)
                    # Calculate traffic score
                    T = ScoreCalculator.calculate_traffic_score(
                        cursor, database, T, service, dest_port,
                        (str(date) + ' ' + str(time)), out_log_id, t)
                    # Calculate variation score
                    V = ScoreCalculator.calculate_variation_score(
                        cursor, V, tnd, source_ip, dest_port,
                        (str(date) + ' ' + str(time)), Ns, L)

                    # Calculate overall score and set decision, score and debug values
                    P = K + (S + F + T + V) / 4.0
                    if P > L / 100.0:
                        dec_action = 'allow'
                    else:
                        dec_action = 'deny'
                    score = P
                    debug = str(K) + ';' + str(S) + ';' + str(F) + ';' + str(T) + ';' + str(V)

                ##
                ## 3 ## Insert current log into runtime_tmp table
                ##
                insert_tmp_query = 'INSERT INTO runtime_tmp (datetime, srcip, dstip, port, service, action, score, debug) \
                    VALUES("' + (str(date) + ' ' + str(time)) + '","' + source_ip + '","' + dest_ip + '",' + str(dest_port) \
                    + ',"' + service + '","' + dec_action + '",' + str(score) + ',"' + debug + '")'
                cursor.execute(insert_tmp_query)

                ##
                ## 4 ## Insert current log into training tables in DB
                ##

                # Insert current traffic into database
                log_id = LogParser.insert_traffic_into_db(
                    cursor, source_ip, dest_ip, dest_port, service)

                # Increase count of the current log
                LogParser.update_log_count(cursor, log_id, date, time_interval)

    # Close database connection
    database.close()
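
# Note: the runtime_tmp insert above splices raw log fields straight into the
# SQL string, which breaks on embedded quotes and is injectable. A sketch of
# the same insert with MySQLdb's parameter binding (its placeholder is %s);
# it reuses the variables computed in the loop above.
insert_tmp_query = (
    'INSERT INTO runtime_tmp '
    '(datetime, srcip, dstip, port, service, action, score, debug) '
    'VALUES (%s, %s, %s, %s, %s, %s, %s, %s)'
)
cursor.execute(insert_tmp_query, (
    str(date) + ' ' + str(time), source_ip, dest_ip, dest_port,
    service, dec_action, score, debug,
))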
import FetchDocument
import InvertedIndex
import ChampionList
import ScoreCalculator

if __name__ == '__main__':
    docs = FetchDocument.getAllDocuments2()

    # Build the inverted index: add every word of every document under its doc id
    inverted_index = InvertedIndex.InvertedIndex()
    for i in range(len(docs)):
        for j in range(len(docs[i].words)):
            inverted_index.add_id(docs[i].words[j], docs[i].doc_id)
    # inverted_index.print_all()

    # k = input("Please Enter K Value:\n")
    k = 10
    # r = input("Please Enter R Value:\n")
    r = 20
    # query = input("Please Enter your Query:\n")
    query = 'تراکتور'  # Persian for "tractor"

    champion_list = ChampionList.champion_list_creator(inverted_index, docs, r)
    # champion_list.print_all()
    results = ScoreCalculator.score_calculator(champion_list, docs, query, k)
    print(results)
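
# The nested build loop above adds every (word, doc_id) pair to the inverted
# index. The same structure, sketched with a plain dict of postings lists
# (the two toy documents are made up):
from collections import defaultdict

def build_inverted_index(docs):
    """Map each word to the sorted list of doc ids containing it."""
    postings = defaultdict(set)
    for doc_id, words in docs:
        for word in words:
            postings[word].add(doc_id)
    return {word: sorted(ids) for word, ids in postings.items()}

toy_docs = [(1, ['red', 'tractor']), (2, ['blue', 'tractor'])]
print(build_inverted_index(toy_docs))
# {'red': [1], 'tractor': [1, 2], 'blue': [2]}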
import os

exDir = os.getcwd()
LastUploadDate_file = exDir + '/LastUploadDate.txt'
exDir += '/Scavenger Hunt'
#print exDir

LastUploadTime = os.path.getmtime(LastUploadDate_file)
newPhotosDetected = False

# Walk every category directory and upload photos newer than the marker file
for dir in os.listdir(exDir):
    # We can exclude any directories here.
    if not os.path.isfile(os.path.join(exDir, dir)):
        #print exDir + '/' + dir + ':'
        location = exDir + '/' + dir
        for file in os.listdir(location):
            fileLocation = location + '/' + file
            fileTime = os.path.getmtime(fileLocation)
            if fileTime > LastUploadTime:
                text = file.split('.', 1)[0] + ' Just earned another point uploading a picture of: ' \
                    + dir.split('-', 1)[1] + ' #ScavengerHunt #CRASH #FindTheThing'
                print text
                InstagramAPI.uploadPhoto(fileLocation, caption=text)
                newPhotosDetected = True

if newPhotosDetected:
    # Rewrite the marker file so its mtime records this upload pass
    # ('wr' is not a valid open mode; plain 'w' truncates and writes)
    with open(LastUploadDate_file, 'w') as file:
        file.write(' ')
    ScoreCalculator.mainStuff(exDir)
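
# Writing a space into LastUploadDate.txt only serves to bump its mtime.
# A sketch of the same "touch" idiom, done directly with os.utime:
import os

def touch(path):
    """Set a file's access/modification time to now, creating it if absent."""
    with open(path, 'a'):
        pass
    os.utime(path, None)  # None means "use the current time"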
# Flatten the per-cluster document lists into one list
for i in range(len(docs)):
    for j in range(len(docs[i])):
        full_docs.append(docs[i][j])

centroids = list()
indexes_scores = list()
query = 'آزمایش'  # Persian for "test"
# query = input("Please Enter your Query:\n")
query_scores = list()

# Compute each cluster's centroid and score the query against every cluster
for i in range(len(docs)):
    centroid, docs_scores = calculate_centroid(inverted_indexes[i], docs[i])
    centroids.append(centroid)
    indexes_scores.append(docs_scores)
    query_scores.append(
        ScoreCalculator.query_score_calculator(Document.Document(query, 1), docs[i]))

best_index = find_best_cluster_index(inverted_indexes, centroids, query_scores)

# k = input("Please Enter K Value:\n")
k = 10
results = ScoreCalculator.score_calculator(inverted_indexes[best_index],
                                           docs[best_index], query, k)
print(results)
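
# find_best_cluster_index is defined elsewhere; a plausible sketch, assuming
# it picks the cluster whose centroid is most cosine-similar to the query
# vector (the name, signature, and vector shapes here are assumptions):
import math

def cosine(a, b):
    """Cosine similarity of two equal-length weight vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return dot / norm if norm else 0.0

def find_best_cluster_index_sketch(centroids, query_vectors):
    """Index of the cluster whose centroid best matches its query vector."""
    sims = [cosine(c, q) for c, q in zip(centroids, query_vectors)]
    return max(range(len(sims)), key=sims.__getitem__)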
import pickle

# VADER analyzer; the original import is not shown, this is the usual source
# (nltk.sentiment.vader provides the same class)
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import ScoreCalculator


def readTweets(filename):
    with open(filename, 'rb') as fp:
        itemlist = pickle.load(fp)
    return itemlist


# Not preprocessed tweets:
# tweets = readTweets("combined_tweets_swedish_serialized")
# Preprocessed tweets:
tweets = readTweets("cleaned_tweets")

analyzer = SentimentIntensityAnalyzer()

scores = ScoreCalculator.ScoreCalculator(tweets, analyzer)
scores.getScores()
# scores.printStats()
# scores.calculateMetrics()
# strategy = 'Vader lexicon unstemmed'
# scores.printMetricsTable(strategy)
# scores.printConfusionMatrix()
scores.confusionMatrix()
###########################################
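
# For reference, VADER scores one string at a time via polarity_scores; the
# ScoreCalculator class presumably loops this over the tweets. The example
# sentence and its output are from the VADER documentation.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()
print(analyzer.polarity_scores("VADER is smart, handsome, and funny!"))
# {'neg': 0.0, 'neu': 0.254, 'pos': 0.746, 'compound': 0.8316}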