示例#1
0
def calculate_centroid(inverted_index: InvertedIndex.InvertedIndex, docs):
    """Compute per-term average weights for a document cluster.

    Returns a tuple ``(centroid, docs_score)`` where ``centroid[i]`` is the
    sum of the weights in ``docs_score[i][1:]`` divided by the number of
    entries in the inverted index, and ``docs_score`` is the raw score table
    produced by ``ScoreCalculator.docs_score_calculator``.
    """
    index_array = inverted_index.index_array
    term_count = len(index_array)
    docs_score = ScoreCalculator.docs_score_calculator(index_array, docs)
    centroid = []
    for term_scores in docs_score:
        # Entry 0 is skipped — presumably it holds the term itself rather
        # than a (doc, weight) pair; weights live at positions 1..end.
        total = sum(pair[1] for pair in term_scores[1:])
        centroid.append(total / term_count)
    return centroid, docs_score
示例#2
0
def score_calculator(invertedIndex: InvertedIndex, docs, query, k):
    """Rank documents against *query* and return up to the top-k matches.

    Looks each query word up in the inverted index (reporting misses),
    scores the matching postings, and pops at most ``k`` results from the
    cosine-similarity max-heap.
    """
    query_doc = Document.Document(query, 1)
    query_score = ScoreCalculator.query_score_calculator(query_doc, docs)

    # Collect index entries for every query word found in the index.
    index_res = []
    for word in sorted(query_doc.words):
        loc = invertedIndex.find_word(word)
        if loc == -1:
            print("word " + word + " can't be found in List!")
        else:
            index_res.append(invertedIndex.index_array[loc])

    docs_score = docs_score_calculator(index_res, docs)
    max_heap = CosinusCalculator.cosinus_max_heap_creator(
        query_score, docs_score)

    # Heap size is read once, before any pop mutates the heap (matching the
    # original range-based loops). Slot 0 appears to be a placeholder, hence
    # the "> 1" guard.
    heap_size = len(max_heap.heap)
    if heap_size > 1 and heap_size > k:
        pop_count = k
    elif 1 < heap_size < k + 1:
        pop_count = heap_size
    else:
        pop_count = 0

    res = []
    for _ in range(pop_count):
        res.append(max_heap.pop())
    return res
示例#3
0
import ScoreCalculator

# Instantiate the calculator with placeholder arguments ("hoge"/"hoge" are
# presumably dummy values — TODO confirm what the constructor expects) and
# fetch its ranked music list.
sc = ScoreCalculator.ScoreCalculator("hoge", "hoge")
score = sc.get_best_music_list()
示例#4
0
def get_ipscore(log_file_name):
    """Score each log line in *log_file_name* and record the results in MySQL.

    For every well-formed log line: parse its attributes, compute the five
    component scores (static, source, familiarity, traffic, variation) unless
    the log falls in the first 10-minute "booting" window, insert the scored
    decision into ``runtime_tmp``, and update the training tables.

    NOTE(review): no explicit ``database.commit()`` is issued — this relies
    on the connection's autocommit setting; verify against the deployment.
    """
    # Database Connection
    database = MySQLdb.connect(HOST, USER, PASSWORD, DB_NAME)
    try:
        cursor = database.cursor()

        # Get system constants from DB
        N, Ni, Nu, Ns, t, L, h = ScoreCalculator.get_system_constants(cursor)

        # Open test log file and read line by line
        with open(log_file_name, "r") as log_file:
            for line in log_file:
                # Define all score values
                K = 0 # static score
                S = 0 # source score
                F = 0 # familiarity score
                T = 0 # traffic score
                V = 0 # variation score
                P = 0 # overall score

                tns = 0 # tns value
                tnd = 0 # tnd value
                n_days = 0 # number of days value
                out_log_id = -1 # log id


                ##
                ## 1 ## Read current log and get attributes
                ##
                # Check line attributes and size; skip malformed lines
                # (wrong field count, empty 9th field, truncated values).
                attributes = line.split(',')
                size = len(attributes)
                if size == LogParser.LINE_SIZE and attributes[8].strip() \
                        and '=,' not in line and '=""' not in line:
                    # Get attributes of the current log
                    date_time, date, time = LogParser.get_datetime(attributes)
                    source_ip = LogParser.get_source(attributes)
                    dest_ip, dest_port = LogParser.get_destination(attributes)
                    action, service = LogParser.get_action_service(attributes)

                    # Parse time of the current log
                    current_time = str(date_time.time()).split(':')
                    hour = current_time[0].strip()
                    minute = current_time[1].strip()
                    second = current_time[2].strip()
                    # Interval start: hour + tens-of-minutes digit + '0'.
                    time_interval = hour + minute[0] + '0'

                    # Set time interval of the current log (append the
                    # interval end, rolling over the hour at HH:50-HH:59).
                    if int(minute[0]) == 5:
                        if int(hour) == 23:
                            time_interval += '_' + hour + str(59)
                        elif int(hour) < 9:
                            time_interval += '_0' + str(int(hour) + 1) + '00'
                        elif int(hour) >= 9:
                            time_interval += '_' + str(int(hour) + 1) + '00'
                    else:
                        time_interval += '_' + hour + str(int(minute[0]) + 1) + '0'


                    ##
                    ## 2 ## Check time of current log and decide to calculate score
                    ## If the log is in the first 10 minutes, insert it into
                    ##      runtime_tmp table and training data
                    ## Else, calculate a score for current log and do other insertions
                    ##

                    # Define default run time table values (used for the
                    # first 10-minute "booting" window).
                    dec_action = 'booting'
                    score = -1
                    debug = 'booting'

                    # Check time of current log to know whether it is in the first 10 min
                    if time_interval != '0000_0010':
                        # Calculate static score
                        K = ScoreCalculator.calculate_static_score(cursor, K, source_ip, dest_ip, dest_port, service)

                        # Calculate source score
                        S = ScoreCalculator.calculate_source_score(cursor, S, service, dest_port, (str(date) + ' ' + str(time)), Ni, Nu, L)

                        # Calculate familiarity score
                        out_log_id, F = ScoreCalculator.calculate_familiarity_score(cursor, service, F, out_log_id, date, source_ip, dest_port, h, L)

                        # Calculate traffic score
                        T = ScoreCalculator.calculate_traffic_score(cursor, database, T, service, dest_port, (str(date) + ' ' + str(time)), out_log_id, t)

                        # Calculate variation score
                        V = ScoreCalculator.calculate_variation_score(cursor, V, tnd, source_ip, dest_port, (str(date) + ' ' + str(time)), Ns, L)

                        # Calculate overall score and set decision, score and debug values
                        P = K + (S+F+T+V)/4.0
                        if P > L/100.0:
                            dec_action = 'allow'
                        else:
                            dec_action = 'deny'
                        score = P
                        debug = str(K) + ';' + str(S) + ';' + str(F) + ';' + str(T) + ';' + str(V)


                    ##
                    ## 3 ## Insert current log into runtime_tmp table
                    ##
                    # SECURITY FIX: build the INSERT with placeholders instead
                    # of string concatenation — log-file fields are untrusted
                    # input and the old query was injectable. (Also drops the
                    # redundant unused `insert_query` alias.)
                    insert_tmp_query = ('INSERT INTO runtime_tmp '
                        '(datetime, srcip, dstip, port, service, action, score, debug) '
                        'VALUES(%s, %s, %s, %s, %s, %s, %s, %s)')
                    cursor.execute(insert_tmp_query,
                                   (str(date) + ' ' + str(time), source_ip,
                                    dest_ip, dest_port, service, dec_action,
                                    score, debug))

                    ##
                    ## 4 ## Insert current log into training tables in DB
                    ##
                    # Insert current traffic into database
                    log_id = LogParser.insert_traffic_into_db(cursor, source_ip, dest_ip, dest_port, service)

                    # Increase count of the current log
                    LogParser.update_log_count(cursor, log_id, date, time_interval)
    finally:
        # Close database connection even if parsing/scoring raises.
        database.close()
示例#5
0
import FetchDocument
import InvertedIndex
import ChampionList
import ScoreCalculator

if __name__ == '__main__':
    # Build an inverted index containing every word of every fetched document.
    docs = FetchDocument.getAllDocuments2()
    inverted_index = InvertedIndex.InvertedIndex()
    for doc in docs:
        for word in doc.words:
            inverted_index.add_id(word, doc.doc_id)

    # Fixed parameters; the interactive prompts are kept for reference.
    # k = input("Please Enter K Value:\n")
    k = 10
    # r = input("Please Enter R Value:\n")
    r = 20
    # query = input("Please Enter your Query:\n")
    query = 'تراکتور'

    # Rank documents from the champion list and print the top-k results.
    champion_list = ChampionList.champion_list_creator(inverted_index, docs, r)
    results = ScoreCalculator.score_calculator(champion_list, docs, query, k)
    print(results)
示例#6
0
# Scan the 'Scavenger Hunt' folders for photos added since the last run,
# upload each new photo to Instagram with a generated caption, then touch
# the marker file so its mtime records this upload run.
exDir = os.getcwd()
LastUploadDate_file = exDir + '/LastUploadDate.txt'
exDir += '/Scavenger Hunt'

# mtime of the marker file == time of the previous upload run.
LastUploadTime = os.path.getmtime(LastUploadDate_file)
newPhotosDetected = False

# FIX: renamed loop variables `dir`/`file` so they no longer shadow builtins.
for entry in os.listdir(exDir):
    # We can exclude any directories here. Only sub-directories (one per
    # hunt item, named like "<user>-<thing>") are scanned.
    if not os.path.isfile(os.path.join(exDir, entry)):
        location = exDir + '/' + entry
        for photo in os.listdir(location):
            fileLocation = location + '/' + photo
            fileTime = os.path.getmtime(fileLocation)
            # Upload only files modified since the last run.
            if fileTime > LastUploadTime:
                text = photo.split(
                    '.', 1
                )[0] + ' Just earned another point uploading a picture of: ' + entry.split(
                    '-', 1)[1] + ' #ScavengerHunt #CRASH #FindTheThing'
                print(text)
                InstagramAPI.uploadPhoto(fileLocation, caption=text)
                newPhotosDetected = True

if newPhotosDetected:
    # BUG FIX: the mode was 'wr', which is not a valid open() mode
    # (ValueError on Python 3). 'w' truncates and updates the mtime,
    # which is all this marker file needs.
    with open(LastUploadDate_file, 'w') as marker:
        marker.write(' ')
        ScoreCalculator.mainStuff(exDir)
示例#7
0
    # Flatten the per-cluster document lists into one list.
    for i in range(len(docs)):
        for j in range(len(docs[i])):
            full_docs.append(docs[i][j])

    centroids = list()
    indexes_scores = list()

    query = 'آزمایش'
    # query = input("Please Enter your Query:\n")

    # For each cluster: compute its centroid and score table, and score the
    # query against the cluster's documents.
    query_scores = list()
    for i in range(len(docs)):
        centroid, docs_scores = calculate_centroid(inverted_indexes[i],
                                                   docs[i])
        centroids.append(centroid)
        indexes_scores.append(docs_scores)
        query_scores.append(
            ScoreCalculator.query_score_calculator(Document.Document(query, 1),
                                                   docs[i]))

    # Pick the cluster whose centroid best matches the query scores.
    best_index = find_best_cluster_index(inverted_indexes, centroids,
                                         query_scores)

    #    k = input("Please Enter K Value:\n")
    k = 10

    # Rank only within the best-matching cluster and print the top-k results.
    results = ScoreCalculator.score_calculator(inverted_indexes[best_index],
                                               docs[best_index], query, k)

    print(results)
示例#8
0
    import pickle
    with open(filename, 'rb') as fp:
        itemlist = pickle.load(fp)
    return itemlist


# Load the serialized tweet list. The preprocessed variant is used; the raw
# dump is kept below for reference.
# Not preprocessed tweets
#tweets = readTweets("combined_tweets_swedish_serialized")
# Preprocessed tweets:
tweets = readTweets("cleaned_tweets")

import ScoreCalculator

# Score every tweet with the VADER sentiment analyzer, then print the
# confusion matrix for the run.
analyzer = SentimentIntensityAnalyzer()
scores = ScoreCalculator.ScoreCalculator(tweets, analyzer)
scores.getScores()

# Optional diagnostics, left disabled:
# scores.printStats();
# scores.calculateMetrics()
# strategy = 'Vader lexicon unstemmed'
# scores.printMetricsTable(strategy)
# scores.printConfusionMatrix()

scores.confusionMatrix()

###########################################