class CarbonaraBros():
    """Classify HTML tables and lists of a DOM as relevant / not relevant.

    Uses one pre-trained Keras model per node family and a shared
    FeaturesExtractor to turn DOM nodes into feature vectors.
    """

    def __init__(self, relevant_threshold=0.8,
                 table_model_path='models/table_classifier.h5',
                 list_model_path='models/list_classifier.h5'):
        """Load the two classifiers.

        Args:
            relevant_threshold: minimum positive-class probability for a
                node to be filed under 'relevant'.
            table_model_path / list_model_path: model files; defaults match
                the previously hard-coded paths (backward compatible).
        """
        self.fe = FeaturesExtractor()
        self.relevant_threshold = relevant_threshold
        self.tableClassifier = Classifier(table_model_path)
        self.listClassifier = Classifier(list_model_path)

    def _classify_node(self, node, classifier, selected, descriptor, bucket):
        """Score one node and append (score, node) to the proper bucket.

        `probabilities[1]` is taken as the positive ("relevant") class score.
        """
        features = self.fe.extract(node,
                                   selected=selected,
                                   features_descriptor=descriptor)
        probabilities = classifier.classify(self.fe.toArray(features))
        score = probabilities[1]
        key = 'relevant' if score >= self.relevant_threshold else 'not_relevant'
        bucket[key].append((score, node))

    def processDom(self, dom):
        """Return {'table': {...}, 'list': {...}} buckets of (score, node)."""
        analysis = {
            'table': {'relevant': [], 'not_relevant': []},
            'list': {'relevant': [], 'not_relevant': []},
        }

        for table in dom.xpath("//table"):
            self._classify_node(table, self.tableClassifier,
                                DefaultFeatures.table_selected,
                                DefaultFeatures.table,
                                analysis['table'])

        # NOTE: the loop variable used to be named `list`, shadowing the builtin.
        list_nodes = dom.xpath("//ul") + dom.xpath("//ol") + dom.xpath("//dl")
        for node in list_nodes:
            self._classify_node(node, self.listClassifier,
                                DefaultFeatures.list_selected,
                                DefaultFeatures.list,
                                analysis['list'])

        return analysis
def initialize():
    """Build training matrices from every "cache-file" two levels under `directory`.

    Each cache-file line is: <relative audio path> <word> <start> <end>.
    Fills `all_features` rows with FFT features and `all_output_labels` with
    one-hot word vectors.

    Returns:
        (all_output_labels, all_features, all_words) as soon as `data_size`
        rows are filled; implicitly returns None if the corpus is exhausted
        first (original behaviour, preserved).

    Relies on module globals: data_size, feature_length,
    size_of_output_labels_vector, directory, word_to_index.
    """
    index = 0
    all_features = np.empty((data_size, feature_length))
    all_output_labels = np.empty((data_size, size_of_output_labels_vector))
    all_words = {}

    for filename in os.listdir(directory):
        if not os.path.isdir(os.path.join(directory, filename)):
            continue
        for filename2 in os.listdir(os.path.join(directory, filename)):
            if not os.path.isdir(os.path.join(directory, filename, filename2)):
                continue
            for filename3 in os.listdir(
                    os.path.join(directory, filename, filename2)):
                if filename3 != "cache-file":
                    continue
                # BUG FIX: 'rd' is not a valid open() mode and raises
                # ValueError; plain text read is 'r'.
                with open(os.path.join(directory, filename, filename2,
                                       filename3), 'r') as f:
                    for line in f:
                        # Split once instead of four times per line.
                        fields = line.split(' ')
                        audioFilePath = os.path.join(
                            directory, filename, filename2, fields[0])
                        word = fields[1]
                        startTime = float(fields[2])
                        endTime = float(fields[3])
                        # 'rb' (was invalid 'rd'): the handle itself is never
                        # used; the open only checks the audio file is readable.
                        with open(audioFilePath, 'rb'):
                            timelinedWord = WordWithTimeline(
                                word, startTime, endTime)
                            try:
                                all_features[index, :] = \
                                    FeaturesExtractor.getFeaturesFFT(
                                        timelinedWord, audioFilePath,
                                        feature_length)
                                all_output_labels[index, :] = np.zeros(
                                    size_of_output_labels_vector)
                                all_words[index] = word
                                all_output_labels[
                                    index, word_to_index[word]] = 1
                                index += 1
                            except ValueError:
                                # Feature extraction failed; row is reused
                                # for the next word.
                                print("skipping word, all zeros")
                            if index >= data_size:
                                return all_output_labels, all_features, all_words
def print_result(result, color):
    """Print one colored line per (score, node): score, text summary, features.

    Args:
        result: iterable of (score, node) pairs, nodes being lxml-style
            elements with a `.tag` attribute.
        color: color name forwarded to with_color() for the score column.
    """
    # Hoisted out of the loop: the extractor is stateless across nodes and
    # was previously re-constructed on every iteration.
    ft = FeaturesExtractor()
    summary_length = 60

    for score, node in result:
        # 1st column: score, rounded for display.
        score = round(score, 2)

        # 2nd column: quoted text summary of the node.
        node_summary = node_text_summary(node, length=summary_length)
        node_summary = '"{}"'.format(node_summary)

        # 3rd column: feature vector, chosen per node family.
        is_table = node.tag == "table"
        descriptor = DefaultFeatures.table if is_table else DefaultFeatures.list
        selected = (DefaultFeatures.table_selected
                    if is_table else DefaultFeatures.list_selected)
        features = ft.extract(node,
                              selected=selected,
                              features_descriptor=descriptor)
        features_array = ft.toArray(features)

        # Pad so the feature column lines up across rows.
        padding = " " * (summary_length - len(node_summary))
        print(with_color(score, color=color), node_summary, padding,
              str(list(features_array)))
for candidate in candidates: feature_vectors.append( features_extractor.extract_features(tomogram, candidate)) # this sets each candidate's label labels.append(labeler.label(candidate)) return (candidates, feature_vectors, labels) #this is tuple of tuples of TiltedTemplates (each group has the same template_id) templates = TemplateGenerator.generate_tilted_templates() #save templates to files candidate_selector = CandidateSelector.CandidateSelector(templates) features_extractor = FeaturesExtractor.FeaturesExtractor(templates) #Training feature_vectors = [] #a label is a template_id, where 0 is junk labels = [] criteria = (Candidate.fromTuple(1, 0, 10, 10), Candidate.fromTuple(1, 2, 27, 18), Candidate.fromTuple(0, 0, 10, 28)) for i in range(TRAINING_SET_SIZE): # configuration for tomogram generation #with a set composition tomogram = TomogramGenerator.generate_tomogram_with_given_candidates(
# NOTE(review): fragment — `videoFeatureDir` and `featureNameListNew` are
# defined above this view; confirm whether this first part belongs to a
# method body rather than module level.
os.makedirs(videoFeatureDir)
for featureName in featureNameListNew:
    videoFeatureList = []
    # Concatenate the three numbered feature files ("<name>_feature_0..2")
    # into a single flat list.
    for i in range(0, 3):
        videoFeatureList.extend(
            np.loadtxt(featureName + "_feature_" + str(i)))
    # newline=" " makes savetxt put all values on one line.
    np.savetxt(videoFeatureDir + os.sep + os.path.basename(featureName),
               videoFeatureList,
               newline=" ")


if __name__ == '__main__':
    starttime = datetime.datetime.now()
    # Run feature extraction from the keyframe dir into the features dir
    # (the "not_on_spark" local variant).
    fe.FeaturesExtractor(
        r"/home/sunbite/MFSSEL/keyframe_not_on_spark/",
        r"/home/sunbite/MFSSEL/features_not_on_spark/").featuresExtractor()
    # fe.FeaturesExtractor(
    #     r"/home/sunbite/MFSSEL/keyframe_not_on_spark/",
    #     r"/home/sunbite/MFSSEL/features_not_on_spark/").getAllVideoFeature()
    endtime = datetime.datetime.now()
    print(
        '----------------------------------------------------------------------------'
    )
    print(
        '----------------------------------------------------------------------------'
    )
    # Elapsed wall-clock time in whole seconds.
    print(
        '-------------FeaturesExtractor Running time: %s Seconds--------------'
        % (endtime - starttime).seconds)
# set random seed seed(random_seed) set_random_seed(random_seed) ### data load ### logger.info('load data') (x_train, y_train), (x_test, y_test) = load_data(dataset) # create the training and test data with normal class only for training CAE's x_train_normal, x_test_normal = get_normal_data(x_train, y_train, x_test, y_test, normal_class) ### make features_extractor model and extract features for training and test dataset images logger.info('extract features') if features_extractor == 'cae': features_train, features_test, featuresExtractTime = FeaturesExtractor.cae( cae_type, x_train_normal, x_test_normal, x_test) else: featuresExtractTime = 0 features_train, features_test = FeaturesExtractor.raw( x_train_normal, x_test) ### anomaly detection logger.info('anomaly detection: calculate anomaly scores and auc') if anomaly_detection == 'ocsvm': scores, anomalyDetectTime = AnomalyDetection.ocsvm(features_train, features_test) labels_test = y_test.flatten() == normal_class elif anomaly_detection == 'nnd': scores, anomalyDetectTime = AnomalyDetection.nnd(features_train, features_test, k) labels_test = y_test.flatten() != normal_class
# NOTE(review): fragment — this `else:` belongs to a condition above this
# view; `line`, `word`, `audioFilePath`, the *_to_* dicts, wordIndex and
# wordToIndexFile are all defined there.
else:
    if word not in word_to_index:
        word_to_index[word] = max(
            word_to_index.values()) + 1
        # NOTE(review): likely off-by-one — the max() here is re-evaluated
        # AFTER the assignment above raised it, so this key is new_index + 1,
        # not new_index. Confirm and fix.
        index_to_word[
            max(word_to_index.values()) + 1] = word
    # cache-file line fields: <audio path> <word> <start time> <end time>
    startTime = float(line.split(' ')[2])
    endTime = float(line.split(' ')[3])
    # NOTE(review): 'rd' is not a valid open() mode — this raises ValueError
    # at runtime ('r' or 'rb' expected). Confirm and fix.
    with open(audioFilePath, 'rd') as a:
        timelinedWord = WordWithTimeline(
            word, startTime, endTime)
        features = FeaturesExtractor.getFeatures(
            timelinedWord, audioFilePath)
        # print(audioFilePath)
        print(word)
        # print("Start time " + str(startTime))
        # print("End time " + str(endTime))
        # print("Feature size: " + str(features.size))
        if word not in wordToIndex:
            wordToIndex[word] = wordIndex
            indexToWord[wordIndex] = word
            wordIndex += 1
            wordToIndexFile.write(word + "\n")
        # TODO: do something with `features` here — they have a variable
        # size, so they probably need padding/truncation first.
def __init__(self, relevant_threshold=0.8,
             table_model_path='models/table_classifier.h5',
             list_model_path='models/list_classifier.h5'):
    """Set up the shared feature extractor and the two classifiers.

    Args:
        relevant_threshold: score cutoff used by callers to split nodes
            into relevant / not relevant.
        table_model_path / list_model_path: classifier model files.
            Defaults preserve the previously hard-coded paths, so existing
            callers are unaffected.
    """
    self.fe = FeaturesExtractor()
    self.relevant_threshold = relevant_threshold
    self.tableClassifier = Classifier(table_model_path)
    self.listClassifier = Classifier(list_model_path)
# NOTE(review): fragment — `directory`, `numOfWords`, `words`, `features`,
# `outputs`, `feature_length` and `generateDicArray` come from above this
# view; the final Convolutional(...) call is cut off below it.
# NOTE(review): 'rd' is not a valid open() mode — open() raises ValueError
# ('r'/'rb' expected). Confirm and fix.
with open(os.path.join(directory, "cache-file"), 'rd') as f:
    while numOfWords > 0:
        # cache-file line fields: <audio path> <word> <start> <end>.
        # Rows are filled at index 5 - numOfWords, i.e. 0..4 in order.
        line = f.readline()
        words[5 - numOfWords] = line.split(' ')[1]
        audioFilePath = os.path.join(directory, line.split(' ')[0])
        startTime = float(line.split(' ')[2])
        endTime = float(line.split(' ')[3])
        # with open(audioFilePath, 'rd') as a:
        timelinedWord = WordWithTimeline(
            line.split(' ')[1], startTime, endTime)
        features[5 - numOfWords, :] = FeaturesExtractor.getFeaturesFFT(
            timelinedWord, audioFilePath, feature_length)
        numOfWords -= 1

# One output vector per collected word.
for i in range(5):
    outputs[i] = generateDicArray(words, words[i])

# print(words)
# print(outputs)
# print(features)
n_classes = 5
# Feature vectors are presumably reshaped to an x-by-x single-channel
# "image" for the conv layer — TODO confirm feature_length is a square.
x = int(np.sqrt(feature_length))
layer_1 = Convolutional(input_shape=(x, x, 1),
# NOTE(review): fragment — the enclosing loop headers above and the final
# print(...) call's argument below are cut off in this view.
        videoFeatureList.extend(
            np.loadtxt(featureName + "_feature_" + str(i)))
    # newline=" " makes savetxt put all values on one line.
    np.savetxt(videoFeatureDir + os.sep + os.path.basename(featureName),
               videoFeatureList,
               newline=" ")

if __name__ == '__main__':
    starttime = datetime.datetime.now()
    # fe.FeaturesExtractor(
    #     r"/home/sunbite/keyframe/",
    #     r"/home/sunbite/features_new_1").featuresExtractor()
    fe.FeaturesExtractor(
        r"/home/sunbite/MFSSEL/keyframe/",
        r"/home/sunbite/MFSSEL/features_new_1/").getAllVideoFeature()
    endtime = datetime.datetime.now()
    print(
        '----------------------------------------------------------------------------'
    )
    print(
        '----------------------------------------------------------------------------'
    )
    # Elapsed wall-clock time in whole seconds.
    print(
        '-------------FeaturesExtractor Running time: %s Seconds--------------'
        % (endtime - starttime).seconds)
    print(
        '----------------------------------------------------------------------------'
    )
    print(