Example #1
File: csap.py Project: iahuang/csap
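As published, the snippet depends on names defined elsewhere in the project. A plausible import header would look like the following; only argv comes from the standard library, and the project-local module paths are assumptions, not shown in the original source:

from sys import argv

# Project-local modules assumed from iahuang/csap (exact paths are guesses):
import gcc
from translator import Translator
from preprocessor import Preprocessor
from assembler import assemble
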
if __name__ == "__main__":
    csrc_path = argv[1]  # argv[0] is the script name; the C source path is the first argument

    with open(csrc_path) as fl:
        avr_code = gcc.compile(fl.read())

    with open("lib/avrheader.sap") as fl:
        avrheader = fl.read()

    translator = Translator(avrheader)
    sap = translator.to_sap(avr_code)

    with open("build/build.sap.superset", "w") as fl:
        fl.write(sap)

    proc = Preprocessor()
    proc.load_extension("ext/sapplus.json")

    sap = proc.preprocess(sap)

    with open("build/build.sap", "w") as fl:
        fl.write(sap)

    out = assemble(sap)

    if not out.success:
        print("SAP output did not compile successfully")
Example #2
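Like the first example, this one omits its imports. A header that makes it self-contained might look like this; Preprocessor is the project's own class (not shown here), and dummy is assumed to be an identity pass-through that lets TfidfVectorizer accept input that is already preprocessed and tokenized:

import itertools
from pprint import pprint

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB


def dummy(doc):
    # Assumed helper: return the document unchanged so the vectorizer
    # neither preprocesses nor tokenizes it a second time.
    return doc
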
def compare_preprocessing():
    # Loading train and test data:

    all_categories = [
        'alt.atheism', 'comp.graphics', 'comp.os.ms-windows.misc',
        'comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'comp.windows.x',
        'misc.forsale', 'rec.autos', 'rec.motorcycles', 'rec.sport.baseball',
        'rec.sport.hockey', 'sci.crypt', 'sci.electronics', 'sci.med',
        'sci.space', 'soc.religion.christian', 'talk.politics.guns',
        'talk.politics.mideast', 'talk.politics.misc', 'talk.religion.misc'
    ]
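    # Note: this lists all 20 newsgroups, so passing categories=None to
    # fetch_20newsgroups would load the same data; spelling them out keeps
    # the selection explicit.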

    print("Loading 20 newsgroups...")

    newsgroups_train = fetch_20newsgroups(subset='train',
                                          remove=('headers', 'footers',
                                                  'quotes'),
                                          categories=all_categories)

    newsgroups_test = fetch_20newsgroups(subset='test',
                                         remove=('headers', 'footers',
                                                 'quotes'),
                                         categories=all_categories)

    print("{} training documents loaded.".format(
        newsgroups_train.filenames.shape[0]))

    print("Buidling Preprocessor combinations...")
    # flags: special_character_removal, number_removal, url_email_removal, stopword_removal, lower, stemming, lemmatize
    num_of_preprocessor_flags = 7
    # Build every possible True/False assignment for the seven flags
    # (the Cartesian product over [False, True], i.e. 2**7 = 128 combinations)
    booleans = [False, True]
    flags_list = [
        list(b)
        for b in itertools.product(booleans, repeat=num_of_preprocessor_flags)
    ]
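    # With two flags, for example, itertools.product([False, True], repeat=2)
    # yields (False, False), (False, True), (True, False), (True, True);
    # the same pattern is expanded here over all seven flags.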

    # Drop combinations that make no sense: stemming and lemmatization applied
    # together, and stemming without lowercasing (the stemmer lowercases its
    # input internally, so that combination is redundant)
    flags_list = [
        f for f in flags_list
        if not (f[5] and f[6]) and not (f[5] and not f[4])
    ]
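    # Of the 128 raw combinations, 32 pair stemming with lemmatization and a
    # further 16 use stemming without lowercasing, leaving 80 valid ones.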
    print("{} Combinations built.".format(len(flags_list)))

    # Initialize the vectorizer, the learning algorithm, and the result columns
    vectorizer = TfidfVectorizer(analyzer="word",
                                 tokenizer=dummy,  # documents arrive pre-tokenized
                                 lowercase=False,  # casing is handled by the Preprocessor
                                 preprocessor=dummy,  # documents arrive preprocessed
                                 stop_words=None)
    clf = MultinomialNB(alpha=.01)
    columns = [
        'Special Character Removal', 'Number Removal',
        'URL and E-Mail Removal', 'Stopword Removal', 'Lowercase', 'Stemming',
        'Lemmatization', 'Unique Words', 'Accuracy'
    ]
    rows = []

    for flags in flags_list:  # loops through all combinations
        prep = Preprocessor(special_character_removal=flags[0],
                            number_removal=flags[1],
                            url_email_removal=flags[2],
                            stopword_removal=flags[3],
                            lower=flags[4],
                            stemming=flags[5],
                            lemmatize=flags[6])

        preprocessed_train_data = [
            prep.preprocess(d) for d in newsgroups_train.data
        ]

        preprocessed_test_data = [
            prep.preprocess(d) for d in newsgroups_test.data
        ]

        vectors = vectorizer.fit_transform(preprocessed_train_data)

        # Train machine learning model
        clf.fit(vectors, newsgroups_train.target)

        # Transform the test data with the vectorizer fitted on the training data
        vectors_test = vectorizer.transform(preprocessed_test_data)

        # Evaluate
        pred = clf.predict(vectors_test)
        vocab = vectors.shape[1]  # vocabulary size after preprocessing
        accuracy = metrics.accuracy_score(newsgroups_test.target, pred)
        rows.append(flags + [vocab, accuracy])

        print(
            "Spec: {} , Numbers: {} , EmailUrl: {} , SWR: {}, low: {}, Stem: {} , Lem: {} -> Vocab: {}, Acc: {}"
            .format(flags[0], flags[1], flags[2], flags[3], flags[4], flags[5],
                    flags[6], vocab, accuracy))

    # Organize data frame and save the results
    df = pd.DataFrame(rows, columns=columns)  # build from the list directly to keep bool/int/float dtypes
    df = df.sort_values(by=['Accuracy'], ascending=False)
    pprint(df)
    df.to_csv('results.csv', sep=';')
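Because the results are written with a semicolon separator, reading them back later requires the same setting, e.g. pd.read_csv('results.csv', sep=';').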