Example #1
    def train(self, texts: List[str], target: List[int]) -> None:

        # Use the public tf.keras API consistently (mixing tensorflow.python.keras
        # with tensorflow.keras can yield incompatible layer/model classes).
        from tensorflow.keras.models import Sequential  #type: ignore
        from tensorflow.keras.layers import Embedding, Dense, LSTM, GlobalMaxPool1D  #type: ignore
        from tensorflow.keras.optimizers import Adam  #type: ignore
        from tensorflow.keras.callbacks import History  #type: ignore

        if self.downsampling:
            texts, target = downsample(texts, target, self.downsampling_ratio)

        if self.verbose:
            print('1. Vectorizing texts')

        NUMBER_OF_FEATURES: int = 20000
        self.tokenizer = text.Tokenizer(num_words=NUMBER_OF_FEATURES)
        self.tokenizer.fit_on_texts(texts)
        vocabulary: Dict[str, int] = self.tokenizer.word_index

        if self._max_sequence_length == 0:
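            # Fall back to the length (in characters) of the longest training text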
            self._max_sequence_length = len(max(texts, key=len))

        vectorized_texts: array = self.vectorize_texts(texts)

        if self.embedding_location == '':
            if self.verbose:
                print('2. Skip (no embeddings)')
                print('3. Skip (no embeddings)')
        else:
            if self.verbose:
                print('2. Loading word embeddings')

            embedding_dictionary: Dict[
                str, List[float]] = load_embedding_dictionary(
                    self.embedding_location)
            # Embedding dimensionality: the number of values stored for the first word
            nr_of_embedding_features: int = len(
                next(iter(embedding_dictionary.values())))

            if self.verbose:
                print('3. Creating embedding matrix')

            embedding_matrix: array = create_embedding_matrix_for_vocabulary(
                embedding_dictionary, vocabulary)

        if self.verbose:
            print('4. Building up model')

        # Define a simple LSTM model with an (optionally pretrained) embedding layer
        model: Sequential = Sequential()

        if self.embedding_location == '':
            #Add an empty embedding layer if we have no pretrained embeddings
            EMPTY_EMBEDDING_LAYER_SIZE: int = 300
            model.add(
                Embedding(len(vocabulary) + 1, EMPTY_EMBEDDING_LAYER_SIZE))

        else:
            model.add(
                Embedding(input_dim=len(vocabulary) + 1,
                          output_dim=nr_of_embedding_features,
                          input_length=vectorized_texts.shape[1],
                          weights=[embedding_matrix],
                          trainable=False))

        model.add(LSTM(16, return_sequences=True))
        model.add(LSTM(16, return_sequences=True))
        model.add(LSTM(16, return_sequences=True))
        model.add(GlobalMaxPool1D())

        model.add(Dense(256))
        model.add(Dense(256))

        model.add(Dense(1, activation='sigmoid'))

        #Compile the model
        optimizer: Adam = Adam(learning_rate=self.learning_rate)  # 'lr' is deprecated in tf.keras
        model.compile(optimizer=optimizer,
                      loss='binary_crossentropy',
                      metrics=['acc'])

        if self.verbose:
            print('5. Training the model')

        history: History = model.fit(
            vectorized_texts,
            target,
            epochs=self.learning_epochs,
            #validation_data=(test_vectors, test_target),
            verbose=1,  # Logs once per epoch.
            batch_size=self.learning_batch_size)

        self.model = model
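
A minimal usage sketch for this example. The class name `LSTMTextClassifier` and its constructor arguments are assumptions for illustration; only the `train(texts, target)` signature and the attributes it reads (`downsampling`, `verbose`, `embedding_location`, `learning_rate`, `learning_epochs`, `learning_batch_size`) come from the code above.

# Hypothetical usage; the class name and constructor are assumptions,
# only train()'s signature comes from the example above.
texts = ['great product, works as advertised',
         'broke after one day, very disappointed',
         'decent value for the price']
target = [1, 0, 1]

classifier = LSTMTextClassifier(
    embedding_location='',     # '' -> train the embedding layer from scratch
    learning_rate=0.001,
    learning_epochs=5,
    learning_batch_size=32,
    downsampling=False,
    verbose=True)

classifier.train(texts, target)  # fits the tokenizer and LSTM, stores them on the instance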
Example #2
    def train(self, texts: List[str], target: List[int]) -> None:

        # Use the public tf.keras API consistently instead of the private
        # tensorflow.python.keras path.
        from tensorflow.keras.models import Model  #type: ignore
        from tensorflow.keras.layers import Input, Embedding, GRU, Dense, Bidirectional, GlobalMaxPool1D, concatenate  #type: ignore
        from tensorflow.keras.optimizers import Adam  #type: ignore
        from tensorflow.keras.callbacks import History  #type: ignore

        if self.downsampling:
            texts, target = downsample(texts, target, self.downsampling_ratio)

        if self.verbose:
            print('1. Vectorizing texts')

        NUMBER_OF_FEATURES: int = 20000
        self.tokenizer = text.Tokenizer(num_words=NUMBER_OF_FEATURES)
        self.tokenizer.fit_on_texts(texts)
        vocabulary: Dict[str, int] = self.tokenizer.word_index

        if self._max_sequence_length == 0:
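            # Fall back to the length (in characters) of the longest training text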
            self._max_sequence_length = len(max(texts, key=len))

        vectorized_texts: array = self.vectorize_texts(texts)

        if self.include_casing_information:
            casing_information: array = self.texts_to_casing_information(texts)

        if self.embedding_location == '':
            if self.verbose:
                print('2. Skip (no embeddings)')
                print('3. Skip (no embeddings)')
        else:
            if self.verbose:
                print('2. Loading word embeddings')

            embedding_dictionary: Dict[
                str, List[float]] = load_embedding_dictionary(
                    self.embedding_location)
            # Embedding dimensionality: the number of values stored for the first word
            nr_of_embedding_features: int = len(
                next(iter(embedding_dictionary.values())))

            if self.verbose:
                print('3. Creating embedding matrix')

            embedding_matrix: array = create_embedding_matrix_for_vocabulary(
                embedding_dictionary, vocabulary)

        if self.verbose:
            print('4. Building up model')

        # Define a simple GRU model, optionally bidirectional, with an (optionally pretrained) embedding layer
        word_input: Input = Input(shape=(self._max_sequence_length, ))

        if self.embedding_location == '':
            #Add an empty embedding layer if we have no pretrained embeddings
            EMPTY_EMBEDDING_LAYER_SIZE: int = 300
            layers = Embedding(
                len(vocabulary) + 1, EMPTY_EMBEDDING_LAYER_SIZE)(word_input)

        else:
            layers = Embedding(input_dim=len(vocabulary) + 1,
                               output_dim=nr_of_embedding_features,
                               input_length=vectorized_texts.shape[1],
                               weights=[embedding_matrix],
                               trainable=False)(word_input)

        #Add a separate 'entrance' for the casing information
        if self.include_casing_information:
            word_model: Model = Model(inputs=word_input, outputs=layers)

            casing_input: Input = Input(shape=(self._max_sequence_length, 1))

            casing_model: Model = Model(inputs=casing_input,
                                        outputs=casing_input)
            layers = concatenate([word_model.output, casing_model.output])

        if self.bidirectional:
            layers = Bidirectional(
                GRU(16, activation='tanh', return_sequences=True))(layers)
            layers = Bidirectional(
                GRU(16, activation='tanh', return_sequences=True))(layers)
        else:
            layers = GRU(16, activation='tanh', return_sequences=True)(layers)
            layers = GRU(16, activation='tanh', return_sequences=True)(layers)

        layers = GlobalMaxPool1D()(layers)

        layers = Dense(256)(layers)
        layers = Dense(256)(layers)

        layers = Dense(1, activation='sigmoid')(layers)

        if self.include_casing_information:
            model: Model = Model([word_model.input, casing_model.input],
                                 layers)
        else:
            model: Model = Model(word_input, layers)

        #Compile the model
        optimizer: Adam = Adam(learning_rate=self.learning_rate)  # 'lr' is deprecated in tf.keras
        model.compile(optimizer=optimizer,
                      loss='binary_crossentropy',
                      metrics=['acc'])

        if self.verbose:
            print('5. Training the model')

        # Avoid shadowing the built-in input()
        if self.include_casing_information:
            training_input = [vectorized_texts, casing_information]
        else:
            training_input = vectorized_texts

        history: History = model.fit(
            training_input,
            target,
            epochs=self.learning_epochs,
            #validation_data=(test_vectors, test_target),
            verbose=1,  # Logs once per epoch.
            batch_size=self.learning_batch_size)

        self.model = model
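
Once `train` has run, the fitted `Tokenizer` and Keras model live on the instance, so the same preprocessing can be reused at prediction time. A sketch, assuming `classifier` is a trained instance of the (hypothetical) class containing this second example, constructed with `include_casing_information=True`; `vectorize_texts` and `texts_to_casing_information` are the helper methods referenced above, and `predict` is the standard Keras call.

# Inference sketch; `classifier` is assumed to be a trained instance of the class above.
new_texts = ['the package arrived damaged', 'excellent customer support']

vectorized = classifier.vectorize_texts(new_texts)              # same tokenizer/padding as training
casing = classifier.texts_to_casing_information(new_texts)      # second input branch for casing

probabilities = classifier.model.predict([vectorized, casing])  # sigmoid outputs in [0, 1]
labels = (probabilities > 0.5).astype(int).ravel()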