def writing_data_and_price_json_csv():
    cars_data = []
    cars_price = []
    for i in range(len(page_saving.brands)):
        for j in range(20):
            content = tools.file_content(
                f'captured_data/{page_saving.brands[i]}/{page_saving.brands[i]}_page_{j+1}.html'
            )
            # append car data for every match
            for match_data in re.finditer(pattern_data, content):
                cars_data.append(match_data.groupdict())
            # append car prices for every match
            for match_price in re.finditer(pattern_price, content):
                cars_price.append(match_price.groupdict())
    tools.write_csv(cars_price, ['price'], 'cars_price.csv')
    tools.write_json(cars_price, 'cars_price.json')
    tools.write_json(cars_data, 'cars_data.json')
    tools.write_csv(cars_data, [
        'brand', 'model', 'first_registration', 'kilometers', 'engine',
        'transmission'
    ], 'cars_data.csv')
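# The snippet above relies on a project-specific `tools` module that is not
# shown here. As a rough, hypothetical sketch (not the project's actual
# implementation), helpers with the signatures used above could look like this:
import csv
import json


def write_csv(rows, fieldnames, filename):
    # write a list of dicts to a CSV file using the given column order
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)


def write_json(rows, filename):
    # dump the same list of dicts as JSON
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(rows, f, ensure_ascii=False, indent=2)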
def model_evaluation(path_to_model_dir, path_to_data_file, save_path, save_name, output_dims=2):
    # load data
    test_data = get_ndarray(path_to_data_file)
    # load the perplexity the model was trained with
    perp = int((path_to_model_dir / "perplexity.txt").read_text())
    # get transform
    transform = load_and_transform((path_to_model_dir / "model"), test_data, perp, output_dims)
    # write it out
    write_csv(transform, (save_path / save_name))
def run_standard_ptsne(test_name_path, train_data_path, test_data_path, output_dims=2):
    # create a folder with the test name
    os.mkdir(str(test_name_path))
    # get relevant data
    raw_data = get_ndarray(train_data_path)
    test_data = get_ndarray(test_data_path)
    input_dims = len(raw_data[0])
    perplexity = 30
    ptsne = Parametric_tSNE(input_dims, output_dims, perplexity)
    loss = ptsne.fit(raw_data, verbose=False)
    tform = ptsne.transform(test_data)
    # write out data
    write_loss(test_name_path, loss)
    write_csv(tform, (test_name_path / "tform.csv"))
    ptsne.clear_session()
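# Hypothetical usage of run_standard_ptsne(). The output folder name here is
# made up for illustration; the data file paths reuse the RBMTrainingDataset
# layout that appears later in this collection.
from pathlib import Path

run_standard_ptsne(Path.cwd() / "standard_ptsne_run",
                   Path.cwd() / "RBMTrainingDataset" / "training_set.csv",
                   Path.cwd() / "RBMTrainingDataset" / "2018_data_eos.csv")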
def run_generation(test_data, gene_pool, resident_directory, genetic_helper,
                   input_dims, output_dims, generation_name, log=False,
                   save_model=True):
    for child_num, dna in enumerate(gene_pool, start=1):
        child_name = "child_" + str(child_num)
        # make a folder for the child
        child_dir = resident_directory / child_name
        os.mkdir(str(child_dir))
        loss, model, tform = train_child(dna, genetic_helper, test_data,
                                         input_dims, output_dims, log)
        tools.write_loss(child_dir, loss)
        tools.write_dna(child_dir, dna)
        tools.write_csv(tform, (child_dir / "tform.csv"))
        if save_model:
            model.save_model(str(child_dir / "model"))
        if log:
            print(generation_name, " ", child_name, " trained")
        model.clear_session()
# Go through our flat 40 data: use the stored DNA to retrain a model of the
# same shape and structure, then get a transform and save it.
TARGET_FOLDER = Path.cwd() / "TestData" / "half_curve_layer_swap_40" / "generation_30"
SAVE_FOLDER = Path.cwd() / "Bred40_Tforms"
TRAINING_DATA = Path.cwd() / "RBMTrainingDataset" / "training_set.csv"
TEST_DATA = Path.cwd() / "RBMTrainingDataset" / "2018_data_eos.csv"

test_data = tools.get_ndarray(TEST_DATA)
input_dims = len(test_data[0])
train_data = tools.get_ndarray(TRAINING_DATA)
output_dims = 2
gh = G.Genetics()

for child in [c for c in TARGET_FOLDER.iterdir() if c.is_dir()]:
    # get the perplexity and layer structure for this child
    dna_string = tools.read_dna(child)
    perp, layers = gh.decode_dna(dna_string, legacy_dna=True)
    # train this model
    ptsne = Parametric_tSNE(input_dims, output_dims, perp, all_layers=layers)
    _ = ptsne.fit(train_data, verbose=False)
    transform = ptsne.transform(test_data)
    save_path = SAVE_FOLDER / (child.name + "_tform.csv")
    tools.write_csv(transform, save_path)
    ptsne.clear_session()
def measure_layer(layer, x):
    global count_ops, count_params, module_number, modules_flops
    global modules_params, to_print, flops_file_path1
    delta_ops = 0
    delta_params = 0
    multi_add = 1
    if to_print:
        print("")
    type_name = get_layer_info(layer)

    ### ops_conv
    if type_name in ['Conv2d']:
        out_h = int((x.size()[2] + 2 * layer.padding[0] - layer.kernel_size[0]) /
                    layer.stride[0] + 1)
        out_w = int((x.size()[3] + 2 * layer.padding[1] - layer.kernel_size[1]) /
                    layer.stride[1] + 1)
        delta_ops = layer.in_channels * layer.out_channels * layer.kernel_size[0] * \
            layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add
        delta_params = get_layer_param(layer)
        if hasattr(layer, 'shared'):
            delta_params = delta_params / int(layer.shared)

    ### ops_nonlinearity
    elif type_name in ['ReLU']:
        delta_ops = x.numel()
        delta_params = get_layer_param(layer)

    ### ops_pooling
    elif type_name in ['AvgPool2d']:
        in_w = x.size()[2]
        kernel_ops = layer.kernel_size * layer.kernel_size
        out_w = int((in_w + 2 * layer.padding - layer.kernel_size) / layer.stride + 1)
        out_h = int((in_w + 2 * layer.padding - layer.kernel_size) / layer.stride + 1)
        delta_ops = x.size()[0] * x.size()[1] * out_w * out_h * kernel_ops
        delta_params = get_layer_param(layer)
        module_number += 1

    elif type_name in ['AdaptiveAvgPool2d']:
        delta_ops = x.size()[0] * x.size()[1] * x.size()[2] * x.size()[3]
        delta_params = get_layer_param(layer)

    ### ops_linear
    elif type_name in ['Linear']:
        weight_ops = layer.weight.numel() * multi_add
        bias_ops = layer.bias.numel()
        delta_ops = x.size()[0] * (weight_ops + bias_ops)
        delta_params = get_layer_param(layer)
        if to_print:
            print(layer)
            print("Module number: ", module_number)
            print("Overall FLOPS so far:", count_ops)
        #### save to the file
        print(flops_file_path1)
        write_csv(flops_file_path1, [str(module_number), str(count_ops)])

    ### ops_nothing
    elif type_name in ['BatchNorm2d', 'Dropout2d', 'DropChannel', 'Dropout']:
        delta_params = get_layer_param(layer)

    ### unknown layer type
    else:
        raise TypeError('unknown layer type: %s' % type_name)

    count_ops += delta_ops
    count_params += delta_params
    layer.flops = delta_ops
    layer.params = delta_params
    return
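# measure_layer() appends one [module_number, count_ops] row per Linear layer.
# A minimal, assumed sketch of such an append-style write_csv helper (the real
# one lives elsewhere in the project) could be:
import csv


def write_csv(file_path, row):
    # append a single row to the CSV file, creating it if necessary
    with open(file_path, 'a', newline='') as f:
        csv.writer(f).writerow(row)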
import tools

data = tools.read_csv(tools.path_vendas)
id = tools.write_csv(tools.path_vendas, data)
print("Your service order number is: %05d\n"
      "Keep this number; it is the only way to track your request." % id)
def select(select_object, candidates_object):
    if not os.path.isfile(select_object['path']):
        log.logger.critical('Cannot find master image: ' + select_object['path'])
        exit(1)
    log.logger.info('Working on ' + select_object['qr'] + ' with database id: ' +
                    str(select_object['Id']))

    # score every candidate against the master image
    match = []
    for c in candidates_object:
        r = restore.extractTar(c['id'])
        if not os.path.isfile(r):
            log.logger.critical('Cannot find restore image: ' + r)
            exit(1)
        image.convertToJpeg(c['filename'], '/tmp/')
        match.append(
            image.matchHistogram(select_object['path'],
                                 '/tmp/' + c['name'] + '.jpg'))
        os.remove('/tmp/' + c['name'] + '.jpg')

    # the candidate with the lowest histogram distance is the best match
    m = match.index(min(match))

    if select_object['scan_date'] is None:
        put_dir = os.path.join('/data/selected/', 'unknown_scandir')
    else:
        put_dir = os.path.join('/data/selected/', str(select_object['scan_date']))
    jpg_put_dir = os.path.join(put_dir, 'jpeg')
    csv_put_dir = os.path.join(put_dir, 'csv')
    if not os.path.exists(put_dir):
        os.makedirs(jpg_put_dir)
    if not os.path.exists(jpg_put_dir):
        os.makedirs(jpg_put_dir)
    if not os.path.exists(csv_put_dir):
        os.makedirs(csv_put_dir)

    try:
        os.rename(candidates_object[m]['filename'],
                  os.path.join(put_dir, select_object['qr'] + '.tif'))
    except Exception as e:
        log.logger.critical('Could not move: ' + candidates_object[m]['filename'] +
                            ' to: ' +
                            os.path.join(put_dir, select_object['qr'] + '.tif'))
        log.logger.debug(e)
        exit(1)
    try:
        image.convertToJpeg(os.path.join(put_dir, select_object['qr'] + '.tif'),
                            jpg_put_dir, thumbnail=True)
    except Exception as e:
        log.logger.critical('Could not create thumbnail from: ' +
                            candidates_object[m]['filename'] + ' to: /tmp/' +
                            candidates_object[m]['name'] + '.jpg')
        log.logger.debug(e)
        exit(1)

    # clean up the remaining temporary candidate files
    for c in candidates_object:
        if os.path.exists(c['filename']):
            try:
                os.remove(c['filename'])
                log.logger.info('Successfully removed temporary file: ' + c['filename'])
            except Exception as e:
                log.logger.warning('Could not remove temporary file: ' +
                                   c['filename'] + ' Please cleanup manually')
                log.logger.debug(e)

    # columns: ['qr','check_it','tiff','jpg','master','scan_date','wag_jpg_link',
    #           'box','analytics_database_id','best_match_value',
    #           'second_best_match_value','match_factor','match_diff','correct',
    #           'false_description']
    ma = tools.match_analytics(match)
    rowfill = [
        select_object['qr'],
        tools.checkit(1000, 80),
        '\\\\10.61.2.125\\selected\\' + str(select_object['scan_date']) + '\\' +
        select_object['qr'] + '.tif',
        '\\\\10.61.2.125\\selected\\' + str(select_object['scan_date']) + '\\jpeg\\' +
        select_object['qr'] + '.jpg',
        '\\\\nnms125\\Master-Images' + select_object['path'][13:].replace('/', '\\'),
        str(select_object['scan_date']),
        '\\\\nnm\\dino\\Digibarium\\FES herbarium digistraten\\' +
        select_object['match_file'].replace('/', '\\'),
        select_object['match_box'],
        select_object['Id'],
        ma[0], ma[1], ma[2], ma[3]
    ]
    tools.write_csv(
        os.path.join(csv_put_dir, str(select_object['scan_date']) + '.csv'),
        rowfill,
    )
# csv
import random

import tools

data = tools.read_csv("persons.csv")

# increment the numeric value in column 1 for every data row
for row in data[1:]:
    row[1] = int(row[1]) + 1

# add a new "Education" column with random 0/1 values
data[0].append("Education")
for row in data[1:]:
    row.append(random.choice([0, 1]))

tools.write_csv("test_2.csv", data)

# the same operation with the dict-based CSV helpers
filename = "persons.csv"
data = tools.read_dict_csv(filename=filename)
for row in data:
    row["Education"] = random.choice([0, 1])
tools.write_dict_csv("persons_2.csv", data)
print(data)
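# For reference, plausible list- and dict-based helpers matching the calls above
# (assumptions about the project's `tools` module, not its actual code):
import csv


def read_csv(filename):
    # return the file as a list of rows (lists of strings), header included
    with open(filename, newline='', encoding='utf-8') as f:
        return list(csv.reader(f))


def read_dict_csv(filename):
    # return the file as a list of dicts keyed by the header row
    with open(filename, newline='', encoding='utf-8') as f:
        return list(csv.DictReader(f))


def write_dict_csv(filename, rows):
    # write a list of dicts back out, keeping the key order of the first row
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)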
def validate(val_loader, model, criterion):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # one AverageMeter per classifier (exit) in the network
    top1_per_cls = [AverageMeter() for i in range(0, model.module.num_blocks)]
    top5_per_cls = [AverageMeter() for i in range(0, model.module.num_blocks)]

    ### Switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda(non_blocking=True)

        ### Compute output
        with torch.no_grad():
            output = model(input, 0.0)
        if args.model == 'msdnet':
            loss = msd_loss(output, target, criterion)
        else:
            loss = criterion(output, target)

        ### Measure accuracy and record loss
        if hasattr(output, 'data'):
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        elif args.model == 'msdnet':
            prec1, prec5, _ = msdnet_accuracy(output, target, input)
        losses.update(loss.data.item(), input.size(0))
        # mean top-1 / top-5 precision averaged over all classifiers
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        ### Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # ttop1s / ttop5s hold the per-classifier top-1 / top-5 precision
        _, _, (ttop1s, ttop5s) = msdnet_accuracy(output, target, input, val=True)
        for c in range(0, len(top1_per_cls)):
            # accumulate precision for classifier c across the whole validation run
            top1_per_cls[c].update(ttop1s[c], input.size(0))
            top5_per_cls[c].update(ttop5s[c], input.size(0))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))
    for c in range(0, len(top1_per_cls)):
        print(' * For classifier {cls}: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
              .format(cls=c, top1=top1_per_cls[c], top5=top5_per_cls[c]))
        # save per-classifier accuracy to the file
        print(acc_file_path)
        write_csv(acc_file_path, [str(c), str(top1_per_cls[c].avg)])

    # return error rates (100 - precision) based on the averaged top-1 / top-5
    return 100. - top1.avg, 100. - top5.avg
def advanced_evaluate(geno_type, file_path, test_mode=False, save_mode=False):
    if not test_mode:
        print("Now we are evaluating models from the genetic algorithm.")
    else:
        print("Now we are evaluating test models.")

    # read data from the wind-blade dataset
    img_path = 'C:/Users/admin/PycharmProjects/genetic-algorithm/dataset_blades/'
    X, Y, x_test, y_test = get_windblade_data(img_path, 8024, 0)
    # alternatively, read data from CIFAR-10:
    # X, Y, X_test, Y_test = cifar_data(30000, 0)

    # shuffle the samples with a fixed seed
    idx = [i for i in range(len(Y))]
    print('before shuffle index is ', idx)
    random.Random(0).shuffle(idx)
    print('have a look at index', idx)
    X = X[idx]
    Y = Y[idx]
    X = np.squeeze(X)
    Y = np.squeeze(Y)
    print('number of samples is:', X.shape, Y.shape)

    seed = 7
    k_fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    es = EarlyStopping(monitor='val_loss', patience=15, verbose=0, mode='min')
    rl = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5, min_lr=0)
    callbacks = [es, rl]

    for key in geno_type:
        # NOTE: this guard is `if True:` in the original source, so the else
        # branch below is unreachable as written; it was presumably a flag check.
        if True:
            print('================================ start advanced evaluation ===============================')
            print('finishing model building part')
            cv_scores = []
            cv_accuracy = []
            print('start to train on k-fold validation ... ...')
            for train, test in k_fold.split(X, Y):
                model, model_test = geno2model(geno_type[key])
                print('have a look at Y distribution', Y[train][100:110], Y[test][23:33])
                print('the size of Y train is', Y[train].size)
                print('the sum of Y train is', np.sum(Y[train]))
                print('the size of Y test is', Y[test].size)
                print('the sum of Y test is', np.sum(Y[test]))
                if not test_mode:
                    model.compile(optimizer=optimizers.Adam(lr=0.001),
                                  loss=losses.categorical_crossentropy,
                                  metrics=['accuracy'])
                else:
                    model_test.compile(optimizer=optimizers.SGD(lr=0.001),
                                       loss=losses.mean_squared_error,
                                       metrics=['accuracy'])
                print('model compile finished')

                x_train = X[train]
                x_test = X[test]
                x_train, x_test = normalize(x_train, x_test)
                print("======================= Have a look at input after normalization ===============================")
                print(np.sum(x_train))
                y_train = Y[train]
                y_test = Y[test]
                print('data preparation finished')

                if not test_mode:
                    model.fit(x_train, y_train, validation_split=0.15, epochs=100,
                              batch_size=32, verbose=1, callbacks=callbacks)
                else:
                    model_test.fit(x_train, y_train, validation_split=0.2, epochs=100,
                                   batch_size=8, verbose=1, callbacks=callbacks)
                print('model training finished')
                if test_mode:
                    input("press enter to continue and get evaluating result")
                if not test_mode:
                    score, acc = model.evaluate(x_test, y_test, verbose=1)
                else:
                    score, acc = model_test.evaluate(x_test, y_test, verbose=1)
                cv_accuracy.append(acc)
                cv_scores.append(score)
                print('========== the score of current round is %f, accuracy is %f ===================='
                      % (score, acc))
                input("press enter to continue")

            print('cross validation scores:', cv_scores, cv_accuracy)
            geno_type[key]['ad_score'] = [
                np.mean(cv_scores),
                np.mean(cv_accuracy),
                np.std(cv_accuracy)
            ]
            geno_type[key]['flag_change_ad'] = False
            if K.backend() == 'tensorflow':
                K.clear_session()
            if save_mode:
                trainable_count = int(
                    np.sum([K.count_params(p) for p in set(model.trainable_weights)]))
                # write genotype into csv file
                write_csv(file_path, geno_type[key],
                          ite_idx=0, num_paras=trainable_count)
        else:
            print('================= No changes to the model, so not necessary to re-train again ===================')
            if test_mode:
                continue
    return geno_type
def evaluate(genotype, ite_idx, num_train, num_test, file_path, save_model=False):
    acc = np.zeros((len(genotype), 1))
    i = 0
    if ite_idx == 0 and save_model:
        create_csv(file_path)

    for key in genotype:
        if genotype[key]['flag_change']:
            print("======= iteration idx is %d ======" % ite_idx)
            print("======= genotype idx is %d =======" % i)
            dict_print(genotype[key])
            model, model_test = geno2model(genotype[key])

            es = EarlyStopping(monitor='val_loss', patience=15, verbose=0)
            rl = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)
            callbacks = [es, rl]

            img_path = 'C:/Users/admin/PycharmProjects/genetic-algorithm/dataset_blades/'
            x_train, y_train, x_test, y_test = get_windblade_data(img_path, num_train, num_test)
            # convert y into one-hot matrices
            y_train = np_utils.to_categorical(y_train, 2)
            y_test = np_utils.to_categorical(y_test, 2)
            print(x_train.shape)
            print(y_train.shape)

            model.compile(optimizer=optimizers.Adam(lr=0.001),
                          loss=losses.categorical_crossentropy,
                          metrics=['accuracy'])
            history = model.fit(x=x_train, y=y_train, batch_size=32, epochs=10,
                                verbose=0, shuffle=True,
                                validation_data=(x_test, y_test),
                                callbacks=callbacks)
            print(history.history['val_acc'][-1])

            candidate = genotype[key]
            candidate['score'] = history.history['val_acc'][-1]
            candidate['flag_change'] = False
            i = i + 1
            if save_model:
                # count trainable parameters
                trainable_count = int(
                    np.sum([K.count_params(p) for p in set(model.trainable_weights)]))
                # write genotype into csv file
                write_csv(file_path, candidate, ite_idx, trainable_count)
            if K.backend() == 'tensorflow':
                K.clear_session()
        else:
            print('================= No changes to the model, so not necessary to re-train again ===================')
            if save_model:
                candidate = genotype[key]
                write_csv(file_path, candidate, ite_idx, num_paras=0)
    return genotype
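# evaluate() logs one row per genotype via create_csv()/write_csv(). A hedged
# sketch of what those loggers might do; the column layout here is purely an
# assumption, not the project's actual format:
import csv


def create_csv(file_path):
    # start a fresh log file with a simple header
    with open(file_path, 'w', newline='') as f:
        csv.writer(f).writerow(['iteration', 'num_paras', 'genotype', 'score'])


def write_csv(file_path, candidate, ite_idx, num_paras=0):
    # append one row summarising this candidate genotype
    with open(file_path, 'a', newline='') as f:
        csv.writer(f).writerow([ite_idx, num_paras, str(candidate),
                                candidate.get('score')])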
from pathlib import Path

import pandas as pd

import tools
import Genetics
import training_evaluation as eval
from core import Parametric_tSNE
from report_writing import log_basic_test_params

# import the MNIST data and ensure it is in a workable format
train_data = pd.read_csv(str(Path.cwd() / 'Formatted_MNIST_Data' / 'formatted_mnist_train.csv'),
                         sep=',', header=None).values
test_data = pd.read_csv(str(Path.cwd() / 'Formatted_MNIST_Data' / 'formatted_mnist_test.csv'),
                        sep=',', header=None).values

# instance a ptsne network and train it on the training data
ptsne = Parametric_tSNE(784, 2, 30)
print("starting to train...")
loss = ptsne.fit(train_data, verbose=True)
print('done training....')
tform = ptsne.transform(test_data)

save_path = Path.cwd() / 'MNIST_testing'
tools.write_loss(save_path, loss)
tools.write_csv(tform, (save_path / "tform.csv"))
# graph and save