def test_multiple_extensions(tmp_path):
    os.chdir(tmp_path)
    files = [
        'A000VOI123ZH_00.doc',
        'E100VOI123ZH_00.pdf',
        'E100PIN105ZH_00.docx',
        'A100PIN105ZH_00.pdf',
    ]
    paths = [Path(f) for f in files]
    for p in paths:
        p.touch()
    main(*r'....(......).....\.(pdf|docx)'.split())
    assert Path('A000VOI123ZH_00.doc').is_file()
    assert not Path('E100VOI123ZH_00.pdf').exists()
    assert not Path('E100PIN105ZH_00.docx').exists()
    assert not Path('A100PIN105ZH_00.pdf').exists()
    assert not Path('VOI123/A000VOI123ZH_00.doc').exists()
    assert Path('VOI123/E100VOI123ZH_00.pdf').is_file()
    assert Path('PIN105/E100PIN105ZH_00.docx').is_file()
    assert Path('PIN105/A100PIN105ZH_00.pdf').is_file()
def test_folders_wont_be_matched(tmp_path):
    os.chdir(tmp_path)
    Path('A000VOI123ZH_00.pdf').touch()
    Path('E100VOI123ZH_00.pdf').touch()
    Path('E100PIN105ZH_00.pdf').mkdir()
    Path('E100PIN105ZH_00.pdf/E100PIN105ZH_00.pdf').touch()
    Path('A100PIN105ZH_00.pdf').mkdir()
    Path('A100PIN105ZH_00.pdf/aaa').touch()
    Path('A100PIN105ZH_00.pdf/bbb').touch()
    main(*r'....(......).....\.pdf'.split())
    assert not Path('A000VOI123ZH_00.pdf').exists()
    assert not Path('E100VOI123ZH_00.pdf').exists()
    assert Path('E100PIN105ZH_00.pdf').is_dir()
    assert not Path('E100PIN105ZH_00.pdf/E100PIN105ZH_00.pdf').exists()
    assert Path('A100PIN105ZH_00.pdf').is_dir()
    assert Path('A100PIN105ZH_00.pdf/aaa').is_file()
    assert Path('A100PIN105ZH_00.pdf/bbb').is_file()
    assert Path('VOI123/A000VOI123ZH_00.pdf').is_file()
    assert Path('VOI123/E100VOI123ZH_00.pdf').is_file()
    assert Path('PIN105/E100PIN105ZH_00.pdf').is_file()
    assert not Path('PIN105/A100PIN105ZH_00.pdf').exists()
def main(out_workspace, input_bathymetry, broad_bpi_inner_radius,
         broad_bpi_outer_radius, fine_bpi_inner_radius, fine_bpi_outer_radius,
         classification_dict, output_zones):
    # Load required toolboxes
    local_path = os.path.dirname(__file__)
    btm_toolbox = os.path.abspath(os.path.join(local_path, '..', 'btm.pyt'))
    arcpy.ImportToolbox(btm_toolbox)

    # Local variables
    broad_bpi = os.path.join(out_workspace, "broad_bpi")
    fine_bpi = os.path.join(out_workspace, "fine_bpi")
    slope_rast = os.path.join(out_workspace, "slope")
    broad_std = os.path.join(out_workspace, "broad_std")
    fine_std = os.path.join(out_workspace, "fine_std")

    utils.workspace_exists(out_workspace)

    # Set geoprocessing environments
    arcpy.env.scratchWorkspace = out_workspace
    arcpy.env.workspace = out_workspace
    # TODO: currently set to automatically overwrite; expose this as an option
    arcpy.env.overwriteOutput = True

    try:
        # Process: Build Broad Scale BPI
        utils.msg("Calculating broad-scale BPI...")
        bpi.main(input_bathymetry, broad_bpi_inner_radius,
                 broad_bpi_outer_radius, broad_bpi, bpi_type='broad')

        # Process: Build Fine Scale BPI
        utils.msg("Calculating fine-scale BPI...")
        bpi.main(input_bathymetry, fine_bpi_inner_radius,
                 fine_bpi_outer_radius, fine_bpi, bpi_type='fine')

        # Process: Standardize BPIs
        utils.msg("Standardizing BPI rasters...")
        arcpy.standardizebpi_btm(broad_bpi, "0", "0", broad_std, fine_bpi,
                                 "0", "0", fine_std)

        # Process: Calculate Slope
        slope.main(input_bathymetry, slope_rast)

        # Process: Zone Classification Builder
        outputs_base = arcpy.env.addOutputsToMap
        arcpy.env.addOutputsToMap = True
        utils.msg("Classifying Zones...")
        classify.main(classification_dict, broad_std, fine_std, slope_rast,
                      input_bathymetry, output_zones)
        arcpy.env.addOutputsToMap = outputs_base

    except Exception as e:
        # Print error message if an error occurs
        utils.msg(e, mtype='error')
def test_duplicate_files_not_moved(tmp_path):
    os.chdir(tmp_path)
    Path('a').mkdir()
    Path('a/A000VOI123ZH_00.pdf').touch()
    Path('b').mkdir()
    Path('b/A000VOI123ZH_00.pdf').touch()
    main(*r'....(......).....\.pdf'.split())
    one_file_moved = Path('VOI123/A000VOI123ZH_00.pdf').is_file()
    one_file_not_moved = (Path('a/A000VOI123ZH_00.pdf').is_file()
                          or Path('b/A000VOI123ZH_00.pdf').is_file())
    assert one_file_moved and one_file_not_moved
def main():
    longitud, number = collect.main()
    average = cluster.main()
    positive, negative, ejemplo1, ejemplo2 = classify.main()

    with open("summary.txt", "w", encoding="utf-8") as f:
        f.write("Number of users collected: %d\n" % longitud)
        f.write("Number of messages collected: %d\n" % number)
        f.write("Average number of users per community: %d\n" % average)
        f.write("Number of instances per class found: %d, %d\n" % (positive, negative))
        f.write("%s\n" % str(ejemplo1))
        f.write("%s\n" % str(ejemplo2))

    with open("description.txt", "w", encoding="utf-8") as f2:
        f2.write(
            "Study of the impact that a tweet from a friend of DowJones has on the stock market.\n"
        )
        f2.write(
            "I studied the top 100 Twitter friends of the DowJones account.\n"
            " First, I downloaded the SP500 values minute by minute for a couple of weeks from finance.google.com.\n"
            " For each tweet from a DowJones friend during that time, I measured its impact on the stock market by taking the index value at the time of the tweet and subtracting the value 5 minutes later. If the difference is positive, I classify the tweet as having a positive impact.\n"
            " Otherwise, if the difference is negative, the tweet had a negative impact.\n"
        )
        f2.write(
            "For the classifier, I clustered the words of the training tweets with 10-means.\n"
            " The number of words from each cluster is one of the features fed to my classifier. Another feature is the time each tweet was published.\n"
            " With this I built a classifier that predicts the impact that an analyst's tweet will have on the SP500.\n"
        )
        f2.write(
            "In the clustering, I looked at how many communities can exist and found that there are small communities.\n"
            " But at no point do they merge into a single community with all the stock analysts in one cluster. I also looked at where each of the analyst friends is located: the United States is the main country in the clusters, but the United Kingdom, Iran and even Australia also carry some weight.\n"
        )
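# Hedged sketch only: the description above says the classifier uses 10-means word
# clusters plus the publication time of each tweet as features. The project's own
# collect/cluster/classify modules are not shown here, so every name below
# (build_features, tweets, timestamps) is an illustrative assumption.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import CountVectorizer


def build_features(tweets, timestamps, n_clusters=10):
    """One row per tweet: word counts per cluster plus the hour it was posted."""
    counts = CountVectorizer().fit_transform(tweets).toarray()  # tweets x vocabulary
    # Cluster the vocabulary by the tweets each word appears in (10-means).
    word_clusters = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(counts.T)
    features = np.zeros((counts.shape[0], n_clusters + 1))
    for c in range(n_clusters):
        features[:, c] = counts[:, word_clusters == c].sum(axis=1)
    features[:, -1] = [t.hour for t in timestamps]              # publication time
    return features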
def get_category():
    if not request.json or 'title' not in request.json:
        abort(400)
    category = classify.main(request.json['title'])
    return jsonify({
        'category': category,
        'emoji': getEmoji.getEmoji(category, request.json['title'])
    }), 201
def test_multi_level_classification(tmp_path):
    (tmp_path / 'bill smith').touch()
    (tmp_path / 'amy smith').touch()
    (tmp_path / 'tom smith').touch()
    (tmp_path / 'bill gates').touch()
    (tmp_path / 'sarah gates').touch()
    main(*[r'(\w+) (\w+)'] +
         rf'-d \2/\1 --input={tmp_path} --output {tmp_path}/out/ -f \1-\2'.split())
    assert not (tmp_path / 'bill smith').exists()
    assert not (tmp_path / 'amy smith').exists()
    assert not (tmp_path / 'tom smith').exists()
    assert not (tmp_path / 'bill gates').exists()
    assert not (tmp_path / 'sarah gates').exists()
    assert (tmp_path / 'out/smith/bill/bill-smith').is_file()
    assert (tmp_path / 'out/smith/amy/amy-smith').is_file()
    assert (tmp_path / 'out/smith/tom/tom-smith').is_file()
    assert (tmp_path / 'out/gates/bill/bill-gates').is_file()
    assert (tmp_path / 'out/gates/sarah/sarah-gates').is_file()
def test_files_in_nested_folders(tmp_path):
    files = [
        'none/A000VOI123ZH_00.pdf',
        'null/E100VOI123ZH_00.pdf',
        'nil/test/aha/E100PIN105ZH_00.pdf',
        'A100PIN105ZH_00.pdf',
    ]
    paths = [tmp_path / f for f in files]
    for p in paths:
        p.parent.mkdir(parents=True, exist_ok=True)
        p.touch()
    main(*rf'....(......).....\.pdf -i {tmp_path} -o {tmp_path}'.split())
    for p in paths:
        assert not p.is_file()
    assert (tmp_path / 'VOI123' / 'A000VOI123ZH_00.pdf').is_file()
    assert (tmp_path / 'VOI123' / 'E100VOI123ZH_00.pdf').is_file()
    assert (tmp_path / 'PIN105' / 'E100PIN105ZH_00.pdf').is_file()
    assert (tmp_path / 'PIN105' / 'A100PIN105ZH_00.pdf').is_file()
def __classify_all_features(main_args):
    """
    Runs classification or k-fold validation on the specified feature sets.

    Args:
        main_args: Arguments specifying train/test files, classification,
            and other options relevant to the task.
    """
    feature_sets = []
    all_features = SMALL_ALL_FEATURES
    if "large_yeast_data" in main_args.data_file:
        all_features = LARGE_ALL_FEATURES
    # Build every combination of features between feature_min and feature_max.
    for i in range(main_args.feature_min - 1, main_args.feature_max, 1):
        combinations = itertools.combinations(all_features, i + 1)
        for combination in combinations:
            feature_sets.append(list(combination))
    print("Length of all combinations: " + str(len(feature_sets)))
    for feature_set in feature_sets:
        classify_args = classify.ClassifyArgs(features=feature_set,
                                              classifiers=ALL_CLASSIFIERS,
                                              write_to_log=True,
                                              train_file=main_args.train_file,
                                              test_file=main_args.test_file,
                                              data_file=main_args.data_file,
                                              classify=main_args.classify,
                                              kfold=main_args.kfold)
        classify.main(classify_args)
        # Second pass with the same feature set, this time passing the vote option.
        classify_args = classify.ClassifyArgs(features=feature_set,
                                              classifiers=ALL_CLASSIFIERS,
                                              write_to_log=True,
                                              train_file=main_args.train_file,
                                              test_file=main_args.test_file,
                                              data_file=main_args.data_file,
                                              vote=main_args.vote,
                                              classify=main_args.classify,
                                              kfold=main_args.kfold)
        classify.main(classify_args)
    print("feature set loop done")
def test_basic(tmp_path):
    os.chdir(tmp_path)
    files = [
        'A000VOI123ZH_00.pdf',
        'E100VOI123ZH_00.pdf',
        'E100PIN105ZH_00.pdf',
        'A100PIN105ZH_00.pdf',
    ]
    paths = [Path(f) for f in files]
    for p in paths:
        p.touch()
    main(*r'....(......).....\.pdf'.split())
    for p in paths:
        assert not p.is_file()
    assert (tmp_path / 'VOI123' / 'A000VOI123ZH_00.pdf').is_file()
    assert (tmp_path / 'VOI123' / 'E100VOI123ZH_00.pdf').is_file()
    assert (tmp_path / 'PIN105' / 'E100PIN105ZH_00.pdf').is_file()
    assert (tmp_path / 'PIN105' / 'A100PIN105ZH_00.pdf').is_file()
def test_rename_file(tmp_path):
    files = [
        'A000VOI123ZH.00.pdf',
        'E100VOI123ZH.01.pdf',
        'E100PIN105ZH.00.pdf',
        'A100PIN105ZH_00.pdf',
    ]
    paths = [tmp_path / f for f in files]
    for p in paths:
        p.touch()
    main(*rf'(....(......)..).(..\.pdf) -i {tmp_path} -o {tmp_path} -d \2 -f \1_\3'
         .split())
    for p in paths:
        assert not p.is_file()
    assert (tmp_path / 'VOI123' / 'A000VOI123ZH_00.pdf').is_file()
    assert (tmp_path / 'VOI123' / 'E100VOI123ZH_01.pdf').is_file()
    assert (tmp_path / 'PIN105' / 'E100PIN105ZH_00.pdf').is_file()
    assert (tmp_path / 'PIN105' / 'A100PIN105ZH_00.pdf').is_file()
def main(self):
    """Watch the files and generate as much text as is needed."""
    while True:  # loop forever
        print("""
                      ┌--------------------┐
┌---------------------|     File check     |---------------------┐
|                     └--------------------┘                     |""")
        if not self.is_generation():  # nothing needs to be generated right now
            print("full... stopping text generation :) \n")
            continue  # skip the rest of this iteration

        self.generation_story()  # generate a story
        result = classify.main(self.get_story_sentence())  # classify the generated text
        print(self.get_story_sentence())
        print("======================================================================\n\n")

        if self.can_save_data(result):
            self.proofreading()
            print(self.get_story_sentence())
            self.save_story_data(self.get_story_sentence(), result)
def run_tests(result_dict, tests, t_0, args):
    test_name, test_base, tests = tests[0], tests[1], tests[2:]
    output_name = '{}.json'.format(test_name.replace(' ', '-'))
    output_path = os.path.join(args.folder_name, output_name)
    print('Saving to file: "{}"'.format(output_name))
    print('Name of tests: "{}"'.format(test_name))
    print('Number of tests: {}'.format(len(tests)))
    print()
    print('Parameters are: \n {}'.format(test_base))
    for test in tests:
        print('    {}'.format(test))

    result = []
    diff = []
    current_result = {
        'best': {
            'accuracy': '0',
            'name': 'init',
            'full': 'init',
        }
    }
    # File with just the current tests, overwrite old file
    json.dump(current_result, open(output_path, 'w'), indent=4, sort_keys=True)

    for idx, test in enumerate(tests):
        text_to_show = 'TEST NUMBER: {} / {}'.format(idx + 1, len(tests))
        print()
        print('########{}########'.format('#' * len(text_to_show)))
        print('####    {}    ####'.format(text_to_show))
        print('########{}########'.format('#' * len(text_to_show)))
        print(test_base + test)

        f_accuracy, stats, elapsed = main(parse_args(test_base + test), args.output)
        result.append([f_accuracy, elapsed, test])

        key = '|'.join(test_base + test)
        accuracy = '{:.4f}'.format(100 * f_accuracy)
        elapsed = '{:.1f}'.format(elapsed)
        date = strftime('%Y-%m-%d')
        clock = strftime('%H:%M')

        if 100 * f_accuracy > float(current_result['best']['accuracy']):
            current_result['best'] = {
                'accuracy': accuracy,
                'test': test,
                'name': key,
            }

        # Test already tried, add to the result
        if key in result_dict:
            result_dict[key]['accuracy'].append(accuracy)
            result_dict[key]['elapsed'].append(elapsed)
            result_dict[key]['date'].append(date)
            result_dict[key]['clock'].append(clock)
        else:
            result_dict[key] = {
                'test': test,
                'name': test_name,
                'base': test_base,
                'full': test_base + test,
                'date': [date],
                'clock': [clock],
                'accuracy': [accuracy],
                'elapsed': [elapsed],
            }

        current_result[key] = {
            'test': test,
            'name': test_name,
            'base': test_base,
            'full': test_base + test,
            'date': [date],
            'clock': [clock],
            'accuracy': [accuracy],
            'elapsed': [elapsed],
        }

        accs = result_dict[key]['accuracy']
        # Different result for same parameters
        if len(set(accs)) > 1:
            diff.append([key, accs])
            print('Different result.')
            for idx, acc in enumerate(accs):
                print('    {}: Acc: {}'.format(idx, acc))

        # Dump data after each run
        # Big file with all results
        json.dump(result_dict, open(args.result_path, 'w'), indent=4, sort_keys=True)
        # File with just the current tests, overwrite old file
        json.dump(current_result, open(output_path, 'w'), indent=4, sort_keys=True)

    print()
    print('Total time: {:.1f} seconds'.format(time.time() - t_0))
    return result, diff
import classify
import json

k_to_test = [80000]
results = {40000: [], 80000: [], 125000: []}

for k in k_to_test:
    for i in range(5):
        acc, conf, restricted_dict = classify.main(k, i, restrict_random=True)
        conf_list = [list(conf[0]), list(conf[1])]
        results[k].append([acc, conf_list, k])
    with open("results_denis_new_round_{}k.json".format(k), "w") as json_out:
        json.dump(results, json_out)
    print("test with k: {} completed".format(k))

'''python pipeline.py --K 10 --model_path ../../../LanguageModels/Skipgram/Advanced/TrainedModels/Model4 --span 80 --KNN_papers_set Set4 '''
#1
def execute(node, previous, experiment_folder):
    """
    Execute a task defined by the given node in the experiment graph.

    Parameters
    ----------
    node : Element
        The node to be executed.

    previous : dict (or list of dict)
        Dictionary of the experiment's running-time variables after the end
        of the parent node's execution. May be a list of dictionaries in the
        special case of a fusion node, which has more than one parent.

    experiment_folder : string
        String with the path to the experiment folder, where the files of
        the experiment will be saved.

    Returns
    -------
    exp_param : dict
        The updated dictionary of the experiment's running-time variables
        after the node's execution.
    """

    global execution_time
    global tex_path
    global tex_dict
    global openset_experiment

    exp_param = previous
    parameters = ast.literal_eval(node.get("parameters"))
    node_id = node.attrib['id']

    # Get node name
    node_name = node.get('name')

    if node.tag == "collection":
        print("Collection", exp_param.keys())
        images, classes, extract_path, read_time = \
            read_collection.main(node_name, openset_experiment, parameters,
                                 node_id)
        execution_time += read_time
        exp_param['images'] = images
        exp_param['classes'] = classes
        exp_param['extract_path'] = extract_path

    elif node.tag == "train_test_method":
        print("train_test_method", exp_param.keys())
        images = exp_param['images']
        classes = exp_param['classes']
        images, classes, train_test_list, train_test_time = \
            train_test.main(images, classes, experiment_folder, node_name,
                            parameters, openset_experiment, node_id)
        execution_time += train_test_time
        exp_param['images'] = images
        exp_param['classes'] = classes
        exp_param['train_test_list'] = train_test_list
        exp_param['train_test_method'] = node_name
        exp_param['train_test_parameters'] = parameters

    elif node.tag == "descriptor":
        print("descriptor", exp_param.keys())
        images = exp_param['images']
        extract_path = exp_param['extract_path']
        classes_keys = exp_param['classes'].keys()
        if node_name == "bag":
            train_test_list = exp_param['train_test_list']
            images, extract_time = extract_bag.main(images, train_test_list,
                                                    extract_path,
                                                    experiment_folder,
                                                    parameters, node_id)
        elif node_name == "bovg":
            train_test_list = exp_param['train_test_list']
            images, extract_time = extract_bovg.main(images, train_test_list,
                                                     extract_path,
                                                     experiment_folder,
                                                     parameters, node_id)
        else:
            images, extract_time = extract_features.main(
                images, classes_keys, extract_path, node_name, parameters,
                node_id)
        execution_time += extract_time
        exp_param['images'] = images
        exp_param['descriptor'] = node_name

    elif node.tag == "normalizer":
        try:
            manager = Manager()
            images = manager.dict(exp_param['images'])
            train_test_list = exp_param['train_test_list']
        except:
            print("\n\tMissing Input. Exiting.")
            sys.exit(1)
        norm_fv_paths, normalize_time = normalize_features.main(
            images, train_test_list, experiment_folder, node_name,
            parameters, node_id)
        execution_time += normalize_time
        del exp_param['images']
        exp_param['fv_paths'] = norm_fv_paths

    elif node.tag == "classifier":
        try:
            classes = exp_param['classes']
            train_test_list = exp_param['train_test_list']
            descriptor = exp_param['descriptor']
            try:
                fv_paths = exp_param['fv_paths']
                del exp_param['fv_paths']
            except:
                images = exp_param['images']
                fv_paths = util.save_file_extract(images, train_test_list,
                                                  experiment_folder)
        except:
            print("\n\tMissing Input. Exiting.")
            sys.exit(1)
        images, classes_list, classify_time = classify.main(
            fv_paths, classes.keys(), train_test_list, experiment_folder,
            node_name, parameters, descriptor, node_id)
        execution_time += classify_time
        exp_param['images'] = images
        exp_param['classes_list'] = classes_list

    elif node.tag == "fusion_method":
        len_exp_param = len(exp_param)
        # Lists with the images dictionaries, classes dictionaries, and train
        # and test set lists
        list_images = []
        list_classes = []
        list_train_test = []
        extract_path = exp_param[INDEX_ZERO]['extract_path']
        for index in range(len_exp_param):
            try:
                list_images.append(exp_param[index]['images'])
            except:
                images = {}
                for fv_path in exp_param[index]['fv_paths']:
                    print("fv_path:", fv_path)
                    images_new = util.read_fv_file(fv_path)
                    images = util.merge_dict(images, images_new)
                list_images.append(images)
            list_classes.append(exp_param[index]['classes'])
            # In case it performs the fusion of collections, there is no
            # train_test_list
            try:
                list_train_test.append(exp_param[index]['train_test_list'])
            except:
                list_train_test.append(None)
        # classes_list is present only after the classification module
        try:
            classes_list = exp_param[INDEX_ZERO]['classes_list']
        except:
            classes_list = None
        try:
            train_test_method = exp_param[INDEX_ZERO]['train_test_method']
            train_test_parameters = exp_param[INDEX_ZERO][
                'train_test_parameters']
        except:
            train_test_method = None
            train_test_parameters = None
        images, classes, train_test_list, fusion_time = \
            fusion.main(list_images, list_classes, list_train_test,
                        classes_list, experiment_folder, node_name,
                        parameters, node_id)
        execution_time += fusion_time
        exp_param = {}
        exp_param['images'] = images
        exp_param['classes'] = classes
        if train_test_list is not None:
            exp_param['train_test_list'] = train_test_list
        if classes_list is not None:
            exp_param['classes_list'] = classes_list
        if train_test_method is not None:
            exp_param['train_test_method'] = train_test_method
            exp_param['train_test_parameters'] = train_test_parameters
        exp_param['descriptor'] = None
        exp_param['extract_path'] = extract_path

    elif node.tag == "evaluation_measure":
        try:
            images = exp_param['images']
            train_test_list = exp_param['train_test_list']
            classes_list = exp_param['classes_list']
        except:
            print("\n\tMissing Input. Exiting.")
            sys.exit(1)
        evaluation_time, evaluation_path = evaluation.main(
            images, train_test_list, classes_list, experiment_folder,
            node_name, parameters, node_id)
        execution_time += evaluation_time
        # Dictionaries to create the tex file
        train_test_method = exp_param['train_test_method']
        train_test_parameters = str(exp_param['train_test_parameters'])
        if train_test_method not in tex_dict:
            tex_dict[train_test_method] = {}
        train_test_dict = tex_dict[train_test_method]
        if train_test_parameters not in train_test_dict:
            train_test_dict[train_test_parameters] = {}
        output_dict = train_test_dict[train_test_parameters]
        if node_name not in output_dict:
            output_dict[node_name] = []
        list_output = [evaluation_path, classes_list[0], node_id]
        if list_output not in output_dict[node_name]:
            output_dict[node_name].append(list_output)
        train_test_dict[train_test_parameters] = output_dict
        tex_dict[train_test_method] = train_test_dict

    elif node.tag == "preprocessing":
        images = exp_param['images']
        classes = exp_param['classes']
        images, classes, preprocessing_time = preprocessing.main(
            images, classes, experiment_folder, node_name, parameters,
            node_id)
        execution_time += preprocessing_time
        exp_param['images'] = images
        exp_param['classes'] = classes

    else:
        print("Error. Unknown Tag.")
        sys.exit(1)

    return exp_param
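# Hedged sketch only: a minimal driver for the execute() function above, assuming
# the experiment graph is an XML file whose child nodes appear in execution order.
# run_experiment() and the file name "experiment.xml" are illustrative assumptions,
# not part of the original pipeline.
import xml.etree.ElementTree as ET


def run_experiment(xml_path, experiment_folder):
    exp_param = {}
    for node in ET.parse(xml_path).getroot():  # thread exp_param from node to node
        exp_param = execute(node, exp_param, experiment_folder)
    return exp_param

# Example: run_experiment("experiment.xml", "experiments/run_01")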
def tellmewho(self, file, method):
    output = classify.main(file, method)
    self.output_label.config(text=output)
# resize = cv2.resize(img, (int(img.shape[0]/size), int(img.shape[1]/size)))  # resizing to enhance detection
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # cv2 face detection works on grayscale
faces = face_cascade.detectMultiScale(gray, 1.3, 1)  # get face coordinates
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 0), 2)
    sub_face = img[y:y + h, x:x + w]  # crop only the detected face
    cv2.imwrite('test.jpg', sub_face)  # save the current face for testing
    text = classify.main('test.jpg')  # get the classification result from classify.py
    # text = text.title()
    font = cv2.FONT_HERSHEY_PLAIN
    with open('data.csv') as f:
        data = csv.reader(f)
        next(data)  # skip the header row
        for details in data:
            if details[0] == text.title():
                cv2.putText(img, 'Name: ' + details[0], (x + w + 20, y),
                            font, 1, (0, 0, 255), 2)
                cv2.putText(img, 'Surname: ' + details[1], (x + w + 20, y + 40),
                            font, 1, (0, 0, 255), 2)
                cv2.putText(img, 'Age: ' + details[2], (x + w + 20, y + 100),
                            font, 1, (0, 0, 255), 2)
def main():
    with open('./summary.txt', 'w') as f:
        sys.stdout = f
        collect.main()
        cluster.main()
        classify.main()
# Fragment: runs inside an outer loop over NUM_GUESSES attempts, hence the
# loop index i and the break statements below.
for j in range(PROMPT_LIMIT):
    guess = recognize_speech_from_mic(recognizer, microphone)
    if guess["transcription"]:
        break
    if not guess["success"]:
        break
    print("I didn't catch that. What did you say?\n")

if guess["error"]:
    print("ERROR: {}".format(guess["error"]))
    break

print("You said: {}".format(guess["transcription"]))
guessed = guess["transcription"].lower()
user_has_more_attempts = i < NUM_GUESSES - 1

if guessed == 'classify':
    classify.main()
    break
elif guessed == 'check':
    fake.main()
    break
else:
    print("Incorrect. Try again.\n")
    break
stepback = int(sys.argv[1])  # get command line input: number of days to step back
pd.options.mode.chained_assignment = None  # default='warn' (to turn off pandas warning text)

yesterday = date.today() - timedelta(days=stepback)
datestr = yesterday.strftime("%Y-%m-%d")
logpath = datestr + '\\log.txt'
#f = open(logpath, "a")
#orig = sys.stdout
#sys.stdout = f

### call R script from Python ###
command = 'C://Program Files//R//R-3.4.1//bin//Rscript'
path2Rscript = 'Crawler.R'
cmd = [command, path2Rscript] + [sys.argv[1]]
#subprocess.call(cmd, stdout=f, shell=False)
subprocess.call(cmd, shell=False)
#################################

datevar = date.today() - timedelta(days=stepback + 1)
datestr = datevar.strftime("%Y-%m-%d")
print(datetime.now(), datestr)

import preprocess
preprocess.main(datestr)

import classify
classify.main(datestr)

import new_hashing
new_hashing.main(datestr)

#f.close()
#sys.stdout = orig