async def startup(app: web.Application):
    with open('RS_ViaOW.xml', 'r') as file:
        all_itinerary_viaow = parse_data(file)
    with open('RS_Via-3.xml', 'r') as file:
        all_itinerary_via3 = parse_data(file)
    all_itinerary = all_itinerary_via3 + all_itinerary_viaow
    app['graph'] = generate_graph(all_itinerary)
def execute(input_parameters):
    try:
        initialize()
        in_data = sys.argv
        parser.parse_data(in_data)
        source = parser.get_source()
        dest = parser.get_dest()
        valid.check_path(source)
        valid.check_path(dest)
        in_rsync_str = create_rsync_string()
        run_rsync()
    except:
        pass
def add_file_data(self, name, data):
    """Add file data to the document.

    'name' is the name of the file, 'data' is the file data.
    The file will not be copied into docdir until sync().
    """
    # FIXME: set mime type term
    # parse the file data into text
    text = parse_data(data)
    # generate terms from the text
    self._gen_terms(None, text)
    # set data to be a text sample
    # FIXME: is this the right thing to put in the data?
    summary = text[0:997] + '...'
    self._set_data(summary)
    # FIXME: should files be renamed to something generic (0.pdf)?
    prefix = self.db._find_prefix('file')
    self._add_term(prefix, name)
    # add it to the cache to be written at sync()
    self._infiles[name] = data
def main():
    parser = argparse.ArgumentParser(description='do symbolic regression')
    parser.add_argument('train_file', help='file to train on')
    parser.add_argument('-i', type=int, default=100, help='number of iterations')
    parser.add_argument('-p', type=int, default=1000, help='population size')
    parser.add_argument('-v', action='store_true', help='print detailed information')
    parser.add_argument('-r', action='store_true', help='recover from dump')
    args = parser.parse_args()
    if not os.path.isfile(args.train_file):
        print('main: train file does not exist')
        exit(1)
    with open(args.train_file, 'r') as f:
        lines = f.readlines()
    data, label = parse_data(lines)
    load_data(data, label)
    set_config({
        'ITER_NUM': args.i,
        'POP_SIZE': args.p,
        'PRESERVE_NUM': args.i // 2,
        'CONST': [0, 2, 2.5, 3, 4, 5, 10],
        'PROB_OP': {'+': 2, '-': 2, '*': 2, '/': 2, '^': 0.5, '~': 3,
                    'abs': 0.5, 'sin': 1.5, 'cos': 1.5, 'tan': 1,
                    'asin': 0.5, 'acos': 0.5, 'atan': 0.5,
                    'sinh': 0.5, 'cosh': 0.5, 'tanh': 0.5,
                    'exp': 2, 'sqrt': 2, 'log': 2},
        'VERBOSE': args.v
    })
    result = train(args.r)
    print('MSE: %s' % result[1])
    print('EXPR (postfix): %s' % result[0].to_postfix())
    print('EXPR (infix): %s' % result[0])
def weekly_update(country):
    # 500 most recent incidents pulled per page by default
    response = urllib2.urlopen(
        'https://api.acleddata.com/acled/read?country=' + country)
    json_response = json.load(response)
    if json_response['success']:
        ### Parses all data ('count' is acled response for count of incidents)
        #parse_data(json_response['data'], json_response['count'], country)
        ### Just parse first 3 incidents and add to 'test' region
        parse_data(json_response['data'], 3, 'test')
    else:
        print "Error retrieving data from acled api ..."
        return False
def on_city_page(self, response):
    cityname = response.meta['city'].split("/")[1]
    if "amanha" in response.url:
        filename = "original_pages/{}-amanha".format(cityname)
        data = parser.parse_data(cityname, response.body, "AMANHA")
    else:
        filename = "original_pages/{}-hoje".format(cityname)
        data = parser.parse_data(cityname, response.body, "HOJE")
    with open("{}.html".format(filename), 'wb') as f:
        f.write(response.body)
    self.log('Saved file %s' % "{}.html".format(filename))
    # data = parser.parse_data(cityname, response.body)
    return data
def main():
    # parse the tagged data;
    # let the user know how to use the program
    try:
        parsing = parser.parse_data("tagged_emails")
    except:
        print("Please make sure that next to this python file you have the following folder:")
        print("tagged_emails")
        print("which is automatically generated by seminar_tagger.py")
        return
    print("Tagging e-mails. Please wait...")
    print("This may take some time because of the use of Word2Vec.")
    # create a folder named 'tagged_ontology_emails' for all tagged txt files
    if not os.path.exists("tagged_ontology_emails"):
        os.makedirs("tagged_ontology_emails")
    # create the model
    model = Word2Vec.load_word2vec_format(
        '/home/projects/google-news-corpus/GoogleNews-vectors-negative300.bin',
        binary=True)
    ontology = Ontology(model)
    data = parsing[0]        # tuple type-content
    file_names = parsing[1]  # keep track of the name in order to write a
                             # tagged mail with the same name
    for d in data:
        type_list = d[0]     # list of types
        content_list = d[1]  # list of content for each type
        if len(type_list) == 0:
            continue
        if find_with_pattern("[Tt]opic", type_list) is None:
            continue
        # find the current topic
        topic_pos = type_list.index(find_with_pattern("[Tt]opic", type_list))
        topic_type = type_list[topic_pos]
        topic_content = content_list[topic_pos]
        ontology.add_to(topic_content, file_names[data.index(d)])
    created_ontology = ontology.get_traversal_info()
    write(created_ontology)
    print('Operation successful.')
def setup_test(test, filename):
    f = open(filename)
    data = f.read()
    f.close()
    gl = {}
    lc = {}
    lexer = python_lexer.PythonLexer()
    lexer.input(data)
    res = parser.parse_data(data, lexer)
    code = utils.node_to_str(res)
    test.gl = gl
    test.lc = lc
    test.code = code
    test.node = res
def process(conn, addr):
    print("(info) Wait for game data...")
    buf = conn.recv(BUFFER_SIZE)
    print("(info) Parsing data...")
    data = parser.parse_data(buf)
    print("(info) Computing...")
    solver = core.Solver(data)
    x, y = solver.compute()
    print("(info) Sending back...")
    message = "{}{}".format(chr(y), chr(x))
    conn.send(message.encode("ascii"))
def plot_service_perf(start_date, end_date, groupby):
    print('grpby:')
    print(groupby)
    values = parser.parse_data(
        start_date, end_date,
        '(\d{4,4}-.*T(\d.*))\+.*T(\d.*)\+.*(/rest/.*)\".*',
        ("{'trace': 3, 'xsrc': 0, 'ysrc': '2-1', 'ytype': 'time', 'groupby': '%s', 'agg':'avg'}" % groupby),
        'service_perf')
    name_regex = r'^.*/(.*)$'
    data = []
    traces = {}
    averages = {}
    for value in values:  # each url
        x = []
        y = []
        total_sum = 0
        counter = 0
        for val in values[value]:
            x.append(val[0])  # time
            y.append(val[1])  # count
            if type(val[1]) is float and val[1] > 0:
                total_sum += val[1]
                counter += 1
        averages[value] = total_sum / counter
        trace = Scatter(x=x, y=y, name=value,
                        text=re.match(name_regex, value).group(1))
        traces[value] = trace
    for trace in traces.values():
        data.append(trace)
    graph = {}
    graph['info'] = 'Averages:\n'
    print(averages)
    averages = sorted(averages.items(), key=lambda x: x[1], reverse=True)
    for average in averages:
        graph['info'] += "{0}: {1:2.3f}\n".format(average[0], average[1])
    graph['group_by'] = ['Hour', 'Minute']
    # plot graph
    graph['graph'] = plotly.offline.plot({
        "data": data,
        "layout": generate_layout("MyMSD -> Datapower API calls",
                                  "Date/Time", "Frequency")
    }, show_link=False, include_plotlyjs=False, output_type='div')
    return graph
def database(dirs, path):
    # ITERATING OVER FILES
    database_ = []
    for file in dirs:
        files = []
        dfNH = []
        dfSEG = []
        # PARSING DATA FILES
        data = parser.parse_data(os.path.join(path, file))
        files.append(file)
        for i in range(len(data)):
            if data[i][0] == 'N':
                res_id = data[i][2]
                for j in range(len(data)):
                    if data[j][0] == 'H' and data[j][2] == res_id:
                        dfNH.append(data[j] + data[i])
                        break
        for j in range(len(data)):
            if data[j][0] == 'SD':
                dfSEG.append(files + data[j])
        # REMOVING IRRELEVANT DATA ENTRIES
        for i in range(len(dfSEG)):
            for j in range(len(dfNH)):
                dist = geometry.distance(dfSEG[i][4], dfSEG[i][5], dfSEG[i][6],
                                         dfNH[j][3], dfNH[j][4], dfNH[j][5])
                if dist < 4.2:
                    database_.append(dfSEG[i] + dfNH[j])
    fve = f_vector(database_, 1)
    rv = [database_, fve]
    return rv
def process(service, change):
    # debug
    print(change)
    # prepare png name
    file_name = change.get('file').get('name')
    png_name = os.path.splitext(file_name)[0] + '.png'
    local_png_path = os.path.join(CACHE_FOLDER, png_name)
    # download from upstream
    f = download_file(service, change.get('file').get('id'))
    # parse
    data = parse_data(f, TYPE_ECG)
    data = np.array(data)
    # filter
    filtered = data[:, 1]
    filtered = power_line_noise_filter(filtered, ECG_FS)
    filtered = high_pass_filter(filtered, ECG_FS, HIGH_PASS_CUTOFF)
    filtered = low_pass_filter(filtered, ECG_FS, LOW_PASS_CUTOFF)
    filtered = np.column_stack((data[:, 0], filtered))
    # save to png
    plot_ecg(filtered)
    plot_to_png(local_png_path)
    # upload
    upload_png(service, local_png_path, png_name)
input = numpy.asanyarray([float(i) for i in sys.argv[3].split(" ")]).reshape(1, -1)
verbose = bool(sys.argv[4])

nation = nation.load_nation(nation_code)
path_model = nation.base_path_datas + nation.sources[source_id].path_model

if verbose:
    support.colored_print("Loading model...", "green")
model = joblib.load(path_model)

if verbose:
    support.colored_print("Making prediction...", "green")
output = model.predict(input)

if verbose:
    support.colored_print("Estimating error...", "green")
training_set_error_input, training_set_error_output = parser.parse_data(
    nation.base_path_datas + nation.sources[source_id].path_training_set_error)
error = knn.get_error_estimation(input[0], training_set_error_input,
                                 training_set_error_output,
                                 nation.sources[source_id].best_k,
                                 nation.sources[source_id].k_weighted)

if verbose:
    support.colored_print("Showing results...", "green")
support.colored_print(
    "Prediction: %.2lf\nRelative error (estimated): %.2lf %%" % (output[0], error),
    "blue")
support.colored_print("Completed!", "pink")
def main():
    # parse both untagged and tagged data;
    # let the user know how to use the program
    try:
        parsing = parser.parse_data("nltk_data/corpora/untagged")
        data_tagged = parser.parse_data("nltk_data/corpora/training")
    except:
        print("Please make sure that next to this python file you have the following folder format:")
        print("\"nltk_data/corpora/untagged\" and \"nltk_data/corpora/training\"")
        print("in order to access the tagged and untagged emails")
        return
    print("Tagged and untagged e-mails have been read.")
    print("Tagging e-mails. Please wait...")
    #print("This might take a while because of requests to the encyclopedia...")
    # set up the time, location and speaker dicts from the tagged data
    train_sents = parser.get_training_sents(data_tagged[0])
    # create a folder named 'tagged_emails' for all tagged txt files
    if not os.path.exists("tagged_emails"):
        os.makedirs("tagged_emails")
    data = parsing[0]        # tuple type-content
    file_names = parsing[1]  # keep track of the name in order to write a
                             # tagged mail with the same name
    # first train the tagger and initialise it only once for all emails
    ner = nertagger.NerTagger(train_sents)
    for d in data:
        type_list = d[0]     # list of types
        content_list = d[1]  # list of content for each type
        # continue if no content is found
        if len(type_list) == 0:
            continue
        if find_with_pattern("[Aa]bstract", type_list) is None:
            continue
        # find the current abstract
        abstract_pos = type_list.index(find_with_pattern("[Aa]bstract", type_list))
        abstract_type = type_list[abstract_pos]
        abstract_content = content_list[abstract_pos]
        # split into paragraphs and sentences
        paragraphs = get_paragraphs(abstract_content)
        sentences_from_paragraphs = get_sentences(paragraphs)
        # there is always useful information in the header,
        # so deal with it first
        stime_from_header = get_time(type_list, content_list)[0]
        etime_from_header = get_time(type_list, content_list)[1]
        speaker_from_header = get_speaker(type_list, content_list, ner)
        location_from_header = get_location(type_list, content_list, ner)
        if speaker_from_header is not None:
            speaker_from_header = speaker_from_header.strip()
        if location_from_header is not None:
            location_from_header = location_from_header.strip()
        if speaker_from_header is None or speaker_from_header == "":
            speaker_from_header = try_to_find_speaker_in_abstract(abstract_content)
        # set up the tagged corpus
        abstract_tagged = tag_paragraphs_and_sentences(sentences_from_paragraphs)
        content_list[abstract_pos] = abstract_tagged
        for content in content_list:
            if stime_from_header is not None and stime_from_header != "":
                tag_email(content_list, stime_from_header, "<stime>", "</stime>", content)
        for content in content_list:
            if etime_from_header is not None and etime_from_header != "":
                tag_email(content_list, etime_from_header, "<etime>", "</etime>", content)
        for content in content_list:
            if speaker_from_header is not None and speaker_from_header != "":
                tag_email(content_list, speaker_from_header, "<speaker>", "</speaker>", content)
        for content in content_list:
            if (location_from_header is not None and location_from_header != ""
                    and "*" not in location_from_header):
                # had some trouble with * inputs
                tag_email(content_list, location_from_header, "<location>", "</location>", content)
        # get the file name and write
        file_name = file_names[data.index(d)]
        write(file_name, type_list, content_list)
    print("Operation successful.")
    print("Tagged files should be in " + os.getcwd() + "/tagged_emails")
def count_words(words):
    return parse_data(words)
import python_lexer
import parser
import utils

f = open("samples/sample3.py")
data = f.read()
f.close()

lexer = python_lexer.PythonLexer()
lexer.input(data)
for token in lexer:
    print(token.value)

res = parser.parse_data(data, lexer)
for node in res:
    # if isinstance(node, utils.Node) and (node.kind == 'classdef' or node.kind == 'funcdef'):
    if isinstance(node, utils.Node) and (node.kind == "block"):
        print("@" * 50)
        print(utils.node_to_str(node))
# print(utils.node_to_str(res))
for i in range(1, input_lines + 1):
    true_class = 1 + math.floor((i - 1) / 50)
    nidx = bruteForce.nearest_neighbor(i, input_data)
    nn_class = 1 + math.floor((nidx - 1) / 50)
    if debug:
        print('the nearest neighbor of %d is %d real classes: %d, %d'
              % (i, nidx, true_class, nn_class))
    if nn_class == true_class:
        corrects += 1
    else:
        wrongs += 1
print('correct: %d incorrect: %d ' % (corrects, wrongs))


if __name__ == '__main__':
    input_data = parser.parse_data()
    for l in [4, 8, 16]:
        for k in [4, 8, 16]:
            print('Using locality-sensitive hashing. l= %d k= %d' % (l, k))
            init_hash_matrices(l=l, k=k)
            hash_elements(input_data, l)
            classify_articles(input_data)
            print(line_break)
            # to force output if redirecting to a file
            sys.stdout.flush()
    print('Now doing brute force')
    test_brute_force(input_data)
    # print(hash_tables[1])
from parser import parse_data, load_js, dump
from urllib.request import urlopen
from splash import show_splash

if __name__ == '__main__':
    os.system('clear')
    show_splash()
    time.sleep(1)
    # Parse all given arguments and return needed values
    html, path, op_type = check_args(len(argv), argv)
    # Process link as a single post url
    if op_type == "single_post":
        base_data, type_name = parse_data(html)
        select_media(type_name, base_data, path)
        print("[*] Done!")
    # Process comprehensive dump of all media
    else:
        # Use selenium to preload embedded js
        html = load_js(argv[2])
        links, dump_dir = dump(html, argv[2], path)
        for l in links:
            link_html = urlopen(l).read()
            base_data, type_name = parse_data(link_html)
            select_media(type_name, base_data, dump_dir)
        print("[*] Done!")
if len(sys.argv) == 1 or sys.argv[1] == "help":
    support.colored_print(
        "Usage:\n\t-parameter 1: nation code (string)\n\t-parameter 2: source id (int)\n\t-parameter 3: verbose (bool)",
        "red")
    sys.exit(0)

nation_code = sys.argv[1]
source_id = int(sys.argv[2])
verbose = bool(sys.argv[3])

nation = nation.load_nation(nation_code)
selected_model = nation.sources[source_id].best_model
training_set_input, training_set_output = parser.parse_data(
    nation.base_path_datas + nation.sources[source_id].path_training_set_prediction)

test_size = 200
train_size = len(training_set_input) - test_size
training_set_output = training_set_output[:, source_id]

# setup
if selected_model == "EXTRA_TREE_REGRESSOR":
    model = ExtraTreesRegressor(criterion="mse")
    model_name = "EXTRA_TREE_REGRESSOR"
elif selected_model == "GBRT":
    model = GradientBoostingRegressor(loss="lad", n_estimators=200)
    model_name = "GBRT"
else:
def load_parsed_data():
    q = query.get()
    data = parse_data(query=q)
    return data
# Copyright (c) 2019-present, HuggingFace Inc.
# All rights reserved. This source code is licensed under the BSD-style
# license found in the LICENSE file in the root directory of this source tree.
import os
import math
import logging
import json
from pprint import pformat
from argparse import ArgumentParser
from collections import defaultdict
from itertools import chain

from utils import get_dataset, get_dataset_personalities
from parser import parse_data, save_as_json

data_train = parse_data('train_both_original.txt')
data_test = parse_data('valid_both_original.txt')
data = {'train': data_train, 'valid': data_test}

#save_as_json(data, 'firstjson.json')
with open('data_personachat_original.json', 'w') as outfile:
    json.dump(data, outfile, indent=4)
PPG_FS_512 = 256  # we skip a half data point that is ambiance
LOW_PASS_CUTOFF = 35
HIGH_PASS_CUTOFF = 0.5


def parse_args():
    p = argparse.ArgumentParser()
    p.add_argument('raw_data_file', nargs=1, help='Specify the raw data file')
    p.add_argument('annotation_file', nargs='?', help='Specify the annotation file')
    p.add_argument('start_data_point', nargs='?', help='Specify the start data point')
    p.add_argument('num_data_point', nargs='?',
                   help='Specify the number of data points to be displayed')
    return p.parse_args()


args = parse_args()
f = open(args.raw_data_file[0])
ecg_data = parse_data(f, TYPE_ECG)
# back to beginning
f.seek(0)
ppg_data = parse_data(f, TYPE_PPG512)

# Convert to numpy arrays
ecg_data = np.array(ecg_data)
ppg_data = np.array(ppg_data)
print ecg_data.shape
print ppg_data.shape

# Slice ECG data as specified
if args.start_data_point:
    start = int(args.start_data_point)
import parser

TEST_OPTION = 'test'
TRAINING_OPTION = 'training'

if __name__ == "__main__":
    ## TRAINING DATA
    TR_imgs, TR_labels = parser.parse_data(TRAINING_OPTION)
    TR_groups = parser.group_imgs(TR_imgs, TR_labels)
    TR_priors = parser.get_priors(TR_groups)
    TR_likelyhoods = parser.get_likelyhoods(TR_groups)

    ## TESTING DATA
    TS_imgs, TS_labels = parser.parse_data(TEST_OPTION)
    TS_output_label, TS_posteriori = parser.classify_group(
        TS_imgs, TR_likelyhoods, TR_priors)
    stats = parser.analyse(TS_labels, TS_output_label, TS_imgs, TS_posteriori)
    confusion_matrix = parser.create_matrix(TS_labels, TS_output_label)
    parser.collect_odds_data(TR_likelyhoods)
    parser.find_low_high_examples(TS_imgs, TS_labels, TS_posteriori)
import pathlib
from pprint import pprint

import parser

data = pathlib.Path("tests.log").open().read()
template_path = pathlib.Path.joinpath(pathlib.Path.cwd(), "templates", "log.textfsm")
parsed_data = parser.parse_data(template_path, data)
pprint(parsed_data)
    for staff in v:
        G.add_edge(staff, parent)

walker(UniHiera)

staff_nodes = []
org_nodes = []
labels = {}  # Empty dictionary to use to list what labels should be shown

for n in G.nodes(data=True):
    org_nodes.append(n[0])
    labels[n[0]] = n[0]

staff_bucket = parser.parse_data('FinalData.txt')

for key, value in staff_bucket.items():
    for n in G.nodes(data=True):
        if key == n[0]:
            for user in value:
                G.add_edge(user, n[0])
                staff_nodes.append(user)

pos = nx.spring_layout(G)  # Set layout type to use for positioning Nodes

# Draw Nodes, Edges and Labels
nx.draw_networkx_nodes(G, pos, nodelist=org_nodes, node_color='r',
                       node_shape='s', node_size=25).set_edgecolor('w')
nx.draw_networkx_nodes(G, pos, nodelist=staff_nodes,
def get_data(round_trip: bool) -> tuple:
    return parse_data(round_trip)
def get_data():
    filename = parse_data()
    with open(filename) as f:
        data = json.load(f)
    return {"data": data['cities'], "stats": data['stats']}