Example #1
async def startup(app: web.Application):
    with open('RS_ViaOW.xml', 'r') as file:
        all_itinerary_viaow = parse_data(file)
    with open('RS_Via-3.xml', 'r') as file:
        all_itinerary_via3 = parse_data(file)
    all_itinerary = all_itinerary_via3 + all_itinerary_viaow
    app['graph'] = generate_graph(all_itinerary)
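A hook like this is normally attached to the application's on_startup signal; a minimal sketch of that wiring, assuming the same parse_data and generate_graph helpers used above:

from aiohttp import web

def make_app():
    app = web.Application()
    # aiohttp awaits every callback registered on app.on_startup before it
    # starts serving requests, so handlers can rely on app['graph'] being set
    app.on_startup.append(startup)
    return app

if __name__ == '__main__':
    web.run_app(make_app())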
Example #2
def execute(input_parameters):
    try:
        initialize()
        in_data = sys.argv
        parser.parse_data(in_data)
        source = parser.get_source()
        dest = parser.get_dest()
        valid.check_path(source)
        valid.check_path(dest)
        in_rsync_str = create_rsync_string()
        run_rsync()
    except:
        pass
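The bare except above discards every failure, including programming errors; a minimal variant of the same flow that reports problems instead, assuming the same initialize, parser, valid, create_rsync_string and run_rsync helpers:

import sys

def execute():
    initialize()
    parser.parse_data(sys.argv)
    source = parser.get_source()
    dest = parser.get_dest()
    try:
        valid.check_path(source)
        valid.check_path(dest)
        create_rsync_string()
        run_rsync()
    except Exception as exc:
        # surface the error instead of silently swallowing it
        print("execute failed: %s" % exc, file=sys.stderr)
        return 1
    return 0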
Example #3
    def add_file_data(self, name, data):
        """Add a file data to document.

        'name' is the name of the file, 'data' is the file data.

        The file will not be copied into docdir until sync().
        """
        # FIXME: set mime type term

        # parse the file data into text
        text = parse_data(data)

        # generate terms from the text
        self._gen_terms(None, text)

        # set data to be text sample
        # FIXME: is this the right thing to put in the data?
        summary = text[0:997] + '...'
        self._set_data(summary)

        # FIXME: should files be renamed to something generic (0.pdf)?
        prefix = self.db._find_prefix('file')
        self._add_term(prefix, name)

        # add it to the cache to be written at sync()
        self._infiles[name] = data
Example #4
def main():
    parser = argparse.ArgumentParser(description='do symbolic regression')
    parser.add_argument('train_file', help='file to train')
    parser.add_argument('-i', type=int, default=100, help='number of iterations')
    parser.add_argument('-p', type=int, default=1000, help='population size')
    parser.add_argument('-v', action='store_true', help='print detailed information')
    parser.add_argument('-r', action='store_true', help='recover from dump')
    args = parser.parse_args()

    if not os.path.isfile(args.train_file):
        print('main: train file does not exist')
        exit(1)

    with open(args.train_file, 'r') as f:
        lines = f.readlines()
    data, label = parse_data(lines)

    load_data(data, label)
    set_config({
        'ITER_NUM': args.i,
        'POP_SIZE': args.p,
        'PRESERVE_NUM': args.i // 2,
        'CONST': [0, 2, 2.5, 3, 4, 5, 10],
        'PROB_OP': {'+': 2, '-': 2, '*': 2, '/': 2, '^': 0.5, '~': 3,
                    'abs': 0.5, 'sin': 1.5, 'cos': 1.5, 'tan': 1,
                    'asin': 0.5, 'acos': 0.5, 'atan': 0.5,
                    'sinh': 0.5, 'cosh': 0.5, 'tanh': 0.5,
                    'exp': 2, 'sqrt': 2, 'log': 2},
        'VERBOSE': args.v
    })
    result = train(args.r)

    print('MSE: %s' % result[1])
    print('EXPR (postfix): %s' % result[0].to_postfix())
    print('EXPR (infix): %s' % result[0])
Example #5
def weekly_update(country):

    # 500 most recent incidents pulled per page by default
    response = urllib2.urlopen(
        'https://api.acleddata.com/acled/read?country=' + country)
    json_response = json.load(response)

    if json_response['success']:
        ### Parses all data ('count' is acled response for count of incidents)
        #parse_data(json_response['data'], json_response['count'], country)

        ### Just parse first 3 incidents and add to 'test' region
        parse_data(json_response['data'], 3, 'test')

    else:
        print "Error retrieving data from acled api ..."
    return False
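The snippet above is Python 2 (urllib2 and the print statement); an equivalent version for Python 3, using only the standard library and keeping the same parse_data call, might look like this:

import json
from urllib.request import urlopen

def weekly_update(country):
    # 500 most recent incidents pulled per page by default
    with urlopen('https://api.acleddata.com/acled/read?country=' + country) as response:
        json_response = json.load(response)

    if json_response['success']:
        # just parse the first 3 incidents and add them to the 'test' region
        parse_data(json_response['data'], 3, 'test')
    else:
        print("Error retrieving data from acled api ...")
    return False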
Example #6
    def on_city_page(self, response):
        cityname = response.meta['city'].split("/")[1]

        if "amanha" in response.url:
            filename = "original_pages/{}-amanha".format(cityname)
            data = parser.parse_data(cityname, response.body, "AMANHA")
        else:
            filename = "original_pages/{}-hoje".format(cityname)
            data = parser.parse_data(cityname, response.body, "HOJE")

        with open("{}.html".format(filename), 'wb') as f:
            f.write(response.body)
        self.log('Saved file %s' % "{}.html".format(filename))

        # data = parser.parse_data(cityname,response.body)

        return data
Example #7
def main():
    #parsing tagged data
    #let user know how to use the program
    try:
        parsing = (parser.parse_data("tagged_emails"))
    except:
        print(
            "Please make sure near this python file you have the following folder format:"
        )
        print("tagged_emails")
        print("Which is automatically generated by seminar_tagger.py")
        return

    print("Tagging e-mails. Please wait...")
    print("This may take some time because of the use of Word2Vec.")

    #create a folder for all txt tagged files named 'tagged_ontology_emails'
    if not os.path.exists("tagged_ontology_emails"):
        os.makedirs("tagged_ontology_emails")

    #creating the model
    model = Word2Vec.load_word2vec_format(
        '/home/projects/google-news-corpus/GoogleNews-vectors-negative300.bin',
        binary=True)

    ontology = Ontology(model)

    data = parsing[0]  #tuple type-content
    file_names = parsing[1]  #keep track of the name in order to write a tagged mail
                             #with the same name

    for d in data:

        type_list = d[0]  # list of types
        content_list = d[1]  # list of content for each type

        if (len(type_list) == 0):
            continue
        if (find_with_pattern("[Tt]opic", type_list) == None):
            continue

        #find current topic
        topic_pos = type_list.index(find_with_pattern("[Tt]opic", type_list))
        topic_type = type_list[topic_pos]
        topic_content = content_list[topic_pos]

        ontology.add_to(topic_content, file_names[data.index(d)])

    created_ontology = ontology.get_traversal_info()
    write(created_ontology)

    print('Operation successful')
Example #8
File: test.py Project: kapucko/dipl
def setup_test(test, filename):
    f = open(filename)
    data = f.read()
    f.close()
    gl = {}
    lc = {}
    lexer = python_lexer.PythonLexer()
    lexer.input(data)
    res = parser.parse_data(data, lexer)
    code = utils.node_to_str(res)
    test.gl = gl
    test.lc = lc
    test.code = code
    test.node = res
Example #9
def process(conn, addr):
    print("(info) Wait for game data...")
    buf = conn.recv(BUFFER_SIZE)

    print("(info) Parsing data...")
    data = parser.parse_data(buf)

    print("(info) Computing...")
    solver = core.Solver(data)
    x, y = solver.compute()

    print("(info) Sending back...")
    message = "{}{}".format(chr(y), chr(x))
    conn.send(message.encode("ascii"))
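A matching client for this handler would send the raw game data and decode the two-character reply; a minimal sketch using only the standard library (the host, port and payload are hypothetical):

import socket

HOST, PORT = "127.0.0.1", 9000  # hypothetical address of the solver service
BUFFER_SIZE = 1024

def ask_solver(game_data: bytes) -> tuple:
    with socket.create_connection((HOST, PORT)) as sock:
        sock.sendall(game_data)
        reply = sock.recv(BUFFER_SIZE).decode("ascii")
    # the server answers with chr(y) followed by chr(x)
    return ord(reply[1]), ord(reply[0])  # (x, y)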
Example #10
def plot_service_perf(start_date, end_date, groupby):
	print('grpby:')
	print(groupby)
	values = parser.parse_data(start_date, end_date, '(\d{4,4}-.*T(\d.*))\+.*T(\d.*)\+.*(/rest/.*)\".*', ("{'trace': 3, 'xsrc': 0, 'ysrc': '2-1', 'ytype': 'time', 'groupby': '%s', 'agg':'avg'}" % groupby), 'service_perf')

	name_regex = r'^.*/(.*)$'

	data = []
	traces = {}
	averages = {}
	for value in values:  # each url
		x = []
		y = []
		total_sum = 0
		counter = 0
		for val in values[value]:
			x.append(val[0])  # time
			y.append(val[1])  # count
			if(type(val[1]) is float and val[1] > 0):
				total_sum += val[1]
				counter += 1
		averages[value] = total_sum / counter
		trace = Scatter(x=x, y=y, name=value, text=re.match(name_regex, value).group(1))
		traces[value] = trace

	for trace in traces.values():
		data.append(trace)

	graph = {}

	graph['info'] = 'Averages:\n' 
	print(averages)
	averages = sorted(averages.items(), key=lambda x: x[1], reverse=True) 
	for average in averages:
		graph['info'] += "{0}: {1:2.3f}\n".format(average[0], average[1]) 


	graph['group_by'] = ['Hour', 'Minute']
	
	# plot graph
	graph['graph'] = (plotly.offline.plot({
		"data": data,
		"layout": generate_layout("MyMSD -> Datapower API calls", "Date/Time", "Frequency")
	}, show_link=False, include_plotlyjs=False, output_type='div'))
	return graph
Example #11
def database(dirs,path):

    # ITERATING OVER FILES 

    database_ = []

    for file in dirs:
        
        files = []
        dfNH=[]
        dfSEG=[]

        # PARSING DATA FILES
        
        data = parser.parse_data(os.path.join(path,file))
        files.append(file)
        
        for i in range(len(data)):
            if data[i][0] == 'N':
                res_id = data[i][2]
                for j in range(len(data)):
                    if data[j][0] == 'H' and data[j][2] == res_id : 
                        dfNH.append(data[j] + data[i]) 
                        break
                        
        for j in range(len(data)):
            if data[j][0] == 'SD': 
                dfSEG.append(files+data[j])

        # REMOVING IRRELEVANT DATA ENTRIES

        for i in range(len(dfSEG)):
            for j in range(len(dfNH)):
                dist= geometry.distance(dfSEG[i][4],dfSEG[i][5],dfSEG[i][6],dfNH[j][3],dfNH[j][4],dfNH[j][5])
                if dist < 4.2: 
                    database_.append(dfSEG[i]+dfNH[j])


    fve = f_vector(database_,1)
    rv = [database_,fve]
    return rv
Example #12
def process(service, change):
    # debug
    print(change)
    # prepare png name
    file_name = change.get('file').get('name')
    png_name = os.path.splitext(file_name)[0] + '.png'
    local_png_path = os.path.join(CACHE_FOLDER, png_name)
    # download from upstream
    f = download_file(service, change.get('file').get('id'))
    # parse
    data = parse_data(f, TYPE_ECG)
    data = np.array(data)
    # filter
    filtered = data[:, 1]
    filtered = power_line_noise_filter(filtered, ECG_FS)
    filtered = high_pass_filter(filtered, ECG_FS, HIGH_PASS_CUTOFF)
    filtered = low_pass_filter(filtered, ECG_FS, LOW_PASS_CUTOFF)
    filtered = np.column_stack((data[:, 0], filtered))
    # save to png
    plot_ecg(filtered)
    plot_to_png(local_png_path)
    # upload
    upload_png(service, local_png_path, png_name)
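The power_line_noise_filter, high_pass_filter and low_pass_filter helpers are project code that is not shown here; one plausible implementation of such a low-pass stage, sketched with scipy (the Butterworth order is an assumption):

from scipy import signal

def low_pass_filter(samples, fs, cutoff_hz, order=4):
    # zero-phase Butterworth low-pass so the ECG waveform is not shifted in time
    b, a = signal.butter(order, cutoff_hz, btype='low', fs=fs)
    return signal.filtfilt(b, a, samples)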
Example #13
input = numpy.asanyarray([float(i)
                          for i in sys.argv[3].split(" ")]).reshape(1, -1)
verbose = bool(sys.argv[4])

nation = nation.load_nation(nation_code)
path_model = nation.base_path_datas + nation.sources[source_id].path_model
if verbose:
    support.colored_print("Loading model...", "green")

model = joblib.load(path_model)
if verbose:
    support.colored_print("Making prediction...", "green")

output = model.predict(input)
if verbose:
    support.colored_print("Estimating error...", "green")

training_set_error_input, training_set_error_output = parser.parse_data(
    nation.base_path_datas + nation.sources[source_id].path_training_set_error)
error = knn.get_error_estimation(input[0], training_set_error_input,
                                 training_set_error_output,
                                 nation.sources[source_id].best_k,
                                 nation.sources[source_id].k_weighted)
if verbose:
    support.colored_print("Showing results...", "green")

support.colored_print(
    "Prediction: %.2lf\nRelative error (estimated): %.2lf %%" %
    (output[0], error), "blue")
support.colored_print("Completed!", "pink")
Example #14
def main():

	#parsing both untagged and tagged
	#let user know how to use the program
	try:
		parsing=(parser.parse_data("nltk_data/corpora/untagged"))
		data_tagged=(parser.parse_data("nltk_data/corpora/training"))
	except:
		print("Please make sure near this python file you have the following folder format:")
		print("\"nltk_data/corpora/untagged\" and \"nltk_data/corpora/training\"")
		print("In order to access tagged and untagged emails")
		return

	print("Tagged and untagged e-mails have been read.")
	print("Tagging e-mails. Please wait...")
	#print("This might take a while because of requests to the encyclopedia...")
	#set up the time,location,speaker dict from tagged
	train_sents = parser.get_training_sents(data_tagged[0])

	#create a folder for all txt tagged files named 'tagged_emails'
	if not os.path.exists("tagged_emails"):
		os.makedirs("tagged_emails")
	
	data       = parsing[0] #tuple type-content
	file_names = parsing[1] #keep track of the name in order to write a tagged mail
							#with the same name

	#first train the tagger and initialise it only once for all emails
	ner = nertagger.NerTagger(train_sents)

	for d in data:
		
		type_list    = d[0] # list of types
		content_list = d[1] # list of content for each type

		#continue if no content is found
		if(len(type_list) == 0):
			continue
		if(find_with_pattern("[Aa]bstract", type_list) == None):
			continue

		#find current abstract
		abstract_pos     = type_list.index(find_with_pattern("[Aa]bstract", type_list))
		abstract_type    = type_list[abstract_pos]
		abstract_content = content_list[abstract_pos]

		#split into paragraphs and sentences
		paragraphs = get_paragraphs(abstract_content)	
		sentences_from_paragraphs = get_sentences(paragraphs)

		#there is always useful information in the header
		#deal with it first
		stime_from_header    = get_time(type_list, content_list)[0]
		etime_from_header    = get_time(type_list, content_list)[1]
		speaker_from_header  = get_speaker(type_list, content_list, ner)
		location_from_header = get_location(type_list, content_list, ner)

		if(speaker_from_header  != None):
			speaker_from_header  = speaker_from_header.strip()
		if(location_from_header != None):
			location_from_header = location_from_header.strip()

		if(speaker_from_header == None or speaker_from_header == ""):
			speaker_from_header = try_to_find_speaker_in_abstract(abstract_content)

		#setting up the tagged corpus
		abstract_tagged = tag_paragraphs_and_sentences(sentences_from_paragraphs)
		content_list[abstract_pos] = abstract_tagged

		for content in content_list:
			if(stime_from_header != None   and stime_from_header!=""):
				tag_email(content_list, stime_from_header,   "<stime>",   "</stime>",   content)
		for content in content_list:
			if(etime_from_header != None   and etime_from_header!=""):
				tag_email(content_list, etime_from_header,   "<etime>",   "</etime>",   content)
		for content in content_list:	
			if(speaker_from_header != None and speaker_from_header != ""):
				tag_email(content_list, speaker_from_header, "<speaker>", "</speaker>", content)
		for content in content_list:	
			if(location_from_header != None and location_from_header != "" and "*" not in location_from_header): #had some troubles with * inputs
				tag_email(content_list, location_from_header, "<location>", "</location>", content)

		


		#get file name and write
		file_name = file_names[data.index(d)]
		write(file_name, type_list, content_list)

	print("Operation successful.")
	print("Tagged files should be in "+os.getcwd()+"/tagged_emails")
Example #15
def count_words(words):
    return parse_data(words)
Example #16
File: prednaska.py Project: kapucko/dipl
import python_lexer
import parser
import utils

f = open("samples/sample3.py")
data = f.read()
f.close()
lexer = python_lexer.PythonLexer()
lexer.input(data)
for token in lexer:
    print(token.value)
res = parser.parse_data(data, lexer)
for node in res:
    # if isinstance(node, utils.Node) and (node.kind=='classdef' or node.kind=='funcdef'):
    if isinstance(node, utils.Node) and (node.kind == "block"):
        print("@" * 50)
        print(utils.node_to_str(node))

# print(utils.node_to_str(res))
Example #17
    for i in range(1, input_lines + 1):
        true_class = 1 + math.floor((i-1) / 50)
        nidx = bruteForce.nearest_neighbor(i, input_data)
        nn_class  = 1 + math.floor((nidx - 1)/50)
        if debug:
            print('the nearest neighbor of %d is %d real classes: %d, %d' % (i, nidx, true_class, nn_class))

        if  nn_class == true_class:
            corrects += 1
        else:
            wrongs += 1
    print('correct: %d incorrect: %d ' %(corrects, wrongs))  


if __name__ == '__main__':
	input_data = parser.parse_data()
	for l in [4,8,16]:
		for k in [4, 8, 16]:
			print('Using local sensitivity hashing. l= %d k= %d' %(l, k))
			init_hash_matrices(l=l, k=k)
			hash_elements(input_data, l)
			classify_articles(input_data)
			print(line_break)
			# to force output if redirecting to a file
			sys.stdout.flush()

	print('Now doing brute force')
	test_brute_force(input_data)

    # print(hash_tables[1])
Example #18
import os
import time
from sys import argv

from parser import parse_data, load_js, dump
from urllib.request import urlopen
from splash import show_splash

if __name__ == '__main__':

    os.system('clear')
    show_splash()
    time.sleep(1)

    # Parse all given arguments and return needed values
    html, path, op_type = check_args(len(argv), argv)

    # Process link as a single post url
    if op_type == "single_post":
        base_data, type_name = parse_data(html)
        select_media(type_name, base_data, path)
        print("[*] Done!")
    # Process comprehensive dump of all media
    else:
        # Use selenium to preload embedded js
        html = load_js(argv[2])

        links, dump_dir = dump(html, argv[2], path)
        for l in links:
            link_html = urlopen(l).read()
            base_data, type_name = parse_data(link_html)
            select_media(type_name, base_data, dump_dir)
        print("[*] Done!")
Example #19
if len(sys.argv) == 1 or sys.argv[1] == "help":
    support.colored_print(
        "Usage:\n\t-parameter 1: nation code (string)\n\t-parameter 2: source id (int)\n\t-parameter 3: verbose (bool)",
        "red")
    sys.exit(0)

nation_code = sys.argv[1]
source_id = int(sys.argv[2])
verbose = bool(sys.argv[3])

nation = nation.load_nation(nation_code)

selected_model = nation.sources[source_id].best_model
training_set_input, training_set_output = parser.parse_data(
    nation.base_path_datas +
    nation.sources[source_id].path_training_set_prediction)
test_size = 200
train_size = len(training_set_input) - test_size
training_set_output = training_set_output[:, source_id]

# setup
if selected_model == "EXTRA_TREE_REGRESSOR":
    model = ExtraTreesRegressor(criterion="mse")
    model_name = "EXTRA_TREE_REGRESSOR"

elif selected_model == "GBRT":
    model = GradientBoostingRegressor(loss="lad", n_estimators=200)
    model_name = "GBRT"

else:
Example #20
def load_parsed_data():
    q = query.get()
    data = parse_data(query=q)
    return data
Example #21
# Copyright (c) 2019-present, HuggingFace Inc.
# All rights reserved. This source code is licensed under the BSD-style license found in the LICENSE file in the root directory of this source tree.
import os
import math
import logging
import json
from pprint import pformat
from argparse import ArgumentParser
from collections import defaultdict
from itertools import chain

from utils import get_dataset, get_dataset_personalities
from parser import parse_data, save_as_json

data_train = parse_data('train_both_original.txt')
data_test = parse_data('valid_both_original.txt')
data = {'train': data_train, 'valid': data_test}
#save_as_json(data,'firstjson.json')
with open('data_personachat_original.json', 'w') as outfile:
    json.dump(data, outfile, indent=4)
Example #22
PPG_FS_512 = 256  # half of the samples are ambient readings and are skipped
LOW_PASS_CUTOFF = 35
HIGH_PASS_CUTOFF = 0.5

def parse_args():
    p = argparse.ArgumentParser()
    p.add_argument('raw_data_file', nargs=1, help='Specify the raw data file')
    p.add_argument('annotation_file', nargs='?', help='Specify the annotation file')
    p.add_argument('start_data_point', nargs='?', help='Specify the start data point')
    p.add_argument('num_data_point', nargs='?', help='Specify the number of data points to be displayed')
    return p.parse_args()

args = parse_args()

f = open(args.raw_data_file[0])
ecg_data = parse_data(f, TYPE_ECG)

# back to the beginning
f.seek(0)
ppg_data = parse_data(f, TYPE_PPG512)

# Convert to numpy array
ecg_data = np.array(ecg_data)
ppg_data = np.array(ppg_data)

print ecg_data.shape
print ppg_data.shape

# Slice ECG data as specified
if args.start_data_point:
    start = int(args.start_data_point)
Example #23
import parser

TEST_OPTION = 'test'
TRAINING_OPTION = 'training'

if __name__ == "__main__":
    ## TRAINING DATA
    TR_imgs, TR_labels = parser.parse_data(TRAINING_OPTION)
    TR_groups = parser.group_imgs(TR_imgs, TR_labels)
    TR_priors = parser.get_priors(TR_groups)
    TR_likelyhoods = parser.get_likelyhoods(TR_groups)

    ## TESTING DATA
    TS_imgs, TS_labels = parser.parse_data(TEST_OPTION)
    TS_output_label, TS_posteriori = parser.classify_group(
        TS_imgs, TR_likelyhoods, TR_priors)
    stats = parser.analyse(TS_labels, TS_output_label, TS_imgs, TS_posteriori)
    confusion_matrix = parser.create_matrix(TS_labels, TS_output_label)
    parser.collect_odds_data(TR_likelyhoods)
    parser.find_low_high_examples(TS_imgs, TS_labels, TS_posteriori)
Example #24
import pathlib
from pprint import pprint

import parser

data = pathlib.Path("tests.log").open().read()
template_path = pathlib.Path.joinpath(pathlib.Path.cwd(), "templates", "log.textfsm")

parsed_data = parser.parse_data(template_path, data)

pprint(parsed_data)
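parser.parse_data here is a thin project wrapper around a TextFSM template; a minimal sketch of what such a wrapper might look like with the textfsm package (the body is an assumption, only the signature matches the call above):

import textfsm

def parse_data(template_path, data):
    # compile the TextFSM template and run it over the raw log text
    with open(template_path) as template:
        fsm = textfsm.TextFSM(template)
    rows = fsm.ParseText(data)
    # return one dict per record, keyed by the template's Value names
    return [dict(zip(fsm.header, row)) for row in rows]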
Example #25
					for staff in v:
						G.add_edge(staff, parent)



walker(UniHiera)

staff_nodes = []
org_nodes = []
labels = {} # Empty dictionary to use to list what labels should be shown

for n in G.nodes( data=True ):
		org_nodes.append(n[0])
		labels[n[0]] = n[0]

staff_bucket = parser.parse_data('FinalData.txt')

for key, value in staff_bucket.items():
	for n in G.nodes( data=True):
		if key == n[0]:
			for user in value:
				G.add_edge(user, n[0])
				staff_nodes.append(user)

pos = nx.spring_layout(G) # Set layout type to use for positioning Nodes

# Draw Nodes, Edges and Labels
nx.draw_networkx_nodes(G, pos, nodelist=org_nodes,
                       node_color='r', node_shape='s',
                       node_size=25).set_edgecolor('w')
nx.draw_networkx_nodes(G, pos, nodelist=staff_nodes,
Example #26
def get_data(round_trip: bool) -> tuple:
    return parse_data(round_trip)
Example #27
def get_data():
    filename = parse_data()
    with open(filename) as f:
        data = json.load(f)
    return {"data": data['cities'], "stats": data['stats']}