def test_bad_data(self):
    data_lines = [
        'Temperature1: 56',
        'Temperature2=54 ',
        'Temperature3=53',
        'Temperature3=52',
        'Temperature1=666',
        'Temperature1',
        'Error',
        'Temperature1=666degrees',
        'Temperature4=No data',
    ]
    expected_result = {
        1: [666],
        2: [54],
        3: [53, 52],
    }
    parser = DataParser()
    for line in data_lines:
        parser.parse(line)
    parsing_results = parser.get_results()
    self.assertEqual(expected_result, parsing_results)
def test_good_data(self):
    data_lines = [
        'Temperature1=56',
        'Temperature2=54 ',
        'Temperature3=53',
        'Temperature3=52',
        'Temperature1=666',
    ]
    expected_result = {
        1: [56, 666],
        2: [54],
        3: [53, 52],
    }
    parser = DataParser()
    for line in data_lines:
        parser.parse(line)
    parsing_results = parser.get_results()
    self.assertEqual(expected_result, parsing_results)
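# The two tests above pin down the expected behaviour: only lines of the form
# 'TemperatureN=<integer>' (trailing whitespace allowed) are accepted, and values
# are grouped per sensor index. A minimal sketch of a parser that would satisfy
# them -- an illustration under those assumptions, not the project's actual DataParser:
import re
from collections import defaultdict


class SketchDataParser:
    """Collects integer readings keyed by sensor index."""

    _LINE_RE = re.compile(r'^Temperature(\d+)=(-?\d+)\s*$')

    def __init__(self):
        self._results = defaultdict(list)

    def parse(self, line):
        # Silently skip malformed lines such as 'Temperature1: 56' or 'Error'.
        match = self._LINE_RE.match(line)
        if match:
            self._results[int(match.group(1))].append(int(match.group(2)))

    def get_results(self):
        return dict(self._results)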
def test_DataParser_parse_sample(self):
    data_parser = DataParser(' ')
    for line in self._link:
        try:
            # generate results
            dicom_file_path = os.path.join(self._data_path, line)
            contour_file_path = os.path.join(self._data_path, self._link[line])
            shape, img, mask, contour = data_parser.parse_sample(
                dicom_file_path, contour_file_path)
            np.savez(os.path.join(self._result_path, os.path.splitext(line)[0]) + '.npz',
                     shape=shape, img=img, mask=mask, contour=contour)

            # Load baseline and do the comparison
            baseline = np.load(
                os.path.join(self._baseline_path, os.path.splitext(line)[0]) + '.npz')
            equal1 = np.array_equal(baseline['shape'], shape)
            equal2 = np.array_equal(baseline['img'], img)
            equal3 = np.array_equal(baseline['mask'], mask)
            equal4 = np.array_equal(baseline['contour'], contour)
            if not (equal1 and equal2 and equal3 and equal4):
                return False
        except:
            return False
    return True
def main():
    threshold = 60
    second_threshold = 10
    stream = SensorStream()
    CONTINUOUS_INCREMENT = False
    TCP_IP = '192.168.43.222'
    TCP_PORT = 80
    BUFFER_SIZE = 1024
    sleep_time = 0.05

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect((TCP_IP, TCP_PORT))

    parser = DataParser(acc_unit=100, gy_unit=128)
    mov = 0
    rotating = False
    while True:
        raw_bytestream = s.recv(BUFFER_SIZE)
        parser.parse_data(raw_bytestream, stream)
        x, y, z, gX, gY, gZ = stream.getValues()
        print x, y, z, gX, gY, gZ
        time.sleep(sleep_time)
def test_04_parser_parse_raw_data(self):
    input = "empid=D011\ngender=M\nage=29"
    parser = DataParser()
    parser.parse_raw_data(input)
    expected = [{'empid': 'D011', 'gender': 'M', 'age': '29'}]
    actual = parser.get_data()
    self.assertEqual(expected, actual)
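# For reference, the record format exercised above is newline-separated key=value
# pairs, with one dict appended per raw record and values kept as strings. A hedged
# sketch of parse_raw_data consistent with that expectation (illustrative only,
# not the implementation under test):
class SketchRecordParser:
    def __init__(self):
        self._data = []

    def parse_raw_data(self, raw):
        record = {}
        for line in raw.splitlines():
            key, _, value = line.partition('=')
            record[key] = value          # values stay as strings, e.g. age -> '29'
        self._data.append(record)

    def get_data(self):
        return self._data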
def test_parse_unicode_data(self):
    file_format = FileFormat.from_csv('tests/specs/simple_format.csv')
    parser = DataParser('tests/data/simple_format_2015-06-28-unicode.txt', file_format)
    self.assertEquals(
        [[['Foonyor', 1, 1], [u'Barzàne'.encode('utf-8'), 0, -12]]],
        list(parser.gen_data())
    )
def main(args):
    data_parser = DataParser(data_prefix=args.data_prefix,
                             images_dirpath=args.images_dirpath,
                             masks_dirpath=args.masks_dirpath,
                             img_masks_filepath=args.img_masks_filepath,
                             contours_type=args.contours_type,
                             logs_prefix=args.logs_prefix,
                             visualize_contours=args.visualize_contours)
    data_parser.parse()
def get_logs():
    dp = DataParser(constants.log_file_path)
    mentions_by_ticker = dp.mentions_by_ticker()

    # summarize by user
    clustered_messages = dp.messages_by_user('JohnArtman',
                                             start_date=datetime(2018, 6, 1),
                                             end_date=datetime(2019, 1, 1))
    summarized_messages = get_summary(clustered_messages, 5)
    print(summarized_messages)
def main():
    if args.filename is None:
        parser.print_help()
        sys.exit(1)

    data_parser = DataParser(args.filename)
    jobs, machines, tasks = data_parser.get_instance_parameters()

    instance = Instance('Roxanne', machines, jobs, tasks)
    instance.print_info()
    instance.generate_best_cmax()
    instance.johnsons_algorithm()
def load_data(self, idir):
    # loop over all files in the source dir
    files = os.listdir(idir)
    for f in files:
        try:
            dParse = DataParser(idir + "/" + f, analyse=0, compact=1)
            self.flow_cnt += dParse.lines_cnt
        except:
            print("Error: failed to parse file", (idir + f))
            self.errors += 1
            continue

        # binary classification label
        #   1 - correct traffic
        #   0 - anomaly traffic
        #label = 1  # int(f.split("-")[-1].split(".")[0])
        #tmpTLS = dParse.getTLSInfo()

        # Features extraction
        #tmpBD, tmpBDL = dParse.getByteDistribution()
        tmpIPT = dParse.getIndividualFlowIPTs()
        tmpPL = dParse.getIndividualFlowPacketLengths()
        tmp = dParse.getIndividualFlowMetadata(PKTS=0, BYTES=0, FLOW_TIME=0, WHT=1,
                                               BYTE_DIST_M=0, BYTE_DIST_S=1,
                                               ENTROPY=0, IDP=1)
        if tmpPL != None:  # and tmpPL != None and tmpIPT != None:
            # iterate over every flow
            for i in range(len(tmpPL)):
                tmp_data = []
                tmp_data.extend(tmp[i])
                tmp_data.extend(tmpPL[i])
                #tmp_data.extend(tmpIPT[i])
                ##tmp_data.extend(tmpBD[i])
                #tmp_data.extend(tmpBDL[i])
                #print("FlowMetadata", tmp[i])
                #print("PacketLengths", tmpPL[i])
                #print("IndividualFlowIPT", tmpIPT[i])
                #print("bd", list(tmpBD[i]))
                #tmp_data.extend(tmpTLS[i])
                if self.features_cnt == 0:
                    self.features_cnt = len(tmp_data)
                self.data.append(tmp_data)
                #print(self.data[i])
                #print("final data length and sum", len(self.data[i]), sum(self.data[i]))
                self.labels.append(self.label)
def __init__(self, plot_painter, params, port_settings=DEFAULT_PORT_SETTINGS):
    super().__init__()
    self.data_parser = DataParser(params)
    self.plot_painter = plot_painter
    self.worker = SerialWorker(port_settings)
    self.thread = QThread()
    self.worker.moveToThread(self.thread)
    self.worker.read_data_signal.connect(self.add_data)
    self.signal_start_background_job.connect(self.worker.run)
def main(_):
    feat_dict = FeatureDictionary()
    print("feature_size: %d" % feat_dict.feature_size)
    print("field_size: %d" % feat_dict.field_size)
    print(feat_dict.col2feat_id.keys())

    dataparser = DataParser(feat_dict, FLAGS.label)
    train_ids, train_vals, train_labels = dataparser.parse(infile="%s\\train_sample.csv" % FLAGS.data_dir)
    print("len of train: %d" % len(train_ids))
    test_ids, test_vals, test_labels = dataparser.parse(infile="%s\\test_sample.csv" % FLAGS.data_dir)
    print("len of test: %d" % len(test_ids))

    # ------build Tasks------
    model_params = {
        "field_size": feat_dict.field_size,
        "feature_size": feat_dict.feature_size,
        "embedding_size": FLAGS.embedding_size,
        "learning_rate": FLAGS.learning_rate,
        "l2_reg": FLAGS.l2_reg,
        "deep_layers": FLAGS.deep_layers,
        "dropout": FLAGS.dropout,
        "experts_num": 3,
        "experts_units": 32,
        "use_experts_bias": True,
        "use_gate_bias": True
    }
    print(model_params)
    DeepFM = build_model_estimator(model_params)
    # DeepFM = tf.contrib.estimator.add_metrics(DeepFM, my_auc)

    if FLAGS.task_type == 'train':
        train_spec = tf.estimator.TrainSpec(
            input_fn=lambda: input_fn(train_ids, train_vals, train_labels,
                                      num_epochs=FLAGS.num_epochs,
                                      batch_size=FLAGS.batch_size))
        eval_spec = tf.estimator.EvalSpec(
            input_fn=lambda: input_fn(test_ids, test_vals, test_labels,
                                      num_epochs=1, batch_size=FLAGS.batch_size),
            steps=None,
            start_delay_secs=1000,
            throttle_secs=1200)
        tf.estimator.train_and_evaluate(DeepFM, train_spec, eval_spec)
        results = DeepFM.evaluate(
            input_fn=lambda: input_fn(test_ids, test_vals, test_labels,
                                      num_epochs=1, batch_size=FLAGS.batch_size))
        for key in results:
            log.info("%s : %s" % (key, results[key]))
    elif FLAGS.task_type == 'eval':
        results = DeepFM.evaluate(
            input_fn=lambda: input_fn(test_ids, test_vals, test_labels,
                                      num_epochs=1, batch_size=FLAGS.batch_size))
        for key in results:
            log.info("%s : %s" % (key, results[key]))
    elif FLAGS.task_type == 'infer':
        preds = DeepFM.predict(
            input_fn=lambda: input_fn(test_ids, test_vals, test_labels,
                                      num_epochs=1, batch_size=FLAGS.batch_size),
            predict_keys="prob")
        with open(FLAGS.data_dir + "/pred.txt", "w") as fo:
            for prob in preds:
                fo.write("%f\n" % (prob['prob']))
def predict(self, X):
    #print X
    data_parser = DataParser()
    Xtrain = pd.Series(X)
    input_texts = []
    for input_text in Xtrain:
        input_texts.append(data_parser.split(input_text.decode('utf-8')))

    count_vectorizer = CountVectorizer(
        vocabulary=self.vocabulary  # reuse the vocabulary built at training time
    )
    feature_vectors = count_vectorizer.fit_transform(input_texts)
    return self.estimator.predict(feature_vectors)
def test_parse_dados(self):
    self.assertEqual(len(Professor.instances.values()), 0)
    self.assertEqual(len(Turma.instances.values()), 0)
    self.assertEqual(len(Materia.instances.values()), 0)
    self.assertEqual(len(Vertice.instances), 0)

    dt = DataParser("data/Escola_A.xlsx")
    dt.parse_dados()

    self.assertEqual(len(Professor.instances.values()), 28)
    self.assertEqual(len(Turma.instances.values()), 11)
    self.assertEqual(len(Materia.instances.values()), 11)
    self.assertEqual(len(Vertice.instances), 302)
def test_parse_dados_professor(self):
    self.assertEqual(len(Professor.instances.values()), 0)
    self.assertEqual(len(Vertice.instances), 0)

    dt = DataParser("data/Escola_A.xlsx")
    dt.parse_dados()

    self.assertEqual(len(Professor.instances.values()), 28)
    self.assertEqual(len(Vertice.instances), 302)
    for instance in Professor.instances.values():
        lista_vertices = [vertice for vertice in Vertice.instances
                          if vertice.professor == instance]
        self.assertEqual(lista_vertices, instance.vertices)
def add_deadline(data=os.path.dirname(__file__) + "/data.json"):
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('calendar', 'v3', http=http)

    my_data = DataParser()
    deadline_dict = my_data.json_to_dict(data)
    for index, deadline in deadline_dict.items():
        events = my_data.gen_event(deadline)
        for event in events:
            if not is_duplicate_event(service, event):
                create_event(service, event)
def test_parse_dados_materia(self):
    self.assertEqual(len(Materia.instances.values()), 0)
    self.assertEqual(len(Vertice.instances), 0)

    dt = DataParser("data/Escola_A.xlsx")
    dt.parse_dados()

    self.assertEqual(len(Materia.instances.values()), 11)
    self.assertEqual(len(Vertice.instances), 302)
    for instance in Materia.instances.values():
        lista_vertices = [vertice for vertice in Vertice.instances
                          if vertice.materia == instance]
        self.assertEqual(lista_vertices, instance.vertices)
def test_parse_configuracoes(self):
    self.assertEqual(len(Hora.instances.values()), 0)
    self.assertEqual(len(Horario.instances.values()), 0)

    dt = DataParser("data/Escola_A.xlsx")
    dt.parse_configuracoes()

    self.assertEqual(len(Hora.instances.values()), 6)
    self.assertEqual(len(Horario.instances.values()), 30)
    lista_cores = [horario.cor for horario in Horario.instances.values()]
    for cor in range(30):
        self.assertIn(cor, lista_cores)
def test_parse_restricoes_professor(self):
    self.assertEqual(len(Hora.instances.values()), 0)
    self.assertEqual(len(Horario.instances.values()), 0)
    self.assertEqual(len(Professor.instances.values()), 0)

    dt = DataParser("data/Escola_A.xlsx")
    dt.parse_dados()
    dt.parse_configuracoes()
    dt.parse_restricoes_professor()

    self.assertEqual(len(Hora.instances.values()), 6)
    self.assertEqual(len(Horario.instances.values()), 30)
    self.assertEqual(len(Professor.instances.values()), 28)

    professor_1 = Professor('Professor 1')
    self.assertEqual(len(professor_1.restricoes), 1)
    self.assertIn(Horario.get(Horario.construir_identificador('Terça', Hora('10:40:00'))),
                  professor_1.restricoes)

    professor_2 = Professor('Professor 2')
    self.assertEqual(len(professor_2.restricoes), 2)
    self.assertIn(Horario.get(Horario.construir_identificador('Segunda', Hora('11:30:00'))),
                  professor_2.restricoes)
    self.assertIn(Horario.get(Horario.construir_identificador('Segunda', Hora('07:00:00'))),
                  professor_2.restricoes)
def main():
    terminal_command = sys.argv[1:]
    terminal_parser = TerminalParser()
    terminal_parser.add_argument('-i', '--input', default=[], nargs='+')
    terminal_parser.add_argument('-o', '--output', default=[], nargs='+')

    input_files = terminal_parser.get_list_of_input_files(terminal_command)
    output_files = terminal_parser.get_list_of_output_files(terminal_command)

    for file_name in input_files:
        data_parser = DataParser(file_name)
        file_extension = data_parser.extract_extension_from_filename()
        print(file_extension)
def test_parse_restricoes_turma(self):
    self.assertEqual(len(Hora.instances.values()), 0)
    self.assertEqual(len(Horario.instances.values()), 0)
    self.assertEqual(len(Turma.instances.values()), 0)

    dt = DataParser("data/Escola_A.xlsx")
    dt.parse_dados()
    dt.parse_configuracoes()
    dt.parse_restricoes_turma()

    self.assertEqual(len(Hora.instances.values()), 6)
    self.assertEqual(len(Horario.instances.values()), 30)
    self.assertEqual(len(Turma.instances.values()), 11)

    turma_1 = Turma('1')
    self.assertEqual(len(turma_1.restricoes), 2)
    self.assertIn(Horario.get(Horario.construir_identificador('Terça', Hora('11:30:00'))),
                  turma_1.restricoes)
    self.assertIn(Horario.get(Horario.construir_identificador('Segunda', Hora('11:30:00'))),
                  turma_1.restricoes)

    turma_8 = Turma('8')
    self.assertEqual(len(turma_8.restricoes), 3)
    self.assertIn(Horario.get(Horario.construir_identificador('Quarta', Hora('11:30:00'))),
                  turma_8.restricoes)
    self.assertIn(Horario.get(Horario.construir_identificador('Segunda', Hora('11:30:00'))),
                  turma_8.restricoes)
    self.assertIn(Horario.get(Horario.construir_identificador('Quarta', Hora('10:40:00'))),
                  turma_8.restricoes)
class LaunchFunctor:
    def __init__(self, data_dir, initial_time, time_step, max_chunk_size):
        self.data_parser = DataParser(data_dir, initial_time, time_step, max_chunk_size)

    def __call__(self, task):
        chunk_ind = task.chunk
        block_ind = task.block
        surf_ind = task.surface
        observer = task.observer

        surf_info = self.data_parser.parse_geometry_file(surf_ind, block_ind)
        gdf_info = self.data_parser.parse_data_file_chunk(surf_ind, block_ind, chunk_ind)
        start_time = self.data_parser.get_time_offset(surf_ind, block_ind, chunk_ind) + \
            self.data_parser.initial_time

        return {'start_time': start_time,
                'observer': observer,
                'gdf_info': gdf_info,
                'surf_info': surf_info}
def __init__(self, config, stack_players=None):
    self.config = config
    self.dataParser = DataParser(config)
    self.constrainedModel = ConstrainedModel(self.config, self.dataParser, stack_players)
    self.backtester = Backtester(self.config, self.dataParser, self.constrainedModel)
async def crash_info(request):
    '''
    crash info parses the crash record and returns a JSON object
    '''
    log.info("Handling '/api/v1/getCrashInfo'")
    angle, max_force_offset, _, _, _ = DataParser().parse_input_data(
        request.body.decode('utf8'))
    return json({'impactAngle': angle, 'offsetMaximumForce': max_force_offset})
class QLaunchFunctor:
    def __init__(self, data_dir, initial_time, time_step, max_chunk_size):
        self.data_parser = DataParser(data_dir, initial_time, time_step, max_chunk_size)

    def __call__(self, task_seq, queue):
        grouped_task_seq = task_seq['observer'].groupby(
            [task_seq.block, task_seq.chunk, task_seq.surface])
        for (block_ind, chunk_ind, surf_ind), observer_series in grouped_task_seq:
            surf_info = self.data_parser.parse_geometry_file(surf_ind, block_ind)
            gdf_info = self.data_parser.parse_data_file_chunk(surf_ind, block_ind, chunk_ind)
            start_time = self.data_parser.get_time_offset(surf_ind, block_ind, chunk_ind) + \
                self.data_parser.initial_time
            for observer in observer_series:
                task = {'block_ind': block_ind,
                        'surf_ind': surf_ind,
                        'chunk_ind': chunk_ind,
                        'start_time': start_time,
                        'observer': observer,
                        'gdf_info': gdf_info,
                        'surf_info': surf_info}
                queue.put(task)
def load_family_details(self, pheno_covar):
    """Load family data updating the pheno_covar with family ids found.

    :param pheno_covar: Phenotype/covariate object
    :return: None
    """
    file = open(self.fam_details)
    header = file.readline()
    format = file.readline()
    self.file_index = 0

    mask_components = []        # 1s indicate an individual is to be masked out
    for line in file:
        words = line.strip().split()
        indid = ":".join(words[0:2])
        if DataParser.valid_indid(indid):
            mask_components.append(0)

            sex = int(words[5])
            pheno = float(words[6])
            pheno_covar.add_subject(indid, sex, pheno)
        else:
            mask_components.append(1)

    mask_components = numpy.array(mask_components)
    self.ind_mask = numpy.zeros(len(mask_components) * 2, dtype=numpy.int8).reshape(-1, 2)
    self.ind_mask[0:, 0] = mask_components
    self.ind_mask[0:, 1] = mask_components
    self.ind_count = self.ind_mask.shape[0]
    pheno_covar.freeze_subjects()
def scraper(i):
    tag_links = cPickle.load(open('../data/tag_names_and_links{0}.p'.format(i), 'rb'))
    count_tag = 0
    for tag_name, init_tag_link in tag_links:
        count_tag += 1
        tag_data = []
        count_page = 0
        # print str(i) + ' time: ' + str(int(time.time() - start_time) / 60) + ' min'
        print str(i) + ' started tag {0}/{1} : {2}'.format(count_tag, len(tag_links), tag_name)
        while True:
            count_page += 1
            tag_link = init_tag_link + str(count_page) if count_page != 1 else init_tag_link
            tag_page = urlopen(tag_link).read()
            question_links = map(lambda tail: 'http://pravoved.ru' + tail,
                                 re.findall(re.compile('(?<=target="_blank" href=")(.+?)(?=")',
                                                       flags=re.DOTALL),
                                            tag_page))
            if not question_links:
                break
            for question_link in question_links:
                tag_data.append([tag_name] + DataParser().get_data(question_link))
                # header, question, answers, additions = DataParser().get_data(question_link)
                # print "HEADER\n" + header
                # print "QUESTION\n" + question
                # print "ANSWERS\n" + answers
                # print "ADDITIONS\n" + additions
        pd.DataFrame(tag_data, columns=['tag', 'header', 'question', 'answers', 'additions'])\
            .to_csv('../data/{0}-{1}extra.csv'.format(i, count_tag), sep='\t',
                    index=False, encoding='utf-8')
    print str(i) + ' ENDED'
def __init__(self, config, stacks_finder):
    self.config = config
    self.data_parser = DataParser(self.config)
    self.stacks = stacks_finder.find_stacks()
    self.solution_queue = mp.Queue()
    self.num_lineups = self.modify_num_lineups(config)
    self.lineups = []
def setUp(self):
    self.parser = DataParser()
    self.cmd_view = CmdView()
    self.file_reader = FileReader()
    self.validator = Validator()
    self.db = Database("test.db")
    self.vis = Visualiser()
    self.val = Validator()
    self.serial = Serializer()
    self.controller = Controller(self.cmd_view,
                                 self.file_reader,
                                 self.parser,
                                 self.validator,
                                 self.db,
                                 self.vis,
                                 self.serial)
    self.init()
def __init__(self, filename, mode='file'):
    if mode == 'file':
        parser = DataParser(filename)
        self.data = parser.data
        self.headers = parser.headers
    self.theta_0 = 0.0
    self.theta_1 = 0.0
    self.prev_mse = 0.0
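# The theta_0/theta_1 and prev_mse fields above suggest a univariate linear fit
# y = theta_0 + theta_1 * x trained by gradient descent on mean squared error.
# A hedged sketch of one batch update under that assumption (illustrative only;
# the real class may train differently):
def gradient_step(theta_0, theta_1, points, learning_rate=0.01):
    """One batch gradient-descent step on MSE for (x, y) pairs.

    The constant factor 2 from differentiating the square is folded into the
    learning rate, as is common in simple implementations.
    """
    n = len(points)
    grad_0 = sum((theta_0 + theta_1 * x) - y for x, y in points) / n
    grad_1 = sum(((theta_0 + theta_1 * x) - y) * x for x, y in points) / n
    return theta_0 - learning_rate * grad_0, theta_1 - learning_rate * grad_1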
def test_DataParser_save_sample(self):
    data_parser = DataParser(' ')
    data_parser.set_result_path(self._result_path)
    for line in self._link:
        try:
            # generate results
            dicom_file_path = os.path.join(self._data_path, line)
            contour_file_path = os.path.join(self._data_path, self._link[line])
            shape, img, mask, contour = data_parser.parse_sample(
                dicom_file_path, contour_file_path)
            data_parser.save_sample(
                os.path.splitext(line)[0], shape, img, mask, contour)

            # Load baseline and do the comparison
            with h5py.File(os.path.join(self._baseline_path, 'data.h5'), 'r') as hf:
                sample = hf[os.path.splitext(line)[0]]
                equal1 = np.array_equal(np.array(sample['shape']), shape)
                equal2 = np.array_equal(np.array(sample['img']), img)
                equal3 = np.array_equal(np.array(sample['mask']), mask)
                if not (equal1 and equal2 and equal3):
                    return False
        except:
            return False
    return True
def core(coin: str, fiat: str, amount: float, reverse: bool, clipboard: bool,
         verbose: bool, timer: bool, wordform: bool, no_cache: bool):
    """
    Converts a cryptocurrency amount to a fiat equivalent or vice-versa

    Valid fiat currency values are: "AUD", "BRL", "CAD", "CHF", "CLP", "CNY",
    "CZK", "DKK", "EUR", "GBP", "HKD", "HUF", "IDR", "ILS", "INR", "JPY", "KRW",
    "MXN", "MYR", "NOK", "NZD", "PHP", "PKR", "PLN", "RUB", "SEK", "SGD", "THB",
    "TRY", "TWD", "ZAR"

    Valid cryptocurrency values are: "BTC", "ETH", "XRP", "LTC", and "BCH"

    `c2f btc usd 50`
    Translates to: What is 50 BTC worth in USD?

    `c2f btc usd 50 --reverse`
    Translates to: What is $50 USD worth in BTC?
    """
    util_setup(logger, timer, enabled_=True)
    with MeasureBlockTime("Main block"):
        if verbose:
            logzero.loglevel(logging.DEBUG)
        data_parser = DataParser(no_cache=no_cache)
        fiat = fiat.upper()
        coin = coin.upper()
        try:
            if reverse:
                if wordform:
                    click.echo(f"What is ${amount:.2f} {fiat} worth in {coin}?")
                quantity = data_parser.convert_to_crypto(fiat.upper(), coin.upper(), amount)
                formatted_quantity = "{:.8f}".format(quantity)
                print(formatted_quantity)
            else:
                if wordform:
                    click.echo(f"How much is {amount} {coin} in {fiat}?")
                quantity = data_parser.convert_to_fiat(fiat.upper(), coin.upper(), amount)
                formatted_quantity = "{:,.2f}".format(quantity)
                print(formatted_quantity)
            if clipboard:
                pyperclip.copy(formatted_quantity)
                sh.notify_send(APP_NAME, f"{formatted_quantity} copied to clipboard")
        except Exception as e:
            logger.exception(e)
def test_parse_data(self):
    file_format = FileFormat.from_csv('tests/specs/simple_format.csv')
    parser = DataParser('tests/data/simple_format_2015-06-28.txt', file_format)
    self.assertEquals(
        [[['Foonyor', 1, 1], ['Barzane', 0, -12], ['Quuxitude', 1, 103]]],
        list(parser.gen_data())
    )
    self.assertEquals(
        [[['Foonyor', 1, 1], ['Barzane', 0, -12]], [['Quuxitude', 1, 103]]],
        list(parser.gen_data(chunk_size=2))
    )
    self.assertEquals(
        [[['Foonyor', 1, 1]], [['Barzane', 0, -12]], [['Quuxitude', 1, 103]]],
        list(parser.gen_data(chunk_size=1))
    )
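# The test above expects gen_data() to yield the whole file as one chunk by default
# and fixed-size chunks when chunk_size is given. A sketch of that chunking pattern,
# assuming an iterable of already-parsed rows (illustrative, not the tested class):
def gen_chunks(rows, chunk_size=None):
    rows = list(rows)
    if chunk_size is None:
        # default: everything in a single chunk
        yield rows
        return
    for start in range(0, len(rows), chunk_size):
        yield rows[start:start + chunk_size]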
def load_data(self, idir, label, max_files):
    files = os.listdir(idir)
    num_files = 0
    for f in files:
        try:
            dParse = DataParser(idir + f, self.compact)
        except:
            print idir + f
            print 'fail'
            continue
        num_files += 1

        tmpTLS = dParse.getTLSInfo()
        if self.bd_compact == 1:
            tmpBD = dParse.getByteDistribution_compact()
        elif self.bd_compact == 2:
            tmpBD = dParse.getByteDistribution_mean_std()
        else:
            tmpBD = dParse.getByteDistribution()
        tmpIPT = dParse.getIndividualFlowIPTs()
        tmpPL = dParse.getIndividualFlowPacketLengths()
        tmp, ignore = dParse.getIndividualFlowMetadata()

        if tmp != None and tmpPL != None and tmpIPT != None:
            for i in range(len(tmp)):
                if ignore[i] == 1 and label == 1.0:
                    continue
                tmp_data = []
                if 0 in self.types:
                    tmp_data.extend(tmp[i])
                if 1 in self.types:
                    tmp_data.extend(tmpPL[i])
                if 2 in self.types:
                    tmp_data.extend(tmpIPT[i])
                if 3 in self.types:
                    tmp_data.extend(tmpBD[i])
                if 4 in self.types:
                    tmp_data.extend(tmpTLS[i])
                if len(tmp_data) != self.num_params:
                    print len(tmp_data)
                self.data.append(tmp_data)
            for i in range(len(tmp)):
                if ignore[i] == 1 and label == 1.0:
                    continue
                self.labels.append(label)

        if max_files != None and num_files >= max_files:
            break
def predict(self, X):
    #print X
    data_parser = DataParser()
    Xtrain = pd.Series(X)
    input_texts = []
    for input_text in Xtrain:
        input_texts.append(data_parser.split(input_text[1].decode('utf-8')))
    print input_texts

    count_vectorizer = CountVectorizer(
        vocabulary=self.vocabulary  # reuse the vocabulary built at training time
    )
    feature_vectors = count_vectorizer.fit_transform(input_texts)

    # prepend category_id to the feature vectors
    features_array = feature_vectors.toarray()
    category_ids = np.array(X)[:, 0].T
    features_array = np.c_[category_ids, features_array]

    return self.estimator.predict(features_array)
def test_saving(self):
    data_lines = [
        'Temperature1: 56',
        'Temperature2=54 ',
        'Temperature3=53',
        'Temperature3=52',
        'Temperature1=666',
        'Temperature1',
        'Error',
        'Temperature1=666degrees',
        'Temperature4=No data',
        'Temperature1=54',
        'Temperature1=55',
        'Temperature4=40',
        'Temperature3=55',
    ]
    parser = DataParser()
    for line in data_lines:
        parser.parse(line)
    parser.save_results('mixed_json_results.json')
    self.assertTrue(
        filecmp.cmp('mixed_json_results.json', 'json_files//mixed_json_results.json'),
        'Files are different')
    os.remove('mixed_json_results.json')
        # fragment: tail of the truncated activation-function selector above
        assert False, 'no activation function'


def cost_func(y, y_hat, batch_size, choice='euclidean'):
    if choice == 'euclidean':
        return T.sum((y - y_hat) ** 2) / batch_size
    else:
        assert False, 'no cost function'


t_start = time.time()

sample_file = 'mfcc/train-1.ark'
test_file = 'mfcc/test.ark'
label_file = 'label/train-1.lab'
label_map_file = 'phones/48_39.map'

DataParser.load(sample_file, label_file, label_map_file)
#DataParser.test()

dim_x = DataParser.dimension_x
dim_y_hat = DataParser.dimension_y
batch_size = 10
neuron_num = 64
epoch_cycle = 2
learning_rate = 0.1
lr = theano.shared(learning_rate)
adagrad_t = theano.shared(0)

# e.g. matrix 3*2 dot matrix 2*1 = matrix 3*1
# [[1., 3.], [2., 2.], [3., 1.]] dot [[2], [1]] = [[5.], [6.], [7.]]
x = T.matrix('input', dtype='float64')          # matrix of dim_x * batch_size
y_hat = T.matrix('reference', dtype='float64')  # matrix of dim_y_hat * batch_size
import argparse
import itertools

from data_parser import DataParser
from database_connector import SimpleDatabaseConnector
from file_formats import FilenamePrefixFormatSelector

parser = argparse.ArgumentParser(description='Process data file and store to SQL.')
parser.add_argument('db_url', help='Database url')
parser.add_argument('table_name', help='Database table to create')
parser.add_argument('data_file', help='Data file to dump')
args = parser.parse_args()

# init
format_selector = FilenamePrefixFormatSelector.from_directory('specs/')
format = format_selector.get_format(args.data_file)
data_parser = DataParser(args.data_file, format)
db_connector = SimpleDatabaseConnector(args.db_url)

print 'Creating table ...'
db_connector.create_table(args.table_name, format)  # TODO rollback on failure?

print 'Inserting rows ...'
total_inserted = 0
for rows in data_parser.gen_data():
    db_connector.insert_rows(rows)
    total_inserted += len(rows)

print '%d rows inserted' % total_inserted
print 'Completed'
def devices():
    global flows
    global data
    global metadata
    global count_flocap
    global classifiers_to_display
    global classifier_names

    classifiers_to_display = []
    classifier_names = []
    display_fields = OrderedDict({})

    config_file = 'laui.cfg'
    fp = open(config_file, 'r')
    for line in fp:
        if line.startswith('display_field'):
            tokens = line.split()
            display_fields[int(tokens[3])] = (tokens[1], tokens[2].replace('_', ' '))
            continue
        elif line.strip() == '' or line.startswith('#') or not line.startswith('classifier'):
            continue
        tokens = line.split()
        if tokens[2] == 'logreg':
            classifiers_to_display.append((tokens[1], tokens[2], tokens[3], tokens[4]))
            classifier_names.append(tokens[1])
        elif tokens[2] == 'mapping':
            tmp_map = {}
            with open(tokens[4], 'r') as fp2:
                for line2 in fp2:
                    tokens2 = line2.split()
                    tmp_map[tokens2[0]] = float(tokens2[1])
            classifiers_to_display.append((tokens[1], tokens[2], tmp_map, int(tokens[3])))
            classifier_names.append(tokens[1])
    fp.close()

    subnet = '10.0.2.'
    devices_ = {}

    file_names = []
    is_upload = False
    if request.files.get('upload') != None:
        upload = request.files.get('upload')
        dir_name = tempfile.mkdtemp()
        upload.save(dir_name + 'temp.json')
        file_names.append(dir_name + 'temp.json')
        is_upload = True
    else:
        tmp_files = get_files_by_time(out_dir)
        tmp_files.reverse()
        if len(tmp_files) > 0:
            file_names.append(out_dir + tmp_files[0])
        if len(tmp_files) > 1:
            file_names.append(out_dir + tmp_files[1])
        if len(tmp_files) > 2:
            file_names.append(out_dir + tmp_files[2])
        if len(tmp_files) > 3:
            file_names.append(out_dir + tmp_files[3])
        if len(tmp_files) > 4:
            file_names.append(out_dir + tmp_files[4])
        if len(tmp_files) > 5:
            file_names.append(out_dir + tmp_files[5])

    start_time = time.time()

    data = []
    metadata = []
    total_flows = 0
    for f in file_names:
        try:  # just a robustness check
            parser = DataParser(f)
            tmpBD = parser.getByteDistribution()
            tmpIPT = parser.getIndividualFlowIPTs()
            tmpPL = parser.getIndividualFlowPacketLengths()
            tmp, tmp_m = parser.getIndividualFlowMetadata()
        except:
            continue

        # flows += parser.advancedInfo
        if parser.advancedInfo == None:
            continue
        for k in parser.advancedInfo:
            flows[k] = parser.advancedInfo[k]

        if tmp != None and tmpPL != None and tmpIPT != None:
            for i in range(len(tmp)):
                # if not parser.flows['appflows'][i]['flow']['sa'].startswith(subnet) and \
                #    not parser.flows['appflows'][i]['flow']['da'].startswith(subnet):
                #     continue
                tmp_id = ''
                if tmp_m[len(tmp) - i - 1][0].startswith(subnet):
                    tmp_id = tmp_m[len(tmp) - i - 1][0]
                elif tmp_m[len(tmp) - i - 1][1].startswith(subnet):
                    tmp_id = tmp_m[len(tmp) - i - 1][1]
                else:
                    continue

                tmp_data = []
                tmp_data.extend(tmp[len(tmp) - i - 1])
                tmp_data.extend(tmpPL[len(tmp) - i - 1])
                tmp_data.extend(tmpIPT[len(tmp) - i - 1])
                tmp_data.extend(tmpBD[len(tmp) - i - 1])

                data.append(tmp_data)
                metadata.append(tmp_m[len(tmp) - i - 1])
                total_flows += 1

                if total_flows == count_flocap * 2 and not is_upload:
                    break
        if total_flows == count_flocap * 2 and not is_upload:
            break

    if request.files.get('upload') != None:
        os.removedirs(dir_name)

    results = classify_samples(data, metadata)

    tmp = {}
    to_display = []
    to_display_names = []
    for key in display_fields:
        to_display_names.append(display_fields[key])

    for i in range(len(results)):
        color = []
        for j in range(len(results[i])):
            color.append(get_color(results[i][j]))

        tmp_id = ''
        if metadata[i][0].startswith(subnet):
            tmp_id = metadata[i][0]
        elif metadata[i][1].startswith(subnet):
            tmp_id = metadata[i][1]
        else:
            continue

        tmp_to_display = []
        for key in display_fields:
            tmp_to_display.append(metadata[i][key])

        if tmp_id not in devices_:
            devices_[tmp_id] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
            tmp[tmp_id] = []

        devices_[tmp_id][0] += 1  # total flows
        if metadata[i][9] in ciphers:
            (name_, rec_) = ciphers[metadata[i][9]]
            if rec_ == 'RECOMMENDED':
                devices_[tmp_id][1] += 1
            elif rec_ == 'LEGACY':
                devices_[tmp_id][2] += 1
            elif rec_ == 'AVOID':
                devices_[tmp_id][3] += 1
        if metadata[i][10] != -1:
            devices_[tmp_id][metadata[i][12] + 4] += 1

        tmp[tmp_id].append((results[i], metadata[i][0], metadata[i][1], metadata[i][2],
                            metadata[i][3], metadata[i][4], metadata[i][5], metadata[i][6],
                            metadata[i][7], color, '', '', metadata[i][8], tmp_to_display))

    return template('devices', devices=devices_, subnet=subnet + '*', results=tmp,
                    num_flows=len(results), classifier_names=classifier_names,
                    to_display_names=to_display_names)
def results():
    #def results(data={}):
    global flows
    global data
    global metadata
    global count_flocap
    global classifiers_to_display
    global classifier_names

    classifiers_to_display = []
    classifier_names = []
    display_fields = OrderedDict({})

    config_file = 'laui.cfg'
    fp = open(config_file, 'r')
    for line in fp:
        if line.startswith('display_field'):
            tokens = line.split()
            display_fields[int(tokens[3])] = (tokens[1], tokens[2].replace('_', ' '))
            continue
        elif line.strip() == '' or line.startswith('#') or not line.startswith('classifier'):
            continue
        tokens = line.split()
        if tokens[2] == 'logreg':
            classifiers_to_display.append((tokens[1], tokens[2], tokens[3], tokens[4]))
            classifier_names.append(tokens[1])
        elif tokens[2] == 'mapping':
            tmp_map = {}
            with open(tokens[4], 'r') as fp2:
                for line2 in fp2:
                    tokens2 = line2.split()
                    tmp_map[tokens2[0]] = float(tokens2[1])
            classifiers_to_display.append((tokens[1], tokens[2], tmp_map, int(tokens[3])))
            classifier_names.append(tokens[1])
    fp.close()

    file_names = []
    is_upload = False
    if request.files.get('upload') != None:
        # if False:
        upload = request.files.get('upload')
        dir_name = tempfile.mkdtemp()
        upload.save(dir_name + 'temp.json')
        file_names.append(dir_name + 'temp.json')
        is_upload = True
    else:
        tmp_files = get_files_by_time(out_dir)
        tmp_files.reverse()
        if len(tmp_files) > 0:
            file_names.append(out_dir + tmp_files[0])
        if len(tmp_files) > 1:
            file_names.append(out_dir + tmp_files[1])
        if len(tmp_files) > 2:
            file_names.append(out_dir + tmp_files[2])
        if len(tmp_files) > 3:
            file_names.append(out_dir + tmp_files[3])
        if len(tmp_files) > 4:
            file_names.append(out_dir + tmp_files[4])
        if len(tmp_files) > 5:
            file_names.append(out_dir + tmp_files[5])

    start_time = time.time()

    data = []
    metadata = []
    total_flows = 0
    for f in file_names:
        try:  # just a robustness check
            parser = DataParser(f)
            tmpBD = parser.getByteDistribution()
            tmpIPT = parser.getIndividualFlowIPTs()
            tmpPL = parser.getIndividualFlowPacketLengths()
            tmp, tmp_m = parser.getIndividualFlowMetadata()
        except:
            continue

        # flows += parser.advancedInfo
        if parser.advancedInfo == None:
            continue
        for k in parser.advancedInfo:
            flows[k] = parser.advancedInfo[k]

        if tmp != None and tmpPL != None and tmpIPT != None:
            for i in range(len(tmp)):
                tmp_data = []
                tmp_data.extend(tmp[len(tmp) - i - 1])
                tmp_data.extend(tmpPL[len(tmp) - i - 1])
                tmp_data.extend(tmpIPT[len(tmp) - i - 1])
                tmp_data.extend(tmpBD[len(tmp) - i - 1])

                # nga issue, will fix when pcaps start flowing again
                if tmp_data[2] == 0 and tmp_data[4] > 0:
                    continue
                if tmp_data[3] == 0 and tmp_data[5] > 0:
                    continue
                # if len(tmp_data) != num_params:
                #     continue

                data.append(tmp_data)
                metadata.append(tmp_m[len(tmp) - i - 1])
                total_flows += 1

                if total_flows == count_flocap * 2 and not is_upload:
                    break
        if total_flows == count_flocap * 2 and not is_upload:
            break

    if request.files.get('upload') != None:
        os.removedirs(dir_name)

    results = classify_samples(data, metadata)

    lhost = {}
    for i in range(len(metadata)):
        if metadata[i][0] not in lhost:
            lhost[metadata[i][0]] = 1
        else:
            lhost[metadata[i][0]] += 1
    sorted_lhost = sorted(lhost.items(), key=operator.itemgetter(1))
    sorted_lhost.reverse()
    if len(sorted_lhost) > 0:
        (lh, _) = sorted_lhost[0]
    else:
        lh = None

    tmp = []
    to_display = []
    to_display_names = []
    for key in display_fields:
        to_display_names.append(display_fields[key])

    for i in range(len(results)):
        color = []
        for j in range(len(results[i])):
            color.append(get_color(results[i][j]))

        s_orgName = ''
        d_orgName = ''
        if metadata[i][0] == lh:
            s_orgName = 'localhost'
        if metadata[i][1] == lh:
            d_orgName = 'localhost'

        tmp_to_display = []
        for key in display_fields:
            tmp_to_display.append(metadata[i][key])

        tmp.append((results[i], metadata[i][0], metadata[i][1], metadata[i][2],
                    metadata[i][3], metadata[i][4], metadata[i][5], metadata[i][6],
                    metadata[i][7], color, s_orgName, d_orgName, metadata[i][8],
                    tmp_to_display))

    end_time = time.time() - start_time

    tmp = sorted(tmp, key=lambda x: x[0])
    tmp.reverse()

    return template('results', results=tmp, num_flows=len(results), t=end_time,
                    classifier_names=classifier_names, to_display_names=to_display_names)
        # fragment: tail of the truncated activation-function selector above
        assert False, 'no activation function'


def cost_func(y, y_hat, batch_size, choice='euclidean'):
    if choice == 'euclidean':
        return T.sum((y - y_hat) ** 2) / batch_size
    else:
        assert False, 'no cost function'


t_start = time.time()

sample_file = 'mfcc/train.ark'
test_file = 'mfcc/test.ark'
label_file = 'label/train.lab'
label_map_file = 'phones/48_39.map'

DataParser.load(sample_file, label_file, label_map_file)
#DataParser.test()

dim_x = DataParser.dimension_x
dim_y_hat = DataParser.dimension_y
batch_size = 21
neuron_num = 64
epoch_cycle = 50
learning_rate = 0.01
lr = theano.shared(learning_rate)
lr_decay = 1.0

# e.g. matrix 3*2 dot matrix 2*1 = matrix 3*1
# [[1., 3.], [2., 2.], [3., 1.]] dot [[2], [1]] = [[5.], [6.], [7.]]
x = T.matrix('input', dtype='float64')          # matrix of dim_x * batch_size
y_hat = T.matrix('reference', dtype='float64')  # matrix of dim_y_hat * batch_size
def __init__(self, data_dir, initial_time, time_step, max_chunk_size):
    self.data_parser = DataParser(data_dir, initial_time, time_step, max_chunk_size)
            # fragment: tail of a truncated user-input confirmation helper
            needToRepeatInput = True
        else:
            print 'inputAnswer is neither affirmative nor negative'
            pass
    print 'userInput is', userInput
    return userInput


if __name__ == '__main__':
    signal.signal(signal.SIGINT, signal_handler)
    #building = raw_input()
    #level = raw_input()
    internetConnection = False
    speechInput = None
    speaker = AudioFeedback()

    userInputInitialise = DataParser()
    voiceInput, numpadData = userInputInitialise.location_input_type()
    if voiceInput:
        print 'using voice input'
        print 'initialising speech recognition'
        speechInput = SpeechRecognition()
    else:
        print 'using keypad input'

    print 'starting program'
    askBuildingName = 'please enter building name'
    speaking_proc = Process(target=speaker.threadedFeedback, args=(askBuildingName,))
    speaking_proc.start()
    speaking_proc.join()
    #building = raw_input()
def provideDirections(self, nextCheckPoint, currentCheckPoint, pos_x, pos_y, speaker):
    #threading.Timer(1.0, provideDirections(nextCheckPoint, currentCheckPoint, pos_x, pos_y)).start()
    print "start of function"
    sayNextCheckPoint = 'your next checkpoint is %s\n' % (mapinfo['map'][nextCheckPoint - 1]['nodeName'])
    #speaker.threadedFeedback(sayNextCheckPoint)
    speaking_proc = Process(target=speaker.threadedFeedback, args=(sayNextCheckPoint,))
    speaking_proc.start()
    speaking_proc.join()

    detourCheckPoint = False
    reachCheckPoint = False
    start_time = time.time()
    time_to_speak = time.time()
    dist = None
    change_direction = None
    direction = '%s %lf degrees, %lf'
    speak_direction = '%s %d degrees, and walk %d point %d meters\n'
    isFirstTimeGivingDirections = False
    isFirstTimeProcDirections = True
    oneSecondHasPassed = False

    while True:
        if (time.time() - start_time > 1) or isFirstTimeProcDirections:
            start_time = time.time()
            #distance, heading = input()
            dataParser = DataParser()
            step = dataParser.get_step_read()
            print "NUMBER OF STEPS %d\n", step
            compass_read = dataParser.get_compass_read()
            print "compass_read", compass_read
            del dataParser

            if not isFirstTimeProcDirections:
                if step == 0:
                    print 'going into calcDisplacement\n\n\n'
                    reachCheckPoint, pos_x, pos_y, detourCheckPoint, speak_direction = \
                        self.calcDisplacement(0, compass_read, 5, pos_x, pos_y,
                                              nextCheckPoint, currentCheckPoint)
                else:
                    for i in range(step):
                        print 'step #', i
                        reachCheckPoint, pos_x, pos_y, detourCheckPoint, speak_direction = \
                            self.calcDisplacement(1, compass_read, i, pos_x, pos_y,
                                                  nextCheckPoint, currentCheckPoint)
            if oneSecondHasPassed:
                isFirstTimeGivingDirections = True
                oneSecondHasPassed = False
            if isFirstTimeProcDirections:
                oneSecondHasPassed = True
                isFirstTimeProcDirections = False

        if (time.time() - time_to_speak > 4) or isFirstTimeGivingDirections:
            isFirstTimeGivingDirections = False
            time_to_speak = time.time()
            speaking_proc = Process(target=speaker.threadedFeedback, args=(speak_direction,))
            speaking_proc.start()

        if reachCheckPoint:
            speak_direction = 'checkpoint reached'
            speaking_proc = Process(target=speaker.threadedFeedback, args=(speak_direction,))
            speaking_proc.start()
            break
        if detourCheckPoint:
            break

    return reachCheckPoint, pos_x, pos_y, detourCheckPoint