def test_bad_data(self):
    data_lines = [
        'Temperature1: 56',
        'Temperature2=54 ',
        'Temperature3=53',
        'Temperature3=52',
        'Temperature1=666',
        'Temperature1',
        'Error',
        'Temperature1=666degrees',
        'Temperature4=No data',
    ]
    expected_result = {
        1: [666],
        2: [54],
        3: [53, 52],
    }
    parser = DataParser()
    for line in data_lines:
        parser.parse(line)
    parsing_results = parser.get_results()
    self.assertEqual(expected_result, parsing_results)
def test_good_data(self):
    data_lines = [
        'Temperature1=56',
        'Temperature2=54 ',
        'Temperature3=53',
        'Temperature3=52',
        'Temperature1=666',
    ]
    expected_result = {
        1: [56, 666],
        2: [54],
        3: [53, 52],
    }
    parser = DataParser()
    for line in data_lines:
        parser.parse(line)
    parsing_results = parser.get_results()
    self.assertEqual(expected_result, parsing_results)
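# The two tests above pin down the expected behaviour: only lines of the form
# 'TemperatureN=<integer>' (trailing whitespace allowed) are accepted, and values
# are grouped per sensor index. A minimal sketch of a parser that would satisfy
# them -- an illustration under those assumptions, not the project's actual DataParser:
import re
from collections import defaultdict


class SketchDataParser:
    """Collects integer readings keyed by sensor index."""

    _LINE_RE = re.compile(r'^Temperature(\d+)=(-?\d+)\s*$')

    def __init__(self):
        self._results = defaultdict(list)

    def parse(self, line):
        # Silently skip malformed lines such as 'Temperature1: 56' or 'Error'.
        match = self._LINE_RE.match(line)
        if match:
            self._results[int(match.group(1))].append(int(match.group(2)))

    def get_results(self):
        return dict(self._results)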
def test_DataParser_parse_sample(self):
    data_parser = DataParser(' ')
    for line in self._link:
        try:
            # generate results
            dicom_file_path = os.path.join(self._data_path, line)
            contour_file_path = os.path.join(self._data_path, self._link[line])
            shape, img, mask, contour = data_parser.parse_sample(
                dicom_file_path, contour_file_path)
            np.savez(os.path.join(self._result_path, os.path.splitext(line)[0]) + '.npz',
                     shape=shape, img=img, mask=mask, contour=contour)

            # Load baseline and do the comparison
            baseline = np.load(
                os.path.join(self._baseline_path, os.path.splitext(line)[0]) + '.npz')
            equal1 = np.array_equal(baseline['shape'], shape)
            equal2 = np.array_equal(baseline['img'], img)
            equal3 = np.array_equal(baseline['mask'], mask)
            equal4 = np.array_equal(baseline['contour'], contour)
            if not (equal1 and equal2 and equal3 and equal4):
                return False
        except:
            return False
    return True
def main():
    threshold = 60
    second_threshold = 10
    stream = SensorStream()
    CONTINUOUS_INCREMENT = False
    TCP_IP = '192.168.43.222'
    TCP_PORT = 80
    BUFFER_SIZE = 1024
    sleep_time = 0.05

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect((TCP_IP, TCP_PORT))

    parser = DataParser(acc_unit=100, gy_unit=128)
    mov = 0
    rotating = False
    while True:
        raw_bytestream = s.recv(BUFFER_SIZE)
        parser.parse_data(raw_bytestream, stream)
        x, y, z, gX, gY, gZ = stream.getValues()
        print x, y, z, gX, gY, gZ
        time.sleep(sleep_time)
def test_04_parser_parse_raw_data(self):
    input = "empid=D011\ngender=M\nage=29"
    parser = DataParser()
    parser.parse_raw_data(input)
    expected = [{'empid': 'D011', 'gender': 'M', 'age': '29'}]
    actual = parser.get_data()
    self.assertEqual(expected, actual)
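# For reference, the record format exercised above is newline-separated key=value
# pairs, with one dict appended per raw record and values kept as strings. A hedged
# sketch of parse_raw_data consistent with that expectation (illustrative only,
# not the implementation under test):
class SketchRecordParser:
    def __init__(self):
        self._data = []

    def parse_raw_data(self, raw):
        record = {}
        for line in raw.splitlines():
            key, _, value = line.partition('=')
            record[key] = value          # values stay as strings, e.g. age -> '29'
        self._data.append(record)

    def get_data(self):
        return self._data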
def test_parse_unicode_data(self):
    file_format = FileFormat.from_csv('tests/specs/simple_format.csv')
    parser = DataParser('tests/data/simple_format_2015-06-28-unicode.txt', file_format)
    self.assertEquals(
        [[['Foonyor', 1, 1], [u'Barzàne'.encode('utf-8'), 0, -12]]],
        list(parser.gen_data())
    )
def main(args):
    data_parser = DataParser(data_prefix=args.data_prefix,
                             images_dirpath=args.images_dirpath,
                             masks_dirpath=args.masks_dirpath,
                             img_masks_filepath=args.img_masks_filepath,
                             contours_type=args.contours_type,
                             logs_prefix=args.logs_prefix,
                             visualize_contours=args.visualize_contours)
    data_parser.parse()
def get_logs():
    dp = DataParser(constants.log_file_path)
    mentions_by_ticker = dp.mentions_by_ticker()

    # summarize by user
    clustered_messages = dp.messages_by_user('JohnArtman',
                                             start_date=datetime(2018, 6, 1),
                                             end_date=datetime(2019, 1, 1))
    summarized_messages = get_summary(clustered_messages, 5)
    print(summarized_messages)
def main():
    if args.filename is None:
        parser.print_help()
        sys.exit(1)

    data_parser = DataParser(args.filename)
    jobs, machines, tasks = data_parser.get_instance_parameters()

    instance = Instance('Roxanne', machines, jobs, tasks)
    instance.print_info()
    instance.generate_best_cmax()
    instance.johnsons_algorithm()
def load_data(self, idir):
    # loop over all files in the source dir
    files = os.listdir(idir)
    for f in files:
        try:
            dParse = DataParser(idir + "/" + f, analyse=0, compact=1)
            self.flow_cnt += dParse.lines_cnt
        except:
            print("Error: failed to parse file", (idir + f))
            self.errors += 1
            continue

        # binary classification label
        #   1 - correct traffic
        #   0 - anomaly traffic
        #label = 1  # int(f.split("-")[-1].split(".")[0])
        #tmpTLS = dParse.getTLSInfo()

        # Features extraction
        #tmpBD, tmpBDL = dParse.getByteDistribution()
        tmpIPT = dParse.getIndividualFlowIPTs()
        tmpPL = dParse.getIndividualFlowPacketLengths()
        tmp = dParse.getIndividualFlowMetadata(PKTS=0, BYTES=0, FLOW_TIME=0, WHT=1,
                                               BYTE_DIST_M=0, BYTE_DIST_S=1,
                                               ENTROPY=0, IDP=1)
        if tmpPL != None:  # and tmpPL != None and tmpIPT != None:
            # iterate over every flow
            for i in range(len(tmpPL)):
                tmp_data = []
                tmp_data.extend(tmp[i])
                tmp_data.extend(tmpPL[i])
                #tmp_data.extend(tmpIPT[i])
                ##tmp_data.extend(tmpBD[i])
                #tmp_data.extend(tmpBDL[i])
                #print("FlowMetadata", tmp[i])
                #print("PacketLengths", tmpPL[i])
                #print("IndividualFlowIPT", tmpIPT[i])
                #print("bd", list(tmpBD[i]))
                #tmp_data.extend(tmpTLS[i])
                if self.features_cnt == 0:
                    self.features_cnt = len(tmp_data)
                self.data.append(tmp_data)
                #print(self.data[i])
                #print("final data length and sum", len(self.data[i]), sum(self.data[i]))
                self.labels.append(self.label)
def __init__(self, plot_painter, params, port_settings=DEFAULT_PORT_SETTINGS):
    super().__init__()
    self.data_parser = DataParser(params)
    self.plot_painter = plot_painter
    self.worker = SerialWorker(port_settings)
    self.thread = QThread()
    self.worker.moveToThread(self.thread)
    self.worker.read_data_signal.connect(self.add_data)
    self.signal_start_background_job.connect(self.worker.run)
def main(_):
    feat_dict = FeatureDictionary()
    print("feature_size: %d" % feat_dict.feature_size)
    print("field_size: %d" % feat_dict.field_size)
    print(feat_dict.col2feat_id.keys())

    dataparser = DataParser(feat_dict, FLAGS.label)
    train_ids, train_vals, train_labels = dataparser.parse(infile="%s\\train_sample.csv" % FLAGS.data_dir)
    print("len of train: %d" % len(train_ids))
    test_ids, test_vals, test_labels = dataparser.parse(infile="%s\\test_sample.csv" % FLAGS.data_dir)
    print("len of test: %d" % len(test_ids))

    # ------build Tasks------
    model_params = {
        "field_size": feat_dict.field_size,
        "feature_size": feat_dict.feature_size,
        "embedding_size": FLAGS.embedding_size,
        "learning_rate": FLAGS.learning_rate,
        "l2_reg": FLAGS.l2_reg,
        "deep_layers": FLAGS.deep_layers,
        "dropout": FLAGS.dropout,
        "experts_num": 3,
        "experts_units": 32,
        "use_experts_bias": True,
        "use_gate_bias": True
    }
    print(model_params)
    DeepFM = build_model_estimator(model_params)
    # DeepFM = tf.contrib.estimator.add_metrics(DeepFM, my_auc)

    if FLAGS.task_type == 'train':
        train_spec = tf.estimator.TrainSpec(
            input_fn=lambda: input_fn(train_ids, train_vals, train_labels,
                                      num_epochs=FLAGS.num_epochs,
                                      batch_size=FLAGS.batch_size))
        eval_spec = tf.estimator.EvalSpec(
            input_fn=lambda: input_fn(test_ids, test_vals, test_labels,
                                      num_epochs=1, batch_size=FLAGS.batch_size),
            steps=None,
            start_delay_secs=1000,
            throttle_secs=1200)
        tf.estimator.train_and_evaluate(DeepFM, train_spec, eval_spec)
        results = DeepFM.evaluate(
            input_fn=lambda: input_fn(test_ids, test_vals, test_labels,
                                      num_epochs=1, batch_size=FLAGS.batch_size))
        for key in results:
            log.info("%s : %s" % (key, results[key]))
    elif FLAGS.task_type == 'eval':
        results = DeepFM.evaluate(
            input_fn=lambda: input_fn(test_ids, test_vals, test_labels,
                                      num_epochs=1, batch_size=FLAGS.batch_size))
        for key in results:
            log.info("%s : %s" % (key, results[key]))
    elif FLAGS.task_type == 'infer':
        preds = DeepFM.predict(
            input_fn=lambda: input_fn(test_ids, test_vals, test_labels,
                                      num_epochs=1, batch_size=FLAGS.batch_size),
            predict_keys="prob")
        with open(FLAGS.data_dir + "/pred.txt", "w") as fo:
            for prob in preds:
                fo.write("%f\n" % (prob['prob']))
def predict(self, X):
    #print X
    data_parser = DataParser()
    Xtrain = pd.Series(X)
    input_texts = []
    for input_text in Xtrain:
        input_texts.append(data_parser.split(input_text.decode('utf-8')))

    count_vectorizer = CountVectorizer(
        vocabulary=self.vocabulary  # reuse the vocabulary built at training time
    )
    feature_vectors = count_vectorizer.fit_transform(input_texts)
    return self.estimator.predict(feature_vectors)
def test_parse_dados(self):
    self.assertEqual(len(Professor.instances.values()), 0)
    self.assertEqual(len(Turma.instances.values()), 0)
    self.assertEqual(len(Materia.instances.values()), 0)
    self.assertEqual(len(Vertice.instances), 0)

    dt = DataParser("data/Escola_A.xlsx")
    dt.parse_dados()

    self.assertEqual(len(Professor.instances.values()), 28)
    self.assertEqual(len(Turma.instances.values()), 11)
    self.assertEqual(len(Materia.instances.values()), 11)
    self.assertEqual(len(Vertice.instances), 302)
def test_parse_dados_professor(self):
    self.assertEqual(len(Professor.instances.values()), 0)
    self.assertEqual(len(Vertice.instances), 0)

    dt = DataParser("data/Escola_A.xlsx")
    dt.parse_dados()

    self.assertEqual(len(Professor.instances.values()), 28)
    self.assertEqual(len(Vertice.instances), 302)
    for instance in Professor.instances.values():
        lista_vertices = [vertice for vertice in Vertice.instances
                          if vertice.professor == instance]
        self.assertEqual(lista_vertices, instance.vertices)
def add_deadline(data=os.path.dirname(__file__) + "/data.json"):
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('calendar', 'v3', http=http)

    my_data = DataParser()
    deadline_dict = my_data.json_to_dict(data)
    for index, deadline in deadline_dict.items():
        events = my_data.gen_event(deadline)
        for event in events:
            if not is_duplicate_event(service, event):
                create_event(service, event)
def test_parse_dados_materia(self):
    self.assertEqual(len(Materia.instances.values()), 0)
    self.assertEqual(len(Vertice.instances), 0)

    dt = DataParser("data/Escola_A.xlsx")
    dt.parse_dados()

    self.assertEqual(len(Materia.instances.values()), 11)
    self.assertEqual(len(Vertice.instances), 302)
    for instance in Materia.instances.values():
        lista_vertices = [vertice for vertice in Vertice.instances
                          if vertice.materia == instance]
        self.assertEqual(lista_vertices, instance.vertices)
def test_parse_configuracoes(self):
    self.assertEqual(len(Hora.instances.values()), 0)
    self.assertEqual(len(Horario.instances.values()), 0)

    dt = DataParser("data/Escola_A.xlsx")
    dt.parse_configuracoes()

    self.assertEqual(len(Hora.instances.values()), 6)
    self.assertEqual(len(Horario.instances.values()), 30)
    lista_cores = [horario.cor for horario in Horario.instances.values()]
    for cor in range(30):
        self.assertIn(cor, lista_cores)
def test_parse_restricoes_professor(self):
    self.assertEqual(len(Hora.instances.values()), 0)
    self.assertEqual(len(Horario.instances.values()), 0)
    self.assertEqual(len(Professor.instances.values()), 0)

    dt = DataParser("data/Escola_A.xlsx")
    dt.parse_dados()
    dt.parse_configuracoes()
    dt.parse_restricoes_professor()

    self.assertEqual(len(Hora.instances.values()), 6)
    self.assertEqual(len(Horario.instances.values()), 30)
    self.assertEqual(len(Professor.instances.values()), 28)

    professor_1 = Professor('Professor 1')
    self.assertEqual(len(professor_1.restricoes), 1)
    self.assertIn(Horario.get(Horario.construir_identificador('Terça', Hora('10:40:00'))),
                  professor_1.restricoes)

    professor_2 = Professor('Professor 2')
    self.assertEqual(len(professor_2.restricoes), 2)
    self.assertIn(Horario.get(Horario.construir_identificador('Segunda', Hora('11:30:00'))),
                  professor_2.restricoes)
    self.assertIn(Horario.get(Horario.construir_identificador('Segunda', Hora('07:00:00'))),
                  professor_2.restricoes)
def main():
    terminal_command = sys.argv[1:]
    terminal_parser = TerminalParser()
    terminal_parser.add_argument('-i', '--input', default=[], nargs='+')
    terminal_parser.add_argument('-o', '--output', default=[], nargs='+')

    input_files = terminal_parser.get_list_of_input_files(terminal_command)
    output_files = terminal_parser.get_list_of_output_files(terminal_command)

    for file_name in input_files:
        data_parser = DataParser(file_name)
        file_extension = data_parser.extract_extension_from_filename()
        print(file_extension)
def test_parse_restricoes_turma(self):
    self.assertEqual(len(Hora.instances.values()), 0)
    self.assertEqual(len(Horario.instances.values()), 0)
    self.assertEqual(len(Turma.instances.values()), 0)

    dt = DataParser("data/Escola_A.xlsx")
    dt.parse_dados()
    dt.parse_configuracoes()
    dt.parse_restricoes_turma()

    self.assertEqual(len(Hora.instances.values()), 6)
    self.assertEqual(len(Horario.instances.values()), 30)
    self.assertEqual(len(Turma.instances.values()), 11)

    turma_1 = Turma('1')
    self.assertEqual(len(turma_1.restricoes), 2)
    self.assertIn(Horario.get(Horario.construir_identificador('Terça', Hora('11:30:00'))),
                  turma_1.restricoes)
    self.assertIn(Horario.get(Horario.construir_identificador('Segunda', Hora('11:30:00'))),
                  turma_1.restricoes)

    turma_8 = Turma('8')
    self.assertEqual(len(turma_8.restricoes), 3)
    self.assertIn(Horario.get(Horario.construir_identificador('Quarta', Hora('11:30:00'))),
                  turma_8.restricoes)
    self.assertIn(Horario.get(Horario.construir_identificador('Segunda', Hora('11:30:00'))),
                  turma_8.restricoes)
    self.assertIn(Horario.get(Horario.construir_identificador('Quarta', Hora('10:40:00'))),
                  turma_8.restricoes)
class LaunchFunctor:
    def __init__(self, data_dir, initial_time, time_step, max_chunk_size):
        self.data_parser = DataParser(data_dir, initial_time, time_step, max_chunk_size)

    def __call__(self, task):
        chunk_ind = task.chunk
        block_ind = task.block
        surf_ind = task.surface
        observer = task.observer

        surf_info = self.data_parser.parse_geometry_file(surf_ind, block_ind)
        gdf_info = self.data_parser.parse_data_file_chunk(surf_ind, block_ind, chunk_ind)
        start_time = self.data_parser.get_time_offset(surf_ind, block_ind, chunk_ind) + \
            self.data_parser.initial_time

        return {'start_time': start_time,
                'observer': observer,
                'gdf_info': gdf_info,
                'surf_info': surf_info}
def __init__(self, config, stack_players=None):
    self.config = config
    self.dataParser = DataParser(config)
    self.constrainedModel = ConstrainedModel(self.config, self.dataParser, stack_players)
    self.backtester = Backtester(self.config, self.dataParser, self.constrainedModel)
async def crash_info(request):
    '''
    crash info parses the crash record and returns a JSON object
    '''
    log.info("Handling '/api/v1/getCrashInfo'")
    angle, max_force_offset, _, _, _ = DataParser().parse_input_data(
        request.body.decode('utf8'))
    return json({'impactAngle': angle, 'offsetMaximumForce': max_force_offset})
class QLaunchFunctor:
    def __init__(self, data_dir, initial_time, time_step, max_chunk_size):
        self.data_parser = DataParser(data_dir, initial_time, time_step, max_chunk_size)

    def __call__(self, task_seq, queue):
        grouped_task_seq = task_seq['observer'].groupby(
            [task_seq.block, task_seq.chunk, task_seq.surface])
        for (block_ind, chunk_ind, surf_ind), observer_series in grouped_task_seq:
            surf_info = self.data_parser.parse_geometry_file(surf_ind, block_ind)
            gdf_info = self.data_parser.parse_data_file_chunk(surf_ind, block_ind, chunk_ind)
            start_time = self.data_parser.get_time_offset(surf_ind, block_ind, chunk_ind) + \
                self.data_parser.initial_time
            for observer in observer_series:
                task = {'block_ind': block_ind,
                        'surf_ind': surf_ind,
                        'chunk_ind': chunk_ind,
                        'start_time': start_time,
                        'observer': observer,
                        'gdf_info': gdf_info,
                        'surf_info': surf_info}
                queue.put(task)
def load_family_details(self, pheno_covar):
    """Load family data updating the pheno_covar with family ids found.

    :param pheno_covar: Phenotype/covariate object
    :return: None
    """
    file = open(self.fam_details)
    header = file.readline()
    format = file.readline()
    self.file_index = 0

    mask_components = []        # 1s indicate an individual is to be masked out
    for line in file:
        words = line.strip().split()
        indid = ":".join(words[0:2])
        if DataParser.valid_indid(indid):
            mask_components.append(0)

            sex = int(words[5])
            pheno = float(words[6])
            pheno_covar.add_subject(indid, sex, pheno)
        else:
            mask_components.append(1)

    mask_components = numpy.array(mask_components)
    self.ind_mask = numpy.zeros(len(mask_components) * 2, dtype=numpy.int8).reshape(-1, 2)
    self.ind_mask[0:, 0] = mask_components
    self.ind_mask[0:, 1] = mask_components
    self.ind_count = self.ind_mask.shape[0]
    pheno_covar.freeze_subjects()
def scraper(i):
    tag_links = cPickle.load(open('../data/tag_names_and_links{0}.p'.format(i), 'rb'))
    count_tag = 0
    for tag_name, init_tag_link in tag_links:
        count_tag += 1
        tag_data = []
        count_page = 0
        # print str(i) + ' time: ' + str(int(time.time() - start_time) / 60) + ' min'
        print str(i) + ' started tag {0}/{1} : {2}'.format(count_tag, len(tag_links), tag_name)
        while True:
            count_page += 1
            tag_link = init_tag_link + str(count_page) if count_page != 1 else init_tag_link
            tag_page = urlopen(tag_link).read()
            question_links = map(lambda tail: 'http://pravoved.ru' + tail,
                                 re.findall(re.compile('(?<=target="_blank" href=")(.+?)(?=")',
                                                       flags=re.DOTALL),
                                            tag_page))
            if not question_links:
                break
            for question_link in question_links:
                tag_data.append([tag_name] + DataParser().get_data(question_link))
                # header, question, answers, additions = DataParser().get_data(question_link)
                # print "HEADER\n" + header
                # print "QUESTION\n" + question
                # print "ANSWERS\n" + answers
                # print "ADDITIONS\n" + additions
        pd.DataFrame(tag_data, columns=['tag', 'header', 'question', 'answers', 'additions'])\
            .to_csv('../data/{0}-{1}extra.csv'.format(i, count_tag), sep='\t',
                    index=False, encoding='utf-8')
    print str(i) + ' ENDED'
def __init__(self, config, stacks_finder):
    self.config = config
    self.data_parser = DataParser(self.config)
    self.stacks = stacks_finder.find_stacks()
    self.solution_queue = mp.Queue()
    self.num_lineups = self.modify_num_lineups(config)
    self.lineups = []
def setUp(self):
    self.parser = DataParser()
    self.cmd_view = CmdView()
    self.file_reader = FileReader()
    self.validator = Validator()
    self.db = Database("test.db")
    self.vis = Visualiser()
    self.val = Validator()
    self.serial = Serializer()
    self.controller = Controller(self.cmd_view,
                                 self.file_reader,
                                 self.parser,
                                 self.validator,
                                 self.db,
                                 self.vis,
                                 self.serial)
    self.init()
def __init__(self, filename, mode='file'):
    if mode == 'file':
        parser = DataParser(filename)
        self.data = parser.data
        self.headers = parser.headers
    self.theta_0 = 0.0
    self.theta_1 = 0.0
    self.prev_mse = 0.0
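# The theta_0/theta_1 and prev_mse fields above suggest a univariate linear fit
# y = theta_0 + theta_1 * x trained by gradient descent on mean squared error.
# A hedged sketch of one batch update under that assumption (illustrative only;
# the real class may train differently):
def gradient_step(theta_0, theta_1, points, learning_rate=0.01):
    """One batch gradient-descent step on MSE for (x, y) pairs.

    The constant factor 2 from differentiating the square is folded into the
    learning rate, as is common in simple implementations.
    """
    n = len(points)
    grad_0 = sum((theta_0 + theta_1 * x) - y for x, y in points) / n
    grad_1 = sum(((theta_0 + theta_1 * x) - y) * x for x, y in points) / n
    return theta_0 - learning_rate * grad_0, theta_1 - learning_rate * grad_1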
def test_DataParser_save_sample(self):
    data_parser = DataParser(' ')
    data_parser.set_result_path(self._result_path)
    for line in self._link:
        try:
            # generate results
            dicom_file_path = os.path.join(self._data_path, line)
            contour_file_path = os.path.join(self._data_path, self._link[line])
            shape, img, mask, contour = data_parser.parse_sample(
                dicom_file_path, contour_file_path)
            data_parser.save_sample(
                os.path.splitext(line)[0], shape, img, mask, contour)

            # Load baseline and do the comparison
            with h5py.File(os.path.join(self._baseline_path, 'data.h5'), 'r') as hf:
                sample = hf[os.path.splitext(line)[0]]
                equal1 = np.array_equal(np.array(sample['shape']), shape)
                equal2 = np.array_equal(np.array(sample['img']), img)
                equal3 = np.array_equal(np.array(sample['mask']), mask)
                if not (equal1 and equal2 and equal3):
                    return False
        except:
            return False
    return True
def core(coin: str, fiat: str, amount: float, reverse: bool, clipboard: bool,
         verbose: bool, timer: bool, wordform: bool, no_cache: bool):
    """
    Converts a cryptocurrency amount to a fiat equivalent or vice-versa

    Valid fiat currency values are: "AUD", "BRL", "CAD", "CHF", "CLP", "CNY",
    "CZK", "DKK", "EUR", "GBP", "HKD", "HUF", "IDR", "ILS", "INR", "JPY", "KRW",
    "MXN", "MYR", "NOK", "NZD", "PHP", "PKR", "PLN", "RUB", "SEK", "SGD", "THB",
    "TRY", "TWD", "ZAR"

    Valid cryptocurrency values are: "BTC", "ETH", "XRP", "LTC", and "BCH"

    `c2f btc usd 50`
    Translates to: What is 50 BTC worth in USD?

    `c2f btc usd 50 --reverse`
    Translates to: What is $50 USD worth in BTC?
    """
    util_setup(logger, timer, enabled_=True)
    with MeasureBlockTime("Main block"):
        if verbose:
            logzero.loglevel(logging.DEBUG)
        data_parser = DataParser(no_cache=no_cache)
        fiat = fiat.upper()
        coin = coin.upper()
        try:
            if reverse:
                if wordform:
                    click.echo(f"What is ${amount:.2f} {fiat} worth in {coin}?")
                quantity = data_parser.convert_to_crypto(fiat.upper(), coin.upper(), amount)
                formatted_quantity = "{:.8f}".format(quantity)
                print(formatted_quantity)
            else:
                if wordform:
                    click.echo(f"How much is {amount} {coin} in {fiat}?")
                quantity = data_parser.convert_to_fiat(fiat.upper(), coin.upper(), amount)
                formatted_quantity = "{:,.2f}".format(quantity)
                print(formatted_quantity)
            if clipboard:
                pyperclip.copy(formatted_quantity)
                sh.notify_send(APP_NAME, f"{formatted_quantity} copied to clipboard")
        except Exception as e:
            logger.exception(e)
def test_parse_data(self):
    file_format = FileFormat.from_csv('tests/specs/simple_format.csv')
    parser = DataParser('tests/data/simple_format_2015-06-28.txt', file_format)
    self.assertEquals(
        [[['Foonyor', 1, 1], ['Barzane', 0, -12], ['Quuxitude', 1, 103]]],
        list(parser.gen_data())
    )
    self.assertEquals(
        [[['Foonyor', 1, 1], ['Barzane', 0, -12]], [['Quuxitude', 1, 103]]],
        list(parser.gen_data(chunk_size=2))
    )
    self.assertEquals(
        [[['Foonyor', 1, 1]], [['Barzane', 0, -12]], [['Quuxitude', 1, 103]]],
        list(parser.gen_data(chunk_size=1))
    )
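# The test above expects gen_data() to yield the whole file as one chunk by default
# and fixed-size chunks when chunk_size is given. A sketch of that chunking pattern,
# assuming an iterable of already-parsed rows (illustrative, not the tested class):
def gen_chunks(rows, chunk_size=None):
    rows = list(rows)
    if chunk_size is None:
        # default: everything in a single chunk
        yield rows
        return
    for start in range(0, len(rows), chunk_size):
        yield rows[start:start + chunk_size]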
def load_data(self, idir, label, max_files):
    files = os.listdir(idir)
    num_files = 0
    for f in files:
        try:
            dParse = DataParser(idir + f, self.compact)
        except:
            print idir + f
            print 'fail'
            continue
        num_files += 1

        tmpTLS = dParse.getTLSInfo()
        if self.bd_compact == 1:
            tmpBD = dParse.getByteDistribution_compact()
        elif self.bd_compact == 2:
            tmpBD = dParse.getByteDistribution_mean_std()
        else:
            tmpBD = dParse.getByteDistribution()
        tmpIPT = dParse.getIndividualFlowIPTs()
        tmpPL = dParse.getIndividualFlowPacketLengths()
        tmp, ignore = dParse.getIndividualFlowMetadata()

        if tmp != None and tmpPL != None and tmpIPT != None:
            for i in range(len(tmp)):
                if ignore[i] == 1 and label == 1.0:
                    continue
                tmp_data = []
                if 0 in self.types:
                    tmp_data.extend(tmp[i])
                if 1 in self.types:
                    tmp_data.extend(tmpPL[i])
                if 2 in self.types:
                    tmp_data.extend(tmpIPT[i])
                if 3 in self.types:
                    tmp_data.extend(tmpBD[i])
                if 4 in self.types:
                    tmp_data.extend(tmpTLS[i])
                if len(tmp_data) != self.num_params:
                    print len(tmp_data)
                self.data.append(tmp_data)
            for i in range(len(tmp)):
                if ignore[i] == 1 and label == 1.0:
                    continue
                self.labels.append(label)

        if max_files != None and num_files >= max_files:
            break
def predict(self, X):
    #print X
    data_parser = DataParser()
    Xtrain = pd.Series(X)
    input_texts = []
    for input_text in Xtrain:
        input_texts.append(data_parser.split(input_text[1].decode('utf-8')))
    print input_texts

    count_vectorizer = CountVectorizer(
        vocabulary=self.vocabulary  # reuse the vocabulary built at training time
    )
    feature_vectors = count_vectorizer.fit_transform(input_texts)

    # prepend category_id to the feature vectors
    features_array = feature_vectors.toarray()
    category_ids = np.array(X)[:, 0].T
    features_array = np.c_[category_ids, features_array]

    return self.estimator.predict(features_array)
def test_saving(self):
    data_lines = [
        'Temperature1: 56',
        'Temperature2=54 ',
        'Temperature3=53',
        'Temperature3=52',
        'Temperature1=666',
        'Temperature1',
        'Error',
        'Temperature1=666degrees',
        'Temperature4=No data',
        'Temperature1=54',
        'Temperature1=55',
        'Temperature4=40',
        'Temperature3=55',
    ]
    parser = DataParser()
    for line in data_lines:
        parser.parse(line)
    parser.save_results('mixed_json_results.json')
    self.assertTrue(
        filecmp.cmp('mixed_json_results.json', 'json_files//mixed_json_results.json'),
        'Files are different')
    os.remove('mixed_json_results.json')
        # fragment: tail of the truncated activation-function selector above
        assert False, 'no activation function'


def cost_func(y, y_hat, batch_size, choice='euclidean'):
    if choice == 'euclidean':
        return T.sum((y - y_hat) ** 2) / batch_size
    else:
        assert False, 'no cost function'


t_start = time.time()

sample_file = 'mfcc/train-1.ark'
test_file = 'mfcc/test.ark'
label_file = 'label/train-1.lab'
label_map_file = 'phones/48_39.map'

DataParser.load(sample_file, label_file, label_map_file)
#DataParser.test()

dim_x = DataParser.dimension_x
dim_y_hat = DataParser.dimension_y
batch_size = 10
neuron_num = 64
epoch_cycle = 2
learning_rate = 0.1
lr = theano.shared(learning_rate)
adagrad_t = theano.shared(0)

# e.g. matrix 3*2 dot matrix 2*1 = matrix 3*1
# [[1., 3.], [2., 2.], [3., 1.]] dot [[2], [1]] = [[5.], [6.], [7.]]
x = T.matrix('input', dtype='float64')          # matrix of dim_x * batch_size
y_hat = T.matrix('reference', dtype='float64')  # matrix of dim_y_hat * batch_size
import argparse
import itertools

from data_parser import DataParser
from database_connector import SimpleDatabaseConnector
from file_formats import FilenamePrefixFormatSelector

parser = argparse.ArgumentParser(description='Process data file and store to SQL.')
parser.add_argument('db_url', help='Database url')
parser.add_argument('table_name', help='Database table to create')
parser.add_argument('data_file', help='Data file to dump')
args = parser.parse_args()

# init
format_selector = FilenamePrefixFormatSelector.from_directory('specs/')
format = format_selector.get_format(args.data_file)
data_parser = DataParser(args.data_file, format)
db_connector = SimpleDatabaseConnector(args.db_url)

print 'Creating table ...'
db_connector.create_table(args.table_name, format)  # TODO rollback on failure?

print 'Inserting rows ...'
total_inserted = 0
for rows in data_parser.gen_data():
    db_connector.insert_rows(rows)
    total_inserted += len(rows)

print '%d rows inserted' % total_inserted
print 'Completed'
def devices():
    global flows
    global data
    global metadata
    global count_flocap
    global classifiers_to_display
    global classifier_names

    classifiers_to_display = []
    classifier_names = []
    display_fields = OrderedDict({})

    config_file = 'laui.cfg'
    fp = open(config_file, 'r')
    for line in fp:
        if line.startswith('display_field'):
            tokens = line.split()
            display_fields[int(tokens[3])] = (tokens[1], tokens[2].replace('_', ' '))
            continue
        elif line.strip() == '' or line.startswith('#') or not line.startswith('classifier'):
            continue
        tokens = line.split()
        if tokens[2] == 'logreg':
            classifiers_to_display.append((tokens[1], tokens[2], tokens[3], tokens[4]))
            classifier_names.append(tokens[1])
        elif tokens[2] == 'mapping':
            tmp_map = {}
            with open(tokens[4], 'r') as fp2:
                for line2 in fp2:
                    tokens2 = line2.split()
                    tmp_map[tokens2[0]] = float(tokens2[1])
            classifiers_to_display.append((tokens[1], tokens[2], tmp_map, int(tokens[3])))
            classifier_names.append(tokens[1])
    fp.close()

    subnet = '10.0.2.'
    devices_ = {}

    file_names = []
    is_upload = False
    if request.files.get('upload') != None:
        upload = request.files.get('upload')
        dir_name = tempfile.mkdtemp()
        upload.save(dir_name + 'temp.json')
        file_names.append(dir_name + 'temp.json')
        is_upload = True
    else:
        tmp_files = get_files_by_time(out_dir)
        tmp_files.reverse()
        if len(tmp_files) > 0:
            file_names.append(out_dir + tmp_files[0])
        if len(tmp_files) > 1:
            file_names.append(out_dir + tmp_files[1])
        if len(tmp_files) > 2:
            file_names.append(out_dir + tmp_files[2])
        if len(tmp_files) > 3:
            file_names.append(out_dir + tmp_files[3])
        if len(tmp_files) > 4:
            file_names.append(out_dir + tmp_files[4])
        if len(tmp_files) > 5:
            file_names.append(out_dir + tmp_files[5])

    start_time = time.time()

    data = []
    metadata = []
    total_flows = 0
    for f in file_names:
        try:  # just a robustness check
            parser = DataParser(f)
            tmpBD = parser.getByteDistribution()
            tmpIPT = parser.getIndividualFlowIPTs()
            tmpPL = parser.getIndividualFlowPacketLengths()
            tmp, tmp_m = parser.getIndividualFlowMetadata()
        except:
            continue

        # flows += parser.advancedInfo
        if parser.advancedInfo == None:
            continue
        for k in parser.advancedInfo:
            flows[k] = parser.advancedInfo[k]

        if tmp != None and tmpPL != None and tmpIPT != None:
            for i in range(len(tmp)):
                # if not parser.flows['appflows'][i]['flow']['sa'].startswith(subnet) and \
                #    not parser.flows['appflows'][i]['flow']['da'].startswith(subnet):
                #     continue
                tmp_id = ''
                if tmp_m[len(tmp) - i - 1][0].startswith(subnet):
                    tmp_id = tmp_m[len(tmp) - i - 1][0]
                elif tmp_m[len(tmp) - i - 1][1].startswith(subnet):
                    tmp_id = tmp_m[len(tmp) - i - 1][1]
                else:
                    continue

                tmp_data = []
                tmp_data.extend(tmp[len(tmp) - i - 1])
                tmp_data.extend(tmpPL[len(tmp) - i - 1])
                tmp_data.extend(tmpIPT[len(tmp) - i - 1])
                tmp_data.extend(tmpBD[len(tmp) - i - 1])

                data.append(tmp_data)
                metadata.append(tmp_m[len(tmp) - i - 1])
                total_flows += 1

                if total_flows == count_flocap * 2 and not is_upload:
                    break
        if total_flows == count_flocap * 2 and not is_upload:
            break

    if request.files.get('upload') != None:
        os.removedirs(dir_name)

    results = classify_samples(data, metadata)

    tmp = {}
    to_display = []
    to_display_names = []
    for key in display_fields:
        to_display_names.append(display_fields[key])

    for i in range(len(results)):
        color = []
        for j in range(len(results[i])):
            color.append(get_color(results[i][j]))

        tmp_id = ''
        if metadata[i][0].startswith(subnet):
            tmp_id = metadata[i][0]
        elif metadata[i][1].startswith(subnet):
            tmp_id = metadata[i][1]
        else:
            continue

        tmp_to_display = []
        for key in display_fields:
            tmp_to_display.append(metadata[i][key])

        if tmp_id not in devices_:
            devices_[tmp_id] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
            tmp[tmp_id] = []

        devices_[tmp_id][0] += 1  # total flows
        if metadata[i][9] in ciphers:
            (name_, rec_) = ciphers[metadata[i][9]]
            if rec_ == 'RECOMMENDED':
                devices_[tmp_id][1] += 1
            elif rec_ == 'LEGACY':
                devices_[tmp_id][2] += 1
            elif rec_ == 'AVOID':
                devices_[tmp_id][3] += 1
        if metadata[i][10] != -1:
            devices_[tmp_id][metadata[i][12] + 4] += 1

        tmp[tmp_id].append((results[i], metadata[i][0], metadata[i][1], metadata[i][2],
                            metadata[i][3], metadata[i][4], metadata[i][5], metadata[i][6],
                            metadata[i][7], color, '', '', metadata[i][8], tmp_to_display))

    return template('devices', devices=devices_, subnet=subnet + '*', results=tmp,
                    num_flows=len(results), classifier_names=classifier_names,
                    to_display_names=to_display_names)
def results():
    #def results(data={}):
    global flows
    global data
    global metadata
    global count_flocap
    global classifiers_to_display
    global classifier_names

    classifiers_to_display = []
    classifier_names = []
    display_fields = OrderedDict({})

    config_file = 'laui.cfg'
    fp = open(config_file, 'r')
    for line in fp:
        if line.startswith('display_field'):
            tokens = line.split()
            display_fields[int(tokens[3])] = (tokens[1], tokens[2].replace('_', ' '))
            continue
        elif line.strip() == '' or line.startswith('#') or not line.startswith('classifier'):
            continue
        tokens = line.split()
        if tokens[2] == 'logreg':
            classifiers_to_display.append((tokens[1], tokens[2], tokens[3], tokens[4]))
            classifier_names.append(tokens[1])
        elif tokens[2] == 'mapping':
            tmp_map = {}
            with open(tokens[4], 'r') as fp2:
                for line2 in fp2:
                    tokens2 = line2.split()
                    tmp_map[tokens2[0]] = float(tokens2[1])
            classifiers_to_display.append((tokens[1], tokens[2], tmp_map, int(tokens[3])))
            classifier_names.append(tokens[1])
    fp.close()

    file_names = []
    is_upload = False
    if request.files.get('upload') != None:
        # if False:
        upload = request.files.get('upload')
        dir_name = tempfile.mkdtemp()
        upload.save(dir_name + 'temp.json')
        file_names.append(dir_name + 'temp.json')
        is_upload = True
    else:
        tmp_files = get_files_by_time(out_dir)
        tmp_files.reverse()
        if len(tmp_files) > 0:
            file_names.append(out_dir + tmp_files[0])
        if len(tmp_files) > 1:
            file_names.append(out_dir + tmp_files[1])
        if len(tmp_files) > 2:
            file_names.append(out_dir + tmp_files[2])
        if len(tmp_files) > 3:
            file_names.append(out_dir + tmp_files[3])
        if len(tmp_files) > 4:
            file_names.append(out_dir + tmp_files[4])
        if len(tmp_files) > 5:
            file_names.append(out_dir + tmp_files[5])

    start_time = time.time()

    data = []
    metadata = []
    total_flows = 0
    for f in file_names:
        try:  # just a robustness check
            parser = DataParser(f)
            tmpBD = parser.getByteDistribution()
            tmpIPT = parser.getIndividualFlowIPTs()
            tmpPL = parser.getIndividualFlowPacketLengths()
            tmp, tmp_m = parser.getIndividualFlowMetadata()
        except:
            continue

        # flows += parser.advancedInfo
        if parser.advancedInfo == None:
            continue
        for k in parser.advancedInfo:
            flows[k] = parser.advancedInfo[k]

        if tmp != None and tmpPL != None and tmpIPT != None:
            for i in range(len(tmp)):
                tmp_data = []
                tmp_data.extend(tmp[len(tmp) - i - 1])
                tmp_data.extend(tmpPL[len(tmp) - i - 1])
                tmp_data.extend(tmpIPT[len(tmp) - i - 1])
                tmp_data.extend(tmpBD[len(tmp) - i - 1])

                # nga issue, will fix when pcaps start flowing again
                if tmp_data[2] == 0 and tmp_data[4] > 0:
                    continue
                if tmp_data[3] == 0 and tmp_data[5] > 0:
                    continue
                # if len(tmp_data) != num_params:
                #     continue

                data.append(tmp_data)
                metadata.append(tmp_m[len(tmp) - i - 1])
                total_flows += 1

                if total_flows == count_flocap * 2 and not is_upload:
                    break
        if total_flows == count_flocap * 2 and not is_upload:
            break

    if request.files.get('upload') != None:
        os.removedirs(dir_name)

    results = classify_samples(data, metadata)

    lhost = {}
    for i in range(len(metadata)):
        if metadata[i][0] not in lhost:
            lhost[metadata[i][0]] = 1
        else:
            lhost[metadata[i][0]] += 1
    sorted_lhost = sorted(lhost.items(), key=operator.itemgetter(1))
    sorted_lhost.reverse()
    if len(sorted_lhost) > 0:
        (lh, _) = sorted_lhost[0]
    else:
        lh = None

    tmp = []
    to_display = []
    to_display_names = []
    for key in display_fields:
        to_display_names.append(display_fields[key])

    for i in range(len(results)):
        color = []
        for j in range(len(results[i])):
            color.append(get_color(results[i][j]))

        s_orgName = ''
        d_orgName = ''
        if metadata[i][0] == lh:
            s_orgName = 'localhost'
        if metadata[i][1] == lh:
            d_orgName = 'localhost'

        tmp_to_display = []
        for key in display_fields:
            tmp_to_display.append(metadata[i][key])

        tmp.append((results[i], metadata[i][0], metadata[i][1], metadata[i][2],
                    metadata[i][3], metadata[i][4], metadata[i][5], metadata[i][6],
                    metadata[i][7], color, s_orgName, d_orgName, metadata[i][8],
                    tmp_to_display))

    end_time = time.time() - start_time

    tmp = sorted(tmp, key=lambda x: x[0])
    tmp.reverse()

    return template('results', results=tmp, num_flows=len(results), t=end_time,
                    classifier_names=classifier_names, to_display_names=to_display_names)
        # fragment: tail of the truncated activation-function selector above
        assert False, 'no activation function'


def cost_func(y, y_hat, batch_size, choice='euclidean'):
    if choice == 'euclidean':
        return T.sum((y - y_hat) ** 2) / batch_size
    else:
        assert False, 'no cost function'


t_start = time.time()

sample_file = 'mfcc/train.ark'
test_file = 'mfcc/test.ark'
label_file = 'label/train.lab'
label_map_file = 'phones/48_39.map'

DataParser.load(sample_file, label_file, label_map_file)
#DataParser.test()

dim_x = DataParser.dimension_x
dim_y_hat = DataParser.dimension_y
batch_size = 21
neuron_num = 64
epoch_cycle = 50
learning_rate = 0.01
lr = theano.shared(learning_rate)
lr_decay = 1.0

# e.g. matrix 3*2 dot matrix 2*1 = matrix 3*1
# [[1., 3.], [2., 2.], [3., 1.]] dot [[2], [1]] = [[5.], [6.], [7.]]
x = T.matrix('input', dtype='float64')          # matrix of dim_x * batch_size
y_hat = T.matrix('reference', dtype='float64')  # matrix of dim_y_hat * batch_size
def __init__(self, data_dir, initial_time, time_step, max_chunk_size):
    self.data_parser = DataParser(data_dir, initial_time, time_step, max_chunk_size)
            # fragment: tail of a truncated user-input confirmation helper
            needToRepeatInput = True
        else:
            print 'inputAnswer is neither affirmative nor negative'
            pass
    print 'userInput is', userInput
    return userInput


if __name__ == '__main__':
    signal.signal(signal.SIGINT, signal_handler)
    #building = raw_input()
    #level = raw_input()
    internetConnection = False
    speechInput = None
    speaker = AudioFeedback()

    userInputInitialise = DataParser()
    voiceInput, numpadData = userInputInitialise.location_input_type()
    if voiceInput:
        print 'using voice input'
        print 'initialising speech recognition'
        speechInput = SpeechRecognition()
    else:
        print 'using keypad input'

    print 'starting program'
    askBuildingName = 'please enter building name'
    speaking_proc = Process(target=speaker.threadedFeedback, args=(askBuildingName,))
    speaking_proc.start()
    speaking_proc.join()
    #building = raw_input()
def provideDirections(self, nextCheckPoint, currentCheckPoint, pos_x, pos_y, speaker):
    #threading.Timer(1.0, provideDirections(nextCheckPoint, currentCheckPoint, pos_x, pos_y)).start()
    print "start of function"
    sayNextCheckPoint = 'your next checkpoint is %s\n' % (mapinfo['map'][nextCheckPoint - 1]['nodeName'])
    #speaker.threadedFeedback(sayNextCheckPoint)
    speaking_proc = Process(target=speaker.threadedFeedback, args=(sayNextCheckPoint,))
    speaking_proc.start()
    speaking_proc.join()

    detourCheckPoint = False
    reachCheckPoint = False
    start_time = time.time()
    time_to_speak = time.time()
    dist = None
    change_direction = None
    direction = '%s %lf degrees, %lf'
    speak_direction = '%s %d degrees, and walk %d point %d meters\n'
    isFirstTimeGivingDirections = False
    isFirstTimeProcDirections = True
    oneSecondHasPassed = False

    while True:
        if (time.time() - start_time > 1) or isFirstTimeProcDirections:
            start_time = time.time()
            #distance, heading = input()
            dataParser = DataParser()
            step = dataParser.get_step_read()
            print "NUMBER OF STEPS %d\n", step
            compass_read = dataParser.get_compass_read()
            print "compass_read", compass_read
            del dataParser

            if not isFirstTimeProcDirections:
                if step == 0:
                    print 'going into calcDisplacement\n\n\n'
                    reachCheckPoint, pos_x, pos_y, detourCheckPoint, speak_direction = \
                        self.calcDisplacement(0, compass_read, 5, pos_x, pos_y,
                                              nextCheckPoint, currentCheckPoint)
                else:
                    for i in range(step):
                        print 'step #', i
                        reachCheckPoint, pos_x, pos_y, detourCheckPoint, speak_direction = \
                            self.calcDisplacement(1, compass_read, i, pos_x, pos_y,
                                                  nextCheckPoint, currentCheckPoint)
            if oneSecondHasPassed:
                isFirstTimeGivingDirections = True
                oneSecondHasPassed = False
            if isFirstTimeProcDirections:
                oneSecondHasPassed = True
                isFirstTimeProcDirections = False

        if (time.time() - time_to_speak > 4) or isFirstTimeGivingDirections:
            isFirstTimeGivingDirections = False
            time_to_speak = time.time()
            speaking_proc = Process(target=speaker.threadedFeedback, args=(speak_direction,))
            speaking_proc.start()

        if reachCheckPoint:
            speak_direction = 'checkpoint reached'
            speaking_proc = Process(target=speaker.threadedFeedback, args=(speak_direction,))
            speaking_proc.start()
            break
        if detourCheckPoint:
            break

    return reachCheckPoint, pos_x, pos_y, detourCheckPoint