Example #1
 def test_filter_info_right_data_extra_column(self):
     data = [
         'RecordId,EmployID,Name,Age,Year,Salary,Type',
         '10021,1,Rob,23,2008,65580,Sport,ExtraData'
     ]
     test_processor = DataProcessor()
     self.assertEqual({}, test_processor.filter_info(data))
Example #2
    def get_stations(self, station_id=None):
        """
        Gets all station data from the API and inserts it into the database.

        :param station_id: int
        :return: Union[list, tuple]
        """
        url = self.prepare_url(self.URL_STATIONS)

        if self.request_is_valid(url):
            try:
                raw_data = self.call_api(url)
                cities = DataProcessor.parse_cities(raw_data)
                stations = DataProcessor.parse_stations(raw_data)
                self.db.insert_from_list("cities", cities, True)
                self.db.insert_from_list("stations", stations, True)
            except (ApiError, DataProcessingError, DbManagerError) as e:
                raise DataManagerError("Could not handle API data properly") from e
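        # Read the station data back from the database whether or not fresh API data was stored above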
        try:
            stations = self.db.get_all_station_data(station_id)
        except DbManagerError as e:
            logger.exception(e)
            raise DataManagerError("Error when obtaining data from database!") from e

        if station_id:
            return stations[0]

        return stations
Example #3
    def create_connection(self, wb, switch):

        sheet = self.choose_sheet(wb)

        target_column = 2
        target_row = 1

        max_column = sheet.max_column
        max_row = sheet.max_row

        data_row = []
        row_dict = {}
        keys = []
        data_to_process = {}

        dup_keys = 0
        # Initialise an empty value for every expected row name
        for name in self.row_names:
            row_dict[name] = ''

        for row in range(0, max_row):

            # Get the first value from the row to set as the key
            output = sheet.cell(row=target_row, column=1).value
            key = dp.validate_key(str(output))

            # Check if it's a duplicate key
            if key in keys:
                dup_keys += 1
                data_to_log = "Duplicate Key" + str(key)
                Lfh.append_file('log.txt', data_to_log)

            # Add that key to the list of all keys
            keys.append(key)
            data_to_process[key] = {}

            col_num = 0
            for column in range(0, max_column):
                output = sheet.cell(row=target_row, column=target_column).value
                data_row.append(str(output))

                row_dict[self.row_names[col_num]] = data_row[col_num]
                target_column = target_column + 1
                col_num = col_num + 1

            # Skip the ID and Valid rows
            for row in self.row_names[1:-1]:
                data_to_process[key][row] = row_dict[row]

            data_to_process[key]['valid'] = "0"

            data_row = []
            target_column = 1
            target_row = target_row + 1

        # Send the data to be processed
        dict_valid = dp.send_to_validate(data_to_process, switch, dup_keys)

        return dict_valid
Example #4
    def fetch_text_contents(file, switch, separator=","):
        f = FileReader()
        dup_keys = 0
        keep_going = True
        data_fields = DataFields.get_data_fields(DataFields)

        if file != "":
            # Repeat for each line in the text file
            for line in file:
                # Split file into fields using ","
                fields = line.split(separator)
                checked_id = DataProcessor.validate_key(fields[0])
                if checked_id in f.dict_root:
                    dup_keys += 1
                    fields[6] = fields[6].rstrip()
                    data_to_log = "Duplicate Key" + str(fields[0:])
                    LogFileHandler.append_file('log.txt', data_to_log)
                else:
                    test_dict = {}
                    field_number = 1

                    # Ignore the ID field and the Valid field for now
                    for row_name in data_fields[1:-1]:
                        test_dict[row_name] = fields[field_number]
                        field_number += 1

                    test_dict['valid'] = '0'
                    f.dict_root.update({checked_id: test_dict})

            # Close the file to free up resources (good practice)
            file.close()
            if keep_going:
                valid_dict = DataProcessor.send_to_validate(f.dict_root,
                                                            switch, dup_keys)
                return valid_dict
Example #5
 def test_parse_methods(self):
     data = {
         "Attribute keywords": ["have ", "must have ", "has "],
         "Method keywords": ["can ", "should "],
         "Initialization keywords": {
             "Attribute keywords":
             ["is initializing by setting ", "by default get "],
             "Attribute values keywords": [" as ", " equal to ", " = "]
         }
     }
     description_config = DescriptionConfig(data)
     test_data_processor = DataProcessor()
     test_attributes = ["apples", "oranges"]
     test_line1 = "Client can swim"
     test_line2 = "can give apples, take apples and eat oranges"
     test_output1 = [{"Method": "swim", "Attributes": []}]
     test_output2 = [{
         "Method": "give apples",
         "Attributes": ["apples"]
     }, {
         "Method": "take apples",
         "Attributes": ["apples"]
     }, {
         "Method": "eat oranges",
         "Attributes": ["oranges"]
     }]
     self.assertEqual(
         test_data_processor.parse_methods(test_line1, test_attributes,
                                           description_config),
         test_output1)
     self.assertEqual(
         test_data_processor.parse_methods(test_line2, test_attributes,
                                           description_config),
         test_output2)
Example #6
 def test_parse_initialization(self):
     data = {
         "Attribute keywords": ["have ", "must have ", "has "],
         "Method keywords": ["can ", "should "],
         "Initialization keywords": {
             "Attribute keywords":
             ["is initializing by setting ", "by default get "],
             "Attribute values keywords": [" as ", " equal to ", " = "]
         }
     }
     description_config = DescriptionConfig(data)
     test_data_processor = DataProcessor()
     test_attributes = ["apples", "oranges"]
     test_line1 = "Client is initializing by setting apples as 1, oranges as ten"
     test_line2 = "is initializing by setting apples = 1 and oranges = 5"
     test_output1 = ["apples", "oranges"], [1, "ten"]
     test_output2 = ["apples", "oranges"], [1, 5]
     self.assertEqual(
         test_data_processor.parse_initialization(test_line1,
                                                  test_attributes,
                                                  description_config),
         test_output1)
     self.assertEqual(
         test_data_processor.parse_initialization(test_line2,
                                                  test_attributes,
                                                  description_config),
         test_output2)
Example #7
    def get_contents(self):
        """
        Prepare the resume contents, composed of a header, spacing, and the
        main contents, all with table formatting.
        """

        self.elements = []
        data_processor = DataProcessor(self.resume_data)

        # resume header
        header_contents = self._set_table(data_processor.header_data(),
                                          styles.header_col_widths,
                                          styles.header_table_styles)
        self.elements.append(header_contents)

        # spaces
        spaces = Spacer(width=0, height=styles.header_space_height)
        self.elements.append(spaces)

        # resume contents
        table_contents = self._set_table(data_processor.content_data(),
                                         styles.content_col_widths,
                                         styles.content_table_styles)
        self.elements.append(table_contents)

        return self.elements
Example #8
 def test_clean_strings(self):
     test_data_processor = DataProcessor()
     test_input_string = "***Smth***\n\n**Smth more**\n     \n1. How are you? Are you here?!\n\n2. Hi, I'm Alex. And you?"
     test_output_string = "How are you Are you here\nHi, Im Alex And you\n"
     data_processor_output = test_data_processor.clean_strings(
         test_input_string)
     self.assertEqual(data_processor_output, test_output_string)
Example #9
def estimate(args):
    input_model_file_name = args.input_model_file_name
    input_exe_file_name = args.input_feature_file_name
    extraction_method = args.extraction_method
    label_type = args.label_type
    
    # generate an .asm file from the executable file
    generator = IDAAsmGenerator()
    generator.generate(input_exe_file_name)
    
    # append the information of the input file to the database
    datproc = DataProcessor()
    datproc.update_database_from_file()
    
    # extract feature vector
    asm_file_name = os.path.splitext(input_exe_file_name)[0] + '.asm'
    # assumption: extract_data_from_file returns the extracted feature vector
    feature_vector = datproc.extract_data_from_file(asm_file_name, extraction_method,
                                                    label_type)
    
    # load classification model
    estimator = CompilerEstimator()
    estimator.load_model(input_model_file_name)
    
    # estimate
    result = estimator.estimate(feature_vector)
    print(result)
Example #10
    def eval(self, test_file_path, model_path):
        """ Evaluate model's performance on test data
        Args:
            test_file_path: path to the jsonl file containing test data
            model_path: path to the model to be restored
        Returns:
            float number indicating test accuracy
        """
        saver = tf.train.Saver(max_to_keep=500)
        saver.restore(self.sess, model_path)
        print("Model restored from " + str(model_path))
        self.dp_test = DataProcessor(input_file_path=test_file_path)

        accuracies = []
        for data in self.dp_test.get_single_data():
            test_feed = {
                self.a: data["sentence1"],
                self.b: data["sentence2"],
                self.labels: data["gold_label"]
            }
            accuracy, predictions = self.sess.run([self.accuracy, self.h_output], feed_dict=test_feed)
            print("predictions for the batch: {}".format(predictions))
            print("Actual gold labels: {}".format(test_data["gold_label"]))
            accuracies.append(accuracy)

        test_acc = sum(accuracies)/len(accuracies)
        print("Overall test accuracy is {}".format(test_acc))
        return test_acc
Example #11
def main(args):
    df = pd.read_csv(args.dataset)
    # df = df.iloc[::24,:]

    # Preprocess the input and reshape it to
    # (num_samples, window_size, 1)
    processor = DataProcessor(window_size=args.window_size,
                              forecast_size=args.forecast,
                              shift=args.shift)
    train_X, train_y, test_X, test_y, raw_series = processor.preprocess(df)

    # train or load model
    lstm = LSTMModel(args.window_size, args.forecast)
    print(lstm.model.summary())
    if not args.eval_only:
        lstm.fit(train_X, train_y, epochs=args.epochs)
        lstm.save(args.model_path)
    else:
        lstm.load(args.model_path)

    # evaluation and plots
    preds = lstm.predict(test_X[-1].reshape(1, -1, 1))
    preds = processor.postprocess(preds)
    plot_test_datapoint(test_X[-1], test_y[-1], preds[0], args.forecast)

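    # Roll the prediction window forward to build a longer multi-step forecast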
    preds_moving = moving_test_window_preds(lstm,
                                            test_X[0, :],
                                            n_future_preds=1000,
                                            step=args.forecast)
    preds_moving = np.array(preds_moving).reshape(-1, 1)
    preds_moving = processor.postprocess(preds_moving)

    plot_moving_window(df['datetime'], raw_series, preds_moving)
Example #12
def load_tracks_into_dataset(dataset):
    """ Loads all valid tracks in PATH_TO_WAVS
    into existing dataset

    Args:
        dataset (dict): dict with 'X', 'y' and 'meta' lists that the loaded tracks are appended to
    """
    files = glob.glob(PATH_TO_WAVS + '*.wav')
    for file in files:
        try:
            genre, beets_id, y = get_metadata(file)

            for i in range(15):
                dataset['y'].append(np.array([y]))

                dp = DataProcessor(filepath=file)
                dp.load_data(n_secs=N_SECS)

                if MEL:
                    X = dp.mel_spectrogram
                else:
                    X = dp.spectrogram

                if X.shape != (64, 1022):
                    continue

                dataset['X'].append(X.astype(float))
                meta = {'beets_id': beets_id, 'genre': genre}

                dataset['meta'].append(meta)
        except Exception:
            logger.warning('Error loading id: {}'.format(beets_id),
                           exc_info=True)
Example #13
 def test_make_lines(self):
     test_data_processor = DataProcessor()
     input1 = "string1\nstring2\n"
     input2 = "string1\nstring2\nstring3\n"
     output1 = ["string1", "string2"]
     output2 = ["string1", "string2", "string3"]
     self.assertEqual(test_data_processor.make_lines(input1), output1)
     self.assertEqual(test_data_processor.make_lines(input2), output2)
Example #14
 def test_filter_info_right_data_double_header(self):
     data = [
         'RecordId,EmployID,Name,Age,Year,Salary,Type',
         'RecordId,EmployID,Name,Age,Year,Salary,Type'
     ]
     test_processor = DataProcessor()
     with self.assertRaises(ValueError):
         test_processor.filter_info(data)
Example #15
    def train(self, train_file_path, epoch_number, save_models = True):
        """ Train the attention NLI model stochastically
        Args:
            train_file_path: jsonl file path to training data
            epoch_number: number of epochs of training
            save_models: saving models for each epoch
        Notes:
            trained models are saved in models/ after every epoch when save_models is True
        """
        saver = tf.train.Saver(max_to_keep=500)
        self.dp_train = DataProcessor(input_file_path=train_file_path)
        acc_list = []
        loss_list = []

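        # Print the model's predictions for a few hand-written sentence pairs before training starts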
        s1 = "someone to watch Netflix with me"
        s2 = "someone to watch TV shows"
        embeddings1 = self.dp_train.gloVe_embeddings(s1, self.token_count)
        embeddings2 = self.dp_train.gloVe_embeddings(s2, self.token_count)
        print("Prediction is:", self.sess.run(self.h_output, feed_dict = {self.a: embeddings1, self.b: embeddings2}))

        s1 = "a Penn student to chat for coffee"
        s2 = "chat and get to know a penn student"
        embeddings1 = self.dp_train.gloVe_embeddings(s1, self.token_count)
        embeddings2 = self.dp_train.gloVe_embeddings(s2, self.token_count)
        print("Prediction is:", self.sess.run(self.h_output, feed_dict = {self.a: embeddings1, self.b: embeddings2}))

        s1 = "a designer to cofound my startup"
        s2 = "a software designer interested in entrepreneurship"
        embeddings1 = self.dp_train.gloVe_embeddings(s1, self.token_count)
        embeddings2 = self.dp_train.gloVe_embeddings(s2, self.token_count)
        print("Prediction is:", self.sess.run(self.h_output, feed_dict = {self.a: embeddings1, self.b: embeddings2}))

        self.accuracy_records_by_epoch = []
        for i in range(epoch_number):
            data_num = 0
            for data in self.dp_train.get_single_data():
                data_num = data_num + 1
                data_feed_dict = {
                    self.a: data["sentence1"],
                    self.b: data["sentence2"],
                    self.labels: data["gold_label"]
                }
                _, acc, loss = self.sess.run([self.train_op, self.accuracy, self.loss], feed_dict=data_feed_dict)
                acc_list.append(acc)
                loss_list.append(loss)
                if (data_num % 1000 == 0):
                    print("At epoch: {}, {} data processed".format(i, data_num))
            epoch_acc = sum(acc_list)/len(acc_list)
            epoch_loss = sum(loss_list)/len(loss_list)
            self.accuracy_records_by_epoch.append(epoch_acc)
            print("finishing epoch {}, training accuracy: {}, loss:{}".format(i, epoch_acc, epoch_loss))

            if save_models:
                save_path = saver.save(self.sess, './models/', global_step=i)
                print("Model saved in file: %s" % save_path)
            elif not save_models and i + 1 == epoch_number:
                save_path = saver.save(self.sess, './models/', global_step=i)
                print("Model saved in file: %s" % save_path)
Example #16
def main(ip, database_ip):
    # set up cockroachdb client
    Base = declarative_base()

    class Metrics(Base):
        __tablename__ = 'metrics'
        id = Column(sqltypes.VARCHAR, primary_key=True)
        ip = Column(sqltypes.VARCHAR)
        time = Column(sqltypes.VARCHAR)
        metric_name = Column(sqltypes.VARCHAR)
        labels = Column(sqltypes.JSON)
        metric_value = Column(sqltypes.FLOAT)

    engine = create_engine(
        'cockroachdb://prom@{}/prometheus'.format(database_ip))
    Session = sessionmaker(bind=engine)
    Base.metadata.create_all(engine)

    # set up processor
    data_processor = DataProcessor(None)

    # ip string
    collection = ip.split(".")[2]

    # main loop
    while True:
        try:
            r = requests.get("http://{}:9182/metrics".format(ip))
        except Exception as e:
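            # The metrics endpoint is unreachable; stop the collection loop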
            break
        data = []
        for line in r.text.split("\n"):
            data_point = data_processor.process_line(ip, line)
            if data_point is not None:
                data.append(data_point)

        metrics_to_add = []
        for data_point in data:
            computer_ip = data_point["computer_ip"]
            metric_name = data_point["data_type"]
            metric_value = data_point["value"]
            del data_point["computer_ip"]
            del data_point["data_type"]
            del data_point["value"]
            metrics_to_add.append(
                Metrics(id=str(uuid.uuid4()),
                        ip=computer_ip,
                        time=str(datetime.now()),
                        metric_name=metric_name,
                        metric_value=metric_value,
                        labels=json.dumps(data_point)))

        session = Session()
        session.add_all(metrics_to_add)
        session.commit()
        session.close()

        time.sleep(60)
Example #17
    def __init__(self, root_dir, video_title, max_frame, eval_file):

        self.dp = DataProcessor(root_dir, video_title)
        self.viewer = Viewer
        self.max_frame = max_frame
        self.results = {}
        with open(eval_file, "r") as f:
            self.results = json.load(f)
        self.img_path_list = self.get_img_path_list()
Example #18
 def test_make_val(self):
     test_data_processor = DataProcessor()
     val1 = "5"
     val2 = "5.2"
     val3 = "5.0"
     val4 = "char"
     self.assertEqual(test_data_processor.make_val(val1), 5)
     self.assertEqual(test_data_processor.make_val(val2), 5.2)
     self.assertEqual(test_data_processor.make_val(val3), 5)
     self.assertEqual(test_data_processor.make_val(val4), "char")
Example #19
    def __init__(self, params, dataset_name):
        self.params = params
        # self.dataset = dataset
        self.variables_data = dict()

        self.dataset_vars = list()

        #List with names and characteristics of the dataset variables
        self.variables_dataset = list()

        #List with names and characteristics of the template variables
        self.variables_template = list()

        dataset_var_type = self.params.template.variables_dataset['type']
        if dataset_var_type == "multiple":
            self.data_processor = DataProcessor(
                dataset_name, dataset_var_type,
                self.params.template.variables_dataset['aggregation'])
        else:
            self.data_processor = DataProcessor(dataset_name, dataset_var_type)

        self.reverse_data = False if self.params.template.variables_dataset[
            'reverse'] == "false" else True

        #Add the dataset dimensions
        self.data_processor.add_dimensions_variables(self.params.template.variables_dataset.lat.cdata,\
                   self.params.template.variables_dataset.lon.cdata,\
                   self.params.template.variables_dataset.time.cdata,\
                   self.reverse_data)

        if self.params.template.output["type"] == "images":
            self.template_dimensions = dict()
            self.template_dimensions['max_lat'] = int(
                self.params.template.layers["max_lat"])
            self.template_dimensions['max_lon'] = int(
                self.params.template.layers["max_lon"])
            self.template_dimensions['min_lat'] = int(
                self.params.template.layers["min_lat"])
            self.template_dimensions['min_lon'] = int(
                self.params.template.layers["min_lon"])
            self.interpolation_factor = int(
                self.params.template.layers["interpolation_factor"])
        if self.params.template.output["type"] == "csv":
            self.template_dimensions = dict()
            self.template_dimensions[
                'max_lat'] = self.data_processor.raw_variables["lat"].max()
            self.template_dimensions[
                'max_lon'] = self.data_processor.raw_variables["lon"].max()
            self.template_dimensions[
                'min_lat'] = self.data_processor.raw_variables["lat"].min()
            self.template_dimensions[
                'min_lon'] = self.data_processor.raw_variables["lon"].min()
            print(self.template_dimensions)
            self.interpolation_factor = int(
                self.params.template.points["interpolation_factor"])
Example #20
 def test_make_name(self):
     test_data_processor = DataProcessor()
     name1 = ""
     name2 = "name"
     name3 = "word1 word2"
     name4 = "word1 word2 word3"
     self.assertEqual(test_data_processor.make_name(name1), "")
     self.assertEqual(test_data_processor.make_name(name2), "name")
     self.assertEqual(test_data_processor.make_name(name3), "word1_word2")
     self.assertEqual(test_data_processor.make_name(name4),
                      "word1_word2_word3")
Example #21
def main():

    songs, notes = DataProcessor.get_parsed_data(config.MIDI_FILES_DIR)
    vocab = len(set(notes))

    network_input, network_output = DataProcessor.prepare_sequences(notes, vocab)
    lstm = LSTM(network_input, vocab, config.WEIGHTS_DUMP, config.PRETRAINED_MODEL)
    # lstm = LSTM(network_input, vocab, config.WEIGHTS_DUMP)
    # lstm.train(network_input, network_output)

    song_writer = SongWriter(lstm, notes, songs)
    song_writer.write_song(config.OUTPUT_DIR + config.SONG_NAME + config.MIDI_EXTENSION)
Example #22
class FeatureExtractor(object):
    '''
    Controls the processing chain and fetches the values needed for the classifier
    '''

    def __init__(self, dataCollector):

        self.inputQueue = Queue()
        self.outputQueue = Queue()
        self.extractQueue = Queue()
        
        self.sigUtil = SignalUtil()
        self.eegUtil = EEGUtil()
        
        self.collector = dataCollector
        self.collectorThread = threading.Thread(target=self.collector.collectData)
        
        self.processor = DataProcessor(self.inputQueue, self.outputQueue)
        self.processingThread = threading.Thread(target=self.processor.processData)

        self.extract = True

    def start(self):
        '''setting data handler and starts collecting'''
        print("%s: starting feature extractor" % self.__class__.__name__)   
        self.collector.setHandler(self.handleDataSet)  
        self.collectorThread.start()
        self.processingThread.start()
        
        while self.extract:
            try:
                procData = self.outputQueue.get(timeout=1)
                self.extractFeatures(procData)
            except Empty:
                pass
    
    def extractFeatures(self, data):
        features = []
        for _, sigData in data.iteritems():
            theta = self.eegUtil.getThetaChannel(sigData["fft"])
            features.extend(theta)
        self.extractQueue.put(array(features))
    
    def handleDataSet(self, data):
        '''Add the given data to the processingQueue'''
        self.inputQueue.put(data)
    
    def close(self):
        self.processor.close()
        self.processingThread.join()
        self.collector.close()
        self.collectorThread.join()
        print("%s: closing feature extractor" % self.__class__.__name__)     
Example #23
    def __init__(self, model_type=None, dummy=False, config_file=None):
        self.dummy = dummy
        #LOGGING FOLDERS
        self.log_dir = "tf_logs/"
        if not os.path.isdir(self.log_dir):
            os.mkdir(self.log_dir)
        self.cp_dir = "tf_models/"
        if not os.path.isdir(self.cp_dir):
            os.mkdir(self.cp_dir)
        self.config_dir = "model_configs/train/"
        if not os.path.isdir(self.config_dir):
            os.mkdir(self.config_dir)

        #IF THE CONFIG FILE WAS GIVEN AS INPUT
        #USES THAT
        if config_file is not None:
            config_path = self.config_dir + config_file
            with open(config_path, "r") as f:
                config = json.load(f)
            self.warm_start = True

        #IF THE CONFIG FILE WAS NOT GIVEN AS INPUT
        #THE DEFAULT FILE WILL BE USED
        else:
            config_file = "main_config.json"
            config_path = f"{self.config_dir.split('/')[0]}/{config_file}"
            with open(config_path, "r") as f:
                config = json.load(f)

            #Checks the model type input
            assert isinstance(model_type, str), f"Invalid model type:{model_type}"
            assert model_type in config, \
                f"Given model type was not found in config file: {model_type}"

            #DUMMY vs REAL
            version_str = "dummy" if self.dummy else "real"
            config["data"] = config["data"][version_str]

            #DROP USELESS KEYS FROM THE DICT
            for key in list(config.keys()):
                if key not in ["data", model_type]: config.pop(key)

            self.warm_start = False

        #DATA PROCESSOR
        self.dp = DataProcessor(config["data"], self.dummy)

        #CREATE THE MODEL
        self.model = self.build_model(config)

        #STORE THE CONFIG DICT FOR LATER
        self.config = config
Example #24
    def setUp(self):
        super(TestDataProcessor, self).setUp()
        spec_file = (self.resource_folder /
                     "valid_data_processing_spec.json").absolute()
        self.data_processor = DataProcessor(spec_file=spec_file)

        self.existing_csv_file = (self.resource_folder /
                                  "output.csv").absolute()
        self.existing_hash_csv_file = (self.resource_folder /
                                       "output_hash.csv").absolute()

        self.csv_file = (self.temp_folder / "output.csv").absolute()
        self.hash_csv_file = (self.temp_folder / "output_hash.csv").absolute()
Example #25
def updatedb(args):
    file_name = args.file_name
    dir_name = args.dir_name
    datproc = DataProcessor()
    
    if file_name is not None and dir_name is not None:
        sys.stderr.write('Error: please assign only one file name or directory name')
    elif file_name is not None:
        datproc.update_database_from_file(file_name)
    elif dir_name is not None:
        datproc.update_database_from_dir(dir_name)
    else:
        sys.stderr.write('Error: no file name or directory name specified')
Example #26
 def test_check_attr(self):
     test_data_processor = DataProcessor()
     test_attributes = ["name", "last name"]
     test_line1 = "change name"
     test_line2 = "change last name"
     self.assertEqual(
         test_data_processor.check_attr("name", test_attributes,
                                        test_line1), 1)
     self.assertEqual(
         test_data_processor.check_attr("name", test_attributes,
                                        test_line2), 0)
     self.assertEqual(
         test_data_processor.check_attr("last name", test_attributes,
                                        test_line2), 1)
Example #27
def main():
    config = json.load(open('config.json', 'r'))

    set_seed(config["seed"])

    if not os.path.exists(config["output_dir"]):
        os.makedirs(config["output_dir"])
    if not os.path.exists(config["save_dir"]):
        os.makedirs(config["save_dir"])

    # model_config = transformers.BertConfig.from_pretrained(config["model_name"])
    # tokenizer = AutoTokenizer.from_pretrained(config["model_name"])
    tokenizer = BertTokenizer.from_pretrained(config["model_name"])
    model = BertForClassification(config["model_name"])
    # model = AutoModelForMultipleChoice.from_pretrained(config["model_name"])

    model.cuda()

    processor = DataProcessor(config["data_dir"])

    train_examples = processor.get_train_examples()
    train_dataset = processor.get_dataset(train_examples, tokenizer,
                                          config["max_length"])

    valid_examples = processor.get_dev_examples()
    valid_dataset = processor.get_dataset(valid_examples, tokenizer,
                                          config["max_length"])

    test_examples = processor.get_test_examples()
    test_dataset = processor.get_dataset(test_examples, tokenizer,
                                         config["max_length"])

    train(config, model, train_dataset, valid_dataset)
    result = evaluate(config, model, test_dataset)
    print(result[:2])
Example #28
def loadLevel():
    json_path = "../TheVGLC/Super Mario Bros/Multi-layer/smb-multi-layer.json"
    level_path = "../TheVGLC/Super Mario Bros/Multi-layer/Structural Layer/mario-1-1.txt"

    loader = DataLoader()
    loader.loadJson(json_path)
    loader.loadFile(level_path)

    original_size = loader.loaded_data[0].shape

    processor = DataProcessor(loader.loaded_data[0])
    processor.makeSegments(8, 1)

    return loader, processor, original_size
Example #29
class DataProcessorTests(unittest.TestCase):
    def setUp(self):
        dict = {"id":1,"data":[[84,0,1456010413839],[72,0,1456010413903],[84,1,1456010413938],[73,0,1456010413979],[72,1,1456010414050],[83,0,1456010414079],[73,1,1456010414138],[83,1,1456010414214],[84,0,1456010414444],[89,0,1456010414508],[84,1,1456010414539],[89,1,1456010414587],[80,0,1456010414703],[80,1,1456010414799],[73,0,1456010414963],[78,0,1456010415063],[73,1,1456010415126],[78,1,1456010415182],[71,0,1456010415205],[71,1,1456010415332],[69,0,1456010415801],[69,1,1456010415892],[88,0,1456010416026],[88,1,1456010416125],[69,0,1456010416266],[82,0,1456010416350],[69,1,1456010416433],[82,1,1456010416461],[67,0,1456010416609],[67,1,1456010416684],[73,0,1456010416689],[67,0,1456010416777],[73,1,1456010416784],[67,1,1456010416900],[69,0,1456010416936],[69,1,1456010417027],[83,0,1456010417490],[83,1,1456010417593],[83,0,1456010419669],[83,1,1456010419756],[69,0,1456010419834],[69,1,1456010419941],[73,0,1456010420333],[83,0,1456010420406],[73,1,1456010420461],[83,1,1456010420497],[65,0,1456010420631],[65,1,1456010420759],[83,0,1456010420838],[83,1,1456010420950],[84,0,1456010421070],[84,1,1456010421133],[82,0,1456010421220],[82,1,1456010421324],[65,0,1456010421401],[78,0,1456010421510],[65,1,1456010421545],[71,0,1456010421602],[78,1,1456010421609],[69,0,1456010421702],[71,1,1456010421741],[69,1,1456010421853],[74,0,1456010422162],[74,1,1456010422261],[85,0,1456010422311],[85,1,1456010422415],[77,0,1456010422457],[66,0,1456010422545],[77,1,1456010422552],[66,1,1456010422652],[76,0,1456010422656],[76,1,1456010422760],[69,0,1456010422779],[69,1,1456010422912],[79,0,1456010423361],[79,1,1456010423460],[70,0,1456010423569],[70,1,1456010423649],[65,0,1456010423819],[65,1,1456010423954],[75,0,1456010424045],[75,1,1456010424160],[87,0,1456010424198],[87,1,1456010424310],[65,0,1456010424388],[65,1,1456010424532],[82,0,1456010424614],[82,1,1456010424722],[68,0,1456010424819],[68,1,1456010424926],[80,0,1456010425556],[80,1,1456010425667],[72,0,1456010425788],[82,0,1456010425856],[72,1,1456010425883],[82,1,1456010425943],[65,0,1456010426024],[83,0,1456010426120],[65,1,1456010426163],[83,1,1456010426235],[69,0,1456010426311],[69,1,1456010426406],[83,0,1456010426471],[188,0,1456010426567],[83,1,1456010426634],[188,1,1456010426706],[82,0,1456010427221],[82,1,1456010427312],[69,0,1456010427391],[80,0,1456010427439],[69,1,1456010427482],[80,1,1456010427546],[82,0,1456010427550],[82,1,1456010427630],[69,0,1456010427694],[69,1,1456010427797],[83,0,1456010427919],[83,1,1456010427994],[69,0,1456010428081],[69,1,1456010428177],[78,0,1456010428186],[78,1,1456010428281],[84,0,1456010428294],[84,1,1456010428397],[73,0,1456010428401],[78,0,1456010428490],[73,1,1456010428557],[71,0,1456010428578],[78,1,1456010428597],[71,1,1456010428717],[84,0,1456010428834],[72,0,1456010428901],[84,1,1456010428929],[72,1,1456010429025],[69,0,1456010429041],[69,1,1456010429165],[81,0,1456010429392],[85,0,1456010429496],[81,1,1456010429535],[73,0,1456010429544],[85,1,1456010429611],[73,1,1456010429663],[84,0,1456010429734],[84,1,1456010429817],[69,0,1456010429910],[69,1,1456010430005],[83,0,1456010430121],[83,1,1456010430176],[83,0,1456010430259],[83,1,1456010430354],[69,0,1456010430422],[78,0,1456010430486],[69,1,1456010430537],[67,0,1456010430586],[78,1,1456010430593],[69,0,1456010430682],[67,1,1456010430733],[69,1,1456010430829],[79,0,1456010430964],[70,0,1456010431079],[79,1,1456010431082],[70,1,1456010431187],[69,0,1456010431377],[69,1,1456010431465],[88,0,1456010431617],[88,1,1456010431708],[81,0,1456010431874],[85,0,145601043
1942],[81,1,1456010431997],[73,0,1456010432006],[85,1,1456010432057],[73,1,1456010432089],[83,0,1456010432114],[73,0,1456010432194],[83,1,1456010432229],[73,1,1456010432289],[84,0,1456010432349],[84,1,1456010432424],[69,0,1456010432512],[69,1,1456010432640],[68,0,1456010432756],[73,0,1456010432848],[68,1,1456010432854],[73,1,1456010432959],[83,0,1456010432986],[83,1,1456010433093],[71,0,1456010434948],[82,0,1456010435056],[71,1,1456010435115],[82,1,1456010435191],[65,0,1456010435323],[65,1,1456010435474],[80,0,1456010435483],[80,1,1456010435593],[72,0,1456010435680],[83,0,1456010435760],[72,1,1456010435799],[83,1,1456010435919],[68,0,1456010436392],[73,0,1456010436480],[68,1,1456010436503],[73,1,1456010436587],[67,0,1456010436596],[67,1,1456010436707],[84,0,1456010436809],[84,1,1456010436877],[65,0,1456010436998],[84,0,1456010437158],[65,1,1456010437169],[69,0,1456010437262],[84,1,1456010437321],[69,1,1456010437405],[83,0,1456010437502],[83,1,1456010437653],[68,0,1456010437978],[68,1,1456010438114],[66,0,1456010438987],[89,0,1456010439107],[66,1,1456010439118],[89,1,1456010439238],[65,0,1456010439288],[65,1,1456010439471],[70,0,1456010440002],[79,0,1456010440070],[70,1,1456010440101],[79,1,1456010440193],[82,0,1456010440213],[82,1,1456010440309],[69,0,1456010440368],[73,0,1456010440428],[69,1,1456010440503],[78,0,1456010440528],[73,1,1456010440587],[78,1,1456010440659],[77,0,1456010441514],[73,0,1456010441599],[68,0,1456010441667],[77,1,1456010441678],[73,1,1456010441718],[68,1,1456010441782],[71,0,1456010441900],[71,1,1456010441996],[69,0,1456010442030],[84,0,1456010442115],[69,1,1456010442182],[84,1,1456010442222],[190,0,1456010442508],[190,1,1456010442631]]}

        self.dp = DataProcessor(dict)

    def testNgrams(self):
        self.assertListEqual([ (1,2), (2,3), (3,4) ], self.dp.ngrams([1,2,3,4], 2))
        self.assertListEqual([ (1,2,3), (2,3,4), (3,4,5) ], self.dp.ngrams([1,2,3,4,5], 3))

    def testProcess(self):

        self.dp.preprocess()
        f = self.dp.process()
Example #30
def TestExtNegativeSampling():
    dataFile = "./dataset/samples/qa-dump-1460090355004_new.json"
    dataProvider = DataProcessor(dataFile)
    nNegSample = 100
    dataProvider.NegSampleExt(nNegSample)

    for title in dataProvider.data.keys():
        article = dataProvider.data[title]
        for i in range(len(article["answers"])):
            for negSample in article["negExtSamples"][i]:
                print " ".join(SentenceToWord(
                    (article["answers"][i], ))[0]), negSample
                assert " ".join(SentenceToWord(
                    (article["answers"][i], ))[0]) in negSample
    print "Extension negative sampling test passed!"
Example #31
 def __process_one_product__(self):
     df = self.products.head(1)
     measurement = SentinelMeasurement(
             api=self.__api__, 
             geojson_path=self.__geojson_path__,
             dataframe=df,
             autofetch=True
             )
     dp = DataProcessor(
             measurement,
             lambda tiff, result, profile: self.__save_result__(measurement, tiff, result, profile))
     dp.process_data(df)
     self.products = self.products.iloc[1:]
     products_left = len(self.products.index)
     print("{} measurements are left".format(products_left))
Example #32
def aquire_and_append_metrics(inlet, fs, data_processor: DataProcessor):
    """Get metrics from inlet and append to data processor

    Parameters:
    -----------
    inlet: LSL stream inlet to read EEG samples from
    fs: sampling frequency of the EEG stream
    data_processor: DataProcessor instance that accumulates the computed metrics

    Returns:
    --------
    None: updates data_processor in place
    """
    # Obtain EEG data from the LSL stream
    eeg_data, timestamp = acquire_eeg_data(inlet, fs)

    data_processor.feed_new_data(eeg_data=eeg_data)  # Feed new data generated in the epoch
    data_processor.append_metrics()
Example #33
    def test(self, load_model=False):
        """
        Evaluate the model on the test set.
        :param load_model: whether to restore the model from config.load_model_path first
        :return: None; the test accuracy is printed
        """
        if load_model:
            print 'Start loading model from "%s"' % self.config.load_model_path
            self.model.load_state_dict(torch.load(self.config.load_model_path))

        test_loader = DataProcessor(self.config.test_file, self.config.batch_size).load()

        with torch.no_grad():
            correct = 0
            total = 0
            for features, labels in test_loader:
                features = features.to(device)
                _, labels = torch.max(labels, 1)
                labels = labels.to(device)

                outputs = self.model(features)
                _, predicted = torch.max(outputs.data, 1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            print 'Test Accuracy of the model: {} %'.format(100 * correct / total)
Example #34
    def train(self):
        """
        Train the model and save a checkpoint afterwards.
        :return: None
        """
        print 'Start training model.'
        train_loader = DataProcessor(self.config.training_file, self.config.batch_size).load()
        total_step = len(train_loader)
        for epoch in range(self.config.training_epoch):
            for i, (features, labels) in enumerate(train_loader):
                features = features.to(device)
                _, labels = torch.max(labels, 1)  # the returned tuple is (max values, indices of the max values)
                labels = labels.to(device)

                # Forward pass
                outputs = self.model(features)
                loss = self.criterion(outputs, labels)

                # Backward and optimize
                self.optimizer.zero_grad()  # clear accumulated gradients
                loss.backward()  # backpropagate to compute gradients
                self.optimizer.step()  # update the model parameters with the gradients

                if (i + 1) % 100 == 0:
                    print 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'\
                        .format(epoch + 1, self.config.training_epoch, i + 1, total_step, loss.item())

        # Save the model checkpoint
        print 'Start saving model to "%s".' % self.config.save_model_path
        torch.save(self.model.state_dict(), self.config.save_model_path)
Example #35
    def __init__(self, main_window, c_logger=None, data_processor=None):
        """
        Init method of the 'MainWindow' class.
        :param main_window: Instance of the main Tk window.
        :param c_logger: Logger instance (ColoredLogger type is recommended).
                         Default is MAIN_LOGGER (Global variable.)
        :param data_processor: Instance of DataProcessor module.
        """

        self.c_logger = c_logger if c_logger else self.__set_up_default_logger()
        self.main_window = main_window
        self.c_logger.info("Get main window: {}".format(main_window))

        self.c_logger.info("Creating DataProcessor instance.")
        self.data_processor = (data_processor if data_processor else
                               DataProcessor(c_logger=self.c_logger))
        self.c_logger.info("DataProcessor instance successfully created.")

        self.__create_main_gui_section()
        self.__create_personal_gui_section()
        self.__create_horizontal_separator_lines()
        self.__create_vertical_separator_lines()

        self.__set_resizable(row=9, col=3)
Example #36
class TestProcessingChain(unittest.TestCase):

    # TODO test queue and threading
    def setUp(self):
        inputQueue = Queue()
        outputQueue = Queue()
        self.processor = DataProcessor(inputQueue, outputQueue)

    def test_process(self):
        self.processor.process(TEST_DATA)

    def test_splitData(self):
        eegData, gyroData = self.processor.splitData(TEST_DATA)
        intersect = set(eegData) & set(gyroData)
        self.assertTrue(len(intersect) == 0)
        self.assertTrue("F3" in eegData)
        self.assertTrue("X" in gyroData)
Example #37
def extract(args):
    output_file_name = args.output_file_name
    extraction_method = args.extraction_method
    label_type = args.label_type
    
    if output_file_name is None:
        sys.stderr.write('Error: no file name to output is specified')
        sys.exit()
    
    if extraction_method is None:
        extraction_method = '3-gram'        # default: 3-gram
        
    if label_type is None:
        label_type = 'compiler'             # default: compiler
    
    datproc = DataProcessor()
    datproc.save_all_data_in_svmlight_format(output_file_name,
                                             extraction_method, label_type)
Example #38
 def _data_procesor_init_(self):
     filters = [data_filter.Invertor(), 
                data_filter.SelfAdjustableNotchFilter()]
     self.data_processor = DataProcessor(self.plotter.plot_valid, 
                                         self.plotter.plot_error, filters)
     
     for filter_ in filters:
         name = filter_.get_name()
         name_repr = GTK_Wrapper.get_wrapper(name).get_gui_object()
         
         self.gui_filter_settings_box.pack_start(name_repr, True, True, 0)
         
         filter_settings_mgr = filter_.settings_manager()
         
         self._add_all_params(filter_settings_mgr, self.gui_filter_settings_box)
Example #39
    def __init__(self, dataCollector):

        self.inputQueue = Queue()
        self.outputQueue = Queue()
        self.extractQueue = Queue()
        
        self.sigUtil = SignalUtil()
        self.eegUtil = EEGUtil()
        
        self.collector = dataCollector
        self.collectorThread = threading.Thread(target=self.collector.collectData)
        
        self.processor = DataProcessor(self.inputQueue, self.outputQueue)
        self.processingThread = threading.Thread(target=self.processor.processData)

        self.extract = True
Example #40
class GUI:
    GLADE_FILE = "GUI.glade"
    EXPORT_RESPONSE_OK = 1

    def __init__(self): 
        
        
        self.builder = Gtk.Builder()
        self.builder.add_from_file(GUI.GLADE_FILE)
        
        # order is important!
        self._gui_elements_init_()
        self._graph_init_()
        self._data_procesor_init_()
        self._provider_init_()


        self.is_active = False

        self.builder.connect_signals(self)

        self.builder.get_object("main_window").show_all()

        self.stop()
        #
    def _gui_elements_init_(self):
        #attach elements to paned
        graph_window = self.builder.get_object("graph_window")
        control_panel = self.builder.get_object("control_panel")
        working_area_paned = self.builder.get_object("working_area_paned")
        
        #graph to the left: resizable
        #control panel to the right
        working_area_paned.pack1(graph_window, resize=True, shrink=True)
        working_area_paned.pack2(control_panel, resize=False, shrink=True)
         
        
        #start button
        self.gui_start_btn = self.builder.get_object("start_btn")
        self.gui_start_label = self.builder.get_object("start_lbl")
        self.gui_stop_label = self.builder.get_object("stop_lbl")

        #provider settings
        self.gui_provider_settings_area = self.builder.get_object("provider_settings_alignment")
        # 
        self.gui_provider_settings_box = self.builder.get_object("provider_settings_box")
        
        
        #filter settings
        self.gui_filter_settings_box = self.builder.get_object("filter_settings_box")
        
        #export dialog
        self.gui_export_btn = self.builder.get_object("export_btn")
        self.gui_export_dialog = None

        #error message dialog
        self.gui_error_message_dialog = None

    def _data_procesor_init_(self):
        filters = [data_filter.Invertor(), 
                   data_filter.SelfAdjustableNotchFilter()]
        self.data_processor = DataProcessor(self.plotter.plot_valid, 
                                            self.plotter.plot_error, filters)
        
        for filter_ in filters:
            name = filter_.get_name()
            name_repr = GTK_Wrapper.get_wrapper(name).get_gui_object()
            
            self.gui_filter_settings_box.pack_start(name_repr, True, True, 0)
            
            filter_settings_mgr = filter_.settings_manager()
            
            self._add_all_params(filter_settings_mgr, self.gui_filter_settings_box)
                
  
    def _provider_init_(self):
        
        self.data_provider = RandomWalkDataProvider(onData = self.data_processor.new_data, 
                                                    onError = self.error_stop)
        #self.data_provider = SerialPortDataProvider(self.data_processor.new_data, self.error_stop)
        
        data_provider_settings_mgr = self.data_provider.settings_manager()
        
        self._add_all_params(data_provider_settings_mgr, self.gui_provider_settings_box)
         

    def _graph_init_(self):
        # Create graph
        #self.graph = MatplotlibGraph(onClose=self.stop)        
        self.graph = GTK_Graph(self.builder.get_object("graph_area"),
                               settings.GRAPH_COLORS, 
                               settings.DATA_MIN_VALUE, settings.DATA_MAX_VALUE)
        self.plotter = Plotter(self.graph)                           

    def _add_all_params(self, obj_settings_mgr, gui_setting_box):
        for param in obj_settings_mgr.all_params():
            wrapper = GTK_Wrapper.get_wrapper(param)
            gui_obj = wrapper.get_gui_object()
            
            gui_setting_box.pack_start(gui_obj, True, True, 0)
        

    def close(self):
        self.stop()
        self.graph.close()        
        
    def start(self):
        if self.is_active:
            self.stop()

        logger.to_log("start")
        self.is_active = True

        # handling with gui first!!!

        #disable port settings
        self.gui_provider_settings_area.set_sensitive(False)
        #rename start button
        self.gui_start_btn.set_label(self.gui_stop_label.get_text())
        #disable export button
        self.gui_export_btn.set_sensitive(False)


        self.data_processor.enable()
        # start listening
        self.data_provider.activate()

    def error_stop(self, text):
        self.error_message(text)
        self.stop()

    def error_message(self, text):
        logger.to_log(text)
        
        if self.gui_error_message_dialog is None:
            self.gui_error_message_dialog = self.builder.get_object("error_message_dialog")

        self.gui_error_message_dialog.set_property("secondary-text", text)
        self.gui_error_message_dialog.run()
        self.gui_error_message_dialog.hide()

    def stop(self):
        logger.to_log("stop")
        self.data_provider.deactivate()
        self.data_processor.disable()

        self.is_active = False

        #enable settings
        self.gui_provider_settings_area.set_sensitive(True)
        #rename start button
        self.gui_start_btn.set_label(self.gui_start_label.get_text())
        #enable export button
        self.gui_export_btn.set_sensitive(True)


    def on_main_window_delete_event(self, *args):
        self.close()
        Gtk.main_quit()
    
    def on_start_clicked(self, *args):
        if self.is_active:
            self.stop()
        else:
            self.start()
    def on_export_clicked(self, *args):
        if self.gui_export_dialog is None:
            self.gui_export_dialog = self.builder.get_object("export_filechooser_dialog")

        fproc = FileProcessor()
        self.gui_export_dialog.set_current_name(fproc.get_name())

        response = self.gui_export_dialog.run()

        if response == gui.EXPORT_RESPONSE_OK:
            fproc.set_name(self.gui_export_dialog.get_filename())
            fproc.do_export(self.data_processor, onError=self.error_message)

        self.gui_export_dialog.hide()
Example #41
import numpy as np

from data_processor import DataProcessor
from neural_network import NeuralNetwork

if __name__ == '__main__':
    data_processor = DataProcessor()

    x_train, y_train = data_processor.get_train_set()
    x_test, y_test = data_processor.get_test_set()

    input_nodes = 2
    hidden_nodes = 3
    output_nodes = 1

    network = NeuralNetwork(input_nodes=input_nodes,
                            hidden_nodes=hidden_nodes,
                            output_nodes=output_nodes,
                            lr=0.01)

    network.train(x_train, y_train)

    score = network.evaluate(x_test, y_test)
    print(score)

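    # Run the trained network on a few hand-picked inputs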
    x = np.array([[1, 1],
                  [10, 10],
                  [100, 100],
                  [2000, 1000]], dtype=float)
    y = network.predict(x)
    print(y)
Example #42
 def setUp(self):
     inputQueue = Queue()
     outputQueue = Queue()
     self.processor = DataProcessor(inputQueue, outputQueue)