Example #1
 def __init__(self):
     self.epoch_duration = 60
     self.vk_client = vk_autorization(LOGIN, PASSWORD)
     self.data = DataHandler()
     self.view = View(self, self.data.get_players(),
                      self.data.get_settings())
     self.view.show()
Example #2
class Main():

    #DataHandler object
    data_handler = None

    #DDoSDetector object
    DDoS_detector = None

    def __init__(self):
        self.data_handler = DataHandler()
        self.DDoS_detector = DDoSDetector()

    #gives the user a menu
    def menu(self):

        print("1) Train Model")
        print("2) Test model on dataset")
        print("3) Run model on live packets")
        print("0) Quit")

        choice = int(input("Choice: "))

        #train model
        if choice == 1:
            print("Chose to train a model")
            pass

        #test model on dataset
        elif choice == 2:
            self.test_model()

        #Run model on live packets
        elif choice == 3:
            print("Chose to run the model on live packets")
            pass

        #Quits the program
        elif choice == 0:
            return False

        return True

    #menu choice to train a model
    def train_model(self):
        pass

    #menu choice to test a model
    def test_model(self):

        self.data_handler.print_dataset_list()

        choice = int(input("Dataset choice: "))

        print("Choice: " + str(self.data_handler.get_dataset_path(choice)))

        self.DDoS_detector.test(choice)

    #menu choice to run the model on live packets
    def run_model(self):
        pass
Example #3
def find_distance(i_):
    path_to_covers = '/home/anosov/data/hard_base/covers/case_%i.dump' % (i_, )
    base, ext = os.path.splitext(path_to_covers)
    path_to_data = base + '__computing' + ext
    h = DataHandler(path_to_covers, path_to_data)
    tmp = h.distance_to_dead_ends
    h.dump()
Example #4
def test(path, run_id, runs):
    # load parser and data handler
    parser = CYKParser.load(path)
    data_handler = DataHandler(config.test_set, run_id, runs)

    # parse sentences in parallel
    executor = ProcessPoolExecutor(config.processes)
    futures = [
        executor.submit(parse_tree, parser, sent, run_id)
        for sent in data_handler.generator()
    ]

    # following code is to track progress
    kwargs = {
        'total': len(futures),
        'unit': 'nap',
        'unit_scale': True,
        'leave': True
    }
    for _ in tqdm(as_completed(futures), **kwargs):
        pass
    for future in futures:
        if future.exception() is not None:
            print(future.exception())

    # stitch files if number of runs is 1
    if runs == 1:
        stitch_files()
    print("Done parsing")
Example #5
    def __init__(self):

        # Initialize data loader
        self.data = DataHandler()

        # Initialize model
        self.ada_network = ADDANet()
Example #6
    def __init__(self,master):
        super().__init__(master)
        
        self.data_handler = DataHandler("data.json")
        self.category_lists = self.data_handler.get_categories_list()
        self.category_data_labels = []

        self.element = 'Sports'
        
        #delete old labels..
        for self.element in self.category_data_labels:
            self.element.configure(text = "")
            del self.element

        #add new labels for name..
        self.list_data_for_category = self.data_handler.get_news_for_category(self.element)
        for i in range(len(self.list_data_for_category)):
            self.element = self.list_data_for_category[i]
            self.labelfont = ('times',15,'bold')
            self.labelcontent_font = ('times',10,'bold','italic')
            self.label_heading = Label(self, text = self.element['headline'])
            self.label_heading.config(font = self.labelfont)
            self.label_heading.grid(column = 0, row = 3*i)
            self.label_content = Label(self, text = self.element['content'])
            self.label_content.config(font = self.labelcontent_font)
            self.label_content.grid(column = 0, row = 3*i+1)
            self.label_content_separator = Label(self, text = '---------------------------')
            self.label_content_separator.grid(column = 0, row=3*i+2)
            self.category_data_labels.append(self.label_heading)
            self.category_data_labels.append(self.label_content)
            self.category_data_labels.append(self.label_content_separator)

        self.pack()
Example #7
    def __init__(self, number_of_epochs=10):
        self.verbose = True
        self.number_of_channels = 2
        self.data_handler = \
            DataHandler(number_of_channels=self.number_of_channels,
                        number_of_negative_sets=50,
                        number_of_positive_sets=50,
                        number_of_test_sets=50,
                        verbose=self.verbose)
        self.data_handler.load_training_data()
        self.data_handler.load_test_data()
        self.data_handler.preprocess_data()

        self.mini_batch_size = 1
        self.model = CNN(number_of_channels=self.number_of_channels,
                         number_of_filters=12,
                         regularization_coefficient=1e0,
                         learning_rate=0.001,
                         filter_length=12,
                         pool_size=512,
                         fully_connected_layer_neurons=8,
                         momentum=0.9,
                         perform_normalization="no",
                         update_type="adam",
                         pool_mode="average_exc_pad")
        self.number_of_epochs = number_of_epochs

        self.training_errors = []
        self.test_errors = []
        self.classifier = SVC(C=11., kernel="rbf", gamma=1. / (2 * 2.85))
Example #8
    def perform_exp(self):

        list_of_scores = []

        data_handler = DataHandler(self.data, self.var_dict)

        raw_X = data_handler.get_dummy_coded_data('dummy_only')
        n_cat_dummy_var = raw_X.shape[1] - len(self.var_dict['numerical_vars'])

        raw_clf_scores = self._get_classification_score(raw_X)
        list_of_scores.append(
            ('raw', raw_clf_scores, raw_X.shape[1] - n_cat_dummy_var))

        for n_init_bins in self.n_init_bins_list:
            sb_X = self.semantic_binning.fit_transform(self.data, n_init_bins)
            sb_clf_scores = self._get_classification_score(sb_X)
            list_of_scores.append(('sb_{}'.format(n_init_bins), sb_clf_scores,
                                   sb_X.shape[1] - n_cat_dummy_var))

        for n_bins in self.n_bins_range:
            ew_X = data_handler.get_dummy_coded_data('equal_width', n_bins)
            ew_clf_scores = self._get_classification_score(ew_X)
            list_of_scores.append(('ew_{}'.format(n_bins), ew_clf_scores,
                                   ew_X.shape[1] - n_cat_dummy_var))

            ef_X = data_handler.get_dummy_coded_data('equal_freq', n_bins)
            ef_clf_scores = self._get_classification_score(ef_X)
            list_of_scores.append(('ef_{}'.format(n_bins), ef_clf_scores,
                                   ef_X.shape[1] - n_cat_dummy_var))

        self.list_of_scores = list_of_scores
        print('Experiment finished! Results saved in Exp instance.')
Example #9
def download_and_classify_in_batches(complete_links_list, classifier):
    print("Total amount of images to be downloaded and classified: %d" %
          len(complete_links_list))

    for index in range(0, len(complete_links_list), BATCH_SIZE):
        time_start = time.time()
        print("Downloading and classifying batch: %d -> %d" %
              (index, index + BATCH_SIZE))

        links_batch = complete_links_list[index:index + BATCH_SIZE]
        tensor_images = ImageDownloader.download_images(
            links_batch, NUM_DOWNLOAD_THREADS)

        if len(tensor_images) == 0:
            print("Skipping classification of empy list")
            continue

        results = classifier.classify_image_tensors(tensor_images)
        results_df = DataHandler.convert_classification_result_to_dataframe(
            results)
        DataHandler.write_classification_result(results_df,
                                                PARQUET_FILE_OUTPUT_LOCATION)

        duration = time.time() - time_start
        print("Duration of donwloading and classification for batch: %.2f" %
              duration)
Example #10
    def runServer(self):
        Logger.writeInfo("Opening socket...")
        sock = socket(AF_INET, SOCK_STREAM)
        sock.bind((self.host, self.port))
        sock.listen(10)
        handler = DataHandler()

        try:
            while True:
                conn, addr = sock.accept()
                Logger.writeInfo("Connected by {}".format(addr))

                try:
                    data = Server.receiveAll(conn)
                    if data is not None:
                        response = handler.process(data)
                        conn.sendall(response)
                    conn.close()
                except Exception as e:
                    Logger.writeError(str(e))

                Logger.writeInfo("Disconnected {}".format(addr))
        finally:
            Logger.writeInfo("Socket closed")
            sock.close()
            handler.close()
Example #11
def find_distance(i_):
    path_to_covers = '/home/anosov/data/hard_base/covers/case_%i.dump' % (i_, )
    base, ext = os.path.splitext(path_to_covers)
    path_to_data = base + '__computing' + ext
    h = DataHandler(path_to_covers, path_to_data)
    tmp = h.distance_to_dead_ends
    h.dump()
Example #12
def get_by_distance():
    distance = request.args.get("dist")
    latitudeN = request.args.get("latN")
    longitudeE = request.args.get("lonE")
    dh = DataHandler()
    return dh.select_schools_by_distance(float(latitudeN), float(longitudeE),
                                         float(distance))
Example #13
 def __init__(self, server_socket, buffer, socket_ip):
     self.names = []
     self.record = {}
     self.buffer = buffer
     self.connected_list = []
     self.socket_ip = socket_ip
     self.data_handler = DataHandler(socket_ip)
     self.server_socket = server_socket
     self.add_connection(server_socket)
Example #14
    def dataLoader(self):

        data_handler = DataHandler()
        npz = data_handler.npzLoader(self.target_file)

        data, label = npz[0], npz[1]
        data /= 255.0

        return data, label
Example #15
    def __init__(self, file_sys_broadcast_addr, task_ping_addr,
                 current_worker_addr, job_url, job_id, function_url,
                 block_urls, task, answer_addr, load_byte, status_db_url,
                 block_id):

        self.status_handler = StatusHandler(
            status_db_url, self._verify_if_errors_in_fs,
            self._reset_method_if_no_answer_from_fs)
        self.data_handler = DataHandler(
            job_url, job_id, self._verify_if_errors_in_fs,
            self._reset_method_if_no_answer_from_fs)
        self.task_ping_addr = task_ping_addr
        self.file_sys_broadcast_addr = file_sys_broadcast_addr
        self.file_sys_addrs = []
        self.answer_addr = answer_addr
        self._update_filesystem_nodes()
        self.file_sys_addr = self._get_new_filesystem_node()
        self.job_url = job_url
        self.job_id = job_id
        self.function_url = function_url
        self.block_urls = block_urls
        # print("Task_Exc: Este es el block_urls: ",self.block_urls)
        self.task = task
        self.load_byte = load_byte
        self.current_worker_addr = current_worker_addr
        self.status_db_url = status_db_url
        self.block_id = block_id

        self.map_fun, self.red_fun, self.comb = self.get_func()
        self.record_readers = {
            True: self.record_reader_byte,
            False: self.record_reader_str
        }
        self.execute_methods = {
            'map': self.execute_map_task,
            'reduce': self.execute_reduce_task
        }

        self.start_listen_pings()
        # make sure that if the master went down at the moment it sent me the task
        # message, I mark the block as submitted myself
        to_update = [('state', mt.slices_states[1]),
                     ('worker_ip', self.task_ping_addr[0]),
                     ('worker_port', self.task_ping_addr[1])]
        print(
            "Task_Exc: ",
            "Salvamos el estado del bloque {} a SUBMITTED en el filesys: {}".
            format(block_id, self.file_sys_addr))

        self.status_handler.update_status_row(self.file_sys_addr, 'block',
                                              ('block_id', block_id),
                                              to_update)

        self.execute_task = self.execute_methods[self.task]
        self.record_reader = self.record_readers[load_byte]
Example #16
def load_data(db_name,
              label="training_0000",
              n_imgs=None,
              thresh=1e5,
              step_size=1,
              db_dir=None):
    """
    Loads RGB images and targets from an HDF5 database and returns them as numpy arrays.

    Expects data to be saved in the following group structure:
        Training Data
        training_0000/data/0000     using %04d to increment data name

        Validation Data
        validation_0000/data/0000       using %04d to increment data name

        Both return an array with the rgb image saved under the 'rgb' key
        and the target saved under the 'target' key

    Parameters
    ----------
    db_name: string
        name of database to load from
    label: string, Optional (Default: 'training_0000')
        location in database to load from
    n_imgs: int
        how many images to load
    """
    # TODO: specify the data format expected in the comment above
    dat = DataHandler(db_dir=db_dir, db_name=db_name)

    # load training images
    images = []
    targets = []

    skip_list = ["datestamp", "timestamp"]
    keys = np.array([
        int(val) for val in dat.get_keys("%s" % label) if val not in skip_list
    ])
    n_imgs = max(keys) if n_imgs is None else n_imgs
    print("Total number of images in dataset: ", max(keys))

    for nn in range(0, n_imgs, step_size):
        data = dat.load(parameters=["rgb", "target"],
                        save_location="%s/%04d" % (label, nn))
        if np.linalg.norm(data["target"]) < thresh:
            images.append(data["rgb"])
            targets.append(data["target"])

    images = np.asarray(images)
    targets = np.asarray(targets)

    print("Total number of images within threshold: ", images.shape[0])

    return images, targets
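
The docstring in the example above spells out the HDF5 group layout that load_data() expects. Below is a minimal sketch of producing a file in that layout with plain h5py; this is an assumption for illustration only, since DataHandler's own save API is not shown in these snippets, and the file name and array shapes are made up.

# Hypothetical sketch: write a few entries in the layout described by the
# load_data() docstring (training_0000/data/0000 with 'rgb' and 'target' keys).
# DataHandler's real save method is not shown here, so plain h5py is used.
import h5py
import numpy as np

with h5py.File("example_db.h5", "a") as f:
    for nn in range(3):
        grp = f.create_group("training_0000/data/%04d" % nn)
        grp.create_dataset("rgb", data=np.zeros((64, 64, 3), dtype=np.uint8))
        grp.create_dataset("target", data=np.zeros(3, dtype=np.float32))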
Example #17
    def store_analyzed_reviews(self, hotel_name, aspect_details, platforms):
        for key, value in self.aspect_details.items():
            value.review_list = {}

        to_json = json.dumps(aspect_details, cls=MyEncoder)
        data_handler = DataHandler()

        data_handler.set_analyzed_reviews(hotel_name, to_json, platforms)


# ra = ReviewAnalyzer()
# ra.get_analyzed_reviews('Kingsbury', ['ALL'])
Example #18
 def test_get_hist_prices(self):
     """Given a pre-defined set of inputs the test checks if the method correctly computes the results"""
     data_handler = DataHandler('')
     method_handler = MethodHandler(data_handler, '')
     data = {dt.datetime(2021, 1, 1): 120, dt.datetime(2021, 1, 2): 122, dt.datetime(2021, 1, 3): 123,
             dt.datetime(2021, 1, 4): 122, dt.datetime(2021, 1, 5): 119, dt.datetime(2021, 1, 8): 118,
             dt.datetime(2021, 1, 9): 120, dt.datetime(2021, 1, 10): 122}
     data_handler.load_db_manually('MSFT', data)
     res_test = {dt.datetime(2021, 1, 3): 123, dt.datetime(2021, 1, 4): 122, dt.datetime(2021, 1, 5): 119,
                 dt.datetime(2021, 1, 8): 118, dt.datetime(2021, 1, 9): 120}
     res = method_handler.get_hist_prices('MSFT', dt.datetime(2021, 1, 3), dt.datetime(2021, 1, 9), '')
     self.assertEqual(res, res_test)
Example #19
def online_eval():
    # evaluation code for online test
    handler = DataHandler()
    data = handler.generate_data(TRAIN_FILENAME)
    testing_data = handler.generate_data(TEST_FILENAME, "test")
    ann = ANN(9, 10, 1)
    for i in range(80):
        print(i + 1)
        ann.train(data, 5000)

    result = ann.test_without_true_label(testing_data, 0.23)
    handler.write_to_result(TEST_FILENAME, result)
Example #20
    def get_analyzed_reviews(self, hotel_name, platforms):
        data_handler = DataHandler()
        reviews = data_handler.get_analyzed_reviews(hotel_name, platforms)
        print(hotel_name)
        print(reviews)
        if reviews is None:
            self.analyze_reviews(hotel_name, platforms)
            reviews = data_handler.get_analyzed_reviews(hotel_name, platforms)

            if reviews is None:
                return None

        return reviews[2]
Example #21
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)

        self.ui.graphicsView.setBackground(pg.mkColor(0.3))
        self.plot_box = self.ui.graphicsView.addViewBox(row=1,
                                                        col=1,
                                                        lockAspect=True,
                                                        enableMouse=True,
                                                        invertY=True)
        self.image_item = pg.ImageItem()
        self.image_item.setOpts(axisOrder='row-major')
        self.plot_box.addItem(self.image_item)

        self.roi = None
        self.ui.selectDataButton.toggled.connect(self.show_roi)
        self.ui.resetSelectDataButton.clicked.connect(self.reset_roi)

        self.settings_layout = QHBoxLayout()
        self.settings_widget = QWidget()
        self.settings_layout.addWidget(self.settings_widget)
        self.ui.camSettingsWidget.setLayout(self.settings_layout)

        self.data_handler = DataHandler()
        for plugin in self.data_handler.plugins:
            self.add_plugin(plugin.get_widget(), plugin.name)
        self.data_handler.ndarray_available.connect(self.show_ndarray)
        self.data_handler.camera_controls_changed.connect(
            self.set_camera_controls)
        self.ui.actionSave_image.triggered.connect(self.data_handler.save_file)
        self.data_handler.enable_saturation_widget.connect(
            self.enable_saturation_bar)
        self.data_handler.saturation_changed.connect(
            self.ui.progressBar.setValue)
        self.data_handler.message.connect(self.show_message)

        self.camera_dialog = CameraDialog()
        self.ui.actionChoose_camera.triggered.connect(
            self.camera_dialog.choose_camera)
        self.camera_dialog.camera_changed.connect(
            self.data_handler.change_camera)
        self.camera_dialog.choose_first_camera()
        self.ui.actionTune_camera_parameters.triggered.connect(self.tune_pid)

        self.ui.actionShow_Settings.toggled.connect(self.show_settings)

        self.ui.actionDraw_lines.toggled.connect(self.draw_lines)
        self.hline = None
        self.vline = None
Example #22
    def __init__(self, worker_broadcast_addr, filesystem_broadcast_addr,
                 tracker_addr_ping, tracker_ip, current_worker_addr, job_url,
                 job_id, data_type, client_addr, functions_url, map_data_url,
                 status_db_url):

        self.worker_broadcast_addr = worker_broadcast_addr
        self.status_handler = StatusHandler(
            status_db_url, self._verify_if_errors_in_fs,
            self._reset_method_if_no_answer_from_fs)
        self.data_handler = DataHandler(
            job_url, job_id, self._verify_if_errors_in_fs,
            self._reset_method_if_no_answer_from_fs)

        self.filesystem_broadcast_addr = filesystem_broadcast_addr
        self.job_url = job_url
        self.file_sys_addrs = []
        self._update_filesystem_nodes()
        self.file_sys_addr = self._get_new_filesystem_node()
        self.client_addr = client_addr
        self.job_id = job_id

        self.current_worker_addr = current_worker_addr

        self.data_type = data_type
        self.states = ["map", "reduce"]
        self.job_phase = self.states[0]
        self.veto_workers = []
        self.tracker_ip = tracker_ip
        self.tracker_addr_ping = tracker_addr_ping
        self.tracker_addr = (tracker_ip, '8080')
        self.delimiters = [' ', '\n']
        self.map_results = None
        self.result_data_url = '{}/result_data'.format(self.job_url)
        self.map_data_url = map_data_url
        self.functions_url = functions_url
        self.status_db_url = status_db_url
        self.phases = [
            'GETWORKERS', 'SLICES', 'SENDTASK', 'WAITANSWERS', 'GETRESULTS',
            'DONE'
        ]
        self.load_job_methods = {
            'GETWORKERS': self.getting_workers,
            'SLICES': self.getting_workers,
            'SENDTASK': self._load_send_task_phase,
            'WAITANSWERS': self._load_wait_results,
            'GETRESULTS': self.getting_results,
        }
        self.status_phase = mt.task_phases[0]

        self.pinging_process = None
        self.get_data = self.data_handler.get_line_by_line_str
Example #23
def solver_profiling():
    path_to_covers = '/home/anosov/data/hard_base/covers/case_0.dump'
    h = DataHandler(path_to_covers)

    from LG.solver import Solver as LG_Solver

    i_ = 0
    f = h.product_field(i_)
    for i in xrange(1):
        s = LG_Solver(f)
        a = s.run()
        b = s.alternative_path_lens()
        print a
        print [h.cells[i] for i in h.finished_packed_paths[i_]]
Example #24
def get_pre_trained_model():
    pre_trained_model = InceptionV3(input_shape=(image_size, image_size, 3),
                                    include_top=False,
                                    weights=None)

    DataHandler.extract_inception_model(local_weights_file)
    pre_trained_model.load_weights(local_weights_file)

    for layer in pre_trained_model.layers:
        layer.trainable = False

    print(pre_trained_model.summary())

    return pre_trained_model
Example #25
    def __init__(self, input_size=64, hidden_size=64, n_filters=16):

        # Copy params
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_filters = n_filters

        # Initialize data loader
        self.data = DataHandler(image_size=input_size)

        # Initialize model
        self.began_network = BEGANNet(input_size=input_size,
                                      hidden_size=hidden_size,
                                      n_filters=n_filters)
Example #26
def solver_profiling():
    path_to_covers = '/home/anosov/data/hard_base/covers/case_0.dump'
    h = DataHandler(path_to_covers)

    from LG.solver import Solver as LG_Solver

    i_ = 0
    f = h.product_field(i_)
    for i in xrange(1):
        s = LG_Solver(f)
        a = s.run()
        b = s.alternative_path_lens()
        print a
        print [h.cells[i] for i in h.finished_packed_paths[i_]]
        print b
Example #27
class NeuralNetwork:
    def __init__(self):
        self.data_handler = DataHandler()
        self.network_model = NetworkModel()

    def train(self, args):
        # Loading dataset
        network_input, network_output, vocab_length = self.data_handler.load_dataset(
            args)
        model = self.network_model.create(network_input, vocab_length)

        # callbacks
        stop_training = StopTrainingCallback()

        checkpoint = tf.keras.callbacks.ModelCheckpoint(
            constant.MODEL_PATH,
            monitor="acc",
            verbose=1,
            save_best_only=True,
            save_weights_only=False,
            save_freq='epoch')

        history = model.fit(network_input,
                            network_output,
                            epochs=constant.EPOCHS,
                            batch_size=constant.BATCH_SIZE,
                            callbacks=[checkpoint, stop_training])

        self.network_model.plot_loss_and_accuracy(args, history)
        return model

    def run(self, args):
        model = tf.keras.models.load_model(args["model"])

        with open(args["notes"], 'rb') as notes_path:
            notes = pickle.load(notes_path)
            pitches = sorted(set(item for item in notes))
            vocab_length = len(set(notes))

            with open(args["partition_info"], 'rb') as partition_info_path:
                partition_info = pickle.load(partition_info_path)

                network_input, network_output = self.data_handler.prepare_sequences(
                    notes, partition_info['sequence_length'], vocab_length)

                prediction_output = self.network_model.generate_notes(
                    model, network_input, pitches, vocab_length)
                self.data_handler.save_midi(partition_info, prediction_output)
Example #28
    def system(self):
        if not login.current_user.is_authenticated:
            return redirect(url_for('.login_view'))

        self.disk = DataHandler.getInstance().disk
        self.header = "System"
        return render_template('sb-admin/pages/system.html', admin_view=self)
Example #29
 def applications(self):
     if not login.current_user.is_authenticated:
         return redirect(url_for('.login_view'))
     self.apps = DataHandler.getInstance().apps
     self.header = "Applications"
     return render_template('sb-admin/pages/applications.html',
                            admin_view=self)
Example #30
 def train(self, num_classifiers=50):
     bagged_datasets = DataHandler.create_bagged_datasets(
         num_classifiers, self.examples, self.targets)
     for bagged_dataset in bagged_datasets:
         naive_bayes = NaiveBayes(bagged_dataset[0], bagged_dataset[1])
         naive_bayes.train()
         self.nb_classifiers.append(naive_bayes)
Example #31
 def train(self, forest_size=50, tree_depth=10):
     self.forest = []
     bagged_datasets = DataHandler.create_bagged_datasets(
         forest_size, self.examples, self.targets)
     for bagged_dataset in bagged_datasets:
         examples = bagged_dataset[0]
         targets = bagged_dataset[1]
         num_attributes = len(examples[0])
         num_chosen_attr = int(sqrt(num_attributes))
         while num_chosen_attr > len(examples[0]):
             DataHandler.rm_column(examples,
                                   random.randint(1,
                                                  len(examples[0]) - 1))
         id3 = ID3(examples, targets)
         id3.train(tree_depth)
         self.forest.append(id3)
Example #32
def main():
    opts = configs.model_config

    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    gpu_config = tf.ConfigProto(device_count={'GPU': 1},
                                allow_soft_placement=False,
                                log_device_placement=False)
    gpu_config.gpu_options.allow_growth = True
    sess = tf.Session(config=gpu_config)

    print('starting processing data ...')

    data = DataHandler(opts)

    print('starting initialising model ...')
    opts['r_range_upper'] = data.train_r_max
    opts['r_range_lower'] = data.train_r_min
    model = Model_Decon(sess, opts)

    opts['batch_size'] = 1
    opts['va_sample_num'] = 6
    opts['model_bn_is_training'] = False

    print('starting testing policy using AC_Decon ...')
    ac = AC_Decon(sess, opts, model)
    ac.policy_test(data)
Example #33
    def train_epoch(self, optimizer, training_data, epoch_id='unknown'):
        epoch_time = time.time()

        accumulated_loss = 0
        average_losses = []
        training_data_length = len(training_data)
        percent_done = 0
        for index, data in enumerate(training_data):
            sample, target = FloatTensor([[data[0]]]), FloatTensor([data[1]])
            if torch.cuda.is_available():
                sample, target = sample.cuda(0), target.cuda(0)
            sample, target = Variable(sample), Variable(target)

            optimizer.zero_grad()
            output = self(sample)
            loss = self.criterion(output, target)
            loss.backward()
            optimizer.step()
            accumulated_loss += loss.data[0]

            if percent_done - 100 * index // training_data_length != 0:
                percent_done = 100 * index // training_data_length
                average_losses.append(accumulated_loss/(index+1))
                print('Finished %s%% of epoch %s | average loss: %s' % (percent_done, epoch_id+1, accumulated_loss/(index+1)))

        print "Successively trained %s epochs (epoch timer: %s)" % (epoch_id+1, DataHandler.format_time(time.time() - epoch_time))
        return average_losses
Example #34
	def __init__(self):
		# mechanize.RobustFactory() allows pages with errors (unclosed tags, etc.) to be read correctly
		self.br = mechanize.Browser(factory=mechanize.RobustFactory())
		# ignore the rules in robots.txt
		self.br.set_handle_robots(False)
		# we have to pretend to be a real browser, otherwise Google won't let us in :D
		self.br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; Linux i686; rv:7.0.1) Gecko/20100101 Firefox/7.0.1')]
		# initialize the database
		self.db = DataHandler()
Example #35
from flask import Flask, request, session, g, redirect, url_for, abort, \
     render_template, flash, _app_ctx_stack
     
from data_handler import DataHandler
import json

dataHandler = DataHandler()     
dataHandler.setup_tfidfMatcher()
app = Flask(__name__)     

     
@app.route('/layout.html')
def handle_layout():
    return render_template('layout.html')
     
@app.route('/')
@app.route('/index.html')
def handle_index():     
    
    resumes = dataHandler.get_resumes()    
    return render_template('index.html', resumes=resumes)

@app.route('/add_resume',  methods=['POST', 'GET'])     
def add_resume():    
    error = None
    if request.method == 'POST':
         resume_text = request.form['resume_txt'].strip()
    #     print "resume_text:", resume_text
         if len(resume_text) > 0 :
             dataHandler.save_resume(resume_text)
         
Example #36
def plot_iter(result):
    width = 10
    x = np.arange(10, 101, 10)
    plt.ylim(0.6, 0.68)
    plt.ylabel("Precision")
    plt.xlabel("Iteration")
    plt.bar(x, [val for val in result], width, color="#ababab")
    plt.show()


# plot node number in hidden layer figure
def plot_node(result):
    width = 0.5
    x = np.arange(1, 11, 1)
    plt.ylim(0.62, 0.66)
    plt.ylabel("Precision")
    plt.xlabel("Hidden Layer Node Number")
    plt.bar(x, [val for val in result], width, color="#ababab")
    plt.show()


if __name__ == "__main__":

    handler = DataHandler()
    data = handler.generate_data(TRAIN_FILENAME)
    iteration_test(data)
    node_test(data)
    layer_test(data)
    cross_validation(5, 500, data)
    online_eval()
Example #37
from data_handler import DataHandler
from matrix_generator import MatrixGenerator

c = DataHandler()
s = c.getData()
print s

a = MatrixGenerator(s)
lis = a.getMatrix()

for index in range(len(lis)):
    print lis[index]
Example #38
class Tracker(object):
	def __init__(self):
		# mechanize.RobustFactory() allows pages with errors (unclosed tags, etc.) to be read correctly
		self.br = mechanize.Browser(factory=mechanize.RobustFactory())
		# ignore the rules in robots.txt
		self.br.set_handle_robots(False)
		# we have to pretend to be a real browser, otherwise Google won't let us in :D
		self.br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; Linux i686; rv:7.0.1) Gecko/20100101 Firefox/7.0.1')]
		# initialize the database
		self.db = DataHandler()
		
	def askGoogle(self,question):
		self.br.open('http://google.pl')
		# select the form on the page (you can also pick it by name, but Google has only one, hence by index)
		self.br.select_form(nr=0)
		# try to load the data from the database
		try:
			links = self.db.load_search(question)
		except KeyError:		
		# if the data is not in the database, ask Google
			self.br.form['q'] = question + ' dyskusja'  # we only want pages with a discussion
			self.br.submit()
			# results is simply the raw HTML of the opened page
			results = self.br.response().read()
			print results[string.find(results,"Około "):string.find(results, "wyników")]+"wyników:"
			# soup is an object ready for parsing
			self.soup = BeautifulSoup(results)
			print self.soup.findAll('a', attrs={'class':'l'})
			links =   [x['href'] for x in self.soup.findAll('a', attrs={'class':'l'})]
			# second page of results
			fl_tags = self.soup.findAll('a', attrs = {'class':'fl'})
			second_page = ''
			for tag in fl_tags:
				if tag.findAll(text = '2') != []:
					second_page = tag['href']
			print 'adres drugiej strony:',second_page
			self.br.open(second_page)
			self.soup = BeautifulSoup(self.br.response().read())
			links.extend([x['href'] for x in self.soup.findAll('a', attrs={'class':'l'})])
			print "\n".join(links)
			self.db.add_search(question,links)
		# drop links that score 0.0 - they are probably not interesting
		links = filter(lambda url: page_rater.rate_URL(url, self.db) > 0.0, links)
		# sort by activity
		links.sort(key = lambda url: page_rater.rate_URL(url, self.db), reverse = True)
		print "Sorted"
		return links

	def getSerializedStats(self, links):
		return map(lambda link: pickle.dumps(self.db.load_link(link)), links)
	def getStats(self, link):
		return self.db.load_link(link)
	
	# here it would be good to check whether the forum meets some requirements (e.g. whether it is phpBB)
	def openForum(self,URL):
		self.br.open(URL)
		results = self.br.response().read()
		self.soup = BeautifulSoup(results)
		return self.__getSections()
		
	def __getSections(self):
		res = []
		forumtitles = self.soup.findAll('a', attrs={'class':'forumtitle'})
		for forumtitle in forumtitles:
			title =  u'DZIAŁ: ' + forumtitle.next
			desc = u'OPIS: ' + forumtitle.next.next.next.strip()
			print title
			print desc
			print u'forumtitle: ' + forumtitle['href']
			print forumtitle.parent.findNextSibling('dd', attrs={'class':'topics'}).next.next.text + ':'
			print forumtitle.parent.findNextSibling('dd', attrs={'class':'topics'}).next
			print forumtitle.parent.findNextSibling('dd', attrs={'class':'posts'}).next.next.text + ':'
			print forumtitle.parent.findNextSibling('dd', attrs={'class':'posts'}).next
			print '-------------------------------'	
			res.append(title + '\n' + desc + '\n-------------------------------')
		return res
Example #39
from flask import Flask, render_template
from data_handler import DataHandler
from jobanalysis.jobdescparser import JobDescParser
from filetotxt import fileToTxt
from jobanalysis.resume import resumeparser
from jobanalysis.similarity.modelsimilarity import ModelSimilarity
import indexer

dbinfo = {}
dbinfo["pagesize"] = 20

dbinfo['dbname'] = "jobaly"  
dbinfo['collname'] = "keyword_info_java"
dbinfo['modelcollname'] = dbinfo['collname']+"_model"


app = Flask(__name__)     
dataHandler = DataHandler()     
dataHandler.connectJobColl(dbinfo['dbname'] , dbinfo['collname'])
  
UPLOAD_FOLDER = 'uploads/'
ALLOWED_EXTENSIONS = set(['txt', 'pdf', 'doc', 'docx'])
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['resume'] = ""   
app.config['resume_name'] = ""
app.config['keyword'] = ""
app.config['matchjids'] = None

similarity = ModelSimilarity() 
     
@app.route('/layout.html')
def handle_layout():
    return render_template('layout.html')