예제 #1
0
    def save_text(self, data_tuple, labels, protocol="a"):
        """
        Write one text file per cluster label.
        Mainly used for cluster to text

        For every distinct value in ``labels`` a file ``<label>.txt`` is
        written below ``Path.cluster_directory + self.directory``. It holds
        the sentences selected by that label, a separator line, and then the
        file names selected by that label.

        :param data_tuple: Indexable container; its entries at
            ``InformationType.SENTENCE.value`` and
            ``InformationType.FILE_NAME.value`` are indexed with the mask
            ``labels == label`` (presumably numpy arrays -- confirm against
            the caller)
        :param labels: Cluster label of every sample
        :param protocol: String --> "a", "wb", "w", "a+", "w+"
        :return: None
        """
        unique_labels = set(labels)
        for label in unique_labels:
            text = list(data_tuple[InformationType.SENTENCE.value][labels == label])
            text.append("\n------------------------------------------\n")
            text.extend(data_tuple[InformationType.FILE_NAME.value][labels == label])

            target_file_name = str(label) + ".txt"
            file_path = os.path.join(Path.cluster_directory + self.directory + "/", target_file_name)
            Log.write("saving" + target_file_name + " to: " + file_path)

            # The context manager closes the handle even when a write raises,
            # which the previous open()/close() pair did not guarantee.
            with open(file_path, protocol) as file:
                for lines in text:
                    # writelines accepts a plain string as well as a list of
                    # strings (the list case covers precedent filenames).
                    file.writelines(lines)
                    file.writelines("\n")
            Log.write(target_file_name + " saved to: " + file_path)
예제 #2
0
    def dump_tours(num_hidden, tour_lengths, history, cd_steps, iter, epoch):
        """
        Append the tour lengths to a text file and render sampled tour states.

        The lengths are appended, space separated and truncated to int, to
        ``OUTPUT_FOLDER + 'tour_lengths_h=<num_hidden>'``. Every tour in
        ``history`` then contributes the ``v`` of its first, second and last
        state to one of three groups: tours with exactly 2 states ("short"),
        tours with ``cd_steps + 1`` states ("long"), and everything else
        ("inter"). The groups are passed to
        ``ImageService.get_tours_as_images``.
        """
        lengths_path = OUTPUT_FOLDER + 'tour_lengths_h=%s' % num_hidden
        with open(lengths_path, 'a') as out:
            out.write(" ".join(str(int(length)) for length in tour_lengths))
            out.write(" ")

        # positions sampled from every tour: first, second and last state
        idxs = [0, 1, -1]
        short_tour, inter_tour, long_tour = [], [], []

        for tour in history:
            v = [s.v for s in tour]
            Log.var(len_v_history=len(v))
            if len(v) == 2:
                bucket = short_tour
            elif len(v) == cd_steps + 1:
                bucket = long_tour
            else:
                bucket = inter_tour
            # build the sample completely before extending the bucket
            bucket.extend([v[i] for i in idxs])

        tours = Util.dictize(short_tour=np.array(short_tour),
                             inter_tour=np.array(inter_tour),
                             long_tour=np.array(long_tour))
        ImageService.get_tours_as_images(num_hidden, iter, epoch, len(idxs), tours)
예제 #3
0
def main():
    """Record the channel named by the single command-line argument.

    SIGINT is routed to ``recorder.stop()`` before recording starts.
    """
    if len(sys.argv) != 2:
        # NOTE(review): Log.fatal presumably terminates the process -- confirm.
        Log.fatal('Needs channel to record!')
    channel_name = sys.argv[1]
    recorder = Recorder()

    def _stop_on_interrupt(sig, frame):
        recorder.stop()

    signal.signal(signal.SIGINT, _stop_on_interrupt)
    recorder.record(channel_name)
예제 #4
0
class POP3Telnet:
    """Small wrapper around a ``telnetlib.Telnet`` connection with logging."""

    def __init__(self, host, port):
        """Open the telnet connection to ``host``:``port`` and log it."""
        self.tel = telnetlib.Telnet(host, port)
        self.log = Log()
        self.log.info("Connected to {host} on port {port}".format(host=host,
                                                                  port=port),
                      tag="TELNET")

    def close(self):
        """Close the underlying telnet connection."""
        self.tel.close()

    def read_line(self):
        """Read from the connection up to and including the next newline.

        :return: whatever ``read_until`` returns (bytes for telnetlib)
        """
        # telnetlib works on bytes; the previous str argument cannot be
        # matched against the bytes buffer.
        return self.tel.read_until(b"\n")

    def write(self, msg):
        """Send ``msg`` followed by CRLF."""
        self.tel.write("{msg}\r\n".format(msg=msg).encode())

    def tell(self, whom, what):
        """Send a ``tell <whom> <what>`` command (no line terminator is added)."""
        self.tel.write("tell {whom} {what}".format(whom=whom,
                                                   what=what).encode())

    def login(self, user, password):
        """Answer the login prompt with ``user`` and ``password``.

        NOTE(review): the original line here was garbled by a credential
        scrubber and was not valid Python. The sequence below follows the
        standard telnetlib login idiom. The password prompt text is an
        assumption ("assword: " matches both "Password: " and "password: ")
        -- confirm against the actual server.
        """
        self.tel.read_until(b"login: ")
        self.tel.write(user.encode('ascii') + b"\n")
        self.tel.read_until(b"assword: ")
        self.tel.write(password.encode('ascii') + b"\n")
        self.log.info("Logged in as {user}".format(user=user), tag="TELNET")
예제 #5
0
class Command:
    """A chat command bound to a controller method.

    ``controller`` has the form ``"<module>.<name>"``: the module
    ``controllers.<module>`` is imported, the class ``<Module>`` (capitalized
    module name) is instantiated, and ``<name>`` is handed to that instance's
    ``execute`` when the command runs.
    """

    def __init__(self, key, controller, req_args=False, description="No description provided.", restriction="none"):
        """
        :param key: identifier of the command
        :param controller: ``"<module>.<name>"`` string, see class docstring
        :param req_args: whether the command must be called with an argument
        :param description: help text for the command
        :param restriction: key into the ``lists`` mapping passed to
            ``execute``; any value containing "none" disables the check
        """
        self.key = key
        self.log = Log()
        self.req_args = req_args
        self.controller = controller
        self.description = description
        self.restriction = restriction

        _controller_dir = self.controller.split(".")
        _module_name = _controller_dir[0]
        self.controller_name = _controller_dir[1]
        _module = importlib.import_module("controllers.{0}".format(_module_name))
        self.controller_class = getattr(_module, _module_name.capitalize())()

    def execute(self, user, telnet, lists, arg=None):
        """Run the command for ``user`` after permission and argument checks.

        :param user: name of the calling user
        :param telnet: connection object, forwarded to the controller
        :param lists: mapping from restriction name to an object exposing
            ``get_users(telnet)``
        :param arg: optional command argument
        :return: None; a warning is logged and nothing is executed when the
            user is not permitted or a required argument is missing
        """
        if "none" not in self.restriction:
            # Use the ``lists`` argument: the instance never defines
            # ``self.lists``, so the old lookup raised AttributeError.
            if user not in lists[self.restriction].get_users(telnet):
                self.log.warn("{user} called {cmd} without permission (restricted by role)".format(user=user, cmd=self.controller_name), tag="command")
                return

        if self.req_args and not bool(arg):
            self.log.warn("{user} called {cmd} which requires arguments".format(user=user, cmd=self.controller_name), tag="command")
            return

        self.controller_class.execute(self.controller_name, arg, user, telnet)
예제 #6
0
 def save_model(rbm: RBM, filename):
     """Pickle ``rbm`` to a time-stamped file and return that file's path.

     The path is ``MODEL_FOLDER + 'model_<filename>_<time.time()>.model'``.
     """
     stamped_name = 'model_%s_%s' % (filename, time.time())
     model_path = MODEL_FOLDER + "%s.model" % stamped_name
     with open(model_path, 'wb') as out:
         pickle.dump(rbm, out)
         Log.info("Model dumped to %s" % model_path)
     return model_path
예제 #7
0
 def test_write(self):
     """After Log.write, ``server.log`` must exist one level above this file's directory."""
     Log.write("testing")
     log_root = os.path.abspath(__file__ + r"/../../")
     expected_log = os.path.join(log_root, "server.log")
     self.assertTrue(os.path.isfile(expected_log))
예제 #8
0
파일: mail.py 프로젝트: linpeie/ZYSF
    def __init__(self,
                 server,
                 sender,
                 password,
                 receiver,
                 title,
                 message=None,
                 path=None):
        """Initialise the Email.

        :param server: SMTP server, required.
        :param sender: Sender, required.
        :param password: Sender's password, required.
        :param receiver: Recipient(s); separate several recipients with ";", required.
        :param title: Mail subject, required.
        :param message: Mail body, optional.
        :param path: Attachment path; a list (several attachments) or a str
            (single attachment), optional.
        """
        # Content of the mail.
        self.title, self.message, self.files = title, message, path

        self.msg = MIMEMultipart('related')

        # Account and delivery settings.
        self.server, self.sender = server, sender
        self.receiver, self.password = receiver, password

        self.log = Log()
예제 #9
0
def __dictionary_to_list():
    """
    Load ``precedent_vectors.bin`` and return its values as a list.

    Stored structure::

        precedent_vectors: {
            filename: {
                name: 'AZ-XXXXXXX.txt',
                demands_vector: [...],
                facts_vector: [...],
                outcomes_vector: [...]
            }
        }

    :return: data_list: [{
        name: 'AZ-XXXXXXX.txt',
        demands_vector: [...],
        facts_vector: [...],
        outcomes_vector: [...]
    },
    {
        ...
    }]
    An empty list is returned when the binary could not be loaded (None).
    """
    precedent_vector = Load.load_binary("precedent_vectors.bin")
    if precedent_vector is None:
        return []
    Log.write("Formatting data")
    return [precedent_vector[precedent_file] for precedent_file in precedent_vector]
예제 #10
0
    def weights_to_csv(self):
        """
        Writes all the weights to .csv format
        1) get the facts (second element of every ``facts_vector`` index entry)
        2) for every outcome write one row: its label followed by the
           weights ``coef_[0]`` of the matching estimator

        Estimators that expose no ``coef_`` attribute are skipped. If the
        model has to be loaded and loading fails, nothing is written.
        :return: None
        """
        try:
            if self.model is None:
                self.model = Load.load_binary('multi_class_svm_model.bin')
                self.classifier_labels = Load.load_binary('classifier_labels.bin')
        except Exception:
            # Best effort: without a model there is nothing to export.
            # (Exception rather than BaseException so Ctrl-C / SystemExit
            # are no longer swallowed.)
            return None

        index = TagPrecedents().get_intent_index()
        fact_header = [" "] + [header[1] for header in index['facts_vector']]

        # The csv module requires newline='' on the file object; without it
        # extra blank rows appear on platforms that translate line endings.
        with open('weights.csv', 'w', newline='') as outcsv:
            writer = csv.writer(outcsv)
            writer.writerow(fact_header)

            for i, estimator in enumerate(self.model.estimators_):
                outcome_list = [self.classifier_labels[i]]
                try:
                    weights = estimator.coef_[0]
                except AttributeError:
                    # this estimator has no coefficients to export
                    continue
                outcome_list.extend(weights[j] for j in range(len(weights)))
                writer.writerow(outcome_list)
        Log.write('Weights saved to .csv')
예제 #11
0
def run_ensemble():
    """Train the trees of an ensemble one by one and analyse the growing ensemble.

    Every tree is trained by ``run_tree`` with a deep copy of the run
    arguments whose ``log_dir`` points to ``<log_dir>/tree_<n>``. From the
    second tree on, ``analyse_ensemble`` is called after each tree with all
    results collected so far.
    """
    all_args = get_args()
    # Create a logger
    log = Log(all_args.log_dir)
    print("Log dir: ", all_args.log_dir, flush=True)
    # Log the run arguments
    save_args(all_args, log.metadata_dir)

    use_cuda = not all_args.disable_cuda and torch.cuda.is_available()
    device = torch.device('cuda') if use_cuda else torch.device('cpu')

    files_dir = os.path.join(all_args.log_dir, "files")
    if not os.path.isdir(files_dir):
        os.mkdir(os.path.join(all_args.log_dir, "files"))

    # Obtain the data loaders (only the test loader is used here)
    _, _, test_loader, _, _ = get_dataloaders(all_args)

    log_dir_orig = all_args.log_dir

    # Per-tree results, in training order
    trained_orig_trees, trained_pruned_trees, trained_pruned_projected_trees = [], [], []
    orig_test_accuracies, pruned_test_accuracies, pruned_projected_test_accuracies = [], [], []
    project_infos, infos_sample_max, infos_greedy, infos_fidelity = [], [], [], []

    collectors = (
        trained_orig_trees, trained_pruned_trees, trained_pruned_projected_trees,
        orig_test_accuracies, pruned_test_accuracies, pruned_projected_test_accuracies,
        project_infos, infos_sample_max, infos_greedy, infos_fidelity,
    )

    # Train trees in ensemble one by one and save corresponding trees and accuracies
    for pt in range(1, all_args.nr_trees_ensemble + 1):
        torch.cuda.empty_cache()

        print("\nTraining tree ", pt, "/", all_args.nr_trees_ensemble, flush=True)
        log.log_message('Training tree %s...' % str(pt))

        args = deepcopy(all_args)
        args.log_dir = os.path.join(log_dir_orig, 'tree_' + str(pt))

        # run_tree returns exactly one value per collector, in the same order
        (trained_tree, pruned_tree, pruned_projected_tree,
         original_test_acc, pruned_test_acc, pruned_projected_test_acc,
         project_info, eval_info_samplemax, eval_info_greedy,
         info_fidelity) = run_tree(args)

        tree_results = (
            trained_tree, pruned_tree, pruned_projected_tree,
            original_test_acc, pruned_test_acc, pruned_projected_test_acc,
            project_info, eval_info_samplemax, eval_info_greedy, info_fidelity,
        )
        for collector, value in zip(collectors, tree_results):
            collector.append(value)

        if pt > 1:
            # analyse ensemble with > 1 trees:
            analyse_ensemble(log, all_args, test_loader, device,
                             trained_orig_trees, trained_pruned_trees,
                             trained_pruned_projected_trees,
                             orig_test_accuracies, pruned_test_accuracies,
                             pruned_projected_test_accuracies,
                             project_infos, infos_sample_max, infos_greedy,
                             infos_fidelity)
예제 #12
0
 def initializing_log(self):
     """Create ``self.log`` for this run and record the model configuration in it.

     The log is titled with the algorithm name, the fold info and the
     current local time; one ``key=value`` line is added per configuration
     entry.
     """
     timestamp = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
     log_title = self.algorName + self.foldInfo + ' ' + timestamp
     self.log = Log(self.algorName, log_title)
     # save configuration
     self.log.add('### model configuration ###')
     for key in self.config.config:
         self.log.add(key + '=' + self.config[key])
예제 #13
0
def eval_fidelity(tree: ProtoTree,
                  test_loader: DataLoader,
                  device,
                  log: Log = None,
                  progress_prefix: str = 'Fidelity') -> dict:
    """Measure how often deterministic routing agrees with distributed routing.

    Every test batch is classified three times ('distributed', 'sample_max'
    and 'greedy' routing). Fidelity is the fraction of test samples whose
    predicted class equals the 'distributed' prediction.

    :param tree: the tree to evaluate; moved to ``device`` and put in eval mode
    :param test_loader: loader yielding ``(xs, ys)`` batches
    :param device: device the tree and the batches are moved to
    :param log: optional logger; when None the results are not logged
    :param progress_prefix: description shown on the progress bar
    :return: dict with 'distr_samplemax_fidelity' and 'distr_greedy_fidelity'
    """
    tree = tree.to(device)

    # Keep an info dict about the procedure
    info = dict()

    # Make sure the model is in evaluation mode
    tree.eval()
    # Show progress on progress bar
    test_iter = tqdm(enumerate(test_loader),
                     total=len(test_loader),
                     desc=progress_prefix,
                     ncols=0)

    distr_samplemax_fidelity = 0
    distr_greedy_fidelity = 0
    # Iterate through the test set
    for i, (xs, ys) in test_iter:
        xs, ys = xs.to(device), ys.to(device)

        # Use the model to classify this batch of input data, with 3 types of routing
        out_distr, _ = tree.forward(xs, 'distributed')
        ys_pred_distr = torch.argmax(out_distr, dim=1)

        out_samplemax, _ = tree.forward(xs, 'sample_max')
        ys_pred_samplemax = torch.argmax(out_samplemax, dim=1)

        out_greedy, _ = tree.forward(xs, 'greedy')
        ys_pred_greedy = torch.argmax(out_greedy, dim=1)

        # Count the samples on which each deterministic routing matches
        # the distributed routing
        distr_samplemax_fidelity += torch.sum(
            torch.eq(ys_pred_samplemax, ys_pred_distr)).item()
        distr_greedy_fidelity += torch.sum(
            torch.eq(ys_pred_greedy, ys_pred_distr)).item()
        # Update the progress bar
        test_iter.set_postfix_str(f'Batch [{i + 1}/{len(test_iter)}]')
        del out_distr
        del out_samplemax
        del out_greedy

    # Turn the match counts into fractions of the whole test set
    n_samples = float(len(test_loader.dataset))
    distr_samplemax_fidelity = distr_samplemax_fidelity / n_samples
    distr_greedy_fidelity = distr_greedy_fidelity / n_samples
    info['distr_samplemax_fidelity'] = distr_samplemax_fidelity
    info['distr_greedy_fidelity'] = distr_greedy_fidelity

    # ``log`` defaults to None, so only log when a logger was supplied
    if log is not None:
        log.log_message(
            "Fidelity between standard distributed routing and sample_max routing: "
            + str(distr_samplemax_fidelity))
        log.log_message(
            "Fidelity between standard distributed routing and greedy routing: " +
            str(distr_greedy_fidelity))
    return info
예제 #14
0
def run(command_list, dataset):
    """Train the multi output regression on ``dataset``.

    No further commands are accepted: a non-empty ``command_list`` is
    rejected.

    :return: True after training, False when ``command_list`` is not empty
    """
    if len(command_list) == 0:
        Log.write('Training multi output regression')
        MultiOutputRegression(dataset).train()
        return True

    Log.write('Command not recognized')
    return False
예제 #15
0
def main():
    """Download the requested time range of a vod into a ``.ts`` file.

    The vod id and the time range come from the command line. SIGINT is
    routed to ``downloader.stop()`` before the download starts.
    """
    start_time, end_time, vod_id = CommandLineParser().parse_command_line()
    m3u8_playlist = PlaylistFetcher().fetch_for_vod(vod_id)
    if m3u8_playlist is None:
        # NOTE(review): Log.fatal presumably terminates the process -- confirm.
        Log.fatal("Seems like vod {} doesn't exist".format(vod_id))
    playlist = Chunks.get(m3u8_playlist.segments, start_time, end_time)
    file_name = FileMaker.make_avoiding_overwrite(Vod.title(vod_id) + '.ts')
    downloader = PlaylistDownloader(playlist)

    def _stop_on_interrupt(sig, frame):
        downloader.stop()

    signal.signal(signal.SIGINT, _stop_on_interrupt)
    downloader.download_to(file_name)
예제 #16
0
def eval(tree: ProtoTree,
         test_loader: DataLoader,
         epoch,
         device,
         log: Log = None,
         sampling_strategy: str = 'distributed',
         log_prefix: str = 'log_eval_epochs',
         progress_prefix: str = 'Eval Epoch') -> dict:
    """Evaluate ``tree`` on the test set with the given routing strategy.

    :param tree: the tree to evaluate; moved to ``device`` and put in eval mode
    :param test_loader: loader yielding ``(xs, ys)`` batches
    :param epoch: epoch identifier, used in the progress bar and log message
    :param device: device the tree and the batches are moved to
    :param log: optional logger; when None the accuracy is not logged
    :param sampling_strategy: routing strategy passed to ``tree.forward``
    :param log_prefix: not used by this function
    :param progress_prefix: description shown on the progress bar
    :return: dict with 'confusion_matrix' (rows: true class, columns:
        predicted class), 'test_accuracy', and -- for any strategy other than
        'distributed' -- 'out_leaf_ix'
    """
    tree = tree.to(device)

    # Keep an info dict about the procedure
    info = dict()
    if sampling_strategy != 'distributed':
        info['out_leaf_ix'] = []
    # Build a confusion matrix
    cm = np.zeros((tree._num_classes, tree._num_classes), dtype=int)

    # Make sure the model is in evaluation mode
    tree.eval()

    # Show progress on progress bar
    test_iter = tqdm(enumerate(test_loader),
                     total=len(test_loader),
                     desc=progress_prefix + ' %s' % epoch,
                     ncols=0)

    # Iterate through the test set
    for i, (xs, ys) in test_iter:
        xs, ys = xs.to(device), ys.to(device)

        # Use the model to classify this batch of input data
        out, test_info = tree.forward(xs, sampling_strategy)
        ys_pred = torch.argmax(out, dim=1)

        # Update the overall confusion matrix and a per-batch one that only
        # feeds the accuracy shown on the progress bar
        cm_batch = np.zeros((tree._num_classes, tree._num_classes), dtype=int)
        for y_pred, y_true in zip(ys_pred, ys):
            cm[y_true][y_pred] += 1
            cm_batch[y_true][y_pred] += 1
        acc = acc_from_cm(cm_batch)
        test_iter.set_postfix_str(
            f'Batch [{i + 1}/{len(test_iter)}], Acc: {acc:.3f}')

        # keep list of leaf indices where test sample ends up when deterministic routing is used.
        if sampling_strategy != 'distributed':
            info['out_leaf_ix'] += test_info['out_leaf_ix']
        del out
        del ys_pred
        del test_info

    info['confusion_matrix'] = cm
    info['test_accuracy'] = acc_from_cm(cm)
    # ``log`` defaults to None, so only log when a logger was supplied
    if log is not None:
        log.log_message("\nEpoch %s - Test accuracy with %s routing: " %
                        (epoch, sampling_strategy) + str(info['test_accuracy']))
    return info
예제 #17
0
def analyse_leaf_distributions(tree: ProtoTree, log: Log):
    """Log the sorted maximum value of every leaf distribution (experimental output).

    For leaves flagged with ``_log_probabilities`` the distribution is
    exponentiated before its maximum is taken.
    """
    def _leaf_max(leaf):
        # maximum of one leaf's distribution as a Python number
        if leaf._log_probabilities:
            values = torch.exp(leaf.distribution())
        else:
            values = leaf.distribution()
        return torch.max(values).item()

    max_values = sorted(_leaf_max(leaf) for leaf in tree.leaves)
    log.log_message("Max values in softmax leaf distributions: \n" +
                    str(max_values))
예제 #18
0
 def __init__(self, src_w2i, src_i2w, tgt_w2i, tgt_i2w, embedding_dim, encoder_hidden_dim, decoder_hidden_dim, encoder_n_layers = 1, decoder_n_layers = 1, encoder_drop_prob=0.5, decoder_drop_prob=0.5, latent_size = 64, lr = 0.01, teacher_forcing_ratio=0.5, gradient_clip = 5, model_store_path = None, vae_kld_anneal_k = 0.0025, vae_kld_anneal_x0 = 2500, vae_kld_anneal_function="linear", decoder_word_input_drop = 0.5):
     """Build the encoder, decoder and VAE layers plus optimizer, loss and log.

     :param src_w2i, src_i2w: source vocabulary lookups (their length sets
         the source vocabulary size)
     :param tgt_w2i, tgt_i2w: target vocabulary lookups
     :param model_store_path: directory for the model and its log; defaults
         to the directory of this source file and is created when missing
     The remaining parameters are stored on the instance and/or forwarded to
     the layer constructors below.
     """
     super(LSTMVAE, self).__init__()

     self.encoder_hidden_dim = encoder_hidden_dim
     self.decoder_hidden_dim = decoder_hidden_dim
     self.decoder_n_layers = decoder_n_layers
     self.latent_size = latent_size
     self.teacher_forcing_ratio = teacher_forcing_ratio
     self.gradient_clip = gradient_clip
     self.vae_kld_anneal_k = vae_kld_anneal_k
     self.vae_kld_anneal_x0 = vae_kld_anneal_x0
     self.vae_kld_anneal_function = vae_kld_anneal_function
     self.decoder_word_input_drop = decoder_word_input_drop

     self.encoder = SimpleLSTMEncoderLayer(len(src_w2i), embedding_dim, encoder_hidden_dim, encoder_n_layers, encoder_drop_prob)
     self.decoder = DroppedLSTMDecoderLayer(len(tgt_w2i), embedding_dim, self.latent_size, decoder_hidden_dim, decoder_n_layers, decoder_drop_prob, decoder_word_input_drop)

     # *2: per the original author's note the encoder is bidirectional, so
     # its hidden size is doubled
     self.vae = VAE(encoder_hidden_dim*2, self.latent_size)

     self.optimizer = torch.optim.Adam(list(self.encoder.parameters())+list(self.decoder.parameters())+list(self.vae.parameters()), lr=lr)
     self.criterion = nn.CrossEntropyLoss(ignore_index = 0)

     self.src_w2i = src_w2i
     self.src_i2w = src_i2w
     self.tgt_w2i = tgt_w2i
     self.tgt_i2w = tgt_i2w
     self.epoch = 0
     self.lr = lr
     self.src_vocab_size = len(src_w2i)
     self.tgt_vocab_size = len(tgt_w2i)
     print("Source vocab size: {}".format(self.src_vocab_size))
     print("Target vocab size: {}".format(self.tgt_vocab_size))

     self.train_on_gpu=torch.cuda.is_available()
     if(self.train_on_gpu):
         print('Training on GPU.')
     else:
         print('No GPU available, training on CPU.')
     self.device = torch.device('cuda' if self.train_on_gpu else 'cpu')

     if model_store_path is None:
         self.model_store_path = os.path.dirname(os.path.realpath(__file__))
     else:
         self.model_store_path = model_store_path
     # Check the resolved path: the raw argument is None by default and
     # os.path.exists(None) raises TypeError.
     if not os.path.exists(self.model_store_path):
         os.makedirs(self.model_store_path)

     self.log_path = os.path.join(self.model_store_path,"log")
     self.log = Log(self.log_path, clear=True)
     """
예제 #19
0
def get_similarity_maps(tree: ProtoTree, project_info: dict, log: Log = None):
    """Compute one similarity map per entry of ``project_info``.

    For every key ``j`` the stored 'nearest_input' is passed through
    ``tree.forward_partial`` and the map is ``exp(-distances[0, j, :, :])``
    as a numpy array.

    NOTE: the 'nearest_input' entry is deleted from every
    ``project_info[j]``, i.e. the argument is modified in place.

    :param tree: tree providing ``forward_partial``
    :param project_info: dict of per-key dicts holding 'nearest_input'
    :param log: optional logger; when None nothing is logged
    :return: (sim_maps, project_info)
    """
    # ``log`` defaults to None, so only log when a logger was supplied
    if log is not None:
        log.log_message("\nCalculating similarity maps (after projection)...")

    sim_maps = dict()
    for j in project_info.keys():
        nearest_x = project_info[j]['nearest_input']
        with torch.no_grad():
            _, distances_batch, _ = tree.forward_partial(nearest_x)
            sim_maps[j] = torch.exp(-distances_batch[0, j, :, :]).cpu().numpy()
        del nearest_x
        # only the inner dict changes, so iterating the outer keys stays valid
        del project_info[j]['nearest_input']
    return sim_maps, project_info
예제 #20
0
 def save_binary(self, filename, content):
     """
     Dump ``content`` with joblib into ``Path.binary_directory``.
     joblib is used over pickle for performance and memory.
     :param filename: String, name of the file inside the binary directory
     :param content: Object to serialise
     :return: None
     """
     destination = os.path.join(Path.binary_directory, filename)
     Log.write("saving" + filename + " to: " + destination)
     joblib.dump(content, destination)
     Log.write(filename + " saved to: " + destination)
예제 #21
0
 def load(self):
     """
         Loads the regressors different components: the regressor itself
         (via ``load_model``), its scaler and the mean facts vector from
         ``model_metrics.bin``. The regressor and scaler are combined into
         ``self.model``.
     """
     regressor_name = self.regressor_name
     regressor_file = '{}_regressor.bin'.format(regressor_name)
     Log.write("Loading " + regressor_file)
     file_path = os.path.join(Path.binary_directory, regressor_file)
     regressor = load_model(file_path)
     # Report success only after load_model has returned; the message used
     # to be written before the file was even opened.
     Log.write(regressor_file + " is successfully loaded")
     scaler = Load.load_binary('{}_scaler.bin'.format(regressor_name))
     self.model = AbstractRegressor._create_pipeline(scaler, regressor)
     self.mean_facts_vector = Load.load_binary('model_metrics.bin')['regressor'][regressor_name]['mean_facts_vector']
예제 #22
0
파일: vehicle.py 프로젝트: hfurhoff/exjobb
 def __init__(self, v):
     """Initialise this vehicle from the state exposed by ``v``.

     Pose, current speed, maximum speed, turning radius and sensor are read
     from ``v`` through its getters; the remaining attributes are derived
     from those values.
     """
     # Local import; the name is not referenced in this constructor.
     # NOTE(review): presumably kept local to avoid a circular import -- confirm.
     from dto.vehicledto import VehicleDTO
     self.pose = v.getPose()
     # getPosition() is called only after self.pose has been assigned
     self.initialPosition = self.getPosition()
     self.desiredHeading = self.pose.getOrientation()
     self.currentSpeed = v.getCurrentSpeed()
     # the desired speed starts out equal to the current speed
     self.desiredSpeed = self.currentSpeed
     self.maxSpeed = v.getMaxSpeed()
     self.turningRadius = v.getTurningRadius()
     # acceleration is fixed to half of the maximum speed
     self.acceleration = self.maxSpeed / 2.0
     self.sensor = v.getSensor()
     # the log is constructed with a one-element list holding this vehicle
     self.log = Log([self])
예제 #23
0
 def save(self):
     """
         Saves the scaler and regressor. Does not use joblib
         for the regressor as it is not supported. The model metrics are
         saved as well, and ``self.dataset`` is cleared afterwards.
     """
     regressor_name = self.regressor_name
     regressor_file = '{}_regressor.bin'.format(regressor_name)
     file_path = os.path.join(Path.binary_directory, regressor_file)
     Log.write("saving" + regressor_file + " to: " + file_path)
     # pipeline step 1 is the regressor, step 0 the scaler
     self.model.steps[1][1].model.save(file_path)
     # Report success only after the save call returned; the message used
     # to be written before anything was saved.
     Log.write(regressor_file + " saved to: " + file_path)
     Save().save_binary('{}_scaler.bin'.format(regressor_name), self.model.steps[0][1])
     Save().save_binary('model_metrics.bin', self.data_metrics())
     self.dataset = None
예제 #24
0
    def __init__(self, key, controller, req_args=False, description="No description provided.", restriction="none"):
        """Store the command settings and instantiate its controller class.

        ``controller`` has the form ``"<module>.<name>"``: the module
        ``controllers.<module>`` is imported, its class ``<Module>``
        (capitalized module name) is instantiated, and ``<name>`` is kept as
        ``controller_name``.
        """
        self.key = key
        self.log = Log()
        self.controller = controller
        self.req_args = req_args
        self.restriction = restriction
        self.description = description

        parts = controller.split(".")
        module_name = parts[0]
        self.controller_name = parts[1]
        module = importlib.import_module("controllers.{0}".format(module_name))
        controller_cls = getattr(module, module_name.capitalize())
        self.controller_class = controller_cls()
예제 #25
0
def run(command_list):
    """
    1) Converts the dictionary of precedent vectors to a list of dictionaries
    2) Train the support vector machine model
    3) train the similarity finder model

    If the last argument parses as an integer, only that many precedents
    are used for training.

    :param command_list: List of command line arguments (may be empty)
    :return: boolean, False when the first argument is an unknown ``--``
        option or no precedent vectors are available, True otherwise
    """

    # ------------------- COMMAND LINE SYNTAX --------------------------
    # An empty list is tolerated here, matching the IndexError handling
    # further down; indexing [0] unguarded used to raise before that point.
    if command_list and command_list[0].startswith('--'):
        if command_list[0] not in CommandEnum.command_list:
            Log.write(command_list[0] + " not recognized")
            return False

    precedent_vector = __dictionary_to_list()
    if len(precedent_vector) == 0:
        return False
    try:
        data_size = command_list[-1]
        precedent_vector = precedent_vector[:int(data_size)]

    except (IndexError, ValueError):
        # no arguments, or the last one is not a number: use every precedent
        pass

    except TypeError:
        Log.write(
            "create the precedent vector model first.\nCommand: python main.py -post"
        )
        return False

    # ------------------- TRAINING --------------------------
    if CommandEnum.ALL in command_list:
        classifier_driver.run(command_list[1:], precedent_vector)
        regression_driver.run(command_list[1:], precedent_vector)
        SimilarFinder(train=True, dataset=precedent_vector)
        return True

    if CommandEnum.SVM in command_list:
        classifier_driver.run(command_list[1:], precedent_vector)

    if CommandEnum.SVR in command_list:
        regression_driver.run(command_list[1:], precedent_vector)

    if CommandEnum.SIMILARITY_FINDER in command_list:
        SimilarFinder(train=True, dataset=precedent_vector)
    return True
예제 #26
0
    def train(self):
        """
        Fit a one-vs-rest SVC with a linear kernel on this object's dataset
        1) reshape the data into the format sklearn expects
        2) binarize the labels for multi output
        3) split into training and test data (80 / 20)
        4) train (fit)
        5) test the model
        :return: None
        """
        features, targets = self.reshape_dataset()  # 1
        self.mlb = MultiLabelBinarizer()  # 2
        targets = self.mlb.fit_transform(targets)

        split = train_test_split(
            features, targets, test_size=0.20, random_state=42)  # 3
        x_train, x_test, y_train, y_test = split

        for caption, samples in (("Sample size: {}", features),
                                 ("Train size: {}", x_train),
                                 ("Test size: {}", x_test)):
            Log.write(caption.format(len(samples)))
        Log.write("Training Classifier Using Multi Class SVM")

        base_estimator = SVC(kernel='linear', random_state=42, probability=True)
        clf = OneVsRestClassifier(base_estimator)  # 4
        clf.fit(x_train, y_train)
        # the model is published on the instance only after a successful fit
        self.model = clf
        self.__test(x_test, y_test)  # 5
예제 #27
0
 def __init__(self, src_w2i, src_i2w, tgt_w2i, tgt_i2w, embedding_dim, encoder_hidden_dim, decoder_hidden_dim, encoder_n_layers = 1, decoder_n_layers = 1, encoder_drop_prob=0.5, decoder_drop_prob=0.5, latent_size = 64, lr = 0.01, teacher_forcing_ratio=0.5, gradient_clip = 5, model_store_path = None, decoder_word_input_drop = 0.5):
     """Build the input, encoder, decoder and attention layers plus optimizer, loss and log.

     :param src_w2i, src_i2w: source vocabulary lookups (their length sets
         the source vocabulary size)
     :param tgt_w2i, tgt_i2w: target vocabulary lookups
     :param model_store_path: directory for the model and its log; defaults
         to the directory of this source file and is created when missing
     The remaining parameters are stored on the instance and/or forwarded to
     the layer constructors below.
     """
     super(LSTMTransformer, self).__init__()

     self.encoder_hidden_dim = encoder_hidden_dim
     self.decoder_hidden_dim = decoder_hidden_dim
     self.decoder_n_layers = decoder_n_layers
     self.latent_size = latent_size
     self.teacher_forcing_ratio = teacher_forcing_ratio
     self.gradient_clip = gradient_clip
     self.decoder_word_input_drop = decoder_word_input_drop

     self.input = InputLayerWithAbsolutePosition(vocab_size=len(src_w2i), embedding_dim=embedding_dim, max_seq_len=512)
     self.encoder = SelfAttentionLSTMEncoderStack(2, embedding_dim, encoder_hidden_dim, max_seq_len=512, rnn_layers=1, drop_prob=encoder_drop_prob, attention_probs_dropout_prob=0.2, num_attention_heads = 8)
     self.decoder = DroppedLSTMDecoderLayer(len(tgt_w2i), embedding_dim, self.latent_size, decoder_hidden_dim, decoder_n_layers, decoder_drop_prob, decoder_word_input_drop)

     self.attention = AdditiveAttention(encoder_hidden_dim*2, decoder_hidden_dim) # *2 because encoder is bidirectional and thus hidden is double

     # NOTE(review): self.input's parameters are not handed to the optimizer
     # -- confirm whether that is intended.
     self.optimizer = torch.optim.Adam(list(self.encoder.parameters())+list(self.decoder.parameters())+list(self.attention.parameters()), lr=lr)
     self.criterion = nn.CrossEntropyLoss(ignore_index = 0)

     self.src_w2i = src_w2i
     self.src_i2w = src_i2w
     self.tgt_w2i = tgt_w2i
     self.tgt_i2w = tgt_i2w
     self.epoch = 0
     self.lr = lr
     self.src_vocab_size = len(src_w2i)
     self.tgt_vocab_size = len(tgt_w2i)
     print("Source vocab size: {}".format(self.src_vocab_size))
     print("Target vocab size: {}".format(self.tgt_vocab_size))

     self.train_on_gpu=torch.cuda.is_available()
     if(self.train_on_gpu):
         print('Training on GPU.')
     else:
         print('No GPU available, training on CPU.')
     self.device = torch.device('cuda' if self.train_on_gpu else 'cpu')

     if model_store_path is None:
         self.model_store_path = os.path.dirname(os.path.realpath(__file__))
     else:
         self.model_store_path = model_store_path
     # Check the resolved path: the raw argument is None by default and
     # os.path.exists(None) raises TypeError.
     if not os.path.exists(self.model_store_path):
         os.makedirs(self.model_store_path)

     self.log_path = os.path.join(self.model_store_path,"log")
     self.log = Log(self.log_path, clear=True)
예제 #28
0
 def download_to(self, file_name):
     """Write every segment of the playlist, in order, into ``file_name``.

     The progress bar advances by one per segment. The loop ends early once
     ``self.stopped`` is set; an IOError while fetching or writing is
     reported through ``Log.fatal``.
     """
     playlist = self.playlist
     progress_bar = ProgressBar(file_name, len(playlist.segments))
     with open(file_name, 'wb+') as out:
         for segment in playlist.segments:
             if self.stopped:
                 print('')
                 break
             try:
                 for chunk in Contents.chunked(segment):
                     if not chunk:
                         # skip empty chunks
                         continue
                     out.write(chunk)
             except IOError as err:
                 Log.fatal(str(err))
             progress_bar.update_by(1)
예제 #29
0
def run(command_list):
    """
    Driver for feature extraction subsystem: dispatches on the first
    argument to the pre- or post-processing driver, passing on the rest.
    :param command_list: Command line arguments
    :return: True when the command was dispatched, False when it is unknown
    """
    command = command_list[0]
    remaining = command_list[1:]

    if command == CommandEnum.PRE_PROCESSING:
        pre_processing_driver.run(remaining)
        return True

    if command == CommandEnum.POST_PROCESSING:
        post_processing_driver.run(remaining)
        return True

    Log.write("Command not recognized: " + command_list[0])
    return False
예제 #30
0
 def train(self):
     """
         Builds the scaler + Keras regressor pipeline, fits it on the
         selected facts / outcome of every precedent in ``self.dataset``
         and then runs ``self.test()``.
     """
     Log.write("Size of dataset: %d" % (len(self.dataset)))
     features = np.array([precedent['facts_vector'][self.important_facts_index]
                          for precedent in self.dataset])
     targets = np.array([precedent['outcomes_vector'][self.outcome_index]
                         for precedent in self.dataset])
     # number of inputs, taken from the first sample
     self.input_dimensions = len(features[0])
     regressor = KerasRegressor(
         build_fn=self._nn_architecture, epochs=1000, batch_size=1024, verbose=0)
     scaler = StandardScaler()
     self.model = AbstractRegressor._create_pipeline(scaler, regressor)
     self.model.fit(features, targets)
     self.test()
예제 #31
0
파일: harness.py 프로젝트: bennn/bubbles
    def __init__(self, test_file, timeout):
        """Derive the source file from ``test_file`` and run the tests immediately.

        The source file is ``test_file`` with its last ``len("_test.ml")``
        characters replaced by ``.ml``. The result of ``self.run()`` is
        stored in ``self.failures``.
        """
        self.log = Log()
        self.timeout = timeout

        suffix_length = len("_test.ml")
        self.test_file = test_file
        self.src_file = "%s.ml" % test_file[:-suffix_length]
        # bare file names: everything after the last "/"
        self.test_name = self.test_file.rsplit("/", 1)[-1]
        self.src_name = self.src_file.rsplit("/", 1)[-1]

        self.failures = self.run()
        print("") # Separator
예제 #32
0
 def unsubscribe_to_stream(self):
     """Run the parent class's ``unsubscribe_to_stream``, then log the RTSP unsubscription."""
     super(RtspStreamHandler, self).unsubscribe_to_stream()
     Log.info("Unsubscribing to RTSP stream")
예제 #33
0
파일: runTest.py 프로젝트: bennn/bubbles
class RunTest:
    """
        2013-08-23:
            Execute all tests cases found inside a module. Specifically:
            - Accept the path to a test file as input (like "my_test.ml" or "worker_test.ml")
            - Compile the test case and the corresponding .ml file ("file.ml" + "file_test.ml")
            - Extract the inferred interface from the test file
            - Extract all test cases from the interface ("unit -> unit" functions beginning with "test_")
            - Execute each test case in a separate ocaml toplevel, record output

        Constructing an instance runs the whole pipeline right away; the
        value returned by `run()` is stored in `self.failures`.
    """

    # In order of dependence
    LIBS = [
        "serializer.cma",
        "assertions.cma",
    ]

    def __init__(self, test_file, timeout):
        """
            Store the file names derived from `test_file` and run the tests.

            `test_file` is expected to end in "_test.ml"; the source file
            path is obtained by replacing that suffix with ".ml".
            `timeout` is passed to `TimedProcess.run` for every test case.
        """
        self.log = Log()
        self.subprocess = SubprocessWrapper()
        self.timeout = timeout

        self.test_file = test_file
        self.src_file = "%s.ml" % test_file[:-(len("_test.ml"))]
        self.test_name = self.test_file.split("/")[-1]
        self.src_name = self.src_file.split("/")[-1]

        self.failures = self.run()
        print("") # Separator

    @staticmethod
    def _module_name(file_stem):
        """
            Turn a file stem such as "my_test" into the OCaml module name
            "My_test". Only the first character is upper-cased; the rest
            is left alone, because `str.capitalize()` would lower-case it
            and turn "fooBar_test" into the wrong module "Foobar_test".
        """
        return file_stem[:1].upper() + file_stem[1:]

    def compile(self):
        """
            2013-08-23:
                Compile the source file + test file, generate the interface for
                the test file. In detail:
                - Generate the ocamlc command to compile source + test in unison,
                  pulling in all necessary external libraries
                - Generate the ocamlc command to get the interface for the test,
                  using the .cmo file generated by compiling the source as a library

            Returns the inferred interface as a list of lines.
            A failed compilation is routed to `compile_error`.
        """
        self.log.info("Compiling %s and %s" % (self.src_name, self.test_name))
        # Prepare compilation commands.
        # 2013-08-23: Base command includes standard testing library
        base_command = " ".join(["ocamlc -c"] + self.LIBS)
        # 2013-08-23: Full compilations uses '-g' option to generate debug information
        compile_all = "%s -g %s %s" % (base_command, self.src_file, self.test_file)
        # Name of the .cmo file generated after compiling the source
        src_cmo = "%s.cmo" % self.src_file[:-(len(".ml"))]
        # 2013-08-23: Use '-i' option to just generate the interface for the function
        infer_interface = "%s -i %s %s" % (base_command, src_cmo, self.test_file)
        # Compile both files, then infer and return the interface
        self.subprocess.execute(compile_all, on_failure=self.compile_error)
        # 2013-08-23: Reached this line without making a .cmo Dear diary, this was bad
        interface = self.subprocess.execute(infer_interface)
        return interface.split("\n")

    def compile_error(self, cpe):
        """
            Failure callback for the compile step: log the compiler output
            together with the offending source line, then raise
            NoCompileException.

            `cpe` must expose the compiler output as `cpe.output`.
        """
        err_msg = cpe.output.strip()
        # 2013-08-23: Retrieve failing line from the file
        sourceError = self._source_of_exception(err_msg)
        # Put the OCaml exception + line from source into one string.
        # Replace vanilla newlines with indented newlines.
        nocompile_msg = ("%s\n%s" % (err_msg, sourceError)).replace("\n", "\n  ")
        self.log.nocompile(nocompile_msg)
        raise NoCompileException(1)

    def generate_scripts(self, test_interface):
        """
            2013-08-23:
                Given the interface of a test file, generate a toplevel script
                for each test case. For instance, if the test file had an interface
                like:
                    val test_one : unit -> unit
                    val helper : int -> string
                    val test_two : unit -> unit
                    val test_three : int -> unit
                Then this function would generate scripts for `test_one` and
                `test_two`, because they are `unit -> unit` functions that
                start with the magic prefix "test_"

            Returns None when no test case qualifies, otherwise a generator
            of (test case name, toplevel script) pairs.
        """
        test_cases = []
        for defined_name in ( x for x in test_interface if x.startswith("val test_") ):
            # Drop the leading "val " and separate the name from its type
            val_name, val_type = defined_name[4:].split(" : ", 1)
            if val_type != "unit -> unit":
                self.log.warn("skipping test case %s with type %s" % (val_name, val_type))
            else:
                test_cases.append(val_name)
        if not test_cases:
            return None
        # Change "my_test.ml" to the module "My_test"
        test_name = self._module_name(self.test_name[:-(len(".ml"))])
        return ( (case, self._toplevel_input(test_name, case))
            for case in test_cases )

    def run(self):
        """
            2013-08-23:
                Check the paths, move into the test's directory, compile,
                generate the test scripts and execute them.

            Returns the list produced by `run_tests`, or None when the
            test file contains no runnable test case.
        """
        self._check_paths()
        # Get the directory containing the test file, move to it
        if "/" in self.test_file:
            testcase_dir = self.test_file.rsplit("/", 1)[0]
            os.chdir(testcase_dir)
        # Compile the test + source files
        self.log.header("Testing %s" % self.src_name)
        test_interface = self.compile()
        # Generate the test scripts
        self.log.info("Compilation succeeded! Generating test scripts...")
        test_scripts = self.generate_scripts(test_interface)
        if test_scripts is None:
            self.log.warn("No test cases in %s" % self.test_name)
        else:
            # Execute tests
            return self.run_tests(test_scripts)

    def run_test(self, script):
        """
            2013-08-23:
                Execute a single test script in a toplevel environment.
                Start a toplevel with the module and test case object files loaded,
                pipe in the test script as an argument.

                I'm not entirely happy with the piping because it means that subprocess
                fails to throw an error when the test fails. Maybe fix that later.

            Returns the error message of the test ("TIMEOUT" when the time
            limit was hit), or None when the test passed.
        """
        run_test = " ".join([
            "echo \"%s\" |" % script,
            "ocaml",
            ] + self.LIBS + [
            "%s.cmo" % self.src_file[:-(len(".ml"))],
            "%s.cmo" % self.test_file[:-(len(".ml"))]
        ])
        with Timer() as t:
            try:
                output, err = TimedProcess(run_test).run(self.timeout)
                err_msg = self._error_of_output(output) # Maybe None
            except TimeoutException:
                err_msg = "TIMEOUT"
        if not err_msg:
            self.log.success("PASS in %0.3f seconds" % t.duration)
        else:
            self.log.failure("FAIL with '%s' in %0.3f seconds" % (err_msg, t.duration))
        return err_msg

    def run_tests(self, test_scripts):
        """
            2013-08-23:
                Given an association list of ("test_case_name", "toplevel script"),
                execute each test in an ocaml toplevel and record the output.

            Returns a list of (test case name, error message) pairs, one
            per failing test.
        """
        errors = []
        for (fn_name, script) in test_scripts:
            self.log.run("Running %s..." % fn_name)
            err_msg = self.run_test(script)
            if err_msg:
                errors.append((fn_name, err_msg))
        return errors

    def _check_paths(self):
        """
            2013-08-23:
                Make sure the source and test files (still) exist.

            Raises InvalidTestException when either file is missing.
        """
        if not os.path.exists(self.src_file):
            self.log.warn("Source file '%s' not found. Skipping %s..." % (self.src_name, self.test_name))
            raise InvalidTestException(0)
        if not os.path.exists(self.test_file):
            self.log.warn("Test file '%s' not found. Exiting..." % self.test_name)
            raise InvalidTestException(0)

    def _error_of_output(self, toplevel_output):
        """
            2013-08-04:
                Toplevel output is always echoed to subprocess, regardless of
                whether the tests passed. Manually check if the code raised an
                assertion error.

                TODO this is not very rigorous! It assumes there will be an octothorp
                at the end of the output!
                This is a reasonable assumption but still it makes me nervous

            2013-08-23:
                Ignores input errors. If the code this file sends to the toplevel
                has a syntax error or whatever, things will break down seriously.
                I think its safe to assume that'll never happen in a release.

            2013-08-24:
                Added logic to print the non-exception printouts
                You know, we could probably just check that the output's "- : unit"

            Returns the stripped "Exception: ..." text when the toplevel
            reported one, otherwise None. Any other text printed by the
            test is echoed to stdout.
        """
        match = re.search(r"#.*?(Exception:[\s].*)\n#", toplevel_output, re.DOTALL)
        if match is not None:
            # Debug output will be octothorp to exception.
            debug_match = re.search(r"# (.*?)Exception:", toplevel_output, re.DOTALL)
            message = match.group(1).strip()
        else:
            # Debug output will be octothorp to return value
            debug_match = re.search(r"# (.*?)\n- :", toplevel_output, re.DOTALL)
            message = None
        # Print the debug output, if any
        if debug_match is not None and debug_match.group(1):
            print(debug_match.group(1).rstrip())
        return message

    def _source_of_exception(self, errorMessage):
        """
            2013-08-23:
                Get the line number and source file that spawned `errorMessage`,
                extract that line of code from that source file.

            Returns the offending line (preceded by the line before it,
            when there is one) formatted for the log. Returns "" when
            `errorMessage` carries no file/line information or when the
            named file cannot be opened, so that the caller can still
            report the compiler error itself.
        """
        match = re.search(r"File \"(.*?)\", line ([0-9]+),", errorMessage)
        if match is None:
            return ""
        fname = match.group(1)
        line_num = int(match.group(2))
        try:
            source = open(fname, "r")
        except IOError:
            # The compiler named a file we cannot read; nothing to quote.
            return ""
        with source as f:
            message = ""
            try:
                # Skip ahead, remembering the line just before the culprit
                for _ in range(line_num - 1):
                    message = next(f)
            except StopIteration:
                # The file is too short to even hold the preceding line
                return "---> %s <unexpected end of file>" % line_num
            try:
                culprit = next(f).rstrip()
            except StopIteration:
                # File ended unexpectedly. Add an empty line and point to it
                return("     %s %s---> %s <unexpected end of file>" \
                    % (line_num-1, message, line_num))
        if message:
            # `message` still ends with its newline, which separates the rows
            return("     %s %s---> %s %s" % \
                (line_num-1, message, line_num, culprit))
        return("---> %s %s" % (line_num, culprit))

    def _toplevel_input(self, module_name, test_case):
        """
            2013-07-28:
                 Write a script for the toplevel. Call the right function
                 from the right module

            `module_name` may be given as a file stem; its first letter is
            upper-cased to form the OCaml module name.
        """
        return "%s.%s ();;" % (self._module_name(module_name), test_case)
예제 #34
0
 def subscribe_to_stream(self):
     """
         Run the parent class's subscribe step, log the event and return
         the fixed identifier string for this handler.
     """
     parent = super(RtspStreamHandler, self)
     parent.subscribe_to_stream()
     Log.info("Subscribing to RTSP stream")
     return "RtspStreamHandler#subscribe_to_stream"
예제 #35
0
 def _stop_streaming(self):
     super(RtspStreamHandler, self)._stop_streaming()
     Log.info("Stopping RTSP stream")