def predict_target_with_query(
        sparql, query, source, timeout=TIMEOUT, limit=LIMIT):
    """Predicts target with given query.

    For example for pagerank_bidi:
    SELECT distinct(?target) ?score {
     { dbr:Circle ?p ?target .}
     UNION
     { ?target ?q dbr:Circle . }
     ?target dbo:wikiPageRank ?score
    }
    ORDER BY DESC(?score)
    LIMIT 100
    """
    q = query % {'source': source.n3()}
    q += '\nLIMIT %d' % limit
    t, q_res = gp_query._query(sparql, timeout, q)
    res_rows_path = ['results', 'bindings']
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, res_rows_path, default=[])
    )
    target_scores = [
        (get_path(row, [TARGET_VAR]), get_path(row, [Variable('score')]))
        for row in bindings]
    # print(target_scores)
    return target_scores
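For orientation, the query argument above is an ordinary Python %-template whose %(source)s placeholder is filled with the N3 form of the source node before a LIMIT clause is appended. A minimal standalone sketch of that substitution, using an illustrative template and a hard-coded N3 term rather than anything from the surrounding module:

# Minimal sketch of the template substitution done by predict_target_with_query.
# The template text and the N3-encoded source below are illustrative only.
query_template = (
    'SELECT DISTINCT ?target ?score {\n'
    ' { %(source)s ?p ?target . }\n'
    ' UNION\n'
    ' { ?target ?q %(source)s . }\n'
    ' ?target dbo:wikiPageRank ?score\n'
    '}\n'
    'ORDER BY DESC(?score)'
)
q = query_template % {'source': '<http://dbpedia.org/resource/Circle>'}
q += '\nLIMIT %d' % 100
print(q)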
Example #2
def _export_datasets(dataset, features, classes, origin, sufix):
    from itertools import combinations
    from tasks.linker import params
    from multiprocessing.pool import Pool
    
    nfolds = 3
    folds = [i for i in range(nfolds)]
    partitions = [list(c) + list((set(folds) - set(c))) for c in combinations(folds, 2)]
    datasets = _fold(dataset, nfolds)
    
    for pt in partitions:
        training = []
        for i in pt[:-1]: 
            training.extend(datasets[i])
        test = datasets[pt[-1]]
         
        name_ = 'all{}{}{}'.format(origin, sufix + '_tr', pt[-1])
        filename = get_path('datasets', '{}.arff'.format(name_))
        classes_ = [next((v['short_name'] for k, v in params.items() if v['metadata_uri'] == c), None) for c in classes]    
        dataset_ = ([d, classes_] for d in _chunks(training, os.cpu_count()))
        with Pool(os.cpu_count()) as p:
            sets_ = p.starmap(_expand, dataset_)
        dataset_ = []
        for s in sets_:
            dataset_.extend(s)
        with Pool(os.cpu_count()) as p:
            dataset_ = p.map(_flatten, dataset_)
        _save(dataset_, features, 'class', name_, filename)
        
        
        name_ = 'all{}{}{}'.format(origin, sufix + '_tt', pt[-1])
        filename = get_path('datasets', '{}.arff'.format(name_))
        dataset_ = ([l, classes_] for l in test)
        with Pool(os.cpu_count()) as p:
            dataset_ = p.starmap(_concat, dataset_)
        with Pool(os.cpu_count()) as p:
            dataset_ = p.map(_flatten, dataset_)
        _save_test(dataset_, features, 'class', name_, filename)
Example #3
    def __init__(self,  data_fold=None, full_dataset=False, out_path=None, in_path=None, output_patches=False, scale=1.5, minSize=(200,200), windowSize=(40,40), stepSize=15):
        self.scale = scale
        self.minSize = minSize
        self.windowSize = windowSize
        self.output_patches = output_patches 
 
        self.stepSize = stepSize if self.output_patches == False else 30
        self.total_window_num = 0
        if data_fold is None:
            self.data_fold = utils.TRAINING if self.output_patches or full_dataset else utils.VALIDATION
        else:
            self.data_fold = data_fold

        self.in_path = in_path if in_path is not None else utils.get_path(in_or_out=utils.IN, data_fold=self.data_fold, full_dataset=full_dataset)
        self.img_names = [img_name for img_name in os.listdir(self.in_path) if img_name.endswith('.jpg')]
        self.img_names = self.img_names[:20] if DEBUG else self.img_names

        self.detections = Detections()
        folder_name = 'scale{}_minSize{}-{}_windowSize{}-{}_stepSize{}_dividedSizes/'.format(self.scale, self.minSize[0], self.minSize[1], 
                                                self.windowSize[0], self.windowSize[1], self.stepSize) 

        self.out_path = out_path if out_path is not None else '{}'.format(utils.get_path(full_dataset=True, in_or_out=utils.OUT, slide=True, data_fold=self.data_fold, out_folder_name=folder_name))

        self.evaluation = Evaluation(full_dataset=full_dataset, method='slide',folder_name=folder_name, save_imgs=False, out_path=self.out_path, 
                            detections=self.detections, in_path=self.in_path)
Example #4
def predict_target_with_query(sparql,
                              query,
                              source,
                              timeout=TIMEOUT,
                              limit=LIMIT):
    """Predicts target with given query.

    For example for pagerank_bidi:
    SELECT distinct(?target) ?score {
     { dbr:Circle ?p ?target .}
     UNION
     { ?target ?q dbr:Circle . }
     ?target dbo:wikiPageRank ?score
    }
    ORDER BY DESC(?score)
    LIMIT 100
    """
    q = query % {'source': source.n3()}
    q += '\nLIMIT %d' % limit
    t, q_res = gp_query._query(sparql, timeout, q)
    res_rows_path = ['results', 'bindings']
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, res_rows_path, default=[]))
    target_scores = [(get_path(row,
                               [TARGET_VAR]), get_path(row,
                                                       [Variable('score')]))
                     for row in bindings]
    # print(target_scores)
    return target_scores
Example #5
    def _find_conflicting_path(self, conflict):
        """Return the shortest path commown to two patches."""
        p1p = get_path(conflict.first_patch)
        p2p = get_path(conflict.second_patch)

        # This returns the shortest path
        return p1p if len(p1p) <= len(p2p) else p2p
Example #6
def main():
    args = PARSER.parse_args()

    data_path = get_path(args, "record")
    model_save_path = get_path(args, "tf_vae", create=True)

    ensure_validation_split(data_path)
    _n_train, _avg_frames, mean, var = analyse_dataset(data_path)
    if args.normalize_images:
        train_data, val_data = create_tf_dataset(data_path, args.z_size, True, mean, var)
    else:
        train_data, val_data = create_tf_dataset(data_path, args.z_size)

    shuffle_size = 5 * 1000  # Roughly 20 full episodes for shuffle windows, more increases RAM usage
    train_data = train_data.shuffle(shuffle_size, reshuffle_each_iteration=True).batch(args.vae_batch_size).prefetch(2)
    val_data = val_data.batch(args.vae_batch_size).prefetch(2)

    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_dir = model_save_path / "tensorboard" / current_time

    vae = CVAE(args=args)
    vae.compile(optimizer=vae.optimizer, loss=vae.get_loss())
    vae.fit(train_data, validation_data=val_data, epochs=args.vae_num_epoch, callbacks=[
        tf.keras.callbacks.TensorBoard(log_dir=str(tensorboard_dir), update_freq=50, histogram_freq=1),
        LogImage(str(tensorboard_dir), val_data),
        tf.keras.callbacks.ModelCheckpoint(str(model_save_path / "ckpt-e{epoch:02d}"), verbose=1),
    ])
    vae.save(str(model_save_path))
Example #7
def fetch_lightsaber_images(hilt, blade, button, pommel):
    hilt = get_path(hilt, HILT_PATH)
    blade = get_path(blade, BLADE_PATH)
    button = get_path(button, BUTTON_PATH)
    pommel = get_path(pommel, POMMEL_PATH)

    return (blade, hilt, button, pommel)
Example #8
def load_board_from_image(file_name):
    """ Load a csv file and return a 2D list."""

    # File does not exist, throw error and exit
    if not os.path.isfile(utils.get_path() + file_name):
        print('file does not exist')
        sys.exit()
    else:
        dead_cells = GAMECONFIG['dead_cells']
        living_cells = GAMECONFIG['living_cells']

        im = Image.open(utils.get_path() + file_name)
        if im.mode not in ('L', 'RGB'):
            im = im.convert('RGB')
        pixel_values = im.load()
        width, height = im.size
        make_image.save_width_height(width, height)
        data = [[dead_cells for i in range(width)] for j in range(height)]

        for col in range(0, width):
            for row in range(0, height):
                if pixel_values[col, row] == (0, 0, 0):
                    data[row][col] = living_cells
                    STATS['initial_population'] += 1
        im.close()
    return data, height, width
Example #9
def main(pickled_evaluation=False, combo_f_name=None, output_patches=True, 
                detector_params=None, original_dataset=True, save_imgs=True, data_fold=utils.VALIDATION):
    combo_f_name = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:")
    except getopt.GetoptError:
        print 'Command line failed'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-f':
            combo_f_name = arg

    assert combo_f_name is not None
    detector = viola_detector_helpers.get_detectors(combo_f_name)

    viola = False if data_fold == utils.TRAINING else True
    in_path = utils.get_path(viola=viola, in_or_out=utils.IN, data_fold=data_fold)

    #name the output_folder
    folder_name = ['combo'+combo_f_name]
    for k, v in detector_params.iteritems():
        folder_name.append('{0}{1}'.format(k,v))
    folder_name = '_'.join(folder_name)

    out_path = utils.get_path(out_folder_name=folder_name, viola=True, in_or_out=utils.OUT, data_fold=data_fold)
    out_path = 'output_viola_uninhabited/'
    viola = ViolaDetector(pickled_evaluation=pickled_evaluation, output_patches=output_patches,  
                            out_path=out_path, in_path=in_path, folder_name = folder_name, 
                            save_imgs=save_imgs, detector_names=detector,  **detector_params)
    return viola
Example #10
def proxy(path):
    # ensure authorization header is present
    api_key = authorization_header_exists(request.headers)

    # retrieve usage plan from api key
    usage_plan = get_usage_plan(REDIS_CLIENT, USAGE_PLANS.keys(), api_key)

    # apply usage plan quota and throttling limits
    max_calls, period_in_seconds, throttling_rate = get_usage_plan_info(
        USAGE_PLANS, usage_plan)
    quota_per_seconds(REDIS_CLIENT, api_key, max_calls, period_in_seconds)
    rate_per_second(REDIS_CLIENT, api_key, throttling_rate)

    if request.method == "GET":
        redirect_url = get_path(ALB, path)
        resp = requests.get(redirect_url)
        response = get_response(resp)
        return response
    elif request.method == "POST":
        redirect_url = get_path(ALB, path)
        resp = requests.post(redirect_url, json=request.get_json())
        response = get_response(resp)
        return response
    elif request.method == "DELETE":
        redirect_url = get_path(ALB, path)
        resp = requests.delete(redirect_url).content
        response = get_response(resp)
        return response
Example #11
def main(config_file='config/bert_config.json'):
    """Main method for training.

    Args:
        config_file: in config dir
    """
    # 0. Load config and mkdir
    with open(config_file) as fin:
        config = json.load(fin, object_hook=lambda d: SimpleNamespace(**d))
    get_path(os.path.join(config.model_path, config.experiment_name))
    get_path(config.log_path)
    # if config.model_type in ['rnn', 'lr','cnn']:  # build vocab for rnn
    #     build_vocab(file_in=config.all_train_file_path,
    #                 file_out=os.path.join(config.model_path, 'vocab.txt'))
    # 1. Load data
    data = Data(vocab_file=os.path.join(config.model_path, 'vocab.txt'),
                max_seq_len=config.max_seq_len,
                model_type=config.model_type, config=config)
    datasets = data.load_train_and_valid_files(
        train_file=config.train_file_path,
        valid_file=config.valid_file_path)
    train_set, valid_set_train, valid_set_valid, train_labels, valid_labels = datasets
    if torch.cuda.is_available():
        device = torch.device('cuda')
        # device = torch.device('cpu')
        # torch.distributed.init_process_group(backend="nccl")
        # sampler_train = DistributedSampler(train_set)
        sampler_train = RandomSampler(train_set)
    else:
        device = torch.device('cpu')
        sampler_train = RandomSampler(train_set)

    data_loader = {
        'train': DataLoader(
            train_set, sampler=sampler_train, batch_size=config.batch_size),
        'valid_train': DataLoader(
            train_set, batch_size=config.batch_size, shuffle=False),
        'valid_valid': DataLoader(
            valid_set_valid, batch_size=config.batch_size, shuffle=False),
        'train_label': train_labels,
        'valid_label': valid_labels
    }
    # 2. Build model
    model = MODEL_MAP[config.model_type](config)
    #load model states.
    if config.trained_weight:
        model.load_state_dict(torch.load(config.trained_weight))
    model.to(device)
    if torch.cuda.is_available():
        model = model
        # model = torch.nn.parallel.DistributedDataParallel(
        #     model, find_unused_parameters=True)
    # 3. Train
    trainer = Trainer(model=model, data_loader=data_loader,
                      device=device, config=config)
    best_model_state_dict = trainer.train()
    # 4. Save model
    torch.save(best_model_state_dict,
               os.path.join(config.model_path, 'model.bin'))
Example #12
def make_mini(from_fname: str = 'arxiv_data',
              name: str = 'mini',
              size: int = 100000,
              data_dir: str = '.data',
              batch_size: int = 10000):
    ''' Make a smaller version of a given dataset, without needing to load
    the larger dataset into memory.
    
    INPUT
        from_fname: str = 'arxiv_data'
            The large dataset
        name: str = 'mini'
            The name of the smaller dataset, which will be appended to the
            file name of the larger one
        size: int = 100000
            The number of rows in the small dataset
        data_dir: str = '.data'
            The name of the data directory
        batch_size: int = 10000
            How many rows of the large dataset we are processing at a time
    '''
    import pandas as pd
    import numpy as np
    from tqdm.auto import tqdm

    from_path = get_path(data_dir) / f'{from_fname}_pp.tsv'
    to_path = get_path(data_dir) / f'{from_fname}_{name}_pp.tsv'

    df = pd.read_csv(from_path, sep='\t', chunksize=batch_size)
    cats = get_cats(data_dir=data_dir)['id']
    nrows = get_nrows(f'{from_fname}_pp.tsv', data_dir=data_dir)

    text_path = get_path(data_dir) / 'text.tmp'
    labels_path = get_path(data_dir) / 'labels.tmp'

    text = np.memmap(text_path, dtype=object, mode='w+', shape=(nrows, 1))

    labels = np.memmap(labels_path,
                       dtype=int,
                       mode='w+',
                       shape=(nrows, len(cats)))

    with tqdm(total=nrows, desc=f'Loading {from_fname}_pp.tsv') as pbar:
        for idx, row in enumerate(df):
            text[idx * batch_size:(idx + 1) * batch_size, 0] = row['text']
            labels[idx * batch_size:(idx + 1) * batch_size, :] = row[cats]
            pbar.update(len(row))

    rnd_idxs = np.random.choice(nrows, size=size, replace=False)
    text = text[rnd_idxs, 0]
    labels = labels[rnd_idxs, :]

    mini_df = pd.DataFrame(columns=['text'] + cats)
    mini_df['text'] = text
    mini_df[cats] = labels
    mini_df.to_csv(to_path, sep='\t', index=False)

    text_path.unlink()
    labels_path.unlink()
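A possible invocation, assuming the preprocessed file arxiv_data_pp.tsv already exists in the .data directory; the argument values simply restate the defaults documented in the docstring above:

# Hypothetical usage; file names and sizes mirror the documented defaults.
make_mini(from_fname='arxiv_data',
          name='mini',
          size=100000,
          data_dir='.data',
          batch_size=10000)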
Example #13
def get_credentials():
    store = file.Storage(get_path('docs_token'))
    credentials = store.get()

    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(get_path('docs_credential'),
                                              SCOPES)
        credentials = tools.run_flow(flow, store)
    return credentials
Example #14
def preprocess_data(tsv_fname: str = 'arxiv_data',
                    txt_fname: str = 'preprocessed_docs.txt',
                    data_dir: str = '.data',
                    batch_size: int = 1000):
    ''' 
    Preprocess text data. This merges titles and abstracts and separates 
    tokens by spaces. It saves this into a text file and also saves a
    dataframe with all the categories. Note that this function uses a 
    constant amount of memory, which is achieved by working in batches 
    and writing directly to the disk.
    
    INPUT
        tsv_fname: str
            The name of the tsv file containing all the categories, 
            without file extension
        txt_fname: str
            The name of the txt file containing the preprocessed texts
        data_dir: str = '.data'
            The data directory
        batch_size: int = 1000
            The amount of rows being preprocessed at a time
    '''
    import spacy

    # Specify the input- and output paths
    cats_in = get_path(data_dir) / (tsv_fname + '.tsv')
    cats_out = get_path(data_dir) / (tsv_fname + '_pp.tsv')
    txt_path = get_path(data_dir) / txt_fname

    # Load the English spaCy model used for tokenisation
    nlp = spacy.load('en')
    tokenizer = nlp.Defaults.create_tokenizer(nlp)

    # Load in the dataframe, merge titles and abstracts and batch them
    df = pd.read_csv(cats_in, sep='\t', usecols=['title', 'abstract'])
    df.dropna(inplace=True)
    docs = '-TITLE_START- ' + df['title'] + ' -TITLE_END- '\
           '-ABSTRACT_START- ' + df['abstract'] + ' -ABSTRACT_END-'
    del df

    # Tokenisation loop
    with tqdm(desc='Preprocessing texts', total=len(docs)) as pbar:
        with open(txt_path, 'w') as f:
            for doc in tokenizer.pipe(docs, batch_size=batch_size):
                f.write(' '.join(tok.text for tok in doc) + '\n')
                pbar.update()

    # Add the preprocessed texts to the dataframe as the first column
    # and save to disk
    df = pd.read_csv(cats_in, sep='\t').dropna()
    df.drop(columns=['title', 'abstract'], inplace=True)
    cats = df.columns.tolist()
    with open(txt_path, 'r') as f:
        df['text'] = f.readlines()
    df = df[['text'] + cats]
    df.to_csv(cats_out, sep='\t', index=False)
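A possible invocation, assuming a tab-separated .data/arxiv_data.tsv with 'title' and 'abstract' columns plus the category columns; it writes the token file and .data/arxiv_data_pp.tsv as described above:

# Hypothetical usage; arguments restate the documented defaults.
preprocess_data(tsv_fname='arxiv_data',
                txt_fname='preprocessed_docs.txt',
                data_dir='.data',
                batch_size=1000)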
Example #15
def n(neighbours=False):
    'shows your neighbours'
    conf = utils.load_conf(CJDROUTE_CONF)
    c = cjdns.connect(password=conf['admin']['password'])

    STAT_FORMAT = '%s %19s  v%-2d  %9d %9d  %12s  %d/%d/%d  '
    nodestore = list(c.dumpTable())

    connections = {}

    try:
        for peer in os.listdir(YRD_PEERS):
            with open(os.path.join(YRD_PEERS, peer)) as f:
                info = json.load(f)
                try:
                    connections[info['pk']] = str(info['name'])
                except KeyError:
                    pass
    except OSError:
        pass

    for peer in c.peerStats():
        result = c.nodeForAddr(peer.ip)['result']

        route = utils.grep_ns(nodestore, peer.ip)
        path = utils.get_path(route)

        setattr(peer, 'path', path)

        line = STAT_FORMAT % (peer.ip, peer.path, peer.version, peer.bytesIn,
                              peer.bytesOut, peer.state, peer.duplicates,
                              peer.lostPackets, peer.receivedOutOfRange)

        if hasattr(peer, 'user'):
            line += repr(peer.user)
        elif peer.publicKey in connections:
            line += repr(connections[peer.publicKey])

        yield line

        if neighbours:
            for i in range(result['linkCount']):
                link = c.getLink(peer.ip, i)

                if link and 'child' in link['result']:
                    child = link['result']['child']
                    route = utils.grep_ns(nodestore, child)

                    version = utils.get_version(route)
                    path = utils.get_path(route)

                    yield '   %s   %s  v%s' % (child, path, version)
                else:
                    yield '   -'

    c.disconnect()
Example #16
def main():
    server_address = ('127.0.0.1', 4443)

    server = BaseHTTPServer.HTTPServer(server_address, HTTPHandler)
    server.socket = ssl.wrap_socket(server.socket,
                                    keyfile=utils.get_path('cert', 'key.pem'),
                                    certfile=utils.get_path(
                                        'cert', 'cert.pem'),
                                    server_side=True)
    server.serve_forever()
Example #17
def main(config_file='config/bert_config.json'):
    """Main method for training.

    Args:
        config_file: in config dir
    """
    # 0. Load config and mkdir
    with open(config_file) as fin:
        config = json.load(fin, object_hook=lambda d: SimpleNamespace(**d))
    get_path(os.path.join(config.model_path, config.experiment_name))
    get_path(config.log_path)
    # 1. Load data
    data = Data()
    datasets = data.load_train_and_valid_files(
        train_file=config.train_file_path, valid_file=config.valid_file_path)
    train_set, valid_set_train = datasets
    if torch.cuda.is_available():
        device = torch.device('cuda')
        sampler_train = RandomSampler(train_set)
    else:
        device = torch.device('cpu')
        sampler_train = RandomSampler(train_set)
    data_loader = {
        'train':
        DataLoader(train_set,
                   sampler=sampler_train,
                   batch_size=config.batch_size),
        'valid_train':
        DataLoader(valid_set_train,
                   batch_size=config.batch_size,
                   shuffle=False),
        'valid_valid':
        DataLoader(valid_set_train,
                   batch_size=config.batch_size,
                   shuffle=False)
    }
    # 2. Build model
    model = MODEL_MAP[config.model_type](config)
    #load model states.
    if config.trained_weight:
        model.load_state_dict(torch.load(config.trained_weight))
    model.to(device)
    if torch.cuda.is_available():
        model = model
        # model = torch.nn.parallel.DistributedDataParallel(
        #     model, find_unused_parameters=True)
    # 3. Train
    trainer = Trainer(model=model,
                      data_loader=data_loader,
                      device=device,
                      config=config)
    best_model_state_dict = trainer.train()
    # 4. Save model
    torch.save(best_model_state_dict,
               os.path.join(config.model_path, 'model.bin'))
Example #18
def main(): 
    script_dirname = os.path.abspath(os.path.dirname(__file__))
    output_patches = False 
    fold = utils.TRAINING if output_patches else utils.VALIDATION
    
    #only use this path to get the names of the files you want to use
    in_path = utils.get_path(in_or_out=utils.IN, data_fold=fold)
    in_path_selective = script_dirname+'/' #this is where the files actually live
    img_names = [img for img in os.listdir(in_path) if img.endswith('jpg')]
    image_filenames = [in_path_selective+img for img in os.listdir(in_path) if img.endswith('jpg')]


    #get the proposals
    k, scale = get_parameters()
    sim = 'all'
    color = 'hsv'
    cmd = 'selective_search'
    if cmd == 'selective_search':
        folder_name = 'k{}_scale{}_sim{}_color{}_FIXING/'.format(k, scale, sim, color)
    else:
        folder_name = 'selectiveRCNN/'
    print 'Folder name is: {}'.format(folder_name)

    with Timer() as t:
        boxes = get_windows(image_filenames, script_dirname, cmd=cmd, k=k, scale=scale)
    print 'Time to process {}'.format(t.secs)

    detections = Detections()
    detections.total_time = t.secs
    out_path = utils.get_path(selective=True, in_or_out=utils.OUT, data_fold=fold, out_folder_name=folder_name)

    evaluation = Evaluation(#use_corrected_roofs=True,
                        report_name='report.txt', method='windows', 
                        folder_name=folder_name,  out_path=out_path, 
                        detections=detections, in_path=in_path)
    
    #score the proposals
    for img, proposals in zip(img_names, boxes):
        print 'Evaluating {}'.format(img)
        print("Found {} windows".format(len(proposals)))

        proposals = selectionboxes2polygons(proposals)
        detections.set_detections(detection_list=proposals,roof_type='metal', img_name=img)  
        detections.set_detections(detection_list=proposals,roof_type='thatch', img_name=img)  
        print 'Evaluating...'
        evaluation.score_img(img, (1200,2000)) 

        evaluation.save_images(img)

    save_training_TP_FP_using_voc(evaluation, img_names, in_path_selective, out_folder_name=folder_name, neg_thresh=0.3)
    evaluation.print_report() 

    with open(out_path+'evaluation.pickle', 'wb') as f:
        pickle.dump(evaluation, f)
Example #19
def _ask_chunk_result_extractor(q_res, _vars, _ret_val_mapping):
    chunk_res = {}
    res_rows_path = ['results', 'bindings']
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, res_rows_path, default=[])
    )
    for row in bindings:
        row_res = tuple([get_path(row, [v]) for v in _vars])
        stps = _ret_val_mapping[row_res]
        chunk_res.update({stp: True for stp in stps})
    return chunk_res
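For context, the ['results', 'bindings'] path walked by get_path mirrors the standard SPARQL 1.1 JSON results layout; a minimal sketch of that structure with illustrative variable names and values (not produced by the code above):

# Illustrative shape of the q_res dict that the extractor above receives.
q_res = {
    'head': {'vars': ['target', 'score']},
    'results': {
        'bindings': [
            {'target': {'type': 'uri',
                        'value': 'http://dbpedia.org/resource/Circle'},
             'score': {'type': 'literal', 'value': '0.42'}},
        ],
    },
}
rows = q_res['results']['bindings']  # what get_path(q_res, ['results', 'bindings']) yields
print(rows[0]['target']['value'])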
Example #20
 async def after_ready(self):
     with open(get_path('reactions'), 'r', encoding='utf-8') as f:
         self.reactions = f.read().split('\n')
     with open(get_path('greetings'), 'r', encoding='utf-8') as f:
         self.greetings = f.read().split('\n')
     self.protocol_cog = self.client.get_cog(get_cog('ProtocolCog')['name'])
     for initial in ('ㅁ', 'ㅇ', 'ㄹ', 'ㄴ'):
         for final in ('ㅁ', 'ㅇ', 'ㄴ', None):
             for medial in map(str, CHAR_MEDIALS):
                 self.characters.append(
                     join_jamos_char(initial, medial, final))
Example #21
def end2end(mcat_ratio: float, epochs: int, dim: int, nlayers: int, fname: str,
            gpu: bool, name: str, lr: float, batch_size: int,
            split_ratio: float, vectors: str, data_dir: str, pbar_width: int,
            wandb: bool, boom_dim: int, dropout: float, ema: float,
            overwrite_model: bool) -> str:
    ''' Loads the data, preprocesses it if needed, builds the SHARNN model,
        trains it and evaluates it. '''
    from data import load_data
    from modules import SHARNN

    pp_path = get_path(data_dir) / f'{fname}_pp.tsv'
    if not pp_path.is_file():
        from data import preprocess_data
        raw_path = get_path(data_dir) / f'{fname}.tsv'
        cats_path = get_path(data_dir) / 'cats.json'
        mcat_dict_path = get_path(data_dir) / 'mcat_dict.json'

        if not (raw_path.is_file() and cats_path.is_file()
                and mcat_dict_path.is_file()):
            from db import ArXivDatabase
            db = ArXivDatabase(data_dir=data_dir)
            db.get_mcat_dict()
            db.get_cats()
            if not raw_path.is_file():
                db.get_training_df()

        preprocess_data(data_dir=data_dir)

    train_dl, val_dl, vocab = load_data(tsv_fname=f'{fname}_pp',
                                        batch_size=batch_size,
                                        split_ratio=split_ratio,
                                        vectors=vectors,
                                        data_dir=data_dir)

    model = SHARNN(dim=dim,
                   nlayers=nlayers,
                   data_dir=data_dir,
                   pbar_width=pbar_width,
                   vocab=vocab,
                   boom_dim=boom_dim,
                   dropout=dropout)
    if gpu: model.cuda()

    model = model.fit(train_dl,
                      val_dl,
                      epochs=epochs,
                      lr=lr,
                      mcat_ratio=mcat_ratio,
                      name=name,
                      use_wandb=wandb,
                      ema=ema,
                      overwrite_model=overwrite_model)

    return model.evaluate(val_dl)
Example #22
def _ask_chunk_result_extractor(q_res, _vars, _ret_val_mapping):
    chunk_res = {}
    res_rows_path = ['results', 'bindings']
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, res_rows_path, default=[])
    )
    for row in bindings:
        row_res = tuple([get_path(row, [v]) for v in _vars])
        stps = _ret_val_mapping[row_res]
        chunk_res.update({stp: True for stp in stps})
    return chunk_res
Example #23
def gen_stats_vector_from_cat_vector(stats_name, size, kinds):
    """
    为了train_test_id
    :param stats_name: str, 统计名字
    :param size: str, 时间粒度
    :param kinds: str, 类别变量种类
    :return:
    """

    # 0 Read the cat matrix for train_test_data
    print('gen_stats_vector_from_cat_vector("{}", "{}", "{}")'.format(
        stats_name, size, kinds))
    input_matrix_name = '{}_vector_from_ftr51'.format(kinds)
    input_sparse_matrix = sparse.load_npz(
        get_path() +
        'Data/Feature/{}.npz'.format(input_matrix_name)).toarray()
    print('The shape of matrix is ( {}, {}) '.format(
        input_sparse_matrix.shape[0], input_sparse_matrix.shape[1]))
    # 1 Read the basic data
    train_id, test_id, train_data, test_data = ReadData(Ytrain=False,
                                                        sort_by_time=True)
    train_test_id = pd.concat([train_id, test_id], axis=0, ignore_index=True)
    train_test_data = pd.concat([train_data, test_data],
                                axis=0,
                                ignore_index=True)
    # 2 Build a pd.DataFrame for grouped statistics
    input_sparse_df = pd.DataFrame(data=input_sparse_matrix)
    print('2')
    del input_sparse_matrix
    gc.collect()
    input_sparse_df['PERSONID'] = train_test_data['PERSONID']
    input_sparse_df['CREATETIME'] = train_test_data['CREATETIME']

    # 3 Compute the statistics
    output_stats_df = input_sparse_df.groupby('PERSONID').apply(
        lambda df_person: compute_stats_dict_from_cat_matrix(
            df_person, stats_name, size)).to_frame('stats_dict').reset_index()
    print(3)
    train_test_id = train_test_id.merge(output_stats_df,
                                        on=['PERSONID'],
                                        how='left')
    # 4 Convert to a sparse matrix and save it
    v = DictVectorizer()
    # Compute the statistics vector
    stats_sparse_matrix = v.fit_transform(train_test_id['stats_dict'].values)
    print(4)
    stats_matrix_name = '{}_{}_vector_by_{}'.format(stats_name, kinds, size)
    sparse.save_npz(
        get_path() + 'Data/Feature/{}.npz'.format(stats_matrix_name),
        stats_sparse_matrix)
    return stats_matrix_name, 'gen_stats_vector_from_cat_vector("{}", "{}", "{}")'.format(
        stats_name, size, kinds)
Example #24
def main():
    args = PARSER.parse_args()

    data_path = get_path(args, "record")
    model_save_path = get_path(args, "tf_gqn", create=True)

    ensure_validation_split(data_path)
    train_data = load_from_tfrecord(data_path,
                                    args.gqn_context_size,
                                    args.gqn_batch_size,
                                    mode='train')
    test_data = load_from_tfrecord(data_path,
                                   args.gqn_context_size,
                                   args.gqn_batch_size,
                                   mode='test')

    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_dir = model_save_path / "tensorboard" / current_time

    # lr  = tf.optimizers.schedules.ExponentialDecay(mu_i, mu_n, mu_f / mu_i,   name="lr_schedule"   )
    lr = tf.optimizers.schedules.PolynomialDecay(mu_i,
                                                 mu_n,
                                                 mu_f,
                                                 name="lr_schedule")
    sigma = tf.optimizers.schedules.PolynomialDecay(sigma_i,
                                                    sigma_n,
                                                    sigma_f,
                                                    name="sigma_schedule")
    optimizer = tf.optimizers.Adam(learning_rate=lr)

    model = GenerativeQueryNetwork(args.gqn_x_dim,
                                   args.gqn_r_dim,
                                   args.gqn_h_dim,
                                   args.gqn_z_dim,
                                   args.gqn_l,
                                   name="gqn")
    model.compile(optimizer, sigma, const_sigma=sigma_f)
    model.fit(train_data,
              validation_data=test_data,
              validation_steps=5,
              steps_per_epoch=S_epoch,
              epochs=num_epochs,
              callbacks=[
                  tf.keras.callbacks.TensorBoard(log_dir=str(tensorboard_dir),
                                                 update_freq=20,
                                                 histogram_freq=1),
                  tf.keras.callbacks.ModelCheckpoint(
                      str(model_save_path / "ckpt-e{epoch:02d}"),
                      save_freq=checkpoint_every,
                      verbose=1),
                  LogImages(tensorboard_dir, test_data),
              ])
Example #25
 def _launch_simulation_if_needed(self):
     """If the simulation is not already running, run it with the local godot executable
     """
     if not self.is_godot_launched:
         self.godot_path_str = get_path(self.godot_path_str, add_absolute=True)
         self.env_path_str = get_path(self.env_path_str) 
         print(f"environment path: {self.env_path_str}")
         print(f"godot path: {self.godot_path_str}")
         command = "{} --main-pack {}".format(self.godot_path_str, self.env_path_str)
         if not self.is_rendering:
             command = command + " --disable-render-loop --no-window"
         self.godot_process = subprocess.Popen(command, shell=True)
         self.is_godot_launched = True
Example #26
def _var_subst_chunk_result_ext(q_res, _sel_var_and_vars, _, **kwds):
    var, _vars = _sel_var_and_vars
    chunk_res = Counter()
    res_rows_path = ['results', 'bindings']
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, res_rows_path, default=[])
    )

    for row in bindings:
        row_res = get_path(row, [var])
        count_res = int(get_path(row, [COUNT_VAR], '0'))
        chunk_res[row_res] += count_res
    return chunk_res
Example #27
def _combined_chunk_res(q_res, _vars, _ret_val_mapping):
    chunk_res = {}
    res_rows_path = ['results', 'bindings']
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, res_rows_path, default=[])
    )
    for row in bindings:
        row_res = tuple([get_path(row, [v]) for v in _vars])
        stps = _ret_val_mapping[row_res]
        ask_res = int(get_path(row, [ASK_VAR], '0'))
        count_res = int(get_path(row, [COUNT_VAR], '0'))
        chunk_res.update({stp: (ask_res, count_res) for stp in stps})
    return chunk_res
Example #28
def _combined_chunk_res(q_res, _vars, _ret_val_mapping):
    chunk_res = {}
    res_rows_path = ['results', 'bindings']
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, res_rows_path, default=[])
    )
    for row in bindings:
        row_res = tuple([get_path(row, [v]) for v in _vars])
        stps = _ret_val_mapping[row_res]
        ask_res = int(get_path(row, [ASK_VAR], '0'))
        count_res = int(get_path(row, [COUNT_VAR], '0'))
        chunk_res.update({stp: (ask_res, count_res) for stp in stps})
    return chunk_res
Example #29
def _var_subst_chunk_result_ext(q_res, _sel_var_and_vars, _, **kwds):
    var, _vars = _sel_var_and_vars
    chunk_res = Counter()
    res_rows_path = ['results', 'bindings']
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, res_rows_path, default=[])
    )

    for row in bindings:
        row_res = get_path(row, [var])
        count_res = int(get_path(row, [COUNT_VAR], '0'))
        chunk_res[row_res] += count_res
    return chunk_res
Example #30
    def __init__(self,
                 data_fold=None,
                 full_dataset=False,
                 out_path=None,
                 in_path=None,
                 output_patches=False,
                 scale=1.5,
                 minSize=(200, 200),
                 windowSize=(40, 40),
                 stepSize=15):
        self.scale = scale
        self.minSize = minSize
        self.windowSize = windowSize
        self.output_patches = output_patches

        self.stepSize = stepSize if self.output_patches == False else 30
        self.total_window_num = 0
        if data_fold is None:
            self.data_fold = utils.TRAINING if self.output_patches or full_dataset else utils.VALIDATION
        else:
            self.data_fold = data_fold

        self.in_path = in_path if in_path is not None else utils.get_path(
            in_or_out=utils.IN,
            data_fold=self.data_fold,
            full_dataset=full_dataset)
        self.img_names = [
            img_name for img_name in os.listdir(self.in_path)
            if img_name.endswith('.jpg')
        ]
        self.img_names = self.img_names[:20] if DEBUG else self.img_names

        self.detections = Detections()
        folder_name = 'scale{}_minSize{}-{}_windowSize{}-{}_stepSize{}_dividedSizes/'.format(
            self.scale, self.minSize[0], self.minSize[1], self.windowSize[0],
            self.windowSize[1], self.stepSize)

        self.out_path = out_path if out_path is not None else '{}'.format(
            utils.get_path(full_dataset=True,
                           in_or_out=utils.OUT,
                           slide=True,
                           data_fold=self.data_fold,
                           out_folder_name=folder_name))

        self.evaluation = Evaluation(full_dataset=full_dataset,
                                     method='slide',
                                     folder_name=folder_name,
                                     save_imgs=False,
                                     out_path=self.out_path,
                                     detections=self.detections,
                                     in_path=self.in_path)
Example #31
 def __init__(self, alpha, gamma):
     self.alpha = alpha
     self.gamma = gamma
     self.dow = -1
     self.cur_discrete_time = 0
     self.grid_values = collections.defaultdict(float)
     # tile coding
     self.layer_values = collections.defaultdict(float)
     if sys.platform != 'darwin':
         self.hung = ctypes.cdll.LoadLibrary(get_path(
             __file__, "hungnp.so"))
     else:
         self.hung = ctypes.cdll.LoadLibrary(
             get_path(__file__, "hungnpmc.so"))
     self.hung.MaxProfMatching.restype = ctypes.c_double
Example #32
async def fetch_all():
    """Fetch pdf files."""

    with get_path("answers").open() as answers_file:
        nsolved = sum(1 for line in answers_file)

    cookies = {
        'DYNSRV': 'lin-10-170-0-31',
        'PHPSESSID': 'a4fb01c0de27e200683b4d556461b5aa',
        'keep_alive': '1119831347%23333574%233PpV0T6RtnqnCB6GNF4PvEH1TiEX1nlc'
    }

    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:58.0) Gecko/20100101 Firefox/58.0',
    }

    async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
        coros = [fetch_one(session, num) for num in range(1, nsolved + 1)]

        results = await asyncio.gather(*coros)
        files = 0
        for num in results:
            if num is not None:
                print(f'Saved a file for problem {num}')
                files += 1

        return files
Example #33
	def add(self, cate):
		url = cate['url']

		domain = get_domain(url)
		subdomains = get_subdomains(url)
		paths = get_path(url).split('/')
		query = urlparse.urlparse(url).query

		if domain not in self.root:
			self.root[domain] = {'sub':{}, 'path':{}}

		node = self.root[domain]
		if len(subdomains) > 1 or len(subdomains) == 1 and subdomains[0] != 'www':
			for sub in subdomains:
				if sub not in node['sub']:
					node['sub'][sub] = {'sub':{}, 'path':{}}
				node = node['sub'][sub]

		for path in paths:
			if path not in node['path']:
				node['path'][path] = {'path':{}}
			node = node['path'][path]

		if query:
			node['path']['query___' + query] = {'path':{}}
			node = node['path']['query___' + query]

		node['cate'] = cate
Example #34
    def __init__(self, handle):
        ''' Initialize the toolbars and the work surface '''
        super(BBoardActivity, self).__init__(handle)

        self.datapath = get_path(activity, 'instance')

        self._hw = get_hardware()

        self._playback_buttons = {}
        self._audio_recordings = {}
        self.colors = profile.get_color().to_string().split(',')

        self._setup_toolbars()
        self._setup_canvas()

        self.slides = []
        self._setup_workspace()

        self._buddies = [profile.get_nick_name()]
        self._setup_presence_service()

        self._thumbs = []
        self._thumbnail_mode = False

        self._recording = False
        self._grecord = None
        self._alert = None

        self._dirty = False
Example #35
def plot_loss():
    path = utils.get_path(neural=True, in_or_out=utils.OUT, data_fold=utils.TRAINING)
    path_slide = path+'slide/'
    path_viola = path+'viola/'
    for path in [path_slide, path_viola]:
        for file in os.listdir(path):
            if file.endswith('_history'):
                training_loss = list()
                validation_loss = list()
                with open(path+file, 'rb') as csv_file:
                    csv_reader = csv.reader(csv_file, delimiter='\t')
                    for i, row in enumerate(csv_reader):
                        if i==0:
                            continue
                        training_loss.append(float(row[1]))
                        validation_loss.append(float(row[2]))

                plt.plot(training_loss, linewidth=3, label='train loss')
                plt.plot(validation_loss, linewidth=3, label='valid loss')

                #plt.title('History of {0}'.format(file[:-(len('_history'))]))
                plt.legend(loc='best')
                plt.grid()
                plt.xlabel("epoch")
                plt.ylabel("loss")

                plot_name = path+file+'.jpg' 
                plt.savefig(plot_name)
                plt.close()
Example #36
    def __init__(self, config, test=False, target=None):
        self._cwd = os.getcwd()
        self._root = get_path()
        self._config = config
        self._test = test
        self._target = target

        try:
            self._skeleton_path = resource_filename(__name__, os.path.join('skeleton', 'default'))
        except NotImplementedError:
            self._skeleton_path = os.path.join(sys.prefix, 'skeleton', 'default')

        try:
            self._assetPath = os.path.join(resource_filename(__name__, os.path.join('assets', 'manage.py')))
        except NotImplementedError:
            self._assetPath = os.path.join(sys.prefix, 'assets', 'manage.py')

        self._projectName = self._config['name']
        if 'deployment_path' in self._config:
            self._deployment_path = self._config['deployment_path']
        else:
            self._deployment_path = ''
        if 'zip_path' in self._config:
            self._zip_path = self._config['zip_path']
        else:
            self._zip_path = ''
        if 'doc_path' in self._config:
            self._doc_path = self._config['doc_path']
        else:
            self._doc_path = ''
Example #37
def save_training_TP_FP_using_voc(evaluation, img_names, in_path, out_folder_name=None, neg_thresh=0.3):
    '''Use the VOC overlap scores to decide whether a patch should be saved as a true positive (TP), saved as a false positive (FP), or discarded.
    '''
    assert out_folder_name is not None
    general_path = utils.get_path(neural=True, data_fold=utils.TRAINING, in_or_out=utils.IN, out_folder_name=out_folder_name)
    path_true = general_path+'truepos_from_selective_search/'
    utils.mkdir(path_true)

    path_false = general_path+'falsepos_from_selective_search/'
    utils.mkdir(path_false)

    for img_name in img_names:
        good_detections = defaultdict(list)
        bad_detections = defaultdict(list)
        try:
            img = cv2.imread(in_path+img_name, flags=cv2.IMREAD_COLOR)
        except:
            print 'Cannot open image'
            sys.exit(-1)

        for roof_type in utils.ROOF_TYPES:
            detection_scores = evaluation.detections.best_score_per_detection[img_name][roof_type]
            for detection, score in detection_scores:
                if score > 0.5:
                    #true positive
                    good_detections[roof_type].append(detection)
                if score < neg_thresh:
                    #false positive
                    bad_detections[roof_type].append(detection)
                
        for roof_type in utils.ROOF_TYPES:
            extraction_type = 'good'
            save_training_FP_and_TP_helper(img_name, evaluation, good_detections[roof_type], path_true, general_path, img, roof_type, extraction_type, (0,255,0))               
            extraction_type = 'background'
            save_training_FP_and_TP_helper(img_name, evaluation, bad_detections[roof_type], path_false, general_path, img, roof_type, extraction_type, (0,0,255))               
Example #38
async def fetch_all():
    """Fetch pdf files."""

    with get_path("answers").open() as answers_file:
        nsolved = sum(1 for line in answers_file)

    cookies = {
        'DYNSRV': 'lin-10-170-0-31',
        'PHPSESSID': 'a4fb01c0de27e200683b4d556461b5aa',
        'keep_alive': '1119831347%23333574%233PpV0T6RtnqnCB6GNF4PvEH1TiEX1nlc'
    }

    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:58.0) Gecko/20100101 Firefox/58.0',
    }

    async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
        coros = [fetch_one(session, num) for num in range(1, nsolved + 1)]

        results = await asyncio.gather(*coros)
        files = 0
        for num in results:
            if num is not None:
                print(f'Saved a file for problem {num}')
                files += 1

        return files
Example #39
def test():
    paths = glob(join(c.TEST_DIR, '*.png'))
    imgs = run(paths)

    for i, path in enumerate(paths):
        save_path = utils.get_path(join(c.SAVE_DIR, 'test/' + basename(path)))
        utils.save_output([imgs[i]], save_path)
Example #40
 def save_results(self, results):
     with open(
             utils.get_path(
                 "data",
                 self.player1.__class__.__name__ + "LosingResults.pkl"),
             "wb") as f:
         pickle.dump(results, f)
Example #41
 def scrap_a_day_as_corpus(self):
     urls = self._get_urls_from_breaking_news()
     n_successes = 0
     
     docs = []
     indexs = []
     oid_aids = []
     
     for i, url in enumerate(urls):
         try:
             json_dict = scrap(url)
             content = json_dict.get('content', '')
             if not content:
                 continue
             index = '{}\t{}\t{}\t{}'.format(
                 get_path(json_dict['oid'], self.year, self.month, self.date, json_dict['aid']),
                 json_dict.get('sid1',''),
                 json_dict.get('writtenTime', ''),
                 json_dict.get('title', '')
             )
             docs.append(content.replace('\n', '  ').replace('\r\n', '  ').strip())
             indexs.append(index)
             oid_aids.append((json_dict['oid'], json_dict['aid']))
             n_successes += 1
         except Exception as e:
             print('Exception: {}\n{}'.format(url, str(e)))
             continue
         finally:
             if i % 1000 == 999:
                 print('\r  - {}scraping {} in {} ({} success) ...'.format(self._name + (': ' if self._name else ''), i+1, len(urls), n_successes), flush=True, end='')
     print('\rScrapped news')
     return docs, indexs, oid_aids
Example #42
    def __init__(self, parameters=None, models_path=None, model_path=None):
        """
        Initialize the model. We either provide the parameters and a path where
        we store the models, or the location of a trained model.
        """
        if model_path is None:
            assert parameters and models_path
            # Create a name based on the parameters
            self.parameters = parameters
            self.name = get_name(parameters)

            # Model location
            model_path = os.path.join(models_path, get_path(parameters))

            self.model_path = model_path
            self.parameters_path = os.path.join(model_path, 'parameters.pkl')
            self.mappings_path = os.path.join(model_path, 'mappings.pkl')
            # Create directory for the model if it does not exist
            if not os.path.exists(self.model_path):
                os.makedirs(self.model_path)
            # Save the parameters to disk
            with open(self.parameters_path, 'wb') as f:
                cPickle.dump(parameters, f)
        else:
            assert parameters is None and models_path is None
            # Model location
            self.model_path = model_path
            self.parameters_path = os.path.join(model_path, 'parameters.pkl')
            self.mappings_path = os.path.join(model_path, 'mappings.pkl')
            # Load the parameters and the mappings from disk
            with open(self.parameters_path, 'rb') as f:
                self.parameters = cPickle.load(f)

            self.reload_mappings()
        self.components = {}
Example #43
def main():
    size = 40
    total = spanning = 0

    graph = Graph(size)

    with get_path("data", "network.txt").open() as data_file:
        for u, line in enumerate(data_file):
            for v, w in enumerate(line.rstrip().split(",")):
                if v > u and w != "-":
                    w = int(w)
                    total += w
                    graph.insert_edge(u, v, w)

    # Implement the Prim–Jarník algorithm
    D = [float("inf")] * size
    root = 0  # can be any vertex of the graph

    heap = [(0, root)]
    seen = [False] * size

    while heap:
        w, u = heappop(heap)
        if not seen[u]:
            spanning += w
            seen[u] = True

            for e in graph.incident_edges(u):
                v = e.opposite(u)
                if e.weight < D[v]:
                    D[v] = e.weight
                    heappush(heap, (e.weight, v))

    return total - spanning
Example #44
def main():
    N = 40
    total = spanning = 0
    graph = Graph(N)

    with get_path("data", "network.txt").open() as data_file:
        for u, line in enumerate(data_file):
            for v, w in enumerate(line.rstrip().split(",")):
                if v > u and w != "-":
                    w = int(w)
                    total += w
                    graph.insert_edge(u, v, w)

    # Implement the Prim–Jarník algorithm
    D = [float("inf")] * N
    root = 0  # can be any vertex of the graph

    heap = [(0, root)]
    seen = [False] * N

    while heap:
        w, u = heappop(heap)
        if not seen[u]:
            spanning += w
            seen[u] = True

            for e in graph.incident_edges(u):
                v = e.opposite(u)
                if e.weight < D[v]:
                    D[v] = e.weight
                    heappush(heap, (e.weight, v))

    return total - spanning
Example #45
    def __init__(self, config):
        self._cwd = os.getcwd()
        self._root = get_path()
        self._config = config
        self._verify_ssl = True

        if 'urls' not in self._config:
            raise MissingKeyError('Could not find url settings in either global or local configuration file.')

        if 'upload' not in self._config['urls']:
            raise MissingKeyError('Could not find an upload url in either the global or local configuration file.')
        else:
            self._upload_url = self._config['urls']['upload']

        if 'login_url' not in self._config['urls']:
            self._login_url = self._upload_url
        else:
            self._login_url = self._config['urls']['login_url']

        if 'username' not in self._config['urls']:
            self._username = raw_input('Please provide the username for your upload server (or leave blank if none is required): ')
        else:
            self._username = self._config['urls']['username'].encode()

        if 'password' not in self._config['urls']:
            self._password = getpass.getpass('Please provide the password for your upload server (or leave blank if none is required): ')
        else:
            self._password = self._config['urls']['password'].encode()

        self._zip_name = self._config['name'] + '_v' + self._config['version'] + '.zip'
        self._zip_path = os.path.join(self._cwd, 'build', self._zip_name)
Example #46
def main():
    chars_saved = 0

    # From the problem definition:
    # You can assume that all the Roman numerals in the file contain no more
    # than four consecutive identical units.

    with get_path("data", "roman.txt").open() as data_file:
        for line in data_file:
            if "VIIII" in line:
                chars_saved += 3  # VIIII => IX
            elif "IIII" in line:
                chars_saved += 2  # IIII => IV

            if "LXXXX" in line:
                chars_saved += 3  # LXXXX => XC
            elif "XXXX" in line:
                chars_saved += 2  # XXXX => XL

            if "DCCCC" in line:
                chars_saved += 3  # DCCCC => CM
            elif "CCCC" in line:
                chars_saved += 2  # CCCC => CD

    return chars_saved
Example #47
def _load_statistics():
    global _statistics
    filename = get_path('datasets', 'statistics.json')
    _statistics = load_data(filename, verbose=False)
    if not _statistics or not isinstance(_statistics, dict):
        _statistics = {}
    print('')
Example #48
def main():
    args = parse_args()

    if args.path:
        paths = []
        for path in args.path:
            path_obj = get_path("python", path)
            if not path_obj.exists():
                print(f"File not found: {path}")
                return 1
            paths.append(path_obj)
    else:
        paths = sorted(get_path("python").glob("problem???.py"))

    loop = asyncio.get_event_loop()
    loop.run_until_complete(add_docstrings(paths))
    return 0
Example #49
 def __init__(self):
     import os
     from utils import get_path
     self.categories_by_id = None
     self.categories_by_name = None
     self._filename = get_path('fs-categories.json')
     self._api_venues = Foursquare(client_id=_fsc.CLIENT_ID, client_secret=_fsc.CLIENT_SECRET)
     self.load_categories()
Example #50
def main():
    paths = defaultdict(list)
    #for fold in [utils.TRAINING, utils.TESTING, utils.VALIDATION]:
    #for full_dataset in [True, False]:
    for fold in [utils.VALIDATION]:
        viola_path = utils.get_path(in_or_out=utils.OUT, viola=True, data_fold=fold) 
        pipe_path = utils.get_path(in_or_out=utils.OUT, pipe=True, data_fold=fold)

        #for path in [viola_path, pipe_path]:
        for path in [viola_path]:
            for folder in os.listdir(path):
                if os.path.isfile(path+folder+'/report.txt'):
                    paths[fold].append((path, folder))
        #print viola_paths
        process_viola_reports(paths[fold], fold=fold)

    '''
Example #51
def predict_query(sparql, timeout, graph_pattern, source,
                  limit=config.PREDICTION_RESULT_LIMIT):
    """Performs a single query starting at ?SOURCE returning all ?TARGETs."""
    assert isinstance(graph_pattern, GraphPattern)
    assert isinstance(source, Identifier)

    vars_in_graph = graph_pattern.vars_in_graph
    if TARGET_VAR not in vars_in_graph:
        logger.warning(
            'graph pattern without %s used for prediction:\n%r',
            TARGET_VAR.n3(), graph_pattern
        )
        return timeout, []

    q = graph_pattern.to_sparql_select_query(
        projection=[TARGET_VAR],
        distinct=True,
        bind={SOURCE_VAR: source},
        limit=limit,
    )
    try:
        t, q_res = _query(sparql, timeout, q)
    except (SPARQLWrapperException, SAXParseException, URLError):
        logger.warning(
            'Exception occurred during prediction, assuming empty result...\n'
            'Query:\n%s\nException:', q,
            exc_info=1,  # appends exception to message
        )
        t, q_res = timeout, {}
    else:
        if query_time_soft_exceeded(t, timeout):
            kind = 'hard' if query_time_hard_exceeded(t, timeout) else 'soft'
            logger.info(
                'prediction query exceeded %s timeout %s:\n%s',
                kind, t, q
            )

    res = []
    res_rows_path = ['results', 'bindings']
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, res_rows_path, default=[])
    )
    for row in bindings:
        res.append(get_path(row, [TARGET_VAR]))
    return timeout, set(res)
Example #52
 def save_loss(self):
     '''Save the plot of the training and validation loss
     '''
     train_loss = [row['train_loss'] for row in self.train_history_]
     valid_loss = [row['valid_loss'] for row in self.train_history_]
     plt.plot(train_loss, label='train loss')
     plt.plot(valid_loss, label='valid loss')
     plt.legend(loc='best')
     path = utils.get_path(neural=True, in_or_out=utils.OUT, data_fold=utils.TRAIN) 
     plt.savefig(path+self.net_name+'_loss.png')
Example #53
    def setup_augmented_patches():
        '''
        No division between different roof sizes: if a roof has a size that is off, we resize it
        Make them lie down, save patches to folder
        Augment patches, save them to augmented folder
        '''
        in_path = utils.get_path(in_or_out=utils.IN, data_fold=utils.TRAINING)
        out_path = utils.get_path(viola=True, in_or_out=utils.IN, data_fold=utils.TRAINING)

        img_names_list = [img_name for img_name in os.listdir(in_path) if img_name.endswith('.jpg')]

        for roof_type in ['metal', 'thatch']:
            for img_id, img_name in enumerate(img_names_list):

                print 'Processing image: {0}'.format(img_name)
                img_path = in_path+img_name

                polygon_list = DataLoader.get_polygons(roof_type=roof_type, xml_name=img_name[:-3]+'xml', xml_path=in_path, padding=0)  # padding=0 assumed; the value was missing in the original listing
                roof_patches = DataLoader.extract_patches(polygon_list, img_path=img_path, grayscale=True)

                for roof_id, roof_img in enumerate(roof_patches):
                    print 'Processing image {0}: roof {1}'.format(img_id, roof_id)
                       
                    #if it's vertical, make it lie down
                    if roof_img.shape[0] > roof_img.shape[1]:
                        roof_img = DataAugmentation.rotateImage(roof_img, clockwise=True)
                    
                    #write basic positive example to the right folder
                    general_path = '{0}{1}_{2}_{3}'.format(out_path, roof_type, img_name[:-4], roof_id)

                    #calculate and write the augmented images 
                    for i in range(4):
                        roof_img_cp = np.copy(roof_img)

                        if i == 1:
                            roof_img_cp = cv2.flip(roof_img_cp,flipCode=0)
                        elif i == 2:
                            roof_img_cp = cv2.flip(roof_img_cp,flipCode=1)
                        elif i==3:
                            roof_img_cp = cv2.flip(roof_img_cp,flipCode=-1)

                        write_to_path = '{0}_flip{1}.jpg'.format(general_path, i)
                        cv2.imwrite(write_to_path, roof_img_cp)
Example #54
def main():
    names = get_path('data', 'names.txt').read_text().split('","')
    names[0] = names[0][1:]
    names[-1] = names[-1][:-1]
    names.sort()

    values = {c: i for i, c in enumerate(string.ascii_uppercase, 1)}

    return sum([sum([values[c] for c in name]) * pos
                for pos, name in enumerate(names, 1)])
Example #55
def get_detectors(combo_f):
    detectors = dict()
    if combo_f.startswith('combo'):
        detector_file = utils.get_path(viola=True, params=True)+str(combo_f)+'.csv'
    else:
        detector_file = utils.get_path(viola=True, params=True)+'combo'+str(combo_f)+'.csv'

    detectors = defaultdict(list)
    with open(detector_file, 'r') as csvfile:
        r = csv.reader(csvfile, delimiter=',')
        for line in r:
            if len(line) < 2:
                continue
            if line[0] == 'metal':
                detectors['metal'].append(line[1].strip())
            elif line[0] == 'thatch':
                detectors['thatch'].append(line[1].strip())
            else:
                raise ValueError("Unknown detector type {0}".format(line[0]))
    return detectors
Example #56
def main():
    words = get_path('data', 'words.txt').read_text().split('","')
    words[0] = words[0][1:]
    words[-1] = words[-1][:-1]

    char_map = {c: i for i, c in enumerate(string.ascii_uppercase, 1)}
    values = [sum([char_map[char] for char in word]) for word in words]

    triangle_numbers = set([n*(n+1)//2 for n in
                           range(1, (-1 + int(sqrt(1 + 8*max(values))))//2 + 1)])
    return len([v for v in values if v in triangle_numbers])
Example #57
def vote():
    if request.method == "POST" :
        
        vote = request.form["vote"]
        
        url = "http://gdata.youtube.com/feeds/api/videos?q=%s&max-results=1&v=2&alt=jsonc" % urllib.quote_plus(vote)
        result = simplejson.load(urllib.urlopen(url))
        video_id = result['data']['items'][0]['id']
        
        utils.append(utils.get_path(RADIO_ROOT, 'to_process_votes'), video_id)
        
    return render_template("vote.html")
Example #58
 def get_all_books(self, source="ZhangBook", limit=1, **kwargs):
     spider = Cluster.get_spider()
     params = {
         "host": spider['host'],
         "port": spider['port'],
         "source": source,
         "path": "get_all_books"
     }
     path = get_path(**params)
     data = kwargs
     data["limit"] = limit
     return requests(path, data=json.dumps(data))
Example #59
def copy_images(data_fold):
    if data_fold == utils.TRAINING:
        prefix = 'training_'
    elif data_fold == utils.VALIDATION:
        prefix = 'validation_'

    in_path = utils.get_path(in_or_out = utils.IN, data_fold=data_fold)
    pdb.set_trace()
    for img_name in os.listdir(in_path):
        if img_name.endswith('jpg') or img_name.endswith('xml'):
            #move the image over and save it with a prefix       
            subprocess.check_call('cp {} {}'.format(in_path+img_name, prefix+img_name), shell=True)