def generate_ev_binary(group='dev', output_name='ev_binary'):
    outdir = '{}/sent_classifier/{}/'.format(utils.DATA_DIR, output_name)
    try:
        os.makedirs(outdir)
    except OSError:
        print('Target dir: {} already exists'.format(outdir))
        input('Proceeding with generation...')

    pmids = utils.group_ids('ev_inf', group)
    with open('{}/{}.tsv'.format(outdir, group), 'w') as fout:
        for pmid in pmids:
            sents = utils.readlines('{}/documents/sents/{}.sents'.format(
                utils.DATA_DIR, pmid))
            frames = utils.readlines('{}/documents/sents/{}.frame_idx'.format(
                utils.DATA_DIR, pmid))

            pos_sents = [s for s, fs in zip(sents, frames) if len(fs) >= 1]
            neg_sents = [s for s, fs in zip(sents, frames) if len(fs) == 0]

            # Debug hook: uncomment to inspect the positive sentences per PMID.
            # print(pmid, pos_sents); input()
            shuffle(neg_sents)
            neg_sents = neg_sents[:len(pos_sents)]

            for s in pos_sents:
                fout.write('{}\t{}\n'.format(1, s))
            for s in neg_sents:
                fout.write('{}\t{}\n'.format(0, s))
def read_docs(phase='starting_spans'):
    pmid_groups = {}
    for g in GROUPS:
        pmids = utils.readlines(
            os.path.join(config.EBM_NLP_DIR, 'pmids_{}.txt'.format(g)))
        for pmid in pmids:
            pmid_groups[pmid] = g

    def get_e_fname(pmid, e):
        if pmid_groups[pmid] == 'test':
            subdir = os.path.join('test', 'gold')
        else:
            subdir = 'train'
        f = '{}.AGGREGATED.ann'.format(pmid)
        return os.path.join(config.EBM_NLP_DIR, 'annotations', 'aggregated',
                            phase, e, subdir, f)

    docs = []
    for pmid, group in pmid_groups.items():
        tokens = utils.readlines(
            os.path.join(config.EBM_NLP_DIR, 'documents',
                         '{}.tokens'.format(pmid)))
        text, token_offsets = utils.join_tokens(tokens)
        doc = classes.Doc(pmid, text)
        doc.group = group
        for e in ['participants', 'interventions', 'outcomes']:
            label_name = 'GOLD_{}'.format(e[0])
            labels = [int(l) for l in utils.readlines(get_e_fname(pmid, e))]
            for token_i, token_f, l in utils.condense_labels(labels):
                char_i = token_offsets[token_i][0]
                char_f = token_offsets[token_f - 1][1]
                doc.labels[label_name].append(
                    classes.Span(char_i, char_f, text[char_i:char_f]))
        docs.append(doc)
    return docs
Example #3
def generate_frames(output_name, group='dev'):
    icodir = '{}/ico_acceptor/{}/'.format(utils.DATA_DIR, output_name)

    data = {}

    for sample in ['x', 'c', 'o']:
        input_lines = utils.readlines('{}/{}_sample_{}.txt'.format(
            icodir, group, sample))
        input_lines = [l.split('\t') for l in input_lines]
        output_lines = utils.readlines('{}/{}_sample_{}_results.txt'.format(
            icodir, group, sample))
        output_lines = [l.split('\t') for l in output_lines]

        #sample_c.write(utils.joinstr([pmid, sent_idx, frame_idx, i, c, o, s])) don't show Jay
        assert len(input_lines) == len(output_lines)

        all_lines = [i_l + o_l for i_l, o_l in zip(input_lines, output_lines)]

        for pmid, pmid_lines in groupby(all_lines, itemgetter(0)):
            if pmid not in data:
                data[pmid] = {}
            for frame_idx, frame_lines in groupby(pmid_lines, itemgetter(2)):
                pmids, sent_idxs, frame_idxs, i_s, c_s, o_s, s_s, p0s, p1s = zip(
                    *frame_lines)
                assert len(set(pmids)) == 1
                assert len(set(sent_idxs)) == 1
                assert len(set(frame_idxs)) == 1
                assert len(set(s_s)) == 1

                if len(nltk.tokenize.word_tokenize(s_s[0])) < 10:
                    continue

                sent_idx = sent_idxs[0]
                frame_idx = frame_idxs[0]
                ev_span = s_s[0]

                top_frames = sorted(zip(p1s, i_s, c_s, o_s),
                                    key=itemgetter(0),
                                    reverse=True)

                assert frame_idx not in data[pmid]
                frame = {
                    'sent_idx': sent_idx,
                    'frame_idx': frame_idx,
                    'ev': ev_span,
                    'icos': top_frames[:5],
                    'sample': sample,
                }
                data[pmid][frame_idx] = frame

    for pmid, frames in data.items():
        with open(
                '{}/documents/frames/{}.bert_frames'.format(
                    utils.DATA_DIR, pmid), 'w') as fout:
            for frame in frames.values():
                fout.write(json.dumps(frame) + '\n')
Example #4
def export_gt_depths_kitti():
    parser = argparse.ArgumentParser(description='export_gt_depth')

    parser.add_argument('--data_path',
                        type=str,
                        help='path to the root of the KITTI data',
                        required=True)
    parser.add_argument('--split',
                        type=str,
                        help='which split to export gt from',
                        required=True,
                        choices=["eigen", "eigen_benchmark","odom", "AirSim"])
    parser.add_argument('--sequence',
                        type=int,
                        help='which odom sequence to export gt from',
                        required=False,
                        default=0)
    opt = parser.parse_args()

    split_folder = os.path.join(os.path.dirname(__file__), "splits", opt.split)
    if opt.split == "odom":
        lines = readlines(os.path.join(split_folder,"{:02d}_exp.txt".format(opt.sequence)))
    else:
        lines = readlines(os.path.join(split_folder, "test_files.txt"))

    print("Exporting ground truth depths for {}".format(opt.split))

    gt_depths = []
    for line in lines:
        folder, frame_id, _ = line.split()
        frame_id = int(frame_id)

        if (opt.split == "eigen")|(opt.split == "odom"):
            calib_dir = os.path.join(opt.data_path, folder.split("/")[0])
            velo_filename = os.path.join(
                opt.data_path, folder,
                "velodyne_points/data", "{:010d}.bin".format(frame_id))
            gt_depth = generate_depth_map(calib_dir, velo_filename, 2, True)
        elif opt.split == "eigen_benchmark":
            gt_depth_path = os.path.join(
                opt.data_path, folder, "proj_depth",
                "groundtruth", "image_02", "{:010d}.png".format(frame_id))
            gt_depth = np.array(pil.open(gt_depth_path)).astype(np.float32) / 256
        elif opt.split == "AirSim"
            gt_depth_path = os.path.join(
                opt.data_path, folder, "{}.pfm".format(frame_id))
            gt_depth,_ = read_pfm(gt_depth_path)

        gt_depths.append(gt_depth.astype(np.float32))

    output_path = os.path.join(split_folder, "gt_depths.npz")

    print("Saving to {}".format(opt.split))

    np.savez_compressed(output_path, data=np.array(gt_depths))
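As a rough check that the export worked, the archive can be read back like so (a minimal sketch; the split name and path are placeholders, and allow_pickle=True is typically needed because the depth maps can have different resolutions and end up stored as an object array):

import numpy as np

# Hypothetical location: the default output path for the "eigen" split.
gt = np.load("splits/eigen/gt_depths.npz", allow_pickle=True)["data"]
print(len(gt), gt[0].shape, gt[0].dtype)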
Example #5
def create_sfnorm_pair_with_pole(opts):
    from datasets_sfgan import SFGAN_Base_Dataset
    from torch.utils.data import DataLoader
    from utils import readlines
    import torch
    from utils import tensor2disp

    fpath = os.path.join(os.path.dirname(__file__), "..", "splits", opts.split,
                         "{}_files.txt")
    train_filenames = readlines(fpath.format("train"))
    val_filenames = readlines(fpath.format("val"))
    syn_train_filenames = readlines(fpath.format("syn_train"))
    syn_val_filenames = readlines(fpath.format("syn_val"))

    train_dataset = SFGAN_Base_Dataset(opts.data_path,
                                       train_filenames,
                                       syn_train_filenames,
                                       opts.height,
                                       opts.width,
                                       opts.frame_ids,
                                       4,
                                       opts=opts,
                                       is_train=False,
                                       load_seman=True)
    train_loader = DataLoader(train_dataset,
                              1,
                              shuffle=not opts.noShuffle,
                              num_workers=opts.num_workers,
                              pin_memory=True,
                              drop_last=False)

    min_num = 100
    poleId = 5
    pole_ind_rec = list()
    for batch_idx, inputs in enumerate(train_loader):
        num_syn = torch.sum(inputs['syn_semanLabel'] == poleId)
        num_real = torch.sum(inputs['real_semanLabel'] == poleId)

        if num_syn > min_num and num_real > min_num:
            pole_ind_rec.append(batch_idx)

        print(batch_idx)

    split_root = '../splits/sfnorm_pole'

    wf1 = open(os.path.join(split_root, 'train_files.txt'), "w")
    for pole_ind in pole_ind_rec:
        wf1.write(train_filenames[pole_ind] + '\n')
    wf1.close()

    wf1 = open(os.path.join(split_root, 'syn_train_files.txt'), "w")
    for pole_ind in pole_ind_rec:
        wf1.write(syn_train_filenames[pole_ind] + '\n')
    wf1.close()
Example #6
def ibo_tagging(corpus, keywords, output=None, thread=None):
    """
    Arguments:
        corpus(str): Path to the corpus file.
        keywords(str): Path to the keywords dictionary (a JSON file).
        thread(int): Number of threads to use.
        output(str): Path to the output file.
    """
    # output name
    if output is None:
        output = corpus[:-4] + "_ibo.tsv"

    # Load and merge dictionary
    # files = [itr for itr in os.listdir(rule) if itr.endswith("_leaf.json")]

    # Load entities
    # entity = dict()
    # for itr in files:
    #     entity.update(json.load(open(rule + itr, "r")))
    entity = json.load(open(keywords, "r"))

    # Read corpus
    raw_data = readlines(corpus)

    # Threading
    param = (entity,)
    result = generic_threading(thread, raw_data, generate_ibo, param)

    # Write result to file
    file_io(output, result)
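A minimal usage sketch with placeholder paths (the keywords argument is a JSON dictionary of entities, per the json.load call above); when output is omitted, the result goes to "<corpus>_ibo.tsv":

# Hypothetical file names.
ibo_tagging("data/corpus.txt", keywords="data/keywords.json", thread=4)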
Example #7
def main():
    # We preprocess the list with a simple cli call to sort
    lines = utils.readlines("day4/sortedinput")

    guards = {}
    for line in lines:
        guardIdMatch = re.search(r"#(\d+)", line)
        if guardIdMatch is not None:
            guardId = int(guardIdMatch.group(1))
            if guardId in guards:
                currentSleepRecord = guards[guardId]
            else:
                currentSleepRecord = [0] * 60
                guards[guardId] = currentSleepRecord

        asleepMatch = re.search(r":(\d+)] falls asleep", line)
        if asleepMatch is not None:
            asleepMinute = int(asleepMatch.group(1))

        awakeMatch = re.search(r":(\d+)] wakes up", line)
        if awakeMatch is not None:
            awakeMinute = int(awakeMatch.group(1))
            for t in range(asleepMinute, awakeMinute):
                currentSleepRecord[t] += 1

    scenarioOneGuard = max(guards.items(), key=lambda g: sum(g[1]))
    print(scenarioOneGuard[0] * scenarioOneGuard[1].index(max(scenarioOneGuard[1])))

    scenarioTwoGuard = max(guards.items(), key=lambda g: max(g[1]))
    print(scenarioTwoGuard[0] * scenarioTwoGuard[1].index(max(scenarioTwoGuard[1])))
Example #8
def preliminary_cleanup(corpus, rule, output=None, thread=None, limit=None):
    """
    Preliminary cleanup of the corpus to make it easier for further
    processing methods. This method can be used to correct missing
    spaces after punctuation; any other customized rules can be
    added to the rule file. See punctuation_cleanup in utils for
    the formatting of the rules.

    Arguments:
        corpus(str): Path to the corpus file.
        rule(str): Path to the processing rule file.
        thread(int): Number of threads to use.
        output(str): Path to the output file.
    """
    # output name
    if output is None:
        output = corpus[:-4] + "_preprocessed.tsv"

    # Load rules
    rules = load_rules(rule)
    # Load data
    raw_data = readlines(corpus, limit=limit, skip=True)

    # Threading
    param = (rules, "PRELIMINARY")
    result = generic_threading(thread, raw_data, punctuation_cleanup, param)

    # Write result to file
    write_to_file(output, result)
Example #9
def remove_old(path):
    files = iter_files(path)
    for file in files:
        filename = os.path.basename(file)
        print(filename)
        f = open(os.path.join('./final', filename), 'a+')
        lines = readlines(file)
        i = 0
        cnt = 0
        for line in lines:
            article = json.loads(line)
            if 'year' in article:
                if int(article['year']) >= 2000:
                    if "author" in article:
                        article['author'] = list(set(article['author']))
                        tmp = json.dumps(article)
                        f.write(tmp + '\n')
                        f.flush()

                        i += 1
                        # if i % 100000 == 0:
                        #     print(i)
                    else:
                        cnt += 1

            else:
                cnt += 1
        print('%s  skip:%d, save %d' % (filename, cnt, i))
Example #10
def refine_corpus(corpus, rule_path, output=None, thread=None):
    """
    Clean up the given corpus according to the rules defined in the files.
    This method utilizes multithreading to accelerate the process.

    Arguments:
        corpus(str): Path to the corpus file.
        rule_path(str): Path to where "parentheses.tsv" and 
            "refine_list.tsv" are.
        thread(int): Number of threads to use.
        output(str): Path to the output file.
    """
    if output is None:
        output = corpus[:-4] + "_cleaned.txt"
    if not rule_path.endswith("/"):
        rule_path += "/"

    # Load rule files
    file_p = rule_path + "parentheses.tsv"
    file_r = rule_path + "refine_list.tsv"
    parentheses = load_rules(file_p)
    refine_list = load_rules(file_r)

    # Acquire the corpus
    raw_data = readlines(corpus)

    # Threading
    param = (parentheses, refine_list)
    result = generic_threading(thread, raw_data, corpus_cleanup, param)

    # Write all result to file
    write_to_file(output, result)
def export_gt_depths_kitti(opt):
    """
    Generate ground-truth data and store as .npz file
    """
    split_folder = os.path.join(os.path.dirname(__file__), "splits", opt.split)
    lines = readlines(os.path.join(split_folder, "test_files.txt"))

    print("Exporting ground truth depths for {}".format(opt.split))

    gt_depths = []
    for line in lines:

        folder, frame_id, _ = line.split()
        frame_id = int(frame_id)

        if opt.split == "eigen":
            calib_dir = os.path.join(opt.data_path, folder.split("/")[0])
            velo_filename = os.path.join(opt.data_path, folder,
                                         "velodyne_points/data",
                                         "{:010d}.bin".format(frame_id))
            gt_depth = generate_depth_map(calib_dir, velo_filename, 2, True)
        elif opt.split == "eigen_benchmark":
            gt_depth_path = os.path.join(opt.data_path, folder, "proj_depth",
                                         "groundtruth", "image_02",
                                         "{:010d}.png".format(frame_id))
            gt_depth = np.array(pil.open(gt_depth_path)).astype(
                np.float32) / 256

        gt_depths.append(gt_depth.astype(np.float32))

    output_path = os.path.join(split_folder, "gt_depths.npz")

    print("Saving to {}".format(opt.split))

    np.savez_compressed(output_path, data=np.array(gt_depths))
Example #12
    def read_filenames(self):
        # e.g. splits\\eigen_zhou\\train.txt
        split_path = os.path.join(self.dataset.split_folder,
                                  self.dataset.split_name).replace('\\', '/')
        self.filenames = readlines(split_path)
        self.num_items = len(self.filenames)
        self.steps_per_epoch = self.num_items // self.batch_size
def parse_subwords(file):
    """
    Parse subword mapping to dictionary.

    Args:
        file(str): Path to the subword mapping file.
                   format: <[WORD]>S1,S2,...
    """
    # Load subwords
    raw_data = readlines(file, limit=None)

    # Parsing information
    mentions, subwords = list(), list()
    for itr in raw_data:
        index = itr.find(">")
        mentions.append(itr[1:index])
        subwords.append(itr[index + 2:].split(","))

    # Create dictionary for mentions and its subwords
    dictionary = dict(zip(mentions, subwords))

    write_to_file("data/subwords.json", dictionary)

    # Subword pool for subword embedding
    # subword_pool = np.unique(list(chain.from_iterable(subwords)))
    subword_pool = dict(Counter(list(chain.from_iterable(subwords))))
    # print(subword_pool)
    print("Raw number of subwords: {:8d}".format(len(subword_pool)))
    write_to_file("data/subword_pool.json", subword_pool)
Example #14
def main():
    lines = utils.readlines("day2/input")

    sum2 = 0
    sum3 = 0
    for line in lines:
        letters = {}
        for c in line:
            if c in letters:
                letters[c] += 1
            else:
                letters[c] = 1

        found2 = False
        found3 = False
        for c in letters:
            if not found2 and letters[c] == 2:
                sum2 += 1
                found2 = True
            elif not found3 and letters[c] == 3:
                sum3 += 1
                found3 = True

    print("Checksum:", sum2 * sum3)

    for line in lines:
        for secondline in lines:
            if len(line) - 1 == len(strIntersection(line, secondline)):
                print("Common letters:", strIntersection(line, secondline))
                return
Example #15
def extract_vocabularies(corpus, rule, output=None, thread=None):
    """
    Extract vocabularies from the corpus, additional rules to achieve
    purer vocabularies can be defined in src/refine_rules/voc_cleanup.tsv

    Arguments:
        corpus(str): Path to the corpus file.
        rule(str): Path to the processing rule file.
        thread(int): Number of threads to use.
        output(str): Path to the output file.
    """
    if output is None:
        output = corpus[:-4] + "_vocabulary_list.json"

    # Load rules
    rules = load_rules(rule)

    # Acquire the corpus
    raw_data = readlines(corpus, limit=None)

    # Threading (TO-BE-IMPLEMENTED)
    # param = (rules, "SPLIT_WORDS")
    # generic_threading(thread, raw_data, punctuation_cleanup, param)
    result = punctuation_cleanup(0, raw_data, rules, mode='SPLIT_WORDS')

    # Counting occurrences
    print("Counting occurrences...")
    voc_list = Counter(result)

    # Save vocabulary to file
    write_to_file(output, voc_list)
Example #16
def parseData(filepath):
    samples = utils.readlines(filepath)
    output = []
    for sample in samples:
        m = re.search(r"(\d+) (\d+) (\d+) (\d+)", sample)
        e = executionData(*[int(i) for i in m.group(1, 2, 3, 4)])
        output.append(e)
    return output
Example #17
def export_gt_depths_kitti():

    parser = argparse.ArgumentParser(description="export_gt_depth")

    parser.add_argument(
        "--data_path",
        type=str,
        help="path to the root of the KITTI data",
        required=True,
    )
    parser.add_argument(
        "--split",
        type=str,
        help="which split to export gt from",
        required=True,
        choices=["eigen", "eigen_benchmark"],
    )
    opt = parser.parse_args()

    split_folder = os.path.join(os.path.dirname(__file__), "splits", opt.split)
    lines = readlines(os.path.join(split_folder, "test_files.txt"))

    print("Exporting ground truth depths for {}".format(opt.split))

    gt_depths = []
    for line in lines:

        folder, frame_id, _ = line.split()
        frame_id = int(frame_id)

        if opt.split == "eigen":
            calib_dir = os.path.join(opt.data_path, folder.split("/")[0])
            velo_filename = os.path.join(
                opt.data_path,
                folder,
                "velodyne_points/data",
                "{:010d}.bin".format(frame_id),
            )
            gt_depth = generate_depth_map(calib_dir, velo_filename, 2, True)
        elif opt.split == "eigen_benchmark":
            gt_depth_path = os.path.join(
                opt.data_path,
                folder,
                "proj_depth",
                "groundtruth",
                "image_02",
                "{:010d}.png".format(frame_id),
            )
            gt_depth = np.array(pil.open(gt_depth_path)).astype(
                np.float32) / 256

        gt_depths.append(gt_depth.astype(np.float32))

    output_path = os.path.join(split_folder, "gt_depths.npz")

    print("Saving to {}".format(opt.split))

    np.savez_compressed(output_path, data=np.array(gt_depths))
Example #18
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    lc_window_sz = 1

    ratios = []
    normal_errors = []
        
    encoder, depth_decoder, thisH, thisW = prepare_model_for_test(opt)
    backproject_depth = BackprojectDepth(1, thisH, thisW)

    filenames = readlines('./splits/nyu_test.txt')
    dataset = datasets.NYUTestDataset(
            opt.data_path,
            filenames,
            thisH, thisW,
    )
    
    dataloader = DataLoader(
            dataset, 1, shuffle=False, 
            num_workers=opt.num_workers
    )
    print("-> Computing predictions with size {}x{}".format(thisH, thisW))

    with torch.no_grad():
        for ind, (data, _, gt_norm, gt_norm_mask, K, K_inv) in enumerate(tqdm(dataloader)):
            input_color = data.cuda()
            if opt.post_process:
                input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0)
            output = depth_decoder(encoder(input_color))

            pred_disp, _ = disp_to_depth(
                    output[("disp", 0)], 
                    opt.min_depth, 
                    opt.max_depth
            )
            pred_disp = pred_disp.data.cpu() 

            if opt.post_process:
                N = pred_disp.shape[0] // 2
                pred_disp = batch_post_process_disparity(
                        pred_disp[:N], torch.flip(pred_disp[N:], [3]) 
                )
            pred_depth = 1 / pred_disp

            cam_points = backproject_depth(pred_depth, K_inv)
            cam_points = cam_points[:, :3, ...].view(1, 3, thisH, thisW)
            normal = depth_2_normal(cam_points, lc_window_sz)

            normal = F.pad(normal, (0, lc_window_sz, 0, lc_window_sz), mode='replicate')
            normal = F.interpolate(normal, (gt_norm.shape[2], gt_norm.shape[3])) 
            normal_errors.append(compute_normal_errors(normal, gt_norm, gt_norm_mask))

    mean_normal_errors = np.array(normal_errors).mean(0)
    
    print("\n  " + ("{:>8} | " * 6).format("mean", "rmse", "a1", "a2", "a3", "a4"))
    print(("&{: 8.3f}  " * 6).format(*mean_normal_errors.tolist()) + "\\\\")
    print("\n-> Done!")
Example #19
def export_gt_depths_kitti():

    parser = argparse.ArgumentParser(description='export_gt_depth')

    parser.add_argument('--data_path',
                        type=str,
                        help='path to the root of the KITTI data',
                        required=True)
    parser.add_argument('--split',
                        type=str,
                        help='which split to export gt from',
                        required=True,
                        choices=["eigen", "eigen_benchmark"])
    opt = parser.parse_args()

    split_folder = os.path.join(os.path.dirname(__file__), "splits", opt.split)
    lines = readlines(os.path.join(split_folder, "test_files.txt"))

    print("Exporting ground truth depths for {}".format(opt.split))

    gt_depths = []
    for line in lines:

        folder, frame_id, _ = line.split()
        frame_id = int(frame_id)

        if opt.split == "eigen":
            calib_dir = os.path.join(opt.data_path, folder.split("/")[0])
            velo_filename = os.path.join(opt.data_path, folder,
                                         "velodyne_points/data", "{:010d}.bin".format(frame_id))
            # gt_depth = generate_depth_map(calib_dir, velo_filename, 2, True) ## ZMH: This won't work because the generate_depth_map function has been redefined.
            # gt_depth = generate_depth_map_original(calib_dir, velo_filename, 2, True) ## ZMH: the original function in monodepth2
            # gt_depth = generate_depth_map_original(calib_dir, velo_filename, 2, False) ## ZMH: the original function in monodepth2, use transformed depth

            velo_rect, P_rect_norm, im_shape  = generate_depth_map(calib_dir, velo_filename, 2)
            gt_depth = project_lidar_to_img(velo_rect, P_rect_norm, im_shape)                   ## ZMH: the way gt is generated I used in training

        elif opt.split == "eigen_benchmark":
            # gt_depth_path = os.path.join(opt.data_path, folder, "proj_depth",
            #                              "groundtruth", "image_02", "{:010d}.png".format(frame_id))
            gt_depth_path = os.path.join(opt.data_path, folder, "proj_depth",
                                         "groundtruth", "image_02", "{:010d}.png".format(frame_id), 'val', folder.split("/")[1], "proj_depth",
                                         "groundtruth", "image_02", "{:010d}.png".format(frame_id))
            if not os.path.exists(gt_depth_path):
                gt_depth_path = os.path.join(opt.data_path, folder, "proj_depth",
                                         "groundtruth", "image_02", "{:010d}.png".format(frame_id), 'train', folder.split("/")[1], "proj_depth",
                                         "groundtruth", "image_02", "{:010d}.png".format(frame_id))
                if not os.path.exists(gt_depth_path):
                    raise ValueError("This file does not exist! {} {}".format(folder, frame_id))
            gt_depth = np.array(pil.open(gt_depth_path)).astype(np.float32) / 256

        gt_depths.append(gt_depth.astype(np.float32))

    output_path = os.path.join(split_folder, "gt_depths_im_cus.npz")

    print("Saving to {}".format(opt.split))

    np.savez_compressed(output_path, data=np.array(gt_depths))
Example #20
def network_define(opt, data_path, height, width):
    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.eval_split, split_file))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path,
                              map_location=torch.device("cuda:1"))

    if opt.dataset_val[0] == "kitti":
        dataset = datasets.KITTIRAWDataset(data_path,
                                           filenames,
                                           height,
                                           width, [0],
                                           4,
                                           is_train=False)
    elif opt.dataset_val[0] == "vkitti":
        dataset = datasets.VKITTIDataset(data_path,
                                         filenames,
                                         height,
                                         width, [0],
                                         4,
                                         is_train=False)
    # dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers,
    #                         pin_memory=True, drop_last=False)
    dataloader = DataLoader(
        dataset,
        1,
        shuffle=False,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=False,
        collate_fn=my_collate_fn
    )  ## the default collate_fn will fail because samples have non-deterministic lengths

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(
        torch.load(decoder_path, map_location=torch.device("cuda:1")))

    encoder.cuda(1)
    encoder.eval()
    depth_decoder.cuda(1)
    depth_decoder.eval()

    return encoder, depth_decoder, dataloader, filenames
Example #21
def parseData(filepath, instructions):
    lines = utils.readlines(filepath)
    ipMatch = re.search(r"#ip (\d+)", lines[0])
    ip = int(ipMatch.group(1))

    for line in lines[1:]:
        instrMatch = re.search(r"(.*) (\d+) (\d+) (\d+)", line)
        e = executionData(instrMatch.group(1),
                          *[int(i) for i in instrMatch.group(2, 3, 4)])
        instructions.append(e)
    return ip
Example #22
def cleanup(file, output, cleanup_only=False, threshold=5, thread=None):
    """
    Cleanup the dataset according to the specs of the task.

    Args:
        file(str): Input corpus filename.
        output(str): Output filename.
        cleanup_only(bool): Just clean up the words using the predefined frequent-word list.
        threshold(int): The threshold to filter out infrequent words.
        thread(int): Number of threads to run simultaneously.
    """

    # 1. Load and convert each title to lowercase.
    data = readlines(file, delimiter="\t", lower=True)

    # 2. Remove all characters that are not
    #    (1) lowercase characters (a-z),
    #    (2) whitespace, or
    #    (3) hyphen '-'
    data = remove_redundant_char(data, index=1)

    # 3. Tokenize each title into words by splitting on whitespace.
    words, data = tokenize_context(data, index=1, thread=thread)

    # 4. Remove all tokens that appear fewer than 5 times in the dataset.
    # 4-1. Find frequent words
    if not cleanup_only:
        frequent_words = find_frequent_words(words, threshold=threshold)
        write_to_file("models/frequent_words.txt", frequent_words)
    else:
        print("Loading frequent_words from training set.")
        frequent_words = readlines("models/frequent_words.txt", lower=True)
        frequent_words = list(chain.from_iterable(frequent_words))

    # 4-2. Remove infrequent words in titles
    data = filter_title(data,
                        index=1,
                        frequent_words=frequent_words,
                        thread=thread)
    # Save to file
    write_to_file(output, data, delimiter="\t", row_as_line=True)
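A minimal usage sketch with placeholder paths: the first call builds models/frequent_words.txt from a training file, and the second reuses that list to clean a held-out file:

# Hypothetical file names.
cleanup("data/train.tsv", "data/train_cleaned.tsv", threshold=5, thread=4)
cleanup("data/test.tsv", "data/test_cleaned.tsv", cleanup_only=True, thread=4)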
Example #23
def get_docs(default_span='mv'):
    ebm_nlp = '/home/ben/Desktop/EBM-NLP/ebm_nlp_2_00'
    ann_dir = '{}/annotations'.format(ebm_nlp)

    pmids = utils.readlines('../data/id_splits/ebm_nlp/test.txt')
    docs = {p: {'i': {}, 'o': {}} for p in pmids}

    for p in pmids:

        token_fname = os.path.join('../data/documents/tokens/',
                                   '{}.tokens'.format(p))
        tokens = utils.readlines(token_fname)
        docs[p]['tokens'] = tokens

        for el in ['interventions', 'outcomes']:

            agg_fname = '{}/aggregated/starting_spans/{}/test/gold/{}.AGGREGATED.ann'.format(
                ann_dir, el, p)
            indv_fnames = glob.glob(
                '{}/individual/phase_1/{}/test/gold/{}.*.ann'.format(
                    ann_dir, el, p))
            e = el[0]

            docs[p][e]['hmm'] = list(map(int, utils.readlines(agg_fname)))
            docs[p][e]['indv'] = []
            for f in indv_fnames:
                docs[p][e]['indv'].append(list(map(int, utils.readlines(f))))
            docs[p][e]['avg'] = list(map(np.mean, zip(*docs[p][e]['indv'])))

            agg_strats = [\
                ('mv',           lambda x: int(x + 0.5)),
                ('union',        lambda x: int(x > 0)),
                ('intersection', lambda x: int(x))]

            for name, func in agg_strats:
                docs[p][e][name] = list(map(func, docs[p][e]['avg']))

            spans = utils.condense_labels(docs[p][e][default_span])
            docs[p][e]['spans'] = [' '.join(tokens[i:f]) for i, f, l in spans]

    return docs
Example #24
def compare(group = 'dev'):
  pmids = utils.group_ids(group)

  with open('frames.csv', 'w') as fout:
    fout.write('\t'.join('label i c o span'.split()) + '\n')
    for pmid in pmids:
      try:
        bert_frames = utils.readlines('{}/documents/frames/{}.bert_frames'.format(utils.DATA_DIR, pmid))
      except FileNotFoundError:
        # womp womp no frames for this doc
        continue
      gold_frames = utils.readlines('{}/documents/frames/{}.frames'.format(utils.DATA_DIR, pmid))
      gold_sent_idxs = utils.readlines('{}/documents/frames/{}.sent_idxs'.format(utils.DATA_DIR, pmid))

      gold_lookup = { i: l.split('\t') for i,l in enumerate(gold_sent_idxs) if len(l.split('\t')) == 1 }

      for frame_str in bert_frames:
        frame = json.loads(frame_str)
        matching_frames = [i for i, idxs in gold_lookup.items() if frame['sent_idx'] in idxs]
        if matching_frames:
          matching_frame = gold_frames[matching_frames[0]]
          i, c, o, _, _, ev = matching_frame.split('\t')
          if ev not in frame['ev']:
            continue
          fout.write('\t'.join(['gold', i, c, o, ev]) + '\n')

          ico = frame['icos'][0]
          i_score = ''
          c_score = ''
          o_score = ''
          if o.lower() == ico[3].lower():
            o_score = '5'
          if i.lower() == ico[1].lower():
            i_score = '5'
          elif i.lower() == ico[2].lower():
            ico[1], ico[2] = ico[2], ico[1]
            i_score = '5'
            if frame['sample'] == 'c': frame['sample'] = 'i'
          if c.lower() == ico[2].lower():
            c_score = '5'
          fout.write('\t'.join([frame['sample'], ico[1], ico[2], ico[3], frame['ev']]) + '\n')
Example #25
    def test_challenge20(self):
        plaintexts = [
            base64_to_bytes(line)
            for line in utils.readlines('20.txt')
        ]

        key = encryption_key()
        nonce = '\0' * 8
        ciphertexts = [
            ctr_encrypt(m, key, nonce)
            for m in plaintexts
        ]

        # Because of the fixed-nonce, the encrypted keystream bytes are
        # repeated for every plaintext message.
        #
        # ciphertext[i] ^ keystream[i] = plaintext[i]
        #
        # We can create a transposed ciphertext message by concatenating
        # ciphertext[i] from every encrypted message and then xor'ing that
        # against a guessed keystream byte. Then we can test whether the
        # resulting plaintext looks like english based on character
        # distributions. If so, then we've figured out the keystream byte.

        keystream = ''
        for index in itertools.count():
            transposed = ''.join(m[index:index+1] for m in ciphertexts)
            if not transposed:
                break

            allowed_chars = None
            if index == 0:
                allowed_chars = string.ascii_uppercase + '"\''

            score, _, key = crack.find_best_single_byte_key(
                transposed,
                allowed_chars=allowed_chars
            )
            # print 'Best score for index {}: {}'.format(index, score)
            keystream += key[0]

        recovered_plaintexts = [
            bitops.xor(m, keystream) for m in ciphertexts
        ]

        # for m in recovered_plaintexts:
        #     print m

        self.assertIn(
            '\'Cause my girl is definitely mad / \'Cause it took us too long to do this album',
            recovered_plaintexts
        )
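A toy illustration (made-up strings, not the challenge data) of the keystream-reuse property that the transposition trick relies on: byte i of every ciphertext was XORed with the same keystream byte, so XORing two ciphertexts at position i cancels the keystream:

def xor_str(a, b):
    return ''.join(chr(ord(x) ^ ord(y)) for x, y in zip(a, b))

keystream = '\x13\x37\x42'
msgs = ['foo', 'bar']
cts = [xor_str(m, keystream) for m in msgs]

# The shared keystream byte cancels out, leaving the XOR of the plaintexts.
assert ord(cts[0][0]) ^ ord(cts[1][0]) == ord('f') ^ ord('b')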
def uncertainty_map(res_path):
    uncertainty_dir = os.path.join(res_path, 'uncertainty_map')

    if not os.path.exists(uncertainty_dir):
        os.mkdir(uncertainty_dir)
    filenames = readlines(
        os.path.join(splits_dir, "eigen_benchmark", "test_files.txt"))
    for i in range(len(filenames)):
        imageset = np.load(os.path.join(res_path, '{}_stage4.npy'.format(i)))
        uncert = np.std(imageset, 0)
        plt.imsave(os.path.join(uncertainty_dir, '{}_uncert.png'.format(i)),
                   uncert,
                   cmap='Greys')
Example #27
def format_results(fdir, group='dev'):
    input_lines = utils.readlines('{}/sent_classifier/{}/{}.tsv'.format(
        utils.DATA_DIR, fdir, group))
    output_lines = utils.readlines(
        '{}/sent_classifier/{}/{}_results.tsv'.format(utils.DATA_DIR, fdir,
                                                      group))

    assert len(input_lines) == len(output_lines)
    assert utils.SENT_INPUT_FIELDS == 'dummy pmid sent_idx sent'

    input_data = [l.split('\t') for l in input_lines]
    output_probs = [[float(x) for x in l.split('\t')] for l in output_lines]
    output_preds = [l.index(max(l)) for l in output_probs]

    all_data = [inputs + [p] for inputs, p in zip(input_data, output_preds)]
    doc_data = groupby(all_data, itemgetter(1))
    for pmid, lines in doc_data:
        with open(
                '{}/documents/sents/{}.bert_{}'.format(utils.DATA_DIR, pmid,
                                                       fdir), 'w') as fout:
            for _, pmid, _, sent, label in lines:
                fout.write('{}\n'.format(label))
Example #28
def sample_dataset(file, amount):
    """
    Sample the given amount of data from the file.

    Args:
        file(str): File to be sampled.
        amount(int): Amount of data to be drawn from the file.
    """

    # Load and convert each title to lowercase.
    data = readlines(file, delimiter="\t", lower=True)
    # Sample
    sample_data(file, data, amount=amount)
Example #29
def export_gt_poses_kitti():

    parser = argparse.ArgumentParser(description='export_gt_depth')

    parser.add_argument('--data_path',
                        type=str,
                        help='path to the root of the KITTI data',
                        required=True)
    parser.add_argument('--split',
                        type=str,
                        help='which split to export gt from',
                        required=True,
                        choices=["raw_odometry"])
    opt = parser.parse_args()

    split_folder = os.path.join(os.path.dirname(__file__), "splits", opt.split)
    files = readlines(os.path.join(split_folder, "test_files.txt"))
    videos = readlines(os.path.join(split_folder, "test_video_list.txt"))

    print("Exporting ground truth depths for {}".format(opt.split))

    for video in videos:
        oxts_paths = []
        ids = sorted([
            int(file.stem)
            for file in Path(opt.data_path).glob(f"{video}/oxts/data/*.txt")
        ])
        for frame_id in ids:
            filepath_oxst = os.path.join(opt.data_path, video, "oxts", "data",
                                         "{:010d}.txt".format(frame_id))

            oxts_paths.append(filepath_oxst)
        oxts = load_oxts_packets_and_poses(oxts_paths)
        poses_path = os.path.join(opt.data_path, video, "oxts", "poses.txt")
        poses = np.stack([np.array(o[1]) for o in oxts])

        print("Saving to {}".format(poses_path))
        poses = poses[:, :3, :].reshape(-1, 12)
        np.savetxt(poses_path, poses)
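The saved poses.txt holds one flattened 3x4 camera-to-world matrix per row (the reshape(-1, 12) above); a sketch of reading it back under that assumption:

import numpy as np

rows = np.loadtxt("poses.txt").reshape(-1, 3, 4)   # hypothetical local copy of the file
poses = np.tile(np.eye(4), (rows.shape[0], 1, 1))  # promote to 4x4 homogeneous matrices
poses[:, :3, :] = rows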
Example #30
def export_gt_depths_kitti():
    class Opt:
        data_path = '/home/jihyo/PycharmProjects/RNNslam/KITTI_data/KITTI_depth/data_depth_annotated/sum'
        split = 'eigen_benchmark'

    # parser = argparse.ArgumentParser(description='export_gt_depth')
    #
    # parser.add_argument('--data_path',
    #                     type=str,
    #                     help='path to the root of the KITTI data',
    #                     required=True)
    # parser.add_argument('--split',
    #                     type=str,
    #                     help='which split to export gt from',
    #                     required=True,
    #                     choices=["eigen", "eigen_benchmark"])
    # opt = parser.parse_args()

    opt = Opt()
    split_folder = os.path.join(os.path.dirname(__file__), "splits", opt.split)
    lines = readlines(os.path.join(split_folder, "test_files.txt"))

    print("Exporting ground truth depths for {}".format(opt.split))

    gt_depths = []
    for line in lines:

        folder, frame_id, _ = line.split()
        frame_id = int(frame_id)

        if opt.split == "eigen":
            calib_dir = os.path.join(opt.data_path, folder.split("/")[0])
            velo_filename = os.path.join(opt.data_path, folder,
                                         "velodyne_points/data",
                                         "{:010d}.bin".format(frame_id))
            gt_depth = generate_depth_map(calib_dir, velo_filename, 2, True)
        elif opt.split == "eigen_benchmark":
            gt_depth_path = os.path.join(opt.data_path, folder, "proj_depth",
                                         "groundtruth", "image_02",
                                         "{:010d}.png".format(frame_id))
            gt_depth = np.array(pil.open(gt_depth_path)).astype(
                np.float32) / 256

        gt_depths.append(gt_depth.astype(np.float32))

    output_path = os.path.join(split_folder, "gt_depths.npz")

    print("Saving to {}".format(opt.split))

    np.savez_compressed(output_path, data=np.array(gt_depths))
Example #31
import utils
import recognize
import sys

sentlabel = ["喜び", "信頼", "不安", "驚き", "悲しみ", "嫌気", "怒り", "予測"]


def w2vsentiment(w2v, sentence):
    sims = []

    testdata = utils.wakati_sentence(sentence)

    dic = w2v.calc_each_sentiment(testdata)

    for ratio in dic:
        sims.append(ratio / sum(dic))

    return sims


if __name__ == '__main__':
    ratios = [sentlabel]

    w2v = recognize.RecognizeWord2Vec()
    documents = utils.readlines(sys.argv[1])

    for doc in documents:
        ratios.append(w2vsentiment(w2v, doc))

    utils.output_csv(sys.argv[2], ratios)