예제 #1
0
def test_get_answer():
    """Run both solvers on the 2020 day 7 sample files and check the results."""
    # The separator is the literal word "contain", passed together with
    # engine='python'.
    first_sample = read_csv(
        'test/2020/data/day7.csv',
        col_names=['outer_bags', 'inner_bags'],
        sep='contain',
        engine='python',
    )
    part_one = get_answer1(first_sample)
    part_two = get_answer2(first_sample)

    # The second sample file is only exercised through get_answer2.
    second_sample = read_csv(
        'test/2020/data/day7_2.csv',
        col_names=['outer_bags', 'inner_bags'],
        sep='contain',
        engine='python',
    )
    part_two_second = get_answer2(second_sample)

    assert part_one == 4
    assert part_two == 32
    assert part_two_second == 126
예제 #2
0
def test_get_answer():
    """Run both solvers on the 2021 day 12 sample file and check the results."""
    # Each line is split on '-' into the columns 'f' and 't'.
    sample = read_csv(
        'test/2021/data/day12.csv',
        col_names=['f', 't'],
        sep='-',
    )
    part_one = get_answer1(sample)
    part_two = get_answer2(sample)

    assert part_one == 10
    assert part_two == 36
예제 #3
0
def test_get_answer():
    """Smoke-run both solvers on the 2020 day 20 sample file.

    No result is asserted; the test only fails if a solver raises.
    """
    sample = read_csv(
        'test/2020/data/day20.csv',
        col_names=['XX'],
        sep=',',
    )
    part_one = get_answer1(sample)
    part_two = get_answer2(sample)

    # Expected-value checks are currently disabled:
    #   part one == 20899048083289
    #   part two == 336
예제 #4
0
def test_get_answer():
    """Run both solvers on the 2021 day 5 sample file and check the results."""
    # '->' is a multi-character separator, passed together with engine='python'.
    sample = read_csv(
        'test/2021/data/day5.csv',
        col_names=['from', 'to'],
        sep='->',
        engine='python',
    )
    part_one = get_answer1(sample)
    part_two = get_answer2(sample)

    assert part_one == 5
    assert part_two == 12
예제 #5
0
def test_get_answer():
    """Run both solvers on the 2020 day 9 sample file and check the results."""
    sample = read_csv(
        'test/2020/data/day9.csv',
        col_names=['xmas'],
        sep=',',
    )
    # Both solvers receive 5 as their second positional argument.
    part_one = get_answer1(sample, 5)
    part_two = get_answer2(sample, 5)

    assert part_one == 127
    assert part_two == 62
예제 #6
0
def mirnaid_fix(fname: str):
    """Rewrite the 'miRNA ID' column of a file using a miRBase lookup by sequence.

    The organism prefix is chosen by looking for a known organism keyword in
    ``fname``. The input is read from ``READ_PATH / fname`` and the result is
    written to ``MIRNA_SEQ_PATH / fname``.

    Args:
        fname: File name; must contain one of the known organism keywords.

    Raises:
        Exception: If no organism keyword occurs in ``fname``.
    """
    organism_to_prefix = {
        "mouse": "mmu",
        "human": "hsa",
        "elegans": "cel",
        "cattle": "bta",
        "fly": "aga"
    }

    # No early exit: when several keywords occur in fname, the last one in
    # dictionary order wins.
    prefix = None
    for organism, code in organism_to_prefix.items():
        if organism in fname:
            prefix = code
    if prefix is None:
        raise Exception("unrecognized mirbase prefix")

    # `prefix` is referenced by name inside the query expression (@prefix).
    mirbase_df: DataFrame = pd.read_csv(MIRBASE_FILE).query("prefix==@prefix")
    # Highest version first, so that keep="first" retains the highest-version
    # entry for every sequence.
    mirbase_df = mirbase_df.sort_values(by="version", ascending=False)
    mirbase_df = mirbase_df.drop_duplicates("miRNA sequence", keep="first")

    source_path = READ_PATH / fname
    target_path = MIRNA_SEQ_PATH / fname

    input_df: DataFrame = read_csv(source_path)

    merged = input_df.merge(mirbase_df,
                            how="left",
                            left_on="miRNA sequence",
                            right_on="miRNA sequence")
    # 'miRNA ID_y' is the miRBase-side ID column after the merge suffixing.
    input_df['miRNA ID'] = merged['miRNA ID_y']
    to_csv(input_df, target_path)
예제 #7
0
def test_get_answer():
    """Run both solvers on the 2020 day 18 sample file and check the results."""
    sample = read_csv(
        'test/2020/data/day18.csv',
        col_names=['expr'],
        sep=',',
    )
    part_one = get_answer1(sample)
    part_two = get_answer2(sample)

    assert part_one == 26335
    assert part_two == 693891
예제 #8
0
def test_get_answer():
    """Run both solvers on the 2020 day 1 sample file and check the results."""
    # No separator is passed here, so read_csv uses whatever default it defines.
    expenses = read_csv(
        'test/2020/data/day1.csv',
        col_names=['amount'],
    )
    part_one = get_answer1(expenses)
    part_two = get_answer2(expenses)

    assert part_one == 514579
    assert part_two == 241861950
예제 #9
0
def main(args):
  """Process every annotated task for one thermal/depth sensor pair.

  Creates one output directory per entry of ACTIONS, loads the pickled match
  file for the sensor pair, then walks the annotation CSV files in ascending
  task-id order and passes each task to process_annotations. The module-level
  ``executor`` is (re)created here and shut down once all tasks were visited.

  Args:
    args: Object providing ``output_dir``, ``data_root_dir``, ``date``,
      ``thermal_sensor`` and ``depth_sensor`` (it is also forwarded unchanged
      to process_annotations).
  """
  global executor
  executor = concurrent.futures.ThreadPoolExecutor(16)

  # Make dataset directories
  for action in ACTIONS:
    action_dir = os.path.join(args.output_dir, action)
    os.makedirs(action_dir, exist_ok=True)

  date_dir = os.path.join(args.data_root_dir, args.date)
  sensor_pair = '{}_{}'.format(args.thermal_sensor, args.depth_sensor)

  # Load match file
  # NOTE(review): pickle.load executes arbitrary code from the file; this is
  # only safe when the match file comes from a trusted source.
  match_file = os.path.join(date_dir, 'match_{}.pkl'.format(sensor_pair))
  utils.check_exists(match_file)
  with open(match_file, 'rb') as match_handle:
    tasks = pickle.load(match_handle)

  thermal_video_dir = os.path.join(date_dir, 'thermal',
                                   args.thermal_sensor, 'videos')
  annotation_dir = os.path.join(date_dir, 'results', sensor_pair)

  # Annotation files are named after their task id; the part before the first
  # '.' is parsed as an integer.
  task_ids = sorted(
      int(file_name.split('.')[0]) for file_name in os.listdir(annotation_dir))

  # Read and process each task.
  videos = {}
  for task_id in task_ids:
    print("Task: {}".format(task_id))
    annotation_file = os.path.join(annotation_dir, '{}.csv'.format(task_id))
    clips = utils.read_csv(annotation_file)
    task = tasks[task_id]
    thermal_time_str = task[0][0]
    # Raw thermal data
    frames = get_thermal_frames(videos, thermal_video_dir, thermal_time_str)
    process_annotations(args, task, clips, frames)

  print("Waiting for jobs to finish...")
  executor.shutdown(wait=True)
예제 #10
0
def test_get_answer():
    """Run both solvers on the 2020 day 3 sample file and check the results."""
    sample = read_csv(
        'test/2020/data/day3.csv',
        col_names=['pattern'],
        sep=',',
    )
    part_one = get_answer1(sample)
    part_two = get_answer2(sample)

    assert part_one == 7
    assert part_two == 336
예제 #11
0
def human_mapping_merge_by_name(fin: Path, fout: Path):
    """Attach the human region sequence to each row and flag whether it matches.

    Rows of ``fin`` are joined (left join) with the frame returned by
    ``concatenate_biomart_df("human")`` on the region and on a key built from
    the first two "_"-separated parts of "mRNA ID". The joined "sequence"
    column is renamed to "region sequence", and a boolean "join_ok" column
    records whether "mRNA_seq_extended" occurs inside it. The result is
    written to ``fout``.

    Args:
        fin: Input file read through ``read_csv``.
        fout: Output file written through ``to_csv``.
    """
    def verify_sequence(seq: str, subseq: str) -> bool:
        """Return True when ``subseq`` occurs in ``seq``.

        A non-string ``seq`` (for example a missing value from the left join)
        has no ``find`` method and counts as "not found".
        """
        try:
            return seq.find(subseq) != -1
        except AttributeError:
            return False

    in_df: DataFrame = read_csv(fin)
    # Join key: the first two "_"-separated fields of the mRNA ID, joined by "|".
    in_df["join_key"] = in_df["mRNA ID"].apply(
        lambda x: "|".join(x.split("_")[0:2]))
    mRNA_df = concatenate_biomart_df("human")

    in_df = in_df.merge(mRNA_df,
                        how="left",
                        left_on=["region", "join_key"],
                        right_on=["region", "ID"])

    in_df = in_df.rename(columns={"sequence": "region sequence"})
    # The projection must use the renamed column ("region sequence", with a
    # space). The previous 'region_sequence' spelling did not match the rename
    # above and removed the column that the join_ok step below reads.
    in_df = in_df[[
        'key', 'paper name', 'miRNA ID', 'miRNA sequence', 'mRNA ID',
        'mRNA_seq_extended', 'region', 'region sequence', 'mRNA_start',
        'mRNA_end_extended'
    ]]

    in_df["join_ok"] = in_df.apply(func=get_wrapper(verify_sequence,
                                                    'region sequence',
                                                    'mRNA_seq_extended'),
                                   axis=1)

    to_csv(in_df, fout)
예제 #12
0
def test_get_answer():
    """Run both solvers on the 2021 day 1 sample file and check the results."""
    sample = read_csv(
        'test/2021/data/day1.csv',
        col_names=['XX'],
        sep=',',
    )
    part_one = get_answer1(sample)
    part_two = get_answer2(sample)

    assert part_one == 7
    assert part_two == 5
예제 #13
0
def get_site_from_extended_site(fin: str, fout: str):
    """Add 'chimera_start', 'chimera_end' and 'site' columns to a file.

    The start is the 1-based position of 'mRNA_seq_extended' inside
    'region sequence' (-1 when it cannot be located). The end is derived from
    the start, the extended sequence length and HUMAN_SITE_EXTENDED_LEN. The
    site is then produced by get_subsequence_by_coordinates.

    Args:
        fin: Path of the input file.
        fout: Path of the output file.
    """
    def calc_chimera_start(seq: str, subseq: str) -> int:
        """Return the 1-based offset of ``subseq`` in ``seq``, or -1."""
        try:
            offset = seq.find(subseq)
        except AttributeError:
            # seq is not a string (no ``find`` method).
            return -1
        return -1 if offset == -1 else offset + 1

    def calc_chimera_end(chimera_start: int, seq_extended: str) -> int:
        """Return the end coordinate, propagating the -1 "not found" marker."""
        if chimera_start != -1:
            return chimera_start + len(seq_extended) - 1 - HUMAN_SITE_EXTENDED_LEN
        return -1

    logger.info(f"Insert site to {fin}")
    df: DataFrame = read_csv(Path(fin))

    start_func = get_wrapper(calc_chimera_start,
                             'region sequence',
                             'mRNA_seq_extended')
    df["chimera_start"] = df.apply(func=start_func, axis=1)

    # Needs the chimera_start column created just above.
    end_func = get_wrapper(calc_chimera_end,
                           'chimera_start',
                           'mRNA_seq_extended')
    df["chimera_end"] = df.apply(func=end_func, axis=1)

    site_func = get_wrapper(get_subsequence_by_coordinates,
                            "region sequence",
                            "chimera_start",
                            "chimera_end",
                            extra_chars=SITE_EXTRA_CHARS)
    df["site"] = df.apply(func=site_func, axis=1)

    to_csv(df, Path(fout))
    logger.info(f"finish the site sequence insertion to {fin}")
예제 #14
0
def test_get_answer():
    """Run both solvers on the 2020 day 25 sample file; only part one is checked."""
    sample = read_csv(
        'test/2020/data/day25.csv',
        col_names=['pub_key'],
        sep=',',
    )
    part_one = get_answer1(sample)
    # Part two is still executed, but its expected-value check (== 336) is
    # currently disabled.
    part_two = get_answer2(sample)

    assert part_one == 14897079
예제 #15
0
def test_get_answer():
    """Run both solvers on the 2021 "dayX" placeholder sample and check the results.

    The sample is loaded twice: as a frame through ``read_csv`` and as a plain
    list of lines (``dl``). Only the frame is passed to the solvers.
    """
    df = read_csv('test/2021/data/dayX.csv', col_names=['XX'], sep=',')
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open('test/2021/data/dayX.csv') as sample_file:
        dl = sample_file.read().splitlines()
    answer1 = get_answer1(df)
    answer2 = get_answer2(df)

    assert answer1 == 7
    assert answer2 == 336
예제 #16
0
def test_get_answer():
    """Run both solvers on the 2021 day 2 sample file and check the results."""
    # Lines are split on a single space into 'dir' and 'step'.
    sample = read_csv('test/2021/data/day2.csv', col_names=['dir', 'step'], sep=' ')
    part_one = get_answer1(sample)
    part_two = get_answer2(sample)

    assert part_one == 150
    assert part_two == 900
예제 #17
0
def test_get_answer():
    """Run both solvers on the 2020 day 8 sample file and check the results."""
    # Lines are split on a single space into 'operation' and 'argument'.
    sample = read_csv('test/2020/data/day8.csv', col_names=['operation', 'argument'], sep=' ')
    part_one = get_answer1(sample)
    part_two = get_answer2(sample)

    assert part_one == 5
    assert part_two == 8
예제 #18
0
def test_get_answer():
    """Run both solvers on the 2020 day 2 sample file and check the results."""
    # Lines are split on ':' into 'policy' and 'password'.
    passwords = read_csv('test/2020/data/day2.csv', col_names=['policy', 'password'], sep=':')
    part_one = get_answer1(passwords)
    part_two = get_answer2(passwords)

    assert part_one == 2
    assert part_two == 1
예제 #19
0
def test_get_answer():
    """Run both solvers on the 2021 day 8 sample file and check the results."""
    # Lines are split on '|' into 'all' and 'shown'.
    sample = read_csv('test/2021/data/day8.csv', col_names=['all', 'shown'], sep='|')
    part_one = get_answer1(sample)
    part_two = get_answer2(sample)

    assert part_one == 26
    assert part_two == 61229
예제 #20
0
def test_get_answer():
    """Run both solvers on the 2021 day 3 sample file and check the results."""
    # dtype=str is forwarded to read_csv, so values are requested as strings.
    sample = read_csv('test/2021/data/day3.csv', col_names=['XX'], sep=',', dtype=str)
    part_one = get_answer1(sample)
    part_two = get_answer2(sample)

    assert part_one == 198
    assert part_two == 230
예제 #21
0
def test_get_answer():
    """Run both solvers on the 2020 day 6 sample file and check the results."""
    # skip_blank_lines=False is forwarded so blank lines are not dropped
    # while reading.
    responses = read_csv('test/2020/data/day6.csv', col_names=['response'], sep=',', skip_blank_lines=False)
    part_one = get_answer1(responses)
    part_two = get_answer2(responses)

    assert part_one == 11
    assert part_two == 6
예제 #22
0
def splunk(fin: str, fout: str):
    """Write the rows flagged valid in both columns, minus SPLUNK_COL_TO_DROP.

    Keeps the rows where 'valid_row' is truthy and where 'duplex_valid', after
    conversion to bool, is True. The parent directory of ``fout`` is created
    when missing, and the result is written with ``DataFrame.to_csv``.

    Args:
        fin: Path of the input file.
        fout: Path of the output CSV file.
    """
    frame: DataFrame = read_csv(Path(fin))

    valid_rows = frame[frame['valid_row']]
    # NOTE(review): astype(bool) maps every non-empty string, including
    # 'False', to True. Confirm 'duplex_valid' is not read as text.
    valid_rows = valid_rows.astype({'duplex_valid': bool})
    valid_rows = valid_rows[valid_rows['duplex_valid']]
    print(valid_rows['duplex_valid'].unique())

    trimmed = valid_rows.drop(columns=SPLUNK_COL_TO_DROP)
    out_dir = Path(fout).parent
    out_dir.mkdir(parents=True, exist_ok=True)
    trimmed.to_csv(fout)
예제 #23
0
def feature_extraction(fin: str, fout: str):
    """Compute features for the valid rows and attach them to the full input.

    Only rows matching ``valid_row & duplex_valid=='True'`` are passed to
    df_feature_extractor. The features are left-joined back onto the input by
    index, so rows that were filtered out stay in the output without features.

    Args:
        fin: Path of the input file.
        fout: Path of the output file.
    """
    source_df: DataFrame = read_csv(Path(fin))

    selected = source_df.query("valid_row & duplex_valid=='True'")
    features = df_feature_extractor(selected)

    combined = pd.merge(
        left=source_df,
        right=features,
        left_index=True,
        right_index=True,
        how='left',
    )
    to_csv(combined, Path(fout))
예제 #24
0
def fast_blast_file(fin: Path, fout: Path, db_title: str):
    """Fill a 'blast sequence' column by looking each site up in a sequence table.

    The table is read from ``BIOMART_DATA_PATH / f"{db_title}.csv"`` and bound
    to df_contains as its ``df`` argument; the bound function is then applied
    to the 'site' column of every row.

    Args:
        fin: Input file read through ``read_csv``.
        fout: Output file written through ``to_csv``.
        db_title: Name (without extension) of the sequence table to search.
    """
    logger.info(f"fast blast file {fin} against {db_title}")
    sites_df: DataFrame = read_csv(fin)

    # The lookup table is loaded once and shared by all row lookups.
    seq_file = BIOMART_DATA_PATH / f"{db_title}.csv"
    sequence_table = pd.read_csv(seq_file)
    lookup = partial(df_contains, df=sequence_table)

    row_func = get_wrapper(lookup, "site")
    sites_df["blast sequence"] = sites_df.apply(func=row_func, axis=1)
    to_csv(sites_df, fout)
예제 #25
0
def rna_insertion(fin_full_path: Path, fout_full_path: Path,
                  rna_df: DataFrame):
    """Left-join ``rna_df`` onto an input file by 'mRNA ID' and write the result.

    Args:
        fin_full_path: Input file read through ``read_csv``.
        fout_full_path: Output file written through ``to_csv``.
        rna_df: Frame joined on its 'mRNA ID' column. The merge is validated
            as many-to-one, so pandas raises if an 'mRNA ID' value is
            duplicated in this frame.
    """
    logger.info(f"Insert rna sequence to {fin_full_path}")
    source_df: DataFrame = read_csv(fin_full_path)

    merge_options = {
        "how": "left",
        "left_on": "mRNA ID",
        "right_on": "mRNA ID",
        "validate": "many_to_one",
    }
    merged = source_df.merge(rna_df, **merge_options)

    to_csv(merged, fout_full_path)
    logger.info(f"Finish the rna sequence insertion to {fin_full_path}")
예제 #26
0
def read_blast_result_file(fin: Path) -> DataFrame:
    """Load a blast result file, tag it with its region and drop empty rows.

    The region is the text after the last "_" of the file stem.

    Args:
        fin: Path of the result file.

    Returns:
        The rows whose 'sequence' value is present, with an added 'region'
        column.
    """
    logger.info(f"read_blast_result_file {fin}")
    results: DataFrame = read_csv(Path(fin))
    # errors="ignore" leaves the columns untouched when the cast to the
    # nullable Int32 type fails.
    results = results.astype({'s.start': 'Int32', 's.end': 'Int32'}, errors="ignore")

    stem_parts = fin.stem.split("_")
    results["region"] = stem_parts[-1]

    # take only the rows with valid results
    results = results.dropna(axis=0, how='any', subset=['sequence'])
    return results
예제 #27
0
def test_get_answer():
    """Check the 2020 day 4 solvers against three sample files.

    get_answer1 runs on the main sample; get_answer2 runs on the "valid" and
    "invalid" samples. Each result is asserted before the next file is read.
    """
    # skip_blank_lines=False is forwarded on every read so blank lines are
    # not dropped.
    sample = read_csv('test/2020/data/day4.csv', col_names=['passport'], sep=',', skip_blank_lines=False)
    part_one = get_answer1(sample)
    assert part_one == 2

    valid_sample = read_csv('test/2020/data/day4_valid.csv', col_names=['passport'], sep=',', skip_blank_lines=False)
    part_two_valid = get_answer2(valid_sample)
    assert part_two_valid == 4

    invalid_sample = read_csv('test/2020/data/day4_invalid.csv', col_names=['passport'], sep=',', skip_blank_lines=False)
    part_two_invalid = get_answer2(invalid_sample)
    assert part_two_invalid == 0
예제 #28
0
def insert_site_by_coordinates(fin: str, fout: str):
    """Add a 'site' column cut out of 'mRNA sequence' by chimera coordinates.

    Each row is passed to get_subsequence_by_coordinates with its
    'mRNA sequence', 'chimera_start' and 'chimera_end' values and
    ``extra_chars=SITE_EXTRA_CHARS``.

    Args:
        fin: Path of the input file.
        fout: Path of the output file.
    """
    logger.info(f"Insert site to {fin}")
    table: DataFrame = read_csv(Path(fin))

    site_func = get_wrapper(
        get_subsequence_by_coordinates,
        "mRNA sequence",
        "chimera_start",
        "chimera_end",
        extra_chars=SITE_EXTRA_CHARS,
    )
    table["site"] = table.apply(func=site_func, axis=1)

    to_csv(table, Path(fout))
    logger.info(f"finish the site sequence insertion to {fin}")
예제 #29
0
def blast_file(fin: Path, fout: Path, db_title: str):
    """Run run_blastn on every row's 'site' and append the results as columns.

    Args:
        fin: Input file read through ``read_csv``.
        fout: Output file written through ``to_csv``.
        db_title: Forwarded to run_blastn as its ``db_title`` keyword.
    """
    logger.info(f"blast file {fin} against {db_title}")
    sites_df: DataFrame = read_csv(fin)

    blast_row = get_wrapper(run_blastn, "site", db_title=db_title)
    blast_columns: DataFrame = sites_df.apply(func=blast_row, axis=1)

    # Place the blast output next to the input columns, row by row.
    combined = pd.concat([sites_df, blast_columns], axis=1)

    # NOTE: a "blast region" column derived from "blast sequence" was sketched
    # here at some point but is disabled.

    to_csv(combined, fout)
예제 #30
0
def duplex(method: str, fin: str, fout: str):
    """Compute duplexes for the valid rows of a file with the chosen method.

    The duplex class is looked up in DUPLEX_DICT. do_duplex is applied to the
    'miRNA sequence' and 'site' of every row selected by the "valid_row"
    query, and its output is left-joined back onto the full input by index. A
    'duplex_method' column records the method name on every row.

    Args:
        method: Key into DUPLEX_DICT.
        fin: Path of the input file.
        fout: Path of the output file.

    Raises:
        KeyError: If ``method`` is not a key of DUPLEX_DICT (assuming it is a
            plain dict).
    """
    duplex_cls: Duplex = DUPLEX_DICT[method]
    logger.info(f"{method} do_duplex to {fin}")
    source_df: DataFrame = read_csv(Path(fin))

    # Rows failing the "valid_row" query are skipped here; the left join
    # below keeps them in the output with missing duplex values.
    valid_rows = source_df.query("valid_row")
    row_func = get_wrapper(do_duplex, "miRNA sequence", "site", cls=duplex_cls)
    duplex_df = valid_rows.apply(func=row_func, axis=1)

    combined = pd.merge(
        left=source_df,
        right=duplex_df,
        left_index=True,
        right_index=True,
        how='left',
    )

    combined["duplex_method"] = method
    to_csv(combined, Path(fout))