Exemplo n.º 1
0
    f2 = "/mnt/scratch/endrebak/pyranges_benchmark/data/download/input_15000000.bed.gz"

    nrows = int(1e6) # None # int(1e6)
    df = pd.read_table(f1, sep="\t", usecols=[0, 1, 2, 5], header=None,
                       names="Chromosome Start End Strand".split(), nrows=nrows)

    df2 = pd.read_table(f2, sep="\t", usecols=[0, 1, 2, 5], header=None,
                        names="Chromosome Start End Strand".split(), nrows=nrows)


    print("Done reading")
    start = time()

    a = PyRanges(df)
    b = PyRanges(df2)
    result = a.nearest(b)

    end = time()
    total = end - start

    total_dt = datetime.datetime.fromtimestamp(total)

    minutes_seconds = total_dt.strftime('%M\t%S\n')

    print(minutes_seconds)
    print(result)


# +--------------+----------+----------+----------+
# | Chromosome   | Start    | End      | Strand   |
# |--------------+----------+----------+----------|
Exemplo n.º 2
0
# Positional command-line arguments: the two input files and the run count.
infile, infile_bg, runs = argv[1], argv[2], int(argv[3])

chip_f, background_f = infile, infile_bg

# Row limit handed to pandas; None places no limit on the number of rows read.
nrows = None


def _read_intervals(path):
    """Load columns 0, 1, 2 and 5 of a header-less, tab-separated file.

    The selected columns are named Chromosome, Start, End and Strand. The
    module-level ``nrows`` setting is looked up at call time and passed
    through to pandas.
    """
    column_names = "Chromosome Start End Strand".split()
    return pd.read_table(
        path,
        header=None,
        sep="\t",
        usecols=[0, 1, 2, 5],
        names=column_names,
        nrows=nrows,
    )


chip = _read_intervals(chip_f)
background = _read_intervals(background_f)

# Wrap both tables in PyRanges objects for the benchmark below.
cgr = PyRanges(chip)
bgr = PyRanges(background)

# Repeat the nearest-interval query `runs` times, printing a progress line
# before each repetition. `result` is overwritten on every pass, so only the
# last run's output is still bound when the loop ends.
for i in range(runs):

    print("nearest", i, runs)

    # NOTE(review): strandedness="same" presumably limits matches to intervals
    # on the same strand -- confirm against the pyranges documentation.
    result = cgr.nearest(bgr, strandedness="same")