def process():
    """Collect per-spot ParB trace data for every lineage file in the cwd
    and print the resulting DataFrame.

    Reads ``data/cell_lines/lineage*.npy``, keeps lineages with a valid
    elongation rate and at least ``THRESHOLD_FRAMES`` frames, and records
    per-frame spot positions relative to midcell and both poles.

    NOTE(review): a later top-level ``process`` definition in this file
    shadows this one — confirm which is intended to be public.
    """
    lin_files = sorted(glob.glob("data/cell_lines/lineage*.npy"))
    spots = []
    for lf in lin_files:
        # Raw string: "\d" in a plain literal is an invalid escape
        # sequence; also escape the literal dot before "npy".
        lineage_num = int(re.search(r"lineage(\d+)\.npy", lf).group(1))
        cell_line = np.load(lf)
        elongation_rate = shared.get_elongation_rate(cell_line, discard=True)
        # Skip lineages with no usable elongation rate or too few frames.
        if not elongation_rate or len(cell_line) < THRESHOLD_FRAMES:
            continue

        parB_paths = shared.get_parB_path(cell_line, cell_line[0].T, lineage_num)
        for spot_num, path in enumerate(parB_paths, start=1):
            spot_trace = path.spots()
            lengths = list(path.len())
            spot_data = {
                "t": [],
                "intensity": [],
                "cell_length": lengths,
                "x_mid": [],
                "x_new": [],
                "x_old": [],
                "pole_known": cell_line[0].pole_assignment,
                "spot_num": spot_num,
                "lineage_num": lineage_num,
            }
            # Pair each spot observation with the cell length at that frame
            # (zip replaces the original hand-maintained index counter).
            for x, length in zip(spot_trace, lengths):
                x_mid = x[1]                 # position relative to midcell
                x_new = x[1] + (length / 2)  # presumably distance from new pole — TODO confirm
                x_old = length - x_new       # complementary distance from the other pole
                spot_data["t"].append(x[0])
                spot_data["intensity"].append(x[2])
                spot_data["x_mid"].append(x_mid)
                spot_data["x_new"].append(x_new)
                spot_data["x_old"].append(x_old)
            if len(spot_data["t"]) >= THRESHOLD_FRAMES:
                spots.append(spot_data)
                # calculate diffusion parameters
                # (return values are currently unused beyond this call)
                d_mid, d_new, d_old = diffusion(spot_data)

    s = pd.DataFrame(spots)
    print(s)
def get_traces(orig_dir=None, two_spot=False, reuse=True):
    """Build one annotated DataFrame per ParB spot trace for the cwd.

    Args:
        orig_dir: base directory for the on-disk cache under
            ``<orig_dir>/ParB_velocity/data/<cwd-hash>``; no caching when falsy.
        two_spot: if True, keep only lineages with exactly three ParB paths
            and a single initial ParB focus.
        reuse: if True and a cache directory for this cwd exists, return the
            pickled DataFrames from it instead of recomputing.

    Returns:
        list of pandas.DataFrame with columns ``timing``, ``d_mid``,
        ``d_parA``, ``intensity``, ``cell_length``, ``d_new``, ``d_old``
        plus ``_lineage_num``/``_spot_num``/... metadata attributes.
    """
    # The cache key is derived solely from the current working directory.
    data_hash = hashlib.sha1(os.getcwd().encode("utf8")).hexdigest()
    if reuse and orig_dir and os.path.exists(os.path.join(orig_dir, "ParB_velocity", "data", data_hash)):
        data_dir = os.path.join(orig_dir, "ParB_velocity", "data", data_hash)
        files = sorted(glob.glob(os.path.join(data_dir, "*.pandas")))
        spot_data = []
        progress = progressbar.ProgressBar()
        for f in progress(files):
            spot_data.append(pd.read_pickle(f))
        return spot_data

    lin_files = sorted(glob.glob("data/cell_lines/lineage*.npy"))
    # Raw string: "\d" in a plain literal is an invalid escape sequence;
    # also escape the literal dot before "npy".
    lineage_nums = [int(re.search(r"lineage(\d+)\.npy", x).group(1)) for x in lin_files]
    spot_data = []
    progress = progressbar.ProgressBar()
    for lineage_num, lf in progress(list(zip(lineage_nums, lin_files))):
        cell_line = np.load(lf)
        # Skip lineages without a known pole assignment.
        if not hasattr(cell_line[0], "pole_assignment") or cell_line[0].pole_assignment is None:
            continue

        T = cell_line[0].T
        paths = shared.get_parB_path(cell_line, T, lineage_num)

        if two_spot:
            if len(paths) != 3:
                continue
            if len(cell_line[0].ParB) != 1:
                continue

        cell_elongation_rate = shared.get_elongation_rate(cell_line)
        if cell_elongation_rate and cell_elongation_rate < 0:
            cell_elongation_rate = 0  # clamp negative rates to zero

        for spot_num, path in enumerate(paths, start=1):
            # path.positions: distance from midcell
            spot_trace = path.spots()
            timing = []
            d_mid = []
            d_parA = []
            intensity = []
            lengths = path.len()
            for x in spot_trace:
                timing.append(x[0])
                d_mid.append(x[1])
                intensity.append(x[2])

                # Distance between this spot and the ParA focus (both
                # expressed relative to midcell) at the same frame.
                c_idx = list(cell_line[0].t).index(x[0])
                cell = cell_line[c_idx]
                parA_mid = cell.ParA[0] - (cell.length[0][0] / 2)
                d_parA.append(np.abs(parA_mid - x[1]))

            data = pd.DataFrame(
                data={
                    "timing": timing,
                    "d_mid": d_mid,  # negative = closer to new pole
                    "d_parA": d_parA,
                    "intensity": intensity,
                    "cell_length": lengths,
                },
            )

            data["d_new"] = data.d_mid + (data.cell_length / 2)
            data["d_old"] = data.cell_length - data.d_new

            # Renamed from `path` so the ParB-path loop variable is not
            # shadowed by the filesystem split.
            parent_dir, subdir = os.path.split(os.getcwd())
            topdir = os.path.basename(parent_dir)
            data._path = os.getcwd()
            data._top_dir = topdir
            data._sub_dir = subdir
            data._lineage_num = lineage_num
            data._spot_num = spot_num
            data._cell_line_id = cell_line[0].id
            data._elongation_rate = cell_elongation_rate
            # Stable identifier for this (dataset, lineage, spot) combination.
            data._hash = hashlib.sha256("{0}-{1}-{2}-{3}".format(
                topdir,
                subdir,
                lineage_num,
                spot_num,
            ).encode("utf-8")).hexdigest()
            data._metadata = [
                "_path", "_top_dir", "_sub_dir", "_lineage_num",
                "_spot_num", "_cell_line_id",
                "_elongation_rate", "_hash"
            ]

            if orig_dir:
                target_dir = os.path.join(orig_dir, "ParB_velocity", "data", data_hash)
                if not os.path.exists(target_dir):
                    os.makedirs(target_dir)
                data.to_pickle(os.path.join(
                    target_dir, "{0:03d}-{1:03d}.pandas".format(lineage_num, spot_num)
                ))

            spot_data.append(data)
    return spot_data
def process():
    """Compare each dividing cell with its two daughters.

    Loads every lineage in ``data/cell_lines``, pairs mothers with their
    daughters via ``ancestry.json``, and returns one row per division with
    intensity/length/area ratios, growth and elongation rates, and ParB
    split flags.

    Returns:
        pandas.DataFrame indexed by a per-division SHA1 id, with columns
        ``DATA_INDEX``.
    """
    lin_files = sorted(glob.glob("data/cell_lines/lineage*.npy"))
    # Context manager closes the file (original left it open).
    with open("ancestry.json") as fh:
        lookup = json.load(fh)
    siblings = {}  # mother_lin -> (daughter_lin, daughter_lin)
    cell_lines = {}

    for l in lin_files:
        c = np.load(l)
        mother_lin = lookup[c[0].id]
        cell_lines[mother_lin] = c
        if c[-1].children:
            siblings[mother_lin] = (lookup[c[-1].children[0]], lookup[c[-1].children[1]])

    rows = []
    for parent_num in sorted(siblings.keys()):
        child1_num, child2_num = siblings[parent_num]
        # Make child1 the larger cell (swap when child1 starts smaller).
        child1 = cell_lines[child1_num][0]
        child2 = cell_lines[child2_num][0]

        # Index [0][0] to compare scalars — truth-testing the raw arrays is
        # ambiguous/deprecated in numpy.
        if child1.length[0][0] < child2.length[0][0]:
            child2_num, child1_num = siblings[parent_num]
            child1 = cell_lines[child1_num][0]
            child2 = cell_lines[child2_num][0]

        parent_lin = cell_lines[parent_num]
        parent_growth = shared.get_growth_rate(parent_lin)
        parent_elong = shared.get_elongation_rate(parent_lin)
        child1_lin = cell_lines[child1_num]
        child1_growth = shared.get_growth_rate(child1_lin)
        child1_elong = shared.get_elongation_rate(child1_lin)
        child2_lin = cell_lines[child2_num]
        child2_growth = shared.get_growth_rate(child2_lin)
        child2_elong = shared.get_elongation_rate(child2_lin)

        c1_inten = get_intensity(child1)
        c2_inten = get_intensity(child2)
        c1_max = get_intensity(child1, "max")
        c2_max = get_intensity(child2, "max")

        c1_split = get_parB_split(child1_lin, child1_num)
        c2_split = get_parB_split(child2_lin, child2_num)

        # Guard every divisor: the original only checked c1_inten and
        # would raise ZeroDivisionError on a zero c2_inten or c2_max.
        if c1_inten == 0 or c2_inten == 0 or c2_max == 0:
            continue

        c_ratio = c1_inten / c2_inten  # ratio of intensity between children
        m_ratio = c1_max / c2_max  # ratio of max intensity between children
        l_ratio = (child1.length / child2.length)[0][0]  # ratio of child lengths
        a_ratio = (child1.area / child2.area)[0][0]  # ratio of child areas

        cwd = os.getcwd()
        twd, subdir = os.path.split(cwd)
        topdir = os.path.basename(twd)
        unique_id = hashlib.sha1(
            "{0} {1} {2}".format(topdir, subdir, parent_num).encode("utf-8")
        ).hexdigest()
        temp = [
            topdir,
            subdir,
            cell_lines[parent_num][-1].id,
            child1.id,
            child2.id,
            parent_num,
            child1_num,
            child2_num,
            c_ratio,
            m_ratio,
            l_ratio,
            a_ratio,
            parent_growth, child1_growth, child2_growth,
            parent_elong, child1_elong, child2_elong,
            c1_split, c2_split,
            parent_lin[0].length[0][0] * PX,
            child1_lin[0].length[0][0] * PX,
            child2_lin[0].length[0][0] * PX,
            parent_lin[0].area[0][0] * PX * PX,
            child1_lin[0].area[0][0] * PX * PX,
            child2_lin[0].area[0][0] * PX * PX,
            c1_inten, c2_inten,
            c1_max, c2_max,
        ]
        rows.append(pd.Series(data=temp, index=DATA_INDEX, name=unique_id))

    # DataFrame.append was removed in pandas 2.0 — build the frame once
    # from the collected rows instead of appending per iteration.
    if rows:
        return pd.DataFrame(rows)
    return pd.DataFrame(columns=DATA_INDEX)