def process():
    """Collect ParB spot traces from every lineage file and print a summary.

    Scans ``data/cell_lines/lineage*.npy``, discards lineages with no
    elongation rate or fewer than THRESHOLD_FRAMES frames, and builds one
    record per ParB spot path with positions expressed relative to midcell
    and to the new/old poles.
    """
    lin_files = sorted(glob.glob("data/cell_lines/lineage*.npy"))
    spots = []
    for lf in lin_files:
        # raw string: "\d" in a plain literal is an invalid escape
        # (SyntaxWarning on Python 3.12+)
        lineage_num = int(re.search(r"lineage(\d+).npy", lf).group(1))
        cell_line = np.load(lf)
        elongation_rate = shared.get_elongation_rate(cell_line, discard=True)
        # skip lineages with no measurable growth or too few frames
        if not elongation_rate or len(cell_line) < THRESHOLD_FRAMES:
            continue
        parB_paths = shared.get_parB_path(cell_line, cell_line[0].T, lineage_num)
        for spot_num, path in enumerate(parB_paths, start=1):
            spot_trace = path.spots()
            lengths = list(path.len())
            spot_data = {
                "t": [],
                "intensity": [],
                "cell_length": lengths,
                "x_mid": [],
                "x_new": [],
                "x_old": [],
                "pole_known": cell_line[0].pole_assignment,
                "spot_num": spot_num,
                "lineage_num": lineage_num,
            }
            # walk the trace in lockstep with the per-frame cell lengths
            # (assumes one length per trace entry — TODO confirm upstream)
            for x, length in zip(spot_trace, lengths):
                x_mid = x[1]                     # distance from midcell
                x_new = x[1] + (length / 2)      # distance from new pole
                x_old = length - x_new           # distance from old pole
                spot_data["t"].append(x[0])
                spot_data["intensity"].append(x[2])
                spot_data["x_mid"].append(x_mid)
                spot_data["x_new"].append(x_new)
                spot_data["x_old"].append(x_old)
            if len(spot_data["t"]) >= THRESHOLD_FRAMES:
                spots.append(spot_data)
                # calculate diffusion parameters
                d_mid, d_new, d_old = diffusion(spot_data)
    s = pd.DataFrame(spots)
    print(s)
def get_traces(orig_dir=None, two_spot=False, reuse=True):
    """Build (or reload) one DataFrame per ParB spot path across all lineages.

    Args:
        orig_dir: root directory used both for reusing cached ``.pandas``
            pickles and for writing new ones; no caching when falsy.
        two_spot: when True, keep only lineages whose path count is exactly 3
            and whose first frame has a single ParB focus.
        reuse: when True and a cache directory for this working directory
            exists under ``orig_dir``, load and return the cached pickles.

    Returns:
        list of pandas DataFrames, one per spot path, carrying extra
        ``_``-prefixed metadata attributes (path, lineage, spot number, ...).
    """
    # cache key is derived from the current working directory
    data_hash = hashlib.sha1(os.getcwd().encode("utf8")).hexdigest()
    cache_dir = os.path.join(orig_dir, "ParB_velocity", "data", data_hash) if orig_dir else None
    if reuse and cache_dir and os.path.exists(cache_dir):
        files = sorted(glob.glob(os.path.join(cache_dir, "*.pandas")))
        spot_data = []
        progress = progressbar.ProgressBar()
        for f in progress(files):
            spot_data.append(pd.read_pickle(f))
        return spot_data

    lin_files = sorted(glob.glob("data/cell_lines/lineage*.npy"))
    # raw string: "\d" in a plain literal is an invalid escape (SyntaxWarning on 3.12+)
    lineage_nums = [int(re.search(r"lineage(\d+).npy", x).group(1)) for x in lin_files]

    # loop-invariant: identify this data set by its directory components
    parent_dir, subdir = os.path.split(os.getcwd())
    topdir = os.path.basename(parent_dir)

    spot_data = []
    progress = progressbar.ProgressBar()
    for lineage_num, lf in progress(list(zip(lineage_nums, lin_files))):
        cell_line = np.load(lf)
        # pole assignment is required to orient distances; skip if absent
        if not hasattr(cell_line[0], "pole_assignment") or cell_line[0].pole_assignment is None:
            continue
        T = cell_line[0].T
        paths = shared.get_parB_path(cell_line, T, lineage_num)
        if two_spot:
            if len(paths) != 3:
                continue
            if len(cell_line[0].ParB) != 1:
                continue
        cell_elongation_rate = shared.get_elongation_rate(cell_line)
        # clamp negative elongation rates to zero
        if cell_elongation_rate and cell_elongation_rate < 0:
            cell_elongation_rate = 0
        spot_num = 1
        for path in paths:
            # path.positions: distance from midcell
            spot_trace = path.spots()
            timing = []
            d_mid = []
            d_parA = []
            intensity = []
            lengths = path.len()
            for x in spot_trace:
                timing.append(x[0])
                d_mid.append(x[1])
                intensity.append(x[2])
                # locate the cell frame matching this spot's timestamp
                c_idx = list(cell_line[0].t).index(x[0])
                cell = cell_line[c_idx]
                # ParA focus position relative to midcell
                parA_mid = cell.ParA[0] - (cell.length[0][0] / 2)
                dparA = np.abs(parA_mid - x[1])
                d_parA.append(dparA)
            data = pd.DataFrame(
                data={
                    "timing": timing,
                    "d_mid": d_mid,  # negative = closer to new pole
                    "d_parA": d_parA,
                    "intensity": intensity,
                    "cell_length": lengths,
                },
            )
            data["d_new"] = data.d_mid + (data.cell_length / 2)
            data["d_old"] = data.cell_length - data.d_new
            # NOTE: the original code assigned os.path.split() results to a
            # variable named `path`, shadowing the loop variable; renamed to
            # `parent_dir` (hoisted above) to avoid the clobber.
            data._path = os.getcwd()
            data._top_dir = topdir
            data._sub_dir = subdir
            data._lineage_num = lineage_num
            data._spot_num = spot_num
            data._cell_line_id = cell_line[0].id
            data._elongation_rate = cell_elongation_rate
            data._hash = hashlib.sha256("{0}-{1}-{2}-{3}".format(
                topdir, subdir, lineage_num, spot_num,
            ).encode("utf-8")).hexdigest()
            data._metadata = [
                "_path", "_top_dir", "_sub_dir",
                "_lineage_num", "_spot_num",
                "_cell_line_id", "_elongation_rate", "_hash",
            ]
            if orig_dir:
                target_dir = os.path.join(orig_dir, "ParB_velocity", "data", data_hash)
                # exist_ok avoids a race between the exists() check and makedirs()
                os.makedirs(target_dir, exist_ok=True)
                data.to_pickle(os.path.join(
                    target_dir,
                    "{0:03d}-{1:03d}.pandas".format(lineage_num, spot_num)
                ))
            spot_data.append(data)
            spot_num += 1
    return spot_data
def process():
    """Compare sibling daughter cells at division across all lineages.

    Pairs each dividing mother lineage with its two daughters (larger cell
    first), then records intensity/length/area ratios, growth and elongation
    rates, and ParB split data as one row per division.

    Returns:
        pandas DataFrame indexed by a per-division SHA1 id, columns DATA_INDEX.
    """
    lin_files = sorted(glob.glob("data/cell_lines/lineage*.npy"))
    # context manager ensures the ancestry file is closed
    with open("ancestry.json") as fh:
        lookup = json.load(fh)
    siblings = {}  # mother_lin -> (daughter_lin, daughter_lin)
    cell_lines = {}
    rows = []
    for lf in lin_files:
        c = np.load(lf)
        mother_lin = lookup[c[0].id]
        cell_lines[mother_lin] = c
        if c[-1].children:
            siblings[mother_lin] = (
                lookup[c[-1].children[0]],
                lookup[c[-1].children[1]],
            )
    for parent_num in sorted(siblings.keys()):
        child1_num, child2_num = siblings[parent_num]
        # make child1 the larger cell
        child1 = cell_lines[child1_num][0]
        child2 = cell_lines[child2_num][0]
        if child1.length < child2.length:
            child2_num, child1_num = siblings[parent_num]
            child1 = cell_lines[child1_num][0]
            child2 = cell_lines[child2_num][0]

        parent_lin = cell_lines[parent_num]
        parent_growth = shared.get_growth_rate(parent_lin)
        parent_elong = shared.get_elongation_rate(parent_lin)
        child1_lin = cell_lines[child1_num]
        child1_growth = shared.get_growth_rate(child1_lin)
        child1_elong = shared.get_elongation_rate(child1_lin)
        child2_lin = cell_lines[child2_num]
        child2_growth = shared.get_growth_rate(child2_lin)
        child2_elong = shared.get_elongation_rate(child2_lin)

        c1_inten = get_intensity(child1)
        c2_inten = get_intensity(child2)
        c1_max = get_intensity(child1, "max")
        c2_max = get_intensity(child2, "max")
        c1_split = get_parB_split(child1_lin, child1_num)
        c2_split = get_parB_split(child2_lin, child2_num)
        # skip divisions with missing intensity in either child: the original
        # only guarded c1_inten, leaving the c2 divisions below unprotected
        if c1_inten == 0 or c2_inten == 0:
            continue
        c_ratio = c1_inten / c2_inten  # ratio of intensity between children
        m_ratio = c1_max / c2_max  # ratio of max intensity between children
        l_ratio = (child1.length / child2.length)[0][0]  # ratio of child lengths
        a_ratio = (child1.area / child2.area)[0][0]  # ratio of child areas

        cwd = os.getcwd()
        twd, subdir = os.path.split(cwd)
        topdir = os.path.basename(twd)
        unique_id = hashlib.sha1(
            "{0} {1} {2}".format(topdir, subdir, parent_num).encode("utf-8")
        ).hexdigest()
        temp = [
            topdir, subdir,
            cell_lines[parent_num][-1].id, child1.id, child2.id,
            parent_num, child1_num, child2_num,
            c_ratio, m_ratio, l_ratio, a_ratio,
            parent_growth, child1_growth, child2_growth,
            parent_elong, child1_elong, child2_elong,
            c1_split, c2_split,
            parent_lin[0].length[0][0] * PX,
            child1_lin[0].length[0][0] * PX,
            child2_lin[0].length[0][0] * PX,
            parent_lin[0].area[0][0] * PX * PX,
            child1_lin[0].area[0][0] * PX * PX,
            child2_lin[0].area[0][0] * PX * PX,
            c1_inten, c2_inten, c1_max, c2_max,
        ]
        rows.append(pd.Series(data=temp, index=DATA_INDEX, name=unique_id))
    # DataFrame.append was removed in pandas 2.0; build from collected rows
    if rows:
        return pd.DataFrame(rows, columns=DATA_INDEX)
    return pd.DataFrame(columns=DATA_INDEX)