Example 1

def setup_upload_from_s3(rel_fp, recursive=True):
    s3_fp = 's3://mousebrainatlas-data/' + rel_fp
    local_fp = os.environ['ROOT_DIR'] + rel_fp

    if recursive:
        run('aws s3 cp --recursive {0} {1}'.format(local_fp, s3_fp))
    else:
        run('aws s3 cp {0} {1}'.format(local_fp, s3_fp))
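
The snippets on this page also call setup_download_from_s3, which is not shown here. A minimal sketch, assuming it simply mirrors the upload helper above with source and destination swapped:

def setup_download_from_s3(rel_fp, recursive=True):
    s3_fp = 's3://mousebrainatlas-data/' + rel_fp
    local_fp = os.environ['ROOT_DIR'] + rel_fp

    if recursive:
        run('aws s3 cp --recursive {0} {1}'.format(s3_fp, local_fp))
    else:
        run('aws s3 cp {0} {1}'.format(s3_fp, local_fp))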


yamlfile = os.environ['REPO_DIR'] + args.yaml
params = configuration(yamlfile).getParams()

extractor = patch_extractor(params)
images_all = pd.DataFrame()

fp = os.path.join('CSHL_data_processed', stack,
                  stack + '_sorted_filenames.txt')
setup_download_from_s3(fp, recursive=False)
with open(os.environ['ROOT_DIR'] + fp, 'r') as f:
    fn_idx_tuples = [line.strip().split() for line in f.readlines()]
    section_to_filename = {int(idx): fn for fn, idx in fn_idx_tuples}
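
The parsing above implies that sorted_filenames.txt holds one "<image name> <section index>" pair per line. A hypothetical entry and the mapping it produces:

# Hypothetical line in <stack>_sorted_filenames.txt (real names come from the data):
#   MD589_slide01_section_0001 1
# which yields section_to_filename == {1: 'MD589_slide01_section_0001'}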

db_dir = 'CSHL_databases/'
if not os.path.exists(os.environ['ROOT_DIR'] + db_dir):
    os.mkdir(os.environ['ROOT_DIR'] + db_dir)
db_dir += stack + '/'
if not os.path.exists(os.environ['ROOT_DIR'] + db_dir):
    os.mkdir(os.environ['ROOT_DIR'] + db_dir)
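
The examples on this page reference args, stack, and several libraries from a surrounding script that is not shown. A minimal sketch of that context (the argument names are guesses; the imports match the names used in the code):

import argparse
import os
import pickle
import shutil
from glob import glob
from time import time

import cv2
import numpy as np
import pandas as pd
import skimage.color                  # Example 5
import xgboost as xgb                 # Examples 4-5
from matplotlib.path import Path      # Examples 4-5
from shapely.geometry import Polygon  # Example 4

parser = argparse.ArgumentParser()
parser.add_argument('yaml', help='relative path to the parameter YAML file')   # assumed name
parser.add_argument('stack', help='brain stack ID, e.g. MD589')                # assumed name
args = parser.parse_args()
stack = args.stack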
Example 2
def generator(structure, state, threshold, cell_dir, patch_dir, stack, params):
    for state in [state]:  # single-iteration loop, used so `continue` can skip a finished structure
        t1 = time()
        extractor = patch_extractor(params)
        #savepath = cell_dir + 'Properties/' + structure + '/'
        #img_path = cell_dir + 'Images/' + structure + '/'
        savepath = cell_dir + structure + '/'
        pkl_out_file = savepath+stack+'_'+structure+'_'+state+'.pkl'
        #img_out_file = img_path+stack+'_'+structure+'_'+state+'_images.pkl'
        if os.path.exists(os.environ['ROOT_DIR'] + pkl_out_file):
            print(structure + '_' + state + ' ALREADY EXISTS')
            continue
        else:
            if not os.path.exists(os.environ['ROOT_DIR'] + savepath):
                os.mkdir(os.environ['ROOT_DIR'] + savepath)
                #os.mkdir(img_path)

        if structure == '7nn':
            structure = '7n'

        if state == 'positive':
            setup_download_from_s3(patch_dir + structure)
            patches = glob(os.environ['ROOT_DIR'] + patch_dir + structure + '/*')
        else:
            setup_download_from_s3(patch_dir + structure + '_surround_500um_noclass')
            patches = glob(os.environ['ROOT_DIR'] + patch_dir + structure + '_surround_500um_noclass/*')

        features = []

        # subsample at most 1000 patches per structure/state
        n_choose = min(len(patches), 1000)
        indices_choose = np.random.choice(range(len(patches)), n_choose, replace=False)
        patches = np.array(patches)[indices_choose]

        for i in range(len(patches)):
            tile = cv2.imread(patches[i], 0)  # read the patch as grayscale
            # contours, _ = cv2.findContours(tile.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            # if state=='positive':
            #     if len(contours)==1:
            #         object_area = cv2.contourArea(contours[0])
            #     else:
            #         areas=[]
            #         for j in range(len(contours)):
            #             areas.extend([cv2.contourArea(contours[j])])
            #         object_area = max(areas)
            # else:
            #     if len(contours)==2:
            #         object_area = cv2.contourArea(contours[0])-cv2.contourArea(contours[1])
            #     else:
            #         areas=[]
            #         for j in range(len(contours)):
            #             areas.extend([cv2.contourArea(contours[j])])
            #         areas=np.sort(areas)
            #         object_area = areas[-1]-areas[-2]
            
            if params['preprocessing']['polarity'] == -1:
                tile = 255 - tile  # invert grayscale polarity
            min_std = params['preprocessing']['min_std']
            _std = np.std(tile.flatten())
            
            extracted = []
            if _std < min_std:
                print('image', patches[i], 'std =', _std, 'too blank')
                # features.append([0] * 201)
                # 1982 = 99 CDF samples per feature column (presumably 20 columns) + 2 summary stats
                features.append([0] * 1982)
                # features.append([0] * 1581)
            else:
                try:
                    Stats = extractor.segment_cells(tile)
                    cells = extractor.extract_blobs(Stats, tile)
                    cells = pd.DataFrame(cells)
                    cells = cells[cells['padded_patch'].notnull()]
                    cells = cells.drop(['padded_patch', 'left', 'top'], axis=1)
                    cells = np.asarray(cells)
                    for k in range(len(cells)):
                        cells[k][0] = cells[k][0][:10]  # keep only the first 10 entries of the vector-valued column
                    origin = np.concatenate((np.array(list(cells[:, 0])), cells[:, 1:]), axis=1)
                    for k in range(origin.shape[1]):
                        x, y = CDF(origin[:, k])
                        # ten = [x[np.argmin(np.absolute(y - 0.1*(1+j)))] for j in range(10)]
                        # read the empirical CDF at the 99 reference thresholds for this column
                        ten = [y[np.argmin(np.absolute(x - threshold[k][j]))] for j in range(99)]
                        # ten = [y[np.argmin(np.absolute(x-threshold[10*k+j]))] for j in range(10)]
                        extracted.extend(ten)
                    extracted.extend([cells.shape[0]])  # cell count
                    extracted.extend([origin[:, 10].sum() / (224 * 224)])  # presumed cell-area column, normalised by tile size
                    features.append(extracted)
                except Exception:
                    continue
            if i % 10 == 0:
                count = len(features)
                print(structure + '_' + state, count, i, '/', len(patches))
                    
#                 Stats=extractor.segment_cells(tile)
#                 extracted= extractor.extract_blobs(Stats,tile)
#                 cells.extend(extracted)
                    # for j in range(len(extracted)):
                    #     try:
                    #         filename=savepath+str(extracted[j]['padded_size'])+'/'+str(count)+'.tif'
                    #         count+=1
                    #         img=extracted[j]['padded_patch']
                    #         img=img/img.max()*255
                    #         img=img.astype(np.uint8)
                    #         cv2.imwrite(filename, img)
                    #     except:
                    #         continue
                

#         cells = pd.DataFrame(cells)
#         cells = cells[cells['padded_patch'].notnull()]
#         images = cells[['padded_size','padded_patch']]
#         cells = cells.drop('padded_patch',1)
        count = len(features)
        print(structure + '_' + state, count)
#         cells.to_pickle(pkl_out_file)
#         images.to_pickle(img_out_file)
        pickle.dump(features, open(os.environ['ROOT_DIR']+pkl_out_file, 'wb'))
        setup_upload_from_s3(pkl_out_file, recursive=False)
        #s3_directory = 's3://mousebrainatlas-data/CSHL_cells_dm/'+stack+'/'+structure+'/'
        #run('aws s3 cp {0} {1}'.format(pkl_out_file,s3_directory))
        print(structure + '_'+state+ ' finished in %5.1f seconds' % (time() - t1))
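
Example 2 relies on a CDF helper that is not defined on this page. A minimal sketch of an empirical CDF that matches how it is consumed above (sorted sample x, cumulative fractions y):

def CDF(values):
    # x is the sorted sample; y[i] is the fraction of samples <= x[i]
    x = np.sort(np.asarray(values, dtype=float))
    y = np.arange(1, len(x) + 1) / len(x)
    return x, y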
Example 3
def setup_upload_from_s3(rel_fp, recursive=True):
    s3_fp = 's3://mousebrainatlas-data/' + rel_fp
    local_fp = os.environ['ROOT_DIR'] + rel_fp

    if recursive:
        run('aws s3 cp --recursive {0} {1}'.format(local_fp, s3_fp))
    else:
        run('aws s3 cp {0} {1}'.format(local_fp, s3_fp))


yamlfile = args.yaml
params = configuration(yamlfile).getParams()
# extractor = patch_extractor(params, dm=False)
extractor = patch_extractor(params,
                            dm=True,
                            stem=os.path.join(os.environ['ROOT_DIR'],
                                              'diffusionmap', stack,
                                              'diffusionMap'))
images_all = pd.DataFrame()
if stack != 'DK39':
    transform = pickle.load(
        open(
            os.path.join(os.environ['ROOT_DIR'], 'diffusionmap', stack,
                         'transform.pkl'), 'rb'))

db_dir = 'CSHL_databases/'
if not os.path.exists(os.environ['ROOT_DIR'] + db_dir):
    os.mkdir(os.environ['ROOT_DIR'] + db_dir)
db_dir += stack + '/'
if not os.path.exists(os.environ['ROOT_DIR'] + db_dir):
    os.mkdir(os.environ['ROOT_DIR'] + db_dir)
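
The check-then-mkdir pattern used for db_dir in Examples 1 and 3 can race if several workers start at once. An equivalent one-liner, assuming only that ROOT_DIR ends with a path separator:

os.makedirs(os.environ['ROOT_DIR'] + 'CSHL_databases/' + stack + '/', exist_ok=True)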
Example 4
def image_generator(section, savepath, features_fn, cell_dir, cell2_dir, param, params, num_round, step_size,
                    contours_grouped, raw_images_root, section_to_filename, all_structures, thresholds, valid_sections):
    t1 = time()
    img_fn = raw_images_root + section_to_filename[
        section] + '_prep2_lossless_gray.tif'
    setup_download_from_s3(img_fn, recursive=False)
    img = cv2.imread(os.environ['ROOT_DIR'] + img_fn, 2)  # 2 == cv2.IMREAD_ANYDEPTH
    m, n = img.shape
    margin = 200 / 0.46  # presumably 200 um at 0.46 um/pixel
    extractor = patch_extractor(params)

    polygons = [(contour['name'], contour['vertices'])
                for contour_id, contour in contours_grouped.get_group(section).iterrows()]

    # grid_fn = features_fn + str(section) + '.pkl'
    # try:
    #     setup_download_from_s3(grid_fn, recursive=False)
    #     grid_features = pickle.load(open(os.environ['ROOT_DIR']+grid_fn,'rb'))
    #     NotUpload = False
    # except:
    #     grid_features = {}
    #     NotUpload = True

    count = 0
    Scores = {}
    for contour_id, contour in polygons:
        structure = contour_id
        if structure not in all_structures:
            continue
        polygon = contour.copy()
        Scores[structure] = {}

        if structure == '7n':
            structure = '7nn'  # the training pickles for 7n are stored under the name '7nn'

        fp = []
        fp.append(cell_dir + structure + '/MD589_' + structure +
                  '_positive.pkl')
        fp.append(cell_dir + structure + '/MD589_' + structure +
                  '_negative.pkl')
        X_train = []
        y_train = []
        for state in range(2):
            clouds = pickle.load(open(fp[state], 'rb'))
            X_train.extend(np.array(clouds))
            y_train.extend([1 - state] * len(clouds))

        fp = []
        fp.append(cell2_dir + structure + '/MD585_' + structure +
                  '_positive.pkl')
        fp.append(cell2_dir + structure + '/MD585_' + structure +
                  '_negative.pkl')
        for state in range(2):
            clouds = pickle.load(open(fp[state], 'rb'))
            X_train.extend(np.array(clouds))
            y_train.extend([1 - state] * len(clouds))
        X_train = np.array(X_train)
        y_train = np.array(y_train)
        dtrain = xgb.DMatrix(X_train, label=y_train)
        bst = xgb.train(param, dtrain, num_round, verbose_eval=False)

        if structure == '7nn':
            structure = '7n'

        [left, right, up, down] = [
            int(max(min(polygon[:, 0]) - margin, 0)),
            int(min(np.ceil(max(polygon[:, 0]) + margin), n - 1)),
            int(max(min(polygon[:, 1]) - margin, 0)),
            int(min(np.ceil(max(polygon[:, 1]) + margin), m - 1))
        ]
        xs, ys = np.meshgrid(np.arange(left, right + 1),
                             np.arange(up, down + 1),
                             indexing='xy')
        locations = np.c_[xs.flat, ys.flat]

        path = Path(polygon)
        indices_inside = np.where(path.contains_points(locations))[0]
        indices_in = locations[indices_inside]
        x_raw = indices_in[:, 0] - left
        y_raw = indices_in[:, 1] - up
        mask = np.zeros((down - up + 1, right - left + 1))
        for i in range(len(indices_in)):
            mask[y_raw[i], x_raw[i]] = 1
        mask = mask.astype(np.uint8)

        Scores[structure][str(section) + '_positive'] = {}
        x_shift = []
        y_shift = []
        z_shift = []
        for i in range(-10, 11):
            try:
                nleft = int(max(left + i * step_size, 0))
                nright = int(min(right + i * step_size, n - 1))
                shift = int(nleft - left - i * step_size)
                patch = img[up:down + 1, nleft:nright + 1] * mask[:, shift:shift + nright - nleft + 1]
                # grid_index = str(section) + '_' + structure + '_' + 'postive_x_'+str(i)
                # if grid_index in grid_features.keys():
                #     extracted = grid_features[grid_index]
                # else:
                extracted = features_extractor(patch, 'positive', params,
                                               extractor, thresholds)
                # grid_features[grid_index] = extracted

                xtest = xgb.DMatrix(extracted)
                score = bst.predict(xtest,
                                    output_margin=True,
                                    ntree_limit=bst.best_ntree_limit)
                x_shift.append(score)
            except Exception:
                x_shift.append(0)

            try:
                nup = int(max(up + i * step_size, 0))
                ndown = int(min(down + i * step_size, m - 1))
                shift = int(nup - up - i * step_size)
                patch = img[nup:ndown + 1, left:right + 1] * mask[shift:shift + ndown - nup + 1, :]
                # grid_index = str(section) + '_' + structure + '_' + 'postive_y_' + str(i)
                # if grid_index in grid_features.keys():
                #     extracted = grid_features[grid_index]
                # else:
                extracted = features_extractor(patch, 'positive', params,
                                               extractor, thresholds)
                # grid_features[grid_index] = extracted

                xtest = xgb.DMatrix(extracted)
                score = bst.predict(xtest,
                                    output_margin=True,
                                    ntree_limit=bst.best_ntree_limit)
                y_shift.append(score)
            except Exception:
                y_shift.append(0)

            loc_z = section + i * 2  # step 2 sections per unit shift along z
            if loc_z in valid_sections:
                sec_fn = raw_images_root + section_to_filename[
                    loc_z] + '_prep2_lossless_gray.tif'
                setup_download_from_s3(sec_fn, recursive=False)
                sec = cv2.imread(os.environ['ROOT_DIR'] + sec_fn, 2)
                try:
                    patch = sec[up:down + 1, left:right + 1] * mask
                    extracted = features_extractor(patch, 'positive', params,
                                                   extractor, thresholds)
                    xtest = xgb.DMatrix(extracted)
                    score = bst.predict(xtest,
                                        output_margin=True,
                                        ntree_limit=bst.best_ntree_limit)
                    z_shift.append(score)
                except Exception:
                    z_shift.append(0)
            else:
                z_shift.append(0)

        Scores[structure][str(section) + '_positive']['x'] = x_shift
        Scores[structure][str(section) + '_positive']['y'] = y_shift
        Scores[structure][str(section) + '_positive']['z'] = z_shift

        surround = Polygon(polygon).buffer(margin, resolution=2)
        path = Path(list(surround.exterior.coords))

        indices_sur = np.where(path.contains_points(locations))[0]
        indices_outside = np.setdiff1d(indices_sur, indices_inside)
        indices_out = locations[indices_outside]

        x_raw = indices_out[:, 0] - left
        y_raw = indices_out[:, 1] - up
        mask = np.zeros((down - up + 1, right - left + 1))
        for i in range(len(indices_out)):
            mask[y_raw[i], x_raw[i]] = 1
        mask = mask.astype(np.uint8)

        Scores[structure][str(section) + '_negative'] = {}
        x_shift = []
        y_shift = []
        z_shift = []
        for i in range(-10, 11):
            try:
                nleft = int(max(left + i * step_size, 0))
                nright = int(min(right + i * step_size, n - 1))
                shift = int(nleft - left - i * step_size)
                patch = img[up:down + 1, nleft:nright + 1] * mask[:, shift:shift + nright - nleft + 1]
                # grid_index = str(section) + '_' + structure + '_' + 'negative_x_' + str(i)
                # if grid_index in grid_features.keys():
                #     extracted = grid_features[grid_index]
                # else:
                extracted = features_extractor(patch, 'negative', params,
                                               extractor, thresholds)
                # grid_features[grid_index] = extracted

                xtest = xgb.DMatrix(extracted)
                score = bst.predict(xtest,
                                    output_margin=True,
                                    ntree_limit=bst.best_ntree_limit)
                x_shift.append(score)
            except Exception:
                x_shift.append(0)

            try:
                nup = int(max(up + i * step_size, 0))
                ndown = int(min(down + i * step_size, m - 1))
                shift = int(nup - up - i * step_size)
                patch = img[nup:ndown + 1, left:right + 1] * mask[shift:shift + ndown - nup + 1, :]
                # grid_index = str(section) + '_' + structure + '_' + 'negative_y_' + str(i)
                # if grid_index in grid_features.keys():
                #     extracted = grid_features[grid_index]
                # else:
                extracted = features_extractor(patch, 'negative', params,
                                               extractor, thresholds)
                #     grid_features[grid_index] = extracted

                xtest = xgb.DMatrix(extracted)
                score = bst.predict(xtest,
                                    output_margin=True,
                                    ntree_limit=bst.best_ntree_limit)
                y_shift.append(score)
            except Exception:
                y_shift.append(0)

            loc_z = section + i * 2
            if loc_z in valid_sections:
                sec_fn = raw_images_root + section_to_filename[
                    loc_z] + '_prep2_lossless_gray.tif'
                # setup_download_from_s3(sec_fn, recursive=False)
                sec = cv2.imread(os.environ['ROOT_DIR'] + sec_fn, 2)
                # os.remove(os.environ['ROOT_DIR'] + sec_fn)
                try:
                    patch = sec[up:down + 1, left:right + 1] * mask
                    extracted = features_extractor(patch, 'negative', params,
                                                   extractor, thresholds)  # was 'positive'; this surround pass matches the x/y 'negative' calls
                    xtest = xgb.DMatrix(extracted)
                    score = bst.predict(xtest,
                                        output_margin=True,
                                        ntree_limit=bst.best_ntree_limit)
                    z_shift.append(score)
                except Exception:
                    z_shift.append(0)
            else:
                z_shift.append(0)

        Scores[structure][str(section) + '_negative']['x'] = x_shift
        Scores[structure][str(section) + '_negative']['y'] = y_shift
        Scores[structure][str(section) + '_negative']['z'] = z_shift

        count += 1
        print(section, structure, count, '/', len(polygons))

    # if NotUpload:
    #     pickle.dump(grid_features, open(os.environ['ROOT_DIR'] + grid_fn, 'wb'))
    #     setup_upload_from_s3(grid_fn, recursive=False)
    filename = savepath + str(section) + '.pkl'
    pickle.dump(Scores, open(os.environ['ROOT_DIR'] + filename, 'wb'))
    setup_upload_from_s3(filename, recursive=False)
    shutil.rmtree(os.environ['ROOT_DIR'] + raw_images_root)
    # os.remove(os.environ['ROOT_DIR']+img_fn)
    print(str(section) + ' finished in %5.1f seconds' % (time() - t1))
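
Each per-section pickle written above maps structure -> '{section}_positive'/'{section}_negative' -> 'x'/'y'/'z' -> a list of 21 classifier margins for shifts of -10..10 steps. A sketch of reading one back (the section number is hypothetical; savepath is the prefix passed to image_generator):

scores = pickle.load(open(os.environ['ROOT_DIR'] + savepath + '200.pkl', 'rb'))
x_curve = scores['7n']['200_positive']['x']  # 21 margins for x-shifts of -10..10 * step_size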
Example 5
def image_generator(section, savepath, features_fn, cell_dir, param, params, num_round, half_size,
                    contours_grouped, raw_images_root, section_to_filename, all_patch_locations, thresholds):
    t1 = time()
    img_fn = raw_images_root + section_to_filename[section] + '_prep2_lossless_gray.tif'
    setup_download_from_s3(img_fn, recursive=False)
    img = cv2.imread(os.environ['ROOT_DIR'] + img_fn, 2)  # 2 == cv2.IMREAD_ANYDEPTH
    m, n = img.shape
    extractor = patch_extractor(params)

    polygons = [(contour['name'], contour['vertices'])
                for contour_id, contour in contours_grouped.get_group(section).iterrows()]

    grid_fn = features_fn + str(section) + '.pkl'
    try:
        setup_download_from_s3(grid_fn, recursive=False)
        grid_features = pickle.load(open(os.environ['ROOT_DIR'] + grid_fn, 'rb'))
        NotUpload = False
    except Exception:
        # no cached features on S3 yet; compute from scratch and upload at the end
        grid_features = {}
        NotUpload = True

    count = 0
    for contour_id, contour in polygons:
        structure = contour_id
        if structure not in all_patch_locations[section].keys():
            continue
        polygon = contour.copy()
        grid_features[structure] = {}

        if structure == '7n':
            structure = '7nn'  # the training pickles for 7n are stored under the name '7nn'

        threshold = thresholds[structure]

        subpath = savepath + structure + '/'
        if not os.path.exists(os.environ['ROOT_DIR']+subpath):
            os.mkdir(os.environ['ROOT_DIR']+subpath)

        fp = []
        fp.append(cell_dir + structure + '/MD589_' + structure + '_positive.pkl')
        fp.append(cell_dir + structure + '/MD589_' + structure + '_negative.pkl')
        features = []
        labels = []
        for state in range(2):
            clouds = pickle.load(open(fp[state], 'rb'))
            features.extend(np.array(clouds))
            labels.extend([1 - state] * len(clouds))
        features = np.array(features)
        labels = np.array(labels)
        X_train = features
        y_train = labels
        dtrain = xgb.DMatrix(X_train, label=y_train)
        bst = xgb.train(param, dtrain, num_round, verbose_eval=False)

        if structure == '7nn':
            structure = '7n'

        negative = structure + '_surround_500um_noclass'

        [left, right, up, down] = [int(max(min(all_patch_locations[section][negative][:, 0]) - half_size, 0)),
                                   int(min(np.ceil(max(all_patch_locations[section][negative][:, 0]) + half_size),
                                           n - 1)),
                                   int(max(min(all_patch_locations[section][negative][:, 1]) - half_size, 0)),
                                   int(min(np.ceil(max(all_patch_locations[section][negative][:, 1]) + half_size),
                                           m - 1))]

        xs, ys = np.meshgrid(np.arange(left + half_size, right - half_size + 1, half_size * 2),
                             np.arange(up + half_size, down - half_size + 1, half_size * 2), indexing='xy')
        locations = np.c_[xs.flat, ys.flat]  # patch centres, spaced one patch (2 * half_size) apart
        inside = all_patch_locations[section][structure]
        # row-wise set difference via structured views: grid centres not inside the structure
        all_rows = locations.view([('', locations.dtype)] * locations.shape[1])
        inside_rows = inside.view([('', inside.dtype)] * inside.shape[1])
        outside = np.setdiff1d(all_rows, inside_rows).view(locations.dtype).reshape(-1, locations.shape[1])
        windows = [inside, outside]  # [0] = inside the structure, [1] = surround
        polygon[:, 0] = polygon[:, 0] - left
        polygon[:, 1] = polygon[:, 1] - up

        hsv = np.zeros([down - up + 1, right - left + 1, 3])
        hsv[:, :, 2] = 1  # start with full value and zero saturation
        for state in range(2):
            for index in range(len(windows[state])):
                try:
                    x = int(float(windows[state][index][0]))
                    y = int(float(windows[state][index][1]))
                    patch = img[y - half_size:y + half_size, x - half_size:x + half_size].copy()
                    grid_index = str(section)+'_'+str(x)+'_'+str(y)
                    if grid_index in grid_features[structure].keys():
                        extracted = grid_features[structure][grid_index]
                    else:
                        extracted = features_extractor(patch, params, extractor, threshold)
                        grid_features[structure][grid_index] = extracted

                    xtest = xgb.DMatrix(extracted)
                    score = bst.predict(xtest, output_margin=True, ntree_limit=bst.best_ntree_limit)
                    value_img = patch / 255
                    hsv[y - half_size - up:y + half_size - up, x - half_size - left:x + half_size - left, 2] = value_img
                    satur_img = np.zeros_like(value_img) + score  # saturation channel carries the classifier margin
                    origin = hsv[y - half_size - up:y + half_size - up, x - half_size - left:x + half_size - left, 1]
                    # where patches overlap, keep the score with the larger magnitude
                    comp = np.absolute(origin) - np.absolute(satur_img)
                    hsv[y - half_size - up:y + half_size - up, x - half_size - left:x + half_size - left, 1] = (
                        origin * (comp > 0) + satur_img * (comp < 0))
                except Exception:
                    continue
        hsv[:, :, 0] = (hsv[:, :, 1] < 0) * 0.66 + (hsv[:, :, 1] > 0) * 1.0  # hue: blue for negative scores, red for positive
        hsv[:, :, 1] = np.absolute(hsv[:, :, 1])
        hsv[:, :, 1] = (hsv[:, :, 1] - hsv[:, :, 1].min()) / (hsv[:, :, 1].max() - hsv[:, :, 1].min()) * 0.8 + 0.2  # rescale |score| to [0.2, 1.0]
        rgb = skimage.color.hsv2rgb(hsv)
        rgb = rgb * 255
        rgb = rgb.astype(np.uint8)
        com = cv2.polylines(rgb.copy(), [polygon.astype(np.int32)], True, [0, 255, 0], 15, lineType=8)
        filename = subpath + structure + '_' + str(section) + '.tif'
        cv2.imwrite(os.environ['ROOT_DIR']+filename, com)
        setup_upload_from_s3(filename, recursive=False)
        count += 1
        print(section, structure, count, '/', len(polygons))
    if NotUpload:
        pickle.dump(grid_features, open(os.environ['ROOT_DIR'] + grid_fn, 'wb'))
        setup_upload_from_s3(grid_fn, recursive=False)
    os.remove(os.environ['ROOT_DIR']+img_fn)
    print(str(section) + ' finished in %5.1f seconds' % (time() - t1))
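
Example 5 assembles the heat map in HSV space: hue 0.66 (blue) marks negative margins, hue 1.0 (red) positive ones, saturation carries the rescaled |score|, and value holds the grayscale tissue. A one-pixel check of that convention:

pixel = np.array([[[1.0, 1.0, 0.5]]])      # (hue, saturation, value): strong positive score
print(skimage.color.hsv2rgb(pixel) * 255)  # ~[[[127.5, 0, 0]]]: dark red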