示例#1
0
    def test_top_bottom(self):
        """Verify that vertically stacking two GCTs reproduces the expected GCT.

        Parses the "top" and "bottom" fixture files, stacks them with
        ``cg.vstack``, and compares the data, row-metadata and column-metadata
        frames of the result against the parsed expected fixture.
        """
        top_gct_path = os.path.join(FUNCTIONAL_TESTS_DIR, "test_merge_top.gct")
        bottom_gct_path = os.path.join(FUNCTIONAL_TESTS_DIR, "test_merge_bottom.gct")
        expected_gct_path = os.path.join(FUNCTIONAL_TESTS_DIR, "test_merged_top_bottom.gct")

        top_gct = pg.parse(top_gct_path)
        bottom_gct = pg.parse(bottom_gct_path)
        expected_gct = pg.parse(expected_gct_path)

        # Merge top and bottom
        concated_gct = cg.vstack([top_gct, bottom_gct], [], False)

        # pd.util.testing was deprecated in pandas 1.0 and removed in 2.0;
        # pd.testing is the public home of the assertion helpers. Fall back to
        # the legacy namespace only on pandas versions that predate pd.testing.
        pd_testing = getattr(pd, "testing", None) or pd.util.testing

        # check_names=False: index/column *names* are not part of the contract
        # being tested here, only labels and values.
        pd_testing.assert_frame_equal(expected_gct.data_df, concated_gct.data_df, check_names=False)
        pd_testing.assert_frame_equal(expected_gct.row_metadata_df, concated_gct.row_metadata_df, check_names=False)
        pd_testing.assert_frame_equal(expected_gct.col_metadata_df, concated_gct.col_metadata_df, check_names=False)
示例#2
0
def main(args):
    """Query external profiles against each cell line's internal profiles.

    Steps, in order:
      1. Read the config file at ``args.psp_on_clue_config_path``.
      2. Parse the external GCT at ``args.external_gct_path``.
      3. Run introspect on the external GCT and write the result.
      4. For every cell line from the config, parse the internal and
         background GCTs, run the external query, and collect the
         connectivity GCT (also writing the per-cell similarity and
         connectivity GCTs when ``args.all`` is truthy).
      5. Vertically stack all connectivity GCTs and write the result.
      6. Write ``success.txt`` into ``args.out_dir``.

    Args:
        args: parsed arguments; this function reads ``out_dir``,
            ``psp_on_clue_config_path``, ``external_gct_path``,
            ``fields_to_aggregate_for_external_profiles``, ``assay`` and
            ``all``.

    Returns:
        None

    Raises:
        AssertionError: if ``args.out_dir`` does not exist.
        Exception: if any step inside the pipeline fails; ``failure.txt`` is
            written to ``args.out_dir`` before the exception is raised.
    """

    # Record start_time
    start_time = datetime.datetime.now()
    start_time_msg = "external_query_many.py started at {}".format(
        start_time.strftime('%Y-%m-%d %H:%M:%S'))

    # The output directory must already exist (it is NOT created here).
    # Raise explicitly rather than using a bare `assert` so the check is not
    # stripped when Python runs with -O; the exception type and message are
    # the same as before.
    if not os.path.exists(args.out_dir):
        raise AssertionError("args.out_dir: {}".format(args.out_dir))

    try:

        # Read and unpack config file
        (cells, internal_gct_dir, bg_gct_dir,
         fields_to_aggregate_for_internal_profiles, similarity_metric,
         connectivity_metric) = read_config_file(args.psp_on_clue_config_path)

        # Read in the external profiles only once
        external_gct = parse(args.external_gct_path)

        # Run introspect on the external profiles (always performed; only the
        # connectivity result is kept)
        (_, introspect_gct) = introspect.do_steep_and_sip(
            external_gct, similarity_metric, connectivity_metric,
            args.fields_to_aggregate_for_external_profiles)

        # Write introspect result
        actual_out_introspect_name = os.path.join(args.out_dir,
                                                  OUT_INTROSPECT_NAME)
        wg.write(introspect_gct,
                 actual_out_introspect_name,
                 data_null="NaN",
                 metadata_null="NaN",
                 filler_null="NaN")

        # Initialize list to store connectivity gcts
        list_of_conn_gcts = []

        # Loop over cell lines in corpus
        for cell in cells:

            # Import gct with the internal profiles for this cell line
            internal_gct_path = os.path.join(
                internal_gct_dir,
                INTERNAL_GCT_FORMAT.format(assay=args.assay, cell=cell))
            internal_gct = parse(internal_gct_path)

            # Import gct with the similarity matrix for this cell line
            bg_gct_path = os.path.join(
                bg_gct_dir, BG_GCT_FORMAT.format(assay=args.assay, cell=cell))
            bg_gct = parse(bg_gct_path)

            # NOTE(review): the metrics are hard-coded here, whereas the
            # introspect call above uses similarity_metric and
            # connectivity_metric from the config. Confirm whether the
            # config values should be used here as well.
            (sim_gct, conn_gct) = eq.do_steep_and_sip(
                external_gct, internal_gct, bg_gct, "spearman", "ks_test",
                args.fields_to_aggregate_for_external_profiles,
                fields_to_aggregate_for_internal_profiles)

            # Append this connectivity gct
            list_of_conn_gcts.append(conn_gct)

            # Write all output gcts if requested
            if args.all:
                out_steep_name = os.path.join(
                    args.out_dir, OUT_STEEP_FORMAT.format(cell=cell))
                out_sip_name = os.path.join(args.out_dir,
                                            OUT_SIP_FORMAT.format(cell=cell))

                wg.write(sim_gct, out_steep_name)
                wg.write(conn_gct, out_sip_name)

        # Concatenate connectivity GCTs
        concated = cg.vstack(list_of_conn_gcts)
        actual_out_concated_name = os.path.join(args.out_dir,
                                                OUT_CONCATED_NAME)

        # Write concatenated result
        wg.write(concated,
                 actual_out_concated_name,
                 data_null="NaN",
                 filler_null="NaN",
                 metadata_null="NaN")

        # Write success.txt with timestamp
        success_path = os.path.join(args.out_dir, "success.txt")
        write_success(success_path, start_time_msg)

        # Report how much time it took. total_seconds() covers the whole
        # duration; the .seconds attribute alone drops any full days.
        end_time = datetime.datetime.now()
        seconds_elapsed = (end_time - start_time).total_seconds()
        logger.info("external_query_many.py completed in {:.0f} sec.".format(
            seconds_elapsed))

    except Exception:
        # Top-level boundary: record the failure on disk, log, then re-raise
        # with a message that points at failure.txt.
        failure_path = os.path.join(args.out_dir, "failure.txt")
        msg = "external_query_many.py failed. See {} for stacktrace.".format(
            failure_path)

        # Write failure.txt (presumably including the stacktrace; see
        # write_failure)
        write_failure(failure_path, start_time_msg)

        # Raise exception
        logger.error(msg)
        raise Exception(msg)

    return None