Пример #1
0
def _write_to_worksheet(client, ssheet, wsheet_title, rows, header, append, keys=None):
    """Generic method to write a set of rows to a worksheet on google docs.

    The worksheet is requested from g_spreadsheet.add_worksheet with room for
    len(rows) + 1 rows and len(header) columns. When append is set and keys
    are given, rows already present in the worksheet whose key columns match
    one of the new rows are deleted before the new data is written. This
    duplicate removal is best-effort: failures are logged as warnings and the
    write still proceeds.

    Args:
        client: client object handed through to the g_spreadsheet calls.
        ssheet: spreadsheet entry; its title.text is used in log messages.
        wsheet_title: title of the worksheet to write to.
        rows: list of rows, each a sequence of cell values.
        header: list of column names for the rows.
        append: flag passed to g_spreadsheet.add_worksheet; also enables the
            duplicate removal below.
        keys: optional column names (must be present in header) that together
            identify a row for duplicate detection. None or empty disables
            duplicate detection.

    Returns:
        False if the worksheet could not be added, otherwise the value
        returned by g_spreadsheet.write_rows.
    """
    # Convert the worksheet title to unicode
    wsheet_title = _to_unicode(wsheet_title)

    # Add a new worksheet, possibly appending or replacing a pre-existing
    # worksheet according to the append-flag.
    wsheet = g_spreadsheet.add_worksheet(client,
                                         ssheet,
                                         wsheet_title,
                                         len(rows) + 1,
                                         len(header),
                                         append)
    if wsheet is None:
        logger2.error("ERROR: Could not add a worksheet {!r} to "
                      "spreadsheet {!r}".format(wsheet_title, ssheet.title.text))
        return False

    # If keys are specified (will correspond to indexes in the header), delete
    # pre-existing rows with matching keys
    if append and keys:
        wsheet_data = g_spreadsheet.get_cell_content(client, ssheet, wsheet, '2')
        wsheet_header = g_spreadsheet.get_header(client, ssheet, wsheet)
        try:
            wsheet_indexes = [wsheet_header.index(key) for key in keys]
            header_indexes = [header.index(key) for key in keys]
        except ValueError:
            logger2.warn("WARNING: Could not identify correct header for duplicate detection")
        else:
            for row in rows:
                # Best-effort per row: a failure here must not prevent the
                # write below. Catch Exception rather than everything so that
                # KeyboardInterrupt/SystemExit still propagate.
                try:
                    key = "#".join([row[i] for i in header_indexes])
                    for i, wrow in enumerate(wsheet_data):
                        wkey = "#".join([wrow[j] for j in wsheet_indexes])
                        if wkey == key:
                            # NOTE(review): the i + 1 offset presumably maps the
                            # index in the data fetched from row '2' onto the
                            # row numbering delete_row expects - confirm
                            # against g_spreadsheet.delete_row.
                            g_spreadsheet.delete_row(client, ssheet, wsheet, i + 1)
                            # Keep the local copy in sync with the worksheet so
                            # later indexes still line up; only the first
                            # match per new row is removed.
                            wsheet_data.pop(i)
                            break
                except Exception:
                    logger2.warn("WARNING: Could not identify/replace duplicate rows")

    # Write the data to the worksheet
    success = g_spreadsheet.write_rows(client, ssheet, wsheet, header, rows)
    if success:
        logger2.info("Wrote data to the {!r}:{!r} "
                     "worksheet".format(ssheet.title.text, wsheet_title))
    else:
        logger2.error("ERROR: Could not write data to the {!r}:{!r} "
                      "worksheet".format(ssheet.title.text, wsheet_title))
    return success
Пример #2
0
def write_project_report_summary_to_gdocs(client, ssheet):
    """Aggregate the per-flowcell worksheets of a spreadsheet into one row per
    sample and write the result to the "Summary" worksheet.

    Worksheets whose title ends with "_QC", or whose title is rejected by
    bcbio.solexa.flowcell.get_flowcell_info (ValueError), are ignored. Samples
    that occur in several worksheets/rows are merged via the sample object's
    add_sample method and their run names are concatenated with ';'.
    'comment' and 'pass' values found in an already existing "Summary"
    worksheet are carried over to the new rows.

    Args:
        client: client object handed through to the g_spreadsheet calls.
        ssheet: spreadsheet entry to read from and write to.

    Returns:
        The return value of _write_to_worksheet, which is called with the
        append flag set to False.
    """
    separator = ';'
    summary_title = "Summary"

    def _locate(header_row, names):
        # Column index for each name, as resolved by _header_index.
        return [_header_index(SEQUENCING_RESULT_HEADER, name, header_row)
                for name in names]

    def _pick(record, columns):
        # Cell value for each column index; None where the index is negative.
        return [record[column] if column >= 0 else None for column in columns]

    # Collect one entry per sample name from the worksheets that hold
    # flowcell data.
    per_sample = {}
    feed = g_spreadsheet.get_worksheets_feed(client, ssheet)
    for sheet in feed.entry:
        title = sheet.title.text
        if title.endswith("_QC"):
            continue

        # get_flowcell_info raises ValueError when the title is not a valid
        # flowcell id; such worksheets are skipped.
        try:
            bcbio.solexa.flowcell.get_flowcell_info(title)
        except ValueError:
            continue

        sheet_header = g_spreadsheet.get_header(client, ssheet, sheet)
        sheet_rows = g_spreadsheet.get_cell_content(client, ssheet, sheet, '2')
        columns = _locate(sheet_header,
                          ('sample_name', 'run', 'lane', 'read_count', 'barcode_sequence'))

        for record in sheet_rows:
            name, run, lane_id, count, barcode = _pick(record, columns)

            lane = bcbio.pipeline.flowcell.Lane({"lane": lane_id})
            sample = bcbio.pipeline.flowcell.BarcodedSample(
                {"name": name, "read_count": count, "sequence": barcode},
                lane)

            if name not in per_sample:
                # First occurrence of this sample
                per_sample[name] = {"object": sample, "flowcells": run}
                continue

            # Sample seen before: merge into the stored object
            entry = per_sample[name]
            entry["object"].add_sample(sample, separator)
            entry["flowcells"] += "{}{}".format(separator, run)
            if not entry["object"].barcode_sequence and barcode:
                entry["object"].barcode_sequence = barcode

    # Remember 'comment' and 'pass' values from an already existing summary
    # worksheet, keyed by sample name.
    preserved = {}
    previous = g_spreadsheet.get_worksheet(client, ssheet, summary_title)
    if previous:
        previous_header = g_spreadsheet.get_header(client, ssheet, previous)
        previous_rows = g_spreadsheet.get_cell_content(client, ssheet, previous, '2')
        previous_columns = _locate(previous_header, ('sample_name', 'comment', 'pass'))

        for record in previous_rows:
            name, comment, pass_field = _pick(record, previous_columns)
            preserved[name] = [comment, pass_field]

    # Turn the collected samples into worksheet rows
    rows = []
    for entry in per_sample.values():
        sample = entry["object"]
        name = sample.get_name()
        comment, pass_field = preserved.get(name, ["", ""])
        rows.append([
            name,
            entry["flowcells"],
            sample.get_lane(),
            sample.get_read_count(),
            sample.get_rounded_read_count(),
            sample.barcode_sequence,
            comment,
            pass_field,
        ])

    # Write the data to the worksheet
    column_headers = [spec[0] for spec in SEQUENCING_RESULT_HEADER]
    return _write_to_worksheet(client, ssheet, summary_title, rows, column_headers, False)