def _write_to_worksheet(client, ssheet, wsheet_title, rows, header, append, keys=None):
    """Write a set of rows to a worksheet in a Google Docs spreadsheet.

    The worksheet is obtained through ``g_spreadsheet.add_worksheet`` and is
    sized to ``len(rows) + 1`` rows by ``len(header)`` columns. When
    ``append`` is true and ``keys`` is non-empty, rows already present in the
    worksheet whose key columns match one of the new rows are deleted before
    the new data is written.

    Args:
        client: Client handle, passed through to the ``g_spreadsheet`` calls.
        ssheet: Spreadsheet entry; ``ssheet.title.text`` is used in log
            messages.
        wsheet_title: Worksheet title; converted with ``_to_unicode``.
        rows: Sequence of rows (each an indexable sequence of cell values).
        header: Sequence of column names matching the layout of ``rows``.
        append: Forwarded to ``add_worksheet`` (append to vs. replace a
            pre-existing worksheet) and also enables duplicate removal.
        keys: Optional sequence of column names that together identify a row.
            Each name must occur in both ``header`` and the worksheet's own
            header, otherwise duplicate removal is skipped with a warning.

    Returns:
        False if the worksheet could not be added, otherwise the result of
        ``g_spreadsheet.write_rows``.
    """
    # Avoid a shared mutable default; None means "no key columns".
    keys = [] if keys is None else keys

    # Convert the worksheet title to unicode
    wsheet_title = _to_unicode(wsheet_title)

    # Add a new worksheet, possibly appending or replacing a pre-existing
    # worksheet according to the append-flag.
    wsheet = g_spreadsheet.add_worksheet(client,
                                         ssheet,
                                         wsheet_title,
                                         len(rows) + 1,
                                         len(header),
                                         append)
    if wsheet is None:
        logger2.error("ERROR: Could not add a worksheet {!r} to "
                      "spreadsheet {!r}".format(wsheet_title, ssheet.title.text))
        return False

    # If keys are specified (will correspond to indexes in the header),
    # delete pre-existing rows with matching keys
    if append and len(keys) > 0:
        # NOTE(review): '2' presumably selects content from the second row on
        # (i.e. below the header) -- confirm against g_spreadsheet.
        wsheet_data = g_spreadsheet.get_cell_content(client, ssheet, wsheet, '2')
        wsheet_header = g_spreadsheet.get_header(client, ssheet, wsheet)
        try:
            wsheet_indexes = [wsheet_header.index(key) for key in keys]
            header_indexes = [header.index(key) for key in keys]
        except ValueError:
            logger2.warn("WARNING: Could not identify correct header for duplicate detection")
        else:
            for row in rows:
                # Duplicate removal is best-effort: a failure for one row is
                # logged and the remaining rows are still processed. Only
                # Exception is caught so that KeyboardInterrupt/SystemExit
                # still propagate.
                try:
                    key = "#".join([row[i] for i in header_indexes])
                    for i, wrow in enumerate(wsheet_data):
                        wkey = "#".join([wrow[j] for j in wsheet_indexes])
                        if wkey == key:
                            g_spreadsheet.delete_row(client, ssheet, wsheet, i + 1)
                            # Keep the local copy in step with the worksheet so
                            # later indexes still line up; stop at first match.
                            wsheet_data.pop(i)
                            break
                except Exception:
                    logger2.warn("WARNING: Could not identify/replace duplicate rows")

    # Write the data to the worksheet
    success = g_spreadsheet.write_rows(client, ssheet, wsheet, header, rows)
    if success:
        logger2.info("Wrote data to the {!r}:{!r} "
                     "worksheet".format(ssheet.title.text, wsheet_title))
    else:
        logger2.error("ERROR: Could not write data to the {!r}:{!r} "
                      "worksheet".format(ssheet.title.text, wsheet_title))
    return success
def write_project_report_summary_to_gdocs(client, ssheet):
    """Merge the per-flowcell worksheets into a single "Summary" worksheet.

    Worksheets whose title ends in "_QC", or whose title is rejected by
    ``bcbio.solexa.flowcell.get_flowcell_info`` (ValueError), are skipped.
    The rows of the remaining worksheets are combined per sample name.
    Comment and pass values found in an already existing "Summary" worksheet
    are carried over into the new rows.

    Returns the result of ``_write_to_worksheet`` for the summary rows.
    """
    separator = ';'
    summary_title = "Summary"
    result_fields = ('sample_name', 'run', 'lane', 'read_count', 'barcode_sequence')
    carried_fields = ('sample_name', 'comment', 'pass')

    # sample name -> {"object": merged sample, "flowcells": joined run names}
    merged = {}

    # Walk all worksheets, picking up data only from the flowcell ones.
    for sheet in g_spreadsheet.get_worksheets_feed(client, ssheet).entry:
        title = sheet.title.text
        if title.endswith("_QC"):
            continue
        # get_flowcell_info raises ValueError when the title holds no valid
        # flowcell id; such worksheets carry no flowcell data.
        try:
            bcbio.solexa.flowcell.get_flowcell_info(title)
        except ValueError:
            continue

        sheet_header = g_spreadsheet.get_header(client, ssheet, sheet)
        sheet_rows = g_spreadsheet.get_cell_content(client, ssheet, sheet, '2')

        # Resolve each field of interest to its column index in this sheet.
        columns = [_header_index(SEQUENCING_RESULT_HEADER, field, sheet_header)
                   for field in result_fields]

        for cells in sheet_rows:
            # A negative index means the column is absent from this sheet.
            name, run, lane_id, count, barcode = [
                cells[idx] if idx >= 0 else None for idx in columns]

            lane = bcbio.pipeline.flowcell.Lane({"lane": lane_id})
            sample = bcbio.pipeline.flowcell.BarcodedSample(
                {"name": name, "read_count": count, "sequence": barcode},
                lane)

            if name not in merged:
                merged[name] = {"object": sample, "flowcells": run}
                continue

            entry = merged[name]
            entry["object"].add_sample(sample, separator)
            entry["flowcells"] += "{}{}".format(separator, run)
            # Fill in a barcode only if the merged sample still lacks one.
            if not entry["object"].barcode_sequence:
                if barcode:
                    entry["object"].barcode_sequence = barcode

    # Pick up 'comment' and 'pass' values from a pre-existing summary, if any.
    carried = {}
    previous = g_spreadsheet.get_worksheet(client, ssheet, summary_title)
    if previous:
        previous_header = g_spreadsheet.get_header(client, ssheet, previous)
        previous_rows = g_spreadsheet.get_cell_content(client, ssheet, previous, '2')
        columns = [_header_index(SEQUENCING_RESULT_HEADER, field, previous_header)
                   for field in carried_fields]
        for cells in previous_rows:
            name, comment, verdict = [
                cells[idx] if idx >= 0 else None for idx in columns]
            carried[name] = [comment, verdict]

    # Flatten the merged samples into one output row each.
    summary_rows = []
    for entry in merged.values():
        sample = entry["object"]
        name = sample.get_name()
        comment, verdict = carried.get(name, ["", ""])
        summary_rows.append([name,
                             entry["flowcells"],
                             sample.get_lane(),
                             sample.get_read_count(),
                             sample.get_rounded_read_count(),
                             sample.barcode_sequence,
                             comment,
                             verdict])

    # The first element of every header spec is the column title.
    column_titles = [spec[0] for spec in SEQUENCING_RESULT_HEADER]
    return _write_to_worksheet(client,
                               ssheet,
                               summary_title,
                               summary_rows,
                               column_titles,
                               False)