def add_nested_ratings(self, ratings, row, col):
    """Write a (possibly nested) weight/rating tree into the ratings worksheet.

    Each entry of *ratings* is a sequence of at least (weight, rating); a third
    element, when present, is a list of sub-ratings written recursively one
    column to the right. Parent scores are written as formulas that sum
    weight*score over the child rows.

    Returns (rating_rows, next_row): the rows written at this level and the
    first free row after the subtree.
    """
    rating_rows = []
    for info in ratings:
        weight, rating = info[0:2]
        rating_rows.append(row)
        self.rating_ws.write(row, col, weight)
        self.rating_ws.write(row, col + 1, rating)
        if len(info) > 2:
            # add sub-ratings recursively, indented one column to the right
            rows, last_row = self.add_nested_ratings(info[2], row + 1, col + 1)
            # now set this rating's score to the product of the children
            col_name = xl_col_to_name(col + 1)
            for i in xrange(self.n_columns):
                rating_col_name = xl_col_to_name(self.rating_col(i))
                # weight * score, summed over every direct child row
                formula = '+'.join('%s%s*%s%s' % (col_name, r + 1, rating_col_name, r + 1)
                                   for r in rows)
                self.rating_ws.write_formula(row, self.rating_col(i), formula)
            row = last_row
        else:
            # actual (leaf) rating: link each medium column to its raw score cell
            score_row = self.score_row[rating]
            for i in xrange(self.n_columns):
                cell = xl_rowcol_to_cell(score_row, self.score_col(i),
                                         row_abs=True, col_abs=True)
                self.rating_ws.write(row, self.rating_col(i), '=Raw!%s' % cell)
            row += 1
    return rating_rows, row
def periodLine(self):
    """Add a dashed secondary-axis series (built from the last data column of
    the 'данные' sheet) to every chart already registered in the workbook.
    """
    # NOTE(review): reaches into xlsxwriter internals via __dict__ /
    # __getattribute__ instead of a public API — fragile across versions.
    for jChart in self.workBook.__dict__.get('charts'):
        jSeries = jChart.__getattribute__('series')[0]
        # Reuse the first existing series' category range for the new series.
        jCategories = jSeries['categories']
        # if jSeries['values'] =='=данные!$' + xl_col_to_name(self.numCols-1) + '$2:$' + xl_col_to_name(self.numCols-1) + '$' + str(self.numRows):
        jChart.set_y2_axis({
            'label_position': 'right',
            'max': 1,
            'visible': None,
            'major_unit': 1
        })
        jChart.add_series({
            'name': '=данные!$' + xl_col_to_name(self.numCols - 1) + '$1',
            'categories': jCategories,
            'values': '=данные!$' + xl_col_to_name(self.numCols - 1) + '$2:$' +
                      xl_col_to_name(self.numCols - 1) + '$' + str(self.numRows),
            'y2_axis': True,
            'line': {
                'dash_type': 'dash',
                'width': 0.5
            }
        })
def insertChart(self, xCol, yCol):
    """Insert one line chart per column index in *yCol* onto the 'графики'
    sheet, using column *xCol* of the 'данные' sheet as the category axis.
    """
    self.addSheet("графики")
    self.returnWorksheet("графики")
    for i in yCol:
        cCol = xl_col_to_name(i)
        self.chart = self.workBook.add_chart({'type': 'line'})
        self.chart.add_series({
            'categories': '=данные!$' + xl_col_to_name(xCol) + '$2:$' +
                          xl_col_to_name(xCol) + '$' + str(self.numRows),
            'values': '=данные!$' + cCol + '$2:$' + cCol + '$' + str(self.numRows),
            'name': '=данные!$' + cCol + '$1',
            'line': {
                'color': '#008B8B'
            }
        })
        # NOTE(review): sets a private 'id' attribute directly on the chart
        # object — relies on xlsxwriter internals.
        self.chart.__dict__.__setitem__('id', self.textData[0][i])
        self.chart.set_x_axis({'num_font': {'rotation': -90}})
        self.chart.set_legend({'position': 'bottom'})
        self.chart.set_title({'none': True})
        # Lay charts out in a 3-row grid: 8 columns apart, 16 rows apart.
        curXPosition = (i // 3) * 8 + 3
        curYPosition = (i % 3) * 16 + 2
        self.activeSheet.insert_chart(
            xl_col_to_name(curXPosition) + str(curYPosition), self.chart)
        self.chart = None
def header2column(oldFormula):
    """Rewrite ``$Header`` references in *oldFormula* into full-column
    references such as ``$A:$A``.

    Header names are resolved against the module-level ``table_body``
    sequence; every column whose name matches the header is substituted.

    :param oldFormula: formula string containing ``$Header`` tokens
    :return: the formula with header tokens replaced by column ranges
    """
    newFormula = oldFormula
    for columnHeader in re.findall(r"\$(\w+)", newFormula):
        for column in [
                column for column, columnName in enumerate(table_body)
                if columnName == columnHeader
        ]:
            columnLetter = xl_col_to_name(column)
            # Anchor the search on the leading '$' so only actual header
            # references are rewritten, not incidental occurrences of the
            # header text elsewhere in the formula (bug in the original,
            # which replaced the bare header string).
            newFormula = newFormula.replace(
                "$" + columnHeader,
                "$" + columnLetter + ":$" + columnLetter)
    return newFormula
def locate_col(match):
    """Translate a relative-column regex match into an Excel column name.

    The match must provide 'sign' ('m' = minus, 'p' = plus) and a numeric
    'offset' group; the offset is applied to the module-level current_col.
    Returns '' for an unparsable offset or unknown sign.
    """
    direction = match.group('sign')
    raw_offset = match.group('offset')
    try:
        delta = int(raw_offset)
    except ValueError:
        return ''
    step = {'m': -1, 'p': 1}.get(direction)
    if step is None:
        return ''
    return xl_col_to_name(current_col + step * delta)
def add_nn_comp_forms(worksheet, names_dict, num_rows):
    """Creates a percentage formula for two columns in excel."""
    nn_col = xl_col_to_name(names_dict['Not-NULL'])
    bad_col = xl_col_to_name(names_dict['Incorrect Data'])
    total_col = xl_col_to_name(names_dict['Total'])
    for data_row in range(1, num_rows):
        excel_row = str(data_row + 1)
        # % Not-NULL: fraction of total rows that are not NULL
        nn_formula = '={0}{1}/{2}{1}'.format(nn_col, excel_row, total_col)
        # % Complete: not-NULL minus incorrect, as a fraction of total
        comp_formula = '=({0}{1}-{2}{1})/{3}{1}'.format(
            nn_col, excel_row, bad_col, total_col)
        worksheet.write_formula(data_row, names_dict['% Not-NULL'], nn_formula)
        worksheet.write_formula(data_row, names_dict['% Complete'], comp_formula)
    return worksheet
def write_races_list_to_sheet(row, column, worksheet, races):
    """Write the races list under its headers and apply the winner
    conditional format to the 'Winner' column range."""
    write_races_list_headers(row=row, column=column, worksheet=worksheet)
    track_format = get_track_name_format()
    track_format.set_locked(False)
    time_format = get_time_format()
    time_format.set_locked(False)
    for offset, race in enumerate(races):
        target_row = row + offset + 1
        worksheet.write(target_row, column, race['N_time'], time_format)
        worksheet.write(target_row, column + 1, race['G_track'], track_format)
        # Winner fields are optional; default to empty string when absent.
        worksheet.write(target_row, column + 2, race.get('Winner', ''), track_format)
        worksheet.write(target_row, column + 3, race.get('WinnerName', ''), track_format)
    winner_col = xl_col_to_name(column + 2)
    format_range = '{0}{1}:{0}{2}'.format(winner_col, row + 2, row + len(races) + 1)
    set_winner_conditional_format(format_range=format_range, worksheet=worksheet)
def gen_sheetcolumn(column):
    '''
    Return the Excel column name for a 1-based column number.

    :param column: 1-based column index
    :return: column letters, e.g. 1 -> 'A', 26 -> 'Z', 27 -> 'AA'
    '''
    zero_based = column - 1
    return utility.xl_col_to_name(zero_based)
def set_race_time_forumula(row, column, worksheet):
    """Write a VLOOKUP formula that resolves the race time from the AD:AF
    lookup table, blanking the cell when the lookup result is empty."""
    track_name_format = get_track_name_format()
    target_cell = '%s%s' % (xl_col_to_name(column), row)
    # The lookup key lives two columns to the left of the target cell.
    lookup_cell = '%s%s' % (xl_col_to_name(column - 2), row)
    formula = (
        '=IF(LEN(VLOOKUP({0},AD:AF,3,FALSE))=0,"",'
        'VLOOKUP({0},AD:AF,3,FALSE))'
    ).format(lookup_cell)
    worksheet.write_formula(target_cell, formula, track_name_format)
def to_xlsx(csv):
    """Convert typeperf's CSV to xlsx charts.

    Reads the CSV into a DataFrame (first column parsed as datetimes),
    writes it to a 'data' sheet, then creates one chartsheet per data
    column with a scatter/straight chart over the full row range.
    Refuses to run if *csv* lacks a .csv suffix or the target xlsx exists.
    """
    if not csv.endswith('.csv'):
        click.echo("please select typeperf's csv")
        return
    xlsx = re.sub(r"\.csv$", ".xlsx", csv)
    if os.path.exists(xlsx):
        # never overwrite an existing output file
        click.echo("please remove {0}".format(xlsx))
        return
    click.echo("convert {0} to {1}".format(csv, xlsx))
    # typeperf emits " " for missing samples; treat those as NaN
    df = pd.read_csv(csv, index_col=[0], parse_dates=[0], na_values=[" "])
    df.index.name = None
    writer = pd.ExcelWriter(xlsx, engine="xlsxwriter",
                            datetime_format="yyyy/MM/dd-hh:mm:ss")
    df.to_excel(writer, sheet_name="data")
    workbook = writer.book
    for idx in range(df.shape[1]):
        # Chartsheet named after the Excel letter of the data column (+1
        # because column 0 of the sheet holds the index).
        name = "Chart" + xl_col_to_name(idx + 1)
        chartname = df.columns[idx]
        print "idx:{0} name:{1} df:{2}".format(idx, name, chartname)
        chart = workbook.add_chart({'type': 'scatter', 'subtype': 'straight'})
        chart.set_legend({'none': True})
        chart.set_title({'name': chartname, 'overlay': True, 'none': False})
        categories = '=data!' + xl_range(1, 0, df.shape[0], 0)
        values = '=data!' + xl_range(1, idx + 1, df.shape[0], idx + 1)
        chart.add_series({'categories': categories, 'values': values})
        cs = workbook.add_chartsheet(name)
        cs.set_chart(chart)
    workbook.close()
def add_table(self, first_row, first_col, last_row, last_col,
              sheet=None, **kwargs):
    '''Add a table to a worksheet; when *sheet* is None the default
    worksheet is used. Column widths and named formats in the 'columns'
    option are resolved, and last_row/last_col are recomputed from the
    supplied columns/data.'''
    target = self.get_sheet(sheet) if sheet else self.sheet
    columns = kwargs.get('columns')
    if columns:
        resolved = []
        for idx, column in enumerate(columns):
            if 'width' in column:
                letter = xl_col_to_name(idx + first_col)
                target.set_column("{0}:{0}".format(letter), column.get('width'))
            fmt = column.get("format")
            if fmt and isinstance(fmt, str):
                # Replace a format *name* with the registered format object.
                patched = column.copy()
                patched['format'] = self.formats.get(fmt)
                resolved.append(patched)
            else:
                resolved.append(column)
        kwargs['columns'] = resolved
        last_col = first_col + len(columns) - 1
    if 'data' in kwargs:
        last_row = first_row + len(kwargs['data'])
        if kwargs.get('total_row', False):
            last_row += 1
    target.add_table(first_row, first_col, last_row, last_col, kwargs)
def write_formulas(row, column, offset, start, worksheet, cell_format1):
    """Write MAX and MIN formulas over rows 2..offset+1 of column *start*
    into (row, column) and (row, column+1) respectively."""
    data_col = xl_col_to_name(start)
    data_range = '{0}2:{0}{1})'.format(data_col, offset + 1)
    max_cell = xl_rowcol_to_cell(row, column)
    worksheet.write_formula(max_cell, '=MAX(' + data_range, cell_format1)
    min_cell = xl_rowcol_to_cell(row, column + 1)
    worksheet.write_formula(min_cell, '=MIN(' + data_range, cell_format1)
def get_position(column_list, datacase):
    """Map column names to 'A2'-style cell references.

    :param column_list: iterable of column names present in *datacase*
    :param datacase: DataFrame-like object exposing ``columns.get_loc``
    :return: list of cell references (row 2) for each named column
    """
    # Hoisted out of the loop: the original re-imported on every iteration.
    from xlsxwriter.utility import xl_col_to_name
    position_list = []
    for column_value in column_list:
        intloc = datacase.columns.get_loc(column_value)
        position_list.append('{}2'.format(xl_col_to_name(intloc)))
    return position_list
def sample_matrix(size: Tuple = (5, 5),
                  loc: int = 10,
                  scale: int = 10,
                  lowercase_cols: bool = True,
                  round: int = 3,
                  seed: int = 42) -> pd.DataFrame:
    '''
    Generate a sample DataFrame of the given (rows, columns) size.

    Values are drawn from a normal (Gaussian) distribution via
    np.random.normal and rounded to *round* decimals. Columns are named
    with Excel-style letters ('a', 'b', ... by default; uppercase when
    lowercase_cols is False).

    Parameters
    ----------
    size
        tuple - (row, column) size required.
    loc
        Mean ("centre") of the distribution.
    scale
        Standard deviation (spread) of the distribution; non-negative.
    lowercase_cols
        Alphabetical column names are lowercase by default.
    round
        Number of decimals to round to.
    seed
        Seed for np.random; a falsy seed leaves the RNG state untouched.

    Returns
    -------
    a pandas DataFrame
    '''
    if seed:
        np.random.seed(seed)
    n_rows, n_cols = size
    values = np.random.normal(loc=loc, scale=scale, size=(n_rows, n_cols))
    names = [xl_col_to_name(idx) for idx in range(n_cols)]
    if lowercase_cols:
        names = [name.lower() for name in names]
    return pd.DataFrame(values, columns=names).round(round)
def validate_qualifier_headers(self, df, qualifier_cols):
    """Scan the header row of *df* for qualifier columns whose header uses a
    reserved name, and return the cells to rename.

    :param df: DataFrame whose header row is located via
        ``find_data_start_row``
    :param qualifier_cols: collection of Excel column letters (e.g. 'B')
        that are qualifier columns
    :return: list of (header_row, column_index, new_name) tuples
    """
    header_row, data_row = self.inhouse_utilty.find_data_start_row(df)
    rename_columns = []
    for i, col in enumerate(df.iloc[header_row]):
        # Compare by Excel letter because qualifier_cols holds letters.
        if utility.xl_col_to_name(i) in qualifier_cols:
            if col in self.reserved_qualifier_columns:
                # record the cell and renamed
                rename_columns.append(
                    (header_row, i, self.rename_column(col)))
    return rename_columns
def write_param_sheet(writer, common_tables_dict):
    """Concatenate all per-record parameter tables side by side and write
    them to the 'Параметры' sheet, widening every third column.

    :param writer: pandas ExcelWriter (xlsxwriter engine)
    :param common_tables_dict: mapping of record name -> DataFrame; must
        contain a 'record_1' key whose index aligns the concatenation
    """
    sheet_name = 'Параметры'
    tables_count = len(common_tables_dict)
    # NOTE(review): pd.concat's join_axes argument was removed in pandas 1.0;
    # this code requires an older pandas — confirm the pinned version.
    full_param_table = pd.concat(
        [common_tables_dict[key] for key in common_tables_dict],
        copy=False, axis=1,
        join_axes=[common_tables_dict['record_1'].index])
    full_param_table.to_excel(writer, sheet_name, index=False,
                              header=['Мнем', 'Юнит', 'Параметр'] * tables_count
                              )
    sheet = writer.sheets[sheet_name]
    # Widen the third column of each table block ('A:A'-style range).
    for i in range(2, len(full_param_table.columns), tables_count):
        sheet.set_column(':'.join([xl_col_to_name(i)] * 2), 28)
def __init__(self, parent : 'Reporter') -> None:
    """Prepare a MEDIAN-over-INDIRECT formula template for the σ[Peaks]
    column of the parent reporter's peaks sheet.

    ``self._formula`` ends with a ``{}`` placeholder so callers can
    ``format()`` in a row suffix later; ``self._row`` is the first row
    after the peaks table.
    """
    peakstype = parent.config.sheettype('peaks')
    peaks = peakstype(parent.book, parent.config)
    self._row = peaks.tablerow()+1
    # NOTE(review): this empty-string init is immediately overwritten below.
    self._formula = ''
    # First column index holding the σ[Peaks] data.
    ind = next(iter(peaks.columnindex('σ[Peaks]')))
    self._formula = ('=MEDIAN(INDIRECT("{sheet}!{col}:{col}"))'
                     .format(sheet = peaks.sheet_name,
                             col = xl_col_to_name(ind)+'{}'))
def _apply_conditional_format(self, params):
    """Apply a 'text containing' emphasis format to each configured column.

    Each entry of *params* carries a 0-based 'col' index and the list of
    substrings ('vals') that trigger the emphasis format.
    """
    for param in params:
        # Build an 'A1:A999'-style range covering the rows written so far.
        letter = xl_col_to_name(param['col'])
        cell_range = '{0}1:{0}{1}'.format(letter, self.row_pos)
        for val in param['vals']:
            options = {
                'type': 'text',
                'criteria': 'containing',
                'value': val,
                'format': self.format_emphasis,
            }
            self.sheet.conditional_format(cell_range, options)
def makeANRHelperTable(anr_sheet, header, total_rows, worksheet_name):
    """Fill the ANR helper sheet with bar-value formulas: each cell is the
    'Helper Table' ANR flag for its row times the MAX of the corresponding
    data column on *worksheet_name*.

    NOTE(review): an identical definition of this function appears twice in
    this file — the later one wins at import time; consider removing one.
    """
    #Note that the number of rows passed in is 0 indexed.
    #excel_row is 1 indexed.
    columns = len(header)
    ANR_bar_value = "=('Helper Table'!$C{excel_row})*MAX('{worksheet_name}'!{excel_col}$2:{excel_col}${total_rows})"
    anr_sheet.write_row(0, 0, header)
    for row in range(total_rows):
        for col in range(columns):
            # +1 skips the header row; the extra +1 converts to 1-indexed
            # Excel rows.
            anr_sheet.write(
                row + 1, col,
                ANR_bar_value.format(excel_row=str(row + 1 + 1),
                                     excel_col=xl_col_to_name(col),
                                     total_rows=str(total_rows),
                                     worksheet_name=worksheet_name))
def set_race_winners_conditional_format(row, start_column, number_of_favourites,
                                        worksheet):
    """Apply the favourite format to the top two favourite rows and the
    outsider format to the remaining rows, across an 11-column span."""
    fav_format = get_fav_format()
    outsider_format = get_outsider_format()
    first_letter = xl_col_to_name(start_column)
    last_letter = xl_col_to_name(start_column + 10)
    fav_top = row - number_of_favourites + 1
    fav_format_range = '{0}{1}:{2}{3}'.format(
        first_letter, fav_top, last_letter, fav_top + 1)
    outsider_format_range = '{0}{1}:{2}{3}'.format(
        first_letter, row - number_of_favourites + 3, last_letter, row)
    set_fav_conditional_format(format_range=fav_format_range,
                               worksheet=worksheet,
                               cell_format=fav_format)
    set_outsider_conditional_format(format_range=outsider_format_range,
                                    worksheet=worksheet,
                                    cell_format=outsider_format)
def write_totals_line(row, start_column, worksheet, number_of_favourites):
    """Write a SUM() totals formula under each of 11 consecutive columns,
    summing the favourite rows directly above.

    :param row: 0-based worksheet row of the totals line
    :param start_column: first 0-based column of the span
    :param worksheet: target xlsxwriter worksheet
    :param number_of_favourites: number of rows included in each sum
    """
    # Hoisted out of the loop: the format is loop-invariant, so one object
    # is reused for all 11 cells instead of fetching it per column.
    total_format = get_total_format()
    for column_number in range(start_column, start_column + 11):
        column = xl_col_to_name(column_number)
        target_cell = '{column}{row}'.format(**{
            'column': column,
            'row': row + 1
        })
        formula = '=SUM({column}{row_start}:{column}{row_end})'.format(
            **{
                'column': column,
                'row_start': row - number_of_favourites + 1,
                'row_end': row
            })
        worksheet.write_formula(target_cell, formula, total_format)
def __init__(self, row_i, col_i, value, sheet):
    """Initialise a cell at (row_i, col_i) holding *value* on *sheet*.

    Indices are 0-based and must be non-negative.
    """
    self.row_index = row_i
    self.column_index = col_i
    self._rawdata = value  # The raw data is the data read in from the input file and won't be processed
    self.value = value  # The value can be post-processed before writing to the table
    self.displaydata = None  # If this data is set to other than None, this data will be displayed over cell.value
    self.sheet = sheet
    # Excel column letter for the 0-indexed column.
    self.col_name = xl_col_to_name(self.column_index)
    self.is_toprow = self.row_index == 0  # header row
    self.filtered = False
    self.type = get_type(value)
    self.formula = None
    self.visuals = None
    self.is_blank = self.value in EMPTY_CELL
    self.format = None
    # NOTE(review): assert-based validation is stripped under python -O;
    # an explicit raise would be safer if these invariants matter.
    assert self.row_index >= 0 and self.column_index >= 0
def write_formula_score_row(self, name, formula, row):
    """
    Write a single formula as a score row, where +formula+ is a string or
    a lambda. If it's a string, it can contain {row} and {col} format
    strings. If a lambda, it will be given row and column as arguments
    and must return the formula string.
    """
    self.set_score_row(name, row)
    if isinstance(formula, basestring):
        # Wrap a template string as a callable; f is bound once so the
        # lambda always formats the original string.
        f = formula
        formula = lambda r, c: f.format(row=r, col=c)
    for i, medium in enumerate(self.media):
        medium_col = self.score_col(i)
        col_name = xl_col_to_name(medium_col)
        # row + 1 converts to Excel's 1-based row numbering.
        self.scores_ws.write_formula(row, medium_col,
                                     formula(row + 1, col_name))
def makeANRHelperTable(anr_sheet, header, total_rows, worksheet_name):
    """Fill the ANR helper sheet with bar-value formulas: each cell is the
    'Helper Table' ANR flag for its row times the MAX of the corresponding
    data column on *worksheet_name*.

    NOTE(review): an identical definition of this function appears twice in
    this file — the later one wins at import time; consider removing one.
    """
    #Note that the number of rows passed in is 0 indexed.
    #excel_row is 1 indexed.
    columns = len(header)
    ANR_bar_value = "=('Helper Table'!$C{excel_row})*MAX('{worksheet_name}'!{excel_col}$2:{excel_col}${total_rows})"
    anr_sheet.write_row(0,0,header)
    for row in range(total_rows):
        for col in range(columns):
            # +1 skips the header row; the extra +1 converts to 1-indexed
            # Excel rows.
            anr_sheet.write(row+1, col,
                ANR_bar_value.format(
                    excel_row = str(row+1+1),
                    excel_col = xl_col_to_name(col),
                    total_rows = str(total_rows),
                    worksheet_name=worksheet_name
                )
            )
def GetFormuleBQ(intMaxCol, inRow):
    """Build a subtraction formula fragment 'F<row>-X<row>-Y<row>-...' that
    subtracts every third column (starting at column index 6, letter F)
    from column F, for worksheet row *inRow*.

    :param intMaxCol: exclusive upper bound of 0-based column indices
    :param inRow: 1-based Excel row number used in the cell references
    :return: the formula fragment, or '' when intMaxCol <= 7
    """
    # Initialised so the function returns '' instead of raising
    # UnboundLocalError when the loop never assigns it.
    formula1 = ''
    intcount = 1
    for i in range(7, intMaxCol):
        if intcount == 3:
            intcount = 1
        if intcount == 1:
            formula = xl_col_to_name(i - 1)
            if i == 7:
                formula1 = 'F' + str(inRow) + '-' + (formula + str(inRow)) + '-'
            # Fixed: the original used 'i > 7 & i != intMaxCol', a bitwise &
            # with comparison-precedence semantics; the intended logical test
            # is written explicitly below (i != intMaxCol is always true for
            # range(7, intMaxCol), but is kept for clarity of intent).
            elif i > 7 and i != intMaxCol:
                formula1 = formula1 + (formula + str(inRow) + '-')
            intcount = intcount + 1
        else:
            intcount = intcount + 1
    return formula1
def _get_columns_and_column_names(row):
    """Build column descriptors from a header row.

    Blank headers are named after their Excel column letter; repeated
    names get a numeric suffix from a shared running counter.
    Returns (columns, column_names).
    """
    column_names = []
    columns = []
    duplicate_counter = 1
    for index, column_name in enumerate(row):
        if not column_name:
            column_name = "column_{}".format(xl_col_to_name(index))
        if column_name in column_names:
            column_name = "{}{}".format(column_name, duplicate_counter)
            duplicate_counter += 1
        column_names.append(column_name)
        descriptor = {
            "name": column_name,
            "friendly_name": column_name,
            "type": TYPE_STRING,
        }
        columns.append(descriptor)
    return columns, column_names
def _get_columns_and_column_names(row):
    """Build column descriptors from a header row (Python 2 variant).

    Blank headers are named after their Excel column letter; repeated
    names get a numeric suffix from a shared running counter.
    Returns (columns, column_names).

    NOTE(review): a near-identical definition of this function also appears
    elsewhere in this file; the later definition wins at import time.
    """
    column_names = []
    columns = []
    duplicate_counter = 1
    for i, column_name in enumerate(row):
        if not column_name:
            column_name = 'column_{}'.format(xl_col_to_name(i))
        if column_name in column_names:
            column_name = u"{}{}".format(column_name, duplicate_counter)
            duplicate_counter += 1
        column_names.append(column_name)
        columns.append({
            'name': column_name,
            'friendly_name': column_name,
            'type': TYPE_STRING
        })
    return columns, column_names
def df_to_excel_sheet_autoformat(df, writer, sheetname):
    """Write *df* to *sheetname* and auto-pick a number format per column
    based on what its header name indicates (money / output / percentage)."""
    # Convert the dataframe to an XlsxWriter Excel object.
    df.to_excel(writer, sheet_name=sheetname)
    # Get the xlsxwriter workbook and worksheet objects.
    workbook = writer.book
    worksheet = writer.sheets[sheetname]
    # Cell formats, one per header category.
    format_costs = workbook.add_format({'num_format': '$#,##0.00'})
    format_number = workbook.add_format({'num_format': '#,##0.00'})
    format_percentage = workbook.add_format({'num_format': '0.0%'})
    for idx, col_header in enumerate(df.columns):
        if column_header_is_money(col_header):
            col_format = format_costs
        elif column_header_is_output(col_header):
            col_format = format_number
        elif column_header_is_percentage(col_header):
            col_format = format_percentage
        else:
            col_format = None
        # idx + 1 skips the index column written by to_excel.
        letter = xl_col_to_name(idx + 1)
        worksheet.set_column("{0}:{0}".format(letter), 15, col_format)
def main():
    """Entry point: convert every kpi_checker*.csv in the working directory
    into an xlsx report (data, helper, ANR helper, summary sheets and
    charts) for the vendor given on the command line.
    """
    if len(sys.argv) != 2 or sys.argv[1] == "-h":
        print "Make sure that file 'Calendario edenrock Historico Closed Loop.xlsx' is in the same directory."
        print "Correct usage: python createGraph.py <vendor(nokia or ericsson)>"
        quit()
    #Use user input
    #input_file = sys.argv[1]
    vendor = sys.argv[1]
    INPUT = "kpi_checker*.csv"
    if vendor != "nokia" and vendor != "ericsson":
        print "Not a valid vendor: nokia or ericsson"
        quit()
    csv_file_list = glob.glob(INPUT)
    COMMON_GRAPHS = [
        #([y1_axis_cols],[y2_axis_cols],number,name)
        ([2], [4, 14], 1, "Volumen de trafico de voz cursado & Tasa de caidas de voz & Tasa de fallos de accesibilidad de voz"),
        ([5], [6, 12], 2, "Volumen de trafico de datos & Tasa de fallos de accesibilidad & Tasa de caidas de datos"),
        ([12, 10], [], 3, "Tasa de Accesibilidad HSDPA & Tasa Accesibilidad HSUPA"),
        ([8], [], 4, "Tasa de llamadas de voz originadas en 3G y que terminan en 2G"),
        ([15], [16], 5, "Volumen de SHO y Tasa de exito de SHO"),
        ([17], [18], 6, "Volumen de IFHO y Tasa de Fallos de IFHO")
    ]
    NOKIA_GRAPHS = [
        ([2], [21], 8, "Volumen de Voz & Total de caidas por detected"),
        ([2], [22], 9, "Volumen de Voz & Tasa de Caidas por Detected")
    ]
    OFFSET = 2  #Difference between columns in Metricas_Datos and Helper Table.
    SUMMARY_HEADER = ["", "Antes", "Despues", "% Diferencia"]
    SUMMARY_TABLE_ORIGIN = (12, 1)
    SUMMARY_SHEET_NAME = "Resumen"
    ANR_SHEET_NAME = "ANR Helper"
    for file_name in csv_file_list:
        CSV_TIME_FORMAT = "%Y-%m-%d %H:%M:%S %Z"
        RNC_NAME_RE_STRING = r"^.*[0-9]_(.*)\.csv"
        rnc_name_re = re.compile(RNC_NAME_RE_STRING)
        file_name_split = file_name.split(".")
        # NOTE(review): assert is stripped under python -O.
        assert(len(file_name_split) == 2)
        file_name_no_ext = file_name_split[0]
        wb = xlsxwriter.Workbook(file_name_no_ext + ".xlsx",
                                 {'strings_to_numbers': True})
        ws = wb.add_worksheet("Metricas_Datos")  #Holds csv data
        helper_sheet = wb.add_worksheet("Helper Table")  #Calculates pre and post ANR averages.
        anr_sheet = wb.add_worksheet(ANR_SHEET_NAME)  #Holds values for ANR exec graphs
        #Formats
        good = wb.add_format()
        good.set_bg_color("#afceb8")
        good.set_font_color("#27a700")
        bad = wb.add_format()
        bad.set_bg_color("#f6918c")
        bad.set_font_color("#c50900")
        summary_kpi_format = wb.add_format()
        summary_kpi_format.set_bg_color('#759FCD')
        green_bg = wb.add_format()
        green_bg.set_bg_color('#1e6810')
        percent = wb.add_format()
        percent.set_num_format(10)
        #Conditional formats
        greater_good = {'type': 'cell', 'criteria': '>=', 'value': 0, 'format': good}
        greater_bad = {'type': 'cell', 'criteria': '>=', 'value': 0, 'format': bad}
        lower_good = {'type': 'cell', 'criteria': '<', 'value': 0, 'format': good}
        lower_bad = {'type': 'cell', 'criteria': '<', 'value': 0, 'format': bad}
        #Need to read the csv dates to look them up in the execution matrix.
        time_col = []
        #Open the csv, read it and write it in data_worksheet
        #Also, while reading memorize the dates.
        with open(file_name, "r") as csv_file:
            csv_reader = csv.reader(csv_file)
            number_of_rows = 0
            header = []
            for i, row in enumerate(csv_reader):
                try:
                    time_col.append(datetime.datetime.strptime(row[0], CSV_TIME_FORMAT))
                except ValueError:
                    # Non-date first field (e.g. the header row): keep as text.
                    time_col.append(row[0])
                if i == 0:
                    header = row
                clean_row = []
                for element in row:
                    if element == "None":
                        clean_row.append("")
                    else:
                        clean_row.append(element)
                ws.write_row(i, 0, clean_row)
            # for/else: runs once after the loop, capturing the final
            # row/column counts from the last iteration's loop variables.
            else:
                number_of_rows = i + 1
                number_of_columns = len(row)
        helper_sheet.write_column(0, 0, time_col)  #Copy from input file into helper sheet.
        #Make them date objects so they are compatible with dict.
        date_col = [x.date() for x in time_col[1:]]
        #Get the RNC name.
        re_match_filename = rnc_name_re.match(file_name)
        rnc_name = re_match_filename.groups()[0]
        #Get the execution dictionary
        exec_dict = parseExecutionCalendar()
        average_flag = 0  #Turns into 1 after first ANR
        for i, date_var in enumerate(date_col):
            anr_exec = ""
            lms_exec = ""
            if date_var in exec_dict[rnc_name]["ANR"]:
                anr_exec = 1
                average_flag = 1
            if date_var in exec_dict[rnc_name]["LMS"]:
                lms_exec = 1
            row = [lms_exec, anr_exec, average_flag]
            helper_sheet.write_row(i + 1, 1, row)
        """
        with open(input_file, "r") as anr_file:
            anr_reader = csv.reader(anr_file)
            for i,row in enumerate(anr_reader):
                if i==0:
                    continue
                helper_sheet.write_row(i,1,row)
        """
        #Make the Helper table
        helper_header = [header[0]] + ["ANR Execution", "ANR Execution", "ANR avg"] + header[2:]
        helper_sheet.write_row(0, 0, helper_header)
        avg_if_formula = "=AVERAGEIF($D:$D,$D{0},Metricas_Datos!{1}:{1})"
        for row in range(1, number_of_rows):
            for col in range(2, number_of_columns):
                col_letter = xl_col_to_name(col)
                helper_sheet.write(row, col + OFFSET,
                                   avg_if_formula.format(row + 1, col_letter))
        #Make ANR helper table
        makeANRHelperTable(anr_sheet, header, number_of_rows, ws.get_name())
        #Create the summary table.
        summary_sheet = wb.add_worksheet(SUMMARY_SHEET_NAME)
        summary_sheet.write_row(SUMMARY_TABLE_ORIGIN[0], SUMMARY_TABLE_ORIGIN[1],
                                SUMMARY_HEADER, green_bg)
        difference_cell = "=('{0}'!{1}-'{0}'!{2})/'{0}'!{2}"
        copy_cell = "='{}'!{}"
        for x in range(number_of_columns - 2):
            #Using magic numbers to copy the summary table!
            summary_sheet.write(SUMMARY_TABLE_ORIGIN[0] + 1 + x, SUMMARY_TABLE_ORIGIN[1],
                                copy_cell.format(helper_sheet.get_name(), xl_rowcol_to_cell(0, 4 + x)),
                                summary_kpi_format)
            summary_sheet.write(SUMMARY_TABLE_ORIGIN[0] + 1 + x, SUMMARY_TABLE_ORIGIN[1] + 1,
                                copy_cell.format(helper_sheet.get_name(), xl_rowcol_to_cell(1, 4 + x)))
            summary_sheet.write(SUMMARY_TABLE_ORIGIN[0] + 1 + x, SUMMARY_TABLE_ORIGIN[1] + 2,
                                copy_cell.format(helper_sheet.get_name(), xl_rowcol_to_cell(number_of_rows - 1, 4 + x)))
            before_cell = xl_rowcol_to_cell(SUMMARY_TABLE_ORIGIN[0] + x + 1, SUMMARY_TABLE_ORIGIN[1] + 1)
            after_cell = xl_rowcol_to_cell(SUMMARY_TABLE_ORIGIN[0] + x + 1, SUMMARY_TABLE_ORIGIN[1] + 2)
            summary_sheet.write(SUMMARY_TABLE_ORIGIN[0] + 1 + x, SUMMARY_TABLE_ORIGIN[1] + 3,
                                difference_cell.format(summary_sheet.get_name(), after_cell, before_cell),
                                percent)
        #Conditional formats
        good_metrics = []
        bad_metrics = []
        if vendor == "nokia":
            good_metrics = [12, 14, 16, 18]
            bad_metrics = [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 19]
        else:
            bad_metrics = [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16]
            good_metrics = [14]
        # Green when a "good" metric improves, red otherwise (and vice
        # versa for "bad" metrics).
        for row in good_metrics:
            summary_sheet.conditional_format(SUMMARY_TABLE_ORIGIN[0] + row + 1,
                                             SUMMARY_TABLE_ORIGIN[1] + 3, SUMMARY_TABLE_ORIGIN[0] + row + 1,
                                             SUMMARY_TABLE_ORIGIN[1] + 3, greater_good)
            summary_sheet.conditional_format(SUMMARY_TABLE_ORIGIN[0] + row + 1,
                                             SUMMARY_TABLE_ORIGIN[1] + 3, SUMMARY_TABLE_ORIGIN[0] + row + 1,
                                             SUMMARY_TABLE_ORIGIN[1] + 3, lower_bad)
        for row in bad_metrics:
            summary_sheet.conditional_format(SUMMARY_TABLE_ORIGIN[0] + row + 1,
                                             SUMMARY_TABLE_ORIGIN[1] + 3, SUMMARY_TABLE_ORIGIN[0] + row + 1,
                                             SUMMARY_TABLE_ORIGIN[1] + 3, greater_bad)
            summary_sheet.conditional_format(SUMMARY_TABLE_ORIGIN[0] + row + 1,
                                             SUMMARY_TABLE_ORIGIN[1] + 3, SUMMARY_TABLE_ORIGIN[0] + row + 1,
                                             SUMMARY_TABLE_ORIGIN[1] + 3, lower_good)
        #Magic numbers set column width. (Empirical)
        summary_sheet.set_column(1, 1, 54.57)
        summary_sheet.set_column(4, 4, 11.43)
        summary_sheet.activate()
        #Make all charts
        for info in COMMON_GRAPHS:
            makeChart(wb, ws, info, number_of_rows, anr_sheet)
        if vendor == "nokia":
            for info in NOKIA_GRAPHS:
                makeChart(wb, ws, info, number_of_rows, anr_sheet)
        wb.close()
def create_output_data(screen_facility_id, fields, result_values ):
    '''
    Translate Screen Result data into a data structure ready for
    Serialization:
    {
        'Screen Info': [ [ row1 ], [ row2 ]...].
        'Data Columns': [ [ row1 ], [ row2 ]...].
        'Data': [ [ row1 ], [ row2 ]...].
    }
    @param fields an iterable containing result_value data_column dicts and
        field information dicts for the non-result value columns
    @param result_values an iterable containing result_value dicts
    '''
    logger.info('create screen result data structure for %r', screen_facility_id)
    # Invert control-type mapping so stored values map back to labels.
    control_type_mapping = {v:k for k,v in ASSAY_WELL_CONTROL_TYPES.items()}
    data = OrderedDict()
    data['Screen Info'] = { 'Screen Number': screen_facility_id }
    data_column_structure = []
    data['Data Columns'] = data_column_structure
    datacolumn_labels = DATA_COLUMN_FIELD_MAP.keys()
    # Partition the fields into data columns vs. other (non-result) columns.
    data_columns = []
    data_column_names = []
    other_columns = []
    for key,field in fields.items():
        if ( field.get('is_datacolumn',False)
             or field.get('data_worksheet_column', None)):
            data_columns.append(key)
            data_column_names.append(field['name'])
        elif ( key not in ['well_id', 'plate_number','well_name',
                'screen_facility_id', 'assay_well_control_type']
               and key not in RESULT_VALUE_FIELD_MAP.keys() ):
            other_columns.append(key)
    data_columns = sorted(data_columns, key=lambda x: fields[x]['ordinal'])
    other_columns = sorted(other_columns, key=lambda x: fields[x]['ordinal'])
    # Map each data column name to its Excel letter on the "Data" sheet,
    # offset past the fixed result-value columns.
    data_column_names_to_col_letter = {
        dc:xl_col_to_name(len(RESULT_VALUE_FIELD_MAP)+i)
            for (i,dc) in enumerate(data_column_names) }
    logger.info('data columns: %r, other_columns: %r', data_columns, other_columns)
    # Transpose the field definitions into the output data_column sheet:
    # Row 0 - "Data" Worksheet Column
    # Row 1 - name
    # Row 2 - data_type
    # Row N - other data column fields
    # Column 0 - data column field label
    # Column 1-N data column values
    header_row = [datacolumn_labels[0]]
    header_row.extend([xl_col_to_name(len(RESULT_VALUE_FIELD_MAP)+i)
        for i in range(len(data_columns))])
    logger.debug('header_row: %r', header_row)
    for i,(sheet_label,sheet_key) in enumerate(
            DATA_COLUMN_FIELD_MAP.items()[1:]):
        row = [sheet_label]
        for j,key in enumerate(data_columns):
            val = fields[key].get(sheet_key, None)
            if sheet_key == 'data_type':
                # Prefer the assay-specific data type when present.
                val = fields[key].get(
                    'assay_data_type',fields[key].get('data_type',None))
            if val:
                if sheet_key == 'is_follow_up_data':
                    if val == True:
                        val = 'Follow up'
                    elif val == False:
                        val = 'Primary'
                elif sheet_key == 'derived_from_columns':
                    if fields[key].get('screen_facility_id', None) == screen_facility_id:
                        logger.info('Translate derived_from_columns: %r', val)
                        if not set(data_column_names_to_col_letter.keys()).issuperset(set(val)):
                            raise ValidationError(
                                key='derived_from_columns',
                                msg=('col: %r, values: %r are not in %r'
                                    %(key,val,data_column_names_to_col_letter.keys())))
                        val = ', '.join(
                            [data_column_names_to_col_letter[dc_name]
                                for dc_name in val])
                    else:
                        # Manually serialize using commas
                        val = ', '.join(val)
                row.append(val)
            else:
                row.append(None)
                logger.debug(
                    'Note: sheet key not found in schema field: %r, %r',
                    sheet_key, fields[key])
        logger.debug('data column row: %r', row)
        data_column_structure.append(OrderedDict(zip(header_row,row)))

    def result_value_generator(result_values):
        # Lazily yields one OrderedDict per result value row for the
        # "Data" sheet.
        logger.info('Write the result values sheet')
        header_row = []
        header_row.extend(RESULT_VALUE_FIELD_MAP.keys())
        # TODO: allow column titles to be optional
        header_row.extend([fields[key].get('title', key)
            for key in data_columns])
        header_row.extend(other_columns)
        row_count = 0
        for result_value in result_values:
            row_count += 1
            row = []
            row.extend(result_value['well_id'].split(':'))
            if ( result_value.has_key('assay_well_control_type')
                 and result_value['assay_well_control_type'] ):
                control_type = default_converter(result_value['assay_well_control_type'])
                # note: "empty", "experimental", "buffer" are values that can be
                # found in this column, due to legacy data entry, but they are
                # not valid
                if control_type in control_type_mapping:
                    row.append(control_type_mapping[control_type])
                else:
                    row.append(None)
            else:
                row.append(None)
            excluded_cols = []
            if result_value.has_key('exclude') and result_value['exclude']:
                temp = result_value['exclude']
                if hasattr(temp, 'split'):
                    temp = temp.split(LIST_DELIMITER_SQL_ARRAY)
                logger.debug('excluded data_columns: find %r, in %r',
                    temp, data_columns)
                for data_column_name in temp:
                    excluded_cols.append(get_column_letter(
                        len(RESULT_VALUE_FIELD_MAP)+1
                            +data_columns.index(data_column_name)))
                excluded_cols = sorted(excluded_cols)
            row.append(','.join(excluded_cols))
            for j,key in enumerate(data_columns):
                if result_value.has_key(key):
                    row.append(result_value[key])
                else:
                    row.append(None)
            # append the non-result value columns to the end of the row
            for j,key in enumerate(other_columns):
                if result_value.has_key(key):
                    row.append(result_value[key])
            if row_count % 10000 == 0:
                logger.info('wrote %d rows', row_count)
            yield OrderedDict(zip(header_row,row))

    data['Data'] = result_value_generator(result_values)
    return data
def export_to_excel(json_file_path: str, excel_file_path: str) -> None: content = retrieve_tamplates_and_set_them(json_file_path) # pyre-fixme[45]: Cannot instantiate abstract class `ExcelWriter`. writer = pd.ExcelWriter(excel_file_path, engine="xlsxwriter") workbook = writer.book assert workbook is not None title_format, date_format, black_format, cell_format = add_workbook_formats( workbook) translations = content.get("translations", {}) bool_options = { "Yes": translations.get("Yes", "Yes"), "No": translations.get("No", "No"), } for category in content["categories"]: full_question_paths, questions, max_deps = _extract_questions(category) form_name_to_description = _extract_form_descriptions(category) rows = [( full_question_path[0], *rpad( list(map(lambda x: x[0], list(full_question_path[1:-1]))), " ", max_deps, ), full_question_path[-1], ) for full_question_path in full_question_paths] df = pd.DataFrame({row: [] for row in rows}) for i in range(len(rows) - 1, -1, -1): if i != 0 and rows[i - 1][0] != rows[i][0]: df.insert( i, column=tuple([" "] * (max_deps + 2)), value="", allow_duplicates=True, ) df = df.transpose() df.to_excel(writer, sheet_name=category["categoryName"]) rows = [record for record in df.to_records()] row_indexes = [i + 2 for i, row in enumerate(rows) if row[0] != " "] question_index_to_row_index = dict( zip(range(2, len(row_indexes) + 2), row_indexes)) num_columns = max_deps + 2 worksheet = writer.sheets[category["categoryName"]] write_column_titles(worksheet, num_columns, title_format, translations) adjust_column_and_row_sizes(worksheet, rows, num_columns) question_to_cell = {} # add validations and formatting to columns for i, (full_question_path, question) in enumerate(zip(full_question_paths, questions)): cell_name = "{}{}".format(xl_col_to_name(num_columns), question_index_to_row_index[i + 2]) question_to_cell[( full_question_path[0], *list(map(lambda x: x[0], list(full_question_path[1:-1]))), full_question_path[-1], )] = cell_name 
worksheet.write_blank(cell_name, "", cell_format) # pyre-fixme[19]: Expected 9 positional arguments. add_cell_validation( worksheet, question_index_to_row_index[i + 2], num_columns, question_to_cell, full_question_path, question, question_to_cell, bool_options, date_format, black_format, ) form_names = [ full_question_path[0] for full_question_path in full_question_paths ] write_form_description_as_cell_comment(worksheet, form_names, question_index_to_row_index, form_name_to_description) worksheet.protect() writer.save()
def works():
    """Probe each monitored site from multiple regions and write the timing
    results (resolve/connect/download, total, status) into a dated summary
    workbook, one data sheet per site, plus trend charts.
    """
    dates = datetime.today().strftime("%Y%m%d%H%M")
    filename = u"PlatON网站服务优化监测表-汇总-%s.xlsx" % (dates)
    workbook = xlsxwriter.Workbook(filename=filename)
    titlestyle = workbook.add_format()
    coltitlestyle = workbook.add_format()
    title_style(titlestyle, "#00B0F0", ["center", "vcenter"])
    title_style(coltitlestyle, "#00B0F0", ["center"])
    areaStyle = workbook.add_format()
    set_font(areaStyle, 12, "宋体", 1)
    dataStyle = workbook.add_format()
    set_font(dataStyle, 12, "Arial", 1)
    # Single chart sheet shared by all sites.
    workchart = workbook.add_worksheet("网站访问监测图表")
    currentcol = 1
    source = Asm()
    for site in sites.keys():
        # First data row (rows 0-1 hold title and column headers).
        row = 2
        # add_worksheet limits sheet names to at most 32 characters.
        worksheet = workbook.add_worksheet("%s数据" % (sites.get(site)))
        # Set the height of the title row.
        worksheet.set_row(0, 70)
        # Column 2 is 38 characters wide; columns 3-7 are 17 characters wide.
        worksheet.set_column(1, 1, 38)
        worksheet.set_column(2, 6, 17)
        # Merge columns 2-7 of row 1 and write the sheet title.
        worksheet.merge_range(0, 1, 0, 6,
                              "%s网站服务网页监测例检表%s" % (sites.get(site), dates),
                              titlestyle)
        # Write the table column headers into the second row.
        cells = [(1, 1, "地区/国家", coltitlestyle),
                 (1, 2, "访问总时间(s)", coltitlestyle),
                 (1, 3, "解析时间(ms)", coltitlestyle),
                 (1, 4, "连接时间(ms)", coltitlestyle),
                 (1, 5, "下载时间(ms)", coltitlestyle),
                 (1, 6, "访问状态(ms)", coltitlestyle)]
        write_cell(worksheet, cells)
        api = CONFIG.cp_config
        urls = []
        # One probe URL per region code for this site.
        for code in codes:
            url = api + ("?checkloc=%s&type=https&host=%s&path=&port=443&"
                         "callback=update_") % (code, site)
            urls.append(url)
        results = source.get_data(urls)
        print(results)
        for result in results:
            try:
                country, city, rtime, ctime, dtime = result
            # NOTE(review): a failed tuple unpack raises ValueError/TypeError,
            # which this clause does not catch — presumably `result` objects
            # raise AsmError/AssertionError while being iterated; confirm
            # against Asm.get_data.
            except (AsmError, AssertionError):
                continue
            write_cell(
                worksheet,
                [(row, 1, "%s-%s" % (countryCode.get(country),
                                     cityCode.get(city)), areaStyle)])
            cells = [(row, 3, int(rtime), dataStyle),
                     (row, 4, int(ctime), dataStyle),
                     (row, 5, int(dtime), dataStyle)]
            write_number(worksheet, cells)
            # Total time (s) = sum of the three ms columns / 1000.
            worksheet.write_formula(row, 2,
                                    "=SUM(D%s:F%s)/1000" % (row + 1, row + 1),
                                    dataStyle)
            # Status: "超时" (timeout) when total > 3s or 0, else OK.
            # NOTE(review): the first and third IF conditions are both
            # C>3, so the third branch is unreachable — one of them likely
            # should test a different cell.
            worksheet.write_formula(
                row, 6,
                '=IF(C%s>3,"超时",IF(C%s=0,"超时",IF(C%s>3,"超时","OK")))' %
                (row + 1, row + 1, row + 1), dataStyle)
            row += 1
        column = xl_col_to_name(currentcol)
        # One chart per metric, anchored 18 rows apart on the chart sheet.
        for item in [("访问总时间(s)", "C", "%s2" % column),
                     ("解析时间(ms)", "D", "%s20" % column),
                     ("连接时间(ms)", "E", "%s38" % column),
                     ("下载时间(ms)", "F", "%s56" % column)]:
            work_charts(workbook, workchart, worksheet.name, item[0], item[1],
                        item[2])
        # Next site's charts start 10 columns to the right.
        currentcol += 10
    create_extra_table(workbook, titlestyle, coltitlestyle)
    workbook.close()
    LOG.info("%s写入数据完成" % filename)
def fetchData(self, inputFilename, outputFilename):
    """Scrape BISE Lahore exam results for every roll number listed in
    ``inputFilename`` (one per line) and write them to
    ``output/<outputFilename>.xlsx`` with separate Computer/Biology sheets.

    Drives a Chrome browser via splinter; flaky page loads are handled by
    busy-wait retry loops around each XPath lookup.
    """
    print("Preparing Spreadsheet...")
    try:
        # NOTE(review): `file` shadows the builtin and is never closed —
        # consider a context manager.
        file = open(inputFilename, "r")
    except FileNotFoundError:
        print("No file named '" + inputFilename + "' exists in folder 'input'")
        sys.exit()
    workbook = xlsxwriter.Workbook("output/" + outputFilename + '.xlsx')
    worksheets = [
        workbook.add_worksheet('Computer'),
        workbook.add_worksheet('Biology')
    ]
    x = 0  # NOTE(review): unused.
    row = 1
    col = 0
    print("Initiating Automation...\n")
    browser = Browser('chrome')
    browser.visit('http://biselahore.com/')
    # Prime the form with the first roll number (page warm-up only; the file
    # is rewound below before the real loop).
    browser.fill('student_rno', file.readline())
    sleep(0.5)
    # browser.find_by_xpath('//*[@id="main-wrapper"]/div[2]/ul/li/table/tbody/tr[2]/td/form/table/tbody/tr[3]/td/input').click()
    file.seek(0)
    for worksheet in worksheets:
        worksheet.write(0, 0, "Roll No")
        worksheet.write(0, 1, "Name")
        # Widen the Name column (B:B) to 24 characters.
        worksheet.set_column(
            xl_col_to_name(1) + ":" + xl_col_to_name(1), 24)
    # Next free row per sheet.
    row_comp = 1
    row_bio = 1
    # Copy the seven subject names from the result table into both headers.
    for i in range(7):
        word = browser.find_by_xpath(
            '/html/body/div/ul/li/table[1]/tbody/tr[5]/td/table/tbody/tr[{}]/td[1]'
            .format(2 + i)).text
        for worksheet in worksheets:
            worksheet.write(0, i + 2, word)
    worksheets[0].write(0, 9, "COMPUTER")
    worksheets[1].write(0, 9, "BIOLOGY")
    for worksheet in worksheets:
        worksheet.write(0, 10, "Total")
    browser.back()
    print("Fetching Result Data...\n")
    for line in file:
        browser.fill('student_rno', line.strip())
        browser.find_by_xpath(
            '//*[@id="main-wrapper"]/div[2]/ul/li/table/tbody/tr[2]/td/form/table/tbody/tr[3]/td/input'
        ).click()
        print("Rollno: " + line)
        # Retry until the elective-subject cell is present (page may still be
        # loading).
        retry = True
        while retry:
            try:
                elective_sub = browser.find_by_xpath(
                    '/html/body/div[1]/ul/li/table[1]/tbody/tr[5]/td/table/tbody/tr[9]/td[1]'
                ).text
                retry = False
            except IndexError:
                retry = True
            except exceptions.ElementDoesNotExist:
                retry = True
        # Route the student to the matching sheet by elective subject.
        if 'COMPUTER SCIENCE'.lower() in elective_sub.lower():
            row = row_comp
            worksheet = worksheets[0]
        else:
            worksheet = worksheets[1]
            row = row_bio
        worksheet.write(row, col, str(line))
        col = col + 1
        retry = True
        while retry:
            try:
                name = browser.find_by_xpath(
                    '/html/body/div[1]/ul/li/table[1]/tbody/tr[3]/td/table/tbody/tr[2]/td[3]'
                ).text
                print(name)
                worksheet.write(row, col, name)
                retry = False
            except IndexError:
                retry = True
            except exceptions.ElementDoesNotExist:
                retry = True
    # /html/body/div[1]/ul/li/table[1]/tbody/tr[5]/td/table/tbody/tr[2]/td[2]
    # /html/body/div[1]/ul/li/table[1]/tbody/tr[5]/td/table/tbody/tr[3]/td[2]
        col = col + 1
        # Eight mark cells per student; non-numeric marks are recorded as 0.
        for index in range(8):
            retry = True
            while retry:
                try:
                    marks = browser.find_by_xpath(
                        '/html/body/div[1]/ul/li/table[1]/tbody/tr[5]/td/table/tbody/tr[{}]/td[2]'
                        .format(2 + index)).text
                    worksheet.write(row, col, int(marks))
                except IndexError:
                    retry = True
                except ValueError:
                    # Cell text was not an integer (e.g. absent) — store 0.
                    marks = 0
                    worksheet.write(row, col, int(marks))
                    retry = False
                except exceptions.ElementDoesNotExist:
                    retry = True
                else:
                    retry = False
            col = col + 1
        # Total column: Excel SUM over the subject-mark cells of this row.
        worksheet.write(
            row, col, "=SUM(" + xl_rowcol_to_cell(row, 2) + ":" +
            xl_rowcol_to_cell(row, col - 1) + ")")
        col = 0
        row = row + 1
        # Remember the next free row for whichever sheet was used.
        if 'COMPUTER SCIENCE'.lower() in elective_sub.lower():
            row_comp = row
        else:
            row_bio = row
        browser.back()
    workbook.close()
    browser.quit()
def col_index_to_name(idx):
    """Return the Excel column *range* string for a 0-based column index.

    E.g. ``0 -> "A:A"``, ``2 -> "C:C"``, ``26 -> "AA:AA"`` — the form
    expected by ``worksheet.set_column``-style APIs.

    The original re-imported ``xlsxwriter.utility.xl_col_to_name`` on every
    call; the conversion is plain bijective base-26 arithmetic, so it is done
    inline here (identical output for all non-negative indexes, and no
    per-call third-party import).

    :param idx: 0-based column index (non-negative int).
    :return: ``"<letters>:<letters>"`` range string.
    """
    column_name = ""
    number = idx + 1  # shift to 1-based for the bijective base-26 conversion
    while number > 0:
        number, remainder = divmod(number - 1, 26)
        column_name = chr(ord("A") + remainder) + column_name
    return column_name + ":" + column_name
def save_simulation_result_to_excel_file(result: dict,
                                         path: str,
                                         insert_value_chart=False,
                                         compare_with_bench=False) -> None:
    """Dump a backtest/simulation ``result`` dict to an Excel workbook.

    Writes the portfolio log, monthly returns, annual summary and performance
    summary sheets (plus optional turnover / event-log / order-weight /
    inter-rebalance-return sheets when present in ``result``).

    :param result: dict with a mandatory ``'performance'`` sub-dict and
        several optional histories (see ``.get`` calls below).
    :param path: output ``.xlsx`` path.
    :param insert_value_chart: when True, embed a line chart of every
        performance-summary column on the 'portfolio log' sheet.
    :param compare_with_bench: when True, treat the first portfolio-log
        column as the benchmark, write strategy/benchmark ratio columns next
        to the log and chart them.
    """
    performance = result['performance']
    event_log = result.get('event_log', None)
    rebalancing_weight = result.get('rebalancing_weight', None)
    order_weight = result.get('order_weight', None)
    turnover_weight = result.get('turnover_weight', None)
    portfolio_weight_history_df = result.get('portfolio_weight_history', None)
    rebalancing_factor_history_df = result.get('rebalancing_factor_history',
                                               None)
    portfolio_log = performance["portfolio_log"]
    monthly_returns = performance["monthly_returns"]
    annual_summary = performance["annual_summary"]
    performance_summary = performance["performance_summary"]
    returns_until_next_rebal = performance.get('returns_until_next_rebal',
                                               None)
    with pd.ExcelWriter(path, datetime_format="yyyy-mm-dd") as writer:
        portfolio_log.to_excel(writer, sheet_name="portfolio log")
        monthly_returns.to_excel(writer, sheet_name="월별수익률")
        annual_summary.to_excel(writer, sheet_name="연도별 요약")
        performance_summary.to_excel(writer, sheet_name="요약")
        workbook = writer.book
        if turnover_weight is not None:
            turnover_weight.to_frame("회전율").to_excel(writer, sheet_name="회전율")
        if event_log is not None and len(event_log) > 0:
            event_log.to_excel(writer, sheet_name="event log")
        if order_weight is not None:
            order_weight.to_excel(writer, sheet_name="주문 비중")
        if returns_until_next_rebal is not None:
            returns_until_next_rebal.to_excel(writer, sheet_name="리밸간 수익률")
        if insert_value_chart:
            # One line series per performance-summary column, x-axis from
            # the sheet's index column (col 0).
            sheet_name = 'portfolio log'
            worksheet = writer.sheets[sheet_name]
            chart = workbook.add_chart({'type': 'line'})
            for i in range(len(performance_summary.columns)):
                col = i + 1  # +1 skips the index column written by to_excel
                chart.add_series({
                    'name': [sheet_name, 0, col],
                    'categories': [sheet_name, 1, 0, len(portfolio_log), 0],
                    'values': [sheet_name, 1, col, len(portfolio_log), col],
                })
            chart.set_x_axis({'name': 'strategy'})
            chart.set_y_axis({
                'name': 'value',
                'major_gridlines': {
                    'visible': True
                }
            })
            chart.set_legend({'position': 'bottom'})
            worksheet.insert_chart('B2', chart)
        # NOTE(review): "percentge" is a typo (percentage) carried through
        # the local names below; harmless but worth renaming in a cleanup.
        percentge_styles = {
            'num_format': '0.00%',
        }
        float_styles = {
            'num_format': '0.00',
        }
        worksheet = writer.sheets['요약']
        percentge_format = workbook.add_format(percentge_styles)
        float_format = workbook.add_format(float_styles)
        # Last data column letter of the summary sheet (falls back to column
        # B when the summary is a Series rather than a DataFrame).
        columns_alphabet = xl_col_to_name(
            len(performance_summary.columns)
        ) if type(performance_summary) is pd.DataFrame else xl_col_to_name(1)
        # Conditional formats are used purely to apply number formats to
        # fixed row bands of the summary sheet (criteria >= -999 matches
        # everything). Rows 4-7 and 9: percentages; row 8: plain float.
        # NOTE(review): the row bands are hard-coded — confirm they match
        # the performance_summary row layout.
        worksheet.conditional_format(
            f'B4:{columns_alphabet}7', {
                'type': 'cell',
                'criteria': '>=',
                'value': -999,
                'format': percentge_format
            })
        worksheet.conditional_format(
            f'B8:{columns_alphabet}8', {
                'type': 'cell',
                'criteria': '>=',
                'value': -999,
                'format': float_format
            })
        worksheet.conditional_format(
            f'B9:{columns_alphabet}9', {
                'type': 'cell',
                'criteria': '>=',
                'value': -999,
                'format': percentge_format
            })
        if compare_with_bench:
            # The benchmark is the first asset.
            benchmark_series = portfolio_log.iloc[:, 0]
            strategies_ratio_df = portfolio_log.divide(benchmark_series,
                                                       axis=0)
            # Place the ratio table 3 columns to the right of the log.
            start_col = len(portfolio_log.columns) + 3
            strategies_ratio_df.to_excel(writer,
                                         sheet_name="portfolio log",
                                         startrow=0,
                                         startcol=start_col)
            workbook = writer.book
            sheet_name = 'portfolio log'
            worksheet = writer.sheets[sheet_name]
            chart = workbook.add_chart({'type': 'line'})
            for i in range(len(strategies_ratio_df.columns)):
                col = i + start_col + 1
                chart.add_series({
                    'name': [sheet_name, 0, col],
                    'categories': [
                        sheet_name, 1, start_col,
                        len(portfolio_log), start_col
                    ],
                    'values': [sheet_name, 1, col, len(portfolio_log), col],
                })
            chart.set_x_axis({'name': 'strategy'})
            chart.set_y_axis({
                'name': 'value',
                'major_gridlines': {
                    'visible': True
                }
            })
            chart.set_legend({'position': 'bottom'})
            worksheet.insert_chart(f'{xl_col_to_name(start_col + 5)}2', chart)
        portfolio_log['port_value'].to_excel(writer,
                                             sheet_name="value_history")
        if portfolio_weight_history_df is not None:
            portfolio_weight_history_df.to_excel(writer,
                                                 sheet_name="weight_history")
        if rebalancing_weight is not None and len(rebalancing_weight) > 0:
            rebalancing_weight.to_excel(writer,
                                        sheet_name="rebalancing_history")
        if rebalancing_factor_history_df is not None and len(
                rebalancing_factor_history_df) > 0:
            rebalancing_factor_history_df.to_excel(
                writer, sheet_name="rebalancing_factor_history")
def result_value_generator(result_values):
    '''
    Generate the Screen Result load file format from an API generated
    result value list:
    - Split the well_id into the plate_number and well_name columns
    - Convert the API schema assaywell.control_type to the load file format
      values using the ASSAY_WELL_CONTROL_TYPES mapping

    Yields one OrderedDict per result value, keyed by the load-file header
    row.

    NOTE(review): this is Python 2 code (dict.has_key). It also reads
    `fields`, `data_column_keys`, `non_data_column_keys` and several
    constants from the enclosing/module scope — it appears to be the nested
    generator from create_output_data; confirm which copy is actually used.
    '''
    logger.info('Write the result values sheet...')
    header_row = []
    header_row.extend(RESULT_VALUE_FIELD_MAP.keys())
    header_row.extend([
        fields[key].get('title', key) for key in data_column_keys])
    header_row.extend(non_data_column_keys)
    logger.info('Result Values Header row: %r', header_row)
    # Invert the vocabulary: API value -> load-file label.
    control_type_mapping = {v:k for k,v in ASSAY_WELL_CONTROL_TYPES.items()}
    row_count = 0
    for result_value in result_values:
        row_count += 1
        if DEBUG_IMPORTER:
            logger.info('result_value: %d: %r', row_count, result_value)
        row = []
        # well_id "plate:well" -> two separate columns.
        row.extend(result_value['well_id'].split(':'))
        control_type = result_value.get(SCREEN_RESULT.ASSAY_CONTROL_TYPE)
        if control_type:
            control_type = default_converter(
                result_value['assay_well_control_type'])
            # note: "empty", "experimental", "buffer" are values that can be
            # found in this column, due to legacy data entry, but they are
            # not valid
            if control_type in control_type_mapping:
                row.append(control_type_mapping[control_type])
            else:
                row.append(None)
        else:
            row.append(None)
        # Excluded data columns become a comma-joined list of Excel column
        # letters (offset past the fixed result-value columns).
        excluded_cols = []
        if result_value.has_key('exclude') and result_value['exclude']:
            temp = result_value['exclude']
            if hasattr(temp, 'split'):
                temp = temp.split(LIST_DELIMITER_SQL_ARRAY)
            logger.debug('excluded data_column_keys: find %r, in %r',
                temp, data_column_keys)
            for data_column_name in temp:
                # excluded_cols.append(get_column_letter(
                #     len(RESULT_VALUE_FIELD_MAP)+1
                #     +data_column_keys.index(data_column_name)))
                excluded_cols.append(xl_col_to_name(
                    len(RESULT_VALUE_FIELD_MAP)
                    + data_column_keys.index(data_column_name)))
            excluded_cols = sorted(excluded_cols)
        row.append(','.join(excluded_cols))
        if DEBUG_IMPORTER:
            logger.info('write rvs: data_column_keys: %r',
                data_column_keys)
        for j,key in enumerate(data_column_keys):
            if result_value.has_key(key):
                row.append(result_value[key])
            else:
                row.append(None)
        # append the non-result value columns to the end of the row
        for j,key in enumerate(non_data_column_keys):
            if result_value.has_key(key):
                row.append(result_value[key])
        if row_count % 10000 == 0:
            logger.info('generated %d rows', row_count)
        if DEBUG_IMPORTER:
            logger.info('generate row %d: %r',row_count, row)
        yield OrderedDict(zip(header_row,row))
""" #worksheet.freeze_panes(1, 0) #notnull_range = xl_range(1,names_dict['Not-NULL'],num_rows,names_dict['Not-NULL']) #incor_range = xl_range(1,names_dict['Incorrect Data'],num_rows,names_dict['Incorrect Data']) #total_range = xl_range(1,names_dict['Total'],num_rows,names_dict['Total']) #perc_nn_str = '{=' + notnull_range + '/' + total_range + '}' #worksheet.write_array_formula(1,names_dict['% Not-NULL'],num_rows,names_dict['% Not-NULL'], perc_nn_str) #perc_comp_str = '{=(' + notnull_range + '-' + incor_range + ')/' + total_range + '}' #worksheet.write_array_formula(1,names_dict['% Complete'],num_rows,names_dict['% Complete'], perc_comp_str) """ notnull_let = xl_col_to_name(names_dict['Not-NULL']) incor_let = xl_col_to_name(names_dict['Incorrect Data']) total_let = xl_col_to_name(names_dict['Total']) for i in range(1, num_rows): nn_form_str = '=' + notnull_let + str(i+1) + '/' + total_let + str(i+1) comp_form_str = '=(' + notnull_let + str(i+1) + '-' + incor_let + str(i+1) + ')/' + total_let + str(i+1) worksheet.write_formula(i, names_dict['% Not-NULL'], nn_form_str) worksheet.write_formula(i, names_dict['% Complete'], comp_form_str) perc_format = workbook.add_format({'num_format': '0%'}) bg_red = workbook.add_format({'bg_color': '#FF8080'}) worksheet.set_column(names_dict['% Not-NULL'], names_dict['% Complete'], None,
def create_output_data(screen_facility_id, fields, result_values ):
    '''
    Translate API Screen Result data into Screen Result load file format:
    {
        'Screen Info': [ [ row1 ], [ row2 ]...].
        'Data Columns': [ [ row1 ], [ row2 ]...].
        'Data': [ [ row1 ], [ row2 ]...].
    }
    @param fields an iterable containing result_value data_column dicts and
    field information dicts for the non-result value columns
    @param result_values an iterable containing result_value dicts
    @return OrderedDict with 'Screen Info', 'Data Columns' and 'Data'
    sections; 'Data' is a lazy generator over result_values.

    NOTE(review): Python 2 code (indexable dict.keys()/items(), has_key).
    '''
    logger.info('create screen result output data for screen %r',
        screen_facility_id)
    data = OrderedDict()
    # 1. Meta sheet
    data['Screen Info'] = { 'Screen Number': screen_facility_id }
    # 2. Data Columns
    data_column_structure = []
    data_column_keys = []
    non_data_column_keys = []
    keys = sorted(fields.keys())
    # Partition fields into result-value data columns vs. extra
    # (non-result-value) reporting columns; assign a fallback ordinal.
    for i,key in enumerate(keys):
        field = fields[key]
        if 'ordinal' not in field:
            field['ordinal'] = i
        if ( field.get('is_datacolumn',False)
            or field.get('data_worksheet_column') is not None):
            data_column_keys.append(key)
        elif ( key not in RESULT_VALUE_FIELD_MAP.keys()
            and key not in REPORTING_NON_RV_COLUMNS ):
            non_data_column_keys.append(key)
    data_column_keys = sorted(
        data_column_keys, key=lambda x: fields[x]['ordinal'])
    non_data_column_keys = sorted(
        non_data_column_keys, key=lambda x: fields[x]['ordinal'])
    # Map data column name -> Excel column letter on the "Data" sheet
    # (data columns start after the fixed result-value columns).
    data_column_names_to_col_letter = {}
    for i, key in enumerate(data_column_keys):
        data_column_names_to_col_letter[fields[key]['name']] = \
            xl_col_to_name(len(RESULT_VALUE_FIELD_MAP)+i)
    logger.info('data columns: %r, non_data_column_keys: %r',
        data_column_keys, non_data_column_keys)
    logger.info('data_column_names_to_col_letter: %r',
        data_column_names_to_col_letter)
    logger.info('Build the data columns worksheet...')
    # Transpose/Pivot the field definitions into the output data_column sheet:
    # Row 0 - "Data" Worksheet Column
    # Row 1 - name
    # Row 2 - data_type
    # Row N - non data column fields
    # Column 0 - data column field label
    # Column 1-N data column values
    datacolumn_labels = DATA_COLUMN_FIELD_MAP.keys()
    header_row = [datacolumn_labels[0]]
    header_row.extend([
        xl_col_to_name(len(RESULT_VALUE_FIELD_MAP)+i)
        for i in range(len(data_column_keys))])
    for i,(output_label,field_key) in enumerate(
            DATA_COLUMN_FIELD_MAP.items()[1:]):
        row = [output_label]
        for j,key in enumerate(data_column_keys):
            field = fields[key]
            val = field.get(field_key)
            if field_key == 'data_type':
                # TODO: 20170731: migrate the screenresult datacolumn to use
                # "vocabulary_scope_ref" for the "positive" column types
                # This is a hack to preserve symmetry for read/write for now
                newval = None
                if val == 'string':
                    vocab_scope_ref = field.get('vocabulary_scope_ref')
                    if vocab_scope_ref == 'resultvalue.partitioned_positive':
                        newval = DATA_TYPE.PARTITIONED_POSITIVE
                    elif vocab_scope_ref \
                        == 'resultvalue.confirmed_positive_indicator':
                        newval = DATA_TYPE.CONFIRMED_POSITIVE
                elif val == 'boolean':
                    newval = DATA_TYPE.BOOLEAN_POSITIVE
                if newval:
                    logger.debug('converted: %r:%r to %r: %r',
                        key, field_key, val, newval)
                    val = newval
            if val:
                if field_key == 'is_follow_up_data':
                    if val == True:
                        val = 'Follow up'
                    elif val == False:
                        val = 'Primary'
                elif field_key == 'derived_from_columns':
                    logger.info('derived_from_columns: %r', val)
                    if field.get('screen_facility_id') == screen_facility_id:
                        # If the column is not available, just use the text as given
                        val = ', '.join([
                            data_column_names_to_col_letter.get(dc_name, dc_name)
                            for dc_name in val])
                        logger.info('Translated derived_from_columns: %r', val)
                    else:
                        # Derived column for another screen
                        val = ', '.join(val)
                row.append(val)
            else:
                row.append(None)
                logger.debug(
                    'Note: datacolumn schema key %r is null in result value: %r',
                    field_key, field)
        logger.debug('data column row: %r', row)
        data_column_structure.append(OrderedDict(zip(header_row,row)))
    data['Data Columns'] = data_column_structure
    logger.info('Data columns worksheet built.')
    # 3. Result Values sheet
    def result_value_generator(result_values):
        '''
        Generate the Screen Result load file format from an API generated
        result value list:
        - Split the well_id into the plate_number and well_name columns
        - Convert the API schema assaywell.control_type to the load file
          format values using the ASSAY_WELL_CONTROL_TYPES mapping
        '''
        logger.info('Write the result values sheet...')
        header_row = []
        header_row.extend(RESULT_VALUE_FIELD_MAP.keys())
        header_row.extend([
            fields[key].get('title', key) for key in data_column_keys])
        header_row.extend(non_data_column_keys)
        logger.info('Result Values Header row: %r', header_row)
        # Invert the vocabulary: API value -> load-file label.
        control_type_mapping = {v:k for k,v in ASSAY_WELL_CONTROL_TYPES.items()}
        row_count = 0
        for result_value in result_values:
            row_count += 1
            if DEBUG_IMPORTER:
                logger.info('result_value: %d: %r', row_count, result_value)
            row = []
            # well_id "plate:well" -> two separate columns.
            row.extend(result_value['well_id'].split(':'))
            control_type = result_value.get(SCREEN_RESULT.ASSAY_CONTROL_TYPE)
            if control_type:
                control_type = default_converter(
                    result_value['assay_well_control_type'])
                # note: "empty", "experimental", "buffer" are values that can be
                # found in this column, due to legacy data entry, but they are
                # not valid
                if control_type in control_type_mapping:
                    row.append(control_type_mapping[control_type])
                else:
                    row.append(None)
            else:
                row.append(None)
            # Excluded data columns become a comma-joined list of Excel
            # column letters (offset past the fixed result-value columns).
            excluded_cols = []
            if result_value.has_key('exclude') and result_value['exclude']:
                temp = result_value['exclude']
                if hasattr(temp, 'split'):
                    temp = temp.split(LIST_DELIMITER_SQL_ARRAY)
                logger.debug('excluded data_column_keys: find %r, in %r',
                    temp, data_column_keys)
                for data_column_name in temp:
                    # excluded_cols.append(get_column_letter(
                    #     len(RESULT_VALUE_FIELD_MAP)+1
                    #     +data_column_keys.index(data_column_name)))
                    excluded_cols.append(xl_col_to_name(
                        len(RESULT_VALUE_FIELD_MAP)
                        + data_column_keys.index(data_column_name)))
                excluded_cols = sorted(excluded_cols)
            row.append(','.join(excluded_cols))
            if DEBUG_IMPORTER:
                logger.info('write rvs: data_column_keys: %r',
                    data_column_keys)
            for j,key in enumerate(data_column_keys):
                if result_value.has_key(key):
                    row.append(result_value[key])
                else:
                    row.append(None)
            # append the non-result value columns to the end of the row
            for j,key in enumerate(non_data_column_keys):
                if result_value.has_key(key):
                    row.append(result_value[key])
            if row_count % 10000 == 0:
                logger.info('generated %d rows', row_count)
            if DEBUG_IMPORTER:
                logger.info('generate row %d: %r',row_count, row)
            yield OrderedDict(zip(header_row,row))
    data['Data'] = result_value_generator(result_values)
    return data