def write_to_file(file_name, companies, summary_columns, wb_sheet): """ This method generates a new Excel file with the name <file_name> that will contains the data found in PreSeries for each entry in the original excel file :param file_name: the name of the file to be generated :param companies: a list of companies with the basic data found in PreSeries :param summary_columns: the columns in the original file that will be used in the new file to give more information about the companies. :param wb_sheet: the excel sheet of the original file where we will find the summary fields of the companies. """ workbook = Workbook() companies_sheet = workbook.add_sheet('Companies') # Build the header names header = ["Original Row", "Company Name", "Domain", "Country"] header.extend(summary_columns) \ if summary_columns and len(summary_columns) > 0 else None [companies_sheet.write(0, index, value) for index, value in enumerate(header)] for index, company_data in enumerate(companies): companies_sheet.write(1 + index, 0, company_data["row"]) companies_sheet.write( 1 + index, 1, company_data["name"].decode('utf-8', 'ignore') if "name" in company_data else "") companies_sheet.write( 1 + index, 2, company_data["country_code"] if "country_code" in company_data else "") companies_sheet.write( 1 + index, 3, company_data["domain"] if "domain" in company_data else "") for index2, summary_column in enumerate(summary_columns): try: columnvalue = wb_sheet.cell_value( company_data["row"], PreSeriesUtils.excel2num(summary_column)).encode('cp1252') except UnicodeEncodeError: columnvalue = wb_sheet.cell_value( company_data["row"], PreSeriesUtils.excel2num(summary_column)).encode('utf-8') pass companies_sheet.write(1 + index, 4 + index2, columnvalue.decode('utf-8', 'ignore')) workbook.save(file_name)
def read_search_data_from_excel(self, file_name, column_id=None, column_name=None, column_country=None, column_domain=None, skip_rows=False): """ This method is responsible for extract from an Excel file all the companies we will need to find in PreSeries. for build the query parameters that we are going to use to look for the companies in PreSeries informed in an Excel file. The query string will have only the id criteria or the name of the company if the id is not informed. The domain and country_code won't be used in the query, we will use them later for select the best match from all the candidates that matched the query. :return: a list where each row is one company which contains a tuple with two items, the query string to look in preseries for the company and the map with all the parameters used in the query """ logging.debug("Looking for the first sheet in the Excel.") wb = open_workbook(file_name) first_sheet = wb.sheets()[0] logging.debug("Sheet name [%s]." % first_sheet.name) self.companies_query = [] for row in range(skip_rows, first_sheet.nrows): logging.debug("Processing row: %d" % row) if column_id: company_id = first_sheet.cell_value( row, PreSeriesUtils.excel2num(column_id)) self.companies_query.append(("id=%s" % company_id, { "row": row, "id": company_id })) continue query_string = {} query_params = {"row": row} if column_name and \ first_sheet.cell_value( row, PreSeriesUtils.excel2num(column_name)): try: company_name = first_sheet.cell_value( row, PreSeriesUtils.excel2num(column_name)).encode('cp1252') except UnicodeEncodeError: company_name = first_sheet.cell_value( row, PreSeriesUtils.excel2num(column_name)).encode('utf-8') pass query_string['name__icontains'] = company_name query_params["name"] = company_name if column_domain: company_domain = PreSeriesUtils.resolve_domain( first_sheet.cell_value( row, PreSeriesUtils.excel2num(column_domain))) if company_domain: # We only use the domain after the search to select the # best candidate query_params["domain"] = company_domain if column_country and \ first_sheet.cell_value( row, PreSeriesUtils.excel2num(column_country)): country_code = PreSeriesUtils.resolve_country( first_sheet.cell_value( row, PreSeriesUtils.excel2num(column_country))) if country_code: # We only use the country_code after the search to # select the best candidate query_params['country_code'] = country_code self.companies_query.append( (urllib.urlencode(query_string), query_params))