def handle_transmitter_merge(self, job_syntax_item, endpoint):
    opname = job_syntax_item[OP_KEY]
    csv_key_val = var_replaced(self.variables, job_syntax_item, CSV_KEY)
    csv_filename = SqlExecutor.csv_name(csv_key_val)
    csv_data = self.csvdata(csv_filename)
    num_lines = len(csv_data)
    # do nothing for empty data set
    if num_lines <= 1:
        getLogger(LOG).info('skip empty csv')
        from mriya.sf_merge_wrapper import HEADER
        result_ids = BulkData(HEADER, [])
    else:
        objname = job_syntax_item[endpoint]
        conn = self.endpoints.endpoint(endpoint)
        max_batch_size = int(job_syntax_item[BATCH_SIZE_KEY])
        getLogger(STDOUT).info('EXECUTE: %s %s, lines count=%d',
                               opname, objname, num_lines - 1)
        t_before = time.time()
        if len(csv_data):
            result_ids = conn.soap_merge(objname, csv_data, max_batch_size)
        t_after = time.time()
        getLogger(STDOUT).info('SF %s Took time: %.2f'
                               % (opname, t_after - t_before))
    if NEW_IDS_TABLE in job_syntax_item:
        results_file_name = \
            SqlExecutor.csv_name(job_syntax_item[NEW_IDS_TABLE])
        with open(results_file_name, 'w') as result_ids_file:
            csv_data = csv_from_bulk_data(result_ids)
            result_ids_file.write(csv_data)
        getLogger(LOG).info('Saved result ids: %s', results_file_name)
    getLogger(LOG).info('Done: %s operation', opname)
def handle_job_item_(self, job_syntax_item):
    if job_syntax_item and QUERY_KEY in job_syntax_item:
        query = job_syntax_item[QUERY_KEY].strip()
        is_csv = CSV_KEY in job_syntax_item
        is_var = VAR_KEY in job_syntax_item
        is_cache = CACHE_KEY in job_syntax_item
        if not query:
            return
        getLogger(LOG).info(job_syntax_item)
        if not is_var and is_csv and is_cache:
            csv_key_val = var_replaced(self.variables, job_syntax_item,
                                       CSV_KEY)
            csv_name = SqlExecutor.csv_name(csv_key_val)
            csv_size = SqlExecutor.csv_size(csv_key_val)
            if csv_size and SqlExecutor.valid_csv_exist(csv_key_val):
                getLogger(LOG).info("SKIP query: '%s', csvfile exist: %s",
                                    query, csv_name)
                return
        sql_exec = self.create_executor(job_syntax_item)
        try:
            sql_exec.execute()
        except:
            print(job_syntax_item)
            raise
        sql_exec.saved_csv()
        self.post_operation(job_syntax_item)
        self.variables = sql_exec.variables
        del sql_exec
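# Illustrative note (not part of the original module): a job_syntax_item is a
# plain dict produced by the job-syntax parser, keyed by the *_KEY constants
# defined elsewhere.  Purely as an assumed example of the shape that
# handle_job_item_() expects, a cached csv-producing item could look like:
#
#     {QUERY_KEY: "SELECT Id FROM accounts",
#      CSV_KEY: "account_ids",
#      CACHE_KEY: True}
#
# i.e. a non-empty query plus csv and cache keys (and no var key), which lets
# an already-saved, valid csv file short-circuit re-execution above.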
def _create_script(self, variables):
    imports = ''
    if CSVLIST_KEY in self.job_syntax_item:
        for table_name in self.job_syntax_item[CSVLIST_KEY]:
            # check if table name contains vars
            table_name = SqlExecutor.prepare_query_put_vars(
                table_name, self.variables)
            imports += ".import {csv} {name}\n"\
                .format(csv=self.csv_name(table_name), name=table_name)
    output = ''
    if CSV_KEY in self.job_syntax_item:
        table_name = var_replaced(variables, self.job_syntax_item, CSV_KEY)
        output += ".headers on\n"
        output += ".output {csv}\n"\
            .format(csv=self.csv_name(table_name))
        getLogger(LOG).info('working on table=%s', table_name)
    elif VAR_KEY in self.job_syntax_item:
        output += ".headers off\n"
        output += ".output stdout\n"
    elif BATCH_BEGIN_KEY in self.job_syntax_item:
        output += ".headers on\n"
        output += ".output stdout\n"
    getLogger(MOREINFO).info('EXECUTE [CSV]: %s', self.get_query())
    input_data = SQLITE_SCRIPT_FMT.format(imports=imports,
                                          output=output,
                                          query=self.get_query())
    return input_data
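# Illustrative note (not part of the original module): SQLITE_SCRIPT_FMT is
# defined elsewhere; _create_script() only fills its imports/output/query
# slots.  Assuming the template simply concatenates those pieces, the sqlite3
# dot-command script generated for a job item with csvlist=[accounts] and
# csv=result would look roughly like:
#
#     .import accounts.csv accounts
#     .headers on
#     .output result.csv
#     SELECT ... ;
#
# i.e. referenced csv files are imported as tables, result output is
# redirected to a csv file (or to stdout for var/batch items), and the query
# runs last.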
def execute(self):
    t_before = time.time()
    executor = Executor()
    cmd = 'sqlite3 -batch'
    script = self._create_script(self.variables)
    getLogger(LOG).debug('Sqlite script:\n%s', script)
    executor.execute('refname', cmd, input_data=script, output_pipe=True)
    res = executor.poll_for_complete(observer)
    del executor
    t_after = time.time()
    csvname = ''
    if CSV_KEY in self.job_syntax_item:
        csvname = var_replaced(self.variables, self.job_syntax_item, CSV_KEY)
        self.fix_empty_res_table(csvname)
    if ismoreinfo():
        getLogger(MOREINFO).info('%s.csv - %.2fs'
                                 % (csvname, t_after - t_before))
    else:
        getLogger(STDOUT).info('.')
    res = res['refname']
    if res[0] != 0:
        raise Exception("Sqlite query error", self.get_query())
    else:
        self._handle_var_create(res)
    retcode = res[1]
    return retcode
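# Illustrative note (not part of the original module): Executor is implemented
# elsewhere; conceptually the execute()/poll_for_complete() pair above amounts
# to piping the generated script into a sqlite3 child process, roughly:
#
#     import subprocess
#     proc = subprocess.Popen(['sqlite3', '-batch'],
#                             stdin=subprocess.PIPE,
#                             stdout=subprocess.PIPE)
#     out, _ = proc.communicate(script)
#     # proc.returncode and out roughly correspond to the result tuple that
#     # poll_for_complete() stores under the 'refname' key
#
# This is only a sketch of the intent; asynchronous polling, error handling
# and the observer callback belong to the real Executor.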
def handle_transmitter_op(self, job_syntax_item, endpoint):
    opname = job_syntax_item[OP_KEY]
    # run batches sequentially / parallel
    if BATCH_TYPE_KEY in job_syntax_item:
        if job_syntax_item[BATCH_TYPE_KEY] == BATCH_TYPE_PARALLEL_KEY:
            batch_seq = False
        elif job_syntax_item[BATCH_TYPE_KEY] == BATCH_TYPE_SEQUENTIAL_KEY:
            batch_seq = True
        else:
            getLogger(STDERR).error('Unknown batch type: %s',
                                    job_syntax_item[BATCH_TYPE_KEY])
            exit(1)
    else:
        batch_seq = False  # parallel by default
    csv_key_val = var_replaced(self.variables, job_syntax_item, CSV_KEY)
    csv_filename = SqlExecutor.csv_name(csv_key_val)
    csv_data = self.csvdata(csv_filename)
    num_lines = len(csv_data)
    # do nothing for empty data set
    if num_lines <= 1:
        getLogger(LOG).info('skip empty csv')
        stub = ['"Id","Success","Created","Error"\n']
        result_ids = parse_batch_res_data(stub)
    else:
        objname = job_syntax_item[endpoint]
        conn = self.endpoints.endpoint(endpoint)
        max_batch_size = int(job_syntax_item[BATCH_SIZE_KEY])
        getLogger(STDOUT).info('EXECUTE: %s %s, lines count=%d',
                               opname, objname, num_lines - 1)
        t_before = time.time()
        if len(csv_data):
            if opname == OP_UPDATE:
                res = conn.bulk_update(objname, csv_data, max_batch_size,
                                       batch_seq)
            elif opname == OP_DELETE:
                res = conn.bulk_delete(objname, csv_data, max_batch_size,
                                       batch_seq)
            elif opname == OP_INSERT:
                res = conn.bulk_insert(objname, csv_data, max_batch_size,
                                       batch_seq)
            else:
                getLogger(STDERR).error("Operation '%s' isn't supported"
                                        % opname)
                exit(1)
            result_ids = parse_batch_res_data(res)
        t_after = time.time()
        getLogger(STDOUT).info('SF %s Took time: %.2f'
                               % (opname, t_after - t_before))
    if NEW_IDS_TABLE in job_syntax_item:
        results_file_name = \
            SqlExecutor.csv_name(job_syntax_item[NEW_IDS_TABLE])
        with open(results_file_name, 'w') as result_ids_file:
            csv_data = csv_from_bulk_data(result_ids)
            result_ids_file.write(csv_data)
        getLogger(LOG).info('Saved result ids: %s', results_file_name)
    getLogger(LOG).info('Done: %s operation', opname)
def handle_result(self, bulk_res):
    # handle empty result - fix it by adding column names
    if bulk_res and bulk_res[0] == EMPTY_SF_RESPONSE:
        cols = SqlExecutor.get_query_columns(self.get_query())
        header = ','.join(cols)
        bulk_res = [header]
    if len(bulk_res) > 1:
        # ignore last empty results
        bulk_res = bulk_res[:-1]
    # handle result
    if CSV_KEY in self.job_syntax_item:
        csv_key_val = var_replaced(self.variables, self.job_syntax_item,
                                   CSV_KEY)
        csvfname = SqlExecutor.csv_name(csv_key_val)
        bulk_data.save_escape_csv_lines_as_csv_file(csvfname, bulk_res)
    elif VAR_KEY in self.job_syntax_item:
        res = bulk_data.parse_batch_res_data(bulk_res)
        if res.rows:
            self.save_var(self.job_syntax_item[VAR_KEY], res.rows[0][0])
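# Illustrative note (not part of the original module): parse_batch_res_data()
# and csv_from_bulk_data() are helpers defined elsewhere in the project.  The
# handlers above only rely on the parsed object exposing column names and
# rows; assuming BulkData is a simple (fields, rows) namedtuple, the contract
# they depend on is roughly:
#
#     from collections import namedtuple
#     BulkData = namedtuple('BulkData', ['fields', 'rows'])
#
#     # header line plus data lines in, BulkData out
#     res = parse_batch_res_data(['"Id","Success","Created","Error"\n',
#                                 '"001...","true","true",""\n'])
#     first_value = res.rows[0][0]   # what save_var() receives above
#
# The exact field names and parsing rules live in the real bulk_data module.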