示例#1
0
 def __init__(self, partition_id, fpath):
     """Remember the target location and open a CSV writer on a temp file.

     Args:
         partition_id: id of the partition this object writes for.
         fpath: stored as the file path associated with this object; the
             writer itself is opened on the temporary path, not on this one.
     """
     self._partition_id = partition_id
     self._fpath = fpath
     # The temporary path is computed only after the fields above are set,
     # because `_get_tmp_fpath` may read them.
     tmp_fpath = self._get_tmp_fpath()
     self._tmp_fpath = tmp_fpath
     self._csv_dict_writer = csv_dict_writer.CsvDictWriter(tmp_fpath)
示例#2
0
 def _get_csv_dict_writer(self):
     """Return the current CSV writer, creating it on first use.

     When no writer exists yet, a new file path is built under
     `self._merged_dir` from the partition id and process index, a
     `CsvDictWriter` is opened on it, and the path is appended to
     `self._merged_fpaths`.

     Returns:
         The cached (or newly created) `CsvDictWriter`.
     """
     if self._csv_dict_writer is not None:
         return self._csv_dict_writer
     run_fname = common.encode_merged_sort_run_fname(
         self._partition_id, self._process_index
     )
     run_fpath = os.path.join(self._merged_dir, run_fname)
     writer = csv_dict_writer.CsvDictWriter(run_fpath)
     self._csv_dict_writer = writer
     # Record the path only once the writer has been opened successfully.
     self._merged_fpaths.append(run_fpath)
     return writer
示例#3
0
 def __init__(self, process_index, output_dir):
     """Set up the writer state and open a CSV writer on a temp file.

     Args:
         process_index: index of the process this object belongs to.
         output_dir: directory stored as the output location.
     """
     self._process_index = process_index
     self._output_dir = output_dir
     # The temporary path is generated after the fields above are set,
     # because `_gen_tmp_fpath` may read them.
     tmp_fpath = self._gen_tmp_fpath()
     self._tmp_fpath = tmp_fpath
     self._csv_writer = csv_dict_writer.CsvDictWriter(tmp_fpath)
     # The final path and the index range are unknown at construction time.
     self._fpath = None
     self._start_index = self._end_index = None
示例#4
0
 def _generate_input_csv(self, cands, base_dir):
     """Write the candidates into one CSV file per partition.

     Every item of `cands` is routed to partition
     `CityHash32(item) % partition_num` and written as a row with the
     columns `raw_id`, `feat_0`, `feat_1` and `feat_2`. Each `feat_N`
     value is the decimal string of `(partition_id << 30) + N` followed
     by the item itself.

     Args:
         cands: list of candidate ids. NOTE: the list is shuffled in place.
             Items are concatenated onto strings, so they are presumably
             `str` -- confirm against callers.
         base_dir: output directory; created if it does not exist yet.

     Returns:
         List of the written file paths (`<base_dir>/<partition_id>.rd`),
         indexed by partition id.
     """
     if not gfile.Exists(base_dir):
         gfile.MakeDirs(base_dir)
     random.shuffle(cands)
     partition_num = self._data_source_l.data_source_meta.partition_num
     fpaths = [
         os.path.join(base_dir, str(partition_id) + '.rd')
         for partition_id in range(partition_num)
     ]
     csv_writers = []
     # Opening and writing both happen inside the try block so that every
     # writer opened so far is closed even if a later open or write fails.
     try:
         for fpath in fpaths:
             csv_writers.append(csv_dict_writer.CsvDictWriter(fpath))
         for item in cands:
             partition_id = CityHash32(item) % partition_num
             raw = OrderedDict()
             raw['raw_id'] = item
             # The partition id goes into the high bits so feature values
             # of different partitions get different numeric prefixes.
             for feat_index in range(3):
                 raw['feat_%d' % feat_index] = \
                     str((partition_id << 30) + feat_index) + item
             csv_writers[partition_id].write(raw)
     finally:
         for csv_writer in csv_writers:
             csv_writer.close()
     return fpaths
示例#5
0
 def _get_csv_dict_writer(self):
     """Return the current CSV writer, creating it on first use.

     When no writer exists yet, a temporary file path is generated under
     `self._merged_dir`, stored in `self._tmp_fpath`, and a
     `CsvDictWriter` is opened on it.

     Returns:
         The cached (or newly created) `CsvDictWriter`.
     """
     if self._csv_dict_writer is not None:
         return self._csv_dict_writer
     tmp_fpath = common.gen_tmp_fpath(self._merged_dir)
     # Store the path before opening the writer, as the original did.
     self._tmp_fpath = tmp_fpath
     writer = csv_dict_writer.CsvDictWriter(tmp_fpath)
     self._csv_dict_writer = writer
     return writer