def save_as_csv(self, path, **params):
    """
    Saves the RDD to file as comma-separated values.

    Parameters
    ----------
    path : str
        The output file path.  Any existing file or directory at this
        path is deleted first.
    params : dict
        Optional keyword arguments passed through to `csv.writer`
        (e.g. delimiter, quotechar).
    """
    self._entry(path)

    def to_csv(row):
        # Serialize one element through the csv module so quoting and
        # escaping are handled consistently with the dialect params.
        sio = StringIO.StringIO()
        writer = csv.writer(sio, **params)
        try:
            # BUG FIX: the dialect params go to csv.writer above, not to
            # writerow -- writerow() takes no keyword arguments and the
            # original `writerow([row], **params)` raised TypeError for
            # any non-empty params.
            # NOTE(review): each element is written as a single-field
            # row; confirm elements are scalars rather than sequences.
            writer.writerow([row])
            return sio.getvalue()
        except IOError:
            # Best effort: skip rows that fail to serialize.
            return ''

    delete_file_or_dir(path)
    with open(path, 'w') as f:
        self.begin_iterator()
        elems_at_a_time = 10000
        ret = self.iterator_get_next(elems_at_a_time)
        while True:
            for row in ret:
                f.write(to_csv(row))
            # A short batch means the iterator is exhausted.
            if len(ret) == elems_at_a_time:
                ret = self.iterator_get_next(elems_at_a_time)
            else:
                break
    self._exit()
def save(self, path):
    """
    Save a model.

    The model can be saved, then reloaded later to provide
    recommendations.

    Parameters
    ----------
    path : str
        The path where the model will be saved.  Any existing file or
        directory at this path is removed first.  Three items are
        stored here: the underlying model parameters, the original
        ratings, and the column names.  These are stored with suffix
        '.model', '.ratings', and '.metadata'.
    """
    spark_context = CommonSparkContext.Instance().sc()

    # Start from a clean directory.
    delete_file_or_dir(path)
    os.makedirs(path)
    model_path, ratings_path, metadata_path = self._file_paths(path)

    # Underlying model parameters.
    self.model.save(spark_context, model_path)
    # Original ratings the model was built from.
    self.ratings.save(ratings_path)
    # Column names, pickled alongside the model.
    with open(metadata_path, 'w') as meta_file:
        pickle.dump([self.user_col, self.item_col, self.rating_col],
                    meta_file)
def save_as_text(self, path):
    """
    Saves the RDD to file as text.

    Parameters
    ----------
    path : str
        Output path for the saved text; any existing file or directory
        at this path is deleted first.

    Raises
    ------
    TypeError
        If the underlying Spark save fails.
    """
    self._entry(path)
    # this only works for local files
    delete_file_or_dir(path)
    try:
        self._rdd.saveAsTextFile(path)      # action ?
    except Exception:
        # BUG FIX: narrowed from a bare `except:` so KeyboardInterrupt
        # and SystemExit are no longer swallowed; callers still see the
        # same TypeError on failure.
        # TODO distinguish between filesystem errors and pickle errors
        raise TypeError('The XArray save failed.')
    metadata = self.elem_type
    metadata_path = os.path.join(path, 'metadata')
    with open(metadata_path, 'w') as md:
        # TODO detect filesystem errors
        pickle.dump(metadata, md)
    self._exit()