def save_model(args, user_map, item_map, row_factor, col_factor):
    """Save the user map, item map, row factor and column factor matrices
    in numpy format.

    These matrices together constitute the "recommendation model."

    Args:
      args:         input args to training job
      user_map:     user map numpy array
      item_map:     item map numpy array
      row_factor:   row_factor numpy array
      col_factor:   col_factor numpy array
    """
    model_dir = os.path.join(args['output_dir'], 'model')

    # if our output directory is a GCS bucket, write model files to /tmp,
    # then copy to GCS
    gs_model_dir = None
    if model_dir.startswith('gs://'):
        gs_model_dir = model_dir
        model_dir = '/tmp/{0}'.format(args['job_name'])

    os.makedirs(model_dir)
    np.save(os.path.join(model_dir, 'user'), user_map)
    np.save(os.path.join(model_dir, 'item'), item_map)
    np.save(os.path.join(model_dir, 'row'), row_factor)
    np.save(os.path.join(model_dir, 'col'), col_factor)

    if gs_model_dir:
        sh.gsutil('cp', '-r', os.path.join(model_dir, '*'), gs_model_dir)
def save_model(args, user_map, item_map, row_factor, col_factor):
    """Save the user map, item map, row factor and column factor matrices
    in numpy format.

    These matrices together constitute the "recommendation model."

    Args:
      args:         input args to training job
      user_map:     user map numpy array
      item_map:     item map numpy array
      row_factor:   row_factor numpy array
      col_factor:   col_factor numpy array
    """
    model_dir = os.path.join(args.output_dir, 'model')

    # write model files to /tmp, then copy to GCS
    gs_model_dir = model_dir
    model_dir = '/tmp/{0}'.format(args.job_name)

    os.makedirs(model_dir)
    np.save(os.path.join(model_dir, 'user'), user_map)
    np.save(os.path.join(model_dir, 'item'), item_map)
    np.save(os.path.join(model_dir, 'row'), row_factor)
    np.save(os.path.join(model_dir, 'col'), col_factor)

    sh.gsutil('cp', '-r', os.path.join(model_dir, '*'), gs_model_dir)
def save_model(args, user_map, item_map, row_factor, col_factor,
               item_ID_mapping_dd):
    """Save the user map, item map, row factor and column factor matrices
    in numpy format, plus the item ID mapping as CSV.

    These matrices together constitute the "recommendation model."

    Inputs:
      args:               input args to training job
      user_map:           user map numpy array
      item_map:           item map numpy array
      row_factor:         row_factor numpy array
      col_factor:         col_factor numpy array
      item_ID_mapping_dd: original item ID to rebased item ID mapping
    """
    model_dir = os.path.join(args.output_dir, 'model')

    # if our output directory is a GCS bucket, write model files to /tmp,
    # then copy to GCS
    gs_model_dir = None
    if model_dir.startswith('gs://'):
        gs_model_dir = model_dir
        model_dir = '/tmp/{0}'.format(args.job_name)

    os.makedirs(model_dir)
    np.save(os.path.join(model_dir, 'user'), user_map)
    np.save(os.path.join(model_dir, 'item'), item_map)
    np.save(os.path.join(model_dir, 'row'), row_factor)
    np.save(os.path.join(model_dir, 'col'), col_factor)
    item_ID_mapping_dd.to_csv(os.path.join(model_dir, 'item_ID_mapping_dd.csv'))

    if gs_model_dir:
        sh.gsutil('cp', '-r', os.path.join(model_dir, '*'), gs_model_dir)
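A minimal usage sketch for the second example above. The argparse.Namespace, bucket path, job name, and matrix shapes are all hypothetical, not taken from the examples:

import argparse
import numpy as np

# Placeholder arguments; output_dir and job_name are illustrative only.
my_args = argparse.Namespace(output_dir='gs://my-bucket/wals-output',
                             job_name='wals_job_0')

user_map = np.arange(100)             # row index -> original user ID
item_map = np.arange(50)              # column index -> original item ID
row_factor = np.random.rand(100, 10)  # user latent factors
col_factor = np.random.rand(50, 10)   # item latent factors

save_model(my_args, user_map, item_map, row_factor, col_factor)

Because the files end up being copied with sh.gsutil, the gsutil CLI has to be installed and authenticated on the machine running the job.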
def ensure_local_file(input_file):
    """
    Ensure the training ratings file is stored locally.
    """
    # stage the file in a unique temp directory before copying it down
    input_path = os.path.join('/tmp/', str(uuid.uuid4()))
    os.makedirs(input_path)
    tmp_input_file = os.path.join(input_path, os.path.basename(input_file))
    sh.gsutil("cp", "-r", input_file, tmp_input_file)
    return tmp_input_file
def ensure_local_file(input_file):
    """
    Ensure the training ratings file is stored locally.
    """
    if input_file.startswith('gs://'):
        input_path = os.path.join('/tmp/', str(uuid.uuid4()))
        os.makedirs(input_path)
        tmp_input_file = os.path.join(input_path, os.path.basename(input_file))
        sh.gsutil("cp", "-r", input_file, tmp_input_file)
        return tmp_input_file
    else:
        return input_file
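A quick sketch of calling ensure_local_file. The bucket path is a placeholder, and gsutil must be on the PATH for the copy to succeed:

import numpy as np

# Hypothetical GCS path; a plain local path would be returned unchanged.
ratings_path = ensure_local_file('gs://my-bucket/data/ratings.csv')
ratings = np.loadtxt(ratings_path, delimiter=',')  # assumes a headerless numeric CSV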
def save_model(r_factor, c_factor, job_dir='.', job_name='myjob'):
    """Save the row and column factor matrices, copying to GCS if job_dir is a bucket."""
    model_dir = os.path.join(job_dir, 'model')

    # if the target is a GCS bucket, write locally first, then copy up
    gs_model_dir = None
    if model_dir.startswith('gs://'):
        gs_model_dir = model_dir
        model_dir = '/tmp/{0}'.format(job_name)

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    np.save(os.path.join(model_dir, 'row'), r_factor)
    np.save(os.path.join(model_dir, 'col'), c_factor)

    if gs_model_dir:
        import sh
        sh.gsutil('cp', '-r', os.path.join(model_dir, '*'), gs_model_dir)
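A minimal sketch of calling this simpler variant with small random factor matrices; the job directory and job name are placeholders:

import numpy as np

row = np.random.rand(1000, 16)   # e.g. user latent factors
col = np.random.rand(500, 16)    # e.g. item latent factors

# A local job_dir works too; a gs:// path triggers the /tmp staging + gsutil copy.
save_model(row, col, job_dir='gs://my-bucket/jobs/wals_test', job_name='wals_test')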
def save_user_items_w(args, user_items_w):
    """Save the user-items weight dataframe in JSON format.
    """
    model_dir = os.path.join(args['output_dir'], 'model')

    # if our output directory is a GCS bucket, write model files to /tmp,
    # then copy to GCS
    gs_model_dir = None
    if model_dir.startswith('gs://'):
        gs_model_dir = model_dir
        model_dir = '/tmp/{0}'.format(args['job_name'])

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    user_items_w.to_json(os.path.join(model_dir, 'user_item_w.json'),
                         orient='records')

    if gs_model_dir:
        sh.gsutil('cp', '-r', os.path.join(model_dir, '*'), gs_model_dir)
def move(self, path_to_src, path_to_dest):
    self.logger.info("moving %s to %s", path_to_src, path_to_dest)
    path_to_dest = path_to_dest[1:]  # remove / from beginning
    path_to_src = path_to_src[1:]
    k = self.bucket.get_key(path_to_src)

    # assume that src is a directory if the key does not exist;
    # a more robust way would be to check whether the directory exists by
    # appending a slash to the path and trying to get it
    is_dir = k is None
    if not is_dir:
        k.copy(self.bucket, path_to_dest)
        k.delete()
        return

    path_to_dest += '/'
    path_to_src += '/'
    listing = self.bucket.list(path_to_src, '/')
    directories = [d for d in listing if self._is_dir(d)]
    files = [f for f in listing if not self._is_dir(f)]

    for key in directories:
        new_path = path_to_dest + key.name.split(path_to_src, 1)[1]
        self.create_directory('/' + new_path[:-1])
    for key in files:
        new_path = path_to_dest + key.name.split(path_to_src, 1)[1]
        key.copy(self.bucket, new_path)

    gsutil('rm', 'gs://%s/%s**' % (self.bucket_name, path_to_src))
def get_used_space(self):
    self.logger.debug("retrieving used space")
    return int(awk(gsutil('du', '-s', 'gs://cloudfusion'), '{print $1}'))
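For comparison, a minimal sketch (the bucket name is a placeholder) that parses the gsutil du -s output directly instead of piping it through awk; the first whitespace-separated field is the byte count:

import sh

# "gsutil du -s" prints a line like "123456  gs://my-bucket".
output = str(sh.gsutil('du', '-s', 'gs://my-bucket'))
used_bytes = int(output.split()[0])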