def merge(path, dst):
    """
    Merges the files found under a directory into a single destination file.

    path: location of the directory whose files are merged.
    dst: location of the file that receives the merged content.

    Both locations are wrapped in a DatasetPath built from the current job
    configuration; the actual work is delegated to DatasetPath.copyTo().
    """
    source = DatasetPath(happy.getJobConf(), path)
    target = DatasetPath(happy.getJobConf(), dst)
    source.copyTo(target)
def createCollector(path, fs="dfs", type="text", compressiontype="lzo", sequencetype="BLOCK"):
    """
    Creates a file collector at the specified path.

    Anything that already exists at the path is deleted before the collector
    is created.  When a job is active (happy.job is not None) the collector is
    registered with it as a closeable, so that it is closed at the end of the
    job.

    path: location the collector writes to.
    fs: name passed to getFileSystem() to select the filesystem.
    type: kind of collector to create: "text" (default), "sequence" or "bjson".
    compressiontype: passed to _getCodecInstance(); only used by the
        "sequence" and "bjson" collectors.
    sequencetype: passed to _getSequenceFileType(); only used by the
        "sequence" and "bjson" collectors.

    Returns the newly created collector.
    Raises Exception if type is not one of the supported collector types; in
    that case nothing is deleted.
    """
    # Validate before touching the filesystem: a typo in `type` must not
    # delete the existing output and only then fail.
    if type not in ("sequence", "text", "bjson"):
        # %-formatting keeps the message readable even for a non-string type.
        raise Exception("Unknown collector type %s" % (type,))

    filesystem = getFileSystem(fs)
    # Start from a clean slate: remove any previous output at this path.
    DatasetPath(filesystem, path).deletePath()

    if type == "sequence":
        collector = TextSequenceFileCollector(
            filesystem,
            happy.getJobConf(),
            Path(path),
            _getSequenceFileType(sequencetype),
            _getCodecInstance(compressiontype))
    elif type == "text":
        collector = TextFileCollector(filesystem, happy.getJobConf(), Path(path))
    else:
        # Only "bjson" can reach this branch; type was validated above.
        collector = BJSONCollector(
            filesystem,
            happy.getJobConf(),
            Path(path),
            _getSequenceFileType(sequencetype),
            _getCodecInstance(compressiontype))

    # add as a closeable so that it is closed correctly:
    if happy.job is not None:
        happy.job.addCloseable(collector)
    return collector