def predict(self, x_test):
    """Pair the devices in ``x_test`` with cookies from the 'desktops' data.

    Both tables are keyed on the ``anonymous_c2`` column and joined on it
    (the join type is whatever ``join`` defaults to on these objects --
    presumably pandas DataFrames, i.e. a left join; confirm against caller).

    :param x_test: table with at least ``device_id`` and ``anonymous_c2``
        columns.
    :return: the joined table, indexed by ``anonymous_c2``, with
        ``device_id`` moved back out of the index into a column next to
        ``cookie_id``.
    """
    join_column = 'anonymous_c2'

    desktop_frame = Repository.get_data('desktops')

    # Index the mobile rows by (join column, device_id) so that device_id
    # survives the join as an index level.
    device_columns = x_test[['device_id', join_column]]
    devices_indexed = device_columns.set_index([join_column, 'device_id'])

    cookie_columns = desktop_frame[['cookie_id', join_column]]
    cookies_indexed = cookie_columns.set_index([join_column])

    joined = devices_indexed.join(cookies_indexed)

    # Level 1 of the index is device_id; turn it back into a regular column.
    return joined.reset_index(level=1)
def regenerate_repodata(repository_path, marked_repository_path):
    """
    Re-generates the repodata for the given repository and applies the same
    data to the marked repository.

    Uses group.xml and patterns.xml from any path inside the repository; if
    these files don't exist they're unpacked from package-groups.rpm.

    A warning is logged (and processing continues) when the groups data or
    the patterns data read from the repository is missing.

    @param repository_path          The path to the repository.
    @param marked_repository_path   The path to the marked repository that
                                    receives the data read from the first
                                    one before its own derived data is
                                    generated.
    """
    source_repo = Repository(repository_path)
    data = source_repo.get_data()

    groups_missing = data.groups_data is None
    if groups_missing:
        message = "There is no groups data in {0}".format(repository_path)
        logging.warning(message)

    patterns_missing = data.patterns_data is None
    if patterns_missing:
        message = "There is no patterns data in {0}".format(repository_path)
        logging.warning(message)

    source_repo.generate_derived_data()

    # The marked repository reuses the data obtained from the source one.
    target_repo = Repository(marked_repository_path)
    target_repo.set_data(data)
    target_repo.generate_derived_data()
def create_submission(self):
    """Write the 'submission' data to a timestamped CSV file.

    The file is created as
    ``../output/<timestamp>-submission-<self.description>.csv`` where the
    timestamp is ``year_month_day_hour_minute``, and a confirmation line
    is printed once the file has been written.
    """
    submission = Repository.get_data('submission')

    # %m is the month and %M the minute; the two were previously swapped,
    # which produced names ordered year_minute_day_hour_month.
    time_stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M")

    file_name = '../output/{}-submission-{}.csv'.format(
        time_stamp, self.description)
    submission.to_csv(file_name, header=True, index=False)

    # A single pre-formatted argument prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("Submission file {} is created.".format(file_name))