def generateLibSVMDataset(self, filename):
    """Dump the collected samples as a LIBSVM-style dataset.

    Three files are written under ``../feature_set/``:

    * ``<filename>.libsvm`` -- one line per sample: the numeric label
      followed by space-separated ``index:value`` pairs.
    * ``<filename>.id`` -- one line per sample holding ``meta['user_id']``,
      in the same order as the ``.libsvm`` rows.
    * ``<filename>.fmap`` -- ``str(self.indexer)``.

    :param filename: base name (without extension) of the output files.
    """
    feature_group = self.collectFeatureAndGroup()
    datadir = '../feature_set/'
    base_path = datadir + filename

    # Context managers guarantee both handles are closed, even if a sample
    # fails to convert half-way through (the .id handle used to be leaked).
    with open(base_path + '.libsvm', 'w') as out, \
            open(base_path + '.id', 'w') as idout:
        for pair in feature_group:
            # pair = (context, label, meta)
            context = pair[0]
            label = pair[1]
            meta = pair[2]

            converted = self.convertSingleToTupleList(context)
            fields = [str(friends.groupToNumeric(label))]
            fields.extend(
                str(index) + ':' + str(value) for index, value in converted
            )
            out.write(' '.join(fields) + '\n')

            idout.write(str(meta['user_id']) + '\n')

    # Written last so it reflects the indexer after every sample was converted.
    with open(base_path + '.fmap', 'w') as fmap_out:
        fmap_out.write(str(self.indexer))
def _collectData(pair):
    """Turn a ``(context, label, ...)`` pair into a 3-tuple.

    Returns ``(context, numeric_label, 1.0)`` where ``numeric_label`` is the
    string form of ``friends.groupToNumeric(label)``.
    """
    features = pair[0]
    numeric_group = friends.groupToNumeric(pair[1])
    # NOTE(review): the constant 1.0 is presumably a per-sample weight --
    # confirm against the caller.
    return features, str(numeric_group), 1.0