import joblib

# Feature is the project's own sentence-to-vector converter, defined elsewhere in the repository.


def main():
    feature = Feature()
    tests = []
    with open('./data/basis.txt') as p:
        for test_line in p:
            tests.append(feature.convert_sentence(test_line))

    max_width = 35  # TODO: derive this automatically instead of hard-coding
    # Zero-pad (or truncate) every feature vector to the same width
    tests = [(test + [0] * (max_width - len(test)))[:max_width] for test in tests]

    svc = joblib.load('./model/svc.pkl')
    prediction = svc.predict(tests)
    with open('./data/output', mode='w') as p:
        p.write("\n".join([
            '1' if result == '+1' else '-1'
            for result in prediction.tolist()
        ]))
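The hard-coded max_width = 35 is flagged as a TODO. A minimal sketch of one way to remove it, assuming the data/dataset.npz file produced by the preparation script below is available at prediction time (the file name is the only thing taken from that script; reading the width this way is an assumption, not the author's implementation):

import numpy

# Sketch: recover the padded feature width from the saved training dataset
# instead of hard-coding it.
dataset = numpy.load('data/dataset.npz')
max_width = dataset['trains'].shape[1]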
import statistics

import numpy

# Feature is the project's own sentence-to-vector converter, defined elsewhere in the repository.


def main():
    feature = Feature()
    trains, labels = [], []
    with open('data/train.list') as p:
        for train_line in p:
            label, _, train_txt = train_line.split(maxsplit=2)
            trains.append(feature.convert_sentence(train_txt))
            labels.append(label)

    # Fix the feature width at twice the mean vector length, then zero-pad or truncate
    max_width = int(statistics.mean([len(train) for train in trains]) * 2)
    trains = [(train + [0] * (max_width - len(train)))[:max_width] for train in trains]

    assert statistics.mean([len(train) for train in trains]) == max_width, \
        'feature vectors are not all the same width'
    assert len(trains) == len(labels), 'number of training samples and labels differ'

    # numpy.float / numpy.str are removed aliases; use the built-in types instead
    numpy.savez('data/dataset.npz',
                trains=numpy.array(trains, dtype=float),
                labels=numpy.array(labels, dtype=str))
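The prediction script above loads ./model/svc.pkl, but the training step that produces it is not shown in this section. A minimal sketch of what that step could look like, assuming scikit-learn's SVC with default parameters (only the file paths come from the scripts above; the estimator choice and settings are assumptions):

import joblib
import numpy
from sklearn.svm import SVC

# Sketch: fit a classifier on the saved dataset and persist it
# to the path the prediction script expects.
dataset = numpy.load('data/dataset.npz')
svc = SVC()
svc.fit(dataset['trains'], dataset['labels'])
joblib.dump(svc, './model/svc.pkl')

The labels are stored as strings ('+1' / '-1'), which scikit-learn accepts directly as class labels; this keeps the string comparison result == '+1' in the prediction script consistent.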