def loadOrTrainLSHModel(forceGenerate=False):
    """Return the LSH model, loading the cached copy or training a new one.

    When ``./pickle_files/lshModel.pickle`` exists and ``forceGenerate`` is
    false, the model is unpickled from that file and returned. Otherwise a
    fresh ``LSH`` instance is trained on every valid frame of every training
    file, written to that same path, and returned.

    Args:
        forceGenerate: When true, ignore any cached model on disk and retrain.

    Returns:
        The unpickled or freshly trained model object.
    """
    modelPath = "./pickle_files/lshModel.pickle"

    if os.path.exists(modelPath) and not forceGenerate:
        print("LSH model found on disk")
        # NOTE(review): pickle.load can execute arbitrary code embedded in the
        # file; this is only safe while the cache is written by this project.
        with open(modelPath, "rb") as pickleIn:
            return pickle.load(pickleIn)

    print("Training LSH model")
    trainAudioDataAndRateArray = loadAllFiles("train", '')
    trainingData = generateData(trainAudioDataAndRateArray)

    lshModel = LSH()
    for data in trainingData:
        # data[0] is handed to extractValidFrames and data[1] is used as the
        # file name label; presumably (audio features, file name) -- confirm
        # against generateData.
        fileName = data[1]
        print("fileName", fileName)
        validFrameList = extractValidFrames(data[0])
        for i in range(validFrameList.shape[0]):
            # Each frame is trained individually as a single-row 2-D array.
            reshapedValidFrame = validFrameList[i].reshape(1, -1)
            lshModel.train(reshapedValidFrame, {
                "name": fileName + "_" + str(i),
                "frameIndex": i,
            })

    # Create the cache directory first so a finished training run is not lost
    # to a missing-directory error at save time.
    os.makedirs(os.path.dirname(modelPath), exist_ok=True)
    with open(modelPath, "wb") as pickleOut:
        pickle.dump(lshModel, pickleOut)
    return lshModel
def _option_value(flag, argv):
    """Return the command-line token that immediately follows ``flag``.

    Args:
        flag: Option name known to be present in ``argv`` (e.g. ``"--query"``).
        argv: The full argument list to search.

    Returns:
        The argument directly after the first occurrence of ``flag``.

    Raises:
        SystemExit: If ``flag`` is the last token, i.e. its value is missing.
    """
    position = argv.index(flag) + 1
    if position >= len(argv):
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("%s requires an argument (see --help)" % flag)
    return argv[position]


# Command-line dispatch. Each option is checked independently, so several may
# be combined in a single invocation (e.g. "--train --query <ID>").
if "-h" in sys.argv or "--help" in sys.argv:
    print(
        "Usage: ./nn.py [OPTION] \n\n"
        " -h | --help Show this help message and exit \n"
        " --fetch <plugin> Fetch new data with proprietary plugin \n"
        " --train Train LSH model \n"
        " --query <ID> Nearest Neighbor query \n")
    # sys.exit rather than the site-provided exit(), which is not guaranteed
    # to exist (e.g. under "python -S").
    sys.exit(0)

if "--fetch" in sys.argv:
    # Take the value that follows the flag instead of a fixed argv slot, so
    # the option works wherever it appears on the command line.
    pluginName = _option_value("--fetch", sys.argv).replace('.py', '')
    # The object returned by Dimport is called with the dataset location.
    Dimport(pluginName, pluginName, FULLNAME('plugins'))(CONST.DATASET)

if "--train" in sys.argv:
    dataframe = pd.read_csv(CONST.DATASET)
    corpus = TfidfVectorizer().fit_transform(dataframe['content'])
    lsh = LSH(corpus)
    model = lsh.train()
    # Context manager guarantees the model file is flushed and closed.
    with open(CONST.MODEL, 'wb') as modelFile:
        pickle.dump(model, modelFile)

if "--query" in sys.argv:
    print(query(_option_value("--query", sys.argv)))
# eof