def getDifferentTrainAndTestData(trainDataSize, testDataSize):
    """Draw a random train split and a random test split from the training data.

    The full data set is loaded with ``dataReader.getWholeTrainingData()``
    (presumably a pandas DataFrame -- it has to provide ``.shape``,
    ``.sample`` and ``.iloc``).

    * If ``trainDataSize + testDataSize`` exceeds the number of rows, the two
      splits are sampled independently of each other, so the same row may
      show up in both. A split that is on its own larger than the data set
      is sampled with replacement, because it cannot be filled otherwise.
    * Otherwise the row positions are shuffled once and the two splits are
      cut from adjacent, non-overlapping slices, so they share no rows.

    Args:
        trainDataSize: Number of rows wanted in the train split.
        testDataSize: Number of rows wanted in the test split.

    Returns:
        A ``(trainData, testData)`` tuple.
    """
    data = dataReader.getWholeTrainingData()
    rowCount = data.shape[0]

    if trainDataSize + testDataSize > rowCount:
        # More rows requested than the data set holds: disjoint splits are
        # impossible, so sample each split independently.
        print("Getting different train & test data with possible duplicates")
        # sample() refuses n > rowCount unless replace=True, so switch to
        # sampling with replacement only for a split that needs it.
        trainData = data.sample(trainDataSize, replace=trainDataSize > rowCount)
        testData = data.sample(testDataSize, replace=testDataSize > rowCount)
        return trainData, testData

    print("Getting totally different train & test data")
    positions = np.arange(rowCount)
    random.shuffle(positions)  # shuffles in place
    # Positional lookup (.iloc): the shuffled values are row positions, not
    # index labels, so this stays correct for any DataFrame index.
    trainData = data.iloc[positions[:trainDataSize]]
    # The test slice starts right where the train slice ends; starting one
    # later would drop a row and shorten the test split when the two sizes
    # add up to exactly rowCount.
    testData = data.iloc[positions[trainDataSize:trainDataSize + testDataSize]]
    return trainData, testData