print >> sys.stderr, "\r*> [PreparingTrainTagDB] %d train tag indexes [%2.2f%s parsed] ~%.2fs" % ( lines, (float(lines)/len(TrainTagIndex)*100), '%', (t2-t1)*1.0 ) # return dictionaries TrainTagCoord = {} TrainTagIndex = {} return newTrainCoord, newTrainIndex
# NOTE(review): the line above is the tail of a function whose header lies
# outside this chunk; it is left untouched rather than re-indented by guesswork.

#-----------------------------------------
def print_help():
    """Write the usage text to stderr and terminate with exit status 1."""
    usage_lines = (
        "\t Usage: python "+sys.argv[0]+" <train_set.txt> <out_new_train_set> <train_set_geoSpread.txt>",
        "\t e.g. python filteringTrainingSetWithGeoSpreading.py dataset/flickrVideosTrain_ok.txt dataset/flickrVideosTrain_db_geoSpread > dataset/flickrVideosTrain_ok_geoSpread.txt ",
    )
    for text in usage_lines:
        # Same bytes as the print statement: the text followed by a newline.
        sys.stderr.write(text + "\n")
    sys.exit(1)

#-----------------------------------------
# Check the command line: at least two positional arguments are required.
if len(sys.argv) >= 3:
    trainFilePath = sys.argv[1]
    outFilePath = sys.argv[2]
else:
    # print_help() does not return; it exits the interpreter.
    print_help()

#-----------------------------------------
# Load the train set from the path given as first argument.
TrainTagCoord, TrainTagIndex = loadGroupOfTagsFromConvertedFile(trainFilePath, False)

# Thresholds handed to geographicSpread().
minMatches = 50
maxAvg = 200.0

# Filter the train set, then serialize the result to the output path.
newTrainTagCoord, newTrainTagIndex = geographicSpread(
    TrainTagCoord, TrainTagIndex, minMatches, maxAvg
)
serializeTrainSetDB(newTrainTagCoord, newTrainTagIndex, outFilePath)
print >> sys.stderr, "\t Usage: python "+sys.argv[0]+" <train_set.txt> <geoNames.txt> <test_set.txt> > <outputFile.txt>" print >> sys.stderr, "\t e.g. python convertDatasetsIntoDBformat.py dataset/bak/flickrVideosTrain_ok.txt dataset/flickrVideosTrain_db extra/allCountries.txt extra/allCountries_db dataset/bak/flickrVideosTest_10_geo.txt dataset/flickrVideosTest_db" sys.exit(1) #----------------------------------------- """ Checking the arguments in input """ if len(sys.argv) < 4: print_help() else: inFile = sys.argv[1] outFile = sys.argv[2] typefile = sys.argv[3] #----------------------------------------- print >> sys.stderr, "Processing %s" % typefile if typefile == 'geonames': # Read GeoNames GeoNames, GeoNamesIndex = loadGeoNamesSplittingKeys( inFile ) # Specify which filter use serializeGeoNamesFilterDB( GeoNames, GeoNamesIndex, outFile ) elif typefile == 'trainset': # Read train set TrainTagCoord, TrainTagIndex = loadGroupOfTagsFromConvertedFile( inFile ) # Serialize train set serializeTrainSetDB( TrainTagCoord, TrainTagIndex, outFile )
def print_help():
    """Write the command-line usage text to stderr and exit with status 1.

    Called when too few command-line arguments are supplied. The text
    describes the three positional arguments the script actually reads:
    the input file, the output path and the file type.
    """
    sys.stderr.write(
        "\t Usage: python " + sys.argv[0]
        + " <input_file> <output_db> <geonames|trainset>\n"
    )
    sys.stderr.write(
        "\t e.g. python convertDatasetsIntoDBformat.py"
        " extra/allCountries.txt extra/allCountries_db geonames\n"
    )
    sys.stderr.write(
        "\t e.g. python convertDatasetsIntoDBformat.py"
        " dataset/bak/flickrVideosTrain_ok.txt dataset/flickrVideosTrain_db trainset\n"
    )
    sys.exit(1)


#-----------------------------------------
# Check the command line: input path, output path and file type are required.
if len(sys.argv) < 4:
    # print_help() does not return; it exits the interpreter.
    print_help()
else:
    inFile = sys.argv[1]
    outFile = sys.argv[2]
    typefile = sys.argv[3]

#-----------------------------------------
sys.stderr.write("Processing %s\n" % typefile)

if typefile == 'geonames':
    # Read GeoNames
    GeoNames, GeoNamesIndex = loadGeoNamesSplittingKeys(inFile)
    # Serialize the loaded GeoNames structures to outFile
    serializeGeoNamesFilterDB(GeoNames, GeoNamesIndex, outFile)
elif typefile == 'trainset':
    # Read train set
    TrainTagCoord, TrainTagIndex = loadGroupOfTagsFromConvertedFile(inFile)
    # Serialize train set
    serializeTrainSetDB(TrainTagCoord, TrainTagIndex, outFile)
else:
    # An unrecognised type used to fall through silently and exit 0 without
    # producing any output; report it and fail instead.
    sys.stderr.write(
        "Unknown file type '%s': expected 'geonames' or 'trainset'\n" % typefile
    )
    sys.exit(1)