print >> sys.stderr, "\r*> [PreparingTrainTagDB]  %d train tag indexes [%2.2f%s parsed] ~%.2fs" % ( lines, (float(lines)/len(TrainTagIndex)*100), '%', (t2-t1)*1.0 )
	# return dictionaries
	TrainTagCoord = {}
	TrainTagIndex = {}
	return newTrainCoord, newTrainIndex

#-----------------------------------------
def print_help():
	"""Write the command-line usage text to stderr and exit with status 1.

	Used when the command-line arguments are not correct.
	"""
	# sys.stderr.write is used instead of the Python 2 only ``print >>``
	# statement: under Python 3 that statement is evaluated as a right-shift
	# expression and raises TypeError, so the usage text was never shown.
	# NOTE(review): the usage line lists three operands, but the argument
	# check in this script only requires two; the example suggests the last
	# one is a stdout redirect -- confirm and align the text.
	sys.stderr.write("\t Usage: python "+sys.argv[0]+" <train_set.txt>  <out_new_train_set>  <train_set_geoSpread.txt>\n")
	sys.stderr.write("\t e.g. python filteringTrainingSetWithGeoSpreading.py dataset/flickrVideosTrain_ok.txt dataset/flickrVideosTrain_db_geoSpread  > dataset/flickrVideosTrain_ok_geoSpread.txt \n")
	sys.exit(1)
#-----------------------------------------
""" Checking the arguments in input """
if len(sys.argv) >= 3:
	# Positional arguments: input train set first, output path second.
	trainFilePath, outFilePath = sys.argv[1], sys.argv[2]
else:
	# Too few arguments: print_help() reports the usage and exits.
	print_help()
#-----------------------------------------

# Loading Train Set
TrainTagCoord, TrainTagIndex = loadGroupOfTagsFromConvertedFile(trainFilePath, False)

# Thresholds handed to geographicSpread() together with the loaded train set.
minMatches, maxAvg = 50, 200.0
newTrainTagCoord, newTrainTagIndex = geographicSpread(
	TrainTagCoord, TrainTagIndex, minMatches, maxAvg)

# Persist the result of geographicSpread() at the requested output path.
serializeTrainSetDB(newTrainTagCoord, newTrainTagIndex, outFilePath)
  print >> sys.stderr, "\t Usage: python "+sys.argv[0]+" <train_set.txt> <geoNames.txt> <test_set.txt> > <outputFile.txt>"
  print >> sys.stderr, "\t e.g. python convertDatasetsIntoDBformat.py dataset/bak/flickrVideosTrain_ok.txt dataset/flickrVideosTrain_db extra/allCountries.txt extra/allCountries_db dataset/bak/flickrVideosTest_10_geo.txt dataset/flickrVideosTest_db"
  sys.exit(1)	
#-----------------------------------------
""" Checking the arguments in input """
# Three positional arguments are required: input file, output file and the
# dataset type ('geonames' or 'trainset').
if len(sys.argv) < 4:
	print_help()
else:
	inFile = sys.argv[1]
	outFile = sys.argv[2]
	typefile = sys.argv[3]
#-----------------------------------------

# sys.stderr.write replaces the Python 2 only ``print >>`` statement, which
# raises TypeError when the script is run under Python 3.
sys.stderr.write("Processing %s\n" % typefile)

if typefile == 'geonames':
	# Read GeoNames
	GeoNames, GeoNamesIndex = loadGeoNamesSplittingKeys( inFile )
	# Specify which filter use
	serializeGeoNamesFilterDB( GeoNames, GeoNamesIndex, outFile )
elif typefile == 'trainset':
	# Read train set
	TrainTagCoord, TrainTagIndex = loadGroupOfTagsFromConvertedFile( inFile )
	# Serialize train set
	serializeTrainSetDB( TrainTagCoord, TrainTagIndex, outFile )
else:
	# An unrecognised type used to fall through silently and exit with
	# status 0 without producing any output; report it and show the usage.
	sys.stderr.write("Unknown typefile '%s' (expected 'geonames' or 'trainset')\n" % typefile)
	print_help()





# Example #3
# 0
def print_help():
    """Write the command-line usage text to stderr and exit with status 1.

    The usage text names the three positional arguments the script reads
    from ``sys.argv``: input file, output file and dataset type
    ('geonames' or 'trainset').
    """
    #~ if the input parameters are not correct
    # sys.stderr.write is used instead of the Python 2 only ``print >>``
    # statement, which raises TypeError under Python 3 before the usage
    # text can be shown.
    # The previous text advertised "<train_set.txt> <geoNames.txt>
    # <test_set.txt> > <outputFile.txt>", which does not match the arguments
    # the script actually consumes (input, output, type).
    sys.stderr.write(
        "\t Usage: python " + sys.argv[0]
        + " <in_file> <out_file> <geonames|trainset>\n")
    sys.stderr.write(
        "\t e.g. python convertDatasetsIntoDBformat.py"
        " extra/allCountries.txt extra/allCountries_db geonames\n")
    sys.stderr.write(
        "\t e.g. python convertDatasetsIntoDBformat.py"
        " dataset/bak/flickrVideosTrain_ok.txt dataset/flickrVideosTrain_db trainset\n")
    sys.exit(1)


#-----------------------------------------
""" Checking the arguments in input """
# Three positional arguments are required: input file, output file and the
# dataset type compared against 'geonames' / 'trainset' below.
if len(sys.argv) < 4:
    print_help()
else:
    inFile = sys.argv[1]
    outFile = sys.argv[2]
    typefile = sys.argv[3]
#-----------------------------------------

# sys.stderr.write replaces the Python 2 only ``print >>`` statement, which
# raises TypeError when the script is run under Python 3.
sys.stderr.write("Processing %s\n" % typefile)

# NOTE(review): a typefile other than the values tested here is not reported
# as an error -- confirm whether it should be rejected.
if typefile == 'geonames':
    # Read GeoNames
    GeoNames, GeoNamesIndex = loadGeoNamesSplittingKeys(inFile)
    # Specify which filter use
    serializeGeoNamesFilterDB(GeoNames, GeoNamesIndex, outFile)
elif typefile == 'trainset':
    # Read train set
    TrainTagCoord, TrainTagIndex = loadGroupOfTagsFromConvertedFile(inFile)
    # Serialize train set
    serializeTrainSetDB(TrainTagCoord, TrainTagIndex, outFile)