def _CollectBatchResults(batchResult, responses, dimTable, seqLens, maxSeqLen,
                         names, store):
    """Slice one batch of predictions per response and append them to store.

    batchResult is indexed as (protein in batch, position, column); its last
    axis is the concatenation, in the order of `responses`, of one column
    group per response. The width of each group is
    dimTable[Response2LabelType(response)].

    For every protein the leading (maxSeqLen - seqLen) positions are dropped
    before the slice is appended to store[name][response]. store[name] must
    already be a dict for every name in `names`.
    """
    dims = [dimTable[Response2LabelType(res)] for res in responses]
    assert batchResult.shape[2] == sum(dims), \
        'the last dimension of the prediction does not match the responses'

    endPositions = np.cumsum(dims)
    startPositions = endPositions - dims

    for res, start, end in zip(responses, startPositions, endPositions):
        batchSlice = batchResult[:, :, start:end]
        for onePrediction, seqLen, name in zip(batchSlice, seqLens, names):
            ## remove masked positions
            unmasked = onePrediction[maxSeqLen - seqLen:, :]
            store[name].setdefault(res, []).append(unmasked)


def PredictProperty(models, predictors, predFiles):
    """Predict properties with every model and average them per protein.

    Args:
        models: sequence of model specifications (dicts). This function reads
            model['n_in_seq'] and model['responses'] and passes each model as
            modelSpecs to DataProcessor.
        predictors: sequence aligned with `models`; each element is a pair
            (predict, inputVariables). predict is called with the first
            len(inputVariables) items of every batch and must return
            (result4prob, result).
        predFiles: passed unchanged to DataProcessor.LoadPropertyFeatures.

    Returns:
        A tuple (finalresults4prob, finalresults, allsequences):
        finalresults4prob[name][response] is the average over models of the
        predicted probabilities; finalresults[name][response] is the average
        predicted value for non-discrete responses, or
        PropertyUtils.Coding2String applied to the argmax of the averaged
        probabilities for responses whose label type starts with 'Discrete';
        allsequences[name] is the primary sequence of each protein.

    Raises:
        AssertionError: if the number of sequence features of any loaded
            protein differs from model['n_in_seq'], or if a prediction does
            not have the expected number of columns.
        SystemExit: if the same protein name appears with two different
            primary sequences.
    """
    allsequences = dict()

    ## nested dictionaries, e.g., allresults[proteinName][response] = list of predictions (one entry per model)
    allresults4prob = dict()
    allresults = dict()

    for model, predictor in zip(models, predictors):

        predict, inputVariables = predictor

        ## We shall load these files for each model separately since each model may use a different set of features
        predData = DataProcessor.LoadPropertyFeatures(predFiles,
                                                      modelSpecs=model,
                                                      forTrainValidation=False)

        for d in predData:
            ## make sure the input has the same number of features as the model.
            ## Every protein is checked so the validation is deterministic and an
            ## empty data set does not crash a random spot check.
            assert model['n_in_seq'] == d['seqFeatures'].shape[1], \
                'the number of input features differs from what the model expects'

            ## collecting sequences
            if d['name'] not in allsequences:
                allsequences[d['name']] = d['sequence']
            elif allsequences[d['name']] != d['sequence']:
                print('ERROR: inconsistent primary sequence for the same protein in the protein feature files')
                raise SystemExit(1)

        predSeqData, names = DataProcessor.SplitData2Batches(
            data=predData,
            numDataPoints=30,
            modelSpecs=model,
            forTrainValidation=False)
        print('#predData:  %d #batches:  %d' % (len(predData),
                                                len(predSeqData)))

        for onebatch, names4onebatch in zip(predSeqData, names):
            batchInputs = onebatch[:len(inputVariables)]
            result4prob, result = predict(*batchInputs)

            ## x1d has shape (batchSize, maxSeqLen, numFeatures) and x1dmask has shape (batchSize, #cols_to_be_masked)
            x1d, x1dmask = batchInputs[0:2]
            maxSeqLen = x1d.shape[1]
            seqLens = maxSeqLen - x1dmask.shape[1] + np.sum(x1dmask, axis=1)
            ## cast so that the lengths are valid slice bounds even when the mask is stored as floats
            seqLens = seqLens.astype(np.int64)

            for name in names4onebatch:
                allresults.setdefault(name, dict())
                allresults4prob.setdefault(name, dict())

            ## result4prob has shape (batchSize, maxSeqLen, sum(responseProbDims[res] for res in model['responses']))
            _CollectBatchResults(result4prob, model['responses'],
                                 config.responseProbDims, seqLens, maxSeqLen,
                                 names4onebatch, allresults4prob)

            ## result has shape (batchSize, maxSeqLen, sum(responseValueDims[res] for res in model['responses']))
            _CollectBatchResults(result, model['responses'],
                                 config.responseValueDims, seqLens, maxSeqLen,
                                 names4onebatch, allresults)

    ## calculate the final result, which is the average of all the predicted properties for the same protein and response name
    finalresults = dict()
    for name, results in allresults.items():
        finalresults[name] = dict()
        for response, predictions in results.items():
            ## discrete responses are derived from the averaged probabilities below
            if not Response2LabelType(response).startswith('Discrete'):
                finalresults[name][response] = np.average(predictions, axis=0)

    finalresults4prob = dict()
    for name, results in allresults4prob.items():
        finalresults4prob[name] = dict()
        for response, predictions in results.items():
            avgProb = np.average(predictions, axis=0)
            finalresults4prob[name][response] = avgProb

            ## convert coding of discrete labels to more meaningful representation
            if Response2LabelType(response).startswith('Discrete'):
                finalresults[name][response] = PropertyUtils.Coding2String(
                    np.argmax(avgProb, axis=1), response)

    return finalresults4prob, finalresults, allsequences
def main(argv):
    """Train a property prediction model and pickle the result to disk.

    Steps: build the model specification from `argv`, load the training and
    validation proteins (and the prediction proteins when
    modelSpecs['predFile'] is not None), split them into minibatches, call
    TrainModel, merge its result into a copy of the model specification and
    dump the merged dict with cPickle to the file name returned by
    GenerateModelFileName.

    Args:
        argv: command-line arguments, passed to ParseCommandLine.ParseArguments.

    Raises:
        SystemExit: if no training protein was loaded.
    """
    modelSpecs = InitializeModelSpecs()
    modelSpecs = ParseCommandLine.ParseArguments(argv, modelSpecs)

    startTime = datetime.datetime.now()

    ## trainData and validData are lists. Each element corresponds to one protein, which is a dict()
    trainData = DataProcessor.LoadPropertyFeatures(modelSpecs['trainFile'],
                                                   modelSpecs=modelSpecs)
    validData = DataProcessor.LoadPropertyFeatures(modelSpecs['validFile'],
                                                   modelSpecs=modelSpecs)
    print('#trainData:  %d #validData:  %d' % (len(trainData),
                                               len(validData)))

    ## the first training protein is needed below to determine the number of input features
    if len(trainData) < 1:
        print('ERROR: no training proteins were loaded, so the model cannot be trained')
        raise SystemExit(1)

    ## TODO: where to add code to assign weight to each residue? We need to deal with the residues without 3D coordinates for angle and SS prediction
    ##a, b = DataProcessor.CalcLabelDistributionAndWeight(trainData, modelSpecs)

    modelSpecs['numOfTrainProteins'] = len(trainData)

    beforeBatchTime = datetime.datetime.now()
    print('time spent on data loading:  %s' % (beforeBatchTime - startTime))

    print('Preparing batch data for training...')
    groupSize = modelSpecs['minibatchSize']
    trainSeqDataset, _ = DataProcessor.SplitData2Batches(
        data=trainData, numDataPoints=groupSize, modelSpecs=modelSpecs)
    validSeqDataset, _ = DataProcessor.SplitData2Batches(
        data=validData, numDataPoints=groupSize, modelSpecs=modelSpecs)
    print('#trainData minibatches: %d #validData minibatches: %d' %
          (len(trainSeqDataset), len(validSeqDataset)))

    predSeqDataset = None
    if modelSpecs['predFile'] is not None:
        predData = DataProcessor.LoadPropertyFeatures(modelSpecs['predFile'],
                                                      modelSpecs=modelSpecs,
                                                      forTrainValidation=False)
        print('#predData:  %d' % len(predData))
        ## NOTE(review): PredictProperty passes forTrainValidation=False when it splits
        ## prediction data, while this call relies on the default -- confirm this is intended
        predSeqDataset, _ = DataProcessor.SplitData2Batches(
            data=predData, numDataPoints=40, modelSpecs=modelSpecs)
        print('#predData minibatches: %d' % len(predSeqDataset))

    ## the number of input sequence features is taken from the first training protein
    modelSpecs['n_in_seq'] = trainData[0]['seqFeatures'].shape[1]

    beforeTrainTime = datetime.datetime.now()

    print('time spent on generating batch data: %s' %
          (beforeTrainTime - beforeBatchTime))

    result = TrainModel(modelSpecs=modelSpecs,
                        trainSeqData=trainSeqDataset,
                        validSeqData=validSeqDataset,
                        predSeqData=predSeqDataset)

    ## merge modelSpecs and result; entries in result take precedence
    resultModel = modelSpecs.copy()
    resultModel.update(result)

    modelFile = GenerateModelFileName(resultModel)
    print('Writing the resultant model to  %s' % modelFile)
    ## the with-block closes (and flushes) the file even if pickling fails
    with open(modelFile, 'wb') as fh:
        cPickle.dump(resultModel, fh, cPickle.HIGHEST_PROTOCOL)