def bootStrapping_allneg_continue_keras2(trainfile,
                                         valfile=None,
                                         srate=0.8,
                                         nb_epoch1=3,
                                         nb_epoch2=30,
                                         earlystop=None,
                                         maxneg=None,
                                         model=0,
                                         codingMode=0,
                                         lam_recon=0,
                                         inputweights=None,
                                         outputweights=None,
                                         nb_classes=2):
    """Bootstrap-train a capsule network over chunks of the negative samples.

    Rows whose first column is non-zero are treated as positives, rows whose
    first column is zero as negatives.  In each of the ``nb_epoch1`` outer
    rounds the negatives are reshuffled, ``slength`` positives are drawn, and
    ``Capsnet_main`` is called once per consecutive chunk of ``slength``
    negatives (``nclass`` chunks per round).  From the second chunk of a
    round onwards the models returned by the previous call are handed back
    through ``compilemodels``.

    Args:
        trainfile: 2-D array indexed as ``trainfile[:, 0]``; column 0 is the
            label.
        valfile: optional array with the same layout; when given, all of its
            rows form the validation set.  When ``None`` the tail of the
            training data (about 10% of the positives and an equal number of
            negatives) is held out instead.
        srate: fraction of the training positives drawn per round.
        nb_epoch1: number of outer bootstrap rounds.
        nb_epoch2: forwarded to ``Capsnet_main`` as ``nb_epoch``.
        earlystop: forwarded to ``Capsnet_main``.
        maxneg: optional upper bound on the number of negative chunks used
            per round.
        model: forwarded to ``Capsnet_main`` as ``modeltype``.
        codingMode: forwarded to ``convertRawToXY``.
        lam_recon: forwarded to ``Capsnet_main``.
        inputweights: forwarded to ``Capsnet_main`` as ``weights``.
        outputweights: if not ``None``, ``models.save_weights`` is called with
            this value after every chunk (overwriting the previous save).
        nb_classes: forwarded to ``Capsnet_main``.

    Returns:
        The tuple ``(models, eval_model, manipulate_model, weight_c_model,
        fitHistory)`` produced by the last ``Capsnet_main`` call.

    Raises:
        ValueError: if the positive sample size per chunk is zero, or if no
            training round would be executed (previously these surfaced as
            ``ZeroDivisionError`` / ``UnboundLocalError``).
    """
    trainX = trainfile
    train_pos = pd.DataFrame(trainX[np.where(trainX[:, 0] != 0)])
    train_neg = pd.DataFrame(trainX[np.where(trainX[:, 0] == 0)])
    # Shuffle both classes; replaced below when a validation tail is cut off.
    train_pos_s = train_pos.sample(train_pos.shape[0])
    train_neg_s = train_neg.sample(train_neg.shape[0])

    if valfile is not None:
        # Use every row of valfile for validation.
        valX = valfile
        val_pos = pd.DataFrame(valX[np.where(valX[:, 0] != 0)])
        val_neg = pd.DataFrame(valX[np.where(valX[:, 0] == 0)])
    else:
        # Hold out the tail of the (unshuffled) training data for validation.
        a = int(train_pos.shape[0] * 0.9)
        b = train_neg.shape[0] - int(train_pos.shape[0] * 0.1)
        print("train pos=" + str(train_pos.shape[0]) + str('\n'))
        print("train neg=" + str(train_neg.shape[0]) + str('\n'))
        print(" a=" + str(a) + " b=" + str(b) + str('\n'))
        train_pos_s = train_pos.iloc[0:a]
        train_neg_s = train_neg.iloc[0:b]
        print("train pos s=" + str(train_pos_s.shape[0]) + str('\n'))
        print("train neg s=" + str(train_neg_s.shape[0]) + str('\n'))

        # Start exactly where the training slice ends so that rows a and b
        # are not silently dropped from both sets.
        val_pos = train_pos.iloc[a:]
        print("val_pos=" + str(val_pos.shape[0]) + str('\n'))
        val_neg = train_neg.iloc[b:]
        print("val_neg=" + str(val_neg.shape[0]) + str('\n'))

    # Chunk size and chunk count are derived from the data actually trained on.
    slength = int(train_pos_s.shape[0] * srate)
    if slength < 1:
        raise ValueError(
            "positive sample size per chunk is 0 (%d training positives, "
            "srate=%s)" % (train_pos_s.shape[0], srate))
    nclass = train_neg_s.shape[0] // slength
    if maxneg is not None:
        # cannot do more than maxneg chunks per round
        nclass = min(maxneg, nclass)
    if nclass < 1 or nb_epoch1 < 1:
        raise ValueError(
            "no training round would run (nclass=%s, nb_epoch1=%s)" %
            (nclass, nb_epoch1))

    val_all = pd.concat([val_pos, val_neg])
    valX1, valY1 = convertRawToXY(val_all.values, codingMode=codingMode)

    for I in range(nb_epoch1):
        # Reshuffle negatives and redraw positives once per outer round.
        train_neg_s = train_neg_s.sample(train_neg_s.shape[0])
        train_pos_ss = train_pos_s.sample(slength)
        for t in range(nclass):
            train_neg_ss = train_neg_s.iloc[(slength * t):(slength * t +
                                                           slength)]
            train_all = pd.concat([train_pos_ss, train_neg_ss])
            trainX1, trainY1 = convertRawToXY(train_all.values,
                                              codingMode=codingMode)
            fit_args = dict(trainX=trainX1,
                            trainY=trainY1,
                            valX=valX1,
                            valY=valY1,
                            nb_classes=nb_classes,
                            nb_epoch=nb_epoch2,
                            earlystop=earlystop,
                            weights=inputweights,
                            compiletimes=t,
                            lr=0.001,
                            batch_size=500,
                            lam_recon=lam_recon,
                            routings=3,
                            class_weight=None,
                            modeltype=model)
            if t != 0:
                # Hand the models from the previous chunk back in.
                fit_args['compilemodels'] = (models, eval_model,
                                             manipulate_model, weight_c_model)
            (models, eval_model, manipulate_model, weight_c_model,
             fitHistory) = Capsnet_main(**fit_args)

            print("modelweights assigned for " + str(I) + " and " + str(t) +
                  "\n")
            if outputweights is not None:
                models.save_weights(outputweights, overwrite=True)

    return models, eval_model, manipulate_model, weight_c_model, fitHistory
def bootStrapping_allneg_continue_keras2(
    trainfile,
    valfile=None,
    srate=0.8,
    nb_epoch1=3,
    nb_epoch2=30,
    earlystop=None,
    maxneg=None,
    model=0,
    codingMode=0,
    frozenlayer=1,
    inputweights=None,
    outputweights=None,
    forkinas=False,
    nb_classes=2,
    hw_res=None,
    hc_res=None,
    hc_res2=None
):
    """Bootstrap-train ``MultiCNN`` over two positive/negative label groups.

    ``trainfile`` is read as a header-less tab-separated table whose first
    column is the label.  Labels 0 and 1 are the positives of group 0 and
    group 1; labels 2 and 3 are the matching negatives.  In each of the
    ``nb_epoch1`` outer rounds the negatives of each group are reshuffled and
    ``slength[i]`` positives are drawn per group; the network is then trained
    once per negative chunk.  A group with fewer chunks than the largest
    group wraps around and reuses its chunks.

    Args:
        trainfile: path or buffer accepted by ``pd.read_table``.
            The original comment describes the input as fragments (n*34);
            not verifiable from this function.
        valfile: optional validation table in the same format.  When ``None``
            the tail of each group's training data is held out instead.
        srate: selection rate for the positive data of each group.
        nb_epoch1: number of outer bootstrap rounds.
        nb_epoch2: forwarded to ``MultiCNN`` as ``nb_epoch``.
        earlystop, model, frozenlayer, forkinas, nb_classes: forwarded to
            ``MultiCNN``.
        maxneg: optional upper bound on the number of chunks per round.
        codingMode: forwarded to ``convertRawToXY``.
        inputweights: forwarded to ``MultiCNN`` as ``weights``.
        outputweights: if not ``None``, weights are saved after every chunk
            to ``outputweights + '_iteration' + str(t)``.
        hw_res: optional label whose sample weights are scaled by
            (#rows with label != 0) / (#rows with label == hw_res).
        hc_res: optional label whose class weight is set to
            (#rows with label != 0) / (#rows with label == hc_res).
        hc_res2: optional pair of labels; each gets the class weight
            (#rows with label < 2) / (#rows with that label).  Overrides
            ``hc_res`` when both are given.

    Returns:
        The object returned by the last ``MultiCNN`` call.

    Raises:
        ValueError: if a group has no positives to draw or no complete
            negative chunk, or if no training round would be executed.
    """
    train_pos = {}
    train_neg = {}
    train_pos_s = {}
    train_neg_s = {}
    train_pos_ss = {}
    slength = {}
    nclass = {}
    trainX = pd.read_table(trainfile, sep='\t', header=None).values
    for i in range(2):
        # label i -> positives of group i, label i + 2 -> its negatives
        train_pos[i] = pd.DataFrame(trainX[np.where(trainX[:, 0] == i)])
        train_neg[i] = pd.DataFrame(trainX[np.where(trainX[:, 0] == i + 2)])
        # shuffle; replaced below when a validation tail is cut off
        train_pos_s[i] = train_pos[i].sample(train_pos[i].shape[0])
        train_neg_s[i] = train_neg[i].sample(train_neg[i].shape[0])
        slength[i] = int(train_pos[i].shape[0] * srate)

    val_parts = []
    if valfile is not None:
        # use all data in valfile as validation
        valX = pd.read_table(valfile, sep='\t', header=None).values
        for i in range(2):
            val_parts.append(pd.DataFrame(valX[np.where(valX[:, 0] == i)]))
            val_parts.append(
                pd.DataFrame(valX[np.where(valX[:, 0] == i + 2)]))
    else:
        for i in range(2):
            a = int(train_pos[i].shape[0] * 0.9)
            b = train_neg[i].shape[0] - int(train_pos[i].shape[0] * 0.1)
            print("train pos=" + str(train_pos[i].shape[0]) + str('\n'))
            print("train neg=" + str(train_neg[i].shape[0]) + str('\n'))
            print(" a=" + str(a) + " b=" + str(b) + str('\n'))
            train_pos_s[i] = train_pos[i].iloc[0:a]
            train_neg_s[i] = train_neg[i].iloc[0:b]
            print("train pos s=" + str(train_pos_s[i].shape[0]) + str('\n'))
            print("train neg s=" + str(train_neg_s[i].shape[0]) + str('\n'))

            # Validation starts exactly where training ends so that rows a
            # and b are not dropped from both sets.
            val_pos = train_pos[i].iloc[a:]
            print("val_pos=" + str(val_pos.shape[0]) + str('\n'))
            val_neg = train_neg[i].iloc[b:]
            print("val_neg=" + str(val_neg.shape[0]) + str('\n'))
            val_parts.extend([val_pos, val_neg])

            # part of the positives moved to validation, so update slength
            slength[i] = int(train_pos_s[i].shape[0] * srate)

    for i in range(2):
        if slength[i] < 1:
            raise ValueError(
                "group %d: positive sample size per chunk is 0 "
                "(%d training positives, srate=%s)" %
                (i, train_pos_s[i].shape[0], srate))
        nclass[i] = train_neg_s[i].shape[0] // slength[i]
        if nclass[i] < 1:
            raise ValueError(
                "group %d: fewer negatives (%d) than the chunk size (%d)" %
                (i, train_neg_s[i].shape[0], slength[i]))

    # Run as many chunks as the largest group offers; previously this was
    # only assigned when maxneg was given, leaving nclass_n undefined.
    nclass_n = max(nclass.values())
    if maxneg is not None:
        # cannot do more than maxneg chunks per round
        nclass_n = min(nclass_n, maxneg)
    if nclass_n < 1 or nb_epoch1 < 1:
        raise ValueError(
            "no training round would run (chunks=%s, nb_epoch1=%s)" %
            (nclass_n, nb_epoch1))

    valX1, valY1 = convertRawToXY(pd.concat(val_parts).values,
                                  codingMode=codingMode)

    for I in range(nb_epoch1):
        for i in range(2):
            # reshuffle negatives and redraw positives once per outer round
            train_neg_s[i] = train_neg_s[i].sample(train_neg_s[i].shape[0])
            train_pos_ss[i] = train_pos_s[i].sample(slength[i])

        for t in range(nclass_n):
            parts = []
            for i in range(2):
                # Wrap the chunk index (not the row offset) so a smaller
                # group cycles through its own chunks.
                start = slength[i] * (t % nclass[i])
                parts.append(train_pos_ss[i])
                parts.append(train_neg_s[i].iloc[start:start + slength[i]])
            train_all = pd.concat(parts)
            train_matrix = train_all.values
            labels = train_matrix[:, 0]

            sampleweights = None
            if hw_res is not None:
                sampleweights = np.ones(len(train_all))
                hw_rows = np.where(labels == hw_res)
                sampleweights[hw_rows] *= (
                    sampleweights[np.where(labels != 0)].sum() /
                    sampleweights[hw_rows].sum())

            classweights = None
            if hc_res is not None:
                # NOTE(review): the original comment reads "0 negative, 1 S
                # 2 T 3 Y", which differs from the 0/1 positive, 2/3 negative
                # split used above -- confirm the intended label scheme.
                classweights = {0: 1, 1: 1, 2: 1, 3: 1}
                # float() avoids integer truncation under Python 2.
                classweights[hc_res] = (float(np.sum(labels != 0)) /
                                        np.sum(labels == hc_res))

            if hc_res2 is not None:  # negative has weight!
                classweights = {k: 1.0 for k in range(nb_classes)}
                n_pos = float(np.sum(labels < 2))
                classweights[hc_res2[0]] = n_pos / np.sum(
                    labels == hc_res2[0])
                classweights[hc_res2[1]] = n_pos / np.sum(
                    labels == hc_res2[1])

            print(train_matrix)
            trainX1, trainY1 = convertRawToXY(train_matrix,
                                              codingMode=codingMode)
            print("#" * 30)
            print(trainX1.shape)
            print("#" * 30)

            fit_args = dict(nb_epoch=nb_epoch2,
                            earlystop=earlystop,
                            model=model,
                            frozenlayer=frozenlayer,
                            weights=inputweights,
                            sample_weight=sampleweights,
                            nb_classes=nb_classes,
                            class_weight=classweights,
                            forkinas=forkinas,
                            compiletimes=t)
            if t != 0:
                # hand the model from the previous chunk back in
                fit_args['compilemodels'] = models
            models = MultiCNN(trainX1, trainY1, valX1, valY1, **fit_args)

            print("modelweights assigned for " + str(I) + " and " + str(t) +
                  "\n")
            if outputweights is not None:
                models.save_weights(outputweights + '_iteration' + str(t),
                                    overwrite=True)

    return models
def bootStrapping_allneg_continue_keras2(trainfile,valfile=None,srate=0.8,
                                         nb_epoch1=3,nb_epoch2=30,earlystop=None,
                                         maxneg=None,model=0,codingMode=0,lam_recon=0,
                                         inputweights=None,outputweights=None,nb_classes=2,
                                         hw_res=None,hc_res=None,hc_res2=None):
    """Bootstrap-train a capsule network over two positive/negative groups.

    Column 0 of ``trainfile`` is the label: 0 and 1 are the positives of
    group 0 and group 1, 2 and 3 the matching negatives (the original
    comments name them "sp/tp 0 yp 1 sn/tn 2 yn 3").  In each of the
    ``nb_epoch1`` outer rounds the negatives of each group are reshuffled and
    ``slength[i]`` positives are drawn per group; ``Capsnet_main`` is then
    called once per negative chunk.  A group with fewer chunks than the
    largest group wraps around and reuses its chunks.

    NOTE: the label column of ``trainfile`` (and of ``valfile`` when given)
    is rewritten in place with ``int()`` of each value, so the caller's
    arrays are modified.

    Args:
        trainfile: 2-D array indexed as ``trainfile[i, 0]`` / ``[:, 0]``.
        valfile: optional array with the same layout used entirely for
            validation.  When ``None`` the tail of each group's training
            data is held out instead.
        srate: selection rate for the positive data of each group.
        nb_epoch1: number of outer bootstrap rounds.
        nb_epoch2: forwarded to ``Capsnet_main`` as ``nb_epoch``.
        earlystop, lam_recon, nb_classes: forwarded to ``Capsnet_main``.
        maxneg: optional upper bound on the number of chunks per round.
        model: forwarded to ``Capsnet_main`` as ``modeltype``.
        codingMode: forwarded to ``convertRawToXY``.
        inputweights: forwarded to ``Capsnet_main`` as ``weights``.
        outputweights: if not ``None``, weights are saved after every chunk
            to ``outputweights + '_iteration' + str(t)``.
        hw_res, hc_res: accepted but not used by this function.
        hc_res2: optional pair of labels; each gets the class weight
            (#rows with label <= 1) / (#rows with that label).

    Returns:
        The tuple ``(models, eval_model, manipulate_model, weight_c_model,
        fitHistory)`` produced by the last ``Capsnet_main`` call.

    Raises:
        ValueError: if a group has no positives to draw or no complete
            negative chunk, or if no training round would be executed.
    """
    train_pos = {}  # group 0 / group 1 positives
    train_neg = {}  # group 0 / group 1 negatives
    train_pos_s = {}
    train_neg_s = {}
    train_pos_ss = {}
    slength = {}
    nclass = {}
    trainX = trainfile
    # Normalise the labels to int, element by element, in the caller's array.
    for i in range(len(trainX)):
        trainX[i, 0] = int(trainX[i, 0])

    for i in range(2):
        train_pos[i] = pd.DataFrame(trainX[np.where(trainX[:, 0] == i)])
        train_neg[i] = pd.DataFrame(trainX[np.where(trainX[:, 0] == i + 2)])
        # shuffle; replaced below when a validation tail is cut off
        train_pos_s[i] = train_pos[i].sample(train_pos[i].shape[0])
        train_neg_s[i] = train_neg[i].sample(train_neg[i].shape[0])
        slength[i] = int(train_pos[i].shape[0] * srate)

    val_parts = []
    if valfile is not None:
        # use all data in valfile as validation
        valX = valfile
        for i in range(len(valX)):
            valX[i, 0] = int(valX[i, 0])
        for i in range(2):
            val_parts.append(pd.DataFrame(valX[np.where(valX[:, 0] == i)]))
            val_parts.append(
                pd.DataFrame(valX[np.where(valX[:, 0] == i + 2)]))
    else:
        for i in range(2):
            a = int(train_pos[i].shape[0] * 0.9)
            b = train_neg[i].shape[0] - int(train_pos[i].shape[0] * 0.1)
            print("train pos=" + str(train_pos[i].shape[0]) + str('\n'))
            print("train neg=" + str(train_neg[i].shape[0]) + str('\n'))
            print(" a=" + str(a) + " b=" + str(b) + str('\n'))
            train_pos_s[i] = train_pos[i].iloc[0:a]
            train_neg_s[i] = train_neg[i].iloc[0:b]
            print("train pos s=" + str(train_pos_s[i].shape[0]) + str('\n'))
            print("train neg s=" + str(train_neg_s[i].shape[0]) + str('\n'))

            # Validation starts exactly where training ends so that rows a
            # and b are not dropped from both sets.
            val_pos = train_pos[i].iloc[a:]
            print("val_pos=" + str(val_pos.shape[0]) + str('\n'))
            val_neg = train_neg[i].iloc[b:]
            print("val_neg=" + str(val_neg.shape[0]) + str('\n'))
            val_parts.extend([val_pos, val_neg])

            # part of the positives moved to validation, so update slength
            slength[i] = int(train_pos_s[i].shape[0] * srate)

    for i in range(2):
        if slength[i] < 1:
            raise ValueError(
                "group %d: positive sample size per chunk is 0 "
                "(%d training positives, srate=%s)" %
                (i, train_pos_s[i].shape[0], srate))
        nclass[i] = train_neg_s[i].shape[0] // slength[i]
        if nclass[i] < 1:
            raise ValueError(
                "group %d: fewer negatives (%d) than the chunk size (%d)" %
                (i, train_neg_s[i].shape[0], slength[i]))

    # Run as many chunks as the largest group offers; previously this was
    # only assigned when maxneg was given, leaving nclass_n undefined.
    nclass_n = max(nclass[0], nclass[1])
    if maxneg is not None:
        nclass_n = min(nclass_n, maxneg)
    if nclass_n < 1 or nb_epoch1 < 1:
        raise ValueError(
            "no training round would run (chunks=%s, nb_epoch1=%s)" %
            (nclass_n, nb_epoch1))

    valX1, valY1 = convertRawToXY(pd.concat(val_parts).values,
                                  codingMode=codingMode)

    for I in range(nb_epoch1):
        for i in range(2):
            # reshuffle negatives and redraw positives once per outer round
            train_neg_s[i] = train_neg_s[i].sample(train_neg_s[i].shape[0])
            train_pos_ss[i] = train_pos_s[i].sample(slength[i])

        for t in range(nclass_n):
            parts = []
            for i in range(2):
                # Wrap the chunk index (not the row offset) so a smaller
                # group cycles through its own chunks.
                start = slength[i] * (t % nclass[i])
                parts.append(train_pos_ss[i])
                parts.append(train_neg_s[i].iloc[start:start + slength[i]])
            train_all = pd.concat(parts)
            train_matrix = train_all.values
            labels = train_matrix[:, 0]

            classweights = None
            if hc_res2 is not None:
                # negatives are weighted too; e.g. hc_res2 is [0, 2] per the
                # original comment
                classweights = {k: 1.0 for k in range(nb_classes)}
                n_pos = float(np.sum(labels <= 1))
                classweights[hc_res2[0]] = n_pos / np.sum(
                    labels == hc_res2[0])
                classweights[hc_res2[1]] = n_pos / np.sum(
                    labels == hc_res2[1])

            trainX1, trainY1 = convertRawToXY(train_matrix,
                                              codingMode=codingMode)
            fit_args = dict(trainX=trainX1,
                            trainY=trainY1,
                            valX=valX1,
                            valY=valY1,
                            nb_classes=nb_classes,
                            nb_epoch=nb_epoch2,
                            earlystop=earlystop,
                            weights=inputweights,
                            compiletimes=t,
                            lr=0.001,
                            batch_size=1000,
                            lam_recon=lam_recon,
                            routings=3,
                            class_weight=classweights,
                            modeltype=model)
            if t != 0:
                # Pass back the models returned by the previous call (the
                # original referenced undefined *_models names here).
                fit_args['compilemodels'] = (models, eval_model,
                                             manipulate_model, weight_c_model)
            (models, eval_model, manipulate_model, weight_c_model,
             fitHistory) = Capsnet_main(**fit_args)

            print("modelweights assigned for " + str(I) + " and " + str(t) +
                  "\n")
            if outputweights is not None:
                models.save_weights(outputweights + '_iteration' + str(t),
                                    overwrite=True)

    return models, eval_model, manipulate_model, weight_c_model, fitHistory
def bootStrapping_allneg_continue_val(
    trainfile,
    valfile=None,
    srate=0.8,
    nb_epoch1=3,
    nb_epoch2=30,
    earlystop=None,
    maxneg=None,
    codingMode=0,
    transferlayer=1,
    inputweights=None,
    outputweights=None,
    forkinas=False
):
    """Bootstrap-train ``MultiCNN`` over chunks of the negative samples.

    Rows of ``trainfile`` whose first column equals 1 are positives; every
    other row is a negative.  In each of the ``nb_epoch1`` outer rounds the
    negatives are reshuffled, ``slength`` positives are drawn, and
    ``MultiCNN`` is called once per consecutive chunk of ``slength``
    negatives.  From the second chunk of a round onwards the model returned
    by the previous call is handed back through ``compilemodels``.

    Args:
        trainfile: 2-D array indexed as ``trainfile[:, 0]``; column 0 is the
            label.  The original comment describes the input as fragments
            (n*34); not verifiable from this function.
        valfile: optional validation data.  Unlike ``trainfile`` it is
            converted with ``.values`` first, so a DataFrame-like object is
            expected.  When ``None`` the tail of the training data is held
            out instead.
        srate: selection rate for the positive data.
        nb_epoch1: number of outer bootstrap rounds.
        nb_epoch2: forwarded to ``MultiCNN`` as ``nb_epoch``.
        earlystop, transferlayer, forkinas: forwarded to ``MultiCNN``.
        maxneg: optional upper bound on the number of chunks per round.
        codingMode: forwarded to ``convertRawToXY``.
        inputweights: forwarded to ``MultiCNN`` as ``weights``.
        outputweights: if not ``None``, ``models.save_weights`` is called with
            this value after every chunk (overwriting the previous save).

    Returns:
        The object returned by the last ``MultiCNN`` call.

    Raises:
        ValueError: if the positive sample size per chunk is zero, or if no
            training round would be executed.

    The process exits (after printing an error) when the training data has
    no positive or no negative rows.
    """
    trainX = trainfile
    train_pos = pd.DataFrame(trainX[np.where(trainX[:, 0] == 1)])
    train_neg = pd.DataFrame(trainX[np.where(trainX[:, 0] != 1)])
    if train_pos.shape[0] == 0:
        print('ERROR: size of positive sites is 0. Please check positive sites in training data!\n')
        exit()

    if train_neg.shape[0] == 0:
        print('ERROR: size of negative sites is 0. Please check negative sites in training data!\n')
        exit()

    # Shuffle both classes; replaced below when a validation tail is cut off.
    train_pos_s = train_pos.sample(train_pos.shape[0])
    train_neg_s = train_neg.sample(train_neg.shape[0])

    if valfile is not None:
        valX = valfile.values
        val_pos = pd.DataFrame(valX[np.where(valX[:, 0] == 1)])
        val_neg = pd.DataFrame(valX[np.where(valX[:, 0] != 1)])
    else:
        # Hold out the tail of the (unshuffled) training data for validation.
        a = int(train_pos.shape[0] * 0.9)
        b = train_neg.shape[0] - int(train_pos.shape[0] * 0.1)
        train_pos_s = train_pos.iloc[0:a]
        train_neg_s = train_neg.iloc[0:b]

        # Start exactly where the training slice ends so that rows a and b
        # are not silently dropped from both sets.
        val_pos = train_pos.iloc[a:]
        val_neg = train_neg.iloc[b:]

    # Chunk size and chunk count are derived from the data actually trained on.
    slength = int(train_pos_s.shape[0] * srate)
    if slength < 1:
        raise ValueError(
            "positive sample size per chunk is 0 (%d training positives, "
            "srate=%s)" % (train_pos_s.shape[0], srate))
    nclass = train_neg_s.shape[0] // slength
    if maxneg is not None:
        # cannot do more than maxneg chunks per round
        nclass = min(maxneg, nclass)
    if nclass < 1 or nb_epoch1 < 1:
        raise ValueError(
            "no training round would run (nclass=%s, nb_epoch1=%s)" %
            (nclass, nb_epoch1))

    val_all = pd.concat([val_pos, val_neg])
    valX1, valY1 = convertRawToXY(val_all.values, codingMode=codingMode)

    for I in range(nb_epoch1):
        # Reshuffle negatives and redraw positives once per outer round.
        train_neg_s = train_neg_s.sample(train_neg_s.shape[0])
        train_pos_ss = train_pos_s.sample(slength)
        for t in range(nclass):
            train_neg_ss = train_neg_s.iloc[(slength * t):(slength * t +
                                                           slength)]
            train_all = pd.concat([train_pos_ss, train_neg_ss])
            trainX1, trainY1 = convertRawToXY(train_all.values,
                                              codingMode=codingMode)
            fit_args = dict(nb_epoch=nb_epoch2,
                            earlystop=earlystop,
                            transferlayer=transferlayer,
                            weights=inputweights,
                            forkinas=forkinas,
                            compiletimes=t)
            if t != 0:
                # hand the model from the previous chunk back in
                fit_args['compilemodels'] = models
            models = MultiCNN(trainX1, trainY1, valX1, valY1, **fit_args)

            print("modelweights assigned for " + str(t) + " bootstrap.\n")
            if outputweights is not None:
                models.save_weights(outputweights, overwrite=True)

    return models