import argparse

# `ed` and `ex` are assumed to be the project's embedding and extraction
# modules; their imports are not part of this excerpt.

def main():

    print("This is program about steganography")
    print("The program is builed by Team 2 of L02 from KMA-HaNoi")
    print("How to use?")
    print("Put your images u wanto use to folder named input")
    print("This program have 02 mode are: Embeding and Extraction")
    print("With Embeding use form command like this (below):")
    print("python main.py -i [PATH_HOST_IMAGE] -w [PATH_WATERMARK_IMAGE] -m 0")
    print("With extraction use form command like this (below):")
    print("python main.py -i [PATH_HOST_IMAGE] -w [PATH_WATERMARK_IMAGE] -m 1")
    print("Output will be in folder named output")
    print("=    =   =         =        ==")
    print("=   =    =  =    = =       =  =")
    print("= =      =    ==   =      =    =")
    print("= =      =         =     = = = ==")
    print("=   =    =         =    =        =")
    print("=    =   =         =   =          =")
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=False, help="Host imgae")
    ap.add_argument("-w",
                    "--watermark",
                    required=False,
                    help="Watermark image")
    ap.add_argument("-m", "--mode", required=True, help="Mode to use")

    args = vars(ap.parse_args())
    print(args["mode"])
    if args["mode"] == '0':
        print("Start Embeding")
        ed.embedding(pathhostimge=args["image"],
                     pathwatermarkimage=args["watermark"])
        print("have been done")
    elif args["mode"] == '1':
        ex.extraction()
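
# Standard entry point (a small addition; the original excerpt ends at main()).
# A hypothetical invocation, with the placeholder paths spelled out:
#   python main.py -i input/host.png -w input/watermark.png -m 0
if __name__ == "__main__":
    main()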
Example #2
    def __init__(self, input_sequence, input_profile, training):
        # Embed the token sequence, then concatenate the profile features
        # along the feature axis.
        self.embedding_layer = embedding.embedding(input_sequence)
        self.input_layer = tf.concat([self.embedding_layer, input_profile],
                                     axis=2)
        # CNN and RNN branches feed a shared fully connected layer.
        self.cnn_layer = cnn.cnns(self.input_layer, training)
        self.rnn_layer = self._recurrentLayer(training)
        self.logits = fc.fc(self.rnn_layer, self.cnn_layer, training)
        self.readout = tf.nn.softmax(self.logits)

        tf.summary.histogram('logits', self.logits)
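
        # End of excerpt. A hypothetical instantiation, assuming this
        # __init__ belongs to a model class (here called Model for
        # illustration):
        #   model = Model(input_sequence, input_profile, training=True)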
Example #3
def inference(sentence):
    print("input sentence:")
    print(sentence)
    sentences = []
    words = sentence.split(' ')
    sentences.append(words)
    sentences_embedding = embedding(sentences, batch_size,
                                    single_sentence_length)
    print("input embedding:")
    print(sentences_embedding)
    output = mod_inference(sentences_embedding)
    print("output vector:")
    print(output)
    return output
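
# A minimal usage sketch (hypothetical sentence; assumes `embedding`,
# `mod_inference`, `batch_size`, and `single_sentence_length` are defined
# elsewhere in the source module):
#   vector = inference("the quick brown fox")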
Example #4
def run(word_train, label_train, word_dev, label_dev, vocab, device, kf_index=0):
    # build dataset
    train_dataset = SegDataset(word_train, label_train, vocab, config.label2id)
    dev_dataset = SegDataset(word_dev, label_dev, vocab, config.label2id)
    # build data_loader
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size,
                              shuffle=True, collate_fn=train_dataset.collate_fn)
    dev_loader = DataLoader(dev_dataset, batch_size=config.batch_size,
                            shuffle=True, collate_fn=dev_dataset.collate_fn)
    # get GloVe embedding
    if config.pretrained_embedding:
        embedding_weight = embedding(vocab)
    else:
        embedding_weight = None
    # model
    model = BiLSTM_CRF(embedding_size=config.embedding_size,
                       hidden_size=config.hidden_size,
                       vocab_size=vocab.vocab_size(),
                       target_size=vocab.label_size(),
                       num_layers=config.lstm_layers,
                       lstm_drop_out=config.lstm_drop_out,
                       nn_drop_out=config.nn_drop_out,
                       pretrained_embedding=config.pretrained_embedding,
                       embedding_weight=embedding_weight)
    model.to(device)
    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=config.lr, betas=config.betas)
    scheduler = StepLR(optimizer, step_size=config.lr_step, gamma=config.lr_gamma)
    # Initialize the CRF transition parameters uniformly in [-1, 1]
    for p in model.crf.parameters():
        _ = torch.nn.init.uniform_(p, -1, 1)
    # train and test
    train(train_loader, dev_loader, vocab, model, optimizer, scheduler, device, kf_index)
    with torch.no_grad():
        # test on the final test set
        test_loss, f1 = test(config.test_dir, vocab, device, kf_index)
    return test_loss, f1
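
# A hypothetical call, assuming train/dev splits and a vocab built elsewhere:
#   test_loss, f1 = run(word_train, label_train, word_dev, label_dev,
#                       vocab, torch.device("cuda:0"))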
Example #5
def run_sci():
    parser = argparse.ArgumentParser(description=("SCI: a method to predict "
                                                  "sub-compartments from HiC "
                                                  "data"),
                                     add_help=False)
    requiredArguments = parser.add_argument_group('Required arguments')
    requiredArguments.add_argument("-n",
                                   "--name",
                                   action="store",
                                   dest="name",
                                   help="Name of the experiment",
                                   type=str,
                                   required=True)

    requiredArguments.add_argument("-f",
                                   "--infile",
                                   action="store",
                                   dest="infile",
                                   help="Name of HiC interaction file",
                                   type=str,
                                   required=True)

    requiredArguments.add_argument("-r",
                                   "--resolution",
                                   action="store",
                                   dest="res",
                                   help=("Required resolution to predict"
                                         "compartments,provided bins size"
                                         "should have resolution greater than"
                                         "or equal the provided value"),
                                   type=int,
                                   required=True)
    requiredArguments.add_argument("-g",
                                   "--genome_size",
                                   action="store",
                                   dest="genome_size",
                                   help=("File containing chromosome size of"
                                         "the target genome"),
                                   type=str,
                                   required=True)

    optional = parser.add_argument_group('optional arguments')
    optional.add_argument("-h",
                          "--help",
                          action="help",
                          help="show this help message and exit")
    optional.add_argument("-o",
                          "--order",
                          action="store",
                          dest="order",
                          help=("Graph order to consider when performing graph"
                                "embedding. Available options are 1,2 or both."
                                "Default: 1"),
                          type=str,
                          default="1")
    optional.add_argument("-s",
                          "--samples",
                          action="store",
                          dest="samples",
                          help=("Number of edges to sample in millions order"
                                "from the graph. Default: 25"),
                          type=int,
                          default=25)
    optional.add_argument("-k",
                          "--clusters",
                          action="store",
                          dest="clusters",
                          help=("Nubmer of sub-compartments to be predicted."
                                " Default: 5"),
                          type=int,
                          default=5)
    optional.add_argument("--adj",
                          action="store",
                          dest="adj_matrix",
                          help="Adjaceny matrix file of the HiC graph",
                          default=None,
                          type=str)

    optional.add_argument("--alpha",
                          action="store",
                          dest="alpha",
                          help=("Weight for graph embeddine optimization"
                                " Default: 5"),
                          type=float,
                          default=0.5)

    oArgs = parser.parse_args()
    myobject = HicData(oArgs.res, oArgs.name)
    myobject.initialize(oArgs.genome_size)
    #myobject.load_interaction_data(oArgs.infile)
    hic_graph = myobject.write_inter_chrom_graph()
    GW_metadata = myobject.get_bins_info()

    if oArgs.adj_matrix is not None:
        myobject.write_GW_matrix(oArgs.adj_matrix)
    emb = embedding(oArgs.name, GW_metadata, oArgs.res)
    emb.make_embedding_file(hic_graph, oArgs.order, oArgs.samples)
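
# Standard entry point (a small addition; the original excerpt ends here).
if __name__ == "__main__":
    run_sci()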
Example #6
    def hl_solver(self, chempot=0., threshold=1.0e-12):
#        energy = self.mol.energy_nuc()
        energy = 0.
        nelec  = 0.

        rdm_ao  = np.dot(self.cf, self.cf.T)
        AX_val  = np.dot(self.Sf, self.A_val)
        rdm_val = np.dot(AX_val.T, np.dot(rdm_ao, AX_val))

        print ( "shapes" )
        print ( "cf",self.cf.shape )
        print ( "rdm_ao",rdm_ao.shape )
        print ( "AX_val",AX_val.shape )
        print ( "rdm_val",rdm_val.shape )

        if not self.parallel:
            myrange = range(self.nimp)
        else:
            from mpi4py import MPI
            comm = MPI.COMM_WORLD
            rank = MPI.COMM_WORLD.Get_rank()
            size = MPI.COMM_WORLD.Get_size()
            myrange = range(rank, rank+1)
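        # In parallel mode each MPI rank handles one impurity; per-rank
        # results are summed with comm.reduce() at the end of this method.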

        for i in myrange:
            # prepare orbital indexing
            imp_val   = np.zeros((self.nvl,), dtype=bool)
            imp_val_  = np.zeros((self.nvl,), dtype=bool)
            if self.nc > 0:
                imp_core  = np.zeros((self.nc,),  dtype=bool)
                imp_core_ = np.zeros((self.nc,),  dtype=bool)
            if self.nvt > 0:
                imp_virt  = np.zeros((self.nvt,), dtype=bool)
                imp_virt_ = np.zeros((self.nvt,), dtype=bool)
            for k in range(self.mol.natm):
                if self.imp_atx[i][k]:
                    imp_val[self.at_val == k]   = True
                    if self.nc > 0:
                        imp_core[self.at_core == k] = True
                    if self.nvt > 0:
                        imp_virt[self.at_virt == k] = True
                if self.imp_at[i][k]:
                    imp_val_[self.at_val == k]   = True
                    if self.nc > 0:
                        imp_core_[self.at_core == k] = True
                    if self.nvt > 0:
                        imp_virt_[self.at_virt == k] = True
            print("imp val", imp_val)

            # embedding
            cf_tmp, ncore, nact, ImpOrbs_x = \
                embedding.embedding (rdm_val, imp_val, \
                                     threshold=self.thresh, \
                                     transform_imp='hf')
            print("Doing EMBEDDING")
            print("cf_tmp", cf_tmp)
            print("ncore, nact", ncore, nact)
            print("ImpOrbs_x", ImpOrbs_x)
            cf_tmp = np.dot(self.A_val, cf_tmp)
            print("cf_tmp", cf_tmp)

            # localize imp+bath orbitals
            if self.method == 'dmrg':
                XR = np.random.rand(nact,nact)
                XR -= XR.T
                XS = sla.expm(0.01*XR)
                cf_ib = np.dot(cf_tmp[:,ncore:ncore+nact], XS)
                # loc = localizer.localizer (self.mol, cf_ib, 'boys')
                # loc.verbose = 5
                # cf_ib = loc.optimize(threshold=1.0e-5)
                # del loc
                cf_ib = lo.Boys(self.mol, cf_ib).kernel()

                R = np.dot(cf_ib.T,
                           np.dot(self.Sf, cf_tmp[:,ncore:ncore+nact]))
                print(np.allclose(np.dot(cf_tmp[:,ncore:ncore+nact],
                                         ImpOrbs_x),
                                  np.dot(cf_ib, np.dot(R, ImpOrbs_x))))
                ImpOrbs_x = np.dot(R, ImpOrbs_x)
                cf_tmp[:,ncore:ncore+nact] = cf_ib
                print(cf_ib)

            # prepare ImpOrbs
            ni_val = nact
            nj_val = np.count_nonzero(imp_val_)
            if self.nc > 0:
                ni_core = np.count_nonzero(imp_core)
                nj_core = np.count_nonzero(imp_core_)
            else:
                ni_core = nj_core = 0
            if self.nvt > 0:
                ni_virt = np.count_nonzero(imp_virt)
                nj_virt = np.count_nonzero(imp_virt_)
            else:
                ni_virt = nj_virt = 0

            ii = 0
            ImpOrbs = np.zeros((ni_val+ni_core+ni_virt,\
                                nj_val+nj_core+nj_virt,))
            if self.nc > 0:
                j = 0
                for i in range(self.nc):
                    if imp_core[i] and imp_core_[i]:
                        ImpOrbs[j,ii] = 1.
                        ii += 1
                    if imp_core[i]:
                        j += 1
            j = 0
            for i in range(self.nvl):
                if imp_val[i] and imp_val_[i]:
                    ImpOrbs[ni_core:ni_core+ni_val,ii] = ImpOrbs_x[:,j]
                    ii += 1
                if imp_val[i]:
                    j += 1
            if self.nvt > 0:
                j = 0
                for i in range(self.nvt):
                    if imp_virt[i] and imp_virt_[i]:
                        ImpOrbs[ni_core+ni_val+j,ii] = 1.
                        ii += 1
                    if imp_virt[i]:
                        j += 1

            # prepare orbitals
            cf_core = cf_virt = None
            if self.nc > 0:
                cf_core = self.A_core[:,imp_core]
            if self.nvt > 0:
                cf_virt = self.A_virt[:,imp_virt]
            cf_val = cf_tmp[:,ncore:ncore+nact]

            if cf_core is not None and cf_virt is not None:
                cf = np.hstack ((cf_core, cf_val, cf_virt,))
            elif cf_core is not None:
                cf = np.hstack ((cf_core, cf_val,))
            elif cf_virt is not None:
                cf = np.hstack ((cf_val, cf_virt,))
            else:
                cf = cf_val

            # prepare core
            if self.nc > 0:
                Ac_ = self.A_core[:,~(imp_core)]
                X_core = np.hstack((Ac_, cf_tmp[:,:ncore],))
            else:
                X_core = cf_tmp[:,:ncore]

            n_orth = cf.shape[1]
            if cf_virt is not None:
                n_orth -= cf_virt.shape[1]
            print("x-core", X_core)
            print("cf b4 solver", cf)
            print("imporbs", ImpOrbs)

            if self.method == 'hf':
                nel_, en_ = \
                    pyscf_hf.solve (self.mol, \
                                2*(self.nup-X_core.shape[1]), \
                                X_core, cf, ImpOrbs, chempot=chempot, \
                                n_orth=n_orth)

            elif self.method == 'cc':
                nel_, en_ = \
                    pyscf_cc.solve (self.mol, \
                                2*(self.nup-X_core.shape[1]), \
                                X_core, cf, ImpOrbs, chempot=chempot, \
                                n_orth=n_orth,FrozenPot=self.FrozenPot)

            elif self.method == 'ccsd(t)':
                nel_, en_ = \
                    pyscf_ccsdt.solve (self.mol, \
                                2*(self.nup-X_core.shape[1]), \
                                X_core, cf, ImpOrbs, chempot=chempot, \
                                n_orth=n_orth)

            elif self.method == 'mp2':
                nel_, en_ = \
                    pyscf_mp2.solve (self.mol, \
                                2*(self.nup-X_core.shape[1]), \
                                X_core, cf, ImpOrbs, chempot=chempot, \
                                n_orth=n_orth,FrozenPot=self.FrozenPot)


            elif self.method == 'dfmp2':
                nel_, en_ = \
                    pyscf_dfmp2.solve (self.mol, \
                                2*(self.nup-X_core.shape[1]), \
                                X_core, cf, ImpOrbs, chempot=chempot, \
                                n_orth=n_orth,FrozenPot=self.FrozenPot, mf_tot=self.mf_tot)

            elif self.method == 'dfmp2_testing':
                nel_, en_ = \
                    dfmp2_testing.solve (self.mol, \
                                2*(self.nup-X_core.shape[1]), \
                                X_core, cf, ImpOrbs, chempot=chempot, \
                                n_orth=n_orth,FrozenPot=self.FrozenPot, mf_tot=self.mf_tot)

            elif self.method == 'dfmp2_testing2':
                # print(self.mol)
                # print(2*(self.nup-X_core.shape[1]))
                # print(X_core.shape)
                # print(cf.shape)
                # print(ImpOrbs.shape)
                # print(n_orth)
                nel_, en_ = \
                    dfmp2_testing2.solve (self.mol, \
                                2*(self.nup-X_core.shape[1]), \
                                X_core, cf, ImpOrbs, chempot=chempot, \
                                n_orth=n_orth,FrozenPot=self.FrozenPot ) #, mf_tot=self.mf_tot)

            elif self.method == 'dfmp2_testing3':
                nel_, en_ = \
                    dfmp2_testing3.solve (self.mol, \
                                2*(self.nup-X_core.shape[1]), \
                                X_core, cf, ImpOrbs, chempot=chempot, \
                                n_orth=n_orth,FrozenPot=self.FrozenPot, mf_tot=self.mf_tot)

            elif self.method == 'dfmp2_testing4':
                nel_, en_ = \
                    dfmp2_testing4.solve (self.mol, \
                                2*(self.nup-X_core.shape[1]), \
                                X_core, cf, ImpOrbs, chempot=chempot, \
                                n_orth=n_orth,FrozenPot=self.FrozenPot, mf_tot=self.mf_tot)


            elif self.method == 'fci':
                nel_, en_ = \
                    pyscf_fci.solve (self.mol, \
                                2*(self.nup-X_core.shape[1]), \
                                X_core, cf, ImpOrbs, chempot=chempot, \
                                n_orth=n_orth)

            elif self.method == 'dmrg':
                nel_, en_ = \
                    dmrg.solve (self.mol, \
                                2*(self.nup-X_core.shape[1]), \
                                X_core, cf, ImpOrbs, chempot=chempot, \
                                n_orth=n_orth)

            nelec  += nel_
            energy += en_

        if self.parallel:
            nelec_tot = comm.reduce(nelec, op=MPI.SUM, root=0)
            energy_tot = comm.reduce(energy, op=MPI.SUM, root=0)
            if rank == 0:
                energy_tot += self.mol.energy_nuc() + self.e_core
            nelec = comm.bcast(nelec_tot, root=0)
            energy = comm.bcast(energy_tot, root=0)
            comm.barrier()
            if rank == 0:
                print('DMET energy = ', energy)
        else:
            energy += self.mol.energy_nuc() + self.e_core
            print('DMET energy = ', energy)

        return nelec
Example #7
# Split data into: Training, Testing, Validating
SPLIT = 0.2     # Ratio of test samples versus training samples
SEED = 42       # Random state for train_test_split, for reproducible debugging

# Hyper Parameters
LEARNING_RATE = 0.05
LEARNING_DECAY = LEARNING_RATE / 32
BN_EPS = 0.8
EARLY_STOP = EarlyStopping(monitor='val_acc', patience=3)
BATCH_SIZE = 128
EPOCH = 1

# Prepare training and testing samples
TRAIN_PATH = os.path.join(os.getcwd(), 'TrainingSamples')
logger.debug('Path for training sample: %s', TRAIN_PATH)
LABEL, FEATURE = embedding(TRAIN_PATH)
logger.debug('Training Size: %s', len(LABEL))
X_train, X_test, Y_train, Y_test = train_test_split(FEATURE,
                                                    LABEL,
                                                    test_size=SPLIT,
                                                    random_state=SEED)
X_train = np.array([i for i in X_train])
X_test = np.array([i for i in X_test])
Y_train = to_categorical(Y_train)
Y_test = to_categorical(Y_test)
logger.debug('training shape = %s', X_train.shape)

DATA = [X_train, X_train, X_train, X_train]

# The original excerpt is truncated mid-call; the epochs, validation, and
# callback arguments below are a plausible completion using the constants
# defined above.
FIT_HISTORY = model.fit(DATA, Y_train,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCH,
                        validation_data=([X_test] * 4, Y_test),
                        callbacks=[EARLY_STOP])
Example #8
# --------------------------- Embedding process --------
if process_name == 'embedding':
    files = os.listdir(data_path)
    shutil.rmtree('workspace/img_marked', ignore_errors=True)
    os.makedirs('workspace/img_marked')  # Path of watermarked image

    # --------- Generate the random binary watermark -----------
    total_bit = int(capacity*np.prod(img_size))
    mark = np.random.randint(2, size=(total_bit, 1), dtype='uint8')
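    # `mark` is a column vector of `total_bit` random bits; it is saved to a
    # .mat file below so the extraction pass can verify the embedded watermark.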

    switched_block = []  # Percentage of NROI blocks switched into ROI blocks
    start_time = time.time()
    for file in tqdm(files):
        img_org = Image.open(os.path.join(data_path, file))
        img_org = np.asarray(img_org)
        [img_marked,  switched] = embedding(img_org, img_size, mark, block_size, thresh, coefficient,
                                            segment_model_path)  # embedding
        switched_block.append(switched)
        img_marked = Image.fromarray(img_marked)
        img_marked.save(os.path.join('workspace/img_marked', file))
    elapsed = time.time() - start_time
    sio.savemat('workspace/mark_'+str(capacity)+'.mat', {'mark': mark})
    print('Average percentage of NROI blocks switched into ROI blocks: ' + str(np.mean(switched_block)))
    print('Embedding time: ', elapsed)

# ------------------------------------ Extraction process-----------
elif process_name == 'extraction':
    files = os.listdir(data_path)
    shutil.rmtree('workspace/img_recovered', ignore_errors=True)
    os.makedirs('workspace/img_recovered')
    mark_orig = sio.loadmat('./workspace/mark_' + str(capacity) + '.mat')['mark']
Example #9
def page3():

    train_file_list = os.listdir(train_file_path)
    test_file_list = os.listdir(test_file_path)
    embed_model_list = os.listdir(embed_model_path)
    machine_model_list = os.listdir(machine_model_path)
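    # POST requests carry a JSON payload from the visualization button; the
    # five branches below cover the combinations of first-time vs. pre-trained
    # embedding and machine-learning runs.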

    if request.method == "POST":

        # If the visualization button was clicked
        if request.form.get("visual_button"):

            response_data = request.form.get("visual_button")
            response_data = json.loads(response_data)
            print(response_data)

            trainFile = response_data['trainData']
            testFile = response_data['testData']

            # Read the data
            train = pd.read_csv(train_file_path + trainFile)
            test = pd.read_csv(test_file_path + testFile)

            # Check for missing values (for now, just drop them)
            if pd.isnull(train['x']).sum() > 0 or pd.isnull(
                    train['y']).sum() > 0:
                train = train.dropna()
            if pd.isnull(test['x']).sum() > 0 or pd.isnull(
                    test['y']).sum() > 0:
                test = test.dropna()

            train = train.sample(frac=1).reset_index(drop=True)
            test = test.sample(frac=1).reset_index(drop=True)

            # 1) First embedding + visualization -> only embedding parameters needed
            # is_pre_embed absent, is_pre_machine absent, machine_value []
            if 'is_pre_embed' not in response_data and 'is_pre_machine' not in response_data and response_data[
                    'machine_value'] == []:

                print('first-embed, no-machine')

                embed_type = response_data['embed_type']
                embed_params = get_embed_params(embed_type,
                                                response_data['embed_value'])

                # Embedding
                X_train, X_test, y_train, y_test = embedding(
                    trainFile.split(".")[0], embed_type, train, test,
                    embed_params)

                # Dimensionality reduction
                dimension_type = response_data['dimension_type']
                dimension_reduction(dimension_type, X_train, X_test, y_train,
                                    y_test)

                return render_template(
                    'visualization.html',
                    visualization="embedding_and_visualization")

            # 2) Pre-trained embedding + visualization -> no parameters needed
            # is_pre_embed present, is_pre_machine absent, embed_value [], machine_value []
            elif 'is_pre_embed' in response_data and 'is_pre_machine' not in response_data and response_data[
                    'embed_value'] == [] and response_data[
                        'machine_value'] == []:

                print('pre-embed, no-machine')

                embed_type = response_data['embed_type']
                pre_embed_model = response_data['pre_embed_model']

                # Embedding
                X_train, X_test, y_train, y_test = pre_train_embedding(
                    embed_type, pre_embed_model, train, test)

                # Dimensionality reduction
                dimension_type = response_data['dimension_type']
                dimension_reduction(dimension_type, X_train, X_test, y_train,
                                    y_test)

                return render_template(
                    'visualization.html',
                    visualization="embedding_and_visualization")

            # 3) First embedding + first machine learning + visualization ->
            #    both embedding and machine-learning parameters needed
            # is_pre_embed absent, is_pre_machine absent, machine_value present
            elif 'is_pre_embed' not in response_data and 'is_pre_machine' not in response_data and response_data[
                    'machine_value'] != []:

                print('first-embed, first-machine')

                embed_type = response_data['embed_type']
                embed_params = get_embed_params(embed_type,
                                                response_data['embed_value'])

                machine_type = response_data['machine_type']
                machine_params = get_machine_params(
                    machine_type, response_data['machine_value'])

                # Embedding
                X_train, X_test, y_train, y_test = embedding(
                    trainFile.split(".")[0], embed_type, train, test,
                    embed_params)

                # Dimensionality reduction
                dimension_type = response_data['dimension_type']
                dimension_reduction(dimension_type, X_train, X_test, y_train,
                                    y_test)

                # Machine learning
                train_y_pred, test_y_pred = machine_learning(
                    embed_type, machine_type, X_train, X_test, y_train, y_test,
                    machine_params)

            # 4) Pre-trained embedding + first machine learning + visualization ->
            #    only machine-learning parameters needed
            # is_pre_embed present, is_pre_machine absent, embed_value [], machine_value present
            elif 'is_pre_embed' in response_data and 'is_pre_machine' not in response_data and response_data[
                    'embed_value'] == [] and response_data[
                        'machine_value'] != []:

                print('pre-embed, first-machine')

                embed_type = response_data['embed_type']
                pre_embed_model = response_data['pre_embed_model']

                machine_type = response_data['machine_type']
                machine_params = get_machine_params(
                    machine_type, response_data['machine_value'])

                # Embedding
                X_train, X_test, y_train, y_test = pre_train_embedding(
                    embed_type, pre_embed_model, train, test)

                # Dimensionality reduction
                dimension_type = response_data['dimension_type']
                dimension_reduction(dimension_type, X_train, X_test, y_train,
                                    y_test)

                # Machine learning
                train_y_pred, test_y_pred = machine_learning(
                    embed_type, machine_type, X_train, X_test, y_train, y_test,
                    machine_params)

            # 5) Pre-trained embedding + pre-trained machine learning +
            #    visualization -> no parameters needed
            # is_pre_embed present, is_pre_machine present
            elif 'is_pre_embed' in response_data and 'is_pre_machine' in response_data:

                print('pre-embed, pre-machine')

                embed_type = response_data['embed_type']
                machine_type = response_data['machine_type']

                pre_embed_model = response_data['pre_embed_model']

                # Embedding
                X_train, X_test, y_train, y_test = pre_train_embedding(
                    embed_type, pre_embed_model, train, test)

                # Dimensionality reduction
                dimension_type = response_data['dimension_type']
                dimension_reduction(dimension_type, X_train, X_test, y_train,
                                    y_test)

                # Machine learning
                train_y_pred, test_y_pred = pre_train_machine_learning(
                    embed_type, machine_type, X_train, X_test, y_train, y_test)

            # Machine-learning results after training finishes
            from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

            target_names = list(set(y_train))
            train_df = pd.DataFrame(confusion_matrix(y_train, train_y_pred),
                                    index=target_names,
                                    columns=target_names)
            test_df = pd.DataFrame(confusion_matrix(y_test, test_y_pred),
                                   index=target_names,
                                   columns=target_names)

            path = r'/home/ubuntu/project2/csv_files/'

            train_df.to_csv(path + 'confusion_matrix_train.csv', index=False)
            test_df.to_csv(path + 'confusion_matrix_test.csv', index=False)

            # Classification evaluation metrics
            train_accuracy = accuracy_score(y_train, train_y_pred)
            train_precision = precision_score(y_train,
                                              train_y_pred,
                                              average='macro')
            train_recall = recall_score(y_train, train_y_pred, average='macro')
            train_f1 = f1_score(y_train, train_y_pred, average='macro')

            test_accuracy = accuracy_score(y_test, test_y_pred)
            test_precision = precision_score(y_test,
                                             test_y_pred,
                                             average='macro')
            test_recall = recall_score(y_test, test_y_pred, average='macro')
            test_f1 = f1_score(y_test, test_y_pred, average='macro')

            print('train accuracy: {}, test accuracy: {}'.format(
                train_accuracy, test_accuracy))
            print('train precision: {}, test precision: {}'.format(
                train_precision, test_precision))
            print('train recall: {}, test recall: {}'.format(
                train_recall, test_recall))
            print('train f1: {}, test f1: {}'.format(train_f1, test_f1))

            train_score_df = pd.DataFrame(columns=['Metrics', 'Score'])
            train_score_df['Metrics'] = [
                'accuracy', 'precision', 'recall', 'f1'
            ]
            train_score_df['Score'] = [
                round(train_accuracy, 2),
                round(train_precision, 2),
                round(train_recall, 2),
                round(train_f1, 2)
            ]
            train_score_df.to_csv(path + 'metrics_score_train.csv',
                                  index=False)

            test_score_df = pd.DataFrame(columns=['Metrics', 'Score'])
            test_score_df['Metrics'] = [
                'accuracy', 'precision', 'recall', 'f1'
            ]
            test_score_df['Score'] = [
                round(test_accuracy, 2),
                round(test_precision, 2),
                round(test_recall, 2),
                round(test_f1, 2)
            ]
            test_score_df.to_csv(path + 'metrics_score_test.csv', index=False)

            train_df = pd.read_csv(path +
                                   'embedding_and_visualization_train.csv')
            test_df = pd.read_csv(path +
                                  'embedding_and_visualization_test.csv')

            train_df['pred'] = train_y_pred
            train_df['success'] = train_df['pred'] == train_df['target']
            train_df['success'] = train_df['success'].astype(int)

            test_df['pred'] = test_y_pred
            test_df['success'] = test_df['pred'] == test_df['target']
            test_df['success'] = test_df['success'].astype(int)

            success_mapping_table = {0: "실패", 1: "성공"}  # 0 = "failure", 1 = "success" (Korean UI labels)
            train_df['success'] = train_df['success'].map(
                success_mapping_table)
            test_df['success'] = test_df['success'].map(success_mapping_table)

            train_df.to_csv(
                path + 'embedding_and_machinelearning_visualization_train.csv',
                index=False)
            test_df.to_csv(
                path + 'embedding_and_machinelearning_visualization_test.csv',
                index=False)

            return render_template(
                'visualization.html',
                visualization="embedding_and_machineLearning_visualization")

        return render_template('machineLearning.html',
                               train_file_list=train_file_list,
                               test_file_list=test_file_list,
                               embed_model_list=embed_model_list,
                               machine_model_list=machine_model_list)
    else:
        return render_template("machineLearning.html",
                               train_file_list=train_file_list,
                               test_file_list=test_file_list,
                               embed_model_list=embed_model_list,
                               machine_model_list=machine_model_list)
Example #10
    input_filepath, keyword, delay, m = trend_options[args.keyword]

    data = utilities.read_csv(input_filepath, "   ")
    utilities.plot_series(data, input_filepath, keyword)

    embedding.mutual_information(input_filepath, len(data))

    theiler = 0
    min_dim = 1
    max_dim = 10
    ratio = 10.0
    embedding.false_nearest_neighbors(input_filepath, delay, theiler, min_dim,
                                      max_dim, ratio)

    embedded = embedding.embedding(input_filepath, data, delay, m, keyword)
    utilities.plot_embedding(embedded, input_filepath, [1, 2])

    #embedding.recurrence(input_filepath, delay)

    # args.k = 5 # baseball (Error: 0.387174821025)
    # args.k = 5 # influenza (Error: 1.25175439578)
    # args.k = 5 # full moon (Error: 0.907941254943)

    if args.multistep:
        print(
            "Since multi-step forecast is {0}, number of nearest neighbors (currently {1}) must be set to 1"
            .format(args.multistep, args.k))
        args.k = 1

    data = knn.Data(input_filepath + ".embed")
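    # The "<input_filepath>.embed" file is assumed to be written by
    # embedding.embedding() above; it is reloaded here as the k-NN
    # forecaster's input.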
Example #11
    def embedding(self, texts):
        # Preprocess the texts, embed them, then pad to a uniform length
        texts = pre.preprocess(texts)
        embed = emb.embedding(texts)
        embed = emb.padding(embed)
        return embed
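
    # A minimal usage sketch (hypothetical; assumes `obj` is an instance of
    # the enclosing class, with `pre` and `emb` being the project's
    # preprocessing and embedding modules):
    #   padded = obj.embedding(["an example sentence"])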