def _main():
    """ Main function. Prints the AST of a file. """
    config = Config()
    filesname = get_opts(sys.argv[1:], config)
    files = []
    if len(filesname) == 1 and os.path.isfile(filesname[0]):
        for line in ast(config, '\n'.join(open(filesname[0]).readlines())):
            print line
        return
    for filename in filesname:
        if os.path.isdir(filename):
            for root, dirs, d_files in os.walk(filename):
                for d_file in d_files:
                    files.append(root + os.sep + d_file)
        else:
            files.append(filename)
    files = filter(lambda item: item.endswith('.py'), files)
    docs = read_docs_from_list(config, files)
    y = docs.values()[0]
    conn = similarity.main(config['sim'], [x for x in docs.values() if x])
    print "Com erro de sintax: %s. Sem erro de sintax: %s." % (len([x for x in docs.values() if not x]), (len([x for x in docs.values() if x])))

    linhaString = []
    linhaString.append("")
    
    for i in range(len(docs.values())):
        linhaString.append("e" + str(i))       
    #print linhaString
    
    with open('test.csv', 'wb') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        for d1, d2, value in conn:
            #print d1.name, d2.name, value        
            spamwriter.writerow([d1.name, d2.name, value])

    linhaAnterior = ""
    listaColunas = []
    auxLista = []

    with open('test2.csv', 'wb') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        spamwriter.writerow(linhaString)
        for d1, d2, value in conn:
            
            #print d1.name, d2.name, value
            auxNomeQuestao =  splitNomeQuesta(d1.name)
            #spamwriter.writerow([test])
            if (not auxLista.__contains__(auxNomeQuestao)):
                auxLista.append(auxNomeQuestao)
                listaColunas.append({auxNomeQuestao: [value]})
            elif (auxLista.__contains__(auxNomeQuestao)):
                auxKey = getObjeto(auxLista, auxNomeQuestao)
                for item in listaColunas:
                    if (item.has_key(auxKey)):
                        item.get(auxKey).append(value)
        for i in listaColunas:    
            spamwriter.writerow([i, len(i.values()[0])]) 
            spamwriter.writerow([])
Пример #2
0
def search_most_similar_movie(standard_title):
    """Score the movies found under ``txt/`` against ``standard_title``.

    Every file matching ``txt/*_append3.txt`` contributes a title (its file
    name without the ``_append3.txt`` suffix). For each entry of the
    module-level ``mLst`` equal to that title, one score is appended:
    ``0`` when the title is ``standard_title`` itself, otherwise
    ``1 / similarity.main(standard_title, title)``.

    Args:
        standard_title: Title every other movie is compared against.

    Returns:
        A list whose first element is ``standard_title``, followed by the
        scores in the order the files were returned by ``glob``.
    """
    suffix = "_append3.txt"
    results = [standard_title]

    # Walk the text files in the DB folder.
    for path in glob.glob(os.path.join("txt", "*" + suffix)):
        # Recover the title by stripping the known suffix. Splitting on the
        # first "." instead would truncate titles that contain a dot.
        title = os.path.basename(path)[:-len(suffix)]

        for known_title in mLst:
            if known_title != title:
                continue
            if title == standard_title:
                # A movie is never compared with itself.
                results.append(0)
            else:
                dist = similarity.main(standard_title, title)
                print(standard_title, "--", title, dist)
                # NOTE(review): raises ZeroDivisionError when dist is 0;
                # confirm whether similarity.main can return 0 here.
                results.append(1 / dist)
    return results
Пример #3
0
def _main():
    """Command-line entry point.

    With exactly one argument that is an existing file, print every item
    yielded by ``ast(config, source_text)`` for that file and return.

    Otherwise collect the ``.py`` files named by the arguments (directories
    are walked recursively), load them with ``read_docs_from_list``, run
    ``similarity.main`` over the documents that loaded to a truthy value and
    print one ``d1.name d2.name value`` line per result triple.
    """
    config = Config()
    filesname = get_opts(sys.argv[1:], config)

    if len(filesname) == 1 and os.path.isfile(filesname[0]):
        # Read through a context manager so the handle is closed promptly.
        with open(filesname[0]) as source:
            # NOTE(review): readlines() keeps each line terminator, so joining
            # with '\n' doubles the line breaks. Kept as-is because the
            # downstream ``ast`` helper is not visible here -- confirm intent.
            text = '\n'.join(source.readlines())
        for line in ast(config, text):
            print line
        return

    files = []
    for filename in filesname:
        if os.path.isdir(filename):
            for root, _dirs, d_files in os.walk(filename):
                for d_file in d_files:
                    files.append(root + os.sep + d_file)
        else:
            files.append(filename)
    files = [path for path in files if path.endswith('.py')]

    docs = read_docs_from_list(config, files)
    # Only documents that loaded to a truthy value are compared.
    conn = similarity.main(config['sim'], [doc for doc in docs.values() if doc])
    for d1, d2, value in conn:
        print d1.name, d2.name, value
Пример #4
0
def _main():
    """Command-line entry point.

    With exactly one argument that is an existing file, print every item
    yielded by ``ast(config, source_text)`` for that file and return.

    Otherwise collect the ``.py`` files named by the arguments (directories
    are walked recursively), load them with ``read_docs_from_list``, run
    ``similarity.main`` over the documents that loaded to a truthy value and
    write two reports into the current working directory:

    * ``test.csv``  -- one ``d1.name, d2.name, value`` row per result triple;
    * ``test2.csv`` -- a header row (``""``, ``e0``, ``e1``, ... one entry per
      loaded document) followed by one row per group of values, grouped on
      ``splitNomeQuesta(d1.name)``, each followed by an empty row.
    """
    config = Config()
    filesname = get_opts(sys.argv[1:], config)

    if len(filesname) == 1 and os.path.isfile(filesname[0]):
        # Read through a context manager so the handle is closed promptly.
        with open(filesname[0]) as source:
            # NOTE(review): readlines() keeps each line terminator, so joining
            # with '\n' doubles the line breaks. Kept as-is because the
            # downstream ``ast`` helper is not visible here -- confirm intent.
            text = '\n'.join(source.readlines())
        for line in ast(config, text):
            print line
        return

    files = []
    for filename in filesname:
        if os.path.isdir(filename):
            for root, _dirs, d_files in os.walk(filename):
                for d_file in d_files:
                    files.append(root + os.sep + d_file)
        else:
            files.append(filename)
    files = [path for path in files if path.endswith('.py')]

    docs = read_docs_from_list(config, files)
    all_docs = docs.values()
    # Falsy entries are reported below as documents with syntax errors.
    parsed = [doc for doc in all_docs if doc]

    # The results are traversed twice (once per report); materialise them so
    # a one-shot iterator cannot leave the second report empty.
    conn = list(similarity.main(config['sim'], parsed))
    print "Com erro de sintax: %s. Sem erro de sintax: %s." % (
        len(all_docs) - len(parsed),
        len(parsed))

    header = [""] + ["e" + str(i) for i in range(len(all_docs))]

    # 'wb' is the mode the Python 2 csv module requires for its output files.
    with open('test.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile,
                            delimiter=',',
                            quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)
        for d1, d2, value in conn:
            writer.writerow([d1.name, d2.name, value])

    seen_names = []   # question names in first-seen order
    columns = []      # one single-entry dict {name: [values]} per question

    with open('test2.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile,
                            delimiter=',',
                            quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(header)
        for d1, d2, value in conn:
            question = splitNomeQuesta(d1.name)
            if question not in seen_names:
                seen_names.append(question)
                columns.append({question: [value]})
            else:
                key = getObjeto(seen_names, question)
                for column in columns:
                    if key in column:
                        column[key].append(value)
        for column in columns:
            # The first cell is the single-entry dict itself (written in its
            # string form); the second is how many values it collected.
            writer.writerow([column, len(column.values()[0])])
            writer.writerow([])
Пример #5
0
    def exec_function(self, args):
        """Build the ``similarity.py`` argument list from ``args`` and run it.

        Two values of ``args.cmd`` are handled:

        * ``"simtrain"`` -- the model is written to a fresh temporary
          directory; after the run, default tokenization configs are
          generated into it, the sentencepiece model is moved in, the
          intermediate training files are deleted, and the directory is
          pushed to ``args.mdir`` through ``self._storage`` and removed
          locally.
        * ``"simapply"`` -- the model directory and both test files are
          resolved to local paths; unless ``args.output`` is ``"-"``, the
          output goes to a temporary file that is pushed to ``args.output``
          afterwards.

        Args:
            args: Parsed command-line namespace; ``args.cmd`` selects the
                mode and the remaining attributes are forwarded as options.
        """
        new_args = []
        local_output = None
        train_data = None
        local_model_dir = None

        if args.cmd == "simtrain":
            local_model_dir = tempfile.mkdtemp()
        if args.cmd == "simapply":
            local_model_dir = self.convert_to_local_file([args.mdir], is_dir=True)[0]

        new_args.extend(
            [
                "-mdir",
                local_model_dir,
                "-batch_size",
                str(args.batch_size),
                "-seed",
                str(args.seed),
            ]
        )
        if args.debug:
            new_args.append("-debug")

        if args.cmd == "simtrain":
            setCUDA_VISIBLE_DEVICES(args.gpuid)
            train_data = self.prepare_train_data(args)
            # TODO: if user provides tokenization config file
            # '-src_tok', self.convert_to_local_file([args.src_tok])[0],
            # '-tgt_tok', self.convert_to_local_file([args.tgt_tok])[0],
            new_args.extend(
                [
                    "-trn",
                    train_data["train"],
                    "-dev",
                    train_data["dev"],
                    "-src_voc",
                    train_data["src_voc"],
                    "-tgt_voc",
                    train_data["tgt_voc"],
                    "-src_emb_size",
                    str(args.src_emb_size),
                    "-tgt_emb_size",
                    str(args.tgt_emb_size),
                    "-src_lstm_size",
                    str(args.src_lstm_size),
                    "-tgt_lstm_size",
                    str(args.tgt_lstm_size),
                    "-lr",
                    str(args.lr),
                    "-lr_decay",
                    str(args.lr_decay),
                    "-lr_method",
                    args.lr_method,
                    "-aggr",
                    args.aggr,
                    "-r",
                    str(args.r),
                    "-dropout",
                    str(args.dropout),
                    "-mode",
                    args.mode,
                    "-max_sents",
                    str(args.max_sents),
                    "-n_epochs",
                    str(args.n_epochs),
                    "-report_every",
                    str(args.report_every),
                ]
            )
            # Pre-trained embeddings are optional.
            if args.src_emb:
                new_args.extend(
                    ["-src_emb", self.convert_to_local_file([args.src_emb])[0]]
                )
            if args.tgt_emb:
                new_args.extend(
                    ["-tgt_emb", self.convert_to_local_file([args.tgt_emb])[0]]
                )

        if args.cmd == "simapply":
            local_src_file = self.convert_to_local_file([args.tst_src])[0]
            local_tgt_file = self.convert_to_local_file([args.tst_tgt])[0]
            new_args.append("-tst")
            new_args.append(local_src_file + "," + local_tgt_file)
            if args.epoch:
                new_args.append("-epoch")
                new_args.append(str(args.epoch))
            new_args.append("-output")
            if args.output == "-":
                new_args.append(args.output)
            else:
                local_output = tempfile.NamedTemporaryFile(delete=False)
                # Only the path is handed on; close our own handle so the
                # descriptor is not leaked (delete=False keeps the file).
                local_output.close()
                new_args.append(local_output.name)
            # Boolean switches are forwarded under their own name.
            for switch in (
                "q",
                "show_matrix",
                "show_svg",
                "show_align",
                "show_last",
                "show_aggr",
            ):
                if getattr(args, switch):
                    new_args.append("-" + switch)

        logger.info("command line option: %s", " ".join(new_args))
        main(["similarity.py"] + new_args)

        if args.cmd == "simtrain":
            default_sp_model_name = "joint_spm_50k.model"
            generate_default_tok_config(
                os.path.join(local_model_dir, "tokenization_src.json"),
                default_sp_model_name,
                source=True,
            )
            generate_default_tok_config(
                os.path.join(local_model_dir, "tokenization_tgt.json"),
                default_sp_model_name,
                source=False,
            )
            os.rename(
                train_data["sp_model"],
                os.path.join(local_model_dir, default_sp_model_name),
            )

            # The intermediate training files are no longer needed.
            for key in ("train", "dev", "src_voc", "tgt_voc"):
                os.remove(train_data[key])

            self._storage.push(local_model_dir, args.mdir)
            shutil.rmtree(local_model_dir)

        if args.cmd == "simapply" and local_output is not None:
            self._storage.push(local_output.name, args.output)
            # Same push-then-delete sequence the simtrain branch applies to
            # its scratch directory, so temp files do not pile up.
            if os.path.exists(local_output.name):
                os.remove(local_output.name)
Пример #6
0
import similarity
import wordsense

# Prompt for a line of text and run both analyses on each
# whitespace-separated word, in the order the words were typed.
for entry in input("Enter one or more words (separated by a space): ").split():
    wordsense.main(entry)
    similarity.main(entry)
Пример #7
0
    def exec_function(self, args):
        """Build the ``similarity.py`` argument list from ``args`` and run it.

        Two values of ``args.cmd`` are handled:

        * ``'simtrain'`` -- the model is written to a fresh temporary
          directory; after the run, default tokenization configs are
          generated into it, the sentencepiece model is moved in, the
          intermediate training files are deleted, and the directory is
          pushed to ``args.mdir`` through ``self._storage`` and removed
          locally.
        * ``'simapply'`` -- the model directory and both test files are
          resolved to local paths; unless ``args.output`` is ``'-'``, the
          output goes to a temporary file that is pushed to ``args.output``
          afterwards.

        Args:
            args: Parsed command-line namespace; ``args.cmd`` selects the
                mode and the remaining attributes are forwarded as options.
        """
        new_args = []
        local_output = None
        train_data = None
        local_model_dir = None

        if args.cmd == 'simtrain':
            local_model_dir = tempfile.mkdtemp()
        if args.cmd == 'simapply':
            local_model_dir = self.convert_to_local_file([args.mdir], is_dir=True)[0]

        new_args.extend([
            '-mdir',        local_model_dir,
            '-batch_size',  str(args.batch_size),
            '-seed',        str(args.seed)
            ])
        if args.debug:
            new_args.append('-debug')

        if args.cmd == 'simtrain':
            setCUDA_VISIBLE_DEVICES(args.gpuid)
            train_data = self.prepare_train_data(args)
            # TODO: if user provides tokenization config file
            # '-src_tok', self.convert_to_local_file([args.src_tok])[0],
            # '-tgt_tok', self.convert_to_local_file([args.tgt_tok])[0],
            new_args.extend([
                '-trn',     train_data['train'],
                '-dev',     train_data['dev'],
                '-src_voc', train_data['src_voc'],
                '-tgt_voc', train_data['tgt_voc'],
                '-src_emb_size',    str(args.src_emb_size),
                '-tgt_emb_size',    str(args.tgt_emb_size),
                '-src_lstm_size',   str(args.src_lstm_size),
                '-tgt_lstm_size',   str(args.tgt_lstm_size),
                '-lr',              str(args.lr),
                '-lr_decay',        str(args.lr_decay),
                '-lr_method',       args.lr_method,
                '-aggr',            args.aggr,
                '-r',               str(args.r),
                '-dropout',         str(args.dropout),
                '-mode',            args.mode,
                '-max_sents',       str(args.max_sents),
                '-n_epochs',        str(args.n_epochs),
                '-report_every',    str(args.report_every)
                ])
            # Pre-trained embeddings are optional.
            if args.src_emb:
                new_args.extend(['-src_emb', self.convert_to_local_file([args.src_emb])[0]])
            if args.tgt_emb:
                new_args.extend(['-tgt_emb', self.convert_to_local_file([args.tgt_emb])[0]])

        if args.cmd == 'simapply':
            local_src_file = self.convert_to_local_file([args.tst_src])[0]
            local_tgt_file = self.convert_to_local_file([args.tst_tgt])[0]
            new_args.append('-tst')
            new_args.append(local_src_file + ',' + local_tgt_file)
            if args.epoch:
                new_args.append('-epoch')
                new_args.append(str(args.epoch))
            new_args.append('-output')
            if args.output == '-':
                new_args.append(args.output)
            else:
                local_output = tempfile.NamedTemporaryFile(delete=False)
                # Only the path is handed on; close our own handle so the
                # descriptor is not leaked (delete=False keeps the file).
                local_output.close()
                new_args.append(local_output.name)
            # Boolean switches are forwarded under their own name.
            for switch in ('q', 'show_matrix', 'show_svg',
                           'show_align', 'show_last', 'show_aggr'):
                if getattr(args, switch):
                    new_args.append('-' + switch)

        logger.info("command line option: %s", " ".join(new_args))
        main(['similarity.py'] + new_args)

        if args.cmd == 'simtrain':
            default_sp_model_name = 'joint_spm_50k.model'
            generate_default_tok_config(os.path.join(local_model_dir, 'tokenization_src.json'), default_sp_model_name, source=True)
            generate_default_tok_config(os.path.join(local_model_dir, 'tokenization_tgt.json'), default_sp_model_name, source=False)
            os.rename(train_data['sp_model'], os.path.join(local_model_dir, default_sp_model_name))

            # The intermediate training files are no longer needed.
            for key in ('train', 'dev', 'src_voc', 'tgt_voc'):
                os.remove(train_data[key])

            self._storage.push(local_model_dir, args.mdir)
            shutil.rmtree(local_model_dir)

        if args.cmd == 'simapply' and local_output is not None:
            self._storage.push(local_output.name, args.output)
            # Same push-then-delete sequence the simtrain branch applies to
            # its scratch directory, so temp files do not pile up.
            if os.path.exists(local_output.name):
                os.remove(local_output.name)
Пример #8
0
    def exec_function(self, args):
        """Build the ``similarity.py`` argument list from ``args`` and run it.

        The model directory ``args.mdir`` is always resolved to a local path.
        For ``args.cmd == 'simtrain'`` the training, vocabulary, tokenization
        and embedding files are resolved to local paths and forwarded with
        the training hyper-parameters. For ``args.cmd == 'simapply'`` both
        test files are resolved locally and, unless ``args.output`` is
        ``'-'``, the output goes to a temporary file that is pushed to
        ``args.output`` through ``self._storage`` after the run.

        Args:
            args: Parsed command-line namespace; ``args.cmd`` selects the
                mode and the remaining attributes are forwarded as options.
        """
        new_args = []
        local_output = None

        new_args.extend([
            '-mdir',
            self.convert_to_local_file([args.mdir], is_dir=True)[0],
            '-batch_size',
            str(args.batch_size),
            '-seed',
            str(args.seed),
        ])
        if args.debug:
            new_args.append('-debug')

        if args.cmd == 'simtrain':
            new_args.extend([
                '-trn', self.convert_to_local_file([args.trn])[0],
                '-dev', self.convert_to_local_file([args.dev])[0],
                '-src_tok', self.convert_to_local_file([args.src_tok])[0],
                '-src_voc', self.convert_to_local_file([args.src_voc])[0],
                '-tgt_tok', self.convert_to_local_file([args.tgt_tok])[0],
                '-tgt_voc', self.convert_to_local_file([args.tgt_voc])[0],
                '-src_emb', self.convert_to_local_file([args.src_emb])[0],
                '-tgt_emb', self.convert_to_local_file([args.tgt_emb])[0],
                '-src_emb_size', str(args.src_emb_size),
                '-tgt_emb_size', str(args.tgt_emb_size),
                '-src_lstm_size', str(args.src_lstm_size),
                '-tgt_lstm_size', str(args.tgt_lstm_size),
                '-lr', str(args.lr),
                '-lr_decay', str(args.lr_decay),
                '-lr_method', args.lr_method,
                '-aggr', args.aggr,
                '-r', str(args.r),
                '-dropout', str(args.dropout),
                '-mode', args.mode,
                '-max_sents', str(args.max_sents),
                '-n_epochs', str(args.n_epochs),
                '-report_every', str(args.report_every),
            ])

        if args.cmd == 'simapply':
            local_src_file = self.convert_to_local_file([args.tst_src])[0]
            local_tgt_file = self.convert_to_local_file([args.tst_tgt])[0]
            new_args.append('-tst')
            new_args.append(local_src_file + ',' + local_tgt_file)
            if args.epoch:
                new_args.append('-epoch')
                new_args.append(str(args.epoch))
            new_args.append('-output')
            if args.output == '-':
                new_args.append(args.output)
            else:
                local_output = tempfile.NamedTemporaryFile(delete=False)
                # Only the path is handed on; close our own handle so the
                # descriptor is not leaked (delete=False keeps the file).
                local_output.close()
                new_args.append(local_output.name)
            # Boolean switches are forwarded under their own name.
            for switch in ('q', 'show_matrix', 'show_svg',
                           'show_align', 'show_last', 'show_aggr'):
                if getattr(args, switch):
                    new_args.append('-' + switch)

        logger.info("command line option: %s", " ".join(new_args))
        main(['similarity.py'] + new_args)

        if local_output is not None:
            self._storage.push(local_output.name, args.output)
Пример #9
0
def opp_cos_route():
    """Answer with the JSON form of ``similarity.main`` for the request's 'opp' value.

    Reads the ``'opp'`` entry of the request's JSON body, passes it with
    ``cos_odict`` to ``similarity.main`` and wraps the result with
    ``flask.jsonify``.
    """
    body = flask.request.json
    result = similarity.main(body['opp'], cos_odict)
    return flask.jsonify(result)
Пример #10
0
def co_opps_route():
    """Answer with the JSON form of ``similarity.main`` for the request's 'co' value.

    Reads the ``'co'`` entry of the request's JSON body, passes it with
    ``opps_odict`` to ``similarity.main`` and wraps the result with
    ``flask.jsonify``.
    """
    body = flask.request.json
    result = similarity.main(body['co'], opps_odict)
    return flask.jsonify(result)