Пример #1
0
def descript(query_decp, source_category, except_files=None, extend=False, pool_size=32):
    """
    Match a queried description matrix against the stored description files.

    The original author notes that a run takes roughly one minute.

    :param query_decp: description matrix; it is normalised with
        ``nlp_util.process_xsv`` before matching.
        example line: xml_file_name, class_name, element_name
    :param source_category: forwarded unchanged as the first argument of
        ``_scan_match``.
    :param except_files: exclusion keyword(s). Either a single string or an
        iterable of strings; every description file whose path contains one
        of the keywords is left out. ``None`` (the default) keeps every file.
    :param extend: when true, read ``./model/data/description_extend_all``
        instead of ``./model/data/description``.
    :param pool_size: parallel pool size, forwarded to ``_scan_match``.
    :return: the value returned by ``_scan_match``. Per the original author's
        notes this holds the total similarity between the source app and each
        app in the database, sorted by similarity in descending order, and is
        used for app search.
    """
    query_decp = nlp_util.process_xsv(query_decp)
    if extend:
        src_dir = work_path.in_project('./model/data/description_extend_all')
    else:
        src_dir = work_path.in_project('./model/data/description')
    print("PATH!!!! {}".format(src_dir))
    logger = logging.getLogger("StreamLogger")
    file_list = [os.path.join(src_dir, f) for f in os.listdir(src_dir)]

    if except_files is not None:
        # Normalise to a collection of keywords so a single string and an
        # iterable of strings share one code path.
        if isinstance(except_files, str):
            keywords = {except_files}
        else:
            keywords = set(except_files)

        kept = []
        removed = []
        for path in file_list:
            if any(keyword in path for keyword in keywords):
                removed.append(path)
            else:
                kept.append(path)
        logger.debug(pp.pformat(removed))
        # Only replace the list when filtering actually ran; doing it
        # unconditionally raised NameError for except_files=None.
        file_list = kept
    logger.debug(pp.pformat(file_list))

    scan_output = _scan_match(source_category, query_decp, file_list, match_name.ngram_compare, [1, 0.5, 0.5],
                              threshold=0.7,
                              pool_size=pool_size)
    # Per the original author's notes, each entry of scan_output is:
    # tuple(
    # str "reference app description file name",
    # float "app similarity",
    # list "component similarities of the reference app"
    #      [(query app component, reference app component, component similarity)]
    # )
    logger.debug(pp.pformat(util.get_col(scan_output, [0, 1])))
    return scan_output
Пример #2
0
def get_concern_label():
    """Return the set of non-blank, whitespace-stripped lines of concern_label.dat."""
    label_path = work_path.in_project("./model/conf/concern_label.dat")
    with open(label_path, 'r', encoding='utf8') as handle:
        stripped_lines = (line.strip() for line in handle)
        return {entry for entry in stripped_lines if entry != ""}
Пример #3
0
def get_stops():
    """Return the stop set: every character of ``punctuation`` plus each non-blank line of stopwords.dat."""
    collected = set(punctuation)
    stop_path = work_path.in_project("./model/conf/stopwords.dat")
    with open(stop_path, 'r', encoding='utf8') as handle:
        # Lines are stripped first; lines that end up empty are skipped.
        collected.update(word for word in map(str.strip, handle) if word)
    return collected
Пример #4
0
def get_hot_keys():
    """Return the set of stemmed hot keys read from hotkey.dat, one key per non-blank line."""
    key_path = work_path.in_project("./model/conf/hotkey.dat")
    with open(key_path, 'r', encoding='utf8') as handle:
        raw_keys = [line.strip() for line in handle]
    # Blank lines are dropped before stemming, so stem_word never sees "".
    return {stem_word(key) for key in raw_keys if key}
Пример #5
0
def except_list_build_helper():
    """
    Build one option record per entry of the description directory.

    Each record is a dict with ``id`` (``cf_<1-based position>``), ``text``
    (the bare name with underscores turned into spaces) and ``val`` (the bare
    name as returned by ``util.bare_name``).
    """
    description_dir = work_path.in_project('./model/data/description')
    bare_names = [util.bare_name(entry) for entry in os.listdir(description_dir)]
    return [
        {
            'id': f'cf_{position}',
            'text': " ".join(name.split('_')),
            'val': name,
        }
        for position, name in enumerate(bare_names, start=1)
    ]
Пример #6
0
from model import issuedb as idb
import os
from model import util, work_path

# Switch for the generation mode. SRC_DIR and TEST_DIR are only defined when
# it is True; generate_lookup_table() reads both, so calling it with the flag
# left at False raises NameError.
__GENERATE__ = False
if __GENERATE__:
    SRC_DIR = 'tsv/'
    TEST_DIR = 'tsv_test/'

# Project-resolved path of tab_url.tsv; the file is read once at import time.
TSV_FILE = work_path.in_project('./model/conf/tab_url.tsv')
__data_tsv = util.read_tsv(TSV_FILE)


def generate_lookup_table():
    db_driver = idb.ISSuedb()
    output = db_driver.db_retrieve(
        "select name from sqlite_master where type='table' order by name;")
    table_dict = {i[0].replace("$", "_"): i[0] for i in output}

    file_list = os.listdir(SRC_DIR)
    file_list = [os.path.join(SRC_DIR, f) for f in file_list]

    file_list_test = os.listdir(TEST_DIR)
    file_list_test = [os.path.join(TEST_DIR, f) for f in file_list_test]

    files = file_list + file_list_test
    files_dict = {i: False for i in files}

    reload = util.Reload(TSV_FILE)

    for item in table_dict:
Пример #7
0
 def __init__(self, filepath=work_path.in_project('issue.db')):
     """Log the database location, then open a SQLite connection and cursor on it.

     :param filepath: path of the SQLite database file; the default is
         resolved once, when the method is defined.
     """
     logging.getLogger("StreamLogger").info("DB file location: %s" % filepath)
     connection = sqlite3.connect(filepath)
     self.conn = connection
     self.cursor = connection.cursor()
Пример #8
0
import copy
import logging
from model import util, work_path

# Ranking configuration, loaded once at import time from rank_coef.json.
CONF_JSON = util.load_json(work_path.in_project('./model/conf/rank_coef.json'))
# Value stored under "data" (presumably the score coefficients -- TODO confirm against the JSON file).
SCORE_COEF = CONF_JSON["data"]
# Value stored under "scale_max" (presumably the upper bound of the score scale -- TODO confirm).
MAX_VAL = CONF_JSON["scale_max"]


def get_key_sea_count(all_key, text, unique=False):
    """
    Count how many tokens of *text* match each key in *all_key*.

    :param all_key: iterable of hashable keys; every key appears in the result,
        with a count of 0 when it never occurs.
    :param text: sized iterable of tokens (list, tuple, set, ...).
    :param unique: when true, duplicate tokens are collapsed before counting,
        so every count is 0 or 1. A ``set`` is already duplicate-free and is
        used as-is either way.
    :return: a new dict mapping each key to its count, plus the entry
        ``"__corpus_len__"`` holding the number of tokens that were inspected
        (after de-duplication when it applies).
    """
    # The original only bound the token collection for non-set input, so a
    # set argument crashed with UnboundLocalError; a set is now used directly.
    if isinstance(text, set) or not unique:
        tokens = text
    else:
        tokens = set(text)

    counts = dict.fromkeys(all_key, 0)
    for token in tokens:
        # Membership is tested on the dict (same keys as all_key) so the
        # lookup stays O(1) even when all_key is a list.
        if token in counts:
            counts[token] += 1
    counts["__corpus_len__"] = len(tokens)
    # The dict is built fresh on every call, so no defensive copy is needed.
    return counts


def get_key_sea_count_corpus(all_key, corpus, unique=False):
    """
    Run ``get_key_sea_count`` over every document of *corpus*.

    :param all_key: keys to count; converted to a ``set`` once up front unless
        it already is one.
    :param corpus: iterable of documents, each passed as the ``text`` argument.
    :param unique: forwarded unchanged to ``get_key_sea_count``.
    :return: list with one count dict per document, in corpus order.
    """
    key_set = all_key if type(all_key) is set else set(all_key)
    return [get_key_sea_count(key_set, document, unique) for document in corpus]

Пример #9
0
from werkzeug.middleware.shared_data import SharedDataMiddleware
from urllib.parse import quote, unquote
from model import work_path, util
from datetime import datetime, timezone, timedelta
import api
import logging
from tasks import iss_query, job_ready_byid, job_get_byid

# File extensions that allowed_file() accepts.
ALLOWED_EXTENSIONS = set(['zip'])
# Fixed UTC+8 offset timezone.
local_tz = timezone(timedelta(hours=8))

app = Flask(__name__)
# Register the upload directory and create it at import time if it is missing.
app.config['UPLOAD_FOLDER'] = work_path.get_upload()
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Same for the download directory, located at ./downloads inside the project.
app.config['DOWNLOAD_FOLDER'] = work_path.in_project('./downloads')
os.makedirs(app.config['DOWNLOAD_FOLDER'], exist_ok=True)

# Module-level alias of the Flask application logger, set to INFO.
app.logger.setLevel(logging.INFO)
logger = app.logger


def allowed_file(filename):
    """Return True when *filename* contains a dot and the text after its last dot is in ALLOWED_EXTENSIONS.

    The comparison is case-sensitive.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        # No dot at all: there is no extension to check.
        return False
    return extension in ALLOWED_EXTENSIONS


def secure_filename(filename):
    """
    Percent-encode the stem of *filename* and re-attach its extension.

    :param filename: client-supplied file name.
    :return: ``quote(stem, safe='') + extension``. Names without path
        separators are encoded exactly as before, and ``unquote`` still
        restores the original stem.
    """
    stem, file_extension = os.path.splitext(filename)
    # quote() leaves "/" untouched by default, which let names such as
    # "../../x" keep their directory components. safe='' encodes the
    # separator as %2F so the result is always a single path component.
    return quote(stem, safe='') + file_extension