Пример #1
0
def get_target_object(raw_target):
    """Build a target-description dict from raw configuration lines.

    For every input line the text after the first '#' is dropped and the
    rest is passed through ``util.remove_forward_and_back_spaces``; lines
    that end up empty are skipped. Each remaining line is split on its
    first ':' into a key and a value and dispatched on the key:

    * ``kKeyRoot`` and ``kKeyIgnoredDir`` -- values accumulate in a list
      without duplicates, in first-seen order.
    * ``kKeyTargetExts`` -- the value is split on ',' and stored as a list;
      a later line replaces an earlier one.
    * ``kKeyIndexName`` -- stored as a one-element list, which is
      convenient for later processing.

    Any other line, including a line with no ':' at all, prints
    'No used' and is otherwise ignored.

    Args:
        raw_target: iterable of text lines.

    Returns:
        dict mapping the recognised keys to lists of strings.
    """
    cleaned_lines = []
    for raw_line in raw_target:
        stripped = util.remove_forward_and_back_spaces(raw_line.split('#')[0])
        if stripped:
            cleaned_lines.append(stripped)

    target = {}

    def add_unique(key, value):
        # Order-preserving de-duplication; avoids rebuilding a set per line.
        values = target.setdefault(key, [])
        if value not in values:
            values.append(value)

    def process_one_line(line):
        # partition() makes a missing ':' explicit; find() would return -1
        # and silently turn the line minus its last character into the key.
        raw_key, separator, raw_value = line.partition(':')
        if not separator:
            print('No used')
            return
        key = util.remove_forward_and_back_spaces(raw_key)
        value = util.remove_forward_and_back_spaces(raw_value)
        if key == kKeyRoot:
            add_unique(key, value)
        elif key == kKeyTargetExts:
            extensions = tuple(value.split(','))
            target[key] = list(util.remove_fandb_spaces_in_tuple(extensions))
        elif key == kKeyIndexName:
            # Also a list, although with a single element.
            target[key] = [util.remove_forward_and_back_spaces(value)]
        elif key == kKeyIgnoredDir:
            add_unique(key, value)
        else:
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print('No used')

    # An explicit loop instead of map(): map() is lazy on Python 3 and
    # would never run the side-effecting callback.
    for line in cleaned_lines:
        process_one_line(line)
    return target
Пример #2
0
 def process_one_line(line):
     """Parse one ``key: value`` line into the enclosing ``target`` dict.

     The line is split on its first ':'; both halves are passed through
     ``util.remove_forward_and_back_spaces``. ``kKeyRoot`` and
     ``kKeyIgnoredDir`` values accumulate in a duplicate-free list,
     ``kKeyTargetExts`` is split on ',' into a list, and ``kKeyIndexName``
     is stored as a one-element list. Any other line, including one with
     no ':' at all, prints 'No used'.
     """
     # partition() makes a missing ':' explicit; find() would return -1
     # and silently turn the line minus its last character into the key.
     raw_key, separator, raw_value = line.partition(':')
     if not separator:
         print('No used')
         return
     key = util.remove_forward_and_back_spaces(raw_key)
     value = util.remove_forward_and_back_spaces(raw_value)
     if key == kKeyRoot:
         # Order-preserving de-duplication instead of list(set(...)).
         roots = target.setdefault(key, [])
         if value not in roots:
             roots.append(value)
     elif key == kKeyTargetExts:
         extensions = tuple(value.split(','))
         target[key] = list(util.remove_fandb_spaces_in_tuple(extensions))
     elif key == kKeyIndexName:
         # Also a list, although with a single element; convenient for
         # later processing.
         target[key] = [util.remove_forward_and_back_spaces(value)]
     elif key == kKeyIgnoredDir:
         ignored = target.setdefault(key, [])
         if value not in ignored:
             ignored.append(value)
     else:
         # Parenthesised single-argument print behaves the same on
         # Python 2 and Python 3.
         print('No used')
Пример #3
0
def _parse_target_params(str_params):
    rpt = []
    if str_params.count('[') != str_params.count(']'):
        return None, 1, "\tError: [Count '[' != count ']']"

    if str_params.count(':') != str_params.count('[') or \
        str_params.count(':') != str_params.count(']'):
        return None, 1, "\tError: [Format param - [anything : something]]"
    
    params = str_params.replace('[', '')
    params = params.split(']')
    params_map = {}
    for at in params:
        if at:
            pair = remove_forward_and_back_spaces(at)
            key, value = remove_fandb_spaces_in_tuple(tuple(pair.split(':')))
            # Запрещаем второе значени, соотв. ключу
            if key not in params_map:
                params_map[key] = value
            else:
                return None, 1, "\tError: only one params key permitted"

    params_json = json.dumps(params_map)
    return params_json, 0, None
Пример #4
0
def parser_target_for_spider(target_fname):
    """Parse a spider target file, yielding one result tuple per entry.

    The file is read through ``dal.efile2list``. Text after '#' on a line
    is dropped, and lines that are empty afterwards are ignored. The first
    remaining line must be a node header (see ``is_node``). Every later
    non-header line is a URL belonging to the most recent node, optionally
    followed by ``[key: value]`` parameter groups.

    Args:
        target_fname: name of the target file; stored under ``'name'`` in
            the settings dict handed to ``dal.efile2list``.

    Yields:
        ``(None, code, report)`` for errors. Code 1 means the file could
        not be read. Code 2 means the file is empty, does not start with a
        node header, or repeats a node name. Parsing continues after a
        duplicate-name error and stops after the other two.

        ``((node, url, index, params_json), 0, report)`` for each URL.
        ``index`` is the 1-based position of the URL inside its node and
        ``params_json`` is the JSON string from ``_parse_target_params``
        ('{}' when the line has no parameters). If parameter parsing
        fails the item is still yielded with code 0, ``params_json`` set
        to None and ``report`` describing the problem.

    TODO:
        Make the to-text converters customisable.
    """
    sets = dal.get_utf8_template()
    sets['name'] = target_fname
    list_lines, err = dal.efile2list(sets)
    if err[0]:
        yield None, 1, err[1]
        return

    # Strip comments, then drop the lines that became empty. Comprehensions
    # replace map() calls that were used only for their side effects (and
    # that would do nothing on Python 3, where map() is lazy).
    list_without_comments = [
        remove_forward_and_back_spaces(line.split('#')[0])
        for line in list_lines]
    result_job_list = [line for line in list_without_comments if line]

    # The first informative line must be a node name. The emptiness check
    # comes first so an empty or comment-only file produces the error
    # report below instead of an IndexError.
    if not result_job_list or not is_node(result_job_list[0]):
        rpt = 'target_fname: '+target_fname+ \
                '. Неверный формат файла - первое имя узла должно быть до адресов.'+ \
                'Либо файл с заданиями пуст.'
        code_err = 2
        yield None, code_err, rpt
        return

    current_node = get_node_name(result_job_list[0])
    i = 0
    nodes = []
    for at in result_job_list:
        if is_node(at):
            current_node = get_node_name(at)
            if current_node not in nodes:
                nodes.append(current_node)
            else:
                code_err = 2
                yield None, code_err, 'Name node: ['+current_node+ \
                        ']\n'+"\tError: Node name need be unic."
            # URL numbering restarts for every node header.
            i = 0
        else:
            i += 1
            # Everything from the first '[' onward is the parameter part.
            pos_first_settings_item = at.find('[')
            if pos_first_settings_item != -1:
                url = remove_forward_and_back_spaces(
                        at[:pos_first_settings_item])
                params = at[pos_first_settings_item:]
                params, code_err, rpt = _parse_target_params(params)
                if code_err != 0 and rpt:
                    rpt = 'Name node: ['+current_node+']\nUrl: ['+url+']\n'+rpt
                # NOTE(review): the item is yielded with code 0 even when
                # parameter parsing failed; confirm callers rely on the
                # report text rather than the code here.
                yield (current_node, url, i, params), 0, rpt
            else:
                url = remove_forward_and_back_spaces(at)
                rpt = None
                yield (current_node, url, i, '{}'), 0, rpt
Пример #5
0
 def get_one_node(line):
     """Return the node part of a ``node * url`` line.

     Takes the text before the first '*', removes every '[' and ']'
     character, and passes the result through
     ``util.remove_forward_and_back_spaces``.
     """
     head = line.split('*')[0]
     for bracket in ('[', ']'):
         head = head.replace(bracket, '')
     return util.remove_forward_and_back_spaces(head)
Пример #6
0
def get_node_name(src_node_name):
    """Return the node name from a header line.

    Every '[' and ']' character is removed and the result is passed
    through ``remove_forward_and_back_spaces``.
    """
    without_brackets = src_node_name
    for bracket in ('[', ']'):
        without_brackets = without_brackets.replace(bracket, '')
    return remove_forward_and_back_spaces(without_brackets)
Пример #7
0
def is_node(line):
    """Tell whether *line* is a node header of the form ``[...]``.

    The line is first passed through ``remove_forward_and_back_spaces``.
    It is a header when the result starts with '[' and ends with ']'.

    Args:
        line: one text line.

    Returns:
        True for a node header, False otherwise. A line that is empty
        after trimming also gives False.
    """
    line = remove_forward_and_back_spaces(line)
    # startswith/endswith are safe on an empty string, where indexing
    # line[0] would raise IndexError.
    return line.startswith('[') and line.endswith(']')
Пример #8
0
def get_url(line):
    """Return the URL part of a ``node * url`` line.

    Takes the field after the first '*' and passes it through
    ``util.remove_forward_and_back_spaces``. A line without '*' raises
    IndexError.
    """
    fields = line.split('*')
    url_field = fields[1]
    return util.remove_forward_and_back_spaces(url_field)