Exemplo n.º 1
0
def convert_file(dev, file, _from, _to, out):
    """Convert `file` from the `_from` language to the `_to` language.

    The input file is read, the header template of the source language
    (`before[_from]`) is stripped, several series of regular-expression
    substitutions are applied, and the result is written to the output
    file prefixed with the header template of the target language
    (`before[_to]`).

    `dev` selects the value ('dev' or 'prod') substituted for the `{dev}`
    placeholder in the headers and in the MULTILINE rules. Any `_from` /
    `_to` value other than 'py' selects the Lua file.
    """
    mode = 'dev' if dev else 'prod'

    def substitute(text, rules, flags):
        # Apply every (pattern, replacement) pair in order.
        for regex, repl in rules:
            text = re.sub(regex, repl, text, flags=flags)
        return text

    py_file = get_path(dev, 'py', file, out=out)
    lua_file = get_path(dev, 'lua', file, out=out)
    if _from == 'py':
        in_file = py_file
    else:
        in_file = lua_file
    if _to == 'py':
        out_file = py_file
    else:
        out_file = lua_file

    # Drop the source-language header before transforming the body.
    source_header = before[_from].replace('{dev}', mode).strip()
    text = read(in_file).replace(source_header, '')

    text = substitute(text, regexps_before[_from], re.DOTALL)
    text = substitute(text, regexps_DOTALL[_from], re.DOTALL)

    # The MULTILINE rules may mention `{dev}` on either side.
    multiline_rules = (
        (regex.replace('{dev}', mode), repl.replace('{dev}', mode))
        for regex, repl in regexps_MULTILINE[_from]
    )
    text = substitute(text, multiline_rules, re.MULTILINE)

    # The DOTALL rules are re-applied five more times after the
    # MULTILINE pass.
    for _ in range(5):
        text = substitute(text, regexps_DOTALL[_from], re.DOTALL)

    text = substitute(text, regexps_after[_from], re.DOTALL)

    target_header = before[_to].replace('{dev}', mode)
    write(out_file, f"{target_header}\n\n{text.strip()}\n".lstrip())
Exemplo n.º 2
0
def convert_file(filename, _from, _to):
    """Convert `filename` from the `_from` language to the `_to` language.

    Reads `<filename>.py` or `<filename>.lua` (the Lua file for any `_from`
    other than 'py'), strips the source header `before[_from]`, applies
    the regular-expression rule sets in sequence and writes the result,
    prefixed with `before[_to]`, to the file of the target language.
    """
    def substitute(text, rules, flags):
        # Apply every (pattern, replacement) pair in order.
        for regex, repl in rules:
            text = re.sub(regex, repl, text, flags=flags)
        return text

    py_file = join(get_path('py'), f'{filename}.py')
    lua_file = join(get_path('lua'), f'{filename}.lua')
    if _from == 'py':
        in_file = py_file
    else:
        in_file = lua_file
    if _to == 'py':
        out_file = py_file
    else:
        out_file = lua_file

    # Drop the source-language header before transforming the body.
    text = read(in_file).replace(before[_from].strip(), '')

    text = substitute(text, regexps_before[_from], re.DOTALL)
    text = substitute(text, regexps_DOTALL[_from], re.DOTALL)
    text = substitute(text, regexps_MULTILINE[_from], re.MULTILINE)

    # The DOTALL rules are re-applied five more times after the
    # MULTILINE pass.
    for _ in range(5):
        text = substitute(text, regexps_DOTALL[_from], re.DOTALL)

    text = substitute(text, regexps_after[_from], re.DOTALL)

    write(out_file, f"{before[_to]}\n\n{text.strip()}\n".lstrip())
Exemplo n.º 3
0
 def get_contents_and_title(self):
     """Load the block file into `self.contents` and `self.titles`.

     `contents` holds the lines of the file; `titles` holds, for each
     line, the text before its first tab character.
     """
     raw = read(self.path)
     # An empty block is legitimate: it happens, for example, when the
     # file contained a single article that was later deleted.
     self.contents = raw.split('\n') if raw else []
     self.titles = [entry.split('\t')[0] for entry in self.contents]
Exemplo n.º 4
0
    def __init__(self, path):
        """Read the block stored at `path`.

        Raises:
            StorageError: if the content read from `path` is empty.
        """
        self.path = path
        # Placeholders: both are expected to be filled in by inheritors.
        self.contents = None
        self.titles = None

        self.block_content = read(self.path)
        if self.block_content:
            return
        raise StorageError(f'Block is empty: "{self.path}"')
Exemplo n.º 5
0
 def get_contents_and_title(self):
     """Load the block file into `self.contents` and `self.titles`.

     `contents` holds the lines of the file; `titles` holds, for each
     line, the text before its first tab character.

     Raises:
         StorageError: if the block is empty while a `<path>.bak` file
             exists next to it.
     """
     raw = read(self.path)
     if raw:
         self.contents = raw.split('\n')
     elif exists(f'{self.path}.bak'):
         # Empty block together with a leftover backup file is treated
         # as an error rather than as a legitimately empty block.
         raise StorageError(f'Block is empty: "{self.path}"')
     else:
         self.contents = []
     self.titles = [entry.split('\t')[0] for entry in self.contents]
Exemplo n.º 6
0
def save_pages():
    """Publish the entries of 'endings_ru_new.json' as report pages.

    For every ending the entries are sorted by their first field read
    backwards, and the second fields are saved in pages of at most 3700
    items under `User:Vitalik/reports/verbs/-<ending>/<page number>`.
    """
    endings = json.loads(read('endings_ru_new.json'))
    for ending, entries in endings.items():
        # Sort key is the first field reversed, character by character.
        entries.sort(key=lambda entry: entry[0][::-1])
        texts = [entry[1] for entry in entries]

        # Note: an earlier variant (kept in the file history as commented
        # code) wrapped the items in <div class="reverseindex">.
        heading = f"= Глаголы на '''-{ending}''' =\n"
        for page_no, chunk in enumerate(chunks(texts, 3700), start=1):
            save_page(
                f'User:Vitalik/reports/verbs/-{ending}/{page_no}',
                heading + ''.join(chunk),
                'Обратный список глаголов по окончаниям',
            )
            print(ending, len(chunk))
Exemplo n.º 7
0
def download_page(title, path):
    """Download the page `title` and store it in the file `path`.

    Progress is printed on a single line: the title followed by one of
    ' - No page', ' - NEW', ' - OK' (local file updated) or
    ' - Not changed'.

    The downloaded text gets a trailing newline, and a commented-out
    `dev_prefix` line is uncommented before comparing with / writing to
    the local file.

    The file is written only when it does not exist yet or when its
    content differs from the downloaded page.
    """
    print(f'- {title}', end='')
    try:
        content = load_page(title) + '\n'
    except NoPage:
        print(' - No page')
        return
    content = content.replace("\n-- dev_prefix = 'User:Vitalik/'",
                              "\ndev_prefix = 'User:Vitalik/'")
    if not exists(path):
        write(path, content)
        print(' - NEW')
    elif read(path) != content:
        # Fix: previously the changed page was reported as OK but never
        # written, while an unchanged page was needlessly rewritten.
        write(path, content)
        print(' - OK')
    else:
        print(' - Not changed')
Exemplo n.º 8
0
def get_stats():
    """Extract statistics for every language and persist them.

    For each language in `langs` the saved HTML page is scanned for
    `mw-statistics-numbers` table cells; the cells at positions 0, 1, 3, 5
    and 6 are reduced to their digits and stored as integers.

    The new data is dumped both to a dated history file and to
    `active.json`.

    Returns:
        A tuple `(old_data, new_data)` where `old_data` is what
        `active.json` contained before it was overwritten.
    """
    cell_positions = (0, 1, 3, 5, 6)
    # Keys are assigned to the non-empty values in order.
    # NOTE(review): an empty cell is dropped, which shifts the following
    # values to earlier keys (or raises IndexError) - confirm intended.
    keys = ('goods', 'pages', 'edits', 'users', 'active')

    new_data = {}
    for lang in langs:
        html_content = read(html_path(lang))
        numbers = re.findall('<td class="mw-statistics-numbers">(.*?)</td>',
                             html_content)
        digits = (re.sub('[^0-9]', '', numbers[pos])
                  for pos in cell_positions)
        values = [value for value in digits if value]
        if not values:
            # Nothing usable for this language: leave it out.
            continue
        new_data[lang] = {
            key: int(values[slot]) for slot, key in enumerate(keys)
        }

    history_filename = join(conf.data_path, 'daily_stats', 'history', 'values',
                            f'{dtf("Ym/Ymd")}.json')
    active_filename = join(conf.data_path, 'daily_stats', 'active.json')

    # Read the previous snapshot before it gets overwritten below.
    old_data = json_load(active_filename)
    for target in (history_filename, active_filename):
        json_dump(target, new_data)

    return old_data, new_data
Exemplo n.º 9
0
 def __init__(self, path, tables, max_counts=None, lock_slug=''):
     """
     Create a handler for every table of the storage located at `path`.

     If lock_slug == '', the storage is not locked; otherwise
     `self.lock(lock_slug)` is called before the handlers are created.

     `tables` maps a table name to a key of `self.handler_types`.
     The `max_count` of a table is read from `<table>/_sys/max_count`
     when that file exists, otherwise it is taken from `max_counts`.

     Raises:
         Exception: if `max_count` is available from neither source.
     """
     self.path = path
     self.lock_slug = lock_slug
     self.locked = False
     if lock_slug:
         self.lock(lock_slug)

     self.handlers = {}
     for table, handler_type in tables.items():
         table_path = join(path, table)
         stored_limit_path = join(table_path, '_sys', 'max_count')
         if exists(stored_limit_path):
             # The value saved on disk wins over the argument.
             max_count = int(read(stored_limit_path))
         elif not max_counts or table not in max_counts:
             raise Exception('Undefined `max_count` for storage')
         else:
             max_count = max_counts[table]
         handler_factory = self.handler_types[handler_type]
         self.handlers[table] = handler_factory(table_path, max_count)

     self._titles = None
     self._titles_set = None
Exemplo n.º 10
0
def load_languages():  # todo: move this to some `lib`
    """Parse language codes and names from `data/language-data.lua`.

    Returns:
        A dict mapping a language code to its lower-cased name with the
        bracketed parts, brace characters and single-quoted parts removed.
        Codes declared as `table.copy` aliases share the name of the
        language they copy.
    """
    path = join(conf.SYNC_PATH, 'data', 'language-data.lua')
    content = read(path)
    languages = {}

    # Primary definitions look like:
    #     m["en"] = {
    #         name = "Английск{ий} [язык]",
    #         ...
    #     }
    entries = re.findall(
        r'm\["([^"]+)"\] *= *\{\s*name *= *"([^"]+)"', content)
    for lang, name in entries:
        name = re.sub(r'\[[^]]+\]', '', name)  # removes: [язык]
        name = re.sub(r'[{}]', '', name)       # removes the braces only
        name = re.sub(r"'[^']+'", '', name)    # removes: '...'
        languages[lang] = name.strip().lower()

    # Aliases look like:
    #     m["rus"] = table.copy(m["ru"])
    # Fix: the pattern used to end with a stray `"`, so it never matched
    # and no alias was ever registered.
    aliases = re.findall(
        r'm\["([^"]+)"\] *= *table\.copy\(m\["([^"]+)"\]\)', content)
    for alias, target in aliases:
        # Skip aliases whose target was not parsed above instead of
        # failing with KeyError.
        if target in languages:
            languages[alias] = languages[target]
    return languages
Exemplo n.º 11
0
 def load_redirects(self):
     """Return the lines of the file `self.redirects_filename` as a list."""
     raw = read(self.redirects_filename)
     return raw.split('\n')
Exemplo n.º 12
0
 def latest_updated(self):
     """Return the stored value parsed by `dtp`, or None if the file is absent."""
     if exists(self.latest_updated_filename):
         stored = read(self.latest_updated_filename)
         return dtp(stored)
     return None
Exemplo n.º 13
0
 def all_pages_start_from(self):
     """Return the content of the stored file, or None if the file is absent."""
     if exists(self.all_pages_start_from_filename):
         return read(self.all_pages_start_from_filename)
     return None
Exemplo n.º 14
0
 def get(cls):
     """Read `cls.filename` and return its timestamp passed through `convert_date`.

     The file is expected to hold a value in '%Y-%m-%d %H:%M:%S' format;
     surrounding whitespace is ignored.
     """
     stamp = read(cls.filename).strip()
     parsed = datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
     return convert_date(parsed)
Exemplo n.º 15
0
 def get_contents_and_title(self):
     """Load the block file into `self.contents` and `self.titles`.

     The file is split on `SEPARATOR`; the first part holds the titles,
     one per line.

     Raises:
         StorageError: if the block file is empty.
     """
     raw = read(self.path)
     if raw:
         self.contents = raw.split(SEPARATOR)
         self.titles = self.contents[0].split('\n')
         return
     raise StorageError(f'Block is empty: "{self.path}"')
Exemplo n.º 16
0
 def load_articles(self):
     """Return the lines of the file `self.articles_filename` as a list."""
     raw = read(self.articles_filename)
     return raw.split('\n')
Exemplo n.º 17
0
 def load(cls):
     """Return the content of `cls.active_filename` split on `cls.sep`."""
     raw = read(cls.active_filename)
     return raw.split(cls.sep)
Exemplo n.º 18
0
def read_file(dev, filename):
    """Read `filename` with carriage returns removed.

    When `dev` is falsy, the `dev_prefix` assignment line is additionally
    commented out (prefixed with `-- `).
    """
    text = read(filename).replace('\r', '')
    if dev:
        return text
    return text.replace("\ndev_prefix = 'User:Vitalik/'",
                        "\n-- dev_prefix = 'User:Vitalik/'")
Exemplo n.º 19
0
 def get(self, request, *args, **kwargs):
     """Return the content of the requested file inside a `<pre>` element.

     The file path is the third item returned by `self.get_path(kwargs)`.
     The content is HTML-escaped so that `<`, `>` and `&` in the file are
     displayed literally instead of being interpreted as markup.
     """
     # Local import keeps this block self-contained.
     from html import escape

     _root, _path, full_path = self.get_path(kwargs)
     content = read(full_path)
     return HttpResponse(f'<pre>{escape(content)}</pre>')