Example #1
def read_data(dataroot):
    filenames = get_filenames(dataroot)
    data = read_csv(os_join(dataroot, filenames[0]))
    for filename in filenames[1:]:
        data_i = read_csv(os_join(dataroot, filename))
        data = np.concatenate((data, data_i), axis=0)
    return data
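These snippets come from projects that alias the standard-library join, typically via "from os.path import join as os_join" (several examples below show the import explicitly). Assuming read_csv returns a NumPy array, a variant of the loop above can collect the arrays and concatenate once, avoiding a reallocation per file:

    def read_data(dataroot):
        filenames = get_filenames(dataroot)
        arrays = [read_csv(os_join(dataroot, f)) for f in filenames]
        return np.concatenate(arrays, axis=0)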
Example #2
def download(ftp, remote_file, target_dir):
    error = 0
    attempt = 0
    status = 1
    max_attempts = getenv('MAX_RETRIES')
    if max_attempts is None:
        max_attempts = 3
    else:
        max_attempts = int(max_attempts)
    sleep_retry = getenv('SLEEP_RETRY')
    if sleep_retry is None:
        sleep_retry = 5
    else:
        sleep_retry = int(sleep_retry)
    ext = splitext(remote_file)[1]
    f_name = basename(remote_file)
    lock_file = os_join(target_dir, '.' + f_name)
    local_file = os_join(target_dir, f_name)
    while (status != 0 and attempt < max_attempts):
        attempt += 1
        try:
            status = 0
            with open(lock_file, 'wb') as f:
                ftp.retrbinary('RETR %s' % remote_file, f.write)
            file_rename(lock_file, local_file)
        except ftplib.all_errors as e:
            status = e
        if status != 0:
            time.sleep(sleep_retry)
    if attempt == max_attempts:
        error = status
    return error
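os.getenv accepts a default value, so each getenv/if/else block above can collapse to a single line; a minimal sketch with the same semantics:

    max_attempts = int(getenv('MAX_RETRIES', '3'))
    sleep_retry = int(getenv('SLEEP_RETRY', '5'))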
Example #3
def find_files(start_dir):
    """Find all .pyc[o] files as well as all __pycache__ directories.

    Returns a tuple consisting of:
        - a list of directories found that are called __pycache__
        - a list of .pyc[o] files to remove
        - a list of directories that can be removed after the files
          are removed

    Note that the first and third lists may not be equal.
    """
    dirs_to_check = list()

    for root, dirs, _ in os.walk(start_dir):
        if '__pycache__' in dirs:
            dirs_to_check.append(os_join(root, '__pycache__'))

    files_to_remove = list()
    dirs_to_remove = list()

    for directory in dirs_to_check:
        with os.scandir(directory) as dir_it:
            pyc_count = 0
            dir_count = 0
            for item in dir_it:
                dir_count += 1
                if item.name.endswith('.pyc') or item.name.endswith('.pyco'):
                    pyc_count += 1
                    files_to_remove.append(os_join(directory, item.name))
            if dir_count == pyc_count:
                dirs_to_remove.append(directory)

    return dirs_to_check, files_to_remove, dirs_to_remove
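A hypothetical driver for find_files, sketching how the three returned lists might be consumed (files first, then the directories that became empty):

    import os

    pycache_dirs, files_to_remove, dirs_to_remove = find_files('.')
    for path in files_to_remove:
        os.remove(path)
    for directory in dirs_to_remove:
        os.rmdir(directory)  # held nothing but the removed .pyc[o] files, so now empty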
Example #4
    def init_dir(self, passphrase):
        LOG.info('Init directory %s' % self.directory)
        create_dir(self.directory)

        self.git.init()
        self.git.add_gitignore(['*.ini', '/*.raw'])

        # Write default template file
        sample_file = ("""[foo]
name = foo access
type = web
url = http://foo.com
password = bar
login = foo
comments = foo is good website
""")

        LOG.debug('Write sample file default.ini')
        with open(os_join(self.directory, 'default.ini'), 'w') as f:
            f.write(sample_file)

        # Write default raw
        create_dir(os_join(self.directory, 'default.raw'))
        sample_raw = ("""-----BEGIN RSA PRIVATE KEY-----
MIIEpAIBAAKCA
-----END RSA PRIVATE KEY-----""")
        LOG.debug('Write sample file default raw')
        with open(os_join(self.directory, 'default.raw', 'ssh_id.rsa'),
                  'w') as f:
            f.write(sample_raw)

        self.encrypt(passphrase=passphrase)
        # Remove old passkeeper files
        self.remove_old_encrypted_files(force_remove=True)
        self.cleanup()
Example #5
    def test_02_parse_peaks(self):
        """
        function
        """
        peak_files = [
            os_join(TEST_PATH, 'data', 'peaks_protA.bed'),
            os_join(TEST_PATH, 'data', 'peaks_protB.bed')
        ]
        in_feature = False
        biases = os_join(TEST_PATH, 'data', 'biases.pickle')
        fh = open(biases, "rb")
        try:
            badcols = Unpickler(fh, encoding='latin1').load()['badcol']
        except TypeError:
            badcols = Unpickler(fh).load()['badcol']
        fh.close()
        peak_coord1, peak_coord2, npeaks1, npeaks2, submatrices, coord_conv = parse_peaks(
            peak_files[0], peak_files[1], RESOLUTION, in_feature, CHROM_SIZES,
            badcols, SECTION_POS, WINDOWS_SPAN)

        global COORD_CONV
        COORD_CONV = coord_conv
        global SUBMATRICES
        SUBMATRICES = submatrices
        self.assertEqual(peak_coord1, PEAK_COORD1)
        self.assertEqual(peak_coord2, PEAK_COORD2)
        self.assertEqual(npeaks1, 6)
        self.assertEqual(npeaks2, 14)
Example #6
def scan_directory():

    if type_scan == 'a':
        scan_result = [
            root
            for root, dirs, files in os.walk(os_join(os.environ['HOMEPATH'], 'Music'))
            for song in files
            if Path(song).suffix == '.mp3'
        ]
    else:
        scan_result = [
            os_join(folder[0], song)
            for folder in os.walk(music_directory)
            for song in folder[2]
            if Path(song).suffix == '.mp3'
        ]

    if config_file_readings['DEFAULT']['scan_type'] == 'date_created':
        sort_key = os.path.getctime
    elif config_file_readings['DEFAULT']['scan_type'] == 'date_modified':
        sort_key = os.path.getmtime
    else:
        print('ERROR, inappropriate scan type!')
        exit()

    scan_result = sorted(list(set(scan_result)), key=sort_key, reverse=True)

    if config_file_readings['DEFAULT']['scan_limit'] != 'none':
        scan_result = scan_result[:int(config_file_readings['DEFAULT']['scan_limit'])]

    return scan_result
Example #8
def globalGetChanges(year,
                     section,
                     proposal_workless=False,
                     job_opening=False):
    global path
    pathes = [
        os_join(path, str(f"output/without_work{year}.json"))
        if not proposal_workless else '',
        os_join(path, str(f"output/opportunities{year}.json"))
        if not job_opening else ''
    ]

    data = []
    for loc_path in filter(bool, pathes):
        if not isfile(loc_path):
            from data_analyze.ex import generate_opportunities_and_without_work_json_file
            generate_opportunities_and_without_work_json_file(year)
        if not isfile(loc_path):
            return dict(), dict()
        with open(loc_path, "r") as file:
            data.append(loads(file.read()))
    print(*data[0].keys(), sep='\n')
    data = [unsetBrackets(dat[section]) for dat in data]
    if (job_opening or proposal_workless) and len(data) == 1:
        return data[0]

    null_elements1 = set(data[0].keys()) - set(
        data[1].keys())  # elements missing from the second
    null_elements2 = set(data[1].keys()) - set(
        data[0].keys())  # elements missing from the first

    data[0].update({key: 0 for key in null_elements2})
    data[1].update({key: 0 for key in null_elements1})

    return data  # data_1 {qualification: count}
Example #9
def shred_dir(directory):
    """
    Shred all files in directory and remove this directory.

    Shred files on a directory to avoid a malicious read

    :param directory: Path of directory to shred
    :type directory: str

    :Example:

    >>> shred_dir('/opt/mypasskeeper/.git')
    Clean file master
    Clean file HEAD
    Clean file exclude

    .. seealso:: run_cmd()
    """
    # Remove directory content
    for root, dirs, files in os.walk(directory, topdown=False):
        for fname in files:
            filepath = os_join(root, fname)
            LOG.info('Clean file %s' % fname)
            run_cmd('shred -f --remove %s' % filepath)
        for dname in dirs:
            dpath = os_join(root, dname)
            os.rmdir('%s' % dpath)
    # Remove the directory
    os.rmdir(directory)
Example #10
    def decrypt(self, passphrase):
        LOG.info('Decrypt files :')
        source_dir = os_join(self.directory, self.encrypted_dir)
        status = True
        for root, dirs, files in os.walk(source_dir, topdown=False):
            for name in files:
                file_path = os_join(root, name)
                relative_file_path = relative_path(file_path,
                                                   source_dir).lstrip('/')

                if (name.endswith('.passkeeper')
                        and os.path.isfile(file_path)):
                    LOG.info('Decrypt file %s' % relative_file_path)
                    decrypted_file_path = os_join(
                        self.directory,
                        re.sub(r'\.passkeeper$', '', relative_file_path))
                    create_dir(path=dirname(decrypted_file_path))
                    decrypted = decrypt(source=file_path,
                                        output=decrypted_file_path,
                                        passphrase=passphrase)
                    if decrypted.status == "decryption failed":
                        status = False
                    LOG.info(decrypted.status)
                    if not decrypted.ok:
                        LOG.error("Decrypt file %s - %s" %
                                  (name, decrypted.stderr))
        return status
Example #11
    def test_05_interactions_at_intersection(self):
        """
        function
        """
        genomic_mat = os_join(TEST_PATH, 'data', 'data_bam_10kb.tsv')
        submatrices = os_join(TEST_PATH, 'tmp.tsv')

        groups = {
            '': {
                'sum_nrm': defaultdict(float),
                'sqr_nrm': defaultdict(float),
                'passage': defaultdict(int)
            }
        }

        interactions_at_intersection(groups,
                                     genomic_mat, (v for v in ITER_PAIRS),
                                     submatrices,
                                     '',
                                     window_size,
                                     both_features=False)
        self.assertEqual(groups['']['passage'], GROUPS['']['passage'])
        self.assertEqual([round(v, 5) for k, v in groups['']['sum_nrm']],
                         [round(v, 5) for k, v in GROUPS['']['sum_nrm']])
        self.assertEqual([round(v, 5) for k, v in groups['']['sqr_nrm']],
                         [round(v, 5) for k, v in GROUPS['']['sqr_nrm']])
Example #12
    def remove_old_encrypted_files(self, force_remove=False):
        "remove old passkeeper files"
        root_dir = os_join(self.directory, self.encrypted_dir)
        for root, dirs, files in os.walk(root_dir, topdown=False):
            for efname in files:
                # /git/encrypt/foo/bar.passkeeper
                root_file_path = os_join(root, efname)
                # encrypt/foo/bar.passkeeper
                git_relative_encrypted_file_path = relative_path(path=root_file_path, start=self.directory)
                LOG.debug('Check %s.' % git_relative_encrypted_file_path)

                # /git/foo/bar
                orig_file_path = os_join(self.directory, re.sub(r'\.passkeeper$', '',
                                                                relative_path(path=root_file_path, start=root_dir)))
                # Skip if the origin file exists
                if os.path.isfile(orig_file_path):
                    continue

                if not force_remove:
                    # If not force, ask
                    req = raw_input("File %s will be deleted because the origin file hasn't been found, are you sure (y/n)\n" % git_relative_encrypted_file_path)
                    if req != "y":
                        LOG.info('File %s has been kept.' % git_relative_encrypted_file_path)
                        continue

                # Remove the file
                LOG.info("File %s will be deleted because the origin file hasn't been found." % git_relative_encrypted_file_path)
                # shred the file, then git remove, because git automatically removes empty dirs
                run_cmd('shred %s' % root_file_path)
                self.git.force_remove([git_relative_encrypted_file_path])
                self.git.commit('Remove file %s' % git_relative_encrypted_file_path)
Example #13
def plot_csvs(config):
    """Generate png graphs for all urls. From csv files"""
    # Ensure directory exist
    plot_path = config.get('plot_path')
    csv_path = config.get('csv_path')
    ensure_dir(plot_path)

    # Open csv files and plot them
    for url_config in config.get('urls'):
        label = url_config.get('label')
        render_step = url_config.get('render_step')
        render_rows = url_config.get('render_rows')
        # Get filename
        csv_file = os_join(csv_path, '%s.csv' % label)
        figure_file = os_join(plot_path, '%s.png' % label)

        if not isfile(csv_file):
            continue
        # Resample data to display the desired time period
        time_serie = pd.read_csv(csv_file, index_col=0, parse_dates=True)
        time_serie = resampled_time_serie(time_serie=time_serie,
                                         rows=render_rows,
                                         step=render_step)
        # Get data and generate the figure
        data_frame = pd.DataFrame(data=time_serie, index=time_serie.index, columns=['time'])
        plot = data_frame.plot()
        figure = plot.get_figure()
        figure.savefig(figure_file)
Example #14
 def _get_dir_files(title):
     files = []
     for (dirpath, dirnames, filenames) in walk(os_join(MEDIA_DIR, title)):
         # print(dirpath, filenames)
         for f in filenames:
             if f[-4:] == '.jpg':
                 files.append(abspath(os_join(dirpath, f)))
         break
     return files
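Because the loop breaks after the first os.walk iteration, only the top level of the directory is scanned; a hypothetical equivalent with os.listdir makes that explicit:

    from os import listdir
    from os.path import abspath, join as os_join

    def _get_dir_files(title):
        base = os_join(MEDIA_DIR, title)  # MEDIA_DIR as assumed by the snippet above
        return [abspath(os_join(base, f))
                for f in listdir(base) if f.endswith('.jpg')]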
Example #15
    def test_06_windows(self):
        """
        test if the total intra-chromosomal signal is the same as the sum over several windows
        """
        biases = os_join(TEST_PATH, 'data', 'biases.pickle')
        fh = open(biases, "rb")
        try:
            badcols = Unpickler(fh, encoding='latin1').load()['badcol']
        except TypeError:
            badcols = Unpickler(fh).load()['badcol']
        fh.close()
        window = 'intra'
        groups = {}
        windows = [(0, 100), (100, 200), (200, 300), (300, 400)]
        for window in ['intra'] + windows:
            pair_peaks = generate_pairs(PEAK_COORD1,
                                        PEAK_COORD2,
                                        WINDOWS_SPAN,
                                        window,
                                        COORD_CONV,
                                        both_features=False)
            counter = defaultdict(int)
            iter_pairs = submatrix_coordinates(pair_peaks,
                                               (WINDOWS_SPAN * 1000) + 1,
                                               SUBMATRICES,
                                               counter,
                                               both_features=False)
            genomic_mat = os_join(TEST_PATH, 'data', 'data_bam_10kb.tsv')
            submatrices = os_join(TEST_PATH, 'tmp.tsv')

            groups[window] = {
                '': {
                    'sum_raw': defaultdict(int),
                    'sqr_raw': defaultdict(int),
                    'sum_nrm': defaultdict(float),
                    'sqr_nrm': defaultdict(float),
                    'passage': defaultdict(int)
                }
            }

            interactions_at_intersection(groups[window],
                                         genomic_mat,
                                         iter_pairs,
                                         submatrices,
                                         '',
                                         window_size,
                                         both_features=False)
        self.assertEqual(
            round(sum(groups['intra']['']['sum_nrm'].values()), 5),
            round(
                sum(
                    sum(groups[window]['']['sum_nrm'].values())
                    for window in windows), 5))
        self.assertEqual(
            round(sum(groups['intra']['']['sum_nrm'].values()), 5),
            round(2720.13242866, 5))
Example #16
def test_corpus_restore():
    ''' test the corpus restore sequence '''
    with open(os_join(TEST_DIR, CORPUS_FILE)) as f:
        reference_sentences = f.read().decode("utf-8").strip()

    for input_file in INPUT_HTML:
        sentences = corpus_restore(diff_file=os_join(TEST_DIR, INPUT_CDIFF), 
            working_directory=TEST_DIR, html_url=input_file)

        assert sentences == reference_sentences
Example #17
def main():
    argument_parser = ArgumentParser()
    args = argument_parser.parse_args()

    loader = Loader()
    loader.load_files(args.source)
    unique, duplicated = DuplicateAnalyzer.split(loader.files, File.compare)
    FileSystem.create_dir(args.destination, 'duplicates')
    duplicated_path = os_join(args.destination, 'duplicates')
    for file in duplicated:
        Copier.copy_file(file.filepath, duplicated_path)
    for file in unique:
        FileSystem.create_dir(args.destination, file.formatted_modification_date, ignore_errors=True)
        Copier.copy_file(file.filepath, os_join(args.destination, file.formatted_modification_date))
    PyDuplicateLogger.debug("Successfully ended!")
Example #18
 def create_dump(self, name=None):
     name = name or "vocabularDump"
     backup_dump_file = os_join(
         self.temp_folder,
         "{0}.backup".format(name)
     )
     dump_file = os_join(
         self.temp_folder,
         "{0}.json".format(name)
     )
     with open(backup_dump_file, "w", encoding="utf-8") as js_file:
         json.dump(self.tokens_array, js_file, ensure_ascii=False)
     copy2(backup_dump_file, dump_file)
     remove(backup_dump_file)
Example #19
def main():
    args = argparser()

    #if os.path.isdir(args.output_dir):
    #    print('Error : output dir is exist.')
    #    sys.exit()

    check_dataset(args.input_dir,args.year, args.test_set)

    for _set in ['train', args.test_set]:
        print('== output {} =='.format(_set))
        anno = os_join(args.input_dir, '{}/instances_{}{}.json'.format(annotations, _set, args.year))
        img = os_join(args.input_dir, '{}/{}{}'.format(images, _set, args.year))
        mscoco.mscoco_to_voc(anno, img, args.output_dir, _set,
                             rect_thr=args.rect_thr, name_size=args.name_length, view=args.view)
Example #20
def check_dataset(input_path,year, test_set):
    # check dataset dir
    if not os.path.isdir('{}'.format(input_path)):
        os.makedirs(input_path)

    # check annotation file
    input_full_path = os.path.abspath(input_path)
    print('Full Path = {}'.format(input_full_path))
    if not os.path.isfile(os_join(input_path, '{}/instances_train{}.json'.format(annotations, year))):
        if not os.path.isfile(os_join(input_path, 'annotations_trainval{}.zip'.format(year))):
            if year == '2014':
                #subprocess.check_call(['wget', '-c', anno_2014_url], cwd=input_full_path)
                wget.download(anno_2014_url,out=input_full_path)
            else:
                #subprocess.check_call(['wget', '-c', anno_2017_url], cwd=input_full_path)
                wget.download(anno_2017_url,out=input_full_path)
        subprocess.check_call(['unzip', os_join(input_path, 'annotations_trainval{}.zip'.format(year))],
                              cwd=input_full_path)

    # check train data
    images_dir = os_join(input_full_path, images)
    if not os.path.isdir(os_join(input_full_path, '{}/train{}'.format(images, year))):
        if not os.path.isfile(os_join(input_full_path, 'train{}.zip'.format(year))):
            if year == '2014':
                #subprocess.check_call(['wget', '-c', train_2014_url], cwd=input_full_path)
                wget.download(train_2014_url, out=input_full_path)
            else:
                #subprocess.check_call(['wget', '-c', train_2017_url], cwd=input_full_path)
                wget.download(train_2017_url, out=input_full_path)

        if not os.path.isdir(images_dir):
            os.mkdir(images_dir)
        subprocess.check_call(
            ['unzip', os_join(input_full_path, 'train{}.zip'.format(year))], cwd=images_dir)

    # check val data
    if test_set == 'val':
        if not os.path.isdir(os_join(input_path, '{}/{}{}'.format(images, test_set, year))):
            if not os.path.isfile(os_join(input_full_path, '{}{}.zip'.format(test_set, year))):
                if year == '2014':
                    #subprocess.check_call(['wget', '-c', val_2014_url], cwd=input_full_path)
                    wget.download(val_2014_url, out=input_full_path)
                else:
                    #subprocess.check_call(['wget', '-c', val_2017_url], cwd=input_full_path)
                    wget.download(val_2017_url, out=input_full_path)

            subprocess.check_call(
                ['unzip', os_join(input_full_path, '{}{}.zip'.format(test_set, year))], cwd=images_dir)
Example #21
    def search(self, pattern):
        LOG.info('Search in files :')
        pattern = pattern.lower()
        config = ConfigParser.RawConfigParser()

        # Load files
        for fname in os.listdir(self.directory):
            file_path = os_join(self.directory, fname)
            if (fname.endswith('.ini') and os.path.isfile(file_path)):
                LOG.info('Loading file %s' % fname)
                config.read(file_path)
        # Search
        re_prep = re.compile(pattern)
        matching_sections = []
        for section in config.sections():

            # Section name match ?
            match = re_prep.search(section.lower())
            if match is not None:
                matching_sections.append(section)
                continue

            for option, value in config.items(section):
                # Value match ?
                match = re_prep.search(value.lower())
                if match is not None:
                    matching_sections.append(section)
                    break

        return config, matching_sections
Example #23
 def add_gitignore(self, lines):
     LOG.debug('Write .gitignore')
     gitignore_path = os_join(self.directory, '.gitignore')
     with open(gitignore_path, 'w') as f:
         f.write('\n'.join(lines))
     self.add(['.gitignore'])
     self.commit('Update .gitignore file')
Example #24
 def _run_git_cmd(self, command):
     work_tree = self.directory
     git_dir = os_join(self.directory, '.git')
     git_cmd = 'git --work-tree=%s  --git-dir=%s %s' % (work_tree,
                                                        git_dir, command)
     LOG.debug('Launch : %s' % git_cmd)
     run_cmd(git_cmd)
Example #25
def is_DB_created():
    from os import getcwd, chdir
    from os.path import (join as os_join, isfile)
    from settings.config import cfg

    path = getcwd()
    if path[-3:] in ['\\db', '/db']:
        chdir(path[:-2])  # change the current directory up to the project directory
    path = getcwd()
    name_db = cfg.get("db", "name")
    if not isfile(os_join(path, "db", name_db)):
        db.bind(provider=cfg.get("db", "type"),
                filename=name_db,
                create_db=True)
        db.generate_mapping(create_tables=True)
        print('create db')
    else:
        db.bind(provider=cfg.get("db", "type"), filename=name_db)
        try:
            db.generate_mapping()
        except Exception as e:
            print(
                'something went wrong while creating the DB (apparently the DB structure was changed)\n',
                e)
            print('attempting to fix.....')
            db.generate_mapping(create_tables=True)
Example #26
def get_files(file_list):
    """ Returns a list of all the files in filename.

    """

    from os.path import isdir as os_isdir
    from os.path import isfile as os_isfile
    from os.path import join as os_join
    from os import walk as os_walk
    from pathlib import Path

    out_list = []
    ext = ['.mp3', '.flac', '.ogg', '.s3m', '.mod', '.xm', '.it']

    for name in file_list:
        if os_isdir(name):
            for root, sub, files in os_walk(name):
                join_list = [
                    os_join(root, f) for f in files
                    if Path(f.lower()).suffix in ext
                ]
                out_list.extend(join_list)
        else:
            out_list.append(name)

    return out_list
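A sketch of the same walk using pathlib's rglob instead of os.walk, under the assumption that callers want string paths back:

    from pathlib import Path

    def get_files_pathlib(file_list):
        exts = {'.mp3', '.flac', '.ogg', '.s3m', '.mod', '.xm', '.it'}
        out_list = []
        for name in file_list:
            path = Path(name)
            if path.is_dir():
                # rglob('*') walks the tree recursively, like os.walk
                out_list.extend(str(p) for p in path.rglob('*')
                                if p.is_file() and p.suffix.lower() in exts)
            else:
                out_list.append(name)
        return out_list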
Example #27
def wget(url, saved_dir, filename=None):
    resp = get(url)
    filename = filename or url.split('/')[-1]

    with open(os_join(saved_dir, filename), "wb") as fp:
        fp.write(resp.content)
    return True
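For large files, a streamed variant avoids holding the whole response body in memory; this assumes get is requests.get, as the snippet suggests:

    def wget_streamed(url, saved_dir, filename=None, chunk_size=8192):
        filename = filename or url.split('/')[-1]
        with get(url, stream=True) as resp:
            resp.raise_for_status()
            with open(os_join(saved_dir, filename), 'wb') as fp:
                # iter_content yields the body in chunks instead of all at once
                for chunk in resp.iter_content(chunk_size=chunk_size):
                    fp.write(chunk)
        return True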
Example #28
    def output_kinetic_energy(self, global_folder_path, pdf_report, display_plots, settings):
        print("Calculate modal kinetic energy")       
        
        m = self.structure_model.m  # which mass matrix ? lumped masses ?
        # k = self.structure_model.k  
        vel = self.solver.velocity
        # disp = self.solver.displacement

        kin_energy = np.zeros(len(self.array_time))
        # el_energy = np.zeros(len(self.array_time))

        for i in range(0,len(self.array_time)):
            kin_energy[i] = 0.5 * np.dot(np.transpose(vel[:,i]),np.dot(m,vel[:,i]))
            # el_energy[i] = 0.5 * np.dot(np.transpose(disp[:,i]),np.dot(k,disp[:,i]))


        sum_energy = kin_energy #+ el_energy

        sum_over_time = np.sum(np.multiply(sum_energy, self.dt/self.array_time[-1]))

        ## EXPLICIT WAY
        # vel = {}

        # for idx, label in zip(list(range(GD.DOFS_PER_NODE[self.structure_model.domain_size])),
        #                       GD.DOF_LABELS[self.structure_model.domain_size]):
        #     start = idx
        #     step = GD.DOFS_PER_NODE[self.structure_model.domain_size]
        #     stop = self.solver.displacement.shape[0] + idx - step
        #     vel[label] = self.solver.velocity[start:stop +1:step][:]

        # # TODO: make robust for 2d and 3d, here hard coded for 3d
        # # here taking sqrt
        # vel_magn = np.power(np.power(vel['y'],2) + np.power(vel['z'],2), 0.5)

        # # here power of previous sqrt
        # kin_energy = 0.5 * np.dot(self.structure_model.parameters['m'],np.power(vel_magn,2))

        # sum_energy = kin_energy #+ el_energy

        # # first here introducing dt (integral) 
        # # and division with total length of time to get a normed integral
        # # could be extended to possible 
        # sum_over_time = np.sum(np.multiply(sum_energy, self.dt/self.array_time[-1]))

        result_data = sum_energy

        if settings["write"]:
            file_header = "# Modal Kinetic Energy: Normed integral over time = " + str(sum_over_time) +" J \n"
            file_name = 'kinetic_energy.dat'
            writer_utilities.write_result_at_dof(os_join(global_folder_path, file_name),
                                                file_header,
                                                result_data,
                                                self.array_time)
        if settings["plot"]:
            plot_title = "Modal Kinetic Energy: Normed integral over time = " + str(sum_over_time) +" J"
            plotter_utilities.plot_dynamic_result(pdf_report,
                                                display_plots,
                                                plot_title,
                                                result_data,
                                                self.array_time)
Example #29
def batch_predict_and_save(
        csv_infilename=setting.batch_filename_csv,
        storage_dir_infile=setting.data_dir_interim,
        batch_predictions_outfilename=setting.batch_predictions_outfilename,
        storage_dir_out=setting.data_dir_processed):
    """
    runs batch predictions and saves output in parquet files
    :param csv_infilename: Input filename
    :param storage_dir_infile: storage directory for input file
    :param batch_predictions_outfilename: output filename
    :param storage_dir_out: storage directory for output file
    :return: None
    """
    df = make_dataset.ingest_raw_csv(raw_csv_filename=csv_infilename,
                                     storage_dir=storage_dir_infile,
                                     tip_amount_present=False,
                                     cleanup=True)
    logger.info("input file for batch predictions: "
                "{}".format(os_join(storage_dir_infile, csv_infilename)))
    ml_model = train_model.EnsembleModel(load_existing=True)

    _batch_predict_and_save(
        df=df,
        batch_predictions_outfilename=batch_predictions_outfilename,
        storage_dir_out=storage_dir_out,
        ml_model=ml_model)
Example #30
def arguments():
    readme_md = os_join(PATH, 'README.md')
    if os.path.isfile(readme_md):
        with open(readme_md, 'r') as f:
            description = f.read().decode("utf-8")
    else:
        description = DESCRIPTION

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--url', help='Route URL', required=True)
    parser.add_argument('--phantomjs',
                        default=DEFAULT_PHANTOMJS,
                        help='Path to phantomjs')
    parser.add_argument('--resolution',
                        '-r',
                        type=int,
                        default=FULLHD,
                        help='Screenshot resolution')
    parser.add_argument('--ya_class',
                        default=DEFAULT_YA_CLASS,
                        help="""Name of the DOM element class from which
                        the time and distance are taken""")
    parser.add_argument('--screen_path', help="Path to the screenshots folder")
    parser.add_argument('--screen_pattern', help="Screenshot filename pattern")
    parser.add_argument('--csv_path', help="Path to the csv file with statistics")

    args = parser.parse_args()
    return vars(args)
Example #31
def get_gtk_builder(name):
    filename = os_join(UI_PATH, "%s.ui" % name)
    assert ospath.exists(filename)
    b = gtk.Builder()
    b.set_property("translation-domain", I18N_APP)
    b.add_from_file(filename)
    return b
Example #32
    def __init__(self, chain_order=2, vk_object=None, *file_paths):
        """
        :chain_order: Количество звеньев цепи, для принятия решения о следующем.
        :vk_object: Объект класса Владя-бота, для интеграции. Не обязателен.
        :file_paths: Пути к текстовым файлам, для обучения модели.
        """

        if chain_order < 1:
            raise MarkovTextExcept(
                "Цепь не может быть {0}-порядка.".format(chain_order)
            )
        self.chain_order = chain_order
        self.tokens_array = ()
        self.base_dict = {}
        self.start_arrays = ()
        self.vk_object = vk_object
        self.vocabulars = {}

        self.temp_folder = abspath(
            os_join(expanduser("~"), "textGeneratorTemp")
        )
        if not isdir(self.temp_folder):
            makedirs(self.temp_folder)

        for _path in frozenset(filter(isfile, map(abspath, file_paths))):
            self.update(_path)
Example #33
 def _lead_add_attachment(self):
     for file_name in [
             os_join(settings.BASE_DIR, 'deep/tests_files/leads/doc.docx'),
             os_join(settings.BASE_DIR, 'deep/tests_files/leads/doc.pdf'),
             os_join(settings.BASE_DIR, 'deep/tests_files/leads/doc.pptx')
             ]:
         with open(file_name, 'rb') as fp:
             response = self.c.post(self.add_url,
                                    {
                                     **self.basic_lead_data,
                                     'lead-type': 'attachment',
                                     'file': fp,
                                    })
             self.assertEqual(response.status_code, 302,
                              "Leads Add Att file: "+file_name +
                              " POST :Failed")
Example #34
def sing_msg_multi_num(numb_path, msg_path):
    with open(numb_path) as numbers:
        with open(msg_path) as msg_chunks:
            msg = msg_chunks.readlines()
            m = "\n".join(msg)
            browser = Wsp(driver_path=os_join(getcwd(), 'chromedriver'))
            browser.pressEnter([s_url(n, m) for n in numbers])
Example #35
 def _save_screenshot(self, now):
     if '%s' in self.screen_pattern:
         file_name = self.screen_pattern % (now, )
     else:
         file_name = self.screen_pattern
     file_name = os_join(self.screen_path, file_name)
     self.driver.save_screenshot(file_name)
Example #36
    def __init__(self,
                 url,
                 phantomjs=None,
                 resolution=None,
                 ya_class=None,
                 screen_path=None,
                 screen_pattern=None,
                 csv_path=None):
        self.url = url

        self.phantomjs = phantomjs or DEFAULT_PHANTOMJS
        assert os.path.isfile(self.phantomjs), "phantomjs not found"

        resolution = resolution or FULLHD
        assert isinstance(resolution, (list, tuple))
        assert len(resolution) == 2

        self.ya_class = ya_class or DEFAULT_YA_CLASS
        self.screen_path = screen_path or PATH

        self.screen_pattern = screen_pattern or '%s.png'
        assert '%s' in self.screen_pattern

        self.csv_path = csv_path or os_join(PATH, 'statistic.csv')

        self.driver = PhantomJS(self.phantomjs)
        self.driver.set_window_size(*resolution)
Example #37
    def _load_reflist(cls):
        """ Load the reference list.

        """

        filename = os_join(data_path, 'ref_list.json.gz')
        with gzip.open(filename, 'rb') as reflist:
            cls._ref_list = json.loads(reflist.read().decode())
Example #39
def initialize_app(config_name):
    app = Flask(__name__)
    app.config.from_object(config[config_name])
    config[config_name].initapp(app)

    logfile = os_join(app.config['LOG_DIR'], 'haxxorbb.log')
    handler = TimedRotatingFileHandler(logfile, when='d', interval=1, backupCount=3)
    handler.setFormatter(Formatter("[%(asctime)s] %(levelname)s %(name)s: %(message)s"))
    handler.setLevel('INFO')
    app.logger.addHandler(handler)

    app.logger.info('Initialization started')
    app.wsgi_app = ReverseProxied(app.wsgi_app)
    login_manager.init_app(app)
    db.init_app(app)
    mail.init_app(app)
    pagedown.init_app(app)

    # Init the Media directory
    app.media = app.config['MEDIA_ROOT']

    # Register blueprints
    from .auth import auth as authentication
    app.register_blueprint(authentication)

    from .front_page import front_page
    app.register_blueprint(front_page)

    from .profile import profile
    app.register_blueprint(profile)

    from .utilities import filters
    app.register_blueprint(filters.filters)

    from .forum import forum
    app.register_blueprint(forum)

    @app.route('/robots.txt')
    def robots():
        return send_from_directory(app.static_folder, request.path[1:])

    @app.errorhandler(404)
    def page_not_found(e):
        app.logger.warning("Request for {} from source IP {}".format(request.path, request.remote_addr))
        return render_template("404.html", error=e), 404

    @app.route('/media/<path:filename>')
    def media(filename):
        return send_from_directory(app.config['MEDIA_ROOT'], filename)

    app.logger.info('Initialization completed.')
    return app
Example #40
 def detect( self , value  ):
     """
     detects the format of the sequence(s) contained in fileName
     @param value: the src object and the filename in the src of the data to detect
     @type value: tuple ( session/Job/MobyleJob instance , string filename )
     @return: a tuple of the detection run information:
         - the detected format,
         - the detected items number,
         - program name used for the detection.
     """
     detected_mt = super( AlignmentDataType , self ).detect( value )
     squizzDir = _cfg.format_detector_cache_path()
     if squizzDir :
         ##############################################
          # This part is used to back up all           #
          # submitted sequences which are not detected #
          # by squizz for further analysis             #
         ##############################################
         squizz_detector = None
         for detector in _cfg.dataconverter( self.__class__.__name__[:-8] ):
             if detector.program_name == 'squizz':
                 squizz_detector = detector
                 break
         if squizz_detector is not None :
             detected_data_format = detected_mt.getDataFormat()
             from os import link
             from os.path import join as os_join
             if ( detected_data_format is None ) or ( detected_data_format in squizz_detector.detectedFormat() and not detected_mt.getFormatProgram() == 'squizz' ):
                 try:
                     # dump the data for further analysis
                     link(  os_join( value[0].getDir() , value[1] ) , 
                            os_join( squizzDir , "%s_%s_%s"%( self.__class__.__name__[:-8] ,
                                                              value[0].getKey() ,
                                                              value[1] ) )
                            )
                 except Exception , err : 
                     c_log.error( "unable to link data in  format_detector_cache_path : %s " % err ) 
Example #41
    def __init__(self, name="", path=""):
        """ Initialize the dbm.

        """

        self._non_key_text_regx = re.compile(r"[<>\{\}]")

        path = path if path else INDEX_PATH
        self._name = "%s.dbm" % name
        self._path = path

        dbm_name = os_join(path, "%s.dbm" % name)
        self._dbm_dict = IndexDbm(dbm_name, "r")

        super(DbmDict, self).__init__()
Example #43
    def cleanup(self):
        "Shred all ini and raw files"

        # Remove ini files
        for fname in os.listdir(self.directory):
            file_path = os_join(self.directory, fname)
            if (fname.endswith('.ini')
            and os.path.isfile(file_path)):
                LOG.info('Clean file %s' % fname)
                run_cmd('shred --remove %s' % file_path)
            # Remove raw files
            elif (fname.endswith('.raw')
            and os.path.isdir(file_path)):
                LOG.info('Clean directory %s' % fname)
                shred_dir(file_path)
Example #44
def corpus_restore(diff_file, working_directory="", html_url=""):
    '''
    restores the corpus text from the given diff file.

    ::param diff_file:
        the diff_file
    ::param working_directory:
        an optional working_directory containing the HTML files.
    ::param html_url:
        optionally overwrite the url used in the diff_file (useful for testing)

    ::returns
        the text file used to create the cdiff
    '''
    cdiff = CDiffFile(diff_file)
    if html_url:
        fpath = os_join(working_directory, html_url)
    else:
        fpath = os_join(working_directory, cdiff.url)
    from_text = get_text(get_resource(fpath))

    sentences = filter(None, (extract_sentence(from_text, sentence_cdiff) 
                              for sentence_cdiff in cdiff.sentences))
    return "\n".join(sentences)
Example #45
    def encrypt(self, passphrase, commit_message='Update encrypted files'):
        LOG.info('Encryption')
        create_dir(os_join(self.directory, self.encrypted_dir))

        LOG.info('Encrypt files :')
        # Encrypt files
        for fname in os.listdir(self.directory):
            file_path = os_join(self.directory, fname)
            # Handle ini file
            if (fname.endswith('.ini')
            and os.path.isfile(file_path)):
                LOG.info('Encrypt file %s' % fname)
                git_relative_encrypted_file_path = os_join(self.encrypted_dir,
                                                       '%s.passkeeper' % fname)
                encrypted_file_path = os_join(self.directory,
                                              git_relative_encrypted_file_path)
                encrypted = encrypt(source=file_path,
                                    output=encrypted_file_path,
                                    passphrase=passphrase)
                if not encrypted.ok:
                    LOG.error("Encrypt file %s - %s" % (fname, encrypted.stderr))
                self.git.add([git_relative_encrypted_file_path])
            # Handle .raw directory
            if (fname.endswith('.raw')
            and os.path.isdir(file_path)):
                for root, dirs, files in os.walk(file_path, topdown=False):
                    for name in files:
                        # /git/foo.raw/file
                        root_raw_file_path = os.path.join(root, name)
                        # foo.raw/file
                        git_relative_file_path = relative_path(root_raw_file_path, self.directory).lstrip('/')
                        LOG.info('Encrypt file %s' % git_relative_file_path)

                        # encrypt/foo.raw/file.passkeeper
                        git_encrypted_relative_file_path = os_join(self.encrypted_dir,
                                                         '%s.passkeeper' % git_relative_file_path)
                        # /git/encrypt/foo.raw/file.passkeeper
                        root_encrypted_file_path = os_join(self.directory,
                                                      git_encrypted_relative_file_path)

                        # /git/encrypt/foo.raw
                        root_encrypted_dirname_path = dirname(root_encrypted_file_path)

                        create_dir(root_encrypted_dirname_path)
                        encrypted = encrypt(source=root_raw_file_path,
                                            output=root_encrypted_file_path,
                                            passphrase=passphrase)
                        if not encrypted.ok:
                            LOG.error("Encrypt file %s - %s" % (fname, encrypted.stderr))
                        self.git.add([git_encrypted_relative_file_path])

        self.git.commit('%s' % commit_message)
        return True
Example #46
    def __getitem__(self, key):
        """ If a filename was given then use it to retrieve keys when
        they are needed.

        """

        # Cleanup Strong's and Morphology
        key = self._non_key_text_regx.sub("", key).strip()
        if self._name and (key not in self):
            # Load the value from the database if we don't have it.
            try:
                dbm_name = os_join(self._path, self._name)
                with IndexDbm(dbm_name, "r") as dbm_dict:
                    self[key] = dbm_dict.get(key)
            except Exception as err:
                print("The error was: %s" % err)
                return err.message

        return super(DbmDict, self).__getitem__(key)
Example #47
def get_files(file_list):
    """ Returns a list of all the files in filename.

    """

    from os.path import isdir as os_isdir
    from os.path import isfile as os_isfile
    from os.path import join as os_join
    from os import walk as os_walk

    out_list = []

    for name in file_list:
        if os_isdir(name):
            for root, sub, files in os_walk(name):
                join_list = [os_join(root, f) for f in files]
                out_list.extend(join_list)
        else:
            out_list.append(name)

    return out_list
Example #48
def upload(local_fname, lifetime, webdav_file_pattern, file_url_pattern):
    ''' Uploads the given file to the webdav server :)

    :param local_fname: file name of the local file
    :param lifetime: suggested lifetime of the uploaded file
    '''
    file_storage = get_passwords()[0]
    webdav = easywebdav.connect(file_storage.server,
                                username=file_storage.username,
                                password=file_storage.password,
                                protocol=file_storage.protocol)
    file_url_dict = get_file_name_dict(local_fname, lifetime, get_version_suffix())
    file_url_dict['protocol'] = file_storage.protocol
    file_url_dict['file_server'] = file_storage.server
    file_url_dict['file_path'] = file_storage.path
    file_url_dict['lifetime'] = lifetime
    file_url_dict['url'] = file_url_pattern.format(**file_url_dict)

    remote_fname = os_join(file_storage.path,
                           quote(webdav_file_pattern.format(**file_url_dict)))
    webdav.upload(local_fname, remote_fname)
    return file_url_dict
Example #49
    def flush_history(self):
        """
        Flush the git history

        Destroy the .git directory by shredding all files and initializing git again.
        This allows clearing the git history and ensures more security.

        :Example:

        >>> flush_history()
        Clean file master
        Clean file HEAD
        Clean file exclude
        ...
        Initialized empty Git repository in /opt/mypasskeeper/.git/
        master (commit racine) 9e4a2a0] Clean git History

        .. seealso:: shred_dir(), git.init(), git.add(), git.commit()
        """
        shred_dir(os_join(self.directory, '.git'))
        self.git.init()
        self.git.add([self.encrypted_dir, '.gitignore'])
        self.git.commit('Clean git History')
Example #50
def resample_csvs(config):
    """Resample csv files to match the config.
     Do the average and delete spared values"""
    csv_path = config.get('csv_path')
    # For each urls
    for url_config in config.get('urls'):
        label = url_config.get('label')
        render_step = url_config.get('render_step')
        render_rows = url_config.get('render_rows')
        # Get filename
        csv_file = os_join(csv_path, '%s.csv' % label)

        if not isfile(csv_file):
            continue
        # Load the time serie
        time_serie = pd.read_csv(csv_file, index_col=0, parse_dates=True)
        # Resample data for the desired time period
        time_serie = resampled_time_serie(time_serie=time_serie,
                                         rows=render_rows,
                                         step=render_step)

        # Dump data to the csv file (resampled file)
        time_serie.to_csv(csv_file, header=['time'], index_label='date')
Example #52
def consumer(stop_process, result_queue, config):
    "Consume results from workers. And write them in csv files"
    # Disable ctrl + c
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    # Ensure the directory exists (move to consumer)
    csv_path = config.get('csv_path')
    ensure_dir(csv_path)

    # Get and print results
    LOG.info('consumer - Start consumer')
    while stop_process.value != 1:
        try:
            msg = result_queue.get_nowait()
            label = msg['url_config']['label']
            record = msg['job_result']
            record_file = os_join(csv_path, '%s.csv' % label)
            LOG.debug('consumer - Received ->>> %s\n' % str(msg))
            LOG.info('consumer - Save record %s in %s\n' % (str(record), record_file))
            record_http_time(record=record, record_file=record_file)
        except Empty:
            sys.stdout.write('.')
            sys.stdout.flush()
            time.sleep(config.get('consumer_frequency', 0.1))
Example #53
"""

from db.configs import *
from os.path import join as os_join
from config.defaults import MEDIA_ROOT

MAPTYPE = 'slopes'

ROUTEDB = SlopeDBConfig()
ROUTEDB.schema = 'slopes'
ROUTEDB.relation_subset = """
    tags ? 'route' and tags->'type' IN ('route', 'superroute')
    AND array['ski', 'piste'] && regexp_split_to_array(tags->'route', ';')
    AND NOT (tags ? 'state' AND tags->'state' = 'proposed')
    AND NOT (tags->'piste:type' = 'skitour')"""
ROUTEDB.way_subset = """
    tags ? 'piste:type'
    AND NOT (tags ? 'state' AND tags->'state' = 'proposed')
    AND NOT (tags->'piste:type' = 'downhill'
             AND nodes[array_lower(nodes,1)] = nodes[array_upper(nodes,1)])
    AND NOT (tags->'piste:type' = 'skitour')"""

PISTE = PisteTableConfig()
PISTE.symbols = ('Slopes', 'Nordic')

SYMBOLS = ShieldConfiguration()
SYMBOLS.symbol_outdir = os_join(MEDIA_ROOT, 'symbols/slopes')
SYMBOLS.image_size = (20, 20)
SYMBOLS.text_color = (1, 1, 1) # white
SYMBOLS.text_bgcolor = (0, 0, 0) # black
Example #54
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from collections import defaultdict
from xml.dom.minidom import parseString
from textwrap import fill
from os.path import dirname as os_dirname
from os.path import join as os_join
import dbm
import sys
import re

import Sword

from .utils import *

data_path = os_join(os_dirname(__file__), 'data')


def book_gen():
    """ A Generator function that yields book names in order.

    """

    # Yield a list of all the book names in the bible.
    verse_key = Sword.VerseKey('Genesis 1:1')
    for testament in [1, 2]:
        for book in range(1, verse_key.bookCount(testament) + 1):
            yield(verse_key.bookName(testament, book))
# book_list = list(book_gen())
try:
    book_list = []
Example #55
ROUTEDB = RouteDBConfig()
ROUTEDB.schema = 'cycling'
ROUTEDB.relation_subset = """
    tags ? 'route' and tags->'type' IN ('route', 'superroute')
    AND 'bicycle' = any(regexp_split_to_array(tags->'route', ';'))
    AND NOT (tags ? 'state' AND tags->'state' = 'proposed')"""

ROUTES = RouteTableConfig()
ROUTES.network_map = { 'icn': 0,'ncn': 10, 'rcn': 20, 'lcn': 30 }
ROUTES.symbols = ( 'NorwichColorBox',
                   'SwissMobile',
                   'JelRef',
                   'TextColorBelow',
                   'TextSymbol',
                   'ColorBox')

DEFSTYLE = RouteStyleTableConfig()

GUIDEPOSTS = GuidePostConfig()
GUIDEPOSTS.subtype = 'bicycle'
GUIDEPOSTS.require_subtype = True

NETWORKNODES = NetworkNodeConfig()
NETWORKNODES.node_tag = 'rcn_ref'

SYMBOLS = ShieldConfiguration()
SYMBOLS.symbol_outdir = os_join(MEDIA_ROOT, 'symbols/cycling')
SYMBOLS.swiss_mobil_bgcolor = (0.66, 0.93, 1.0)
SYMBOLS.swiss_mobil_networks = ('rcn', 'ncn')
Example #56
def get_tokens(sentence):
    from json import loads
    sentence = loads(sentence)
    text = sentence['value']
    tokens = []
    for indices in sentence['tok_list'].split(' '):
        start, end = map(int, indices.split(','))
        tokens.append(text[start:end])

    return tokens
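
# A hypothetical sample, illustrating the JSON shape get_tokens expects
# ('tok_list' holds space-separated "start,end" character offsets):
#     get_tokens('{"value": "Hello world", "tok_list": "0,5 6,11"}')
#     -> ['Hello', 'world']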


if __name__ == '__main__':

    print("Reading corpus...")
    CORPUS_PATH = os_join(dirname(__file__), '../corpus', '*.txt.gz')
    corpus_documents = read_corpus_files(CORPUS_PATH)

    print("Pre-processing corpus...")
    xml_corpus_documents = get_weblyzard_xml_documents(corpus_documents)

    print("Configuring keyword service...")
    jesaja = Jesaja()
    jesaja.set_stoplist(STOPLIST_NAME, [stopword.strip()
                                        for stopword in GzipFile(STOPLIST_FILE)])
    jesaja.set_keyword_profile(PROFILE_NAME, PROFILE)
    jesaja.set_matview_profile(
        matview_id=MATVIEW_NAME, profile_name=PROFILE_NAME)

    # check whether we have already shards available for the given matview
    if not jesaja.has_corpus(matview_id=MATVIEW_NAME):
Example #57
    """, default="")
    args = parser.parse_args()

    
    # Search for the keyfile
    _is_temp_file = False
    if len(args.key) != 0:
        if not os.path.isfile(args.key):
            print("Keyfile '%s' not found." % args.key)
            exit(1)
        keyfile = args.key
    else:
        if os.path.isfile('server.pem'):
            keyfile = 'server.pem'
        elif isfile(os_join(dirname(realpath(__file__)), 'server.pem')):
            keyfile = os_join(dirname(realpath(__file__)), 'server.pem')
        else:
            # We need to create a temporary file for the key
            # as ssl.wrap_socket just pass the path.
            from tempfile import mkstemp
            fd, keyfile = mkstemp()
            offset = 0
            _len = len(_fallback_key)
            while offset < _len:
                offset += os.write(fd, _fallback_key[offset:])
            os.close(fd)
            _is_temp_file = True
        
    print('Using keyfile "%s"' % keyfile)
    print('START LISTENING ON https://%s:%i\n' % ('localhost', args.port))