Example #1
def from_feather_to_csv_all():
    for seq_len in range(2, 13):
        print('seq_len = ' + str(seq_len) + '...')
        for nF in range(1, 9999):  # 1,...,(n-1)
            fichtr = 'clicks_X_train_' + str(seq_len) + '-' + str(
                nF) + '.feather'
            csv_name = fichtr.replace('.feather', '_para_spark.csv')
            # Skip chunks whose CSV already exists (pending or already moved
            # to HDFS) or whose source feather file is missing.
            if os_path_isfile(s_input_path + 'ok_en_hdfs/' + csv_name):
                continue
            if os_path_isfile(s_input_path + csv_name):
                continue
            if not os_path_isfile(s_input_path + fichtr):
                continue
            # Convert the six train/valid/test splits for this chunk.
            for prefix in ('clicks_X_train_', 'clicks_X_valid_',
                           'clicks_X_test_', 'clicks_y_train_',
                           'clicks_y_valid_', 'clicks_y_test_'):
                fich = prefix + str(seq_len) + '-' + str(nF) + '.feather'
                fich = from_feather_to_csv(fich)
Example #2
def process_single_target(target_dirname):
    # Load chandat
    chandat_fpath = os_path_join(target_dirname, CHANDAT_FNAME)
    try:
        chandat_obj = get_mat_obj_from_h5py(chandat_fpath)
    except OSError:
        chandat_obj = loadmat(chandat_fpath)

    # print('chandat_obj[\'chandat\'].shape =', chandat_obj['chandat'].shape)
    # Load old_stft_obj
    old_stft_fpath = os_path_join(target_dirname, OLD_STFT_FNAME)
    if os_path_isfile(old_stft_fpath):
        try:
            old_stft_obj = get_mat_obj_from_h5py(old_stft_fpath)
        except OSError:
            old_stft_obj = loadmat(old_stft_fpath)
    else:
        old_stft_obj = r2_dnn_stft(target_dirname, saving_to_disk=False)

    # print('old_stft_obj[\'old_stft_real\'].shape =', old_stft_obj['old_stft_real'].shape)
    # print('old_stft_obj[\'old_stft_imag\'].shape =', old_stft_obj['old_stft_imag'].shape)
    new_stft_object = r3_dnn_apply(target_dirname,
                                   old_stft_obj=old_stft_obj,
                                   saving_to_disk=False)
    del old_stft_obj
    # print('process_single_scan_battery: new_stft_object[\'new_stft_real\'].shape =', new_stft_object['new_stft_real'].shape)
    chandat_dnn_object = r4_dnn_istft(target_dirname,
                                      chandat_obj=chandat_obj,
                                      new_stft_object=new_stft_object,
                                      is_saving_chandat_dnn=False)
    del new_stft_object
    chandat_image_obj = r5_dnn_image(target_dirname,
                                     chandat_obj=chandat_obj,
                                     chandat_dnn_obj=chandat_dnn_object,
                                     is_saving_chandat_image=False)
    del chandat_obj, chandat_dnn_object
    r6_dnn_image_display(target_dirname,
                         dnn_image_obj=chandat_image_obj,
                         show_fig=False)

    # Remove target-level files and folders
    for file in TARGET_FILES_TO_REMOVE:
        file_path = os_path_join(target_dirname, file)
        if os_path_isfile(file_path):
            # print('{}: Trying to remove {}'.format(SCRIPT_FNAME, file_path))
            try:
                os_remove(file_path)
            except OSError:
                raise OSError(
                    'Error: unable to remove file {}'.format(file_path))
Example #3
File: bot.py Project: pageflt/tatianna
def expand_bot_path(filename):
    '''
    Resolve filename relative to this module's directory ("core/"),
    falling back to the parent directory ("core/..").
    '''
    # try "core/"
    first_try = os_path_join(os_path_dirname(__file__), filename)
    if os_path_isfile(first_try):
        return first_try

    # try "core/.."
    second_try = os_path_join(os_path_dirname(__file__), '..', filename)
    if os_path_isfile(second_try):
        return second_try

    raise IOError('File "{0}" not found under "{1}" or "{2}"'.format(
        filename, first_try, second_try))
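
A quick illustrative call (the filename is hypothetical): resolve a data file that may live either next to bot.py ("core/") or one level up, raising IOError if it is in neither place.

config_path = expand_bot_path('settings.conf')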
Example #5
def preparar_RDD(seq_len=0):
  from elephas.utils.rdd_utils import to_simple_rdd
  from os import rename as os_rename
  for nF in range(1, 99): # 1,...,(n-1)
    fichtr = 'clicks_X_train_' + str(seq_len) + '-' + str(nF) + '_para_spark.csv'
    if os_path_isfile(s_input_path + fichtr):
      print('Reading train+valid files ' + str(nF) + ' - numAds ' + str(seq_len) + '...')
      X_train = read_csv(s_input_path + 'clicks_X_train_' + str(seq_len) + '-' + str(nF) + '_para_spark.csv', dtype=np_float64, header=None).values
      y_train = read_csv(s_input_path + 'clicks_y_train_' + str(seq_len) + '-' + str(nF) + '_para_spark.csv', dtype=int, header=None).values
      X_valid = read_csv(s_input_path + 'clicks_X_valid_' + str(seq_len) + '-' + str(nF) + '_para_spark.csv', dtype=np_float64, header=None).values
      y_valid = read_csv(s_input_path + 'clicks_y_valid_' + str(seq_len) + '-' + str(nF) + '_para_spark.csv', dtype=int, header=None).values
      print(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape)
      X_train, y_train = mi_reshape(X_train, to_categorical(y_train), seq_len)
      X_valid, y_valid = mi_reshape(X_valid, to_categorical(y_valid), seq_len)
      X_train = np_concat((X_train, X_valid), axis=0) # Fold the validation set into the training set for Spark
      y_train = np_concat((y_train, y_valid), axis=0) # Fold the validation set into the training set for Spark
      print(X_train.shape, y_train.shape)
      print('Creating RDD (train+valid) ' + str(nF) + ' - numAds ' + str(seq_len) + '...')
      rdd_ini = to_simple_rdd(sc, X_train, y_train)
      # Convert each ndarray [ i.e. array(...) ] into a list [ i.e. [...] ]:
      rdd_lista = rdd_ini.map(lambda i: [s.tolist() for s in i])
      # And now save as text:
      rdd_lista.coalesce(numSparkWorkers, True).saveAsTextFile(s_spark_inputpath + 'clicks_train_seq' + str(seq_len) + '-' + str(nF) + '_rdd') # Force saving it in (at least) 4 chunks
      print('Ok. RDD (train+valid) ' + str(nF) + ' - numAds ' + str(seq_len) + ' saved to HDFS.')
      # Move the processed CSV aside so it is not re-read on the next run:
      os_rename(s_input_path + fichtr, s_input_path + 'ok_en_hdfs/' + 'clicks_X_train_' + str(seq_len) + '-' + str(nF) + '_para_spark.csv')
Example #6
def process_single_scan_battery_keras(model_folder,
                                      source_scan_battery_dirname):
    # Make sure model_folder and source_scan_battery_dirname exist.
    if not os_path_isdir(model_folder):
        raise OSError('{}: model folder {} does not exist'.format(
            SCRIPT_FNAME, model_folder))
    if not os_path_isdir(source_scan_battery_dirname):
        raise OSError(
            '{}: source scan battery folder {} does not exist'.format(
                SCRIPT_FNAME, source_scan_battery_dirname))

    # model/scan_batteries folders.
    model_scan_batteries_dirname = os_path_join(model_folder,
                                                SCAN_BATTERIES_DIRNAME)
    model_scan_battery_dirname = os_path_join(
        model_scan_batteries_dirname,
        os_path_basename(source_scan_battery_dirname))

    # Copy source scan_batteries folder into model scan_batteries folder
    # TODO: Could also just copy the entire scan_batteries folder (all 3 types) into model_folder
    # logging_info('{}: copying {} to {}'.format(SCRIPT_FNAME, source_scan_battery_dirname, model_scan_battery_dirname))
    # copy_anything(source_scan_battery_dirname, model_scan_battery_dirname)
    # model_scan_battery_process_scripts_dirname = os_path_abspath(os_path_join(model_scan_battery_dirname, PROCESS_SCRIPTS_DIRNAME))

    # Grab all targets with glob
    model_scan_battery_target_prefix = os_path_join(
        model_scan_battery_dirname, TARGET_PREFIX + '*')
    target_dirnames = glob_glob(model_scan_battery_target_prefix)
    if not target_dirnames:
        raise ValueError('{}: no targets found with prefix {}'.format(
            SCRIPT_FNAME, model_scan_battery_target_prefix))

    for target_dirname in target_dirnames:
        process_single_target(target_dirname)

    # Remove scan battery-level folders
    for folder in SCAN_BATTERY_FOLDERS_TO_REMOVE:
        folder_path = os_path_join(model_scan_battery_dirname, folder)
        if os_path_isdir(folder_path):
            # print('{}: Trying to remove {}'.format(SCRIPT_FNAME, folder_path))
            try:
                shutil.rmtree(folder_path)
            except OSError:
                raise OSError(
                    'Error: unable to remove folder {}'.format(folder_path))

    # Remove scan battery-level files
    for file in SCAN_BATTERY_FILES_TO_REMOVE:
        file_path = os_path_join(model_scan_battery_dirname, file)
        if os_path_isfile(file_path):
            # print('{}: Trying to remove {}'.format(SCRIPT_FNAME, file_path))
            try:
                os_remove(file_path)
            except OSError:
                raise OSError(
                    'Error: unable to remove file {}'.format(file_path))
Example #7
 def release(self):
     if not os_path_isfile(self._path):
         raise Exception('Can\'t release unacquired Lock!')
     if current_thread() != self._owner:
         raise Exception('Can\'t release Lock, not the right Thread!')
     os_remove(self._path)
     self._owner = None
Example #8
def increase_index_and_move(src_folder, dst_folder, file, extension, src_index, dst_index, max_index):
    # Helper function to format the full source and destination path
    path = lambda f, i: os_path_join(f, extension.format(file, i))
    # If the source file's index is within
    # the maximum number of backups allowed
    if src_index <= max_index:
        src = path(src_folder, src_index)
        dst = path(dst_folder, dst_index)
        # If the destination file exists
        if os_path_isfile(dst):
            # Call this function recursively
            increase_index_and_move(
                src_folder=dst_folder,
                dst_folder=dst_folder,
                file=file,
                extension=extension,
                src_index=dst_index,
                dst_index=dst_index + 1,
                max_index=max_index,
            )
        cleanup = ""
    # If the source file's index is equal to or
    # greater than the maximum number of backups allowed
    else:
        src = path(src_folder, max_index - 1)
        dst = path(dst_folder, max_index)
        cleanup = path(src_folder, src_index)
    # Move source file to destination
    try:
        shutil_move(src, dst)
        return cleanup
    # If the source file is not found
    except FileNotFoundError:
        return ""
Example #9
def _storeArticle(article):
    """
    _safeArticle(Dict) -> Bool

    private help method to safe an aticle

    param article:Dict -
    """
    #    try:
    #make a path according to the article's topics
    path = re_sub('http://www.spiegel.de/','', article['link']).split('/')
    filename = path.pop(-1)
    storePath = os_path_join(BASE_PATH,os_path_join(*path))
    #create directories
    if not os_path_exists(storePath):
        os_makedirs(storePath)
    #write article as json to the file
    with open(os_path_join(storePath, filename),'w') as o:
        json.dump(article, o)
    #write the article name to the log
    if os_path_isfile(BASE_PATH + 'article_log'):
        log = open(BASE_PATH + 'article_log','a')
    else:
        log = open(BASE_PATH + 'article_log','w')
    log.write(article['link'] + '\n')
    log.close()
    return True
Example #10
 def wrapper(*args, **kwargs):
     # TODO: find 'self' object - untested
     report = kwargs.get('self')
     if report is None:
         for i in args:
             if hasattr(i, 'dirLog'):
                 report = i
     if report is None:
         report = globals().get('self')
     if report is None:
         raise LogError
     # Configure logger
     import logging
     bolNewLog = False
     try:
         if not os_path_isfile(report.dirLog):
             open(report.dirLog, 'w').close()
             bolNewLog = True
         strFormat = '%(asctime)s %(levelname)s: %(message)s'
         logging.basicConfig(filename=report.dirLog, filemode='a',
                             format=strFormat, level=logging.DEBUG)
     except Exception as err:
         print(err)
         raise LogError
     if callable(fun):
         fun(*args, **kwargs)  # forward the original arguments
     logging.shutdown()
     del logging
     gc_collect()
     return bolNewLog
Example #11
def from_feather_to_csv(fich='clicks_X_valid_4-1.feather',
                        s_input_path='kaggle/Outbrain/In/python/'):
    fich_dest = fich.replace('.feather', '_para_spark.csv')
    if not os_path_isfile(s_input_path + fich_dest):
        from feather import read_dataframe as fthr_read_dataframe
        from numpy import savetxt as np_savetxt
        X = fthr_read_dataframe(s_input_path + fich)
        np_savetxt(s_input_path + fich_dest, X, delimiter=',')
        print(fich_dest, X.values.shape, ' Ok.')
    return fich_dest
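
A minimal usage sketch (assuming the default s_input_path and an existing feather file): the first call writes the CSV, a repeated call is a cheap no-op because the target file already exists.

csv_name = from_feather_to_csv('clicks_X_valid_4-1.feather')
csv_name = from_feather_to_csv('clicks_X_valid_4-1.feather')  # skipped: CSV found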
Example #12
 def test_main_option_output(self):
     tmpfile = os_path_join(
         self.tmpdir,
         'test_main_option_output.' + str(randint(100000, 999999)))
     cmds = (['python3', '-m', 'passphrase', '--output',
              tmpfile], ['python3', '-m', 'passphrase', '-o', tmpfile])
     for cmd in cmds:
         result = subprocess.run(
             cmd, stdout=subprocess.PIPE).stdout.decode('utf-8')
         self.assertTrue(os_path_isfile(tmpfile))
         with open(tmpfile, mode='rt', encoding='utf-8') as tfile:
             self.assertEqual(result, tfile.read())
Example #13
def from_feather_to_csv_all():
  from os.path import isfile as os_path_isfile
  for seq_len in range(2,13):
    for nF in range(1, 9999): # 1,...,(n-1)
      fichtr = 'clicks_X_train_' + str(seq_len) + '-' + str(nF) + '.feather'
      if not os_path_isfile(s_input_path + fichtr):
        break # No more files for this seq_len
      # Convert the six train/valid/test splits for this chunk:
      for prefix in ('clicks_X_train_', 'clicks_X_valid_', 'clicks_X_test_',
                     'clicks_y_train_', 'clicks_y_valid_', 'clicks_y_test_'):
        fich = prefix + str(seq_len) + '-' + str(nF) + '.feather'
        fich = from_feather_to_csv(fich)
Example #14
def get_filepaths_in_folder(folderpath: str,
                            ignore: List[str],
                            recursive: bool = False) -> List[str]:
    folderpath = os_path_normpath(folderpath)
    print('get_filepaths_in_folder:', folderpath)
    list_filepaths = []
    for f in os_listdir(folderpath):
        if f not in ignore:
            f_path = myjoin(folderpath, f)
            if os_path_isfile(f_path):
                list_filepaths.append(f_path)
            elif recursive:
                list_filepaths += get_filepaths_in_folder(
                    f_path, ignore, recursive)

    return list_filepaths
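
An illustrative call (paths are hypothetical): collect every file under a project folder, descending into subfolders but skipping version-control and cache directories.

paths = get_filepaths_in_folder('my_project',
                                ignore=['.git', '__pycache__'],
                                recursive=True)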
Example #15
    def start(self, wait_on=True):
        """Create and launch LXC container with switchpp.

        Args:
            wait_on(bool):  Indicates if wait for device status

        """
        self.class_logger.info(
            "Starting LXC for switch with ip:%s port:%s..." %
            (self.ipaddr, self.port))

        # Check if it is an altamodel.
        if os_path_isfile(os_path_join(self.build_path, "bin", "ons-fulcrum")):
            self.class_logger.info("AltaModel is found.")
            self.__class__.SWITCH_APP = {"FulcrumApp"}

        log_wrap_out, log_wrap_err = loggers.pipe_loggers(
            "switchpp%s" % (self.id, ), self.popen_logfile)

        # sudo env LD_LIBRARY_PATH=$PWD/lib ./bin/ons-lxc -n 1 -i br0 -a 10.0.5.101/24 -p 52
        lxc_id = str(int(self.port) - 8080)
        command = [
            "./ons-ctl", "start", "-n", lxc_id, "-i", self.vlab_iface, "-a",
            "%s/24" % self.ipaddr, "-p",
            str(self.ports_count)
        ]
        self.class_logger.debug("LXC start command: %s" % (" ".join(command)))
        process = Popen(command,
                        stdout=log_wrap_out,
                        stderr=log_wrap_err,
                        close_fds=True,
                        cwd=os_path_join(self.build_path, "bin"))
        process = Popen(['lxc-wait', '-n', lxc_id, '-s', 'RUNNING'],
                        stdout=log_wrap_out,
                        stderr=log_wrap_err,
                        close_fds=True)
        process.wait()

        # let's wait until device is up and running:
        if wait_on:
            time.sleep(5)
            self.waiton(timeout=self.startup_time)

        # Set On(True) status
        self.status = True

        return self.xmlproxy
Example #16
 def __init__(self, folder, file, reset=False, lazy_update=False):
     self._file = os_path_join(folder, file)
     self._cache = cache = {}
     self._last = None
     if reset:
         return
     # If cache file already exists
     try:
         with open(self._file, 'rb') as file:
             for filepath, checksum in pickle_load(file).items():
                 # If file still exists
                 if os_path_isfile(filepath):
                     cache[filepath] = checksum
             if lazy_update:
                 self._lcache = cache.copy()
     except (FileNotFoundError, EOFError):
         pass
Example #18
	def update_db(self):
		DB_update_folder = DB_DIR + "/Updates"
		if not os_path_exists(DB_update_folder):
			return

		# Get all files that match versioning
		file_versions = []
		for file in os_listdir(DB_update_folder):
			filepath = os_path_join(DB_update_folder, file)
			# Try to get the file's name excluding extension (valid filename example: v0.0.0.sql)
			version_string = Version.version_string(os_path_splitext(os_path_basename(filepath))[0])

			# Include only files with proper version names within update range
			if os_path_isfile(filepath) and version_string and self.version_is_usable(Version(version_string)):
				file_versions.append({"path": filepath, "version": Version(version_string)})

		file_versions.sort(key=lambda file_version: file_version["version"])

		for file in file_versions:
			if self.call_shell_command(["sudo", "mysql", "-u", "root", "<", file["path"]]):
				raise Exception(f"Failed to update DB with file {file['path']}")
Example #19
    def cvs_add(self, input_path: str) -> Tuple[str, str]:
        """Adds single file to staged changes

        Returns pair: result status AND filename

        Possible return statuses:
            'does not exist'
            'success'
            'not a file'
            'already added'
        """

        if not os_path_exists(self._CVS_DIR_PATH):
            raise CodecException('add: init repo first')

        if not os_path_exists(input_path):
            return 'does not exist', input_path

        elif os_path_isfile(input_path):
            repo_changes: 'ChangesCodec.RepositoryChanges'

            if os_path_exists(self._STAGED_PATH):
                repo_changes = self._decode_changes(self._STAGED_PATH)
            else:
                repo_changes = self.RepositoryChanges()

            if input_path in repo_changes.addition:
                return 'already added', input_path

            repo_changes.addition.append(input_path)

            self._encode_changes(repo_changes, self._STAGED_PATH)

            return 'success', input_path

        else:
            return 'not a file', input_path
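
An illustrative call (the codec instance and path are hypothetical), showing how the status/filename pair is meant to be consumed:

status, name = codec.cvs_add('src/main.py')
if status != 'success':
    print('add failed for', name, '-', status)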
Example #20
 def acquire(self):
     while os_path_isfile(self._path):
         time_sleep(1)
     open(self._path,'w').close()
     self._owner = current_thread()
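
Together with the release method from Example #7, this is a simple lock-file mutex: acquire spins until the marker file disappears, then creates it and records the owning thread. A self-contained sketch of the same pattern (class name and imports are illustrative, assembled from Examples #7, #20 and #23):

from os import remove as os_remove
from os.path import isfile as os_path_isfile
from threading import current_thread
from time import sleep as time_sleep

class FileLock:
    def __init__(self, path):
        self._path = path    # marker file guarding the critical section
        self._owner = None

    def acquire(self):
        # Busy-wait until no other thread/process holds the marker file
        while os_path_isfile(self._path):
            time_sleep(1)
        open(self._path, 'w').close()  # create the marker file
        self._owner = current_thread()

    def release(self):
        if not os_path_isfile(self._path):
            raise Exception("Can't release unacquired Lock!")
        if current_thread() != self._owner:
            raise Exception("Can't release Lock, not the right Thread!")
        os_remove(self._path)
        self._owner = None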
Example #21
                        type=str,
                        action='store',
                        nargs='?',
                        default='')
    parser.add_argument("--no_splash_screen",
                        help="Does not display the splash screen",
                        action='store_true')
    args = parser.parse_args()
    if args.filename:
        from pywinauto_recorder.recorder import overlay_add_mode_icon
        from pywinauto_recorder.recorder import overlay_add_progress_icon
        import traceback
        import codecs

        recorder = None
        if os_path_isfile(args.filename):
            with codecs.open(args.filename, "r",
                             encoding='utf-8') as python_file:
                data = python_file.read()
            print("Replaying: " + args.filename)
            replay(data, args.filename)
        else:
            print("Error: file '" + args.filename + "' not found.")
            input("Press Enter to continue...")
        print("Exit")
    else:
        from pywinauto_recorder.player import *
        from pywinauto_recorder.recorder import *
        from win32api import GetSystemMetrics

        display_splash_screen()
Example #22
File: util.py Project: linex-cd/puf
def existfile(filename):
	return (os_path_exists(filename) and os_path_isfile(filename)
		and os_access(filename, os_W_OK))
Example #23
    ,\'{author}\',{timestamp},\'{comment}\'',
    u'articletags':u'{tagid},{articleid}',
    u'tags':u'{tagid},{tagname}'
}

CREATE = 'create table {}({})'

TABLE_QUERY = {
    'articles':u'articleid integer primary key, link text, \
    content text, site text, author text, timestamp integer, \
    comment text',
    u'articletags':u'tagid integer,articleid integer',
    u'tags':u'tagid integer primary key, tagname text unique'
}

if not os_path_isfile(DBPATH):
    db = sqlite3.connect(DBPATH)
    c = db.cursor()
    for table, query in TABLE_QUERY.items():
        c.execute(CREATE.format(table,query))
        db.commit()
    db.close()

class LockFile:
    def __init__(self, path=DBPATH):
        self._path = os_join(path, '.lock')
        self._owner = None

    def acquire(self):
        while os_path_isfile(self._path):
            time_sleep(1)
Example #24
    def GET(self):
        URIs_especiales = {
            '_raices': 'self.indexar_json()',
            '_configuracion': 'self.indexar_configuracion()'
        }

        # Clamp to 0 if the value is less than 0
        acciones_parametros_especiales = {
            '_limite':
            ' datos_almacenados[ 0: 0 if int(valor_parametro_especial) < 0 else int(valor_parametro_especial) ] ',
            '_desde':
            ' datos_almacenados[ 0 if int(valor_parametro_especial) - 1 < 0 else int(valor_parametro_especial) - 1 : ]',
            '_total': ' {"total objetos":len(datos_almacenados)}'
        }

        trozos_URI, parametros, parametros_especiales = self.trocear_URI(
            parametros=True)

        if len(trozos_URI) == 0:

            if self.CONFIGURACION['SERVIDOR_ESTATICO']:

                try:

                    with open(
                            self.CONFIGURACION['PAGINA_ESTATICA_DIRECTORIO'] +
                            '/' +
                            self.CONFIGURACION['PAGINA_ESTATICA_ARCHIVO'],
                            'r') as PAGINA_ESTATICA:
                        self.devolver_estado(
                            200,
                            PAGINA_ESTATICA.read(),
                            nombre_archivo=self.
                            CONFIGURACION['PAGINA_ESTATICA_ARCHIVO'])

                except Exception as e:
                    self.indexar_json()

            else:
                self.indexar_json()

        elif trozos_URI[0] in URIs_especiales.keys():
            if not self.CONFIGURACION['URI_ESPECIALES']:
                self.devolver_estado(403)
                return

            eval(URIs_especiales[trozos_URI[0]])

        elif trozos_URI[0] == self.CONFIGURACION['PAGINA_ESTATICA_DIRECTORIO']:

            directorio = self.CONFIGURACION['PAGINA_ESTATICA_DIRECTORIO']
            for x in range(1, len(trozos_URI)):
                directorio += "/" + trozos_URI[x]

            if os_path_isdir(directorio):
                if self.CONFIGURACION['INDEXAR_DIRECTORIOS']:
                    codigo_estado, contenido, nombre_archivo = almacenamiento.leer_directorio(
                        directorio, trozos_URI,
                        self.CONFIGURACION['PAGINA_ESTATICA_ARCHIVO'],
                        self.CONFIGURACION[
                            'BUSCAR_PAGINA_ESTATICA_AL_INDEXAR_DIRECTORIO'])
                else:
                    codigo_estado, contenido, nombre_archivo = 403, False, False
                self.devolver_estado(codigo_estado, contenido, nombre_archivo)

            elif os_path_isfile(directorio):
                codigo_estado, contenido, nombre_archivo = almacenamiento.leer_archivo(
                    directorio, trozos_URI)
                self.devolver_estado(codigo_estado, contenido, nombre_archivo)

            else:
                self.devolver_estado(404)

        else:
            try:
                datos_almacenados = almacenamiento.leer_json(
                    self.CONFIGURACION, trozos_URI)

                # If there are parameters, each one is checked against every retrieved item; this only works with objects
                if len(parametros) > 0:
                    if not isinstance(datos_almacenados, list):
                        self.devolver_estado(400)
                        return True

                    for parametro in parametros:
                        indice = 0

                        while indice < len(datos_almacenados):

                            if not isinstance(datos_almacenados[indice], dict):
                                self.devolver_estado(400)
                                return True

                            if not str(datos_almacenados[indice][
                                    parametro.split("=")[0]]) == str(
                                        parametro.split("=")[1]):
                                datos_almacenados.remove(
                                    datos_almacenados[indice])

                            else:
                                indice += 1

                # If special parameters exist and are allowed, iterate over them and execute them
                if len(parametros_especiales) > 0:
                    if not self.CONFIGURACION['PARAMETROS_ESPECIALES']:
                        self.devolver_estado(
                            403, 'PARAMETROS_ESPECIALES_DESACTIVADOS')
                        return True

                    if not isinstance(datos_almacenados, list):
                        self.devolver_estado(400)
                        return True

                    for parametro in parametros_especiales:
                        parametro_especial = parametro.split("=")[0]
                        try:
                            valor_parametro_especial = parametro.split("=")[1]
                        except:
                            valor_parametro_especial = None  # For cases like ?_total

                        if parametro_especial in acciones_parametros_especiales.keys(
                        ):
                            datos_almacenados = eval(
                                acciones_parametros_especiales[
                                    parametro_especial])

                self.devolver_estado(200, datos_almacenados, es_json=True)

            except Exception as e:
                self.captura_error(str(e), cod_error=404, msg_error=str(e))
Example #25
def collect(infolder,
            line  = comment_LINE,
            block = comment_BLOCK,
            tags  = WORDS,
            marks = MARKS,
            include=INCLUDE,
            exclude=EXCLUDE,
            overwrite=False):
    # Process block comment marks
    blocks_open, blocks_close = comment_block_comments(block)

    # TODO: Make hidden files OS independent, probably using
    #       https://docs.python.org/3.4/library/tempfile.html ?

    # FIXME: for some reason, if a comment-type ever existed in the TODO
    #        file, but after a while its posts are all gone, the keyword
    #        still remains there, according to the current TODO file,
    #        which still have the "QUESTIONS" keyword, and comment

    # TODO: Add explicit-remove/browsing capabilities of the .*_cache files
    #       (for example: if git reverted changes --> remove hash from cache file)
    #       The best solution would be a complete CLI tool, to read and manage
    #       and use the cutils command line tools

    # Compile regular expression patterns
    pattern1 = re_compile(_COMMENT.format(r'|'.join(map(comment_escape, line)),
                                          blocks_open,
                                          r'|'.join(map(comment_escape, tags)),
                                          r'|'.join(map(comment_escape, marks)),
                                          blocks_close),
                         flags=re_IGNORECASE | re_DOTALL | re_MULTILINE | re_VERBOSE)
    pattern2 = re_compile(r'\n')

    # Get previously generated collection of all posts
    COLLECTED = os_path_join(infolder, '.ccom_todo')
    try:
        with open(COLLECTED, 'rb') as file:
            collected = pickle_load(file)
    except (FileNotFoundError, EOFError):
        collected = table_Table(row=OrderedDict)

    # Clear cache -- remove all non-existing files
    for filepath in collected.rows():
        if not os_path_isfile(filepath):
            del collected[filepath]

    # Exception containers
    except_dirs  = []  # relative path to dir from root
    except_files = []  # relative path to file from root
    except_names = []  # filename (with extension) anywhere
    except_exts  = []  # extension anywhere

    # If 'exclude' is dictionary like object
    try:
        _empty = ()
        # Exceptions relative to root
        for key, container in zip(('folders', 'files'),
                                  (except_dirs, except_files)):
            container.extend(os_path_join(infolder, p) for p in exclude.get(key, _empty))
        # Exceptions anywhere
        for key, container in zip(('names', 'extensions'),
                                  (except_names, except_exts)):
            container.extend(exclude.get(key, _empty))
    # If 'exclude' is an iterable object
    except AttributeError:
        except_names = exclude

    # Include containers
    permit_names = []  # filename (with extension) anywhere
    permit_exts  = []  # extension anywhere

    # If 'include' is dictionary like object
    try:
        _empty = ()
        # Includes anywhere
        for key, container in zip(('names', 'extensions'),
                                  (permit_names, permit_exts)):
            container.extend(include.get(key, _empty))
    # If 'include' is an iterable object
    except AttributeError:
        permit_names = include

    # Scan through all files and folders
    with check_Checker(infolder, file='.ccom_cache') as checker:
        for root, dirs, filenames in os_walk(infolder):
            # If skip this folder and all subfolders
            if root in except_dirs:
                dirs.clear()
                continue
            # Check all files in folder
            for filename in filenames:
                filepath = os_path_join(root, filename)[2:]
                # If skip this exact file
                if filepath in except_files:
                    continue
                name, extension = os_path_splitext(filename)
                # If file or extension is not banned and it is on the
                # white-list and it changed since last time checked and
                # this is not an overwrite-call
                if (filename not in except_names and
                    extension not in except_exts and
                    (extension in permit_exts or filename in permit_names) and
                    checker.ischanged(filepath) and
                    not overwrite):
                    with open(filepath, encoding='utf-8') as file:
                        _search(collected, pattern1, pattern2,
                                file.read(), filepath, marks)

    # Save collection of all posts
    with open(COLLECTED, 'wb') as file:
        pickle_dump(collected, file, pickle_HIGHEST_PROTOCOL)

    # Open the todo file and write out the results
    with open('TODO', 'w', encoding='utf-8') as todo:
        # Make it compatible with cver.py
        todo.write('## INFO ##\n'*2)
        # Format TODO file as yaml
        for key in itertools_chain(tags, marks.values()):
            KEY = key.upper()
            try:
                types = collected[KEY].items()
                len_pos = todo.tell()
                # Offset for separator comment and
                # leading and trailing new lines
                todo.write(' '*82)
                todo.write('{}:\n'.format(KEY))
                index = 1
                for filename, posts in types:
                    for i, (linenumber, content) in enumerate(posts, start=index):
                        todo.write(_ITEM.format(msg='\n'.join(content),
                                                index=i,
                                                short=_SHORT,
                                                long=_SHORT*2,
                                                sep='- '*38,
                                                file=filename,
                                                line=linenumber))
                    index = i + 1
                todo.write('\n')
                # Move back to tag separator comment
                todo.seek(len_pos)
                todo.write('\n#{:-^78}#\n'.format(
                    ' {} POSTS IN {} FILES '.format(index - 1, len(types))))
                # Move back to the end
                todo.seek(0, 2)
            except KeyError:
                continue
        print('CCOM: placed {!r}'.format(os_path_join(infolder, 'TODO')))
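
An illustrative invocation (all argument values are hypothetical; in the original module the defaults come from constants such as WORDS and MARKS): scan ./src for '#' line comments tagged 'todo' or 'fixme' in .py files, skipping the build folder, then regenerate the TODO file.

collect('./src',
        line=('#',),
        tags=('todo', 'fixme'),
        include={'extensions': ('.py',)},
        exclude={'folders': ('build',)})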
Example #27
    def parse_NYC_snd_datafile(self, fpath='', with_regex=True):

        from json import dumps as j_dumps
        from os.path import isfile as os_path_isfile

        # Read the raw SND datafile
        with open(fpath, 'r') as f:
            x = f.readlines()

        print('\n', SND_NON_S_PATH, '\n', SND_S_PATH, '\n')

        # Characters Allowed:  [a-z0-9A-Z],[-&'/]

        # Borough:
        # 1 = MN
        # 2 = Bronx
        # 3 = Brooklyn
        # 4 = Queens
        # 5 = Staten Island

        ## GFT:
        # blank None of the below, e.g., either a name of a street that has no hyphenated house numbers
        # and no part of which is within
        # Edgewater Park, or a name of a tunnel, etc
        # A Addressable place name
        # B Name of bridge
        # C Business Improvement Districts
        # D Duplicate Address Pseudo-Street name (DAPS)
        # E Street is entirely within Edgewater Park
        # F Street is partially within Edgewater Park
        # G Non-Addressable Place name (NAP) of a complex
        # H All house numbers on this street are hyphenated
        # I Intersection Name
        # J Non-Physical Boundary Features
        # M Some house numbers on this street are hyphenated, some are not
        # N NAP of a 'stand-alone' geographic feature (not a complex
        # or a constituent entity of a complex)
        # O Shore Line
        # P Pseudo-street name (BEND, CITY LIMIT, DEAD END and their aliases)
        # R Rail line
        # S Front-truncated street name
        # T Tunnel
        # U Miscellaneous Structures
        # X NAP of a constituent entity of a complex Z Ramp

        ## ignore B,G,N,O,R,T
        ## watch H,M

        def get_snd_non_s(x):
            # non-type 'S' / size 34
            a = {
                #             'rec_type':x[0:1],
                'boro': x[1:2],  # (see "boro" below)
                'stname': x[2:34].strip(),  # full street name
                'primary_flag':
                x[34:35].strip(),  # P(=primary) or V(=non-primary)
                'principal_flag': x[35:36].strip(),  # F or S
                'boro': x[36:37].strip(),  # 1,2,3,4,5
                'sc5': x[36:42].strip(),
                'lgc': x[42:44].strip(),  # Local Group Code
                'spv': x[44:47].strip(),  # Spelling Variation
                #         filler = x[47:49]
                'numeric_ind': x[49:50].strip(),  # Numeric Name Indicator
                'GFT': x[50:51].strip(),  # (see description above)
                #             'len_full_name':x[51:53].strip(),
                'full_stname': x[53:85].strip(),
                'min_SNL': x[85:87].strip(),
                'stn20': x[87:107].strip(),
                'ht_name_type_code': x[107:108].strip(
                ),  # blank or R(= roadbed), G(= generic), U(= undivided)
                #         filler = x[109:200]
            }
            return a

        def get_snd_s(x):
            a = {
                #             filler = x[0:1]           # always '1'
                #             'rectype':x[0:1],
                'boro': x[1:2].strip(),  # only 1 or 2 (MN and BX)
                'stname': x[2:34].strip(),  # front truncated name
                #         filler = x[34:49]         # P or V
                'numeric_ind': x[49:50].strip(),  # blank or N
                'GFT': x[50:51],  # always 'S'
                #             'len_full_name':x[51:53].strip(),
                'num_of_progens': x[53:54].strip(),  # either 1 or 2 ?
                'progen_word_1': x[54:55].strip(),  # E or W
                'progen_gft_1': x[55:56].strip(),
                'progen_b10sc_1': x[56:67].strip(),
                'sc5_1': x[56:62].strip(),
                #         filler = x[67:70]
                'progen_word_2': x[70:71].strip(),  # E or W
                'progen_gft_2': x[71:72].strip(),
                'progen_b10sc_2': x[72:83].strip(),
                'sc5_2': x[72:78].strip(),
                #         filler = x[83:86]
                #         filler = x[86:200]
            }
            return a

        non_s, s, xlen = [], [], len(x)

        for i in range(1, xlen):
            rec = re_sub(r'(\r\n)$', r'', x[i])
            if rec[34] == 'P' or rec[34] == 'V':  # non-'S'
                r = get_snd_non_s(rec)
                r['source_ln_num'] = i
                non_s.append(r)
            else:
                r = get_snd_s(rec)
                r['source_ln_num'] = i
                s.append(r)

        # print len(non_s),'rows of non-type S'
        # print len(s),'rows of S-type'
        # print xlen,'total rows in data'
        assert len(non_s) + len(s) == xlen - 1

        # TESTING:
        # print x[1] # for non s-type
        # non_s[0]
        # --for s-type
        # print x[11]
        # s[0]

        p = j_dumps(non_s)
        df_non_s = pd.read_json(p)
        ns_cols = sorted(get_snd_non_s('').keys())
        df_non_s = df_non_s.loc[:, ns_cols]

        # -  Remove Blank Columns
        remove_cols = []
        # ---- PROVE THAT OK TO REMOVE 'min_SNL' b/c NO VALUES EXIST
        test_col = 'min_SNL'
        t = df_non_s[test_col].unique().tolist()
        assert True == (len(t) == 1) == (t[0] == '')
        remove_cols.append(test_col)
        # ---- PROVE THAT OK TO REMOVE 'stn20' b/c NO VALUES EXIST
        test_col = 'stn20'
        t = df_non_s[test_col].unique().tolist()
        assert True == (len(t) == 1) == (t[0] == '')
        remove_cols.append(test_col)
        # --
        df_non_s = df_non_s.drop(remove_cols, axis=1)

        print('\n', len(df_non_s), 'non-S-Type records\n')
        # print df_non_s.head()
        assert False == os_path_isfile(SND_NON_S_PATH)
        df_non_s.to_csv(SND_NON_S_PATH)
        assert True == os_path_isfile(SND_NON_S_PATH)

        p = j_dumps(s)
        df_s = pd.read_json(p)
        s_cols = sorted(get_snd_s('').keys())
        df_s = df_s.loc[:, s_cols]
        l_funct = lambda s: 0 if len(str(s).strip()) == 0 else int(s)
        df_s['progen_b10sc_2'] = df_s.progen_b10sc_2.map(l_funct)
        df_s['sc5_2'] = df_s.sc5_2.map(l_funct)

        # -  Remove Blank Columns
        remove_cols = []
        # ---- PROVE THAT OK TO REMOVE 'min_SNL' b/c NO VALUES EXIST
        test_col = 'progen_gft_2'
        t = df_s[test_col].unique().tolist()
        assert True == (len(t) == 1) == (t[0] == '')
        remove_cols.append(test_col)
        # --
        df_s = df_s.drop(remove_cols, axis=1)

        print('\n', len(df_s), 'S-Type records\n')
        # print df_s.head()
        assert False == os_path_isfile(SND_S_PATH)
        df_s.to_csv(SND_S_PATH)
        assert True == os_path_isfile(SND_S_PATH)
        return 'success'