Example #1
0
def ckpt_up_to_date(local_ckpt_prefix, bucket, folder):
    """Return True if the local checkpoint prefix matches the most recent one in the bucket folder."""
    fm = FileManager(bucket)
    ckpt_names = fm.get_folder_list(folder)
    most_recent_prefix = most_recent_ckpt_from_list(ckpt_names)
    return local_ckpt_prefix == most_recent_prefix
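A minimal usage sketch, tying this to download_ckpt_to_dir from Example #13; the bucket and folder names below are hypothetical, and FileManager / most_recent_ckpt_from_list are assumed to come from this project:

# Hypothetical names; refresh the local copy only when it is stale.
if not ckpt_up_to_date('model.ckpt-4200', 'my-training-bucket', 'checkpoints'):
    local_prefix = download_ckpt_to_dir('my-training-bucket', 'checkpoints', '/tmp/ckpts')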
Example #2
0
    def main_menu(self):
        print("Main menu:")
        while True:
            print('Please select one of the following options:\n'
                  '(R -> register, E -> exit, P -> print, F -> find)')
            user_input = input()

            if user_input.upper() == 'R':
                std = self.register_student()
                self._students.append(std)
                print("Registering a new student...")
                time.sleep(1)
                FileManager.write_file(r'files\students.txt', std)
                print("Done.")
            elif user_input.upper() == 'F':
                self.find_student()
            elif user_input.upper() == 'P':
                printer = Printer(self._students)
                printer.show_printer_menu()
                printer.print_sorted_list(printer.get_user_input())
                # No recursive self.main_menu() here: the surrounding while loop
                # already re-displays the menu, and recursion would grow the stack.
            else:
                print("Exiting program...")
                time.sleep(1)
                exit()
Example #3
0
    def __init__(self):
        # Explicitly initialize each base class of this multiply-inheriting worker.
        QObject.__init__(self)
        ParallelWorker.__init__(self)
        settings = Settings()
        FileManager.__init__(self, sites=settings.sites_path)
        self.options["multiprocess"] = False
        self.to_save = None
Example #4
0
def read_cleaned_docs(documents_path, category):
    """
    Reads the documents for the category under examination and cleans their text.

    :param documents_path: list of the paths to the folders holding the dataset documents and the
    documents uploaded by the user.
    :param category: name of the category of interest. The name of the folder containing the category's
    files must match the name of the corresponding class in the ontology.
    :return: dictionary holding the documents that were read. Specifically:
        - the key has the form '(category)_doc_(document_name)';
        - the value is the cleaned text.
    """

    file_manager = FileManager()

    # Collection of the documents contained in the examined directory
    docs = {}

    for path in documents_path:
        # Path of the folder holding the documents for the category under examination
        cat_dir_path = path + '/' + category

        if os.path.exists(cat_dir_path):
            # List of the files contained in the examined directory
            files = os.listdir(cat_dir_path)

            # Read each file and clean its text
            for file_name in files:
                text = file_manager.read_file(cat_dir_path + '/' + file_name)
                docs[category + '_doc_' +
                     file_name.split('.')[0]] = clean_text(text)
        else:
            os.makedirs(cat_dir_path)

    return docs
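A brief usage sketch under the docstring's key scheme; the paths and category name are hypothetical, and clean_text / FileManager are assumed project helpers:

docs = read_cleaned_docs(['data/dataset', 'data/user'], 'Sport')
# A file 'article1.txt' under a 'Sport' folder yields the key 'Sport_doc_article1'.
for key, cleaned in docs.items():
    print(key, len(cleaned))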
Example #5
0
    def __init__(self, file):
        # Parse the parameter file once and copy its fields onto this instance.
        fm = FileManager(file)
        fm.read_param()
        self.size = fm.size
        self.gamma = fm.gamma
        self.noise = fm.noise
        self.table = fm.table
        self.count = 0
Example #6
0
    def __init__(self, dataset_path, user_path, cat_docs, keywords_extractor,
                 text_classifier, entity_recognizer):
        self._dataset_path = dataset_path
        self._user_path = user_path
        self._cat_docs = cat_docs
        self._ke = keywords_extractor
        self._tc = text_classifier
        self._er = entity_recognizer
        self._file_manager = FileManager()
Example #7
0
def get_matrix_from_annotations(s3_bucket_name, annotation_path):
    if not annotation_path.endswith('.json'):
        raise ValueError('Cannot read from a non-JSON annotation file')
    fm = FileManager(s3_bucket_name)
    im_data = fm.read_image_dict('', annotation_path)
    M = im_data.get('warp', {}).get('M', {})
    if not M:
        return None

    # Flatten the 3x3 warp matrix into a length-9 float32 vector.
    M = np.array(M, dtype=np.float32).reshape(9)
    return M
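Since the function returns the warp matrix flattened to length 9, downstream code presumably reshapes it back to 3x3; a sketch with a hypothetical bucket and annotation path:

M_flat = get_matrix_from_annotations('my-bucket', 'game1/annotations/frame_0001.json')
if M_flat is not None:
    H = M_flat.reshape(3, 3)  # recover the 3x3 homography, e.g. for cv2.warpPerspective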
Example #8
0
def test():
    hmm = import_table()
    print(hmm.stage_table['AWARE'].next_state)
    print(hmm.emission_table)

    fm = FileManager(
        "D:\\PyProject\\AI_HMM\\AI_HMM\\examples\\hmm_customer_1586733276720.txt"
    )
    emission_list = fm.read_emissions()
    print(emission_list)
    print(hmm.assume('ZERO', emission_list))
Example #9
0
    def _get_file_content(self, filepath):
        """
        Reads the contents of a text document.
        :param filepath: relative path of the file to read
        :return: text content of the file.
        """
        file_manager = FileManager()

        # Project root path
        ROOT_DIR = os.path.abspath(os.path.dirname(__file__))

        # Read the file and return its contents
        return file_manager.read_file(os.path.join(ROOT_DIR, filepath))
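The path resolution it relies on can be tried standalone; 'templates/query.txt' below is a hypothetical relative path:

import os

ROOT_DIR = os.path.abspath(os.path.dirname(__file__))
# Resolve a file shipped next to this module, independent of the current working directory.
full_path = os.path.join(ROOT_DIR, 'templates/query.txt')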
Example #10
0
    def __init__(self, ontology_builder, keyword_extractor, text_classifier,
                 entity_recognizer, dataset_path, user_path):
        self._ob = ontology_builder
        self._ke = keyword_extractor
        self._er = entity_recognizer
        self._dataset_path = dataset_path
        self._user_path = user_path

        # Dictionary holding the list of documents associated with each category. Specifically:
        # - key: the category name;
        # - value: another dictionary whose (key, value) pairs are the
        # (document_name, document_text) pairs.
        self._cat_docs = {}
        # Handles file read and write operations.
        self._file_manager = FileManager()
        # Processes the documents the user feeds into the system.
        self._tp = TextProcessor(dataset_path, user_path, {},
                                 keyword_extractor, text_classifier,
                                 entity_recognizer)

        # Initialize the system.
        self._init_system()
        # Retrieve the ontology.
        self._onto = self._ob.get_onto()
        # Create a QueryBuilder instance for running SPARQL queries.
        self._query_builder = QueryBuilder()
Example #11
0
    def test_init(self):
        _ = FileManager(file_src_dir=self.FILE_SRC_DIR,
                        symbolic_link_dst=self.SYMBOLIC_LINK_DST)
        assert os.path.exists(f"{self.FILE_SRC_DIR}/published")
        assert os.path.exists(f"{self.SYMBOLIC_LINK_DST}/unpublished")
        shutil.rmtree(self.FILE_SRC_DIR)
        os.remove(self.SYMBOLIC_LINK_DST)
Example #12
0
    def _load_json_files(self):
        arrayjson_kpis = []
        for kpi in Config.JOBS_NAMES:
            for date in self.daterange:
                data = FileManager.read_from_json_file(
                    Config.WORKDIRECTORY_FOR_KPIS.format(date=date), kpi)
                if data:
                    arrayjson_kpis.append(data)
        return arrayjson_kpis
Example #13
0
def download_ckpt_to_dir(bucket, folder, dest_dir):
    '''
    Download the most recent checkpoint files (index, meta, data) from the S3 bucket and folder.
    dest_dir: local folder to put the files into
    returns: path/to/ckpt_prefix
    '''

    fm = FileManager(bucket)
    # Need the .data and .index files (meta is not strictly required, but is downloaded too)
    ckpt_names = fm.get_folder_list(folder)
    most_recent_ckpt_prefix = most_recent_ckpt_from_list(ckpt_names)
    print('Downloading ckpts from s3: {}'.format(most_recent_ckpt_prefix))
    ckpt_file_names = [x for x in ckpt_names if most_recent_ckpt_prefix in x]

    path_and_prefix = dest_dir + '/' + most_recent_ckpt_prefix
    for key in ckpt_file_names:
        dest_filepath = dest_dir + '/' + key
        fm.download_file(folder, key, dest_filepath)
    return path_and_prefix
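The index/meta/data triple suggests TensorFlow-style checkpoints; a hedged sketch of consuming the returned prefix, with hypothetical names:

ckpt_prefix = download_ckpt_to_dir('my-training-bucket', 'checkpoints', '/tmp/ckpts')
# A TF1-style restore would then point a Saver at the prefix:
#   saver.restore(sess, ckpt_prefix)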
Example #14
0
    def run(self, collections):
        Log.Instance().appendFinalReport(
            "\nStarting WRITING stage...\n===================")
        for collection in collections:
            Log.Instance().append("Writing " +
                                  collection['etl_meta']['label'] + " for " +
                                  collection['etl_meta']['timestamp'] + "...")
            if collection['etl_meta']['is_kpi']:
                filepath = Config.WORKDIRECTORY_FOR_KPIS
            else:
                filepath = Config.WORKDIRECTORY_FOR_TEMPS
            filepath = filepath.format(
                date=collection['etl_meta']['timestamp'][0:10])
            FileManager.create_if_dont_exist(filepath)
            print(collection)
            FileManager.write_json_to_file(filepath,
                                           collection['etl_meta']['label'],
                                           collection)
        Log.Instance().appendFinalReport(
            "===================\nWRITING stage ended.")
Example #15
0
def create_app():
    """
    Initialize Flask and setup database

    """
    project_dir = os.path.dirname(os.path.abspath(__file__))
    app = CustomFlask(__name__)
    app.config.from_mapping(
        SECRET_KEY=os.urandom(16),
        CACHED_TIME=time.time(),
        THEME_DIR="./data/theme.json",
        REAL_MARKDOWN_DIR=os.path.join(project_dir, "./data/topaz_docs"),
        MARKDOWN_DIR="./data/docs",
        FILE_MANAGER=FileManager(
            file_src_dir=os.path.join(project_dir, "./data/topaz_docs") + "/",
            symbolic_link_dst="./data/topaz_docs"))

    CORS(app, supports_credentials=True)

    # Init database
    import db
    db.init_db()

    # Init github cache
    from utils.github import update_public_repos
    db_conn = db.get_db()
    update_public_repos(db_conn)

    # Init Medium cache
    db_conn = db.get_db()
    with open(app.config["THEME_DIR"], "r") as f:
        data = json.load(f)
        medium_url = data["nav_bar_footer"]["medium"]["link"]
    from utils.medium import update_articles
    update_articles(db_conn, medium_url)

    # Register blueprints
    import home
    app.register_blueprint(home.bp)

    import auth
    app.register_blueprint(auth.bp)

    # Initialize login manager
    login_manager = LoginManager()
    login_manager.init_app(app)

    @login_manager.user_loader
    def user_loader(username):
        db_conn = db.get_db()
        return get_user(db_conn, username)

    return app
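A minimal development entry point for the factory; the host and port below are hypothetical:

if __name__ == '__main__':
    app = create_app()
    app.run(host='127.0.0.1', port=5000, debug=True)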
Example #16
0
def extract_training_data(s3_bucket_name, top_dir):
    fm = FileManager(s3_bucket_name)

    annotation_dir = os.path.join(top_dir, 'annotations')
    frames_dir = os.path.join(top_dir, 'frames')
    
    annotations_names = fm.get_folder_list(annotation_dir, extension_filter='json')
    frame_names = [x for x in fm.get_folder_list(frames_dir) if len(x.split('.')) == 2]
    frame_exts = {x.split('.')[1] for x in frame_names}
    frame_names = set(frame_names)
    
    data = []
    for filename in annotations_names:
        # skip anything that is not a json file
        if filename.split('.')[1] != 'json':
            continue
        # os.path.join behaves differently depending on whether
        # we are looking in an S3 bucket or on the local filesystem
        if s3_bucket_name is not None:
            annotation_filename = annotation_dir + filename
        else:
            annotation_filename = os.path.join(annotation_dir, filename)
        M = get_matrix_from_annotations(s3_bucket_name, annotation_filename)
        if M is not None:
            filename_noext = filename.split('.')[0]
            for ext in frame_exts:
                frame_name = filename_noext + '.' + ext
                if frame_name in frame_names:
                    # same path-joining difference as above
                    if s3_bucket_name is not None:
                        frame_path_from_bucket = top_dir + '/frames' + frame_name
                    else:
                        frame_path_from_bucket = os.path.join(top_dir, 'frames', frame_name)
                    data.append((frame_path_from_bucket, M))
    
    return data
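A usage sketch reusing the bucket and folder names that appear in Example #20; each returned pair couples a frame path with its flattened warp matrix:

pairs = extract_training_data('bsivisiondata', 'PHI-PIT_6m-8m')
for frame_path, M in pairs[:3]:
    print(frame_path, M.shape)  # M is the length-9 float32 vector from Example #7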
Example #17
0
def api_proxy(action: str, kwargs: dict) -> str:
    serving_storage_dir = os.getenv('SERVING_STORAGE_DIR', tempfile.gettempdir())
    logging.info(f'serving_storage_dir is {serving_storage_dir}')
    model_config_dir = os.path.join(serving_storage_dir, 'configs')
    model_storage_dir = os.path.join(serving_storage_dir, 'models')
    model_config_manager = ModelConfigManager(model_config_dir)
    file_manager = FileManager(model_storage_dir)
    
    try:
        if action == 'list':
            configs = model_config_manager.list()
            return json.dumps(configs)
        elif action == 'register':
            model_config_manager.register(**kwargs)
        elif action == 'update':
            model_base_path = file_manager.copy_model_to_serving(**kwargs)
            kwargs['model_base_path'] = model_base_path
            model_config_manager.update(**kwargs)
        elif action == 'delete':
            file_manager.delete_model_from_serving(**kwargs)
            model_config_manager.delete(**kwargs)
        return 'Done.'
    except Exception as err:
        return f'Failed to {action}, because {err}, {traceback.format_exc()}'
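A hedged call sketch; 'list' needs no arguments, while the kwargs for the other actions depend on ModelConfigManager and are shown here with hypothetical keys:

print(api_proxy('list', {}))                    # JSON string of registered configs
print(api_proxy('register', {'name': 'demo'}))  # hypothetical kwargs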
Example #18
0
def main():
    print("Testing program: Student registration\n")

    students = FileManager.read_file(r'files\students.txt')
    menu = Menu(students)
    menu.main_menu()
Example #19
0
if __name__ == '__main__':
    server = 'https://www.80s.tw/'
    url = 'https://www.80s.tw/movie/list'
    headers = {
        'Referer':
        'https://www.80s.tw/movie/list',
        'User-Agent':
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Mobile Safari/537.36'
    }
    r = requests.get(url=url, headers=headers)
    bf = BeautifulSoup(r.text, 'lxml')
    clearfix = bf.find(class_='me1 clearfix')
    clearfix_bf = BeautifulSoup(str(clearfix), 'lxml')
    div_a = clearfix_bf.find_all('a')

    fm = FileManager()

    for a in div_a:
        a_bf = BeautifulSoup(str(a), 'lxml')
        href = a_bf.a.get('href')
        url_info = server + href
        print('Detail link:', server + href)  # detail-page link for the movie
        info = requests.get(url=url_info, headers=headers)

        info_bf = BeautifulSoup(info.text, 'lxml')
        minfo = info_bf.find('div', class_='clearfix', id='minfo')
        img_info = info_bf.find('div', class_='img')

        name = ''
        src_url = ''
        if img_info is not None:
Example #20
0
from utils.display import Display
from utils.file_manager import FileManager
from utils.warp_tools import *
from utils.rink_specs import HockeyRink
import random
import numpy as np
import cv2
fm = FileManager('bsivisiondata')
d = Display()

annotations = fm.get_folder_list('PHI-PIT_6m-8m/annotations',
                                 extension_filter='json')
# random.shuffle(annotations)
for f in annotations:
    print(f)
    im_dict = fm.read_image_dict('PHI-PIT_6m-8m/annotations', f)
    if 'warp' not in im_dict:
        continue

    imname = f.split('.')[0] + '.png'
    im = fm.read_image_file('PHI-PIT_6m-8m/frames', imname)
    im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)

    H = np.array(im_dict['warp']['M'])
    hr = HockeyRink()

    scaled_H = scale_homography(H, 600, 300)
    H1280 = scale_homography(H, 1280, 720)
    # NOTE: images needed to be resized before calling warp!
Example #21
0
class TextProcessor:
    """ Processa un testo fornito in input al sistema dall'utente. """
    def __init__(self, dataset_path, user_path, cat_docs, keywords_extractor,
                 text_classifier, entity_recognizer):
        self._dataset_path = dataset_path
        self._user_path = user_path
        self._cat_docs = cat_docs
        self._ke = keywords_extractor
        self._tc = text_classifier
        self._er = entity_recognizer
        self._file_manager = FileManager()

    def set_cat_docs(self, cat_docs):
        self._cat_docs = cat_docs

    def process_text(self, text):
        """
        Takes a text as input and uses the Meaning Cloud API to extract the category it belongs to
        (with its score) and the Dandelion API to extract the entities it contains. In addition,
        the related keywords are extracted using TF-IDF.

        :param text: text to process
        :return:
            - cat: dictionary holding the name of the category the document was classified into and its
            score;
            - fname: name of the file the input text was saved to;
            - doc_keys: keywords of the new document;
            - doc_ents: entities contained in the new document;
        """

        # Use the classifier (Meaning Cloud API) to retrieve the category the document belongs to,
        # together with its membership score.
        cat = self._tc.get_category(text)
        cat_name = cat['name']

        # Save the document in the matching user folder.
        fname = self._save_doc(cat_name, text)

        # Clean the text
        cleaned_text = clean_text(text)
        # Extract the keywords associated with the text
        doc_keys = self._extract_keywords(cat_name, fname, cleaned_text)
        # Extract the entities contained in the text
        doc_ents = self._er.get_entities(cleaned_text)

        return cat, fname, doc_keys, doc_ents

    def _save_doc(self, cat_name, text):
        """
        Saves the document in the user folder for the category the document was classified into.

        :param cat_name: category the document belongs to. The name of the folder the document is
        saved in must match the name of the category extracted by the classifier.
        :param text: text to save in the document.
        :return: name of the file the text was saved to.
        """

        user_cat_dir_path = self._user_path + '/' + cat_name

        # If the folder holding the user-uploaded files for this category does not exist, create it.
        if not os.path.exists(user_cat_dir_path):
            os.makedirs(user_cat_dir_path)

        # Build the name to give the document the input text will be saved in:
        # count the files inside the folder for the document's category
        num_docs = len(os.listdir(user_cat_dir_path))
        # and add 1 to obtain the name to assign to the new document.
        fname = 'user_' + str(num_docs + 1) + '.txt'

        self._file_manager.write_file(user_cat_dir_path + '/' + fname, text)
        return fname

    def _extract_keywords(self, cat_name, fname, cleaned_text):
        """
        Extracts the keywords for the text passed as input.

        :param cat_name: name of the category the classified text belongs to.
        :param fname: name of the file the original text was saved to.
        :param cleaned_text: cleaned text.
        :return: keywords for the document under examination.
        """

        # If the collection of documents for this category is not available yet, read and clean it.
        if cat_name not in self._cat_docs:
            print("reading documents...", end=" ")
            self._cat_docs[cat_name] = read_cleaned_docs(
                [self._dataset_path, self._user_path], cat_name)
            print("done.")

        # Retrieve all the documents for the category the new document belongs to
        docs = self._cat_docs[cat_name]

        # Extract the keywords for the document under examination
        doc_instance_name = cat_name + '_doc_' + fname.split('.')[0]
        docs[doc_instance_name] = cleaned_text
        doc_keys = self._ke.extract(docs)[doc_instance_name]

        return doc_keys
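A hedged usage sketch of the class; the three service objects stand in for the project's Meaning Cloud, TF-IDF, and Dandelion wrappers, and the paths are hypothetical:

tp = TextProcessor('data/dataset', 'data/user', {},
                   keywords_extractor, text_classifier, entity_recognizer)
cat, fname, doc_keys, doc_ents = tp.process_text("Some article text...")
print(cat, fname)  # category dict (name plus score) and e.g. 'user_1.txt'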
Example #22
0
from utils.file_manager import FileManager

SERVER_PORT = "28888"

MAX_SIM_THREADS = 2
DEFAULT_QUEUE_CHECK_INTERVAL = 2
DEFAULT_THREAD_CHECK_INTERVAL = .2
DEFAULT_REGION = "us"

CONTROLLER_MODULES = [
    'rest_controllers',
    'socket_controllers',
]

# Logger config
fm = FileManager()
logger_settings = {
    "DEFAULT_DEBUG": fm.stdout,
    "DEFAULT_LOG": fm.stdout,
    "DEFAULT_ERR": fm.stderr,
    "DEFAULT_WARN": fm.stderr,
    "DEFAULT_NULL": fm.null,
    "INCLUDE": {
                "debug": False,
                "log": True,
                "err": True,
                "warn": True,
                "null": True,
                },
    "TIMESTAMP": True,
}
Example #23
0
    def fm(self):
        # Pytest-style fixture body: yield the FileManager under test, then clean up.
        yield FileManager(file_src_dir=self.FILE_SRC_DIR,
                          symbolic_link_dst=self.SYMBOLIC_LINK_DST)
        shutil.rmtree(self.FILE_SRC_DIR)
        os.remove(self.SYMBOLIC_LINK_DST)
Example #24
0
from utils.file_manager import FileManager
from utils.engine import Engine

B, L, D, libraries, books_scores, picked_books = FileManager.read_file('a_example.txt')


engine = Engine(libraries=libraries, D=D, books_scores=books_scores, picked_books=picked_books)
output: list = engine.start()

FileManager.write_file('a.txt', output)