Example No. 1
def mismas_features_distinto_humor(corpus):
    print("Buscando tweets con mismos valores de features pero distinto de humor...")

    humoristicos = [tweet for tweet in corpus if tweet.es_humor]
    no_humoristicos = [tweet for tweet in corpus if not tweet.es_humor]

    res = []

    bar = IncrementalBar("Buscando en tweets\t\t", max=len(humoristicos) * len(no_humoristicos),
                         suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)
    for tweet_humor in humoristicos:
        for tweet_no_humor in no_humoristicos:
            if tweet_humor.features == tweet_no_humor.features:
                res.append((tweet_humor, tweet_no_humor))
                if tweet_humor.texto_original == tweet_no_humor.texto_original:
                    print("-----MISMO TEXTO ORIGINAL------")
                if tweet_humor.texto == tweet_no_humor.texto:
                    print("----------MISMO TEXTO----------")
                if tweet_humor.id == tweet_no_humor.id:
                    print("-----------MISMO ID------------")
                if tweet_humor.cuenta == tweet_no_humor.cuenta:
                    print("----------MISMA CUENTA---------")
                print('')
                print(tweet_humor.id)
                print(tweet_humor.texto)
                print("------------")
                print(tweet_no_humor.id)
                print(tweet_no_humor.texto)
                print("------------")
                print('')
            bar.next()
    bar.finish()

    return res
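
Note: the search above compares every humorous tweet against every non-humorous one, so the bar runs over len(humoristicos) * len(no_humoristicos) steps. A hedged alternative sketch that groups tweets by a hashable signature of their feature dict, so matching pairs are found without the full cross product (it assumes feature values are hashable and keeps the same Tweet attributes as above):

from collections import defaultdict

def mismas_features_distinto_humor_rapido(corpus):
    # Key each tweet by an order-independent, hashable view of its feature dict.
    por_features = defaultdict(lambda: {'humor': [], 'no_humor': []})
    for tweet in corpus:
        clave = frozenset(tweet.features.items())
        por_features[clave]['humor' if tweet.es_humor else 'no_humor'].append(tweet)

    res = []
    for grupo in por_features.values():
        for tweet_humor in grupo['humor']:
            for tweet_no_humor in grupo['no_humor']:
                res.append((tweet_humor, tweet_no_humor))
    return res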
Example No. 2
    def crawl(self):
        n = MIN_NUMBER_OF_DOCS
        startingURL = START_PAGES
        os.makedirs(AFTER_CRAWL_BASE_DIR, exist_ok=True)
        self.n = n
        for sURL in startingURL:
            try:
                self.queue.extend(self.parseProfilePage(sURL))
            except Exception:
                print('cannot parse profile page', sURL)
                with open(os.path.join(AFTER_CRAWL_BASE_DIR, ERRORS_FILE_NAME), "a") as ErrorFile:
                    # write() takes a single string, so build it instead of passing multiple arguments
                    ErrorFile.write('cannot parse profile page ' + sURL + '\n')

        from progress.bar import IncrementalBar
        progress_bar = IncrementalBar('Crawling', max=MIN_NUMBER_OF_DOCS, suffix='%(percent)d%% %(remaining)s remaining - eta %(eta_td)s')
        threads = [CrawlThread(self, progress_bar) for t in range(NUMBER_OF_THREADS)]

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        with open(os.path.join(AFTER_CRAWL_BASE_DIR, MAP_FILE_NAME), 'w') as outfile:
            json.dump(self.URLIDMap, outfile)

        progress_bar.finish()
Example No. 3
def run():
    for model, num_to_create in to_create.items():
        model_name = model._meta.model_name
        bar = Bar('Creating {}'.format(model_name), max=num_to_create)
        model_count = model.objects.count()
        create_f = globals()['populate_{}'.format(model_name)]

        for i in range(num_to_create):
            ident = '{}{}'.format(model_name, i)
            if i < model_count:
                unit = model.objects.all()[i]
            else:
                unit = create_f(model, i)
            globals()[ident] = unit
            bar.next()
        bar.finish()

    # This bit is special: Associate all rpms with the first repo,
    # for maximum relational query fun
    
    num_units = platform.ContentUnit.objects.count() 
    repo = globals()['repository0']
    bar = Bar('Adding all units to {} repo'.format(repo.slug))
    bar.max = num_units
    for unit in platform.ContentUnit.objects.all():
        repo.add_units(unit)
        bar.next()
    bar.finish()
Example No. 4
def save_frames(source, vertices, images_dir):
    print('Saving frames...')
    if not os.path.isdir(images_dir):
        os.makedirs(images_dir)
    bar = IncrementalBar(max=len(vertices))
    angle_change = 360 // len(vertices)
    for i, v in enumerate(vertices):
        update(source, v, angle_change=angle_change)
        mlab.savefig(filename=os.path.join(images_dir, frame_fn(i)))
        bar.next()
    bar.finish()
    mlab.close()
Example No. 5
 def calcular_feature_thread(self, tweets, nombre_feature, identificador):
     if len(tweets) > 0:
         bar = IncrementalBar("Calculando feature " + nombre_feature + ' - ' + unicode(identificador),
                              max=len(tweets),
                              suffix=SUFIJO_PROGRESS_BAR)
         bar.next(0)
         feature = self.features[nombre_feature]
         self.abortar_si_feature_no_es_thread_safe(feature)
         for tweet in tweets:
             tweet.features[feature.nombre] = feature.calcular_feature(tweet)
             bar.next()
         bar.finish()
Example No. 6
 def calcular_features_thread(self, tweets, identificador):
     if len(tweets) > 0:
         bar = IncrementalBar("Calculando features - " + unicode(identificador),
                              max=len(tweets) * len(self.features),
                              suffix=SUFIJO_PROGRESS_BAR)
         bar.next(0)
         for tweet in tweets:
             for feature in list(self.features.values()):
                 self.abortar_si_feature_no_es_thread_safe(feature)
                 tweet.features[feature.nombre] = feature.calcular_feature(tweet)
                 bar.next()
         bar.finish()
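
Note: examples 5 and 6 are per-thread workers, each given its own slice of tweets and a labelled progress bar. A hypothetical dispatcher for them could look like the sketch below; the chunking and thread count are assumptions, not taken from the original class:

import threading

def calcular_features_en_paralelo(calculador, tweets, n_threads=4):
    # Split the tweets into one contiguous chunk per thread.
    tamanio = (len(tweets) + n_threads - 1) // n_threads
    chunks = [tweets[i:i + tamanio] for i in range(0, len(tweets), tamanio)]

    hilos = [
        threading.Thread(target=calculador.calcular_features_thread, args=(chunk, idx))
        for idx, chunk in enumerate(chunks)
    ]
    for hilo in hilos:
        hilo.start()
    for hilo in hilos:
        hilo.join()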
Example No. 7
def guardar_parecidos_con_distinto_humor(pares_parecidos_distinto_humor):
    with closing(open_db()) as conexion:
        with closing(conexion.cursor()) as cursor:
            consulta = "INSERT INTO tweets_parecidos_distinto_humor VALUES (%s, %s)" \
                       + " ON DUPLICATE KEY UPDATE id_tweet_no_humor = %s"

            bar = IncrementalBar("Guardando tweets parecidos\t", max=len(pares_parecidos_distinto_humor),
                                 suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)

            for tweet_humor, tweet_no_humor in pares_parecidos_distinto_humor:
                cursor.execute(consulta, (tweet_humor.id, tweet_no_humor.id, tweet_no_humor.id))
                bar.next()

            conexion.commit()
            bar.finish()
Example No. 8
def cross_validation_y_reportar(clasificador, features, clases, numero_particiones):
    skf = cross_validation.StratifiedKFold(clases, n_folds=numero_particiones)
    features = np.array(features)
    clases = np.array(clases)
    matrices = []
    medidas = defaultdict(list)

    bar = IncrementalBar("Realizando cross-validation\t", max=numero_particiones, suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)
    for entrenamiento, evaluacion in skf:
        clasificador.fit(features[entrenamiento], clases[entrenamiento])
        clases_predecidas = clasificador.predict(features[evaluacion])
        matriz_de_confusion = metrics.confusion_matrix(clases[evaluacion], clases_predecidas).flatten()
        matrices.append(matriz_de_confusion)
        for medida, valor_medida in calcular_medidas(*matriz_de_confusion).items():
            medidas[medida].append(valor_medida)
        bar.next()

    bar.finish()

    promedios = {}

    print('')
    print("Resultados de cross-validation:")
    print('')
    for medida, valor_medida in medidas.items():
        print("\t{medida: >18s}:\t{valor_medida}".format(medida=medida, valor_medida=valor_medida))
        promedio = np.mean(valor_medida)
        promedios[medida] = promedio
        delta = np.std(valor_medida) * 1.96 / math.sqrt(numero_particiones)
        print("Intervalo de confianza 95%:\t{promedio:0.4f} ± {delta:0.4f} --- [{inf:0.4f}, {sup:0.4f}]".format(
            promedio=promedio, delta=delta, inf=promedio - delta, sup=promedio + delta))
        print('')

    imprimir_matriz_metricas(
        promedios['Precision No humor'],
        promedios['Recall No humor'],
        promedios['F1-score No humor'],
        promedios['Precision Humor'],
        promedios['Recall Humor'],
        promedios['F1-score Humor'],
    )

    print('')
    print('')
    print('')
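
Note: the confidence interval printed above is the usual normal approximation, mean ± 1.96·σ/√n over the per-fold values. A minimal standalone sketch of just that calculation, with hypothetical fold scores:

import math
import numpy as np

# Hypothetical per-fold F1 scores; any list of per-fold metrics works the same way.
valores = [0.78, 0.81, 0.75, 0.79, 0.80]
promedio = np.mean(valores)
# 1.96 is the z-value for a 95% confidence level, as in the report above.
delta = np.std(valores) * 1.96 / math.sqrt(len(valores))
print("{:.4f} ± {:.4f} --- [{:.4f}, {:.4f}]".format(
    promedio, delta, promedio - delta, promedio + delta))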
Example No. 9
    def render(self, ctx, invert=False, filename=None, pbar=False):
        """ Generate image of layer.

        Parameters
        ----------
        ctx : :class:`GerberContext`
            GerberContext subclass used for rendering the image

        filename : string <optional>
            If provided, save the rendered image to `filename`

        pbar : bool <optional>
            If true, render a progress bar
        """
        ctx.set_bounds(self.bounds)
        ctx._paint_background()

        if invert:
            ctx.invert = True
            ctx._clear_mask()
        for p in self.primitives:
            ctx.render(p)
        if invert:
            ctx.invert = False
            ctx._render_mask()

        _pbar = None
        if pbar:
            try:
                from progress.bar import IncrementalBar
                _pbar = IncrementalBar(
                    self.filename, max=len(self.primitives)
                )
            except ImportError:
                pbar = False

        for p in self.primitives:
            ctx.render(p)
            if pbar:
                _pbar.next()
        if pbar:
            _pbar.finish()

        if filename is not None:
            ctx.dump(filename)
Example No. 10
def guardar_parecidos_con_distinto_humor(pares_parecidos_distinto_humor):
    with closing(mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST, database=DB_NAME)) as conexion:
        with closing(conexion.cursor()) as cursor:
            consulta = (
                "INSERT INTO tweets_parecidos_distinto_humor VALUES (%s, %s)"
                + " ON DUPLICATE KEY UPDATE id_tweet_no_humor = %s"
            )

            bar = IncrementalBar(
                "Guardando tweets parecidos\t", max=len(pares_parecidos_distinto_humor), suffix=SUFIJO_PROGRESS_BAR
            )
            bar.next(0)

            for tweet_humor, tweet_no_humor in pares_parecidos_distinto_humor:
                cursor.execute(consulta, (tweet_humor.id, tweet_no_humor.id, tweet_no_humor.id))
                bar.next()

            conexion.commit()
            bar.finish()
Example No. 11
def _create_unfilled_voxel_data(
        model_id, edge_length_threshold=0.1, voxel_config=None,
        overwrite=False, example_ids=None):
    from template_ffd.data.ids import get_example_ids
    from shapenet.core import cat_desc_to_id
    from template_ffd.model import load_params
    import numpy as np
    from progress.bar import IncrementalBar
    if voxel_config is None:
        voxel_config = _default_config
    cat_id = cat_desc_to_id(load_params(model_id)['cat_desc'])
    if example_ids is None:
        example_ids = get_example_ids(cat_id, 'eval')
    mesh_dataset = get_inferred_mesh_dataset(model_id, edge_length_threshold)
    voxel_dataset = get_voxel_dataset(
        model_id, edge_length_threshold, voxel_config, filled=False,
        auto_save=False)
    if not overwrite:
        example_ids = [i for i in example_ids if i not in voxel_dataset]
    if len(example_ids) == 0:
        return
    print('Creating %d voxels for model %s' % (len(example_ids), model_id))

    kwargs = dict(
        voxel_dim=voxel_config.voxel_dim,
        exact=voxel_config.exact,
        dc=voxel_config.dc,
        aw=voxel_config.aw)

    with mesh_dataset:
        bar = IncrementalBar(max=len(example_ids))
        for example_id in example_ids:
            bar.next()
            mesh = mesh_dataset[example_id]
            vertices, faces = (
                np.array(mesh[k]) for k in ('vertices', 'faces'))
            binvox_path = voxel_dataset.path(example_id)
            # x, z, y = vertices.T
            # vertices = np.stack([x, y, z], axis=1)
            bio.mesh_to_binvox(
                vertices, faces, binvox_path, **kwargs)
        bar.finish()
Example No. 12
    def render_deferred(self):

        if not len(self._deferred):
            return

        print("Optimizing deferred elements")
        paths = self._optimize_deferred().paths

        print("Rendering Paths")
        try:
            from progress.bar import IncrementalBar
            _pbar = IncrementalBar(max=len(paths))
        except ImportError:
            _pbar = None

        for path in paths:
            self._render_path(path)
            if _pbar:
                _pbar.next()
        if _pbar:
            _pbar.finish()
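
Note: examples 9 and 12 both treat progress as an optional dependency, creating the bar only when the import succeeds and guarding every call on it. A self-contained sketch of that pattern, with a sleep standing in for the real per-item work:

from time import sleep

def process(items):
    # Optional progress bar: degrade gracefully when 'progress' is not installed.
    try:
        from progress.bar import IncrementalBar
        _pbar = IncrementalBar('Processing', max=len(items))
    except ImportError:
        _pbar = None

    for item in items:
        sleep(0.01)  # stand-in for the real per-item work
        if _pbar:
            _pbar.next()
    if _pbar:
        _pbar.finish()

process(range(100))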
Example No. 13
def cargar_parecidos_con_distinto_humor():
    with closing(open_db()) as conexion:
        # buffered=True so the row count is known before iterating.
        with closing(conexion.cursor() if DB_ENGINE == 'sqlite3' else conexion.cursor(buffered=True)) as cursor:
            consulta = """
            SELECT id_tweet_humor,
                   id_tweet_no_humor
            FROM   tweets_parecidos_distinto_humor
            """

            cursor.execute(consulta)

            pares_ids_parecidos_con_distinto_humor = []

            bar = IncrementalBar("Cargando tweets parecidos\t", max=cursor.rowcount, suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)

            for par_ids in cursor:
                pares_ids_parecidos_con_distinto_humor.append(par_ids)
                bar.next()

            bar.finish()

            return pares_ids_parecidos_con_distinto_humor
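
Note: the buffered cursor matters here because IncrementalBar needs max up front, and with mysql-connector cursor.rowcount is only reliable once the full result set has been fetched, which buffered=True does. A minimal sketch of the pattern, assuming the same DB_* constants and tweets table used throughout these examples:

import mysql.connector
from progress.bar import IncrementalBar

conexion = mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST, database=DB_NAME)
cursor = conexion.cursor(buffered=True)  # fetches everything, so rowcount is known before iterating
cursor.execute("SELECT id_tweet FROM tweets")

bar = IncrementalBar("Loading ids", max=cursor.rowcount)
ids = []
for (id_tweet,) in cursor:
    ids.append(id_tweet)
    bar.next()
bar.finish()

cursor.close()
conexion.close()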
Example No. 14
def guardar_features(tweets, **opciones):
    nombre_feature = opciones.pop('nombre_feature', None)
    conexion = open_db()
    cursor = conexion.cursor()

    consulta = "INSERT INTO features VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE valor_feature = %s"

    if nombre_feature:
        mensaje = 'Guardando feature ' + nombre_feature
    else:
        mensaje = 'Guardando features'

    bar = IncrementalBar(mensaje, max=len(tweets), suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)

    for tweet in tweets:
        if nombre_feature:
            cursor.execute(
                consulta,
                (
                    tweet.id,
                    nombre_feature,
                    unicode(tweet.features[nombre_feature]),
                    unicode(tweet.features[nombre_feature])
                )
            )
        else:
            # Use a separate loop variable so the nombre_feature flag is not clobbered for later tweets.
            for nombre, valor_feature in tweet.features.items():
                cursor.execute(consulta, (tweet.id, nombre, unicode(valor_feature), unicode(valor_feature)))
        bar.next()

    conexion.commit()
    bar.finish()

    cursor.close()
    conexion.close()
Example No. 15
def guardar_features(tweets, **opciones):
    nombre_feature = opciones.pop("nombre_feature", None)
    conexion = mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST, database=DB_NAME)
    cursor = conexion.cursor()

    consulta = "INSERT INTO features VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE valor_feature = %s"

    if nombre_feature:
        mensaje = "Guardando feature " + nombre_feature
    else:
        mensaje = "Guardando features"

    bar = IncrementalBar(mensaje, max=len(tweets), suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)

    for tweet in tweets:
        if nombre_feature:
            cursor.execute(
                consulta,
                (
                    tweet.id,
                    nombre_feature,
                    unicode(tweet.features[nombre_feature]),
                    unicode(tweet.features[nombre_feature]),
                ),
            )
        else:
            # Use a separate loop variable so the nombre_feature flag is not clobbered for later tweets.
            for nombre, valor_feature in tweet.features.items():
                cursor.execute(consulta, (tweet.id, nombre, unicode(valor_feature), unicode(valor_feature)))
        bar.next()

    conexion.commit()
    bar.finish()

    cursor.close()
    conexion.close()
Example No. 16
def cargar_parecidos_con_distinto_humor():
    with closing(mysql.connector.connect(user=DB_USER, password=DB_PASS, host=DB_HOST, database=DB_NAME)) as conexion:
        # buffered=True so the row count is known before iterating.
        with closing(conexion.cursor(buffered=True)) as cursor:
            consulta = """
            SELECT id_tweet_humor,
                   id_tweet_no_humor
            FROM   tweets_parecidos_distinto_humor
            """

            cursor.execute(consulta)

            pares_ids_parecidos_con_distinto_humor = []

            bar = IncrementalBar("Cargando tweets parecidos\t", max=cursor.rowcount, suffix=SUFIJO_PROGRESS_BAR)
            bar.next(0)

            for par_ids in cursor:
                pares_ids_parecidos_con_distinto_humor.append(par_ids)
                bar.next()

            bar.finish()

            return pares_ids_parecidos_con_distinto_humor
Example No. 17
def cargar_tweets(limite=None, agregar_sexuales=False, cargar_features=True):
    """Carga todos los tweets, inclusive aquellos para evaluación, aunque no se quiera evaluar,
    y aquellos mal votados, así se calculan las features para todos. Que el filtro se haga luego."""
    conexion = open_db()
    if DB_ENGINE == 'sqlite3':
        cursor = conexion.cursor()
    else:
        cursor = conexion.cursor(buffered=True)  # buffered so the row count is known before iterating

    if agregar_sexuales:
        consulta_sexuales_tweets = ""
        consulta_limite_sexuales = ""
    else:
        consulta_sexuales_tweets = "censurado_tweet = 0"
        consulta_limite_sexuales = "AND " + consulta_sexuales_tweets
    consulta_sexuales_features = consulta_sexuales_tweets

    if limite:
        consulta = "SELECT id_tweet FROM tweets WHERE evaluacion = 0 " + consulta_limite_sexuales + " ORDER BY RAND() LIMIT "\
                   + unicode(limite)

        cursor.execute(consulta)

        bar = IncrementalBar("Eligiendo tweets aleatorios\t", max=cursor.rowcount, suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)

        ids = []

        for (tweet_id,) in cursor:
            ids.append(tweet_id)
            bar.next()

        bar.finish()

        str_ids = '(' + unicode(ids).strip('[]L') + ')'
        consulta_prueba_tweets = "T.id_tweet IN {ids}".format(ids=str_ids)
        consulta_prueba_features = "id_tweet IN {ids}".format(ids=str_ids)

    else:
        consulta_prueba_features = ""
        consulta_prueba_tweets = ""

    if not agregar_sexuales and limite:
        restricciones_tweets = "WHERE " + consulta_sexuales_tweets + " AND " + consulta_prueba_tweets
        restricciones_features = "WHERE " + consulta_sexuales_features + " AND " + consulta_prueba_features
    elif not agregar_sexuales:
        restricciones_tweets = "WHERE " + consulta_sexuales_tweets
        restricciones_features = "WHERE " + consulta_sexuales_features
    elif limite:
        restricciones_tweets = "WHERE " + consulta_prueba_tweets
        restricciones_features = "WHERE " + consulta_prueba_features
    else:
        restricciones_tweets = ""
        restricciones_features = ""

    if DB_ENGINE == 'sqlite3':
            consulta = """
    SELECT id_account,
           T.id_tweet,
           text_tweet,
           favorite_count_tweet,
           retweet_count_tweet,
           eschiste_tweet,
           censurado_tweet,
           name_account,
           followers_count_account,
           evaluacion,
           votos,
           votos_humor,
           promedio_votos,
           categoria_tweet
    FROM   tweets AS T
           NATURAL JOIN twitter_accounts
                        LEFT JOIN (SELECT id_tweet,
                                          Avg(voto) AS promedio_votos,
                                          Count(*) AS votos,
                                          Count(case when voto <> 'x' then 1 else NULL end) AS votos_humor
                                   FROM   votos
                                   WHERE voto <> 'n'
                                   GROUP  BY id_tweet) V
                               ON ( V.id_tweet = T.id_tweet )
    {restricciones}
    """.format(restricciones=restricciones_tweets)
    else:
        consulta = """
    SELECT id_account,
           T.id_tweet,
           text_tweet,
           favorite_count_tweet,
           retweet_count_tweet,
           eschiste_tweet,
           censurado_tweet,
           name_account,
           followers_count_account,
           evaluacion,
           votos,
           votos_humor,
           promedio_votos,
           categoria_tweet
    FROM   tweets AS T
           NATURAL JOIN twitter_accounts
                        LEFT JOIN (SELECT id_tweet,
                                          Avg(voto) AS promedio_votos,
                                          Count(*) AS votos,
                                          Count(If(voto <> 'x', 1, NULL)) AS votos_humor
                                   FROM   votos
                                   WHERE voto <> 'n'
                                   GROUP  BY id_tweet) V
                               ON ( V.id_tweet = T.id_tweet )
    {restricciones}
    """.format(restricciones=restricciones_tweets)

    cursor.execute(consulta)

    bar = IncrementalBar("Cargando tweets\t\t\t", max=(999999 if DB_ENGINE == 'sqlite3' else cursor.rowcount), suffix=SUFIJO_PROGRESS_BAR)
    bar.next(0)

    resultado = {}

    for (id_account, tweet_id, texto, favoritos, retweets, es_humor, censurado, cuenta, seguidores, evaluacion, votos,
         votos_humor, promedio_votos, categoria) in cursor:
        tweet = Tweet()
        tweet.id = tweet_id
        tweet.texto_original = texto
        tweet.texto = texto
        tweet.favoritos = favoritos
        tweet.retweets = retweets
        tweet.es_humor = es_humor
        tweet.es_chiste = es_humor
        tweet.censurado = censurado
        tweet.cuenta = cuenta
        tweet.seguidores = seguidores
        tweet.evaluacion = evaluacion
        tweet.categoria = categoria
        if votos:
            tweet.votos = int(votos)  # This and the next come from COUNT/AVG, so they arrive as Decimal.
        if votos_humor:
            tweet.votos_humor = int(votos_humor)
        if promedio_votos:
            tweet.promedio_de_humor = promedio_votos

        resultado[tweet.id] = tweet
        bar.next()

    bar.finish()

    if cargar_features:
        consulta = """
        SELECT id_tweet,
               nombre_feature,
               valor_feature
        FROM   features
               NATURAL JOIN tweets
        {restricciones}
        """.format(restricciones=restricciones_features)

        cursor.execute(consulta)

        bar = IncrementalBar("Cargando features\t\t", max=(9999999 if DB_ENGINE == 'sqlite3' else cursor.rowcount), suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)

        for (id_tweet, nombre_feature, valor_feature) in cursor:
            if id_tweet in resultado:
                resultado[id_tweet].features[nombre_feature] = valor_feature
            bar.next()

        bar.finish()

        cursor.close()
        conexion.close()

    return list(resultado.values())
Example No. 18
    def fat_experiment(self):
        it_list = [100, 200, 300]
        c1_list = [0.2, 0.4, 0.6]
        c2_list = [0.2, 0.4, 0.6]
        w_list = [0.3, 0.6, 0.9]
        pop_list = [100, 200, 300]
        data = {
            'id': [],
            'it_number': [],
            'size': [],
            'c1': [],
            'c2': [],
            'w': [],
            'best_fo': [],
            'best_s_it': []
        }
        id = 0

        data_100 = {'id': [], 'min_fo': [], 'max_fo': [], 'mean_fo': []}
        data_200 = {'id': [], 'min_fo': [], 'max_fo': [], 'mean_fo': []}
        data_300 = {'id': [], 'min_fo': [], 'max_fo': [], 'mean_fo': []}
        bar = IncrementalBar('Processing', max=(3**5) * 10)
        for it in it_list:
            for c1 in c1_list:
                for c2 in c2_list:
                    for w in w_list:
                        for size in pop_list:
                            for i in range(10):
                                best_s, best_fo, results = self.solve(
                                    it_number=it,
                                    pop_size=size,
                                    c1=c1,
                                    c2=c2,
                                    w=w)
                                data['id'].append(id)
                                data['it_number'].append(it)
                                data['size'].append(size)
                                data['c1'].append(c1)
                                data['c2'].append(c2)
                                data['w'].append(w)
                                data['best_s_it'].append(results['best_s_it'])
                                data['best_fo'].append(best_fo)

                                if it == 100:
                                    for i in range(len(results['min_fo'])):
                                        data_100['id'].append(id)
                                        data_100['min_fo'].append(
                                            results['min_fo'][i])
                                        data_100['max_fo'].append(
                                            results['max_fo'][i])
                                        data_100['mean_fo'].append(
                                            results['mean_fo'][i])
                                    #print('data',data_100)

                                if it == 200:
                                    for i in range(len(results['min_fo'])):
                                        data_200['id'].append(id)
                                        data_200['min_fo'].append(
                                            results['min_fo'][i])
                                        data_200['max_fo'].append(
                                            results['max_fo'][i])
                                        data_200['mean_fo'].append(
                                            results['mean_fo'][i])

                                if it == 300:
                                    for i in range(len(results['min_fo'])):
                                        data_300['id'].append(id)
                                        data_300['min_fo'].append(
                                            results['min_fo'][i])
                                        data_300['max_fo'].append(
                                            results['max_fo'][i])
                                        data_300['mean_fo'].append(
                                            results['mean_fo'][i])
                                bar.next()
                            id += 1

        pd.DataFrame.from_dict(data).to_csv('results/header.csv', index=False)
        pd.DataFrame.from_dict(data_100).to_csv('results/history_100.csv',
                                                index=False)
        pd.DataFrame.from_dict(data_200).to_csv('results/history_200.csv',
                                                index=False)
        pd.DataFrame.from_dict(data_300).to_csv('results/history_300.csv',
                                                index=False)
        bar.finish()
Example No. 19
    def run(self):
        # print("starting file thread")
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.bind(('127.0.0.1', 10001))
        sock.settimeout(10.0)
        sock.listen(5)
        # print("Entering while loop")
        block_size = 4096
        while not self.file_terminate.is_set():
            # print("In while loop")
            c, addr = sock.accept()  # Establish connection with client.
            # print('Got connection from', addr)
            # print("Receiving...")
            data = str(c.recv(5).decode("utf-8"))
            # print("data:",data)
            num_files = int(data)
            for i in range(num_files):
                data = str(c.recv(10).decode('utf-8'))
                # print("filebytes: "+data)
                fileb = int(data.split(" ")[0])  # endianness may be affecting data transfer
                filenamebytes = int(data.split(" ")[1])

                data = str(c.recv(filenamebytes).decode('utf-8'))
                # print("filename received: "+data)
                filename = data
                curb = 0
                print('Receiving ' + filename + "...")
                bar = IncrementalBar('Percentage of file transferred', max=100)
                percentage = 0
                path = "/".join(filename.split("/")[:-1])
                destination = self.id + "/" + path
                file_name = filename.split("/")[-1]
                if not os.path.exists(destination + "/" + file_name):
                    if not os.path.exists(destination):
                        os.makedirs(destination)
                    writeto = open(destination + '/' + file_name, 'w+')
                    while curb < fileb:
                        sys.stdout.flush()
                        if curb + block_size > fileb:
                            data = str(c.recv(fileb - curb).decode('utf-8'))
                        else:
                            data = str(c.recv(block_size).decode('utf-8'))
                        writeto.write(data)
                        next_val = int(100 * (curb / fileb))
                        for i in range(next_val - percentage):
                            bar.next()
                        curb += len(data)
                        percentage = next_val
                        #print('\r' + str(curb) + "/" + str(fileb))
                        # print("")
                    # print("Last text:",data)
                    next_val = int(100 * (curb / fileb))
                    for i in range(next_val - percentage):
                        bar.next()
                    curb += len(data)
                    percentage = next_val
                    writeto.close()
                    bar.finish()
                    print('Transfer of ' + filename + ' successful.')
                else:
                    lines = ""
                    while curb < fileb:
                        sys.stdout.flush()
                        if curb + block_size > fileb:
                            data = str(c.recv(fileb - curb).decode('utf-8'))
                        else:
                            data = str(c.recv(block_size).decode('utf-8'))
                        # data = str(data.decode('utf-8'))
                        if data:
                            lines += data
                        next_val = int(100 * (curb / fileb))
                        for i in range(next_val - percentage):
                            bar.next()
                        curb += len(data)
                        percentage = next_val
                        #print('\r' + str(curb) + "/" + str(fileb))
                        # print("")
                    lines = lines.split("\n")
                    next_val = int(100 * (curb / fileb))
                    for i in range(next_val - percentage):
                        bar.next()
                    curb += len(data)
                    percentage = next_val
                    #print("lines: ",lines)
                    #print("end")
                    for i in range(len(lines) - 1):
                        lines[i] = lines[i] + "\n"
                    self.writeToFile(lines, destination + "/" + file_name,
                                     destination + "/" + file_name)
                    bar.finish()
                    print('Transfer of ' + filename + ' successful.')
            self.file_terminate.set()
        print("File reading client closed")
        sock.close()
Example No. 20
if __name__ == "__main__":
    scriptPath = os.path.realpath(__file__)
    scriptDir = os.path.dirname(scriptPath)

    subDirs = next(os.walk(scriptDir))[1]

    # Upload all of the top-level files
    for fileName in os.listdir(scriptDir):
        if not os.path.isdir(os.path.join(scriptDir, fileName)):
            if fileName in IGNORE_FILES: continue
            upload_file(os.path.join(scriptDir, fileName),
                        AWS_WEBSITE_BUCKET_NAME, fileName,
                        determine_content_type(fileName))

    # For each subDir within the top-level directory, upload all files using an ObjectName of subdir/fileName
    # to recreate the folder structure within S3
    progress_bar = IncrementalBar(
        "   Uploading website assets to S3",
        max=return_total_num_of_asset_files(scriptDir),
        suffix='%(percent)d%% ')
    for subDir in subDirs:
        if subDir in IGNORE_DIRS: continue
        for fileName in os.listdir(os.path.join(scriptDir, subDir)):
            if fileName in IGNORE_FILES: continue
            upload_file(os.path.join(scriptDir, subDir,
                                     fileName), AWS_WEBSITE_BUCKET_NAME,
                        os.path.join(subDir, fileName),
                        determine_content_type(fileName))
            progress_bar.next()
    progress_bar.finish()
Example No. 21
def create_temp_frustrum_voxels(view_manager,
                                voxel_config,
                                out_dim,
                                cat_id,
                                compression='lzf'):
    from progress.bar import IncrementalBar
    view_params = view_manager.get_view_params()
    n_views = view_params['n_views']
    f = view_params['f']
    in_dims = (voxel_config.voxel_dim, ) * 3
    ray_shape = (out_dim, ) * 3
    example_ids = tuple(view_manager.get_example_ids(cat_id))
    n0 = len(example_ids)

    temp_path = _get_frustrum_voxels_path(voxel_config,
                                          view_manager.view_id,
                                          out_dim,
                                          cat_id,
                                          code='temp')
    _make_dir(temp_path)
    with h5py.File(temp_path, 'a') as vox_dst:
        attrs = vox_dst.attrs
        prog = attrs.get('prog', 0)
        if prog == n0:
            return temp_path

        attrs.setdefault('n_views', n_views)
        max_len = attrs.setdefault('max_len', 0)

        vox_manager = get_voxel_manager(voxel_config,
                                        cat_id,
                                        key='rle',
                                        compression=compression,
                                        shape_key='pad')
        vox_manager.get_dataset()  # ensure data exists
        assert (vox_manager.has_dataset())
        with h5py.File(vox_manager.path, 'r') as vox_src:
            rle_src = vox_src[GROUP_KEY]

            n, m = rle_src.shape
            max_max_len = m * 3
            assert (n == n0)

            print('Creating temp rle frustrum voxel data at %s' % temp_path)
            rle_dst = vox_dst.require_dataset(GROUP_KEY,
                                              shape=(n, n_views, max_max_len),
                                              dtype=np.uint8,
                                              compression=compression)
            bar = IncrementalBar(max=n - prog)
            for i in range(prog, n):
                bar.next()
                voxels = RleVoxels(np.array(rle_src[i]), in_dims)
                eye = view_manager.get_camera_positions(cat_id, example_ids[i])
                for j in range(n_views):
                    out = convert(voxels, eye[j], ray_shape, f)
                    data = out.rle_data()
                    dlen = len(data)
                    if dlen > max_len:
                        attrs['max_len'] = dlen
                        max_len = dlen
                        if dlen > max_max_len:
                            raise ValueError('max_max_len exceeded. %d > %d' %
                                             (dlen, max_max_len))
                    rle_dst[i, j, :dlen] = data
                attrs['prog'] = i + 1
            bar.finish()
    return temp_path
Example No. 22
    print(f"{Fore.GREEN}All settings valid, proceeding...")
    print(f"Downloading {filename[0]}")
    chunkSize = 10240
    try:
        r = requests.get(url + filename[0], stream=True)
        with open(tempDir + filename[0], "wb") as f:
            pbar = IncrementalBar(
                "Downloading",
                max=int(r.headers["Content-Length"]) / chunkSize,
                suffix="%(percent)d%%",
            )
            for chunk in r.iter_content(chunk_size=chunkSize):
                if chunk:  # filter out keep-alive new chunks
                    pbar.next()
                    f.write(chunk)
            pbar.finish()
    except Exception:
        print(f"Download {Fore.RED}failed, please try again. Exiting.")
        sys.exit()
    print(f"Download {Fore.GREEN}done")

    # Extraction
    spinnerExtract = Spinner("Extracting... ")
    spinnerExtract.start()
    try:
        shutil.unpack_archive(tempDir + filename[0], tempDir)
    except Exception:
        print(f"Extraction {Fore.RED}failed, please try again. Exiting.")
        sys.exit()
    spinnerExtract.stop()
    print(f"Extraction {Fore.GREEN}done")
Example No. 23
    def initiateTraining(self):
        bar = IncrementalBar('Training', max=self.epochs)
        self.d_x = []
        self.d_x2 = []
        for e in range(self.epochs):
            bar.next()
            self.epoch = e
            noise, noise_hyperparams = self.get_noise()
            batch_DS, batch_hyperparams = self.get_train_data()
            self.generated_ds = self.layout.g.predict(
                [noise] + noise_hyperparams, batch_size=self.batch_size)
            real_label = np.array(
                [[1., 0.]
                 for i in range(len(self.energies) * self.batch_size)])
            fake_label = np.array(
                [[0., 1.]
                 for i in range(len(self.energies) * self.batch_size)])
            train_label = np.array(
                [[1., 0.]
                 for i in range(len(self.energies) * self.batch_size)])
            X = np.concatenate([batch_DS, self.generated_ds])
            all_Xh = [X]
            for num in range(len(noise_hyperparams)):
                all_Xh.append(
                    np.concatenate(
                        [batch_hyperparams[num], noise_hyperparams[num]]))
            Y = np.concatenate([real_label, fake_label])
            W = np.concatenate([
                np.ones(shape=(len(self.energies) * self.batch_size, )),
                np.full(fill_value=1,
                        shape=(len(self.energies) * self.batch_size, ))
            ])

            self.layout.d.trainable = True
            d_loss, d_acc = self.layout.d.train_on_batch(all_Xh,
                                                         Y,
                                                         sample_weight=W)
            d_loss, d_acc = self.layout.d.train_on_batch(all_Xh,
                                                         Y,
                                                         sample_weight=W)
            d_loss, d_acc = self.layout.d.train_on_batch(all_Xh,
                                                         Y,
                                                         sample_weight=W)
            d_loss, d_acc = self.layout.d.train_on_batch(all_Xh,
                                                         Y,
                                                         sample_weight=W)
            d_loss, d_acc = self.layout.d.train_on_batch(all_Xh,
                                                         Y,
                                                         sample_weight=W)

            self.layout.d.trainable = False
            logs = self.gan.train_on_batch([noise] + noise_hyperparams,
                                           train_label)

            self.layout.tensorboard.on_epoch_end(
                self.epoch, self.named_logs(self.gan, logs))

            if e == 0 or (e + 1) % self.epochCheck == 0:
                self.generated_ds = {}

                noise, noise_hyperparams = self.get_noise()
                temp_generated = self.layout.g.predict([noise] +
                                                       noise_hyperparams)

                gen_class_length = int(temp_generated.shape[0] /
                                       (len(self.energies)))

                for en in range(1, len(self.energies) + 1):
                    self.generated_ds[self.energies[en - 1]] = {}
                    for num, var in enumerate(self.variables_of_interest):
                        current_var = np.asarray(temp_generated)[:, num]
                        gen_energies_var = current_var[(en - 1) *
                                                       gen_class_length:en *
                                                       gen_class_length]
                        self.generated_ds[self.energies[
                            en - 1]][var] = gen_energies_var

                multiples = int(
                    len(self.training_ds[self.energies[0]][
                        self.variables_of_interest[0]]) / self.batch_size)
                for i in range(1, multiples):
                    noise, noise_hyperparams = self.get_noise()
                    temp_generated = self.layout.g.predict([noise] +
                                                           noise_hyperparams)

                    gen_class_length = int(temp_generated.shape[0] /
                                           (len(self.energies)))

                    for en in range(1, len(self.energies) + 1):
                        for num, var in enumerate(self.variables_of_interest):
                            current_var = np.asarray(temp_generated)[:, num]
                            gen_energies_var = current_var[
                                (en - 1) * gen_class_length:en *
                                gen_class_length]
                            self.generated_ds[self.energies[
                                en - 1]][var] = np.concatenate([
                                    self.generated_ds[self.energies[en -
                                                                    1]][var],
                                    gen_energies_var
                                ])

                if (self.epoch + self.epochCheck >= self.epochs):
                    self.last = True
                # Replace the 3 below with the number of candidates to show
                indexes = np.round(np.linspace(0,
                                               len(self.energies) - 1,
                                               3)).astype(int)
                selected_energies = [
                    self.energies[i] for i in range(len(self.energies))
                    if i in indexes
                ]
                selected_training_ds = dict(
                    filter(lambda elem: elem[0] in selected_energies,
                           self.training_ds.items()))
                selected_generated_ds = dict(
                    filter(lambda elem: elem[0] in selected_energies,
                           self.generated_ds.items()))
                for num, var in enumerate(self.variables_of_interest):
                    random_energy = randint(0, len(selected_energies) - 1)
                    true_d = selected_training_ds[selected_energies[
                        random_energy]][var] * self.normalisation[
                            selected_energies[random_energy]][var]
                    false_d = selected_generated_ds[selected_energies[
                        random_energy]][var] * self.normalisation[
                            selected_energies[random_energy]][var]
                    self.d_x.append(self.getMoment1(true_d, false_d))
                    self.d_x2.append(self.getMoment2(true_d, false_d))
                self.visualiseCurrentEpoch(selected_training_ds,
                                           selected_generated_ds,
                                           selected_energies,
                                           self.generated_ds)

            self.all_epochs.append(e)
            self.d_loss.append(d_loss)
            self.d_acc.append(d_acc)

        self.layout.tensorboard.on_train_end(None)
        bar.finish()
        return self.final_produced
Example No. 24
def assemble_collage():
    print('start assembling collage')

    # load all from downsized path
    files = os.listdir(downsized_path)
    files = [
        file for file in files
        if os.path.isfile(os.path.join(downsized_path, file))
    ]
    images = []
    bar = IncrementalBar('Loading', max=len(files))
    for file in files:
        im = Image.open(os.path.join(downsized_path, file))
        im = np.asarray(im)
        images.append(im)
        bar.next()
    bar.finish()

    # compute total amount of light in each image and only keep the N brightest
    images = [(np.sum(image), image) for image in images]
    images.sort(key=lambda x: x[0], reverse=True)
    images = images[:N]
    images = [x[1] for x in images]

    # compute the average color in each quadrant
    Cx = int(target_height / 2)
    Cy = int(target_width / 2)
    U = [np.mean(image[:Cx, :, :], axis=(1, 2)) for image in images]
    D = [np.mean(image[Cx:, :, :], axis=(1, 2)) for image in images]
    R = [np.mean(image[:, :Cy, :], axis=(1, 2)) for image in images]
    L = [np.mean(image[:, Cy:, :], axis=(1, 2)) for image in images]

    # initially just sort them in randomly
    map = np.random.permutation(N).reshape((Nx, Ny))

    # optimize neighbors with a stochastic metropolis algorithm
    Ni = 500000
    T = np.linspace(150, 2, Ni)
    A = np.zeros((Ni, 1))
    u = lambda x: (x + 1) % Nx
    d = lambda x: (x - 1) % Nx
    r = lambda x: (x + 1) % Ny
    l = lambda x: (x - 1) % Ny
    score = lambda i1, j1, i2, j2: np.linalg.norm(U[map[i1, j1]] - D[map[u(
        i2), j2]]) + np.linalg.norm(D[map[i1, j1]] - U[map[d(
            i2), j2]]) + np.linalg.norm(L[map[i1, j1]] - R[map[i2, l(
                j2)]]) + np.linalg.norm(R[map[i1, j1]] - L[map[i2, r(j2)]])
    bar = IncrementalBar('Optimization', max=Ni)
    for ai in range(Ni):
        # get two non-equal random locations
        i1 = np.random.randint(Nx)
        j1 = np.random.randint(Ny)
        while True:
            i2 = np.random.randint(Nx)
            j2 = np.random.randint(Ny)
            if i1 != i2 or j1 != j2:
                break
        # compute score
        x = score(i1, j1, i1, j1) - score(i1, j1, i2, j2) + score(
            i2, j2, i2, j2) - score(i2, j2, i1, j1)

        # exchange
        # if x < 0:
        # if x > 0:
        if x > 0 or np.exp(x / T[ai]) > np.random.uniform():
            map[i1, j1], map[i2, j2] = map[i2, j2], map[i1, j1]
            A[ai] = 1

        bar.next()
    bar.finish()
    # time evolution of acceptance rate
    Nc = int(np.floor(Ni / 20))
    for ai in range(20):
        print('{}: {}'.format(ai, np.mean(A[ai * Nc:(ai + 1) * Nc])))

    # shift brightest to center
    B = np.zeros((Nx, Ny))
    for i in range(Nx):
        for j in range(Ny):
            B[i, j] = np.sum(images[map[i, j]])
    sk = np.array([0.25, 0.5, 1, 0.5, 0.25])
    # convolve in 1D along all rows and all columns
    for i in range(Nx):
        B[i, :] = np.convolve(B[i, :], sk, mode='same')
    for j in range(Ny):
        B[:, j] = np.convolve(B[:, j], sk, mode='same')
    cx, cy = np.unravel_index(np.argmax(B), B.shape)
    map = np.roll(map, (int(Nx / 2 - cx), int(Ny / 2 - cy)), axis=(0, 1))

    # assemble image
    final = np.zeros((Nx * target_height, Ny * target_width, 3),
                     dtype=np.uint8)
    for i in range(Nx):
        for j in range(Ny):
            final[i * target_height:(i + 1) * target_height,
                  j * target_width:(j + 1) * target_width] = images[map[i, j]]

    # convert back to pillow image and save
    im = Image.fromarray(final)
    im.save(output_file)
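
Note: the swap loop above is a standard Metropolis scheme: x is the cost decrease of a proposed swap, improving swaps are always accepted, and worsening swaps are accepted with probability exp(x/T), which shrinks as T cools. A stripped-down sketch of just that acceptance rule on a toy one-dimensional state (not the quadrant-matching score used above):

import numpy as np

def metropolis_accept(delta, temperature):
    # delta > 0 means the proposed move lowers the cost; otherwise accept with probability exp(delta / T).
    return delta > 0 or np.exp(delta / temperature) > np.random.uniform()

Ni = 1000
T = np.linspace(150, 2, Ni)   # cooling schedule like the one above
state = 10.0                  # toy state we try to drive toward 0
cost = lambda s: abs(s)
for ai in range(Ni):
    candidate = state + np.random.normal()
    delta = cost(state) - cost(candidate)  # positive when the candidate is better
    if metropolis_accept(delta, T[ai]):
        state = candidate
print('final state: {:.3f}'.format(state))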
Example No. 25
def lc2SDS():
    """
    Convert fixed LCHEAPO data to SeisComp Data Structure

    SIMPLE drift and leapsecond correction:
        - offset is constant within each daily file
        - offset information is not written in header
        - data quality field is not modified
        - leapsecond flag is not raised (causes apparent 1-s gap/overlap).
    Writes to a directory named SDS/ in the output directory.
    """
    print(lc2SDS.__doc__)
    parser = argparse.ArgumentParser(
        description=inspect.cleandoc(lc2SDS.__doc__),
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("infiles",
                        nargs='+',
                        help="Input filename(s).  If there are captured "
                        "wildcards (put in '' so that they aren't "
                        "interpreted by the shell), will expand them "
                        "in the input directory")
    parser.add_argument("-t",
                        "--obs_type",
                        default='SPOBS2',
                        help="obs type.  Controls channel and location codes",
                        choices=[s for s in chan_maps])
    parser.add_argument("--station",
                        default='SSSSS',
                        help="station code for this instrument")
    parser.add_argument("--network",
                        default='XX',
                        help="network code for this instrument")
    parser.add_argument("-s",
                        "--start_times",
                        nargs='+',
                        metavar=("REF_START", "INST_START"),
                        help="Start datetimes for the reference (usually GPS) "
                        "and instrument.  If only one value is provided, "
                        "it will be used for both")
    parser.add_argument("-e",
                        "--end_times",
                        nargs=2,
                        metavar=("REF_END", "INST_END"),
                        help="End datetimes for the reference and instrument")
    parser.add_argument("--leapsecond_times",
                        nargs='+',
                        help="leapsecond times")
    parser.add_argument("--leapsecond_types",
                        default='+',
                        help="'+' for extra second, '-' for removed second. "
                        "If there is one character it is applied to all "
                        "leapseconds, if there is more than one the "
                        "length of the string must match "
                        "the number of leapsecond_times")
    parser.add_argument("-d",
                        dest="base_dir",
                        metavar="BASE_DIR",
                        default='.',
                        help="base directory for files")
    parser.add_argument("-i",
                        dest="in_dir",
                        metavar="IN_DIR",
                        default='.',
                        help="input file directory (absolute, " +
                        "or relative to base_dir)")
    parser.add_argument("-o",
                        dest="out_dir",
                        metavar="OUT_DIR",
                        default='.',
                        help="output file directory (absolute, " +
                        "or relative to base_dir)")
    parser.add_argument("-v",
                        "--verbose",
                        action='store_true',
                        help="verbose output")
    parser.add_argument("--version",
                        action='store_true',
                        help="Print version number and quit")
    args = parser.parse_args()
    parameters = vars(args).copy()
    if args.version is True:
        print(f"Version {__version__}")
        sys.exit(0)

    # ADJUST INPUT PARAMETERS
    if args.start_times is not None:
        args.start_times = [UTCDateTime(x) for x in args.start_times]
    if args.end_times is not None:
        args.end_times = [UTCDateTime(x) for x in args.end_times]
    ls_times, ls_types = _adjust_leapseconds(args.leapsecond_times,
                                             args.leapsecond_types)

    # SETUP FOR PROCESS-STEPS
    process_step = ProcessStep('lc2SDS',
                               " ".join(sys.argv),
                               app_description=__doc__,
                               app_version=__version__,
                               parameters=parameters)
    args.in_dir, args.out_dir, args.infiles = ProcessStep.setup_paths(args)
    # Expand captured wildcards
    #args.infiles = [x.name for f in args.infiles
    #                for x in Path(args.in_dir).glob(f)]

    for infile in args.infiles:
        lc_start, lc_end = get_data_timelimits(Path(args.in_dir) / infile)

        if args.start_times and args.end_times:
            ref_start = args.start_times[0]
            if len(args.start_times) > 1:
                inst_start = args.start_times[1]
            else:
                inst_start = ref_start
            ref_end, inst_end = args.end_times
            if inst_start == 0:
                inst_start = ref_start
            inst_start_offset = inst_start - ref_start
            inst_drift = ((inst_end - ref_end) - inst_start_offset)\
                / (ref_end - inst_start)
            print(
                'instrument start offset = {:g}s, drift rate = {:.4g}'.format(
                    inst_start_offset, inst_drift))
            # quality_flag = 'Q'  # Don't know how to put this in miniSEED
        else:
            ref_start, inst_start = lc_start, lc_start
            inst_start_offset = 0
            inst_drift = 0
            warnings.warn('Could not calculate clock drift, assuming zero!')
            # quality_flag = 'D'  # Don't know how to put this in miniSEED

        lc_start_day = lc_start.replace(hour=0,
                                        minute=0,
                                        second=0,
                                        microsecond=0)
        lc_end_day = lc_end.replace(hour=0, minute=0, second=0, microsecond=0)
        stime = lc_start_day
        bar = IncrementalBar(f'Processing {infile}',
                             max=(lc_end_day - lc_start_day) / 86400 + 1)
        while stime <= lc_end_day:
            inst_offset = inst_start_offset + inst_drift * (stime - ref_start)
            _write_daily(inst_offset, stime, infile, args, ls_times, ls_types)
            bar.next()
            stime += 86400
        bar.finish()

    return_code = 0
    process_step.exit_code = return_code
    process_step.write(args.in_dir, args.out_dir)
    sys.exit(return_code)
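
Note: the clock correction above is a linear interpolation: the offset at the reference start is inst_start - ref_start, the drift rate is the extra offset accumulated per second of reference time, and each daily file gets the start offset plus drift times the elapsed time. A tiny worked sketch of that arithmetic with made-up timestamps:

from obspy import UTCDateTime

# Made-up synchronization times (not from the example above).
ref_start = UTCDateTime("2020-01-01T00:00:00")
inst_start = UTCDateTime("2020-01-01T00:00:02")   # instrument clock 2 s late at deployment
ref_end = UTCDateTime("2020-02-01T00:00:00")
inst_end = UTCDateTime("2020-02-01T00:00:03")     # 3 s late at recovery

inst_start_offset = inst_start - ref_start        # 2.0 s
inst_drift = ((inst_end - ref_end) - inst_start_offset) / (ref_end - inst_start)

# Offset applied to a daily file starting at stime:
stime = UTCDateTime("2020-01-16T00:00:00")
inst_offset = inst_start_offset + inst_drift * (stime - ref_start)
print('offset at {}: {:.3f} s'.format(stime, inst_offset))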
Example No. 26
    def _seed_districts(self, graph, districts):
        """
        A simple procedure that selects n random seed nodes (n = number of districts)
        and then selects neighbors of those seeds and claims them to be of the same
        district.

        Performance Notes:
        O(n^3), but operations are cheap.
        """

        bar = IncrementalBar("Seeding Districts", max=len(graph.nodes))
        graph_pool = [_ for _ in graph.nodes]
        random.shuffle(graph_pool)

        district_sizes = [[1, district] for district in range(districts)]

        # Start the district with some seeds
        for district in range(districts):
            bar.next()

            seed = graph_pool.pop()
            graph.nodes.get(seed)['dis'] = district

        # While there are unclaimed nodes
        while graph_pool:
            last_run = len(graph_pool)
            # Let each district claim a new node
            district_sizes = sorted(district_sizes)
            for i, (size, district) in enumerate(district_sizes):
                round_complete = False
                # Find the nodes that belong to a district
                for node, props in graph.nodes(data=True):
                    if props.get('dis') == district:
                        # Iterate through edges and find an unclaimed neighbor
                        for _, neighbor in graph.edges(node):
                            if neighbor in graph_pool:
                                graph_pool.remove(neighbor)
                                district_sizes[i][0] += 1
                                bar.next()
                                graph.nodes.get(neighbor)['dis'] = district
                                round_complete = True
                                break
                    if round_complete: break  # Quicker breaking
                # if round_complete: break # Quicker breaking

            # If no district claimed a node this round, rescue one stranded node
            # by attaching it to any already-assigned neighbour's district.
            if len(graph_pool) == last_run:
                rescued = False
                for candidate in graph_pool:
                    for _, neighbor in graph.edges(candidate):
                        district = graph.nodes[neighbor].get('dis', -1)
                        if district != -1:
                            graph_pool.remove(candidate)
                            # district_sizes is kept sorted by size, so find the
                            # entry for this district instead of indexing by id
                            for entry in district_sizes:
                                if entry[1] == district:
                                    entry[0] += 1
                                    break
                            bar.next()
                            graph.nodes[candidate]['dis'] = district
                            rescued = True
                            break
                    if rescued:
                        break  # resume normal claiming in the outer while loop

            if len(graph_pool) == last_run:
                # PANIC
                import pdb
                pdb.set_trace()

        bar.finish()

        return graph
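The docstring above describes the seeding strategy in words: pick one random seed per district, then let districts claim unassigned neighbours until the pool is empty. A compact, self-contained sketch of that same idea (not the class method above; it assumes a connected networkx graph):

import random
import networkx as nx

def seed_districts_sketch(graph, districts):
    # one random seed per district, then round-robin neighbour claiming
    pool = list(graph.nodes)
    random.shuffle(pool)
    for d in range(districts):
        graph.nodes[pool.pop()]['dis'] = d
    while pool:
        for d in range(districts):
            claimed = next(
                (nbr for node, props in graph.nodes(data=True)
                 if props.get('dis') == d
                 for nbr in graph.neighbors(node) if nbr in pool),
                None)
            if claimed is not None:
                pool.remove(claimed)
                graph.nodes[claimed]['dis'] = d
    return graph

labelled = seed_districts_sketch(nx.grid_2d_graph(6, 6), districts=3)
print({d: sum(1 for _, p in labelled.nodes(data=True) if p['dis'] == d)
       for d in range(3)})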
Exemplo n.º 27
0
def random_sampling_program(literal, bn, samples):
    global inconsistent_program, repeated_program, uniquePrograms, uniqueOnlyPrograms, uniqueEvidences, repeated_evidence
    all_possible_programs = pow(2, dimension)
    bar = IncrementalBar("Random sampling programs...", max=samples)
    initial_time = time.time()
    for i in range(samples):
        # valid programs are the integers in [0, 2**dimension), so sample from
        # all_possible_programs values (the original "+ 1" allowed an out-of-range value)
        int_program = np.random.choice(all_possible_programs, 1)
        program = int_to_bin_with_format(int_program, dimension)[0]
        tuple_program = tuple(program)
        if tuple_program not in uniqueOnlyPrograms:
            uniqueOnlyPrograms.add(tuple_program)
            delp = build_delp_from_binaries(program)
            evidence = get_evidence(program)
            if evidence != 'incorrect_program':
                tuple_evidence = tuple(evidence.items())
                if tuple_evidence not in uniqueEvidences:
                    uniqueEvidences.add(tuple_evidence)
                    status = queryToProgram([delp, program], literal,
                                            uniquePrograms)
                    prWorld = bn.get_sampling_prob(evidence)
                    if status[1] == 'yes':
                        results['yes']['total'] += 1
                        results['yes'][
                            'prob'] = results['yes']['prob'] + prWorld
                    elif status[1] == 'no':
                        results['no']['total'] += 1
                        results['no']['prob'] = results['no']['prob'] + prWorld
                    elif status[1] == 'undecided':
                        results['und']['total'] += 1
                        results['und'][
                            'prob'] = results['und']['prob'] + prWorld
                    elif status[1] == 'unknown':
                        results['unk']['total'] += 1
                        results['unk'][
                            'prob'] = results['unk']['prob'] + prWorld
                else:
                    repeated_evidence += 1
            else:
                inconsistent_program += 1
        else:
            repeated_program += 1
        bar.next()
    bar.finish()
    time_execution = time.time() - initial_time
    results['execution_time'] = time_execution
    results['unique_programs'] = len(uniqueOnlyPrograms)
    results['inconsistent']['total'] = inconsistent_program
    results['inconsistent']['perc'] = "{:.2f}".format(
        (results['inconsistent']['total'] * 100) / samples)
    results['repeated']['total'] = repeated_program
    results['repeated']['perc'] = "{:.2f}".format(
        (results['repeated']['total'] * 100) / samples)
    results['domain'] = 'programs'
    results['yes']['perc'] = "{:.2f}".format(
        (results['yes']['total'] * 100) / samples)
    results['no']['perc'] = "{:.2f}".format(
        (results['no']['total'] * 100) / samples)
    results['und']['perc'] = "{:.2f}".format(
        (results['und']['total'] * 100) / samples)
    results['unk']['perc'] = "{:.2f}".format(
        (results['unk']['total'] * 100) / samples)
    results['l'] = results['yes']['prob']
    results['u'] = results['u'] - results['no']['prob']
    results['total_sampling'] = samples
    print("Unique programs: ", end='')
    print_ok_ops("%s" % len(uniqueOnlyPrograms))
    print("Unique evidence: ", end='')
    print_ok_ops("%s" % len(uniqueEvidences))
    print("Inconsistent programs: ", end='')
    print_ok_ops("%s" % inconsistent_program)

    print("repeated evidence: ", repeated_evidence)

    with open('/home/mario/results/umda/UMDARandomPrograms.json',
              'w') as outfile:
        json.dump(results, outfile, indent=4)
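The sampler above draws an integer below 2**dimension and converts it to a fixed-width bit string before building the program. The real int_to_bin_with_format helper is defined elsewhere in the project; an assumed, minimal equivalent of that conversion would be:

import numpy as np

def int_to_bits(value, width):
    # fixed-width binary representation, most significant bit first
    return format(int(value), '0{}b'.format(width))

dimension = 8
sample = np.random.choice(2 ** dimension)   # one integer in [0, 2**dimension)
print(sample, int_to_bits(sample, dimension))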
Exemplo n.º 28
0
def umda_brute_force_programs(literal, bn):
    global uniqueEvidences, uniquePrograms, repeated_evidence, inconsistent_program

    all_possible_programs = pow(2, dimension)
    bar = IncrementalBar("Analyzing programs...", max=all_possible_programs)
    initial_time = time.time()
    for int_value in range(all_possible_programs):
        program = int_to_bin_with_format(
            int_value, dimension)[0]  # Return [program, evidence] REVISAR
        evidence = get_evidence(program)
        if evidence != 'incorrect_program':
            tuple_evidence = tuple(evidence.items())
            if tuple_evidence not in uniqueEvidences:
                uniqueEvidences.add(tuple_evidence)
                delp = build_delp_from_binaries(program)
                status = queryToProgram([delp, program], literal,
                                        uniquePrograms)
                prWorld = bn.get_sampling_prob(evidence)
                if status[1] == 'yes':
                    results['yes']['total'] += 1
                    results['yes']['prob'] = results['yes']['prob'] + prWorld
                elif status[1] == 'no':
                    results['no']['total'] += 1
                    results['no']['prob'] = results['no']['prob'] + prWorld
                elif status[1] == 'undecided':
                    results['und']['total'] += 1
                    results['und']['prob'] = results['und']['prob'] + prWorld
                elif status[1] == 'unknown':
                    results['unk']['total'] += 1
                    results['unk']['prob'] = results['unk']['prob'] + prWorld
            else:
                repeated_evidence += 1
        else:
            inconsistent_program += 1
        bar.next()
    bar.finish()

    time_execution = time.time() - initial_time
    results['execution_time'] = time_execution
    results['yes']['perc'] = "{:.2f}".format(
        (results['yes']['total'] * 100) / all_possible_programs)
    results['no']['perc'] = "{:.2f}".format(
        (results['no']['total'] * 100) / all_possible_programs)
    results['und']['perc'] = "{:.2f}".format(
        (results['und']['total'] * 100) / all_possible_programs)
    results['unk']['perc'] = "{:.2f}".format(
        (results['unk']['total'] * 100) / all_possible_programs)
    results['inconsistent']['total'] = inconsistent_program
    results['inconsistent']['perc'] = "{:.2f}".format(
        (results['inconsistent']['total'] * 100) / all_possible_programs)
    results['programsAnalyzed'] = all_possible_programs
    results['l'] = results['yes']['prob']
    results['u'] = results['u'] - results['no']['prob']
    print("Unique programs: ", end='')
    print_ok_ops("%s" % (int_value + 1))
    print("Unique evidence: ", end='')
    print_ok_ops("%s" % len(uniqueEvidences))
    print("Inconsistent programs: ", end='')
    print_ok_ops("%s" % inconsistent_program)

    with open('/home/mario/results/umda/UMDAForceBrutePrograms.json',
              'w') as outfile:
        json.dump(results, outfile, indent=4)
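Both routines accumulate the same interval arithmetic: the lower bound l is the probability mass of worlds that answered 'yes', and the upper bound u starts from its initial value (presumably 1.0, set elsewhere) and loses the mass of worlds that answered 'no'. A tiny numeric illustration of that bookkeeping:

# assumed initialisation mirroring the accumulation in the two routines above
results = {'yes': {'prob': 0.0}, 'no': {'prob': 0.0}, 'u': 1.0}
for world_prob, answer in [(0.2, 'yes'), (0.5, 'no'), (0.3, 'undecided')]:
    if answer == 'yes':
        results['yes']['prob'] += world_prob
    elif answer == 'no':
        results['no']['prob'] += world_prob
results['l'] = results['yes']['prob']                 # 0.2
results['u'] = results['u'] - results['no']['prob']   # 1.0 - 0.5 = 0.5
print(results['l'], results['u'])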
Exemplo n.º 29
0
#filename = "tuchan.html"
Manga_name = "VuLuyenDienPhong"
Path(Manga_name).mkdir(parents=True, exist_ok=True)
f = open(Manga_name + "/" + Manga_name + ".html",
         'a+')  # Write to only one file

chapter_count = 1

for chapter in all_chapter:
    if int(args.square) > 1 and chapter_count < int(args.square):
        chapter_count = chapter_count + 1
        #_IncrementalBar.next()
        continue
    chapter_count = chapter_count + 1
    #print(chapter['href'])
    Content = Get_Chapter_Content(chapter['href'])
    #print(Content)
    #exit()
    _IncrementalBar.next()
    #print(str(Content[0]))
    #xit()
    f.write(str(Content[0]))
    content_final = Remove_unwanted_string(str(Content[1]))
    f.write(content_final)
    f.write("\n")
f.close()
_IncrementalBar.finish()
end_time = time.time()
print(end_time - star_time)
Exemplo n.º 30
0
def texttype_freqs(database, folder, prop_names):
    """
    Used to collect lemmas by the types of text they appear in and sort
    them by frequency. Filters the RMH in order to retrieve the desired
    results. The script can be modified according to the user's need 
    and to fit another corpus.  
    """
    dci = SQLDatabase(db_name='gagnagrunnar/nmo.db')
    dim = SQLDatabase(db_name='gagnagrunnar/bin_lemmur_ordmyndir.db')
    filters = SQLDatabase(db_name='gagnagrunnar/IGC_filters.db') # Predefined stop-word list based on the RMH

    print("""
    ============================================================
    Les skjöl úr málheildinni.
    ============================================================
    """)
    xml_files = glob.glob(folder+'/**/*.xml', recursive=True)

    alltexttypes = []
    freqdic1 = {}
    freqdic2 = {}
    filebar = IncrementalBar('Framvinda', max = len(xml_files))
    for file in xml_files:
        with open(file, 'r', encoding='utf-8') as content:
            try:
                tree = ET.parse(content)
                root = tree.getroot()
                textClass = root[0][2][0][0][0][0] # Retrieve the texttype tag from the XML file
                texttype = textClass.text 
                if texttype not in alltexttypes:
                    alltexttypes.append(texttype) # Collect all unique texttypes
                pos_to_ignore = ['e', 'c', 'v', 'as', 'to', 'tp', 'ta', 'au'] # The POS tags that should not be displayed in the results
                for word in tree.iter():
                    pos = word.attrib.get('type')
                    if pos is not None:
                        if prop_names==False:
                            if pos.startswith('n') and pos.endswith('s'): # Ignore proper names
                                continue
                        if pos in pos_to_ignore:
                            continue
                        if (not all(i.isalpha() or i == '-' for i in word.text)): # Ignore all that are not alphabetic letters or hyphen 
                            continue
                        if len(word.text) < 3: # Ignore very short words, likely to be particles
                            continue
                        if word.text[-1] == '-': # Ignore words starting or ending with a hypen (likely OCR errors)
                            continue
                        if word.text[0] == '-':
                            continue
                        if word.attrib.get('lemma') is not None:
                            lemma = word.attrib.get('lemma')
                            filter_query = SQLiteQuery(lemma,'filter','FILTER_WORD_FORMS', cursor=filters.cursor) # Ignore stop words
                            if filter_query.exists:
                                continue
                            else:
                                if database == 'NMO':
                                    query = SQLiteQuery(lemma, 'lemma','DCI_ELEMENT', cursor = dci.cursor) # Capitalized words included
                                    query_lower = SQLiteQuery(lemma.lower(),'lemma','DCI_ELEMENT', cursor = dci.cursor)
                                elif database == 'BIN':
                                    query = SQLiteQuery(lemma, 'lemma','DIM_ELEMENT', cursor = dim.cursor) # Capitalized words included
                                    query_lower = SQLiteQuery(lemma.lower(),'lemma','DIM_ELEMENT', cursor = dim.cursor)
                                if not query.exists and not query_lower.exists: # If the word is not found in the DIM or the stopwords
                                    if lemma not in freqdic1: # Collect total freqs
                                        freqdic1[lemma] = 1
                                    else:
                                        freqdic1[lemma] += 1
                                    if (lemma,texttype) not in freqdic2: # Collect texttype freqs
                                        freqdic2[(lemma,texttype)] = 1
                                    else:
                                        freqdic2[(lemma,texttype)] += 1
            except IndexError:
                continue
            except ET.ParseError:
                continue

        filebar.next()
        sys.stdout.flush()
    filebar.finish()

    print("""
    ============================================================
    Flokkar tíðni eftir textagerðum. 
    ============================================================
    """)

    tempfinal = []
    bar1 = IncrementalBar('Framvinda', max = len(freqdic1))
    for key, value in sorted(freqdic1.items()): # Lemma, total freq
        tempf = []
        tempf.append(key)
        temp = []
        for k, v in freqdic2.items(): 
            if k[0] == key:
                temp.append((k[1], v)) # A list of all possible texttypes that appear with the lemma
        for tt in alltexttypes:
            if tt in [item[0] for item in temp]:
                continue
            else:
                temp.append((tt, 0)) 
        tempf.append(value)
        for tup in sorted(temp):
            tempf.append(tup[1]) 
        tempfinal.append(tempf) # The format of this list is [lemma, totalfreq, texttype_a freq, texttype_b freq...]
        bar1.next()
        sys.stdout.flush()
    bar1.finish()

    header = ['Lemma', 'Heildartíðni'] + sorted(alltexttypes)

    if folder == "malheildir/RMH/":
        with open(f"uttak/{database}/RMH_textagerdir.csv", mode='w+') as outputfile:
            csvwriter = csv.writer(outputfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csvwriter.writerow(header)
            for i in tempfinal:
                csvwriter.writerow(i)
        print(f"""
    ============================================================
    Úttaksskjalið RMH_textagerdir.csv er tilbúið og 
    er að finna í undirmöppunni uttak/{database}/
    ============================================================
        """)
    elif folder == "malheildir/RMH/CC_BY/":
        with open(f'uttak/{database}/CC_BY_textagerdir.csv', mode='w+') as outputfile:
            csvwriter = csv.writer(outputfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csvwriter.writerow(header)
            for i in tempfinal:
                csvwriter.writerow(i)
        print(f"""
    ============================================================
    Úttaksskjalið CC_BY_textagerdir.csv er tilbúið og 
    er að finna í undirmöppunni uttak/{database}/
    ============================================================
        """)
    elif folder == "malheildir/RMH/MIM/":
        with open(f'uttak/{database}/MIM_textagerdir.csv', mode='w+') as outputfile:
            csvwriter = csv.writer(outputfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csvwriter.writerow(header)
            for i in tempfinal:
                csvwriter.writerow(i)
        print(f"""
    ============================================================
    Úttaksskjalið MIM_textagerdir.csv er tilbúið og 
    er að finna í undirmöppunni uttak/{database}/
    ============================================================
        """)
    else:
        namefolder = folder.split("/")[3]
        with open(f'uttak/{database}/{namefolder}_textagerdir.csv', mode='w+') as outputfile:
            csvwriter = csv.writer(outputfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csvwriter.writerow(header)
            for i in tempfinal:
                csvwriter.writerow(i)

        print(f"""
    ============================================================
    Úttaksskjalið {namefolder}_textagerdir.csv er tilbúið og 
    er að finna í undirmöppunni uttak/{database}/
    ============================================================
        """)
Exemplo n.º 31
0
def migrate(callback):
    connection = op.get_bind()

    s = sa.select([n.c.node, n.c.path])
    nodes = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating node paths...', max=len(nodes))
    for node, path in nodes:
        account, sep, rest = path.partition('/')
        match = callback(account)
        if not match:
            bar.next()
            continue
        path = sep.join([match, rest])
        u = n.update().where(n.c.node == node).values({'path':path})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([v.c.muser]).distinct()
    musers = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating version modification users...',
                         max=len(musers))
    for muser, in musers:
        match = callback(muser)
        if not match:
            bar.next()
            continue
        u = v.update().where(v.c.muser == muser).values({'muser': match})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([p.c.public_id, p.c.path])
    public = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating public paths...', max=len(public))
    for id, path in public:
        account, sep, rest = path.partition('/')
        match = callback(account)
        if not match:
            bar.next()
            continue
        path = sep.join([match, rest])
        u = p.update().where(p.c.public_id == id).values({'path':path})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([x.c.feature_id, x.c.path])
    xfeatures = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating permission paths...', max=len(xfeatures))
    for id, path in xfeatures:
        account, sep, rest = path.partition('/')
        match = callback(account)
        if not match:
            bar.next()
            continue
        path = sep.join([match, rest])
        u = x.update().where(x.c.feature_id == id).values({'path':path})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([xvals.c.feature_id, xvals.c.key, xvals.c.value])
    s = s.where(xvals.c.value != '*')
    xfeaturevals = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating permission holders...',
                         max=len(xfeaturevals))
    for feature_id, key, value in xfeaturevals:
        account, sep, group = value.partition(':')
        match = callback(account)
        if not match:
            bar.next()
            continue
        new_value = sep.join([match, group])
        u = xvals.update()
        u = u.where(and_(
                xvals.c.feature_id == feature_id,
                xvals.c.key == key,
                xvals.c.value == value))
        u = u.values({'value':new_value})
        connection.execute(u)
        bar.next()
    bar.finish()

    s = sa.select([g.c.owner, g.c.name, g.c.member])
    groups = connection.execute(s).fetchall()
    bar = IncrementalBar('Migrating group owners & members...',
                         max=len(groups))
    for owner, name, member in groups:
        owner_match = callback(owner)
        member_match = callback(member)
        if owner_match or member_match:
            u = g.update()
            u = u.where(and_(
                g.c.owner == owner,
                g.c.name == name,
                g.c.member == member))
            values = {}
            if owner_match:
                values['owner'] = owner_match
            if member_match:
                values['member'] = member_match
            u = u.values(values)
            connection.execute(u)
        # advance the bar for every row, matched or not, so it reaches max
        # like the other migration loops above
        bar.next()
    bar.finish()
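Every loop in this migration rewrites only the account prefix of a stored path: the string is split at the first separator, the account part is mapped through the callback, and the pieces are glued back together with the same separator. The idiom in isolation (with a hypothetical callback):

def callback(account):
    # hypothetical mapping from display names to new account identifiers
    return {'alice': 'uuid-1234'}.get(account)

path = 'alice/photos/2021/trip.jpg'
account, sep, rest = path.partition('/')
match = callback(account)
if match:
    path = sep.join([match, rest])
print(path)   # uuid-1234/photos/2021/trip.jpg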
Exemplo n.º 32
0
    def find_solutions(self, graph_setting_groups):
        results = {}
        # check for solutions for a specific set of interaction settings
        logging.info(
            "Number of interaction settings groups being processed: " +
            str(len(graph_setting_groups)))
        for strength, graph_setting_group in sorted(
                graph_setting_groups.items(), reverse=True):
            logging.info("processing interaction settings group with "
                         "strength " + str(strength))
            logging.info(
                str(len(graph_setting_group)) + " entries in this group")
            logging.info("running with " + str(self.number_of_threads) +
                         " threads...")

            temp_results = []
            bar = IncrementalBar('Propagating quantum numbers...',
                                 max=len(graph_setting_group))
            bar.update()
            if self.number_of_threads > 1:
                with Pool(self.number_of_threads) as p:
                    for result in p.imap_unordered(
                            self.propagate_quantum_numbers,
                            graph_setting_group, 1):
                        temp_results.append(result)
                        bar.next()
            else:
                for graph_setting_pair in graph_setting_group:
                    temp_results.append(
                        self.propagate_quantum_numbers(graph_setting_pair))
                    bar.next()
            bar.finish()
            logging.info('Finished!')
            if strength not in results:
                results[strength] = []
            results[strength].extend(temp_results)

        for k, v in results.items():
            logging.info("number of solutions for strength (" + str(k) +
                         ") after qn propagation: " +
                         str(sum([len(x[0]) for x in v])))

        # remove duplicate solutions, which only differ in the interaction qn S
        results = remove_duplicate_solutions(results, self.filter_remove_qns,
                                             self.filter_ignore_qns)

        node_non_satisfied_rules = []
        solutions = []
        for result in results.values():
            for (tempsolutions, non_satisfied_laws) in result:
                solutions.extend(tempsolutions)
                node_non_satisfied_rules.append(non_satisfied_laws)
        logging.info("total number of found solutions: " + str(len(solutions)))
        violated_laws = []
        if len(solutions) == 0:
            violated_laws = analyse_solution_failure(node_non_satisfied_rules)
            logging.info("violated rules: " + str(violated_laws))

        # finally perform combinatorics of identical external edges
        # (initial or final state edges) and prepare graphs for
        # amplitude generation
        match_external_edges(solutions)
        final_solutions = []
        for sol in solutions:
            final_solutions.extend(
                perform_external_edge_identical_particle_combinatorics(sol))

        return (final_solutions, violated_laws)
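The dispatch above is the usual pairing of Pool.imap_unordered with a progress bar: results are appended as they arrive, in whatever order the workers finish, and the bar ticks once per result. A stripped-down sketch of just that pattern, with a toy work function:

from multiprocessing import Pool
from progress.bar import IncrementalBar

def square(x):
    return x * x

if __name__ == '__main__':
    jobs = list(range(20))
    bar = IncrementalBar('Propagating quantum numbers...', max=len(jobs))
    results = []
    with Pool(4) as pool:
        for result in pool.imap_unordered(square, jobs, 1):
            results.append(result)
            bar.next()
    bar.finish()
    print(sorted(results))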
Exemplo n.º 33
0
class ReportCompile(object):
    def __init__(self, job_name, template, **kwargs):
        self.job_name = job_name
        self.template = template
        self.no_artifacts = kwargs.get('no_artifacts', True)
        self.num_builds = int(kwargs.get('num_builds', composite['num_builds']))
        self.minimum_build = int(kwargs.get('minimum_build', composite['min_build']))
        self.exclude_builds = [int(xb) for xb in kwargs.get('exclude_builds', [])]
        try:
            self.work_dir = local(kwargs.get('work_dir', composite['work_dir']))
            self.work_dir.ensure(dir=True)
        except KeyError:
            self.work_dir = local.mkdtemp()
            print('Writing composite report to {}'.format(self.work_dir.strpath))
        self._progress = None
        self._queue = Queue()
        num_workers = 4
        for __ in xrange(num_workers):
            worker = Thread(target=_queue_worker, args=(self,))
            worker.daemon = True
            worker.start()

    @property
    def ssh_client(self):
        c = SSHClient()
        return c

    @staticmethod
    def _best_result(*results):
        # results should be a list of (result_id, result_value) tuples
        # result ranking, best to worst
        results_ranking = ('passed', 'xfailed', 'failed', 'xpassed', 'skipped', 'error')
        # Go through all the results, returning the best outcome based on results_ranking
        for result in results_ranking:
            for result_id, result_value in reversed(sorted(results, key=lambda r: r[0])):
                if result_value == result:
                    return (result_id, result_value)

    @staticmethod
    def _streak(*results):
        sorted_results = sorted(results, key=lambda r: r[0])
        # the value of the highest numbered (and therefore more recent) build
        latest_result = sorted_results[-1][1]
        streak = 0
        for __, result_value in reversed(sorted_results):
            if result_value == latest_result:
                streak += 1
            else:
                break
        return {'latest_result': latest_result, 'count': streak}

    def _progress_update(self, item, items_done):
        if self._progress is None:
            self._progress = Bar()
            self._progress.message = '%(index)d/%(max)d'
            self._progress.suffix = ''
        if item:
            items_done[item] = True
        self._progress.max = len(items_done)
        self._progress.index = len(filter(None, items_done.values()))
        with lock:
            try:
                self._progress.update()
            except ZeroDivisionError:
                pass

    def _progress_finish(self):
        self._progress.finish()
        self._progress = None

    def compile(self):
        return self.composite_report()

    def build_numbers(self):
        api = trackerbot.api()
        builds = trackerbot.depaginate(api,
            api.build.get(job_name=self.job_name, template=self.template)
        )
        build_numbers = []
        # XXX relying on trackerbot giving us the most recent builds first, should be explicit
        for build in builds.get('objects', []):
            if (build['number'] not in self.exclude_builds and
                    build['number'] >= self.minimum_build):
                build_numbers.append(build['number'])
                if self.num_builds and len(build_numbers) == self.num_builds:
                    break
        if build_numbers:
            print('Pulling reports from builds {}'.format(
                ', '.join([str(n) for n in build_numbers])))
        return build_numbers

    def template_log_dirs(self):
        log_dir_tpl = composite['log_dir_tpl']
        log_dirs = []
        for build_number in self.build_numbers():
            log_dirs.append((build_number, log_dir_tpl.format(self.job_name, build_number)))
        return log_dirs

    def test_reports(self):
        print('Collecting test reports to determine best build nodes')
        log_dirs = self.template_log_dirs()
        reports = {}
        c = self.ssh_client
        jenkins_host = composite['jenkins_host']
        c.connect(jenkins_host, username=credentials['jenkins-result']['username'],
            password=credentials['jenkins-result']['password'],
            timeout=10,
            allow_agent=False,
            look_for_keys=False,
            gss_auth=False)
        builds_done = {}
        self._progress_update(None, builds_done)
        for build_number, log_dir in log_dirs:
            build_work_dir = local(self.work_dir.join(str(build_number)))
            build_work_dir.ensure(dir=True)
            _remote = local(log_dir).join('test-report.json').strpath
            _local = build_work_dir.join('test-report.json').strpath
            builds_done[build_number] = False
            self._progress_update(None, builds_done)
            self._queue.put((_remote, _local, build_number, builds_done))
        self._queue.join()
        self._progress_finish()
        for build_number, __ in log_dirs:
            build_work_dir = local(self.work_dir.join(str(build_number)))
            for path in build_work_dir.visit('*/test-report.json'):
                try:
                    report = json.load(path.open())
                    reports[build_number] = report
                except:
                    # invalid json, skip this report
                    pass
        return reports

    def composite_status(self, reports=None):
        jenkins_host = composite['jenkins_host']
        reports = reports or self.test_reports()
        results = {}
        # results dict structure:
        # {
        #   nodeid: {
        #     'build_results': {build_id_1: build_id_1_result, build_id_2: ...}
        #     'best_result': (best_build_id, best_build_result)
        #     'result_url': http://jenkins/path/to/build
        #     'streak': (latest_build_result, number_of_results_in_a_row)
        #   },
        #   nodeid: {
        #     ...
        #   }
        # }
        for build_number, report in reports.iteritems():
            for nodeid, nodedata in report.get('tests', {}).items():
                try:
                    # Try to pull the build statuses, skip the node if we can't
                    node_results_temp = nodedata['statuses']['overall']
                    node_results = results.setdefault(nodeid, {'build_results': {}})
                    node_results['build_results'][build_number] = node_results_temp
                except KeyError:
                    continue
        for nodeid, nodedata in results.items():
            node_results = nodedata['build_results'].items()
            nodedata['best_result'] = self._best_result(*node_results)
            nodedata['result_url'] = 'https://{}/job/{}/{}/'.format(
                jenkins_host, self.job_name, nodedata['best_result'][0]
            )
            nodedata['streak'] = self._streak(*node_results)
            test_counts[nodedata['best_result'][1]] += 1
        return results

    def composite_report(self):
        reports = self.test_reports()
        composite_status = self.composite_status(reports)
        composite_report = {
            'test_counts': test_counts,
            'tests': OrderedDict()
        }

        print('Collecting artifacts from best build nodes')
        # tracking dict for file pull progress
        remotes_done = {}
        self._progress_update(None, remotes_done)
        for nodeid, nodedata in sorted(composite_status.items(),
                key=lambda s: s[1]['streak']['count'], reverse=True):
            best_build_number = nodedata['best_result'][0]
            best_build_test = reports[best_build_number]['tests'][nodeid]
            composite_report['tests'][nodeid] = best_build_test
            composite_report['tests'][nodeid]['composite'] = nodedata
            reports[best_build_number]['tests'][nodeid]['files'] = []
        # wait for all the files to arrive before building the report
        self._queue.join()
        self._progress_finish()
        json.dump(composite_report, self.work_dir.join('composite-report.json').open('w'),
            indent=1)
        try:
            passing_percent = (100. * (test_counts['passed'] + test_counts['skipped'] +
                test_counts['xfailed'])) / sum(test_counts.values())
            print('Passing percent:', passing_percent)
            # XXX: Terrible artifactor spoofing happens here.
            print('Running artifactor reports')
            r = reporter.ReporterBase()
            reports_done = {'composite': False, 'provider': False}
            self._progress_update(None, reports_done)
            r._run_report(composite_report['tests'], self.work_dir.strpath)
            self._progress_update('composite', reports_done)
            r._run_provider_report(composite_report['tests'], self.work_dir.strpath)
            self._progress_update('provider', reports_done)
            self._progress_finish()
        except ZeroDivisionError:
            print('No tests collected from test reports (?!)')
        return composite_report

    def _translate_artifacts_path(self, artifact_path, build_number):
        preamble = composite['preamble'].format(self.job_name)
        replacement = composite['replacement'].format(self.job_name, build_number)
        artifact_remote = artifact_path.replace(preamble, replacement)
        artifact_local = self.work_dir.join(str(build_number), artifact_path[len(preamble):])
        try:
            assert artifact_remote.startswith(composite['remote_sw'])
            assert artifact_local.strpath.startswith(self.work_dir.strpath)
        except AssertionError:
            print('wat?')
            print('path', artifact_path)
            print('remote', artifact_remote)
            print('local', artifact_local.strpath)
        return artifact_remote, artifact_local.strpath
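The two static helpers near the top of the class reduce a node's per-build results to a single 'best' outcome and a streak length. Their ranking logic, shown on a small hand-made result set:

results = [(101, 'failed'), (102, 'passed'), (103, 'passed')]
ranking = ('passed', 'xfailed', 'failed', 'xpassed', 'skipped', 'error')

# best result: the first ranking entry any build achieved, newest build wins ties
best = next((rid, rv) for wanted in ranking
            for rid, rv in sorted(results, reverse=True) if rv == wanted)

# streak: how many of the most recent builds share the latest outcome
ordered = sorted(results)
latest = ordered[-1][1]
streak = 0
for _, value in reversed(ordered):
    if value != latest:
        break
    streak += 1

print(best)                                        # (103, 'passed')
print({'latest_result': latest, 'count': streak})  # 2 passing builds in a row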
Exemplo n.º 34
0
class Converter:

    def __init__(self, color_count_method=None):
        if color_count_method:
            self.color_count_method = color_count_method
        else:
            self.color_count_method = self.color_count_all

        self.__img = None
        self.__output_img = None
        self.__progress = 0
        self.__progress_bar = None

    def set_image(self, img: np.ndarray):
        self.__img = img.copy()

    def quadify_image(self, max_colors):
        self.__progress_bar = IncrementalBar('Render Progress', suffix='%(percent)d%%')

        self.__output_img = self.__img.copy()
        width, height, *_ = self.__output_img.shape
        self._quad(0, 0, width, height, max_colors, 0)

        self.__progress_bar.finish()
        return self.__output_img

    def _update_progress(self, new_progress):
        temp = self.__progress
        self.__progress += new_progress
        self.__progress_bar.goto(self.__progress * 100)
        if temp // 0.05 < self.__progress // 0.05:
            print('#', end='')

    def _quad(self, x, y, nx, ny, max_colors, depth):

        width = nx - x
        height = ny - y

        num_of_colors = self.color_count_method(self.__output_img, x, y, nx, ny, max_colors)

        if num_of_colors <= max_colors:
            # pixel_to_color_ratio = width * height / num_of_colors
            self.__output_img[x: nx, y: ny, :] = np.mean(self.__output_img[x: nx, y: ny, :],
                                                         axis=(0, 1))  # * pixel_to_color_ratio
            self._update_progress(0.25 ** depth)
        else:
            mx, my = width // 2 + x, height // 2 + y
            self._quad(x, y, mx, my, max_colors, depth + 1)
            self._quad(mx, y, nx, my, max_colors, depth + 1)
            self._quad(x, my, mx, ny, max_colors, depth + 1)
            self._quad(mx, my, nx, ny, max_colors, depth + 1)

    @staticmethod
    def color_count_all(img, x, y, nx, ny, max_colors):
        colors = set()
        for i, j in iterate_cartesian(range(x, nx), range(y, ny)):
            colors.add(str(img[i, j, :]))
            if len(colors) > max_colors:
                return len(colors)

        return len(colors)

    @staticmethod
    def color_count_differing(img, x, y, nx, ny, max_colors):
        colors = []
        for i, j in iterate_in_steps(x, y, nx, ny, step=(ny - y) // 8):
            pixel_color = img[i, j, :]
            for color in colors:
                if ((color - pixel_color) ** 2).sum() < 256:
                    break
            else:
                colors.append(pixel_color)
                if len(colors) > max_colors:
                    return len(colors)
        return len(colors)
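Converter._quad implements a quad-tree colour reduction: a block whose colour count is within the budget is flattened to its mean colour, otherwise it is split into four quadrants and recursed. A simplified, self-contained sketch of the same idea on a raw numpy array (independent of the class and its iterate_* helpers):

import numpy as np

def quadify(img, max_colors):
    out = img.copy()

    def quad(x, y, nx, ny):
        block = out[x:nx, y:ny].reshape(-1, out.shape[2])
        if len(np.unique(block, axis=0)) <= max_colors or nx - x <= 1 or ny - y <= 1:
            out[x:nx, y:ny] = block.mean(axis=0)   # flatten block to its mean colour
            return
        mx, my = (x + nx) // 2, (y + ny) // 2
        quad(x, y, mx, my)
        quad(mx, y, nx, my)
        quad(x, my, mx, ny)
        quad(mx, my, nx, ny)

    quad(0, 0, out.shape[0], out.shape[1])
    return out

demo = np.random.randint(0, 256, size=(16, 16, 3), dtype=np.uint8)
print(quadify(demo, max_colors=8).shape)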
Exemplo n.º 35
0
class World:
    class WorldMetric:
        def __init__(self):
            self.lost_demand = []
            self.average_deviation_ideal_state = []
            self.deficient_battery = []
            self.time = []

        def add_analysis_metrics(self, world):
            """
            Add data to analysis
            :param world: world object to record state from
            """
            self.lost_demand.append(
                sum([1 for reward in world.rewards if reward == LOST_TRIP_REWARD])
                if len(world.rewards) > 0
                else 0
            )
            self.average_deviation_ideal_state.append(
                sum(
                    [
                        abs(
                            (sum([1 for _ in cluster.get_available_scooters()]))
                            - cluster.ideal_state
                        )
                        for cluster in world.state.clusters
                    ]
                )
                / len(world.state.clusters)
            )
            self.deficient_battery.append(
                sum(
                    [
                        cluster.ideal_state * 100
                        - (
                            sum(
                                [
                                    scooter.battery
                                    for scooter in cluster.get_available_scooters()
                                ]
                            )
                        )
                        for cluster in world.state.clusters
                        if len(cluster.scooters) < cluster.ideal_state
                    ]
                )
            )
            self.time.append(world.time)

        def get_lost_demand(self):
            """
            Returns list of all lost demand
            """
            return self.lost_demand

        def get_deviation_ideal_state(self):
            """
            Returns list of average deviation from ideal state during the time analysed
            """
            return self.average_deviation_ideal_state

        def get_deficient_battery(self):
            """
            Returns list of total deficient battery in the system during the analysed time
            """
            return self.deficient_battery

        def get_time_array(self):
            """
            Returns a list of all timestamps when when data used for analysis is recorded
            """
            return self.time

        def get_all_metrics(self):
            """
            Returns all metrics recorded for analysis
            """
            return (
                self.lost_demand,
                self.average_deviation_ideal_state,
                self.deficient_battery,
            )

    def __init__(
        self,
        shift_duration: int,
        sample_size=100,
        number_of_clusters=20,
        initial_state=None,
        policy="RandomRolloutPolicy",
        initial_location_depot=True,
        verbose=False,
    ):
        self.shift_duration = shift_duration
        if initial_state:
            self.state = initial_state
        else:
            self.state = clustering_scripts.get_initial_state(
                sample_size=sample_size,
                number_of_clusters=number_of_clusters,
                initial_location_depot=initial_location_depot,
            )
        self.stack = []
        self.time = 0
        self.rewards = []
        self.cluster_flow = {
            (start, end): 0
            for start in np.arange(len(self.state.clusters))
            for end in np.arange(len(self.state.clusters))
            if start != end
        }
        self.policy = get_policy(policy)
        self.metrics = World.WorldMetric()
        self.verbose = verbose
        if verbose:
            self.progress_bar = IncrementalBar(
                "Running World",
                check_tty=False,
                max=round(shift_duration / ITERATION_LENGTH_MINUTES) + 1,
                color=WHITE,
                suffix="%(percent)d%% - ETA %(eta)ds",
            )

    def run(self):
        while self.time < self.shift_duration:
            event = self.stack.pop(0)
            event.perform(self)
            if isinstance(event, classes.GenerateScooterTrips) and self.verbose:
                self.progress_bar.next()
        if self.verbose:
            self.progress_bar.finish()

    def get_remaining_time(self) -> int:
        """
        Computes the remaining time by taking the difference between the shift duration
        and the current time of the world object.
        :return: the remaining time as a float
        """
        return self.shift_duration - self.time

    def add_reward(self, reward: float, discount=False) -> None:
        """
        Adds the input reward to the rewards list of the world object
        :param discount: boolean if the reward is to be discounted
        :param reward: reward given
        """
        self.rewards.append(reward * self.get_discount() if discount else reward)

    def get_total_reward(self) -> float:
        """
        Get total accumulated reward at current point of time
        :return:
        """
        return sum(self.rewards)

    def add_event(self, event) -> None:
        """
        Adds event to the sorted stack.
        Avoids calling sort on every iteration by using the bisect package
        :param event: event to insert
        """
        insert_index = bisect.bisect([event.time for event in self.stack], event.time)
        self.stack.insert(insert_index, event)

    def add_trip_to_flow(self, start: int, end: int) -> None:
        """
        Adds a trip from start to end for cluster flow
        :param start: departure cluster
        :param end: arrival cluster
        """
        self.cluster_flow[(start, end)] += 1

    def get_cluster_flow(self) -> [(int, int, int)]:
        """
        Get all flows between cluster since last vehicle arrival
        :return: list: tuple (start, end, flow) flow from departure cluster to arrival cluster
        """
        return [(start, end, flow) for (start, end), flow in self.cluster_flow.items()]

    def clear_flow_dict(self) -> None:
        """
        Clears the cluster flow dict
        """
        for key in self.cluster_flow.keys():
            self.cluster_flow[key] = 0

    def get_scooters_on_trip(self) -> [(int, int, int)]:
        """
        Get all scooters that are currently out on a trip
        :return: list of all scooters that are out on a trip
        """
        return [
            (event.departure_cluster_id, event.arrival_cluster_id, event.scooter.id)
            for event in self.stack
            if isinstance(event, classes.ScooterArrival)
        ]

    def get_discount(self):
        # Divide by 60, as there are 60 minutes in an hour. We want this number in hours to avoid large numbers in the exponent
        return DISCOUNT_RATE ** (self.time / 60)
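add_event keeps the event stack sorted on insertion instead of re-sorting the whole list each time; the bisect idiom it relies on is easy to see in isolation:

import bisect

class Event:
    def __init__(self, time):
        self.time = time

stack = [Event(5), Event(20), Event(45)]
new_event = Event(30)
insert_index = bisect.bisect([e.time for e in stack], new_event.time)
stack.insert(insert_index, new_event)
print([e.time for e in stack])   # [5, 20, 30, 45]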
Exemplo n.º 36
0
def downloadImage(image, iteration, outage=False, oNr=0, oTime=0):

    doc = open(
        './measurements/%s/%s/results/setup.txt' %
        (currentInstance, currentTest), 'w+')
    doc.write(
        'Server:%s\nHosts:%s\nSeeders:%s\nImage:%s\nServer outage:%s\nOutage number:%s\nOutage start:%s'
        % (str(len(set.servers)), str(len(set.name)), str(len(
            set.seeder)), image, outage, oNr, oTime))
    doc.close()

    image = image.strip()
    for node in set.name:
        subprocess.call(['docker exec mn.%s sh -c "rm -rf times/*"' % (node)],
                        stdout=FNULL,
                        stderr=subprocess.STDOUT,
                        shell=True)
        subprocess.call(['docker exec mn.%s sh -c "mkdir times/"' % (node)],
                        stdout=FNULL,
                        stderr=subprocess.STDOUT,
                        shell=True)

    for i in range(int(iteration)):
        print('\n###\nTest #%s\n###' % (i + 1))
        print datetime.now()
        image = image.strip()

        #prepare downloads
        subprocess.call([
            'mkdir measurements/%s/%s/%s/' % (currentInstance, currentTest, i)
        ],
                        stdout=FNULL,
                        stderr=subprocess.STDOUT,
                        shell=True)
        subprocess.call([
            'mkdir measurements/%s/%s/%s/time/' %
            (currentInstance, currentTest, i)
        ],
                        stdout=FNULL,
                        stderr=subprocess.STDOUT,
                        shell=True)
        subprocess.call([
            'mkdir measurements/%s/%s/%s/traffic/' %
            (currentInstance, currentTest, i)
        ],
                        stdout=FNULL,
                        stderr=subprocess.STDOUT,
                        shell=True)

        #deleting and restarting
        deleted = [False] * len(set.name)
        restarted = [False] * len(set.name)
        sum = 0
        print 'Deleting images and restarting container'
        bar_restart = IncrementalBar('Finished cleanup(s)', max=len(set.name))
        for node in set.name:
            subprocess.call([
                'docker exec -it mn.%s docker image rm -f %s' % (node, image)
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)
            if node in set.servers:
                subprocess.call([
                    'docker exec -it mn.%s sh -c "(docker stop dfclient supernode && docker rm dfclient supernode)"&'
                    % (node)
                ],
                                stdout=FNULL,
                                stderr=subprocess.STDOUT,
                                shell=True)
            else:
                subprocess.call([
                    'docker exec -it mn.%s sh -c "(docker stop dfclient && docker rm dfclient)"&'
                    % (node)
                ],
                                stdout=FNULL,
                                stderr=subprocess.STDOUT,
                                shell=True)
            subprocess.call(
                ['docker exec -it mn.%s sh -c "iptables -Z"' % (node)],
                stdout=FNULL,
                stderr=subprocess.STDOUT,
                shell=True)
            subprocess.call([
                "docker exec mn.%s sh -c 'rm -f root/.small-dragonfly/logs/dfdaemon.log'"
                % node
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)  #root/.small-dragonfly/logs/*
            subprocess.call([
                "docker exec mn.%s sh -c 'rm -f root/.small-dragonfly/logs/dfclient.log'"
                % node
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)  #root/.small-dragonfly/logs/*
            subprocess.call([
                "docker exec mn.%s sh -c 'rm -f root/.small-dragonfly/logs/dfserver.log'"
                % node
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)  #root/.small-dragonfly/logs/*
            subprocess.call([
                "docker exec mn.%s sh -c 'rm -rf root/.small-dragonfly/data/*'"
                % node
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)  #root/.small-dragonfly/logs/*
            subprocess.call([
                "docker exec mn.%s sh -c 'rm -rf root/.small-dragonfly/meta/*'"
                % node
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)  #root/.small-dragonfly/logs/*
            subprocess.call([
                "docker exec mn.%s sh -c 'rm -rf root/.small-dragonfly/dfdaemon/data/*'"
                % node
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)  #root/.small-dragonfly/logs/*

        while sum < len(set.name):
            for node in set.name:
                if 'localhost:16000/%s' % image in subprocess.check_output(
                    ['docker exec mn.%s docker image ls' % node], shell=True):
                    subprocess.call([
                        'docker exec mn.%s docker image rm -f %s' %
                        (node, image)
                    ],
                                    stdout=FNULL,
                                    stderr=subprocess.STDOUT,
                                    shell=True)
                if deleted[set.name.index(node)] == False:  #delete
                    if node in set.servers:
                        running = subprocess.check_output(
                            ['docker exec mn.%s docker ps' % node], shell=True)
                        # ('dfclient' and 'supernode') only tests 'supernode';
                        # require both containers to be gone before marking deleted
                        if 'dfclient' not in running and 'supernode' not in running:
                            deleted[set.name.index(node)] = True
                        else:
                            procs = subprocess.check_output(
                                ['docker exec mn.%s sh -c "ps -a"' % node],
                                shell=True)
                            # ('docker rm' or 'docker stop') only tests 'docker rm';
                            # only re-issue the stop/rm if neither is already running
                            if 'docker rm' not in procs and 'docker stop' not in procs:
                                subprocess.call([
                                    'docker exec mn.%s sh -c "(docker stop dfclient supernode && docker rm dfclient supernode )"&'
                                    % (node)
                                ],
                                                stdout=FNULL,
                                                stderr=subprocess.STDOUT,
                                                shell=True)
                    else:
                        if not 'dfclient' in subprocess.check_output(
                            ['docker exec mn.%s docker ps' % node],
                                shell=True):
                            deleted[set.name.index(node)] = True
                        else:
                            procs = subprocess.check_output(
                                ['docker exec mn.%s sh -c "ps -a"' % node],
                                shell=True)
                            # ('docker rm' or 'docker stop') only tests 'docker rm';
                            # only re-issue the stop/rm if neither is already running
                            if 'docker rm' not in procs and 'docker stop' not in procs:
                                subprocess.call([
                                    'docker exec mn.%s sh -c "(docker stop dfclient && docker rm dfclient)"&'
                                    % node
                                ],
                                                stdout=FNULL,
                                                stderr=subprocess.STDOUT,
                                                shell=True)
                else:
                    if restarted[set.name.index(node)] == False:
                        if node in set.servers:
                            running = subprocess.check_output(
                                ['docker exec mn.%s docker ps' % node],
                                shell=True)
                            # require both dfclient and supernode to be back up
                            if 'dfclient' in running and 'supernode' in running:
                                sum = sum + 1
                                restarted[set.name.index(node)] = True
                                bar_restart.next()
                            else:
                                if not ('compose') in subprocess.check_output(
                                    ['docker exec mn.%s sh -c "ps -a"' % node],
                                        shell=True):
                                    subprocess.call([
                                        'docker exec mn.%s sh -c "(export IP=%s && docker-compose -f stack_server.yml up -d)"&'
                                        % (node, set.ip[set.name.index(node)])
                                    ],
                                                    stdout=FNULL,
                                                    stderr=subprocess.STDOUT,
                                                    shell=True)
                                    #print node
                        else:
                            if 'dfclient' in subprocess.check_output(
                                ['docker exec mn.%s docker ps' % node],
                                    shell=True):
                                sum = sum + 1
                                restarted[set.name.index(node)] = True
                                bar_restart.next()
                            else:
                                if not ('compose') in subprocess.check_output(
                                    ['docker exec mn.%s sh -c "ps -a"' % node],
                                        shell=True):
                                    subprocess.call([
                                        'docker exec mn.%s sh -c "(export IP=%s && docker-compose -f stack_client.yml up -d)"&'
                                        % (node, set.ip[set.name.index(node)])
                                    ],
                                                    stdout=FNULL,
                                                    stderr=subprocess.STDOUT,
                                                    shell=True)
            time.sleep(5)
        print ''
        check.check()
        while check.repeat == True:
            check.check()
        bar_restart.finish()

        #prepare seeder
        print('Preparing seeder(s)')
        for node in set.seeder:
            subprocess.call(
                ['docker exec mn.%s docker pull %s' % (node, image)],
                stdout=FNULL,
                stderr=subprocess.STDOUT,
                shell=True)
            subprocess.call(
                ['docker exec -it mn.%s sh -c "iptables -Z"' % (node)],
                stdout=FNULL,
                stderr=subprocess.STDOUT,
                shell=True)

        #start download
        sum = 0
        complete = [False] * len(set.name)
        print('Starting download(s)')
        iStart = datetime.now()
        print iStart
        bar_download = IncrementalBar('Waiting for download(s)',
                                      max=len(set.name))
        for node in set.name:
            if not node in set.seeder:
                subprocess.call([
                    'docker exec mn.%s sh -c "(date +"%%Y-%%m-%%dT%%T.%%6N" > times/%s_%s_start.txt && docker pull %s && date +"%%Y-%%m-%%dT%%T.%%6N" > times/%s_%s_end.txt)"&'
                    % (node, node, i, image, node, i)
                ],
                                stdout=FNULL,
                                stderr=subprocess.STDOUT,
                                shell=True)
            else:
                complete[set.name.index(node)] = True
                bar_download.next()
                sum = sum + 1
                iPrev = datetime.now()

        #server outage
        if outage == True:
            print('\nWaiting %s seconds for outage...' % oTime)
            time.sleep(int(oTime))
            for j in range(1, int(oNr) + 1):
                print set.servers[-j]  # the server actually stopped below
                subprocess.call([
                    'docker exec mn.%s docker stop supernode &' %
                    (set.servers[-j])
                ],
                                stdout=FNULL,
                                stderr=subprocess.STDOUT,
                                shell=True)

        #check download
        while sum < len(set.name):
            for node in set.name:
                if complete[set.name.index(node)] == False:
                    if image in subprocess.check_output(
                        ['docker exec mn.%s docker image ls' % node],
                            shell=True):
                        #print ('Docker pull successful for mn.%s' % node) #remove first comment for info on successful pull
                        sum = sum + 1
                        complete[set.name.index(node)] = True
                        bar_download.next()
                    else:
                        if not image in subprocess.check_output(
                            ['docker exec mn.%s sh -c "ps -a"' % node],
                                shell=True):
                            subprocess.call([
                                'docker exec mn.%s sh -c "(docker pull %s && date +"%%Y-%%m-%%dT%%T.%%6N" > times/%s_%s_end.txt)"&'
                                % (node, image, node, i)
                            ],
                                            stdout=FNULL,
                                            stderr=subprocess.STDOUT,
                                            shell=True)
                            #print ('Docker pull restarted for mn.%s' % node) #remove first comment for info on failed pull
            time.sleep(1)
        bar_download.finish()
        print 'Download(s) successful'

        print 'Grabbing data after download(s)'
        for node in set.name:
            subprocess.call([
                'docker cp mn.%s:times/%s_%s_start.txt measurements/%s/%s/%s/time/%s_start.txt'
                % (node, node, i, currentInstance, currentTest, i, node)
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)
            subprocess.call([
                'docker cp mn.%s:times/%s_%s_end.txt measurements/%s/%s/%s/time/%s_end.txt'
                % (node, node, i, currentInstance, currentTest, i, node)
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)
            subprocess.call([
                "docker exec mn.%s sh -c 'iptables -L INPUT -n -v -x > tmp_IN.txt'"
                % node
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)
            subprocess.call([
                'docker cp mn.%s:tmp_IN.txt measurements/%s/%s/%s/traffic/%s_IN.txt'
                % (node, currentInstance, currentTest, i, node)
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)
            subprocess.call([
                "docker exec mn.%s sh -c 'iptables -L OUTPUT -n -v -x > tmp_OUT.txt'"
                % node
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)
            subprocess.call([
                'docker cp mn.%s:tmp_OUT.txt measurements/%s/%s/%s/traffic/%s_OUT.txt'
                % (node, currentInstance, currentTest, i, node)
            ],
                            stdout=FNULL,
                            stderr=subprocess.STDOUT,
                            shell=True)

    set.measureTime(False, currentInstance, currentTest, iteration)
    set.measureTraffic(False, currentInstance, currentTest, iteration)
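The loop above starts detached docker pulls and then polls every node, advancing the bar one tick as soon as that node's image appears. The same wait-and-tick pattern is sketched below in isolation; wait_for_all and the check_done(node) predicate are hypothetical names, and only the progress package is assumed.

# Minimal sketch, assuming a user-supplied check_done(node) -> bool predicate.
import time
from progress.bar import IncrementalBar

def wait_for_all(nodes, check_done, poll_interval=1.0):
    """Advance the bar by one tick per node as soon as that node reports completion."""
    bar = IncrementalBar('Waiting for download(s)', max=len(nodes))
    done = set()
    while len(done) < len(nodes):
        for node in nodes:
            if node not in done and check_done(node):
                done.add(node)
                bar.next()
        time.sleep(poll_interval)
    bar.finish()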
Exemplo n.º 37
0
class SysExParser(object):
    def __init__(self,send_func,debug=False):
        super(SysExParser,self).__init__()
        self.send_func  = send_func
        self.debug      = debug
        self.dump_file  = None
        self.dump_on    = False
        self.dump_ram   = False
        self.printer    = MessagePrinter(debug=self.debug)
        self.handlers   = {
            # FILE FUNCTIONS  FILE_F
            "F_DHDR":      self.handleFileDumpHeader,
            "F_DPKT":      self.handleFileDumpDataBlock,
            "DIR_HDR":     self.handleFileDumpHeader,
            "F_WAIT"     : noop,
            "F_CANCEL"   : cancel,
            "F_ERR"      : cancel,
            # DEVICE COMMAND  DEVICE_CMD
            "STAT_ANSWER": self.handleStatusAnswer,
            "DATA_HEADER": self.handleDirectoryAnswer,
            "DATA_DUMP"  : self.handleDataDump,
            "DIR_ANSWER" : self.handleDirectoryAnswer,
            "D_WAIT"     : noop,
            "D_ACK"      : noop,
            "D_CANCEL"   : cancel,
            "D_ERR"      : cancel,
        }

        self.dump_start = [ "F_DREQ", "DATA_REQUEST" ]
        self.dump_stop  = [ "F_CANCEL", "D_CANCEL"]

    def __del__(self):
        self.closeDumpFile()

    def createDumpFile(self,filename=None):
        if not filename:
            filename = "dump_%s.bin" % mktimestamp()
        self.dump_file = open(filename,"wb")
        
    def closeDumpFile(self):
        if not self.dump_file: return
        self.dump_file.close()
        self.dump_file = None

    def startDump(self,filename,size):
        if not self.dump_on: return
        self.dump_written = 0
        self.dump_size = size
        self.closeDumpFile()
        self.createDumpFile(filename)
        print "Dumping '%s'" % filename
        showsize = ' 0x%(index)06x' if self.dump_ram else ''
        self.bar = IncrementalBar(
            max=size,
            suffix = '%(percent)d%% [%(elapsed_td)s / %(eta_td)s]' + showsize)

    def stopDump(self):
        if not self.dump_on: return
        self.bar.finish()
        self.closeDumpFile()
        self.dump_on = False
        
    def dump(self,data,filename=None):
        if not self.dump_on: return
        if not self.dump_file:
            self.createDumpFile()
        if self.dump_written == self.dump_size:
            print "Discarding", len(data), "bytes, dump has ended"
        elif len(data) + self.dump_written > self.dump_size:
            discard = len(data) + self.dump_written - self.dump_size
            self.dump_file.write(bytearray(data[:-discard]))
            self.bar.next(self.dump_size-self.dump_written)
            self.dump_written = self.dump_size
            self.bar.finish()
            leftover = data[-discard:]
            for i in leftover:
                if i != 0:
                    print "Discarding non-NUL data:", hexdump(leftover)
                    break
        else:
            self.dump_file.write(bytearray(data))
            self.dump_written += len(data)
            self.bar.next(len(data))
        
    # FILE FUNCTIONS  FILE_F
    def handleFileDumpHeader(self,msg,timestamp):
        self.sendSysEx( MSCEIMessage(fromName="F_WAIT"),timestamp=timestamp+1)
        offset=17
        data = []
        for i in xrange(2):
            data += conv7_8(msg[offset:offset+8])
            offset += 8
        location = ''
        while msg[offset] != 0:
            location += chr(msg[offset])
            offset += 1
        offset+=1
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            filename = str(bytearray(msg[5:16])).strip()
            length = struct.unpack('>I',list2str(data[4:8]))[0]
            self.startDump(filename,length)
            self.dump(data[8:])
            self.sendSysEx( MSCEIMessage(fromName="F_ACK"),
                            timestamp=timestamp+2)
        else:
            self.sendSysEx( MSCEIMessage(fromName="F_NACK"),
                            timestamp=timestamp+2)
        return True
        
    def handleFileDumpDataBlock(self,msg,timestamp):
        self.sendSysEx( MSCEIMessage(fromName="F_WAIT"),timestamp=timestamp+1)
        noctets = msg[5]
        offset=6
        data = []
        for i in xrange(noctets):
            data += conv7_8(msg[offset:offset+8])
            offset += 8
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            self.dump(data)
            self.sendSysEx( MSCEIMessage(fromName="F_ACK"),
                            timestamp=timestamp+2)
        else:
            self.sendSysEx( MSCEIMessage(fromName="F_NACK"),
                            timestamp=timestamp+2)
        return True

    # DEVICE COMMAND  DEVICE_CMD
    def handleStatusAnswer(self,msg,timestamp):
        self.sendSysEx( MSCEIMessage(fromName="D_WAIT"),timestamp=timestamp+1)
        offset= 5 + 3*8
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            self.sendSysEx( MSCEIMessage(fromName="D_ACK"),
                            timestamp=timestamp+2)
            if self.dump_ram:
                self.dump_on = True
                self.startDump("ramdump_%s.bin" % mktimestamp(), 2097060)
                time.sleep(0.1)
                self.sendSysEx( MSCEIMessage(fromName="F_ACK"),
                                timestamp=timestamp+3)
                return True
        else:
            self.sendSysEx( MSCEIMessage(fromName="D_NACK"),
                            timestamp=timestamp+2)
        return False

    def handleDataDump(self,msg,timestamp):
        self.sendSysEx( MSCEIMessage(fromName="D_WAIT"))
        noctets = msg[5]
        offset=6
        data = []
        for i in xrange(noctets):
            data += conv7_8(msg[offset:offset+8])
            offset += 8
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            self.dump(data)
            self.sendSysEx( MSCEIMessage(fromName="D_ACK"),
                            timestamp=timestamp+2)
        else:
            self.sendSysEx( MSCEIMessage(fromName="D_NACK"),
                            timestamp=timestamp+2)
        return True

    def handleDirectoryAnswer(self,msg,timestamp):
        #time.sleep(0.1)
        self.sendSysEx( MSCEIMessage(fromName="D_WAIT"),timestamp=timestamp+1)
        offset = 8 + 11 + 1
        data = []
        for i in xrange(2):
            data += conv7_8(msg[offset:offset+8])
            offset += 8
        offset += 11
        cc = msg[offset]
        cc_calc = checksum(msg[1:offset])
        if cc == cc_calc:
            filename = str(bytearray(msg[8:19])).strip()
            length = struct.unpack('>I',list2str(data[4:8]))[0]
            self.startDump(filename,length)
            #time.sleep(0.1)
            self.sendSysEx( MSCEIMessage(fromName="D_ACK"),
                            timestamp=timestamp+2)
        else:
            self.sendSysEx( MSCEIMessage(fromName="D_NACK"),
                            timestamp=timestamp+2)
        return True
        
    def parse(self, msg, timestamp, acceptUnhandled=True):
        if msg[0] != 0xF0:
            print 'Non-sysex message'
            print [ hex(b) for b in msg ]
            print
            return acceptUnhandled
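dump() above drives its bar by byte count rather than by iteration: each block advances the bar by the number of bytes written, and the final block is clamped so the bar never runs past the announced size. A standalone sketch of that idea follows; BoundedDumpWriter is a hypothetical name and is not part of SysExParser.

# Minimal sketch of a byte-counting progress writer, assuming the progress package.
from progress.bar import IncrementalBar

class BoundedDumpWriter(object):
    """Write at most `expected` bytes to `path`, advancing a byte-count progress bar."""
    def __init__(self, path, expected):
        self.f = open(path, 'wb')
        self.expected = expected
        self.written = 0
        self.bar = IncrementalBar('Dumping', max=expected,
                                  suffix='%(percent)d%% [%(elapsed_td)s / %(eta_td)s]')

    def write(self, chunk):
        if self.written >= self.expected:
            return  # dump already complete; discard trailing padding
        keep = min(len(chunk), self.expected - self.written)
        self.f.write(bytes(bytearray(chunk[:keep])))
        self.written += keep
        self.bar.next(keep)
        if self.written == self.expected:
            self.bar.finish()
            self.f.close()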
Exemplo n.º 38
0
    def render_all(self, cat_ids=None, verbose=True, blender_path='blender'):
        import subprocess
        from progress.bar import IncrementalBar
        import tempfile
        from .path import renderings_format
        from ..objs import try_extract_models
        for cat_id in cat_ids:
            try_extract_models(cat_id)
        _FNULL = open(os.devnull, 'w')
        call_kwargs = dict()
        if not verbose:
            call_kwargs['stdout'] = _FNULL
            call_kwargs['stderr'] = subprocess.STDOUT

        root_dir = os.path.realpath(os.path.dirname(__file__))
        script_path = os.path.join(root_dir, 'scripts', 'blender_render.py')

        render_params_path = None
        camera_positions_path = None

        def clean_up():
            for path in (render_params_path, camera_positions_path):
                if path is not None and os.path.isfile(path):
                    os.remove(path)

        render_params_fp, render_params_path = tempfile.mkstemp(suffix='.json')
        try:
            view_params = self.get_view_params()
            view_params.update(**self.get_image_params())
            os.write(render_params_fp, json.dumps(view_params).encode('utf-8'))
            os.close(render_params_fp)

            args = [
                blender_path, '--background', '--python', script_path, '--',
                '--render_params', render_params_path
            ]

            keys = tuple(self.needs_rendering_keys(cat_ids))
            n = len(keys)
            if n == 0:
                print('No keys to render.')
                return
            print('Rendering %d examples' % n)
            bar = IncrementalBar(max=n)
            for cat_id, example_id in keys:
                bar.next()

                camera_positions_fp, camera_positions_path = tempfile.mkstemp(
                    suffix='.npy')
                os.close(camera_positions_fp)
                np.save(
                    camera_positions_path,
                    self.view_manager.get_camera_positions(cat_id, example_id))

                out_dir = self.get_renderings_dir(cat_id, example_id)
                proc = subprocess.Popen(
                    args + [
                        '--obj',
                        self.get_obj_path(cat_id, example_id),
                        '--out_dir',
                        out_dir,
                        '--filename_format',
                        renderings_format,
                        '--camera_positions',
                        camera_positions_path,
                    ], **call_kwargs)
                try:
                    proc.wait()
                except KeyboardInterrupt:
                    proc.kill()
                    raise
                if os.path.isfile(camera_positions_path):
                    os.remove(camera_positions_path)
            bar.finish()
        except (Exception, KeyboardInterrupt):
            clean_up()
            raise
        clean_up()
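render_all() writes each example's camera positions to a temporary .npy file, hands it to Blender through subprocess, and removes it again, with clean_up guarding against interrupts. The sketch below isolates that temp-file-per-item pattern; render_with_temp_params, build_args and the item['camera_positions'] key are assumptions for illustration, not part of this class.

# Minimal sketch of the temp-file-per-item subprocess pattern used above.
import os
import subprocess
import tempfile

import numpy as np

def render_with_temp_params(items, build_args):
    """Write each item's camera positions to a temp file, call the external tool, always clean up."""
    for item in items:
        fd, path = tempfile.mkstemp(suffix='.npy')
        os.close(fd)
        try:
            np.save(path, item['camera_positions'])
            proc = subprocess.Popen(build_args(item, path))
            try:
                proc.wait()
            except KeyboardInterrupt:
                proc.kill()
                raise
        finally:
            if os.path.isfile(path):
                os.remove(path)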
Exemplo n.º 39
0
def lemmas_collocations(database, IGC_folder, prop_names):
    dci = SQLDatabase(db_name='gagnagrunnar/nmo.db')
    dim = SQLDatabase(db_name='gagnagrunnar/bin_lemmur_ordmyndir.db')
    filters = SQLDatabase(db_name='gagnagrunnar/IGC_filters.db'
                          )  # Predefined stop-word list based on the RMH
    pos_to_ignore = [
        'e', 'c', 'v', 'as', 'to', 'tp', 'ta', 'au'
    ]  # The POS tags that should not be displayed in the results
    outdict = {}

    print("""
    ============================================================
    Les skjöl úr málheildinni. 
    ============================================================
    """)
    xml_files = glob.glob(IGC_folder + '/**/*.xml', recursive=True)

    filebar = IncrementalBar('Framvinda', max=len(xml_files))
    for file in xml_files:
        colloc = []
        with open(file, 'r', encoding='utf-8') as content:
            try:
                tree = ET.parse(content)
                for word in tree.iter():
                    if word.text is not None:
                        if word.attrib.get('lemma') is not None:
                            pos = word.attrib.get('type')
                            lemma = word.attrib.get('lemma')
                            word_form = word.text
                            colloc.append((word_form, lemma, pos))
                        elif word.text in punctuation:
                            colloc.append((word.text, ' ', ' '))

                for i, w in enumerate(colloc):
                    if prop_names == False:
                        if w[2].startswith('n') and w[2].endswith(
                                's'):  # Ignore proper names
                            continue
                    if w[2] in pos_to_ignore:
                        continue
                    if w[1][-1] == '-':  # if a word starts or ends in a hyphen, ignore it (likely OCR error)
                        continue
                    if w[1][0] == '-':
                        continue
                    if (
                            not all(i.isalpha() or i == '-' for i in w[1])
                    ):  # if a word contains anything but an alphabetic letter or hyphen, ignore it
                        continue
                    filter_query = SQLiteQuery(w[1],
                                               'filter',
                                               'FILTER_WORD_FORMS',
                                               cursor=filters.cursor)
                    if filter_query.exists:
                        continue
                    else:
                        if database == 'NMO':
                            query = SQLiteQuery(w[1],
                                                'lemma',
                                                'DCI_ELEMENT',
                                                cursor=dci.cursor
                                                )  # Capitalized words included
                            query_lower = SQLiteQuery(w[1].lower(),
                                                      'lemma',
                                                      'DCI_ELEMENT',
                                                      cursor=dci.cursor)
                        elif database == 'BIN':
                            query = SQLiteQuery(w[1],
                                                'lemma',
                                                'DIM_ELEMENT',
                                                cursor=dim.cursor
                                                )  # Capitalized words included
                            query_lower = SQLiteQuery(w[1].lower(),
                                                      'lemma',
                                                      'DIM_ELEMENT',
                                                      cursor=dim.cursor)
                        if not query.exists and not query_lower.exists:  # if the word is found neither in the database nor in the filters
                            if len(w[1]) > 1:
                                if i - 2 < 0:  # collects 2 words before and after the candidate
                                    w1 = ""
                                else:
                                    w1 = str(colloc[i - 2][0])
                                if i - 1 < 0:
                                    w2 = ""
                                else:
                                    w2 = str(colloc[i - 1][0])
                                if i + 1 > len(colloc) - 1:
                                    w4 = ""
                                else:
                                    w4 = str(colloc[i + 1][0])
                                if i + 2 > len(colloc) - 1:
                                    w5 = ""
                                else:
                                    w5 = str(colloc[i + 2][0])
                                if w[1] in outdict:
                                    if str(w1 + ' ' + w2 + ' ' + w[0] + ' ' +
                                           w4 + ' ' + w5) not in outdict[
                                               w[1]]['orðstaða']:
                                        outdict[w[1]]['orðstaða'][
                                            str(w1 + ' ' + w2 + ' ' + w[0] +
                                                ' ' + w4 + ' ' + w5)] = 1
                                    else:
                                        outdict[w[1]]['orðstaða'][
                                            str(w1 + ' ' + w2 + ' ' + w[0] +
                                                ' ' + w4 + ' ' + w5)] += 1
                                    outdict[w[1]]['tíðni'] += 1
                                else:
                                    outdict[w[1]] = {}
                                    outdict[w[1]]['tíðni'] = 1
                                    outdict[w[1]]['orðstaða'] = {
                                        str(w1 + ' ' + w2 + ' ' + w[0] + ' ' + w4 + ' ' + w5):
                                        1
                                    }
            except sqlite3.OperationalError:
                pass
        filebar.next()
        sys.stdout.flush()
    filebar.finish()

    if IGC_folder == "malheildir/RMH/":
        with open(f'uttak/{database}/RMH_lemmur_med_orstodulyklum.freq',
                  mode='w+') as outputfile:
            candidates = {
                k: v
                for k, v in sorted(outdict.items(),
                                   key=lambda item: item[1]['tíðni'],
                                   reverse=True)
            }  # Sort the candidates by their total frequencies
            for key, item in candidates.items():
                for counter, dictitem in enumerate(item.items()):
                    if counter % 2 == 0:
                        freq = dictitem[1]
                    elif counter % 2 != 0:
                        sorted_sents = {
                            k: v
                            for k, v in sorted(
                                dictitem[1].items(
                                ),  # Sort the sentence examples by their frequencies
                                key=lambda item: item[1],
                                reverse=True)
                        }
                        if len(
                                sorted_sents
                        ) > 5:  # This limits the examples to the 5 most frequent ones; can be changed
                            sents = list(sorted_sents)[:5]
                        else:
                            sents = list(sorted_sents)
                        outputfile.write(
                            key + ' : ' + str(freq) + '. ' + str(sents) + '\n'
                        )  # word: freq. [sent example 1, sent example 2...]

        print(f"""
    ============================================================
    Úttaksskjalið RMH_lemmur_med_orstodulyklum.freq er tilbúið og 
    er að finna í undirmöppunni uttak/{database}/
    ============================================================
        """)

    elif IGC_folder == "malheildir/RMH/CC_BY/":
        with open(f'uttak/{database}/CC_BY_lemmur_med_orstodulyklum.freq',
                  mode='w+') as outputfile:
            candidates = {
                k: v
                for k, v in sorted(outdict.items(),
                                   key=lambda item: item[1]['tíðni'],
                                   reverse=True)
            }  # Sort the candidates by their total frequencies
            for key, item in candidates.items():
                for counter, dictitem in enumerate(item.items()):
                    if counter % 2 == 0:
                        freq = dictitem[1]
                    elif counter % 2 != 0:
                        sorted_sents = {
                            k: v
                            for k, v in sorted(
                                dictitem[1].items(
                                ),  # Sort the sentence examples by their frequencies
                                key=lambda item: item[1],
                                reverse=True)
                        }
                        if len(
                                sorted_sents
                        ) > 5:  # This limits the examples to the 5 most frequent ones; can be changed
                            sents = list(sorted_sents)[:5]
                        else:
                            sents = list(sorted_sents)
                        outputfile.write(
                            key + ' : ' + str(freq) + '. ' + str(sents) + '\n'
                        )  # word: freq. [sent example 1, sent example 2...]

        print(f"""
    ============================================================
    Úttaksskjalið CC_BY_lemmur_med_orstodulyklum.freq er tilbúið og 
    er að finna í undirmöppunni uttak/{database}/
    ============================================================
        """)
    elif IGC_folder == "malheildir/RMH/MIM/":
        with open(f'uttak/{database}/MIM_lemmur_med_orstodulyklum.freq',
                  mode='w+') as outputfile:
            candidates = {
                k: v
                for k, v in sorted(outdict.items(),
                                   key=lambda item: item[1]['tíðni'],
                                   reverse=True)
            }  # Sort the candidates by their total frequencies
            for key, item in candidates.items():
                for counter, dictitem in enumerate(item.items()):
                    if counter % 2 == 0:
                        freq = dictitem[1]
                    elif counter % 2 != 0:
                        sorted_sents = {
                            k: v
                            for k, v in sorted(
                                dictitem[1].items(
                                ),  # Sort the sentence examples by their frequencies
                                key=lambda item: item[1],
                                reverse=True)
                        }
                        if len(
                                sorted_sents
                        ) > 5:  # This limits the examples to the 5 most frequent ones; can be changed
                            sents = list(sorted_sents)[:5]
                        else:
                            sents = list(sorted_sents)
                        outputfile.write(
                            key + ' : ' + str(freq) + '. ' + str(sents) + '\n'
                        )  # word: freq. [sent example 1, sent example 2...]

        print(f"""
    ============================================================
    Úttaksskjalið MIM_lemmur_med_orstodulyklum.freq er tilbúið og 
    er að finna í undirmöppunni uttak/{database}/
    ============================================================
        """)

    else:
        namefolder = IGC_folder.split("/")[3]
        with open(f'uttak/{database}/' + namefolder +
                  '_lemmur_med_orstodulyklum.freq',
                  mode='w+') as outputfile:
            candidates = {
                k: v
                for k, v in sorted(outdict.items(),
                                   key=lambda item: item[1]['tíðni'],
                                   reverse=True)
            }  # Sort the candidates by their total frequencies
            for key, item in candidates.items():
                for counter, dictitem in enumerate(item.items()):
                    if counter % 2 == 0:
                        freq = dictitem[1]
                    elif counter % 2 != 0:
                        sorted_sents = {
                            k: v
                            for k, v in sorted(
                                dictitem[1].items(
                                ),  # Sort the sentence examples by their frequencies
                                key=lambda item: item[1],
                                reverse=True)
                        }
                        if len(
                                sorted_sents
                        ) > 5:  # This limits the examples to the 5 most frequent ones; can be changed
                            sents = list(sorted_sents)[:5]
                        else:
                            sents = list(sorted_sents)
                        outputfile.write(
                            key + ' : ' + str(freq) + '. ' + str(sents) + '\n'
                        )  # word: freq. [sent example 1, sent example 2...]

        print(f"""
    ============================================================
    Úttaksskjalið {namefolder}_lemmur_med_orstodulyklum.freq er tilbúið og 
    er að finna í undirmöppunni uttak/{database}/
    ============================================================
        """)
Exemplo n.º 40
0
def getFeatureVectorAndLabels(data_dir):
    allDrugEvents = []
    samplesList = []
    labelsList = []
    allFiles = []
    featuresDict = defaultdict(list)
    featureNames = ['sections', 'containsFutureWord', 'prevSentContainsFutureWord',\
                    'current_tense', 'prev_tense', 'temporalType',\
                    'polarity', 'position', 'modality', 'proximity', 'futureCount']
    tfIdfFeatureVectorList = []
    wordEmbeddingsFeatureVectorList = []
    with open('drugClassification.csv', 'w') as csvfile:
        filewriter = csv.writer(csvfile)
        filewriter.writerow(['Drug', 'Predicted Label', 'Correct Label'])

    coreNLPClient = CoreNLPClient(annotators=[
        'tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'parse', 'depparse',
        'coref'
    ],
                                  timeout=100000,
                                  memory='8G')
    filesToProcess = [
        file for file in os.listdir(data_dir) if (file.endswith('.txt'))
    ]
    bar = IncrementalBar('Processing', max=len(filesToProcess))
    for file in filesToProcess:
        with open(os.path.join(data_dir, file), 'r') as f:
            raw = f.read()
        CLAMPdrugs = getAllDrugsFromCLAMP(file, data_dir, raw)
        drugEvents, drugEventsStartIndices, drugEventPolarityFeatureVector, drugEventModalityFeatureVector = getDrugEvents(
            file, data_dir, CLAMPdrugs)

        correctLabels = getLabels(file, drugEvents, data_dir)
        allFiles += [file] * len(correctLabels)
        sectionsFeatureVector = getSectionFeature(file, data_dir,
                                                  drugEventsStartIndices)
        containsFutureWordsVector, prevSentContainsFutureWordsFeatureVector, proximityToFutureWordFeatureVector, futureWordsCountFeatureVector = getContainsFutureWordsFeature(
            raw, drugEvents, allDrugEvents)
        currentTenseFeatureVector = getCurrentTenseFeatureVector(
            file, coreNLPClient, drugEvents, raw)
        prevTenseFeatureVector = getPrevTenseFeatureVector(
            file, coreNLPClient, drugEvents, raw)
        temporalTypeFeatureVector = getTemporalCluesFeatureVectors(
            file, drugEvents, raw, data_dir)
        positionInTextFeatureVector = getPositionInTextFeatureVector(
            raw, drugEvents)
        wordEmbeddingsFeatureVector = getWordEmbeddingsFeatureVector(
            raw, drugEvents)
        tfIdfFeatureVector = getTfIdfVectors(drugEvents, raw,
                                             drugEventsStartIndices)
        wordEmbeddingsFeatureVectorList += wordEmbeddingsFeatureVector
        tfIdfFeatureVectorList += tfIdfFeatureVector
        features = [
            sectionsFeatureVector, containsFutureWordsVector,
            prevSentContainsFutureWordsFeatureVector,
            currentTenseFeatureVector, prevTenseFeatureVector,
            temporalTypeFeatureVector, drugEventPolarityFeatureVector,
            positionInTextFeatureVector, drugEventModalityFeatureVector,
            proximityToFutureWordFeatureVector, futureWordsCountFeatureVector
        ]
        for i in range(len(features)):
            featuresDict[featureNames[i]] += features[i]
        for i in range(len(sectionsFeatureVector)):
            sampleList = [feature[i] for feature in features]
            samplesList.append(sampleList)
        for label in correctLabels:
            labelsList.append(label)
        bar.next()
    bar.finish()
    ordinalEncoder = OrdinalEncoder()
    featuresVector = ordinalEncoder.fit_transform(samplesList)
    wordEmbeddingsFeatureVector = np.array(wordEmbeddingsFeatureVectorList)
    featuresVector = np.hstack((featuresVector, wordEmbeddingsFeatureVector))
    tfIdfFeatureVector = np.array(tfIdfFeatureVectorList)
    featuresVector = np.hstack((featuresVector, tfIdfFeatureVector))
    labelsVector = np.array(labelsList)
    return allFiles, allDrugEvents, featuresDict, featuresVector, labelsVector
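The final assembly step above ordinal-encodes the categorical per-sample features and horizontally stacks them with the dense embedding and TF-IDF blocks. The toy sketch below shows only that combination step; the data is invented and is not the clinical feature set used here.

# Minimal sketch: ordinal-encode categorical columns, then hstack with dense features.
import numpy as np
from sklearn.preprocessing import OrdinalEncoder

categorical = [['history', 'past'],
               ['plan', 'future'],
               ['history', 'future']]          # toy categorical samples
dense = np.array([[0.1, 0.3],
                  [0.7, 0.2],
                  [0.4, 0.9]])                 # toy embedding-like block

encoded = OrdinalEncoder().fit_transform(categorical)   # shape (3, 2), float codes
features = np.hstack((encoded, dense))                  # shape (3, 4)
print(features.shape)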
Exemplo n.º 41
0
def user_defined_collocations(database, filterbase, corpus):
    """
    Iterates through the corpus and retrieves the words that do 
    not appear in the database. Collects 5 word collocations on
    every word, two words before and after the candidate word. 
    """
    db = SQLDatabase(db_name=database)
    txt_files = glob.glob(corpus + '/**/*.txt', recursive=True)
    if filterbase not in ['n', 'N']:
        filters = SQLDatabase(db_name=filterbase)
    else:
        pass  # if there is no filterbase, ignore this step

    outdict = {}

    print("""
    ============================================================
    Les skjöl úr málheildinni.
    ============================================================
    """)
    filebar = IncrementalBar('Framvinda', max=len(txt_files))
    for file in txt_files:
        with open(file, 'r', encoding='utf-8') as content:
            f = content.read()
            words = f.split()
            for i, w in enumerate(words):
                if w[-1] == '-':  # if a word starts or ends in a hyphen, ignore it (likely OCR error)
                    continue
                if w[0] == '-':
                    continue
                if (
                        not all(i.isalpha() or i == '-' for i in w)
                ):  # if a word contains anything but an alphabetic letter or hyphen, ignore it
                    continue
                if filterbase not in [
                        'n', 'N'
                ]:  # if a stopword database has been defined, filter the results
                    filter_query = SQLiteQuery(w,
                                               'filter',
                                               'FILTER_WORD_FORMS',
                                               cursor=filters.cursor)
                    if filter_query.exists:
                        continue
                    else:
                        query = SQLiteQuery(
                            w, 'word', 'LEXICON_WORD', cursor=db.cursor
                        )  # parameters must be updated if the database format is changed
                        query_lower = SQLiteQuery(w.lower(),
                                                  'word',
                                                  'LEXICON_WORD',
                                                  cursor=db.cursor)
                        if not query.exists and not query_lower.exists:  # if the word is found neither in the database nor in the filters
                            if len(w) > 1:
                                if i - 2 < 0:  # collects 2 words before and after the candidate
                                    w1 = ""
                                else:
                                    w1 = str(words[i - 2])
                                if i - 1 < 0:
                                    w2 = ""
                                else:
                                    w2 = str(words[i - 1])
                                if i + 1 > len(words) - 1:
                                    w4 = ""
                                else:
                                    w4 = str(words[i + 1])
                                if i + 2 > len(words) - 1:
                                    w5 = ""
                                else:
                                    w5 = str(words[i + 2])
                                if w in outdict:
                                    if str(w1 + ' ' + w2 + ' ' + w + ' ' + w4 +
                                           ' ' +
                                           w5) not in outdict[w]['orðstaða']:
                                        outdict[w]['orðstaða'][str(w1 + ' ' +
                                                                   w2 + ' ' +
                                                                   w + ' ' +
                                                                   w4 + ' ' +
                                                                   w5)] = 1
                                    else:
                                        outdict[w]['orðstaða'][str(w1 + ' ' +
                                                                   w2 + ' ' +
                                                                   w + ' ' +
                                                                   w4 + ' ' +
                                                                   w5)] += 1
                                    outdict[w]['tíðni'] += 1
                                else:
                                    outdict[w] = {}
                                    outdict[w]['tíðni'] = 1
                                    outdict[w]['orðstaða'] = {
                                        str(w1 + ' ' + w2 + ' ' + w + ' ' + w4 + ' ' + w5):
                                        1
                                    }

                else:
                    query = SQLiteQuery(w,
                                        'word',
                                        'LEXICON_WORD',
                                        cursor=db.cursor)
                    query_lower = SQLiteQuery(w.lower(),
                                              'word',
                                              'LEXICON_WORD',
                                              cursor=db.cursor)
                    if not query.exists and not query_lower.exists:
                        if len(w) > 1:
                            if i - 2 < 0:
                                w1 = ""
                            else:
                                w1 = str(words[i - 2])
                            if i - 1 < 0:
                                w2 = ""
                            else:
                                w2 = str(words[i - 1])
                            if i + 1 > len(words) - 1:
                                w4 = ""
                            else:
                                w4 = str(words[i + 1])
                            if i + 2 > len(words) - 1:
                                w5 = ""
                            else:
                                w5 = str(words[i + 2])
                            if w in outdict:
                                if str(w1 + ' ' + w2 + ' ' + w + ' ' + w4 +
                                       ' ' + w5) not in outdict[w]['orðstaða']:
                                    outdict[w]['orðstaða'][str(w1 + ' ' + w2 +
                                                               ' ' + w + ' ' +
                                                               w4 + ' ' +
                                                               w5)] = 1
                                else:
                                    outdict[w]['orðstaða'][str(w1 + ' ' + w2 +
                                                               ' ' + w + ' ' +
                                                               w4 + ' ' +
                                                               w5)] += 1
                                outdict[w]['tíðni'] += 1
                            else:
                                outdict[w] = {}
                                outdict[w]['tíðni'] = 1
                                outdict[w]['orðstaða'] = {
                                    str(w1 + ' ' + w2 + ' ' + w + ' ' + w4 + ' ' + w5):
                                    1
                                }

        filebar.next()
        sys.stdout.flush()
    filebar.finish()

    output_file = input("""
    ============================================================
    Skrifaðu það sem þú vilt að úttaksskjalið heiti með 
    endingunni .freq
 
    Dæmi: ordasafn_ordstodulyklar.freq
    ============================================================
    """)

    with open('uttak/notendagogn/' + output_file, mode='w+') as outputfile:
        candidates = {
            k: v
            for k, v in sorted(outdict.items(),
                               key=lambda item: item[1]['tíðni'],
                               reverse=True)
        }  # Sort the candidates by their total frequencies
        for key, item in candidates.items():
            for counter, dictitem in enumerate(item.items()):
                if counter % 2 == 0:
                    freq = dictitem[1]
                elif counter % 2 != 0:
                    sorted_sents = {
                        k: v
                        for k, v in sorted(
                            dictitem[1].items(
                            ),  # Sort the sentence examples by their frequencies
                            key=lambda item: item[1],
                            reverse=True)
                    }
                    if len(
                            sorted_sents
                    ) > 5:  # This limits the examples to the 5 most frequent ones; can be changed
                        sents = list(sorted_sents)[:5]
                    else:
                        sents = list(sorted_sents)
                    outputfile.write(
                        key + ' : ' + str(freq) + '. ' + str(sents) + '\n'
                    )  # word: freq. [sent example 1, sent example 2...]

    print(f"""
    ============================================================
    Úttaksskjalið {output_file} er tilbúið og má finna í 
    undirmöppunni uttak/notendagogn/
    ============================================================
    """)
Exemplo n.º 42
0
class scraping():
    """ classe pour le scrapping du site BooksToScrap """
    def __init__(self):
        # name of the saved book image
        self.nomImage = ''
        # stores the URL of every category of the site to scrape
        self.urlsCategories = []
        # stores, for one category, the parameters needed for its scraping
        self.categorie = {
            'nbLivres': '',
            'nbPages': '',
            'urlsLivres': [],
            'urlsPages': []
        }
        # name of the saved CSV file (a default value is given)
        self.fichierCSV = 'resultats.csv'
        # base URL of the site to scrape
        self.urlBase = 'http://books.toscrape.com/index.html'
        # base URL for scraping an image
        self.urlBaseImage = 'http://books.toscrape.com/'
        # base URL for scraping a category
        self.urlCatalogue = 'http://books.toscrape.com/catalogue/'
        # stores every parameter to scrape for one book
        self.livre = {
            'product_page_url': '',
            'upc': '',
            'title': '',
            'price_including_tax': '',
            'price_excluding_tax': '',
            'number_available': '',
            'product_description': '',
            'category': '',
            'review_rating': '',
            'image_url': ''
        }

    def initialiseLivre(self):
        """ lors du scrapping d'une catégorie, permet de remettre les valeurs par défaut
         de self.livre pour passer d'un livre à l'autre"""
        self.livre = {
            'product_page_url': '',
            'upc': '',
            'title': '',
            'price_including_tax': '',
            'price_excluding_tax': '',
            'number_available': '',
            'product_description': '',
            'category': '',
            'review_rating': '',
            'image_url': ''
        }

    def initialiseCategorie(self):
        """ pour le scraping du site entier, remet les valeurs par defaut pour self.categorie
         quand on change de catégorie à scrapper"""
        self.categorie = {
            'nbLivres': '',
            'nbPages': '',
            'urlsLivres': [],
            'urlsPages': []
        }

    def creerObjetSoup(self, url):
        """ créé self.soup avec l'url en paramètre """
        self.reponse = requests.get(url)
        self.reponse.encoding = 'utf-8'
        self.soup = BeautifulSoup(self.reponse.text, features="html.parser")

    def recupereCategorieEtTitreLivre(self):
        """ recupère et sauvegarde la catégorie et le titre du livre dans self.livre
        le titre est le content de la balise <li class ='active'>
        la catégorie est le content de la balise <li> qui contient <a href= urlPageCategorie>
        et dont le content est different de 'Books'
        Je transforme les "'" en '"' pour éviter des problèmes de segmentation lors de la création du CSV
        les chaînes de caractères sont identifiées avec des "'" """

        lis = self.soup.findAll('li')
        for elem in lis:  # loop to find the title and the category
            try:  # find the title
                if (elem['class'][0] == 'active'):
                    self.livre['title'] = elem.contents[0].replace("'", '"')
                    break
            except:  # find the category
                if ('category' in elem.find('a')['href']):
                    cat = elem.find('a')
                    if (cat.contents[0] != 'Books'):
                        self.livre['category'] = cat.contents[0]

    def recupereUrlImageLivre(self):
        """ recupere et sauvegarde l'url de l'image du livre dans self.livre
        l'url (chemin relatif) de l'image se trouve dans une balise <div class='item active'
        il faut compléter le chemin l'url de base pour une image pour avoir le chemin absolu
        """
        divs = self.soup.findAll('div')
        for elem in divs:
            try:
                if (elem['class'] == ['item', 'active']):
                    cheminRelatif = elem.find('img')['src'][6:]
                    self.livre['image_url'] = self.urlBaseImage + cheminRelatif
            except:
                pass

    def recupererReviewRating(self):
        """ récupère le nombre d'étoiles du livre et le stocke dans self.livre
        le rating se trouve dans une balise <p class='star-rating RATING'
        Cependant d'autres livres sont suggérés en lecture à la fin de la page produit
        avec également du rating. Pour éviter la confusion, il faut choisir la balise
        <div class="col-sm-6 product_main"> qui correspond à celle du produit voulu
        """
        filtreStar = 'star-rating'
        filtreProduit = 'product_main'
        divs = self.soup.findAll('div')
        for d in divs:
            try:
                if (filtreProduit in d['class']):
                    ps = d.findAll('p')
                    for p in ps:
                        try:
                            if (filtreStar in p['class']):
                                self.livre[
                                    'review_rating'] = p['class'][1] + '/Five'
                                break
                        except:
                            pass
            except:
                pass

    def recupereDescriptionLivre(self):
        """ recupere et sauvegarde la description du livre dans self.livre
        la description est contenue dans une balise <p>, il y en a plusieurs dans une
        page produit mais la seule qui corresponde à la description
        a un content qui fait au moins 50 caractères
        Deplus dans la description des points-virgule peuvent apparaîtrent. Ce sont
        les séparateurs du fichier CSV. Aussi pour éviter des confusions lors de la création
        du fichier une solution est d'entourer cette string de guillemets
        """

        ps = self.soup.findAll('p')
        for p in ps:
            if (len(p.contents[0]) >= 50):
                self.livre['product_description'] = p.contents[0].replace(
                    "'", '"')

    def recupereAutresParametresLivre(self):
        """ recupere et sauvegarde les autres caracteristiques du livre dans self.livre
        Tous les autres paramètres cherchés se trouvent dans un tableau
        les balises <tr> incluent une balise <th> dont le content est le nom du paramètre
        cherché et dont la balise <td> contient la valeur du paramètre cherché
        Pour la disponibilité des livres on cherche seulement le nombre dans la chaine de caractère
        qui est du type 'In stock (19 available)' d'où le travail sur ce paramètre"""
        trs = self.soup.findAll('tr')
        for elem in trs:
            if (elem.find('th').contents[0] == 'UPC'):
                self.livre['upc'] = elem.find('td').contents[0]
            elif (elem.find('th').contents[0] == 'Price (excl. tax)'):
                self.livre['price_excluding_tax'] = elem.find(
                    'td').contents[0][1:]
            elif (elem.find('th').contents[0] == 'Price (incl. tax)'):
                self.livre['price_including_tax'] = elem.find(
                    'td').contents[0][1:]
            elif (elem.find('th').contents[0] == 'Availability'):
                filtre = ' available'
                res = elem.find('td').contents[0]
                res = res[res.find('(') + 1:res.find(filtre)]
                self.livre['number_available'] = res

    def creationDossiersSauvegarde(self):
        """ créé les dossiers nécessaires pour sauvegarder à partir du fichier
         config.py les livres scrappés"""

        if (not os.path.isdir(cf.dossierSauvegarde)):
            os.mkdir(cf.dossierSauvegarde)
        if (not os.path.isdir(cf.dossierImages)):
            os.mkdir(cf.dossierImages)

    def ecrireHeadersCSV(self, fichierCSV):
        """ créé les headers du fichier CSV entré en paramètre à partir de la variable
         self.livre"""

        headers = ''
        for k in self.livre.keys():
            headers += k + cf.delimiteurCSV
        headers = headers[:-1] + '\n'

        with open(fichierCSV, 'w') as f:
            f.write(headers)

    def ajouterUneLigneCSV(self, fichierCSV):
        """ ajoute une ligne (qui correspond à un livre) à un fichier CSV déjà existant
        et de la variable self.livre"""

        ligne = ''
        for v in self.livre.values():
            ligne += v + cf.delimiteurCSV
        ligne = ligne[:-1] + '\n'
        with open(fichierCSV, 'a') as f:
            f.write(ligne)

    def creeCSVunLivre(self, fichierCSV):
        """ crée un CSV avec séparateur défini dans le fichier config et
        sauvegarde les caracteristiques présentes dans self.livre dans fichierCSV"""
        if (not os.path.exists(fichierCSV)):
            self.ecrireHeadersCSV(fichierCSV)
        self.ajouterUneLigneCSV(fichierCSV)

    def scrapUnLivre(self, urlLivre, unLivre=False):
        """ scrap les paramètres pour un livre donné,cette méthode est utilisée pour
        scrapper un livre mais également tout une catégorie aussi il est nécessaire de
        faire cette distinction pour pouvoir créer le bon nom de fichier CSV pour la sauvegarde
         si unLivre == True alors le nom du fichier CSV est le nom du livre, sinon c'est celui
         de sa catégorie"""

        self.creerObjetSoup(urlLivre)
        self.initialiseLivre()
        self.livre['product_page_url'] = urlLivre
        self.recupereCategorieEtTitreLivre()
        self.recupereUrlImageLivre()
        self.recupereDescriptionLivre()
        self.recupererReviewRating()
        self.recupereAutresParametresLivre()
        self.creationDossiersSauvegarde()
        if (unLivre):
            self.fichierCSV = self.livre['title'] + '.csv'
        else:
            self.fichierCSV = self.livre['category'] + '.csv'
        self.creeCSVunLivre(os.path.join(cf.dossierSauvegarde,
                                         self.fichierCSV))
        self.sauvegardeImageUnLivre(self.livre)

    def recupereUrlsUnePageCategorie(self):
        """ recupere toutes les urls de livre pour une seule page d'une catégorie
        les urls (chemin relatif) sont incuses dans une balise <div> qui contient une balise <a> avec un href
        Il faut compléter l'url pour en faire un chemin absolu
        """
        ol = self.soup.find('ol')
        divs = ol.findAll('div')
        self.categorie['urlsLivres'].append([])
        for elem in divs:
            try:
                self.categorie['urlsLivres'][-1].append(
                    self.urlCatalogue + elem.find('a')['href'][9:])
            except:
                pass

    def recupereInfosUneCategorie(self, urlCategorie):
        """ recupere toutes les infos necessaires à une catégorie dans self.categorie
        La première information à avoir est le nombre de pages à scrapper. Je cherche donc
        le nombre de livres, chaque page contenant 20 livres je connais le nombre de pages.
        Ensuite si il y a une seule page à scrapper alors j'ajoute simplement l'url de la catégorie
        à self.categorie['urlsPages']. Sinon je créé les urls des pages à scrapper à partir
        du modèle suivant : http://books.toscrape.com/catalogue/category/books/<NOMCATEGORIE>_<VALEUR>/page-1.html
        et je modifie l'url avec le bon numéro de page puis je les stocke dans self.categorie['urlsPages']
        """
        self.initialiseCategorie()
        self.creerObjetSoup(urlCategorie)
        strongs = self.soup.findAll('strong')
        self.categorie['nbLivres'] = int(strongs[1].contents[0])
        self.categorie['nbPages'] = (self.categorie['nbLivres'] + 19) // 20  # ceiling division: 20 books per page
        if (self.categorie['nbPages'] > 1):
            urlUtilisee = urlCategorie[:-10] + 'page-1.html'
            for elem in range(
                    self.categorie['nbPages']
            ):  # URLs of every page, so that all the books are reached
                self.categorie['urlsPages'].append(urlUtilisee[:-6] +
                                                   str(elem + 1) + '.html')
        else:
            self.categorie['urlsPages'].append(urlCategorie)

        for elem in self.categorie[
                'urlsPages']:  # collect every book URL from every page of the category
            self.creerObjetSoup(elem)
            self.recupereUrlsUnePageCategorie()

    def scrapUneCategorie(self, url):
        """ scrap une catégorie entière de livres, cette méthode est également utilisée
        pour scrapper le site en entier"""

        # Prepare the scraping
        self.recupereInfosUneCategorie(urlCategorie=url)

        # Improve the console output for the user
        print('Il y a {} page(s) dans cette catégorie'.format(
            len(self.categorie['urlsLivres'])))
        self.barre = IncrementalBar('pages scrapées : ',
                                    max=len(self.categorie['urlsLivres']))

        # retrieve the book information from every URL
        for index, elem in enumerate(self.categorie['urlsLivres']):
            for el in elem:
                self.initialiseLivre()
                self.scrapUnLivre(
                    el
                )  # save each book's CSV row and image on the fly
            self.barre.next()
        self.barre.finish()
        print('CSV de la catégorie sauvegardée dans {}'.format(
            cf.dossierSauvegarde))
        print('Images de la categorie sauvegardees dans {}'.format(
            cf.dossierImages))

    def scrapSiteInternet(self):  # still under test
        """ scrap tout le site internet et génère un csv par catégorie de livres
         ces csv sont stockés dans le dossier indiqué dans config.py et les images dans un sous dossier
         indiqué également dans config.py
         ATTENTION : cette méthode nécessite environ 15 minutes pour s'exécuter intégralement"""

        self.recupereInfosPourToutesCategories()
        print('Il y a {} categories'.format(len(self.urlsCategories)))
        print('Le scrapping va prendre environ 15 minutes...')
        for index, elem in enumerate(self.urlsCategories):
            print('Catégorie {}/{} : {}'.format(index + 1,
                                                len(self.urlsCategories),
                                                elem['csv'][:-4]))
            self.scrapUneCategorie(url=elem['url'])
        print('\nSite Web scrapé intégralement dans {}'.format(
            cf.dossierSauvegarde))

    def trouverNomCategorie(self, url):
        """ Cette méthode permet de récupérer le nom des catégories quand on scrappe tout
         le site internet. Cette information est utilisée uniquement pour
         l'affichage dans la console lors de l'éxecution du programme
         pour le confort de l'utilisateur
         """
        csv = url[::-1]
        csv = csv[csv.find('/') + 1:]
        csv = csv[:csv.find('/')]
        csv = csv[csv.find('_') + 1:]
        csv = csv[::-1]
        return (csv)

    def recupereInfosPourToutesCategories(self):
        """ recupere les urls de chaque categorie du site et les stocke dans
         self.urlsCategories. Elles sont incluses dans la balise <ul class='nav nav-list'>
          Elles sont contenus esuite dans la balise <ul> puis les balises <li> puis <a href=...>"""

        self.creerObjetSoup(self.urlBase)
        uls = self.soup.findAll('ul')
        for ul in uls:
            try:
                if (ul['class'] == ['nav', 'nav-list']):
                    lis = ul.find('ul').findAll('li')
                    break
            except:
                pass
        for li in lis:
            try:
                url = self.urlBase[:-10] + li.find('a')['href']
                csv = self.trouverNomCategorie(url) + '.csv'
                self.urlsCategories.append({'url': url, 'csv': csv})
            except:
                pass

    def sauvegardeImageUnLivre(self, livre):
        """ enregistre l'image du livre depuis son url dans self.livre
        dans le dossier self.dossierImages pour les images de livre
        """
        reponse = requests.get(livre['image_url'])

        # the book title can cause problems when the file is saved;
        # the two issues met were: 1. slashes in the titles (replaced with underscores)
        # 2. titles with too many characters (so truncated to 30)
        self.nomImage = livre['title'].replace('/', '_')[:30] + '.png'
        # create the save directories
        self.creationDossiersSauvegarde()
        dossierImage = os.path.join(cf.dossierImages, self.livre['category'])
        if (not os.path.isdir(dossierImage)):
            os.mkdir(dossierImage)
        # write the image file
        with open(os.path.join(dossierImage, self.nomImage), 'wb') as f:
            f.write(reponse.content)

    def choisirLaCibleDuScraping(self):
        """ Cette méthode sert à laisser le choix à l'utilisateur de ce qu'il veut scrapper
         sans avoir à toucher au code du programme, il peut scrapper:
         - un livre seul
         - une catégorie entière
         - tout le site (environ 15 minutes)
         """
        print(
            'Le programme va scrapper le site http://books.toscrape.com/index.html\nPlusieurs choix sont possibles'
        )
        print(
            'Choisir parmi les 3 options de scraping:\n1- scraper un seul livre\n'
            '2- scraper une seule catégorie\n3- scraper tout le site (environ 15 minutes)'
        )
        entree = input('Taper 1, 2 ou 3 puis <Entree>: ')

        if (entree == '3'):  # scraping de tout le site
            self.scrapSiteInternet()
            exit()
        elif (entree == '1'):  # scrape a single book
            url = input("Entrer l'url du livre à utiliser :\n")
            self.scrapUnLivre(urlLivre=url, unLivre=True)
            exit()
        elif (entree == '2'):  # scrape a single category
            url = input("Entrer l'url de la catégorie à utiliser :\n")
            self.scrapUneCategorie(url)
            exit()
        else:
            print('Votre choix doit être 1, 2 ou 3')
            exit()
Exemplo n.º 43
0
    def train_model(self, input_data):
        """
        An implementation of the CD-n algorithm.


        inputs: training data should be a vector V of the same shape as v = np.zeros(nvisible)
        """

        self.data_matrix = input_data
        N = self.nvisible  #Scaling factor used in the learning process.
        #Creates empty arrays to store "differentials".
        dW = np.zeros((self.nvisible, self.nhidden))
        dvb = np.zeros(self.nvisible)
        dhb = np.zeros(self.nhidden)

        bar = IncrementalBar("Progress",
                             max=self.nepochs)  #Sets up the progressbar.
        #Trains the RBM using the CD-n algorithm on a single datapoint at a time.
        for epoch in range(self.nepochs):
            bar.next()
            shuffled_indices = np.random.permutation(self.batch_size)
            training_data = self.data_matrix[shuffled_indices]
            error = 0
            for k in range(self.batch_size):
                visible = training_data[k]
                #sample hidden variables
                self.compute_hidden(visible)

                #compute <vh>_0
                CDpos = np.tensordot(
                    visible, self.hiddenprob, axes=0
                )  #Tensor product computes a matrix of shape (nvisible x nhidden)
                CDpos_vb = visible  #Simply the initial state of the visible nodes.
                CDpos_hb = self.hiddenprob  #The first computed state of the hidden nodes.

                #CD-n: if nCDsteps = 1, this is essentially just a reconstruction of the input.
                #Choosing nCDsteps = 1 works alright and is computationally efficient.
                for j in range(self.nCDsteps):
                    self.compute_visible(self.hiddenact)
                    self.compute_hidden(self.visibleact)
                #self.compute_visible(self.hiddenprob)
                #self.compute_hidden(self.visibleact)

                #Computes <vh>_n
                CDneg = np.tensordot(self.visibleact, self.hiddenprob, axes=0)
                CDneg_vb = self.visibleact
                CDneg_hb = self.hiddenprob

                #This is where the learning happens. You can skip the momentum term if you want, but it speeds up the initial learning.
                #You can modify the class to add weight decay, i.e. add -self.decay*dW to the learning rule, or reduce the momentum towards the end of learning.
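                #With a decay term, the weight update would look roughly like this
                #(self.decay is a hypothetical attribute, not defined in this class):
                #    dW = self.eta*(CDpos - CDneg)/N + self.momentum*dW - self.decay*dW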

                #Reconstruction error. It measures how well the RBM reconstructs the data it's shown.
                visible = training_data[k]
                error += np.sum((self.data_matrix[k] - self.visibleact)**2)
                dW = self.eta * (CDpos - CDneg) / N + self.momentum * dW
                self.weights += dW
                dvb = self.eta * (CDpos_vb -
                                  CDneg_vb) / N + self.momentum * dvb
                self.visiblebias += dvb
                dhb = self.eta * (CDpos_hb -
                                  CDneg_hb) / N + self.momentum * dhb
                self.hiddenbias += dhb
            error /= self.batch_size
            self.loss[epoch] = error
        bar.finish()
Exemplo n.º 44
0

for bar_cls in (Bar, ChargingBar, FillingSquaresBar, FillingCirclesBar):
    suffix = '%(index)d/%(max)d [%(elapsed)d / %(eta)d / %(eta_td)s]'
    bar = bar_cls(bar_cls.__name__, suffix=suffix)
    for i in bar.iter(range(200)):
        sleep()

for bar_cls in (IncrementalBar, PixelBar, ShadyBar):
    suffix = '%(percent)d%% [%(elapsed_td)s / %(eta)d / %(eta_td)s]'
    with bar_cls(bar_cls.__name__, suffix=suffix, max=200) as bar:
        for i in range(200):
            bar.next()
            sleep()

for spin in (Spinner, PieSpinner, MoonSpinner, LineSpinner, PixelSpinner):
    for i in spin(spin.__name__ + ' ').iter(range(100)):
        sleep()
    print()

for singleton in (Counter, Countdown, Stack, Pie):
    for i in singleton(singleton.__name__ + ' ').iter(range(100)):
        sleep()
    print()

bar = IncrementalBar('Random', suffix='%(index)d')
for i in range(100):
    bar.goto(random.randint(0, 100))
    sleep()
bar.finish()
Exemplo n.º 45
0
def downloadFile(image, iterations, outage = False, oNr = 0, oTime = 0):
    for node in set.name:
        subprocess.call(['docker cp mn.%s:var/log/transmission/transmission.log measurements/%s/%s/0/time/%s.txt&' % (node, currentInstance, currentTest, node)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)

    image = image.strip()

    milestone = [0] * len(set.name)
    for iteration in range(int(iterations)):
        print ('\n###\nTest #%s\n###' % (iteration + 1))
        iStart = datetime.now()
        print iStart

        #checkTransmissionContainer()
        subprocess.call(['mkdir measurements/%s/%s/%s/' % (currentInstance,currentTest,(iteration + 1))],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
        subprocess.call(['mkdir measurements/%s/%s/%s/time/' % (currentInstance,currentTest,(iteration + 1))],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
        subprocess.call(['mkdir measurements/%s/%s/%s/traffic/' % (currentInstance,currentTest,(iteration + 1))],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)

        with open('measurements/%s/torrentsNr.txt' % currentInstance,'r+') as current:
            lines = current.readlines()
            torrentsNr = int(lines[-1])
            print 'Torrent #%s' % torrentsNr
        doc = open('measurements/%s/torrentsNr.txt' % currentInstance,'w+')
        doc.write(str(torrentsNr + 1)+'\n')
        doc.close()

        #delete existing file and log files on hosts
        sum = 0
        seederPrep = [False] * len(set.seeder)
        complete = [False] * len(set.name)
        bar_restart = IncrementalBar('Deleting existing files ', max = len(set.name))
        for node in set.name:
            if not node in set.servers:
                subprocess.call(['docker exec -it mn.%s docker image rm -f %s' %(node, image)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            else:
                subprocess.call(['docker exec -it mn.%s sh -c "(docker stop opentracker && docker rm opentracker && export IP=%s && docker-compose -f stack_server.yml up -d)"' % (node, set.ip[set.name.index(node)])],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(["docker exec -it mn.%s sh -c 'rm -rf downloads/*'" % node],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(["docker exec -it mn.%s sh -c 'rm -rf torrents/*'" % node],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(["docker exec -it mn.%s sh -c 'rm -rf root/.config/transmission-daemon/resume/*'" % node],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(['docker exec -it mn.%s transmission-remote -t %s -r' % (node, str(torrentsNr))],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(["docker exec -it mn.%s sh -c 'rm -rf root/.config/transmission-daemon/torrents/*'" % node],stdout=FNULL, stderr=subprocess.STDOUT,shell=True) #root/.small-dragonfly/logs/*
            subprocess.call(["docker exec mn.%s sh -c 'iptables -Z'" % node ],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            bar_restart.next()
        bar_restart.finish()
        check.check()
        while check.repeat == True:
            check.check()
        print ('%s deleted on every host' % image)

        #Prepare seeder
        for node in set.seeder:
            if iteration == 0:
                subprocess.call(['docker exec mn.%s docker pull %s' %(node, image)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(['docker exec mn.%s docker save -o downloads/%s%s.tar %s' %(node, image, torrentsNr, image)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(["'docker exec mn.%s sh -c 'iptables -Z'" % node ],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(['docker exec mn.%s transmission-remote -a torrents/%s%s.torrent &' % (node, image, torrentsNr)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)

        #Creating torrent and sharing torrent
        bar_sharing = IncrementalBar('Creating and sharing torrent', max = len(set.name))
        trackerAdr = ''
        for node in set.servers:
            trackerAdr = '%s -t udp://%s:6969' % (trackerAdr, set.ip[set.name.index(node)])

        subprocess.call(['docker exec mn.%s transmission-create -o torrents/%s%s.torrent%s downloads/%s%s.tar' % (set.seeder[0], image, torrentsNr, trackerAdr, image, torrentsNr)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
        subprocess.call(['docker cp mn.%s:torrents/%s%s.torrent measurements/%s/%s/torrents/%s%s.torrent' % (set.seeder[0], image, torrentsNr, currentInstance, currentTest, image, torrentsNr)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
        for node in set.name:
            subprocess.call(['docker cp measurements/%s/%s/torrents/%s%s.torrent mn.%s:torrents/%s%s.torrent' % (currentInstance, currentTest, image, torrentsNr, node, image, torrentsNr)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            bar_sharing.next()
        bar_sharing.finish()

        #Start download
        print datetime.now()
        sum = 0
        bar_download = IncrementalBar('Waiting for download(s)', max = len(set.name))
        for node in set.name:
            subprocess.call(['docker exec mn.%s transmission-remote -a torrents/%s%s.torrent &' % (node, image, torrentsNr)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            if node in set.seeder:
                complete[set.name.index(node)] = True
                bar_download.next()
                sum = sum + 1

        #Server outage
        if outage == True:
            print ('\nWaiting %s seconds for outage...' % oTime)
            time.sleep(int(oTime))
            for j in range(1,int(oNr)+1):
                print set.servers[j]
                subprocess.call(['docker exec mn.%s docker stop opentracker &' % (set.servers[-j])],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)

        while sum < len(set.name):
            time.sleep(120)
            for node in set.name:
                if complete[set.name.index(node)] == False:
                    if ('%s%s.tar' % (image, str(torrentsNr) ) in subprocess.check_output(['docker exec mn.%s ls downloads/' % node],shell=True)): #and not (file + '.part' in subprocess.check_output(['docker exec mn.' + node + ' ls downloads/'],shell=True)):
                        subprocess.call(['docker cp mn.%s:var/log/transmission/transmission.log measurements/%s/%s/%s/time/%s.txt' % (node, currentInstance, currentTest, (iteration + 1), node)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
                        with open('measurements/%s/%s/%s/time/%s.txt' % (currentInstance, currentTest, (iteration + 1), node)) as tmp:
                            lines = tmp.readlines()
                            for i in range(milestone[set.name.index(node)],len(lines)):
                                if '%s%s.tar State changed from "Incomplete" to "Complete"' % (image, torrentsNr) in lines[i]:
                                    sum = sum + 1
                                    complete[set.name.index(node)] = True
                                    milestone[set.name.index(node)] = i + 1
                                    bar_download.next()
                                    break

        bar_download.finish()
        print 'Download(s) successful'
        print 'Grabbing data after download(s)'
        for node in set.name:
            subprocess.call(["docker exec mn.%s sh -c 'iptables -L INPUT -n -v -x > tmp_IN.txt'" % node ],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(['docker cp mn.%s:tmp_IN.txt measurements/%s/%s/%s/traffic/%s_IN.txt' % (node, currentInstance, currentTest, (iteration + 1), node)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(["docker exec mn.%s sh -c 'iptables -L OUTPUT -n -v -x > tmp_OUT.txt'" % node ],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(['docker cp mn.%s:tmp_OUT.txt measurements/%s/%s/%s/traffic/%s_OUT.txt' % (node, currentInstance, currentTest, (iteration + 1), node)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(["docker exec mn.%s sh -c 'iptables -L FORWARD -n -v -x > tmp_OUT.txt'" % node ],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
            subprocess.call(['docker cp mn.%s:tmp_OUT.txt measurements/%s/%s/%s/traffic/%s_FOR.txt' % (node, currentInstance, currentTest, (iteration + 1), node)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)

    subprocess.call(['docker cp mn.%s:downloads/%s%s.tar measurements/%s/%s/results/%s%s.tar' % (set.seeder[0], image, torrentsNr, currentInstance, currentTest, image, torrentsNr)],stdout=FNULL, stderr=subprocess.STDOUT,shell=True)
    set.measureTime(image, False, currentInstance, currentTest, iterations, torrentsNr)
    set.measureTraffic(image, False, currentInstance, currentTest, iterations)

    doc = open('./measurements/%s/%s/results/setup.txt' % (currentInstance, currentTest), 'w+')
    doc.write('Server:%s\nHosts:%s\nSeeders:%s\nImage:%s\nServer outage:%s\nOutage number:%s\nOutage start:%s' % (str(len(set.servers)), str(len(set.name)), str(len(set.seeder)), image, outage, oNr, oTime))
    doc.close()
    set.imageTime(image, '%s%s.tar' % (image, torrentsNr), currentInstance, currentTest)
Exemplo n.º 46
0
class SampleDumpHandler(object):
    def __init__(self,debug=False,samplelist=None):
        super(SampleDumpHandler,self).__init__()
        self.debug=debug
        self.samplelist = samplelist
        self.reset()
        
    def __del__(self):
        if len(self.data):
            self.saveFile()

    def reset(self):
        self.header  = {}
        self.data = []
        self.lastpacket = 0
        self.raw = []
        self.packetcounter = 0
        self.dump_start = 0
        self.exppacket = 0
        self.starttime = 0
        
    def parse(self,msg):
        status = None
        if msg[3] == 0x1:
            status = self.parseHeader(msg)
        elif msg[3] == 0x2:
            status = self.parsePacket(msg)
        elif msg[3] == 0x3:
            status = self.parseRequest(msg)
        elif msg[3] == 0x7F and self.dump_start > 0:
            status = self.continueDump()
        return status
    
    def parseHeader(self, msg):
        self.reset()
        if len(msg) != 21:
            print "Size mismatch, is", len(msg)
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)

        speriod = int(msg[9]  << 14 | msg[8]  << 7 | msg[7])
        srate   = 1./(speriod *1e-9)
        self.header = {
            "target_id"        : msg[2],
            "sample_number"    : msg[5] << 7 | msg[4],
            "sample_format"    : msg[6],
            "sample_period"    : speriod,
            "sample_rate"      : srate,
            "sample_length"    : msg[12] << 14 | msg[11] << 7 | msg[10],
            "sample_loop_start": msg[15] << 14 | msg[14] << 7 | msg[13],
            "sample_loop_end"  : msg[18] << 14 | msg[17] << 7 | msg[16],
            "loop_type"        : msg[19],
            }

        if self.debug:
            print "Sample Dump Header"
            print "  Data:"
            for k,v in self.header.iteritems():
                print "    %s:" % k, v

        self.raw += msg
        format = int(self.header["sample_format"])
        length = int(self.header["sample_length"])
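        # Expected number of packets: each sample occupies (format+6)/7 seven-bit
        # bytes (2 for the 14-bit format handled here) and each data packet carries
        # 120 payload bytes, so the dump needs roughly bytes_per_sample * length / 120
        # packets, plus one for the integer-division remainder.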
        self.exppacket = (format+6)/7*length/120+1
        self.starttime = time.time()
        self.bar = IncrementalBar(
            "Receiving sample dump", max=self.exppacket,
            suffix = '%(percent)d%% [%(elapsed_td)s / %(eta_td)s]')
        return HandshakeMessage.ACK(packetnumber=self.lastpacket)
    
    def parsePacket(self, msg):
        if not 0xF7 in msg:
            print "printSampleDumpDataPacket: could not find EOX"
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)
        
        cs = msg.index(0xF7)-1
        calced_cs = checksum(msg[1:cs])
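        # cs is the index of the checksum byte (the byte just before EOX/0xF7).
        # Per the MIDI Sample Dump Standard the checksum is the XOR of all bytes
        # after the initial 0xF0, masked to 7 bits, which checksum() is assumed
        # to implement here.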
        if self.debug:
            print "Sample Dump Data Packet"
            print "  Data:"
            print "    Packet count", msg[4]
            print "  checksum:", hex(msg[cs]), \
                "(calculated 0x%x)" % calced_cs
        if msg[cs] != calced_cs:
            print "Checksum mismatch:", hex(msg[cs]), "should be", hex(calced_cs)
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)
        offset = 5
        format = int(self.header['sample_format'])

        if format == 14:
            self.data += msg[offset:offset+120]
        else:
            print format, "bit samples are not supported"
        self.lastpacket = msg[4]
        self.raw += msg
        self.packetcounter += 1
        self.bar.next()
        return HandshakeMessage.ACK(packetnumber=self.lastpacket)

    def parseRequest(self,msg):
        self.reset()
        if not 0xF7 in msg:
            print "printSampleDumpDataPacket: could not find EOX"
            return HandshakeMessage.NAK(packetnumber=self.lastpacket)

        samplenumber = int(msg[5] << 7 | msg[4])

        print "Received Sample Dump Request for sample", samplenumber
        if self.debug:
            print "  Data:"
            print "        targetid:",  msg[2]
            print "    samplenumber:", samplenumber

        samplefile = None
        if self.samplelist and samplenumber < len(self.samplelist):
            samplefile = self.samplelist[samplenumber]
            print "Selected list index", samplenumber, repr(samplefile)
        if not samplefile or not os.path.exists(samplefile):
            samplefile = "sample.sds"
            print "Selected fallback", repr(samplefile)
        if not os.path.exists(samplefile):
            print "No sample to send"
            return HandshakeMessage.Cancel(packetnumber=self.lastpacket)
            
        f = open(samplefile, "rb")
        self.raw = [ ord(i) for i in f.read() ]
        f.close()
        n = self.raw.count(0xF7)
        if n > 0:
            print "Sending", n, "Sample Dump Packets (+ header)"
            self.starttime = time.time()
            self.dump_start = self.raw.index(0xF7)+1
            self.packetcounter += 1
            return self.raw[:self.dump_start]
        
        return HandshakeMessage.Cancel(packetnumber=self.lastpacket)

    def continueDump(self):
        n = self.raw[self.dump_start:].count(0xF7)
        if n == 0:
            elapsed = time.time()-self.starttime
            print "Sent %d packets in %.1f seconds (%.1f bytes/sec)" % (
                self.packetcounter, elapsed, len(self.raw)/elapsed)
            self.reset()
            return HandshakeMessage.EOF(packetnumber=self.lastpacket)
        
        ds = self.dump_start
        self.dump_start = self.raw.index(0xF7,self.dump_start)+1
        if self.packetcounter % 100 == 0:
            print "Sent %d packets" % self.packetcounter
        self.packetcounter += 1
        return self.raw[ds:self.dump_start]
        
    def saveFile(self, filename=None):
        self.bar.finish()
        if not filename:
            timestamp = time.strftime("%Y%m%d%H%M%S")
            filename = "sample_%s" % timestamp

        rate = self.packetcounter*120/(time.time()-self.starttime)
        print "Packets received: %d/%d" % (self.packetcounter, self.exppacket)
        print "Average rate:     %.1f bytes/sec" % rate
        print "Saving to", filename

        # concatenation of sysex messages
        with open(filename+".sds", "wb") as f:
            f.write(bytearray(self.raw))

        # adjust data size to sample length
        nsamples = int(self.header.get('sample_length',len(self.data)/2))
        self.data = self.data[:nsamples*2]
        
        # sample data only (7-in-8-bit chunks, big-endian: .dcba987 .6543210)
        with open(filename+".dmp", "wb") as f:
            f.write(bytearray(self.data))

        # decoded sample data
        format = int(self.header['sample_format'])
        out  = []
        if format == 14:
            pos  = 0
            while pos < len(self.data):
                # assume big-endian
                tmp = self.data[pos] << 7 | self.data[pos+1]
                # convert to s16le
                tmp = u2s(tmp<<2)
                out.append(tmp & 0xFF)
                out.append((tmp >> 8) & 0xFF)
                pos += 2
            print
        else:
            print format, "bit samples are not supported"
        
        if len(out):
            # write raw file
            with open(filename+".raw", "wb") as f:
                f.write(bytearray(out))
            # write WAV file
            writeWAV(filename+".wav",int(self.header.get("sample_rate", 22050)),
                     bytearray(out))
        # sample properties
        with open(filename+".txt", "w") as f:
            f.writelines( [ "%s: %s\n" % i for i in self.header.iteritems() ] )
            f.writelines(
                [ "file_%s: %s.%s\n" % (suffix,filename,suffix) for suffix in [
                    'sds', 'raw', 'dmp', 'wav' ] ])
        self.reset()
Exemplo n.º 47
0
def convert_jds_wf_to_wf32(source_directory, result_directory, no_of_bunches_per_file):
    """
    function converts jds waveform data to wf32 waveform data for further processing (coherent dedispersion) and
    saves txt files with time data
    Input parameters:
        source_directory - directory where initial jds waveform data are stored
        result_directory - directory where new wf32 files will be stored
        no_of_bunches_per_file - number of data bunches per file to process (depends on RAM volume on the PC)
    Output parameters:
        result_wf32_files - list of results files
    """

    file_list = find_and_check_files_in_current_folder(source_directory, '.jds')

    # Print the header of the first file to the console
    print('\n  First file header parameters: \n')

    # *** Data file header read ***
    [df_filename, df_filesize, df_system_name, df_obs_place, df_description,
     clock_freq, df_creation_timeUTC, channel, receiver_mode, Mode, Navr, time_res, fmin, fmax,
     df, frequency, freq_points_num, data_block_size] = FileHeaderReaderJDS(source_directory + file_list[0], 0, 1)
    if Mode > 0:
        sys.exit('  ERROR!!! Data recorded in wrong mode! Waveform mode needed.\n\n    Program stopped!')

    result_wf32_files = []
    # Main loop by files start
    for file_no in range(len(file_list)):   # loop by files

        fname = source_directory + file_list[file_no]

        # Create long data files and copy first data file header to them
        if file_no == 0:

            with open(fname, 'rb') as file:
                # *** Data file header read ***
                file_header = file.read(1024)

            # *** Creating a name for long timeline TXT file ***
            tl_file_name = df_filename + '_Timeline.wtxt'
            tl_file = open(tl_file_name, 'w')  # Open and close to delete the file with the same name
            tl_file.close()

            # *** Creating a binary file with data for long data storage ***
            file_data_A_name = df_filename + '_Data_chA.wf32'
            result_wf32_files.append(file_data_A_name)
            file_data_A = open(file_data_A_name, 'wb')
            file_data_A.write(file_header)
            file_data_A.close()

            if channel == 2:
                file_data_B_name = df_filename + '_Data_chB.wf32'
                result_wf32_files.append(file_data_B_name)
                file_data_B = open(file_data_B_name, 'wb')
                file_data_B.write(file_header)
                file_data_B.close()

            del file_header

        # Calculation of number of blocks and number of spectra in the file
        if channel == 0 or channel == 1:    # Single channel mode
            no_of_spectra_in_bunch = int((df_filesize - 1024) / (no_of_bunches_per_file * 2 * data_block_size))
        else:                               # Two channels mode
            no_of_spectra_in_bunch = int((df_filesize - 1024) / (no_of_bunches_per_file * 4 * data_block_size))

        no_of_blocks_in_file = (df_filesize - 1024) / data_block_size

        if file_no == 0:
            print(' Number of blocks in file:               ', no_of_blocks_in_file)
            print(' Number of bunches to read in file:      ', no_of_bunches_per_file)
            print('\n  *** Reading data from file *** \n')

        # *******************************************************************************
        #                           R E A D I N G   D A T A                             *
        # *******************************************************************************

        with open(fname, 'rb') as file:
            file.seek(1024)  # Jumping to 1024 byte from file beginning

            # !!! Fake timing. Real timing to be done!!!
            TimeFigureScaleFig = np.linspace(0, no_of_bunches_per_file, no_of_bunches_per_file + 1)
            for i in range(no_of_bunches_per_file):
                TimeFigureScaleFig[i] = str(TimeFigureScaleFig[i])

            time_scale_bunch = []

            bar = IncrementalBar(' File ' + str(file_no + 1) + ' of ' + str(len(file_list)) + ' reading: ',
                                 max=no_of_bunches_per_file, suffix='%(percent)d%%')

            bar.start()

            for bunch in range(no_of_bunches_per_file):

                # bar.next()

                # Reading and reshaping all data with time data
                if channel == 0 or channel == 1:    # Single channel mode
                    wf_data = np.fromfile(file, dtype='i2', count=no_of_spectra_in_bunch * data_block_size)
                    wf_data = np.reshape(wf_data, [data_block_size, no_of_spectra_in_bunch], order='F')
                if channel == 2:                    # Two channels mode
                    wf_data = np.fromfile(file, dtype='i2', count=2 * no_of_spectra_in_bunch * data_block_size)
                    wf_data = np.reshape(wf_data, [data_block_size, 2 * no_of_spectra_in_bunch], order='F')

                # Timing
                timeline_block_str = jds_waveform_time(wf_data, clock_freq, data_block_size)
                if channel == 2:                    # Two channels mode
                    # Cut the timeline of second channel
                    timeline_block_str = timeline_block_str[0:int(len(timeline_block_str) / 2)]  
                for i in range(len(timeline_block_str)):
                    time_scale_bunch.append(df_creation_timeUTC[0:10] + ' ' + timeline_block_str[i])  # [0:12]

                # Deleting the time blocks from waveform data
                real_data_block_size = data_block_size - 4
                wf_data = wf_data[0: real_data_block_size, :]

                # Separation data into channels
                if channel == 0 or channel == 1:  # Single channel mode
                    wf_data_chA = np.reshape(wf_data, [real_data_block_size * no_of_spectra_in_bunch, 1], order='F')
                    del wf_data  # Deleting unnecessary array name just in case

                if channel == 2:  # Two channels mode

                    # Separating the data into two channels
                    wf_data = np.reshape(wf_data, [2 * real_data_block_size * no_of_spectra_in_bunch, 1], order='F')
                    wf_data_chA = wf_data[0: (2 * real_data_block_size * no_of_spectra_in_bunch): 2]  # A
                    wf_data_chB = wf_data[1: (2 * real_data_block_size * no_of_spectra_in_bunch): 2]  # B
                    del wf_data

                # Saving WF data to dat file
                file_data_A = open(file_data_A_name, 'ab')
                file_data_A.write(np.float32(wf_data_chA).transpose().copy(order='C'))
                file_data_A.close()
                if channel == 2:
                    file_data_B = open(file_data_B_name, 'ab')
                    file_data_B.write(np.float32(wf_data_chB).transpose().copy(order='C'))
                    file_data_B.close()

                # Saving time data to the long timeline file
                with open(tl_file_name, 'a') as tl_file:
                    for i in range(no_of_spectra_in_bunch):
                        tl_file.write((str(time_scale_bunch[i][:])) + ' \n')  # str

                bar.next()

            bar.finish()

        file.close()  # Close the data file
        del file_data_A
        if channel == 2:
            del file_data_B

    return result_wf32_files
Exemplo n.º 48
0
def Alliance():
    if not os.path.exists(f"./scrap_essentials/data/{date}.json"):
        with webdriver.Chrome(executable_path=path, options=options) as driver:

            def connect():
                driver.find_element_by_id('username').send_keys(email)
                driver.find_element_by_id('password').send_keys(password)
                driver.find_element_by_id('loginSubmit').click()

            driver.get(URL)
            wait = WebDriverWait(driver, 60)
            connect()
            driver.get("https://www.airlines-manager.com/home")
            AllResult = {"Alliance": []}
            if (wait.until(
                    presence_of_element_located(
                        (By.XPATH, '//*[@id="mainHeader"]/div[2]')))):
                for id in ALLIANCE_LIST:
                    result = {
                        "Name": None,
                        "ID": None,
                        "Classement": None,
                        "Profile": {
                            "General": {
                                "Created": None,
                                "nbCompanies": None,
                                "Solde": None,
                                "BeneficeHebdo": None,
                                "TaxeHebdo": None
                            },
                            "Hub": {
                                "HubsDispo": None,
                                "KmPartage": None,
                                "TaxeLigne": None,
                                "TaxeCompanies": None
                            },
                            "AG": {
                                "nbAvionProposer": None,
                                "ReducMax": None,
                                "Reduc30j": None,
                                "nbAvionAcheter": None,
                                "AideAchatMax": None,
                                "AideAchat30j": None
                            },
                            "R&D": None
                        },
                        "Members": [
                            # PatternMembers Here
                        ],
                        "Networks": {
                            "Statistique": {
                                "NbrHub": None,
                                "NbrLigne": None,
                                "KmLigne": None,
                            },
                            "Hubs": []
                        }
                    }

                    bar = Bar(
                        f'Tabs ({id["Name"]}): ',
                        max=(len(ALLIANCE_TABS)),
                        suffix=
                        '%(percent).1f%% (%(index)d/%(max)d) - [%(elapsed_td)s / %(eta_td)s]'
                    )
                    for tabs in ALLIANCE_TABS:
                        driver.get(f"{URL_ALLIANCE_PROFIL}/{tabs}/{id['ID']}")
                        if tabs == "profile":
                            if (wait.until(
                                    presence_of_element_located(
                                        (By.CSS_SELECTOR,
                                         'div#alliance_profile_statistiques'
                                         )))):
                                result = Profile(driver, result)
                                bar.next()
                        elif tabs == "members":
                            if (wait.until(
                                    presence_of_element_located((
                                        By.CSS_SELECTOR,
                                        '#allianceMembersList > tbody > tr:nth-child(1) > th:nth-child(2) > span'
                                    )))):
                                result = Member(driver, result)
                                bar.next()
                        elif tabs == "network":
                            if (wait.until(
                                    presence_of_element_located(
                                        (By.CSS_SELECTOR, 'div#map_canvas')))):
                                result = Network(driver, result)
                                bar.next()
                    AllResult["Alliance"].append(result)
                    bar.finish()

                with open(os.getcwd() + date_str + date + ".json",
                          "w",
                          encoding='utf8') as f:
                    f.write(json.dumps(AllResult, indent=4))
            print(os.getcwd() + date_str + date + ".json")
            return AllResult
            # Go back to the previous page: driver.back()
            driver.quit()  # unreachable after the return above; the with-block already quits the driver

    else:
        print(os.getcwd() + date_str + date + ".json")
        return json.load(
            open(os.getcwd() + date_str + date + ".json", "r",
                 encoding='utf8'))
        driver.quit()  # unreachable after the return above; note that 'driver' is not defined in this branch
Exemplo n.º 49
0
	def collect(self, topic_list):

		data_num = np.zeros(4, dtype=np.int)

		for bag_file in self.bag_filelist:

			print "loading bag file: ", bag_file
			bag_temp = rosbag.Bag( os.path.join(self.bag_path, bag_file), 'r')

			info_dict = yaml.load(bag_temp._get_yaml_info())

			for topic_item in info_dict["topics"]:
				for i in range(len(topic_list)):
					if topic_item["topic"] == topic_list[i]:
						data_num[i] = topic_item["messages"]

			data_pose = np.zeros((self.batch_size, 6), dtype=np.float)
			data_status = np.zeros((self.batch_size, 2), dtype=np.int)

			if self.greyscale:
				data_image = np.zeros((self.batch_size, np.prod(self.img_size)/3), dtype=np.uint8)
			else:				
				data_image = np.zeros((self.batch_size, np.prod(self.img_size)), dtype=np.uint8)

			# data_lidar = np.zeros((self.batch_size, 1000000), dtype=np.uint8)			

			bar = IncrementalBar('Processing messages', max=np.sum(data_num))

			cout = np.zeros(4, dtype=np.int) 
			cout_tot = np.zeros(4, dtype=np.int)
			for topic, msg, t in bag_temp.read_messages(topics=topic_list):				

				if topic == "/nav/fix":
					data_pose[cout[0], 0] = msg.pose.position.x
					data_pose[cout[0], 1] = msg.pose.position.y
					data_pose[cout[0], 2] = msg.pose.orientation.x
					data_pose[cout[0], 3] = msg.pose.orientation.y
					data_pose[cout[0], 4] = msg.pose.orientation.z
					data_pose[cout[0], 5] = msg.pose.orientation.w	
					cout[0] += 1

				if topic == "/nav/status":		
					data_status[cout[1], 0] = msg.gps
					data_status[cout[1], 1] = msg.satellite
					cout[1] += 1

				if topic == "/usb_cam/image_raw":
					img_array = self.bridge.imgmsg_to_cv2(msg, "rgb8")
					if self.greyscale:
						img_array = cv2.cvtColor(np.reshape(img_array, self.img_size), cv2.COLOR_RGB2GRAY).flatten()
					else:
						img_array = img_array.flatten()
					data_image[cout[2], :] = img_array
					cout[2] += 1

				bar.next()

				if (bar.index+1)%self.batch_size == 0:

					self.save(bag_file, topic_list, \
						data_pose[:cout[0],  :],\
						data_status[:cout[1],:],\
						data_image[:cout[2], :])

					cout_tot += cout				
					cout = np.zeros(4, dtype=np.int)

				# if topic == "/velodyne_points":
				# 	point_tuple = struct.unpack("B"*len(msg.data), msg.data)
				# 	point_array = np.asarray(point_tuple)
				# 	data_lidar[cout[3], :len(msg.data)] = point_array
				# 	cout[3] += 1

			bar.finish()
			del bag_temp

			self.save(bag_file, topic_list, \
				data_pose[:cout[0],  :],\
				data_status[:cout[1],:],\
				data_image[:cout[2], :])
Exemplo n.º 50
0
    elif isinstance(fault.data, types.ListType):
        missing = fault.data

    if '' in missing:
        del missing[missing.index(''):]

    bar = IncrementalBar('Uploading', max=len(missing))
    bar.suffix = '%(percent).1f%% - %(eta)ds'
    with open(path) as fp:
        for hash in missing:
            offset = hashes.index(unhexlify(hash)) * blocksize
            fp.seek(offset)
            block = fp.read(blocksize)
            client.update_container_data(container, StringIO(block))
            bar.next()
    bar.finish()

    return client.create_object_by_hashmap(container, object, map, **kwargs)


def download(client, container, object, path):

    res = client.retrieve_object_hashmap(container, object)
    blocksize = int(res['block_size'])
    blockhash = res['block_hash']
    bytes = res['bytes']
    map = res['hashes']

    if os.path.exists(path):
        h = HashMap(blocksize, blockhash)
        h.load(open(path))
Exemplo n.º 51
0
class AuthorCrawler:
    visitedProfileURL = []
    queueProfileURL = []
    visitedArticleURL = []
    queueArticleURL = []
    numberOfCrawlerProfile = 0

    def __init__(self):
        self.baseURL = 'https://www.researchgate.net/'
        from progress.bar import IncrementalBar
        self.progress_bar = IncrementalBar('Crawling', max=MIN_NUMBER_OF_PROFILE, suffix='%(percent)d%% %(remaining)s remaining - eta %(eta_td)s')

    def crawl(self):
        self.queueProfileURL.extend(START_PAGES)
        os.makedirs(AFTER_CRAWL_AUTHOR_DIR, exist_ok=True)
        while self.numberOfCrawlerProfile < MIN_NUMBER_OF_PROFILE:
            while len(self.queueProfileURL) == 0:
                if len(self.queueArticleURL) == 0:
                    self.progress_bar.finish()
                    return
                try:
                    self.queueProfileURL.extend(filter(lambda x: x not in self.visitedProfileURL and x not in self.queueProfileURL,self.getAuthorFromArticle(self.queueArticleURL.pop(0))))
                except:
                    pass
            try:
                self.progress_bar.next()
                self.crawlProfile(self.queueProfileURL.pop(0))
            except:
                pass
        self.progress_bar.finish()

    def getAuthorFromArticle(self, url):

        r = requests.get(url)
        s = BeautifulSoup(r.text, 'html.parser')

        authors = s.findAll('a', class_='display-name')
        authorsList = []
        for author in authors:
            authorsList.append(self.baseURL +author['href'])
        return authorsList

    def getArticleIDFromURL(self, url):
        return re.findall(r'publication/(?P<id>\d+)_', url)[0]

    def crawlProfile(self, profURL):
        if not profURL.endswith('publications'):
            profURL += '/publications'
        r = requests.get(profURL)
        s = BeautifulSoup(r.text, 'html.parser')
        name = s.find('h1', class_='profile-header-name')
        name = name.text
        n = 1
        articles = []
        while True:
            url = profURL+'/'+n.__str__()
            n+=1
            res = self.parseProfilePage(url)
            if res is None or len(res) == 0:
                break
            articles.extend(res)
        self.queueArticleURL.extend(filter(lambda x: x not in self.visitedArticleURL and x not in self.queueArticleURL,map(lambda x : x[0],articles)))
        js = {}
        js['Name'] = name
        js['Article'] = articles

        file_name = '{}.json'.format(name)
        with open(os.path.join(AFTER_CRAWL_AUTHOR_DIR , file_name), 'w') as outfile:
            json.dump(js, outfile)
        self.numberOfCrawlerProfile +=1
        print(self.numberOfCrawlerProfile)

    def parseProfilePage(self, url):  # return top 10 article url
        r = requests.get(url)
        s = BeautifulSoup(r.text, 'html.parser')
        articles = s.findAll('a', class_='ga-publication-item')
        result = []
        for article in articles:
            result.append((self.baseURL + article['href'], self.getArticleIDFromURL(article['href'])))
        return result
Exemplo n.º 52
0
    def find_solutions(self, graph_setting_groups):
        results = {}
        # check for solutions for a specific set of interaction settings
        logging.info("Number of interaction settings groups being processed: "
                     + str(len(graph_setting_groups)))
        for strength, graph_setting_group in sorted(
                graph_setting_groups.items(), reverse=True):
            logging.info("processing interaction settings group with "
                         "strength " + str(strength))
            logging.info(str(len(graph_setting_group)) +
                         " entries in this group")
            logging.info("running with " +
                         str(self.number_of_threads) + " threads...")

            temp_results = []
            bar = IncrementalBar('Propagating quantum numbers...',
                                 max=len(graph_setting_group))
            bar.update()
            if self.number_of_threads > 1:
                with Pool(self.number_of_threads) as p:
                    for result in p.imap_unordered(
                            self.propagate_quantum_numbers,
                            graph_setting_group, 1):
                        temp_results.append(result)
                        bar.next()
            else:
                for graph_setting_pair in graph_setting_group:
                    temp_results.append(self.propagate_quantum_numbers(
                        graph_setting_pair))
                    bar.next()
            bar.finish()
            logging.info('Finished!')
            if strength not in results:
                results[strength] = []
            results[strength].extend(temp_results)

        for k, v in results.items():
            logging.info(
                "number of solutions for strength ("
                + str(k) + ") after qn propagation: "
                + str(sum([len(x[0]) for x in v])))

        # remove duplicate solutions, which only differ in the interaction qn S
        results = remove_duplicate_solutions(results, self.filter_remove_qns,
                                             self.filter_ignore_qns)

        node_non_satisfied_rules = []
        solutions = []
        for result in results.values():
            for (tempsolutions, non_satisfied_laws) in result:
                solutions.extend(tempsolutions)
                node_non_satisfied_rules.append(non_satisfied_laws)
        logging.info("total number of found solutions: " +
                     str(len(solutions)))
        violated_laws = []
        if len(solutions) == 0:
            violated_laws = analyse_solution_failure(node_non_satisfied_rules)
            logging.info("violated rules: " + str(violated_laws))

        # finally perform combinatorics of identical external edges
        # (initial or final state edges) and prepare graphs for
        # amplitude generation
        match_external_edges(solutions)
        final_solutions = []
        for sol in solutions:
            final_solutions.extend(
                perform_external_edge_identical_particle_combinatorics(sol)
            )

        return (final_solutions, violated_laws)
Exemplo n.º 53
0
            for (w, weight) in m.show_topic(topic[0], topn=50):

                d = {}
                d['word'] = w
                d['doc_page'] = counter
                d['year'] = years[i]
                d['word_weight'] = weight
                d['topic_id'] = topic[0]
                d['topic_weight'] = topic[1]
                dl = pd.DataFrame().append(d, ignore_index=True)
                if os.path.exists(output_path):
                    dl.to_csv(output_path, mode='a', header=False, index=False)
                else:
                    dl.to_csv(output_path, header=True, index=False)
        counter = counter + 1
processing.finish()

# lda_1960 = pd.read_csv('../scripts/1960_all_lda_words.csv')
# lda_1961 = pd.read_csv('../scripts/1961_all_lda_words.csv')
# lda_1962 = pd.read_csv('../scripts/1962_all_lda_words.csv')
# lda_1963 = pd.read_csv('../scripts/1963_all_lda_words.csv')
# lda_1964 = pd.read_csv('../scripts/1964_all_lda_words.csv')
# lda_1965 = pd.read_csv('../scripts/1965_all_lda_words.csv')
# lda_1966 = pd.read_csv('../scripts/1966_all_lda_words.csv')

# print('1960',list(lda_1960.word.unique()))
# print('1961',list(lda_1961.word.unique()))
# print('1962',list(lda_1962.word.unique()))
# print('1963',list(lda_1963.word.unique()))
# print('1964',list(lda_1964.word.unique()))
# print('1965',list(lda_1965.word.unique()))
Exemplo n.º 54
0
def tweets_parecidos_con_distinto_humor(corpus):
    print("Buscando tweets muy parecidos pero con distinto valor de humor...")

    parecidos_con_distinto_humor = set()

    ids_parecidos_con_distinto_humor = cargar_parecidos_con_distinto_humor()

    if ids_parecidos_con_distinto_humor:
        corpus_por_id = {tweet.id: tweet for tweet in corpus}
        for id_tweet_humor, id_tweet_no_humor in ids_parecidos_con_distinto_humor:
            parecidos_con_distinto_humor.add((corpus_por_id[id_tweet_humor], corpus_por_id[id_tweet_no_humor]))
    else:
        subcorpus_cuentas_de_humor = []
        subsubcorpus_cuentas_de_humor_humor = []
        subsubcorpus_cuentas_de_humor_no_humor = []
        for tweet in corpus:
            if tweet.es_chiste:
                subcorpus_cuentas_de_humor.append(tweet)
                if tweet.es_humor:
                    subsubcorpus_cuentas_de_humor_humor.append(tweet)
                else:
                    subsubcorpus_cuentas_de_humor_no_humor.append(tweet)

        subsubcorpus_cuentas_de_humor_no_humor_por_largo = defaultdict(list)

        bar = IncrementalBar("Tokenizando\t\t\t", max=len(subcorpus_cuentas_de_humor),
                             suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)
        for tweet_cuenta_humor in subcorpus_cuentas_de_humor:
            tweet_cuenta_humor.oraciones = Freeling.procesar_texto(tweet_cuenta_humor.texto_original)
            tweet_cuenta_humor.tokens = list(itertools.chain(*tweet_cuenta_humor.oraciones))
            bar.next()
        bar.finish()

        for tweet_no_humor in subsubcorpus_cuentas_de_humor_no_humor:
            subsubcorpus_cuentas_de_humor_no_humor_por_largo[len(tweet_no_humor.tokens)].append(tweet_no_humor)

        bar = IncrementalBar("Buscando en tweets\t\t", max=len(subsubcorpus_cuentas_de_humor_humor),
                             suffix=SUFIJO_PROGRESS_BAR)
        bar.next(0)
        for tweet_humor in subsubcorpus_cuentas_de_humor_humor:
            margen = int(round(len(tweet_humor.tokens) / 5))
            largo_min = len(tweet_humor.tokens) - margen
            largo_max = len(tweet_humor.tokens) + margen

            for largo in range(largo_min, largo_max + 1):
                for tweet_no_humor in subsubcorpus_cuentas_de_humor_no_humor_por_largo[largo]:
                    if distancia_edicion(tweet_humor.tokens, tweet_no_humor.tokens)\
                            <= max(len(tweet_humor.tokens), len(tweet_no_humor.tokens)) / 5:
                        parecidos_con_distinto_humor.add((tweet_humor, tweet_no_humor))
                        print('')
                        print(tweet_humor.id)
                        print(tweet_humor.texto_original)
                        print("------------")
                        print(tweet_no_humor.id)
                        print(tweet_no_humor.texto_original)
                        print("------------")
                        print('')
            bar.next()
        bar.finish()

        guardar_parecidos_con_distinto_humor(parecidos_con_distinto_humor)

    return parecidos_con_distinto_humor
def dump_image_point():
    data_prefix = '/media/tree/data1/projects/AttentionBased/data'
    train_output_folder = '/media/tree/backup/projects/AttentionBased/data/train'
    test_output_folder = '/media/tree/backup/projects/AttentionBased/data/test'
    image_input_folder = 'image_256_256_12'
    point_input_folder = 'pointcloud_12/16384'
    image_output_folder = 'image_256_256_12'
    image_192_output_folder = 'image_192_256_12'
    point_output_folder = 'point_16384_12'
    image_number = 12

    with open('/media/tree/backup/projects/AttentionBased/data/train_models.json', 'r') as f:
        train_models_dict = json.load(f)

    with open('/media/tree/backup/projects/AttentionBased/data/test_models.json', 'r') as f:
        test_models_dict = json.load(f)

    cats = shapenet_category_to_id.values()
    for cat in cats:
        print(cat, 'starts at ', time.strftime("%m-%d %H:%M:%S", time.localtime()))
        print(cat, 'loading train_split!')
        train_image_models = []
        train_point_models = []
        train_img_path = []
        train_pt_path = []
        train_image_models.extend([os.path.join(data_prefix, image_input_folder, model) for model in train_models_dict[cat]])
        for each in train_image_models:
            for index in range(image_number):
                train_img_path.append(os.path.join(each, '{0:02d}.png'.format(int(index))))
        
        train_point_models.extend([os.path.join(data_prefix, point_input_folder, model) for model in train_models_dict[cat]])
        for each in train_point_models:
            for index in range(image_number):
                train_pt_path.append(os.path.join(each, '{0:02d}.npy'.format(int(index))))
                
        print(cat, 'train_split loaded!')

        train_image_save = h5py.File(os.path.join(train_output_folder, image_output_folder, '{}.h5'.format(cat)), mode = 'w')
        train_image_192_save = h5py.File(os.path.join(train_output_folder, image_192_output_folder, '{}.h5'.format(cat)), mode = 'w')
        # train_point_save = h5py.File(os.path.join(train_output_folder, point_output_folder, '{}.h5'.format(cat)), mode = 'w')
        
        train_img_shape = (len(train_img_path), 256, 256, 3)
        train_img_192_shape = (len(train_img_path), 192, 256, 3)
        train_pt_shape = (len(train_pt_path), 16384, 3)

        train_image_save.create_dataset('image', train_img_shape, np.uint8)
        train_image_192_save.create_dataset('image', train_img_192_shape, np.uint8)
        # train_point_save.create_dataset('point', train_pt_shape, np.float32)
        
        print(cat, 'saving train data at', time.strftime("%m-%d %H:%M:%S", time.localtime()))
        train_bar =  IncrementalBar(max=len(train_img_path))
        for i in range(len(train_img_path)):
            image_array, point_array , image_192_array = load_data(train_img_path[i], train_pt_path[i])
            train_image_save['image'][i, ...] = image_array
            train_image_192_save['image'][i, ...] = image_192_array
            # train_point_save['point'][i, ...] = point_array
            train_bar.next()
        train_bar.finish()
        print(cat, 'train data saved!')
        
        train_image_save.close()
        train_image_192_save.close()
        # train_point_save.close()

        print(cat, 'loading test_split!')
        test_image_models = []
        test_point_models = []
        test_img_path = []
        test_pt_path = []
        test_image_models.extend([os.path.join(data_prefix, image_input_folder, model) for model in test_models_dict[cat]])
        for each in test_image_models:
            for index in range(image_number):
                test_img_path.append(os.path.join(each, '{0:02d}.png'.format(int(index))))
        
        test_point_models.extend([os.path.join(data_prefix, point_input_folder, model) for model in test_models_dict[cat]])
        for each in test_point_models:
            for index in range(image_number):
                test_pt_path.append(os.path.join(each, '{0:02d}.npy'.format(int(index))))
        
        print(cat, 'test_split loaded!')

        test_image_save = h5py.File(os.path.join(test_output_folder, image_output_folder, '{}.h5'.format(cat)), mode = 'w')
        test_image_192_save = h5py.File(os.path.join(test_output_folder, image_192_output_folder, '{}.h5'.format(cat)), mode = 'w')
        # test_point_save = h5py.File(os.path.join(test_output_folder, point_output_folder, '{}.h5'.format(cat)), mode = 'w')
        
        test_img_shape = (len(test_img_path), 256, 256, 3)
        test_img_192_shape = (len(test_img_path), 192, 256, 3)
        test_pt_shape = (len(test_pt_path), 16384, 3)
        
        test_image_save.create_dataset('image', test_img_shape, np.uint8)
        test_image_192_save.create_dataset('image', test_img_192_shape, np.uint8)
        # test_point_save.create_dataset('point', test_pt_shape, np.float32)

        print(cat, 'saving test data at ', time.strftime("%m-%d %H:%M:%S", time.localtime()))
        test_bar =  IncrementalBar(max=len(test_img_path))
        for i in range(len(test_img_path)):
            image_array, point_array , image_192_array = load_data(test_img_path[i], test_pt_path[i])
            test_image_save['image'][i, ...] = image_array
            test_image_192_save['image'][i, ...] = image_192_array
            # test_point_save['point'][i, ...] = point_array
            test_bar.next()
        test_bar.finish()
        print(cat, 'test data saved!')
        
        print(cat, 'finished at ', time.strftime("%m-%d %H:%M:%S", time.localtime()))
        
        test_image_save.close()
        test_image_192_save.close()