Example #1
def student_list(school_id, turma_id):
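    """List the distinct students ("aluno") of the given school and class, labelled "Aluno <id>" and sorted by name."""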
    db = utils.get_database()
    school = utils.get_database().schools.find_one(
        {"_id": bson.ObjectId(school_id)})
    data_filter = {
        "escola": school["name"],
        "turma": turma_id,
    }
    print(repr(data_filter))
    result = db.weekly.aggregate([{
        "$match": data_filter,
    }, {
        "$group": {
            "_id": "$aluno"
        }
    }, {
        "$project": {
            "_id": 1,
            "name": {
                "$concat": ["Aluno ", "$_id"]
            }
        }
    }, {
        "$sort": {
            "name": 1
        }
    }])["result"]

    return result
Example #2
def main():
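    """Read immune results, variants and summary interpretations, annotate each patient, and emit one report plus a manifest line per order."""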
    empty_output_dir()
    client = utils.get_mongo_client()
    db = utils.get_database(client, 'omni')
    fake_id_dict = read_variants.read_fake_ids()
    disease_icd_dict = read_variants.read_disease_icd_dict()
    omni_to_jax_disease_dict = read_variants.read_omni_to_jax_disease_dict()
    disease_path_to_reportable_disease_dict = read_variants.read_omni_reportable_disease_name_dict(
    )
    variant_groups_dict = read_variants.read_variant_groups_dict()
    io_drug_dict = additional_io.get_io_drug_dict()
    patients = read_variants.read_immune_results_file(
        get_immune_results_file_path())
    read_variants.read_all_variants(patients, get_variant_file_path())
    read_variants.read_summary_interprations(patients, get_summary_file_path())
    strands = annotate.read_strands('data/strands.xlsx')
    num = 1
    with open('output/manifest.txt', "w") as file:
        for order_id in patients.keys():
            patient = patients[order_id]
            read_variants.add_patient_data(
                patient, fake_id_dict, disease_icd_dict,
                omni_to_jax_disease_dict,
                disease_path_to_reportable_disease_dict)

            handle_one_patient(patient, db, strands, variant_groups_dict,
                               io_drug_dict)
            # create_recommendations(patient,db)
            create_one_report(patient)
            out_string = generate_manifest_string(num, order_id, patient)
            print(out_string)
            # file.write(out_string)
            num += 1
Example #3
def main():
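    """Write the annotation and report-status decision for every TSO500 unique variant to data/decisions.tsv."""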
    client = utils.get_mongo_client()
    db = utils.get_database(client, 'omni')
    path = 'data/TSO500_UniqueVariants_runs1-6.csv'
    # path = 'data/TSO500_UniqueVariants_4.csv'
    var_list = TSO500.read_tso_unique_variants(path)
    outF = open("data/decisions.tsv", "w")
    h = "gene\tcdot\tpdot\tgene_category\tmutation_type\treport_status\treasons\tlack_of_reasons\t" \
        "is_protein_altering\tin_clinvar\tis_clinvar_benign\tis_clinvar_pathogenic\tclinvar_explain\t" \
        "is_gain_of_function\tis_loss_of_function\thotspots\tpredicted_deleterious\tis_truncating_variants\tis_near_GOF_LOF_mutation\t" \
        "omni_gene\tomni_cdot\tomni_pdot"
    # print(h)
    outF.write(h)
    outF.write("\n")

    for index, variant in enumerate(var_list):
        key = get_key_from_variant(variant)
        annotated = get_annotated_snv(key, db)
        if 'reasons' not in annotated:
            annotated['reasons'] = []
        if 'lack_of_reasons' not in annotated:
            annotated['lack_of_reasons'] = []

        is_tso_snv_reportable(annotated)
        s = f"{annotated['gene']}\t{annotated['cdot']}\t{annotated['pdot']}\t{annotated['gene_category']}\t{annotated['mutation_type']}\t" \
            f"{annotated['report_status']}\t{annotated['reasons']}\t{annotated['lack_of_reasons']}\t" \
            f"{annotated['is_protein_altering']}\t{annotated['in_clinvar']}\t{annotated['is_clinvar_benign']}\t{annotated['is_clinvar_pathogenic']}\t{annotated['clinvar_explain']}\t" \
            f"{annotated['is_gain_of_function']}\t{annotated['is_loss_of_function']}\t{annotated['hotspots']}\t{annotated['predicted_deleterious']}\t" \
            f"{annotated['is_truncating_variants']}\t{annotated['is_near_GOF_LOF_mutation']}\t" \
            f"{annotated['HGNC_Symbol']}\t{annotated['omni_c_dot']}\t{annotated['omni_p_dot']}"
        # print(s)
        outF.write(s)
        outF.write("\n")
    outF.close()
Example #4
def visualize_embeddings(database='mnist',
                         model_dir='exp/mnist/run_13/',
                         model_name='model_weights.h5',
                         sprite=False,
                         model=None):
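    """Compute embeddings for the test split and write a TensorBoard projector checkpoint (metadata and optional sprite) under <model_dir>/log."""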
    sprite_filename = '/home/daniel/models-tensorflow/tensorflow-triplet-loss/experiments/mnist_10k_sprite.png'

    tf.logging.set_verbosity(tf.logging.INFO)

    data, input_size = get_database(database)
    _, (x_test, y_test) = data
    # Load the parameters from json file
    if model is None or True:  # NOTE: "or True" forces a reload from disk even when a model is passed in
        tf.reset_default_graph()
        K.clear_session()
        estimator = load_model(model_dir + model_name)
    else:
        estimator = model

    # Compute embeddings on the test set
    tf.logging.info("Predicting")
    embeddings = estimator.predict(x_test)

    tf.logging.info("Embeddings shape: {}".format(embeddings.shape))

    # Visualize test embeddings
    embedding_var = tf.Variable(embeddings, name='embedding')

    eval_dir = os.path.join(model_dir, "log")
    summary_writer = tf.summary.FileWriter(eval_dir)

    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = embedding_var.name

    # Specify where you find the sprite (we will create this later)
    # Copy the embedding sprite image to the eval directory

    if sprite:
        shutil.copy2(sprite_filename, eval_dir)
        embedding.sprite.image_path = pathlib.Path(sprite_filename).name
        embedding.sprite.single_image_dim.extend([28, 28])

    # Specify where you find the metadata
    # Save the metadata file needed for Tensorboard projector
    metadata_filename = "metadata.tsv"
    with open(os.path.join(eval_dir, metadata_filename), 'w') as f:
        for i in range(len(y_test)):
            c = y_test[i]
            f.write('{}\n'.format(c))
    embedding.metadata_path = metadata_filename

    # Say that you want to visualise the embeddings
    projector.visualize_embeddings(summary_writer, config)

    saver = tf.train.Saver()
    with K.get_session() as sess:
        sess.run(embedding_var.initializer)
        saver.save(sess, os.path.join(eval_dir, "embeddings.ckpt"))
Example #5
def get_decreasing_schools(field_name="totalminutes",
                           op="avg",
                           extraFilter=None):
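    """Return schools whose latest weekly aggregate dropped compared to the previous week, sorted by the most negative percent change."""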
    db = utils.get_database()

    aggregateFilter = []
    if extraFilter:
        aggregateFilter.append({"$match": extraFilter})

    results = db.weekly.aggregate(
        list(aggregateFilter) + [{
            '$group': {
                '_id': {
                    "e": "$escola",
                    "w": "$semana",
                },
                'avg': {
                    '${}'.format(op): "${}".format(field_name)
                },
            }
        }])["result"]
    all_schools = {}
    for r in results:
        school = r["_id"]["e"]
        week = r["_id"]["w"]
        avg = r["avg"]
        all_schools[school] = all_schools.get(school, {})
        all_schools[school][week] = avg

    process_output = []
    for school, data in all_schools.iteritems():
        avg = sum(data.values()) / len(data.values())
        current = sorted(data.keys())[-1]
        last = sorted(data.keys())[-2]
        diff = data[current] - data[last]
        out = {
            'schoolName': school,
            'avg': avg,
            'week': last,
            'current': data[current],
            'last': data[last],
            'diff': diff,
            'percentDiff':
            float(diff) / data[last] * 100 if data[last] else None,
        }
        process_output.append(out)

    probs = filter(lambda x: x["diff"] < 0, process_output)
    probs.sort(key=lambda x: x['percentDiff'])

    return probs
Example #6
def main():
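    """Collect TSO500 unique variants that have no annotated SNV yet and report on them in batches of 100."""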
    client = utils.get_mongo_client()
    db = utils.get_database(client, 'omni')
    path = 'data/TSO500_UniqueVariants_runs1-6.csv'
    # path = 'data/TSO500_UniqueVariants_4.csv'
    var_list = TSO500.read_tso_unique_variants(path)
    variants_to_analyze = []
    for index, variant in enumerate(var_list):
        key = get_key_from_variant(variant)
        annotated = get_annotated_snv(key, db)
        if annotated is None:
            variants_to_analyze.append(variant)
            if len(variants_to_analyze) >= 100:
                report_on_variants(db, variants_to_analyze)
                variants_to_analyze = []
    if variants_to_analyze:
        report_on_variants(db, variants_to_analyze)
Example #7
def school_problem_list():
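    """Build the decreasing-schools report, filtered by the request's school/escola, turma and aluno/student parameters."""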
    data_filter = {}
    school_id = flask.request.values.get("school") or flask.request.values.get(
        "escola")
    if school_id:
        school_name = utils.get_database().schools.find_one(
            {"_id": bson.ObjectId(school_id)})["name"]
        data_filter["escola"] = school_name

    turma = flask.request.values.get("turma")
    if turma:
        data_filter["turma"] = turma

    aluno = flask.request.values.get("aluno") or flask.request.values.get(
        "student")
    if aluno:
        data_filter["aluno"] = aluno

    return map(
        _add_school_by_name,
        queries.get_decreasing_schools("totalminutes",
                                       extraFilter=data_filter))
Example #8
def main(argv):
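    """Show a small interactive menu for viewing and editing the options stored in options.secret."""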
    # TODO(LuHa): load options
    options = utils.get_database('options.secret', across=True)

    # TODO(LuHa): print menu
    while True:
        print('\n----+----+ Edit options ----+----+')
        print('Current options')
        print(json.dumps(options, indent='  '))
        print('1. Toggle downloaded image log')
        print('b. Back')

        user_input = input('User input: ')
        user_input = user_input.lower()
        user_input = user_input.strip()
        # TODO(LuHa): handle user input
        if user_input == '1':
            options['log'] = get_logging_level_from_user()
        elif user_input == 'b':
            break

    # TODO(LuHa): save options
    utils.set_database('options.secret', options, across=True)
Example #9
#coding:utf8
from utils import get_database
from utils import get_qianyue_database
from utils import get_ltp_path
import sys
from pyltp import Segmentor
from preprocessing import tokenizer
import pickle
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from tqdm import tqdm
import random
from texttable import Texttable
import numpy

wechat_db = get_database(sys.argv[1]).neaten_wechat2
account_info_db = get_database(sys.argv[1]).wechat_old_account_info


def get_data_of_an_account(account_id):
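    """Fetch one WeChat account document and rebuild its article list (the position filter below is currently disabled)."""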
    data = wechat_db.find_one({'_id': account_id})
    if data is None:
        return data
    # Move the raw article list aside and rebuild data['articles'] from it
    data['articles'], data['articles2'] = [], data['articles']
    for article in data['articles2']:
        # if article['position'] == '头条':  # optionally keep only top-story ("头条") articles
        data['articles'].append(article)
    return data


def get_account_ids(account_name):
Example #10
    try:
        next_state = state_handler(event, vk)
    except Exception as error:
        logging.exception(error)
        next_state = None

    if next_state is None:
        return

    user = db.get(f"vk_{user_id}")
    if user is not None:
        user_data = json.loads(user)
        user_data["state"] = next_state
    else:
        user_data = {"state": next_state}
    db.set(f"vk_{user_id}", json.dumps(user_data))


if __name__ == "__main__":
    load_dotenv()

    global db
    db = utils.get_database()

    vk_session = vk_api.VkApi(token=os.getenv("VK_TOKEN"))
    vk = vk_session.get_api()
    longpoll = VkLongPoll(vk_session)
    for event in longpoll.listen():
        if event.type == VkEventType.MESSAGE_NEW and event.to_me:
            handle_user_reply(event, vk)
Example #11
def main():
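    """Crawl newrank.cn account pages with Selenium and build WeChat account-info records (name, description, category, fans count, tags)."""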
    queue = get_message_queue(sys.argv[1], 'wechat_account_info_queue')
    db = get_database(sys.argv[1]).wechat_account_info
    firefoxProfile = FirefoxProfile()
    # firefoxProfile.set_preference('permissions.default.stylesheet', 2)
    firefoxProfile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so',
                                  'false')
    firefoxProfile.set_preference('permissions.default.image', 2)
    driver = webdriver.Firefox(firefoxProfile)
    driver.set_page_load_timeout(30)
    driver.implicitly_wait(5)
    login_from_cookie(driver)
    try:
        driver.find_element_by_class_name('new-header-login.unlogin')
        login_by_user()
        login_from_cookie(driver)
    except Exception as e:
        pass
    while 1:
        if queue.empty():
            print('Already finished')
            print('Waiting for new query ...')
        # wechat_id = queue.get().decode()
        wechat_id = 'HIT_SCIR'  # hard-coded for testing; the queue read above is disabled
        print('Crawling %s' % wechat_id)
        url = 'http://www.newrank.cn/public/info/detail.html?account=%s' % wechat_id
        try:
            driver.get(url)
        except:
            print('!!!!!!!!!!!!!Cannot get web page!!!!!!!!')
            time.sleep(3)
            continue
        if driver.title == u'页面错误':  # "page error"
            print('%s not included' % (wechat_id))
            continue
        locator = contain_something([
            './/*[@class="tag-name-list"]//li[1]',
            './/*[@class="tag-name-list"]//a[1]'
        ])

        try:
            WebDriverWait(driver, 10, 0.5).until(locator)
            info_tree = etree.HTML(
                driver.find_element_by_class_name('info-detail-head')
                .get_attribute('innerHTML'))
            account_info = dict()
            account_info['_id'] = wechat_id
            account_info['str_id'] = wechat_id
            account_info['name'] = info_tree.xpath(
                './/*[@class="info-detail-head-weixin-name"]/span')[0].xpath(
                    'string(.)').strip()
            account_info['description'] = info_tree.xpath(
                './/*[@class="info-detail-head-weixin-fun-introduce ellipsis"]/@title'
            )[0]
            account_info['category'] = info_tree.xpath(
                './/*[@class="info-detail-head-classify-subname"]/a/text()')
            account_info['fans_count'] = info_tree.xpath(
                './/*[@class="detail-fans-counts"]/@data')[0]
            try:
                driver.find_element_by_xpath(
                    './/*[@class="info-detail-head-classify"]//*[@class="detail-edit info-detail-edit detail-pic"]'
                ).click()
                html = driver.find_element_by_id(
                    'current_tag_list').get_attribute('innerHTML')
                account_info['tags'] = etree.HTML(html).xpath('.//a/text()')
            except:
                account_info['tags'] = []
            print(account_info)
            continue  # debug leftover: this skips the db.insert_one() below
            db.insert_one(account_info)
        except Exception as e:
            print(e)
            print('Error')
            with open('./fail_ids_for_user_info.dat', 'a') as fout:
                fout.write('%s\n' % wechat_id)
Example #12
def _add_school_by_name(data):
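    """Attach the full school document (looked up by schoolName) plus its _id, then add the weekly report link."""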
    data["school"] = utils.get_database().schools.find_one(
        {"name": data["schoolName"]})
    data["_id"] = data["school"]["_id"]
    return _add_weekly_report_link(data)
Example #13
def main(argv):
    """
    Download images from danbooru for the configured tags, skipping banned, muted and already-downloaded posts.
    """
    # TODO(LuHa): print message about program execution
    #utils.logger.info(
    #        '\x1B[38;5;5m[Danbooru] Execute danbooru downloader\x1B[0m')
    print('\x1B[38;5;5m[Danbooru] Execute danbooru downloader\x1B[0m')

    # TODO(LuHa): create downloads directory
    # actually, this code only uses the downloads directory,
    # but the save directory is also created so the code runs cleanly.
    os.makedirs('./downloads', exist_ok=True)
    os.makedirs('./save', exist_ok=True)

    # TODO(LuHa): load ban database
    ban_db = utils.get_database('ban.secret')

    # TODO(LuHa): load mute database
    mute_db = utils.get_database('mute.secret')

    # TODO(LuHa): read pre-downloaded image
    downloaded = utils.get_downloaded_images('danbooru')

    # TODO(LuHa): load tags
    if os.path.exists('tags.secret'):
        with open('tags.secret', 'r') as f_tags:
            tags = json.load(f_tags)
            tags = tags['danbooru']
    else:
        utils.logger.error('[Danbooru] Need tags in file named tags.secret')
        return

    # TODO(LuHa): load API keys
    if os.path.exists('danbooru_api.secret'):
        utils.logger.debug('[Danbooru] API key exists')
        with open('danbooru_api.secret', 'r') as f_api:
            api_key = f_api.read()
            api_key = api_key.strip()
    else:
        utils.logger.error(
            '[Danbooru] Need API key in file named danbooru_api.secret')
        utils.logger.error('[Danbooru] The format is ID:APIKEY')
        return

    # TODO(LuHa): create opener
    auth = api_key
    auth = auth.encode('ascii')
    auth = base64.b64encode(auth)
    auth = auth.decode('utf-8')
    opener = urllib.request.build_opener()
    opener.addheaders = [('Authorization', 'Basic ' + auth)]

    # TODO(LuHa): loop search by tags
    base_url = 'https://danbooru.donmai.us'
    # for fun
    random.shuffle(tags)
    for tag in tags:
        request_url = (base_url + '/posts.json?tags=' + tag + '&random=true')
        utils.logger.info(
            '\x1B[38;5;5m[Danbooru] Request: {0}\x1B[0m'.format(request_url))
        response = opener.open(request_url, timeout=30)
        try:
            posts = json.loads(response.read().decode('utf-8'))
        except socket.timeout:
            utils.logger.info('\x1B[38;5;5m[Danbooru] Response timeout\x1B[0m')
            return

        # TODO(LuHa): loop download by posts
        # get 20 images at one time in danbooru
        for post in posts:
            # skip target image is already downloaded
            if post['id'] in downloaded:
                utils.logger.debug('[Danbooru] Already downloaded {0}'.format(
                    post['id']))
                continue
            elif post['id'] in ban_db['danbooru']:
                utils.logger.debug('[Danbooru] Ban downloaded {0}'.format(
                    post['id']))
                continue
            elif post['id'] in mute_db['danbooru']:
                utils.logger.debug('[Danbooru] Mute downloaded {0}'.format(
                    post['id']))
                continue
            else:
                downloaded.add(post['id'])

            # Change url rule at 180410
            request_url = post['file_url']
            try:
                response = opener.open(request_url, timeout=TIMEOUT)
            except socket.timeout:
                utils.logger.info('[Danbooru] Request timeout')
                return
            image_path = ('./downloads' + '/danbooru-' + str(post['id']) +
                          '.' + post['file_ext'])
            with open(image_path, 'wb') as f:
                try:
                    f.write(response.read())
                except socket.timeout:
                    utils.logger.info(('\x1B[38;5;5m[Danbooru] ' +
                                       'Response timeout\x1B[0m'))
                    return
            utils.logger.debug('[Danbooru] Downloaded {0}'.format(image_path))
            # sleep to avoid being blocked
            utils.dynamic_sleep()

    # TODO(LuHa): print message about program termination
    utils.logger.info(
        '\x1B[38;5;5m[Danbooru] Terminate danbooru downloader\x1B[0m')
Example #14
dropout = 0.35  # Dropout probability of each layer. Conv layers use SpatialDropout2D
blocks = 6  # Number of (Conv -> Act -> BN -> MaxPool -> Dropout) blocks
# Number of channels (or feature maps) of the first convolution block;
# the following blocks use 1.5 times the channels of the previous one.
n_channels = args['channels']
weight_decay = 1e-4 * 0  # weight decay effectively disabled

# dataloader parameters.
# Folder's path where the files query.txt and bounding_box_train.txt are
# query.txt contains the path and the class of test images
# bounding_box_train.txt contains the path and the class of train images
path = args['path']

exp_dir, log_dir, model_weights_path, model_name = get_dirs(database)
print(exp_dir, log_dir, model_weights_path, model_name)
data, input_size = get_database(database)  # if database == 'skillup', data is None
im_size = input_size[:2]

data_gen_args_train = dict(
    featurewise_center=False,  # set input mean to 0 over the dataset
    samplewise_center=False,  # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,  # divide each input by its std
    zca_whitening=False,  # apply ZCA whitening
    rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range=0.1,  # randomly zoom images
    width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
    horizontal_flip=False,  # randomly flip images
Example #15
def main():
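    """Consume article URLs from the queue, scrape each WeChat article page and store its title, metadata and content in MongoDB."""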
    queue = get_message_queue(sys.argv[1], 'wechat_article_content_queue')
    firefoxProfile = FirefoxProfile()
    firefoxProfile.set_preference('permissions.default.stylesheet', 2)
    firefoxProfile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so',
                                  'false')
    firefoxProfile.set_preference('permissions.default.image', 2)
    # driver = webdriver.Firefox(firefoxProfile)
    driver = webdriver.PhantomJS(service_args=['--load-images=false'])
    print('Driver is ready')
    driver.implicitly_wait(5)
    db = get_database(sys.argv[1]).article_contents
    finished_count = 0
    while 1:
        # time.sleep(random.uniform(1, 1))
        if queue.empty():
            print('Already finished')
            print('Waiting for new query ...')
        url = queue.get().decode()
        print('start')
        try:
            driver.get(url)
        except:
            print(url)
            print('!!!!!!!!!!!!!Cannot get web page!!!!!!!!')
            continue
        if driver.title.strip() == '':
            print(url)
            print('Empty title')
            continue
        locator = (By.XPATH, '//*[@id="page-content"]')
        try:
            WebDriverWait(driver, 10,
                          0.5).until(EC.presence_of_element_located(locator))
            data = dict()
            data['href'] = url
            page_content = etree.HTML(
                driver.find_element_by_id('page-content').get_attribute(
                    'innerHTML'))
            if page_content is None:
                print('Not find page-content')
                print(url)
                continue
            data['title'] = ''.join(
                page_content.xpath('//*[@id="activity-name"]/text()')).strip()
            data['post-user'] = ''.join(
                page_content.xpath('//*[@id="post-user"]/text()')).strip()
            data['post-date'] = ''.join(
                page_content.xpath('//*[@id="post-date"]/text()')).strip()
            data['origin'] = ''.join(
                page_content.xpath(
                    '//*[@id="copyright_logo"]/text()')).strip()
            data['title2'] = ''.join(
                page_content.xpath(
                    '//*[@class="rich_media_meta rich_media_meta_text"]/text()'
                )).strip()
            data['content'] = get_content(page_content)
            # print(data)
            print('end')
            assert (len(data['content']) > 0)
            db.insert_one(data)
            finished_count += 1
            if finished_count % 100 == 0:
                print('Quit driver')
                driver.quit()
                time.sleep(random.random() * 3 + 1)
                driver = webdriver.PhantomJS(
                    service_args=['--load-images=false'])
                print('Driver is ready')
                driver.implicitly_wait(5)
        except Exception as e:
            print(url)
            print('Error')
            print(e)
            with open('./fail_ids_for_article_content.data', 'a') as fout:
                fout.write('%s\n' % url)
            time.sleep(random.random() * 3 + 1)
Example #16
def school_weekly_report(id):
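    """Aggregate per-week min/avg/max statistics for one school, optionally filtered by the request's turma and aluno/student parameters."""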
    school = _get_school(id)
    db = utils.get_database()
    data_filter = {
        "escola": school["name"],
    }
    turma = flask.request.values.get("turma")
    if turma:
        data_filter["turma"] = turma

    aluno = flask.request.values.get("aluno") or flask.request.values.get(
        "student")
    if aluno:
        data_filter["aluno"] = aluno

    result = db.weekly.aggregate([
        {
            "$match": data_filter,
        },
        {
            "$group": {
                "_id": "$semana",
                "minTotalMinutes": {
                    "$min": "$totalminutes"
                },
                "minVideoMinutes": {
                    "$min": "$videominutes"
                },
                "minExerciseMinutes": {
                    "$min": "$exerciseminutes"
                },
                "minNivel1": {
                    "$min": "$nivel1"
                },
                "minNivel2": {
                    "$min": "$nivel2"
                },
                "minPrecisaPraticar": {
                    "$min": "$precisa_praticar"
                },
                "minPraticado": {
                    "$min": "$praticado"
                },
                "minDominado": {
                    "$min": "$dominado"
                },
                "minPontos": {
                    "$min": "$pontos"
                },
                "minComDificuldade": {
                    "$min": "$com dificuldade"
                },
                "maxTotalMinutes": {
                    "$max": "$totalminutes"
                },
                "maxVideoMinutes": {
                    "$max": "$videominutes"
                },
                "maxExerciseMinutes": {
                    "$max": "$exerciseminutes"
                },
                "maxNivel1": {
                    "$max": "$nivel1"
                },
                "maxNivel2": {
                    "$max": "$nivel2"
                },
                "maxPrecisaPraticar": {
                    "$max": "$precisa_praticar"
                },
                "maxPraticado": {
                    "$max": "$praticado"
                },
                "maxDominado": {
                    "$max": "$dominado"
                },
                "maxPontos": {
                    "$max": "$pontos"
                },
                "maxComDificuldade": {
                    "$max": "$com dificuldade"
                },
                "avgTotalMinutes": {
                    "$avg": "$totalminutes"
                },
                "avgVideoMinutes": {
                    "$avg": "$videominutes"
                },
                "avgExerciseMinutes": {
                    "$avg": "$exerciseminutes"
                },
                "avgNivel1": {
                    "$avg": "$nivel1"
                },
                "avgNivel2": {
                    "$avg": "$nivel2"
                },
                "avgPrecisaPraticar": {
                    "$avg": "$precisa_praticar"
                },
                "avgPraticado": {
                    "$avg": "$praticado"
                },
                "avgDominado": {
                    "$avg": "$dominado"
                },
                "avgPontos": {
                    "$avg": "$pontos"
                },
                "avgComDificuldade": {
                    "$avg": "$com dificuldade"
                },
            }
        },
        {
            "$sort": {
                "_id": -1
            }
        },
    ])["result"]

    def _split_result(r):
        return {
            "_id": r["_id"],
            "week": r["_id"],
            "avg": {
                "totalMinutes": r["avgTotalMinutes"],
                "videoMinutes": r["avgVideoMinutes"],
                "exerciseMinutes": r["avgExerciseMinutes"],
                "nivel1": r["avgNivel1"],
                "nivel2": r["avgNivel2"],
                "precisaPraticar": r["avgPrecisaPraticar"],
                "praticado": r["avgPraticado"],
                "dominado": r["avgDominado"],
                "pontos": r["avgPontos"],
                "comDificuldade": r["avgComDificuldade"],
            },
            "min": {
                "totalMinutes": r["minTotalMinutes"],
                "videoMinutes": r["minVideoMinutes"],
                "exerciseMinutes": r["minExerciseMinutes"],
                "nivel1": r["minNivel1"],
                "nivel2": r["minNivel2"],
                "precisaPraticar": r["minPrecisaPraticar"],
                "praticado": r["minPraticado"],
                "dominado": r["minDominado"],
                "pontos": r["minPontos"],
                "comDificuldade": r["minComDificuldade"],
            },
            "max": {
                "totalMinutes": r["maxTotalMinutes"],
                "videoMinutes": r["maxVideoMinutes"],
                "exerciseMinutes": r["maxExerciseMinutes"],
                "nivel1": r["maxNivel1"],
                "nivel2": r["maxNivel2"],
                "precisaPraticar": r["maxPrecisaPraticar"],
                "praticado": r["maxPraticado"],
                "dominado": r["maxDominado"],
                "pontos": r["maxPontos"],
                "comDificuldade": r["maxComDificuldade"],
            },
        }

    return map(_split_result, result)
Example #17
def _get_school(id_):
    return utils.get_database().schools.find_one({"_id": bson.ObjectId(id_)})
Example #18
def __init__(self, config_file):
    self.tmall_info_db = get_database(config_file).neaten_tmall
    self.wechat_info_db = get_database(config_file).neaten_wechat
    self.sp = ScoreProvider_V1(config_file)
Example #19
def main(argv):
    """
    main flow
    """
    # TODO(LuHa): print message about program execution
    print('\x1B[38;5;5m[Pixiv] Execute pixiv downloader\x1B[0m')

    # TODO(LuHa): create downloads directory
    # actually, this code only uses the downloads directory,
    # but the save directory is also created so the code runs cleanly.
    os.makedirs('./downloads', exist_ok=True)
    os.makedirs('./save', exist_ok=True)

    # TODO(LuHa): load ban database
    ban_db = utils.get_database('ban.secret')

    # TODO(LuHa): load mute database
    mute_db = utils.get_database('mute.secret')

    # TODO(LuHa): read pre-downloaded image
    downloaded = utils.get_downloaded_images('pixiv')

    # TODO(LuHa): load tags
    if os.path.exists('tags.secret'):
        with open('tags.secret', 'r') as f_tags:
            tags = json.load(f_tags)
            tags = tags['pixiv']
    else:
        print('[Pixiv] Need tags in file named tags.secret')
        return

    # TODO(LuHa): load API keys
    if os.path.exists('pixiv_api.secret'):
        print('[Pixiv] API key exists')
        with open('pixiv_api.secret', 'r') as f_api:
            api_key = json.load(f_api)
            user_id = api_key['id'].strip()
            user_passwd = api_key['passwd'].strip()
    else:
        print('[Pixiv] Need user id and passwd in a file ' +
              'named pixiv_api.secret')
        print('[Pixiv] The format is below')
        print('{')
        print('    "id": "ID",')
        print('    "passwd": "PASSWD"')
        print('}')
        return

    # TODO(LuHa): load cookie from file
    cookie_jar = http.cookiejar.LWPCookieJar('pixiv_cookie.secret')
    if os.path.exists('pixiv_cookie.secret'):
        cookie_jar.load()
    cookie = urllib.request.HTTPCookieProcessor(cookie_jar)

    # TODO(LuHa): create opener
    opener = urllib.request.build_opener(cookie)
    opener.addheaders = [('User-agent', 'Mozilla/5.0'),
                         ('Accept', 'text/html')]

    # TODO(LuHa): get hidden value for login
    hidden_parser = LoginTagParser()
    base_url = 'https://accounts.pixiv.net/'
    page_url = 'login'
    request_url = base_url + page_url
    response = opener.open(request_url, timeout=TIMEOUT)
    try:
        hidden_parser.feed(response.read().decode('utf-8'))
    except socket.timeout:
        print('\x1B[38;5;5m[Pixiv] Response timeout\x1B[0m')
        return
    auth = hidden_parser.get_hidden()

    # TODO(LuHa): if the cookie is not login, login with cookie
    try:
        if 'post_key' in auth:
            auth['pixiv_id'] = user_id
            auth['password'] = user_passwd
            auth = urllib.parse.urlencode(auth)
            auth = auth.encode('ascii')
            opener.open(request_url, data=auth, timeout=TIMEOUT)

        # TODO(LuHa): query to daily rank
        # rank start url:
        #    https://www.pixiv.net/ranking.php?mode=daily&date=20070913
        for tag in tags:
            base_url = 'https://www.pixiv.net/'
            page_url = 'ranking.php' + tag
            request_url = base_url + page_url
            if tag.endswith('date='):
                request_url = request_url + get_random_date()
            print(
                '\x1B[38;5;5m[Pixiv] Request: {0}\x1B[0m'.format(request_url))
            response = opener.open(request_url, timeout=TIMEOUT)

            # TODO(LuHa): get page uri
            image_page_parser = ImagePageParser()
            try:
                image_page_parser.feed(response.read().decode('utf-8'))
            except socket.timeout:
                print('\x1B[38;5;5m[Pixiv] Response timeout\x1B[0m')
                return

            # TODO(LuHa): get image uri, but remain multiple page
            image_url_parser = ImageURLParser()
            for image_page in image_page_parser.get_pages():
                request_url = base_url + image_page
                response = opener.open(request_url, timeout=TIMEOUT)
                try:
                    image_url_parser.feed(response.read().decode('utf-8'))
                except socket.timeout:
                    print('\x1B[38;5;5m[Pixiv] Response timeout\x1B[0m')
                    return
                #print('[P] image url ready {0}'.format(len(image_url_parser.get_urls())))
            print('[Pixiv] Get ranking page')

            # TODO(LuHa): get multiple image uri
            image_urls = image_url_parser.get_urls()
            multi_page_parser = MultiPageParser()
            multi_url_parser = MultiURLParser()
            final_urls = list()
            for image_url in image_urls:
                #print('[P] final URL ready {0}'.format(len(final_urls)))
                if image_url.startswith('https://'):
                    final_urls.append(image_url)
                    continue
                multi_url_parser.clear_urls()
                multi_page_parser.clear_pages()
                request_url = 'https://www.pixiv.net/' + image_url
                response = opener.open(request_url, timeout=TIMEOUT)
                try:
                    multi_page_parser.feed(response.read().decode('utf-8'))
                except socket.timeout:
                    print('\x1B[38;5;5m[Pixiv] Response timeout\x1B[0m')
                    return
                for multi_page in multi_page_parser.get_pages():
                    request_url = 'https://www.pixiv.net' + multi_page
                    response = opener.open(request_url, timeout=TIMEOUT)
                    try:
                        multi_url_parser.feed(response.read().decode('utf-8'))
                    except socket.timeout:
                        print('\x1B[38;5;5m[Pixiv] Response timeout\x1B[0m')
                        return
                final_urls.extend(multi_url_parser.get_urls())
            print('[Pixiv] Get URLs of all images in ranking')

            # TODO(LuHa): download image
            for image_url in final_urls:
                image_id = image_url.split('/')[-1]
                if image_id in downloaded:
                    print('[Pixiv] Already downloaded {0}'.format(image_id))
                    continue
                elif image_id in ban_db['pixiv']:
                    print('[Pixiv] Ban downloaded {0}'.format(image_id))
                    continue
                elif image_id in mute_db['pixiv']:
                    print('[Pixiv] Mute downloaded {0}'.format(image_id))
                    continue
                else:
                    downloaded.add(image_id)
                file_name = ('./downloads' + '/pixiv-' +
                             image_url.split('/')[-1])
                with open(file_name, 'wb') as f:
                    referer = 'https://www.pixiv.net/member_illust.php'
                    referer = referer + '?mode=medium&illust_id='
                    referer = referer + file_name.split('_')[0]
                    opener.addheaders = [('User-agent', 'Mozilla/5.0'),
                                         ('Referer', referer)]
                    response = opener.open(image_url, timeout=TIMEOUT)
                    try:
                        f.write(response.read())
                    except socket.timeout:
                        print('\x1B[38;5;5m[Pixiv] Response timeout\x1B[0m')
                        return
                print('[Pixiv] Downloaded {0}'.format(file_name))
                # sleep to avoid being blocked
                utils.dynamic_sleep()

    except KeyboardInterrupt:
        print('[Pixiv] Keyboard Interrupt')
    except Exception as e:
        print('[Pixiv] Some Interrupt', e)

    # TODO(LuHa): save cookie to file
    cookie_jar.save()

    # TODO(LuHa): print message about program termination
    print('\x1B[38;5;5m[Pixiv] Terminate pixiv downloader\x1B[0m')
Example #20
from flask import Flask, request, jsonify, render_template, url_for
from flask import session
from flask_googleauth import GoogleAuth
import random
import urlparse
import twilio.twiml
from twilio import TwilioRestException
from utils import load_data, set_trace
from models import log_call, aggregate_stats, valid_users
from utils import get_database, play_or_say, locate_member_ids

app = Flask(__name__)
app.config.from_object('config.ConfigProduction')
app.secret_key = app.config['SECRET_KEY']

db = get_database(app)

auth = GoogleAuth(app)

call_methods = ['GET', 'POST']

campaigns, legislators, districts = load_data()
defaults_campaign = campaigns['default']

def full_url_for(route, **kwds):
    return urlparse.urljoin(app.config['APPLICATION_ROOT'],
        url_for(route, **kwds))

def get_campaign(cid):
    return dict(defaults_campaign, **campaigns[cid])
Example #21
def main():
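    """Crawl the latest-article list of a WeChat account from newrank.cn and merge it into the wechat_article_list collection."""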
    queue = get_message_queue(sys.argv[1],
                              'wechat_official_account_content_queue')
    firefoxProfile = FirefoxProfile()
    # firefoxProfile.set_preference('permissions.default.stylesheet', 2)
    firefoxProfile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so',
                                  'false')
    firefoxProfile.set_preference('permissions.default.image', 2)
    driver = webdriver.Firefox(firefoxProfile)
    # driver = webdriver.PhantomJS(service_args=['--load-images=false'])
    print('Driver is ready')
    driver.implicitly_wait(10)
    # driver.set_page_load_timeout(30)
    db = get_database(sys.argv[1]).wechat_article_list
    while 1:
        if queue.empty():
            print('Already finished')
            print('Waiting for new query ...')
        # wechat_id = queue.get().decode()
        wechat_id = 'HIT_SCIR'  # hard-coded for testing; the queue read above is disabled
        print('Crawling %s' % wechat_id)
        url = 'http://www.newrank.cn/public/info/detail.html?account=%s' % wechat_id

        try:
            driver.get(url)
        except:
            print('!!!!!!!!!!!!!Cannot get web page!!!!!!!!')
            time.sleep(3)
            continue
        if driver.title == u'页面错误':  # "page error"
            print('%s not included' % (wechat_id))
            continue
        locator = (By.XPATH, '//*[@id="info_detail_article_lastest"]//li')
        try:
            WebDriverWait(driver, 20,
                          0.5).until(EC.presence_of_element_located(locator))
            elements = driver.find_elements_by_xpath(
                '//*[@id="info_detail_article_lastest"]//li')
            data = dict()
            data['str_id'] = wechat_id
            data['article_list'] = []
            for e in elements:
                article = dict()
                article['title'] = e.find_element_by_class_name(
                    'ellipsis').get_attribute('title')
                article['href'] = e.find_element_by_class_name(
                    'ellipsis').get_attribute('href')
                article['short_text'] = e.find_element_by_class_name(
                    'article-text').find_element_by_tag_name(
                        'a').get_attribute('title')
                article['date'] = e.find_element_by_class_name(
                    'info-detail-article-date').text
                article['read_count'] = e.find_element_by_class_name(
                    'read-count').text
                article['like_count'] = e.find_element_by_class_name(
                    'links-count').text
                article['position'] = e.find_element_by_class_name(
                    'tj').find_elements_by_tag_name('span')[1].text
                data['article_list'].append(article)
            assert len(data['article_list']) > 0
            record = db.find_one({'str_id': wechat_id})
            if record is None:
                print('Not find %s in database' % wechat_id)
                db.insert(data)
            else:
                print(len(record['article_list']))
                for article in data['article_list']:
                    if article not in record['article_list']:
                        record['article_list'].append(article)
                print(len(record['article_list']))
                db.replace_one({'str_id': wechat_id}, record)
        except Exception as e:
            print('Error')
            print(e)
            with open('./fail_ids_for_article_urls.data', 'a') as fout:
                fout.write('%s\n' % wechat_id)
            print('Not find id "info_detail_article_lastest" when crawl %s' %
                  wechat_id)

        sleep_time = random.uniform(1, 3)
        time.sleep(sleep_time)
Example #22
def main():
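    """Configure, train and save a triplet-loss embedding model for the selected database, then visualise its embeddings in TensorBoard."""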
    # General parameters
    net = ['base', 'cifar', 'emb+soft', 'resnet50', 'resnet20',
           'local_feat'][0]
    database = ['cifar10', 'mnist', 'fashion_mnist', 'skillup'][1]
    epochs = 10
    learn_rate = 0.01
    decay = (learn_rate / epochs) * 0.8
    ims_per_id = 8
    ids_per_batch = 8
    margin = 0.9
    embedding_size = 64
    squared = False
    data_augmentation = False
    patience = 25

    # built model's parameters
    dropout = 0.3
    blocks = 3
    n_channels = 32
    weight_decay = 1e-4 * 0  # weight decay effectively disabled

    # dataloader parameters
    use_dataloader = True
    path = '/home/daniel/proyectos/product_detection/web_market_preproces/duke_from_images'

    exp_dir, log_dir, model_weights_path, model_name = get_dirs(database)
    tl_object = TripletLoss(ims_per_id=ims_per_id,
                            ids_per_batch=ids_per_batch,
                            margin=margin,
                            squared=squared)
    tl_h = TripletLoss(ims_per_id, ids_per_batch, margin, squared)
    opt = optimizers.Adam(lr=learn_rate, decay=decay)
    data, input_size = get_database(database)
    im_size = input_size[:2]

    data_gen_args_train = dict(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range=0.1,  # randomly zoom images
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)
    if not data_augmentation:
        data_gen_args_train = {}

    model_args = dict(embedding_dim=embedding_size,
                      input_shape=input_size,
                      drop=dropout,
                      blocks=blocks,
                      n_channels=n_channels,
                      weight_decay=weight_decay,
                      layer_limit=173,
                      patience=patience)

    data_loader_args = dict(path=path,
                            ims_per_id=ims_per_id,
                            ids_per_batch=ids_per_batch,
                            target_image_size=im_size,
                            data_gen_args=data_gen_args_train,
                            preprocess_unit=True,
                            data=data)

    if database == 'skillup':
        dl = FileDataloader(**data_loader_args)
    else:
        dl = StaticDataloader(**data_loader_args)

    model = get_net_object(net, model_args)
    model.compile(opt, tl_object.cluster_loss)
    if use_dataloader:
        model.train_generator(dl, model_weights_path, epochs, log_dir)
    else:
        model.train(data, model_weights_path, epochs,
                    ims_per_id * ids_per_batch, log_dir)
    model.save_model(model_weights_path)
    visualize_embeddings(database=database,
                         model_dir=exp_dir,
                         model_name=model_name,
                         model=model.model)
Example #23
def main(argv):
    """
    Download images from wallhaven for the configured tags, skipping banned, muted and already-downloaded images.
    """
    # TODO(LuHa): print message about program execution
    utils.logger.info(
            '\x1B[38;5;5m[Wallhaven] Execute wallhaven downloader\x1B[0m')

    # TODO(LuHa): create downloads directory
    # actually, this code only uses the downloads directory,
    # but the save directory is also created so the code runs cleanly.
    os.makedirs('./downloads', exist_ok = True)
    os.makedirs('./save', exist_ok = True)

    # TODO(LuHa): load ban database
    ban_db = utils.get_database('ban.secret')

    # TODO(LuHa): load mute database
    mute_db = utils.get_database('mute.secret')

    # TODO(LuHa): read pre-downloaded image
    downloaded = utils.get_downloaded_images('wallhaven')

    # TODO(LuHa): load tags
    if os.path.exists('tags.secret'):
        with open('tags.secret', 'r') as f_tags:
            tags = json.load(f_tags)
            tags = tags['wallhaven']
    else:
        utils.logger.error('[Wallhaven] Need tags in file named tags.secret')
        return

    # TODO(LuHa): load API keys
    if os.path.exists('wallhaven_api.secret'):
        print('[Wallhaven] API key exists')
        with open('wallhaven_api.secret', 'r') as f_api:
            api_key = json.load(f_api)
            user_id = api_key['id'].strip()
            user_passwd = api_key['passwd'].strip()
    else:
        print('[Wallhaven] Need User id and passwd file '
            + 'named wallhaven_api.secret')
        print('[Wallhaven] The format is below')
        print('{')
        print('    "id": "ID",')
        print('    "passwd": "PASSWD"')
        print('}')
        return

    # TODO(LuHa): load cookie from file
    cookie_jar = http.cookiejar.LWPCookieJar('wallhaven_cookie.secret')
    if os.path.exists('wallhaven_cookie.secret'):
        cookie_jar.load()
    cookie = urllib.request.HTTPCookieProcessor(cookie_jar)

    # TODO(LuHa): create opener
    opener = urllib.request.build_opener(cookie)
    opener.addheaders = [('User-agent', 'Mozilla/5.0'),
                         ('Accept', 'text/html')]

    # TODO(LuHa): check logined or not logined
    request_url = 'https://alpha.wallhaven.cc/auth/login'
    response = opener.open(request_url, timeout = TIMEOUT)
    login_parser = LoginParser()
    try:
        login_parser.feed(response.read().decode('utf-8'))
    except socket.timeout:
        print('\x1B[38;5;5m[Wallhaven] Response timeout\x1B[0m')
        return

    # TODO(LuHa): if the cookie is not login, login with cookie
    try:
        if not login_parser.get_logined():
            request_url = 'https://alpha.wallhaven.cc/auth/login'
            auth = {'username': user_id,
                    'password': user_passwd}
            auth = urllib.parse.urlencode(auth)
            auth = auth.encode('ascii')
            opener.open(request_url, data = auth)
    
        # TODO(LuHa): loop search by tags
        base_url = 'https://alpha.wallhaven.cc/search'
        max_page_parser = MaxPageParser()
        id_parser = ImageIdParser()
        uri_parser = ImageURIParser()
        # for fun
        random.shuffle(tags)
        for tag in tags:
            base_url = 'https://alpha.wallhaven.cc/search'
            max_page_parser.clear_data()
            id_parser.clear_ids()
            uri_parser.clear_uris()
    
            # TODO(LuHa): get max page
            opener.addheaders = [('User-agent', 'Mozilla/5.0'),
                                 ('Accept', 'text/html')]
            request_url = base_url + tag
            print('\x1B[38;5;5m[Wallhaven] Request: {0}\x1B[0m'.format(request_url))
            response = opener.open(request_url, timeout = TIMEOUT)
            try:
                max_page_parser.feed(response.read().decode('utf-8'))
            except socket.timeout:
                print('\x1B[38;5;5m[Wallhaven] Response timeout\x1B[0m')
                return
            max_page = max_page_parser.get_data()
            max_page = max_page.split()
            if len(max_page) > 3:
                max_page = int(max_page[3])
            else:
                max_page = 1
    
            # TODO(LuHa): get image id
            random_page = random.randint(1, max_page)
            random_page = '&page=' + str(random_page)
            request_url = base_url + tag + random_page
            response = opener.open(request_url, timeout = TIMEOUT)
            try:
                id_parser.feed(response.read().decode('utf-8'))
            except socket.timeout:
                print('\x1B[38;5;5m[Wallhaven] Response timeout\x1B[0m')
                return
    
            # TODO(LuHa): loop parse image path
            # get 24 images at one time in wallhaven
            print('[Wallhaven] Search image path')
            for image_id in id_parser.get_ids():
                # skip target image is already downloaded
                if image_id in downloaded:
                    print('[Wallhaven] Already downloaded {0}'.format(image_id))
                    continue
                elif image_id in ban_db['wallhaven']:
                    print('[Wallhaven] Ban downloaded {0}'.format(image_id))
                    continue
                elif image_id in mute_db['wallhaven']:
                    print('[Wallhaven] Mute downloaded {0}'.format(image_id))
                    continue
                else:
                    downloaded.add(image_id)
    
                base_url = 'https://alpha.wallhaven.cc/'
                request_url = (base_url
                             + 'wallpaper/'
                             + image_id)
                response = opener.open(request_url, timeout = TIMEOUT)
                try:
                    uri_parser.feed(response.read().decode('utf-8'))
                except socket.timeout:
                    print('\x1B[38;5;5m[Wallhaven] Response timeout\x1B[0m')
                    return
                # sleep to avoid being blocked
                utils.dynamic_sleep()
    
            # TODO(LuHa): loop download by posts
            opener.addheaders = [('User-agent', 'Mozilla/5.0')]
            for image_uri in uri_parser.get_uris():
                request_url = ('https:'
                             + image_uri)
                response = opener.open(request_url, timeout = TIMEOUT)
                image_path = ('./downloads/'
                            + image_uri.split('/')[-1])
                with open(image_path, 'wb') as f:
                    try:
                        f.write(response.read())
                    except socket.timeout:
                        print('\x1B[38;5;5m[Wallhaven] Response timeout\x1B[0m')
                        return
                print('[Wallhaven] Downloaded {0}'.format(image_path))
                # sleep to avoid being blocked
                utils.dynamic_sleep()

    except KeyboardInterrupt:
        print('[Wallhaven] keyboard Interrupt')
    except Exception as e:
        print('[Wallhaven] Some Interrupt', e)
    
    # TODO(LuHa): save cookie
    cookie_jar.save()

    # TODO(Luha): print message about program termination
    utils.logger.info(
            '\x1B[38;5;5m[Wallhaven] Terminate wallhaven downloader\x1B[0m')
Example #24
def school_list():
    return map(_add_weekly_report_link, utils.get_database().schools.find())