def cli(
    args,
    verbose,
    debug,
    simulate,
    count,
    printn,
):
    """Echo each input item with ``ic``, optionally stopping after ``count`` items.

    Args:
        args: Items handed to ``enumerate_input`` as its ``iterator``.
        verbose: Also report the stdout TTY state and every index/item pair.
        debug: Forwarded to ``enumerate_input``.
        simulate: Report index/item pairs but skip the final per-item output.
        count: Maximum number of items to handle; a falsy value means no limit.
        printn: Inverted to obtain the ``null`` flag for ``enumerate_input``.
    """
    null = not printn

    if verbose:
        ic(sys.stdout.isatty())

    for index, arg in enumerate_input(iterator=args,
                                      null=null,
                                      debug=debug,
                                      verbose=verbose):
        # The limit test used to be inverted (count > index + 1), which exited
        # on the very first item for any count > 1 and never for count == 1.
        # Assumes enumerate_input numbers items from 0 -- TODO confirm.
        if count and (index + 1) > count:
            ic(count)
            sys.exit(0)

        if verbose or simulate:
            ic(index, arg)

        if simulate:
            continue

        ic(arg)
Exemplo n.º 2
0
def list_keys(ctx):
    """Print every item that ``enumerate_input`` yields for ``keys_and_sizes``.

    All settings (``r``, ``null``, ``progress``, ``debug``, ``verbose`` and the
    output terminator ``end``) are read from ``ctx.obj``.
    """
    settings = ctx.obj
    source = keys_and_sizes(r=settings['r'])
    stream = enumerate_input(
        iterator=source,
        null=settings['null'],
        progress=settings['progress'],
        skip=False,
        head=False,
        tail=False,
        debug=settings['debug'],
        verbose=settings['verbose'],
    )

    for index, value in stream:
        if settings['verbose']:
            ic(index, value)
        print(value, end=settings['end'])
Exemplo n.º 3
0
def add(ctx, *,
        key,
        values,
        key_type,
        hash_values,
        verbose,
        debug,):
    """Add each of ``values`` to the Redis key ``key`` and print the outcome.

    ``verbose`` and ``debug`` are stored in ``ctx.obj`` first; ``null``,
    ``progress`` and ``end`` are read from it. For every value one line of
    ``key result value`` is printed, where ``result`` is whatever
    ``RedisKey.add`` returned.
    """
    options = ctx.obj
    options['verbose'] = verbose
    options['debug'] = debug

    if options['verbose']:
        ic(ctx.obj, key)

    target = RedisKey(
        key=key,
        algorithm="sha3_256",
        hash_values=hash_values,
        key_type=key_type,
        verbose=options['verbose'],
        debug=options['debug'],
        hash_length=None,
    )

    stream = enumerate_input(
        iterator=values,
        null=options['null'],
        progress=options['progress'],
        skip=None,
        head=None,
        tail=None,
        debug=options['debug'],
        verbose=options['verbose'],
    )
    for index, value in stream:
        if options['verbose']:
            ic(index, value)

        result = target.add(value)
        print(key, result, value, end=options['end'])
Exemplo n.º 4
0
def cli(paths,
        add,
        verbose,
        debug,
        ipython,
        null):
    """Pretty-print the molecule records found in each input path.

    The application config is read once up front. For every path each record
    from ``molecule_dict_generator`` is pretty-printed; with ``ipython`` set an
    IPython shell is opened after the first record and the rest of that path
    is skipped. With ``add`` set, a fixed test entry is written to the config
    after each path.
    """
    config, config_mtime = click_read_config(
        click_instance=click,
        app_name=APP_NAME,
        verbose=verbose,
    )
    if verbose:
        ic(config, config_mtime)

    path_stream = enumerate_input(
        iterator=paths,
        null=null,
        debug=debug,
        verbose=verbose,
    )
    for index, path in path_stream:
        if verbose:
            ic(index, path)

        for mol_data in molecule_dict_generator(path, verbose=verbose):
            pprint.pprint(mol_data, indent=1)
            if ipython:
                import IPython
                IPython.embed()
                break

        if not add:
            continue

        config, config_mtime = click_write_config_entry(
            click_instance=click,
            app_name=APP_NAME,
            section="test_section",
            key="test_key",
            value="test_value",
            verbose=verbose,
        )
        if verbose:
            ic(config)
Exemplo n.º 5
0
def delete_key(ctx, *,
               keys,
               verbose,
               debug,):
    """Delete each Redis key named in ``keys`` and print ``key result``.

    ``verbose`` and ``debug`` are stored in ``ctx.obj`` first; ``null``,
    ``progress`` and ``end`` are read from it. ``result`` is whatever
    ``RedisKey.delete`` returned for that key.
    """
    options = ctx.obj
    options['verbose'] = verbose
    options['debug'] = debug

    if options['verbose']:
        ic(ctx.obj, keys)

    stream = enumerate_input(
        iterator=keys,
        null=options['null'],
        progress=options['progress'],
        skip=None,
        head=None,
        tail=None,
        debug=options['debug'],
        verbose=options['verbose'],
    )
    for index, key in stream:
        if options['verbose']:
            ic(index, key)

        # A fresh RedisKey wrapper is built for every key to be removed.
        doomed = RedisKey(
            key=key,
            algorithm="sha3_256",
            hash_values=False,
            key_type=None,
            verbose=options['verbose'],
            debug=options['debug'],
            hash_length=None,
        )
        result = doomed.delete()

        print(key, result, end=options['end'])
Exemplo n.º 6
0
def cli(timestamps,
        before: str,
        after: str,
        around: str,
        within: str,
        inclusive: bool,
        verbose: bool,
        debug: bool,
        oldest: bool,
        newest: bool,
        human: bool,
        printn: bool,
        exit_after_matches: int,
        ):
    """Filter timestamps by a time window and print the matches.

    Every input timestamp is parsed as a ``Decimal`` and tested against the
    optional ``after``/``before`` bounds. Matches are printed as they are
    found, unless ``newest`` and/or ``oldest`` is set, in which case only the
    extreme match(es) are printed once the input is exhausted.

    Args:
        timestamps: Items handed to ``enumerate_input`` as its ``iterator``.
        before: Upper bound; a decimal string, otherwise parsed with
            ``human_date_to_timestamp``. ``None`` disables the bound.
        after: Lower bound, parsed like ``before``.
        around: Window centre, parsed like ``before``; requires ``within``.
        within: Window half-width; a decimal string, otherwise converted to
            seconds with ``convert``.
        inclusive: Forwarded to ``is_after``/``is_before`` for the bound tests.
        verbose: Emit extra diagnostics.
        debug: Emit per-item diagnostics; forwarded to the helpers.
        oldest: Print only the oldest matching timestamp.
        newest: Print only the newest matching timestamp.
        human: Forwarded to ``print_result``.
        printn: Inverted to obtain ``null``; output is NUL-terminated only
            when ``null`` is set and stdout is not a TTY.
        exit_after_matches: Exit with status 0 after this many matches; a
            falsy value means no limit.

    Raises:
        ValueError: ``around`` was given without ``within`` or together with
            ``before``/``after``.
        decimal.InvalidOperation: A non-empty input item is not a valid decimal.
    """
    null = not printn
    # A terminal always gets newlines, whatever `null` says.
    end = '\n' if (sys.stdout.isatty() or not null) else '\0'
    if verbose:
        ic(end)

    if verbose:
        ic(before, after)

    if within is not None:
        maxone([before, after, around], msg='--within requires one of --before/--after/--around')

    if around is not None:
        if within is None:
            raise ValueError('--around requires --within')
        if (before is not None) or (after is not None):
            raise ValueError('--around can not be used with --before/--after')

    if before is not None:
        try:
            before = Decimal(before)
        except InvalidOperation:
            before = human_date_to_timestamp(before)

    if after is not None:
        try:
            after = Decimal(after)
        except InvalidOperation:
            after = human_date_to_timestamp(after)

    if around is not None:
        try:
            around = Decimal(around)
        except InvalidOperation:
            around = human_date_to_timestamp(around)

    if within is not None:
        try:
            within = Decimal(within)
        except InvalidOperation:
            within_converted = convert(human_input_units=within,
                                       human_output_unit="seconds",
                                       verbose=verbose,
                                       debug=debug,)
            ic(within_converted)
            within = Decimal(within_converted.magnitude)
            ic(within)

        # NOTE(review): only the --around/--within combination is implemented
        # here; --within together with --before or --after trips these
        # assertions even though the maxone() message suggests it is allowed.
        assert before is None
        assert after is None

        after = around - within
        before = around + within
        ic(after, before)

    now = Decimal(time.time())

    if any(bound is not None for bound in (before, after, within)):
        ic(before, after, within, now)

    match_count = 0

    current_newest = None
    current_oldest = None
    for index, timestamp in enumerate_input(iterator=timestamps,
                                            null=null,
                                            skip=None,
                                            head=None,
                                            tail=None,
                                            debug=debug,
                                            verbose=verbose):

        try:
            timestamp = Decimal(timestamp)
        except InvalidOperation as e:
            ic(e)
            ic(index, timestamp)
            # Empty items are tolerated; anything else is a hard error.
            if timestamp == '':
                continue
            raise

        if debug:
            ic(index, timestamp)

        # Bounds are compared against None explicitly: Decimal(0) is falsy, so
        # a truthiness test would silently disable a bound at the epoch.
        if after is not None:
            if not is_after(timestamp=timestamp,
                            after=after,
                            inclusive=inclusive,
                            verbose=verbose,
                            debug=debug,):
                continue

        if before is not None:
            if not is_before(timestamp=timestamp,
                             before=before,
                             inclusive=inclusive,
                             verbose=verbose,
                             debug=debug,):
                continue

        if newest:
            # `is None` rather than `not ...`: a stored timestamp of 0 must not
            # be mistaken for "nothing recorded yet".
            if current_newest is None:
                current_newest = timestamp
                if verbose:
                    current_newest_human = timestamp_to_human_date(current_newest)
                    ic(current_newest, current_newest_human)
            else:
                if is_after(timestamp=timestamp,
                            after=current_newest,
                            inclusive=False,
                            verbose=verbose,
                            debug=debug,):
                    current_newest = timestamp
                    if verbose:
                        current_newest_human = timestamp_to_human_date(current_newest)
                        ic(current_newest, current_newest_human)

        if oldest:
            if current_oldest is None:
                current_oldest = timestamp
                if verbose:
                    current_oldest_human = timestamp_to_human_date(current_oldest)
                    ic(current_oldest, current_oldest_human)
            else:
                if is_before(timestamp=timestamp,
                             before=current_oldest,
                             inclusive=False,
                             verbose=verbose,
                             debug=debug,):
                    current_oldest = timestamp
                    if verbose:
                        current_oldest_human = timestamp_to_human_date(current_oldest)
                        ic(current_oldest, current_oldest_human)

        if not (newest or oldest):
            print_result(timestamp=timestamp,
                         human=human,
                         end=end,
                         verbose=verbose,
                         debug=debug,)

        match_count += 1
        if exit_after_matches:
            if match_count >= exit_after_matches:
                # Exits immediately; in newest/oldest mode nothing is printed.
                sys.exit(0)

    if newest:
        print_result(timestamp=current_newest,
                     human=human,
                     end=end,
                     verbose=verbose,
                     debug=debug,)
    if oldest:
        print_result(timestamp=current_oldest,
                     human=human,
                     end=end,
                     verbose=verbose,
                     debug=debug,)
Exemplo n.º 7
0
def list_key(ctx, *,
             key,
             matches,
             exact,
             count,
             skip,
             head,
             tail,
             first,
             last,
             verbose,
             debug,):
    """Print the values stored under a Redis key, or a count of them.

    Args:
        ctx: Context whose ``obj`` mapping supplies ``port``, ``ip``, ``null``,
            ``progress`` and ``end``; the per-call options are stored in it.
        key: Name of the Redis key to read.
        matches: Optional substrings; when given, only values containing at
            least one of them are printed.
        exact: Match case-sensitively; otherwise both sides are lower-cased.
        count: Print ``last index + 1`` instead of the values.
        skip: Forwarded to ``enumerate_input``.
        head: Forwarded to ``enumerate_input``.
        tail: Forwarded to ``enumerate_input``.
        first: Print only ``RedisKey.first()``.
        last: Print only ``RedisKey.last()``.
        verbose: Emit extra diagnostics.
        debug: Forwarded to ``RedisKey`` and ``enumerate_input``.

    Raises:
        ValueError: ``count`` combined with ``first``/``last``, or ``first``
            combined with ``last``.
    """
    ctx.obj['count'] = count
    ctx.obj['skip'] = skip
    ctx.obj['head'] = head
    ctx.obj['tail'] = tail
    ctx.obj['verbose'] = verbose
    ctx.obj['debug'] = debug

    if ctx.obj['verbose']:
        ic(ctx.obj, skip, head, tail)

    # Reject contradictory flags before a RedisKey is constructed.
    if (first or last) and count:
        raise ValueError('--count is mutually exclusive with --first/--last')
    if first and last:
        raise ValueError('--first and --last are mutually exclusive')

    iterator = RedisKey(port=ctx.obj['port'],
                        ip=ctx.obj['ip'],
                        key=key,
                        algorithm="sha3_256",
                        hash_values=False,
                        key_type=None,
                        verbose=ctx.obj['verbose'],
                        debug=ctx.obj['debug'],
                        hash_length=None,)

    if first:
        value = iterator.first()
        print(value, end=ctx.obj['end'])
        return
    if last:
        value = iterator.last()
        print(value, end=ctx.obj['end'])
        return

    # Start at -1 so that an empty iteration reports a count of 0; the
    # previous initial value of 0 made --count print 1 for an empty key.
    index = -1
    for index, value in enumerate_input(iterator=iterator,
                                        null=ctx.obj['null'],
                                        progress=ctx.obj['progress'],
                                        skip=ctx.obj['skip'],
                                        head=ctx.obj['head'],
                                        tail=ctx.obj['tail'],
                                        debug=ctx.obj['debug'],
                                        verbose=ctx.obj['verbose'],):

        if ctx.obj['verbose']:
            ic(index, value)

        if ctx.obj['count']:
            continue

        if not matches:
            print(value, end=ctx.obj['end'])
            continue

        # Print a value once, however many of the patterns it contains.
        if exact:
            hit = any(match in value for match in matches)
        else:
            lowered = value.lower()
            hit = any(match.lower() in lowered for match in matches)
        if hit:
            print(value, end=ctx.obj['end'])

    if ctx.obj['count']:
        print(index + 1, end=ctx.obj['end'])
Exemplo n.º 8
0
def dbimport(
    ctx,
    paths,
    add: bool,
    verbose: bool,
    debug: bool,
    ipython: bool,
    simulate: bool,
    count: int,
    start_cid: int,
    delete_database: bool,
    null: bool,
):
    """Import molecule records from the given files into the database.

    For each path the file's MD5 is checked against the sibling ``.md5`` file,
    then every record from ``molecule_dict_generator`` is normalised (missing
    SDF keys filled in, keys lower-cased, values coerced by
    ``SDF_FIELD_TYPES``) and added to the session as a ``PubChem`` row. The
    session is committed every 1000 records.

    Args:
        ctx: Context whose ``obj`` mapping supplies ``database`` and ``appname``.
        paths: Items handed to ``enumerate_input`` as its ``iterator``.
        add: Unused by this function.
        verbose: Emit extra diagnostics.
        debug: Forwarded to the helpers.
        ipython: Open an IPython shell after the first imported file and stop.
        simulate: Only report which files would be handled; also suppresses
            ``delete_database``.
        count: Maximum number of records to handle per file before exiting
            with status 1; a falsy value means no limit.
        start_cid: Skip files and records whose CID is below this value.
        delete_database: Drop the database first (unless ``simulate``).
        null: Forwarded to ``enumerate_input``.
    """
    total_records = 155000000  # hard-coded total used for the ETA estimate

    database = ctx.obj['database']
    if delete_database and not simulate:
        really_delete_database(database)

    config, config_mtime = click_read_config(
        click_instance=click,
        app_name=ctx.obj['appname'],
        verbose=verbose,
        debug=debug,
    )
    if verbose:
        ic(config, config_mtime)

    with self_contained_session(db_url=database) as session:
        if verbose:
            ic(session)

        ic(BASE)
        BASE.metadata.create_all(session.bind)

        if not paths:
            ic('waiting for input')

        all_sdf_keys = config['sdf_keys'].keys()
        assert "PUBCHEM_XLOGP3" in all_sdf_keys

        for index, path in enumerate_input(iterator=paths,
                                           null=null,
                                           debug=debug,
                                           skip=None,
                                           head=None,
                                           tail=None,
                                           verbose=verbose):
            path = Path(path).expanduser()
            # The last "_"-separated field of the file name, up to the first
            # ".", is parsed as the highest CID contained in the file.
            last_cid_in_file = int(path.name.split("_")[-1].split('.')[0])
            ic(last_cid_in_file)
            if start_cid:
                if last_cid_in_file < start_cid:
                    ic('skipping:', path)
                    continue

            ic(index, path)
            if simulate:
                continue

            import_start_time = time.time()  # per sdf.gz
            md5_hash = md5_hash_file(path)
            expected_md5 = Path(path.as_posix() +
                                '.md5').read_text().split()[0]
            ic(md5_hash)
            ic(expected_md5)
            assert md5_hash == expected_md5
            for mindex, mdict in enumerate(
                    molecule_dict_generator(path=path.as_posix(),
                                            verbose=verbose)):
                if start_cid:
                    if int(mdict['PUBCHEM_COMPOUND_CID']) < start_cid:
                        continue

                # The limit test used to be inverted (count > mindex + 1),
                # which exited on the first record for any count > 1.
                if count and (mindex + 1) > count:
                    ic(count)
                    sys.exit(1)

                # Every configured SDF key must be present before the row is built.
                for key in all_sdf_keys:
                    if key not in mdict:
                        mdict[key] = ''

                if verbose:
                    ic(mdict)

                mdict = {k.lower(): v for k, v in mdict.items()}
                mdict = {k.replace(' ', '_'): v for k, v in mdict.items()}
                for key in mdict.keys():
                    key_type = SDF_FIELD_TYPES[key]
                    if mdict[key]:
                        # Booleans go through int() first, then bool().
                        if key_type in ['Integer', 'Boolean']:
                            mdict[key] = int(mdict[key])
                        if key_type in ['Boolean']:
                            mdict[key] = bool(mdict[key])
                    else:  # ''
                        mdict[key] = None

                pubchem_row = PubChem(**mdict)
                cid = mdict['pubchem_compound_cid']
                # Both rates are clamped to >= 1 to avoid division by zero.
                elapsed_time = max(int(time.time() - import_start_time), 1)
                records_per_sec = max(int((mindex + 1) / elapsed_time), 1)
                records_remaning = total_records - cid
                seconds_eta = records_remaning / records_per_sec
                hours_eta = seconds_eta / (60 * 60)
                days_eta = round(hours_eta / 24, 3)

                session.add(pubchem_row)
                # NOTE(review): rows added after the last multiple of 1000 are
                # only persisted if self_contained_session commits on exit --
                # confirm.
                if mindex % 1000 == 0:
                    session.commit()
                    name = mdict['pubchem_iupac_name']
                    ic(days_eta, records_per_sec, records_remaning, mindex,
                       cid, name)

            if ipython:
                import IPython
                IPython.embed()
                break
Exemplo n.º 9
0
def cli(
    ctx,
    license_corpus,
    license_files,
    verbose,
    debug,
    list_licenses,
    ipython,
    progress,
    printn,
):
    """Guess the license of each input file, or list the known licenses.

    The shared options are stored in ``ctx.obj``. With ``list_licenses`` set,
    every entry from ``build_license_list`` is printed and the function
    returns. Otherwise each file is read, passed through ``linearize_text``
    and compared against the corpus with ``find_closest_string_distance``;
    the result is reported via ``ic``.
    """
    null = not printn
    if sys.stdout.isatty():
        # A terminal always gets newlines, and --ipython is not allowed there.
        end = '\n'
        assert not ipython
    else:
        end = '\x00' if null else '\n'

    # Progress output is switched off whenever diagnostics are requested.
    if verbose or debug:
        progress = False

    ctx.ensure_object(dict)
    ctx.obj['verbose'] = verbose
    ctx.obj['debug'] = debug
    ctx.obj['end'] = end
    ctx.obj['null'] = null
    ctx.obj['progress'] = progress

    if list_licenses:
        known_licenses = build_license_list(
            path=license_corpus,
            verbose=verbose,
            debug=debug,
        )
        for entry in known_licenses:
            print(entry)
        return

    license_dict = build_license_dict(
        path=license_corpus,
        verbose=verbose,
        debug=debug,
    )

    file_stream = enumerate_input(
        iterator=license_files,
        null=null,
        skip=None,
        head=None,
        tail=None,
        progress=progress,
        debug=debug,
        verbose=verbose,
    )
    for index, path in file_stream:
        path = Path(path)

        if verbose:
            ic(index, path)

        with open(path, 'r') as handle:
            raw_text = handle.read()

        flattened = linearize_text(
            text=raw_text,
            verbose=verbose,
            debug=debug,
        )

        closest_guess = find_closest_string_distance(
            string_dict=license_dict,
            in_string=flattened,
            verbose=verbose,
            debug=debug,
        )
        ic(closest_guess)