Exemplo n.º 1
0
def download_prices_from_s3(bucket: ServiceResource, dir_prices: Path,
                            remote_dir_prices: Path, missing_rics: List[str],
                            logger: logging.Logger) -> None:
    """Download the gzipped price file of each RIC into ``dir_prices``.

    ``dir_prices`` is created if needed. A RIC whose destination file
    already exists locally is skipped. A ``ClientError`` raised by the
    download is logged and the loop moves on to the next RIC; any other
    exception propagates to the caller.

    Args:
        bucket: Object exposing ``download_file(Key=..., Filename=...)``.
        dir_prices: Local directory that receives the downloaded files.
        remote_dir_prices: Directory part of the remote object keys.
        missing_rics: RICs whose price files should be fetched.
        logger: Logger receiving progress and error messages.
    """
    dir_prices.mkdir(parents=True, exist_ok=True)

    for ric in missing_rics:
        remote_filename = ric2filename(remote_dir_prices, ric, 'csv.gz')
        basename = remote_filename.name
        dest = dir_prices / basename

        if dest.is_file():
            logger.debug('skip downloading {}'.format(basename))
            continue

        logger.debug('start downloading {}'.format(basename))
        try:
            bucket.download_file(Key=str(remote_filename),
                                 Filename=str(dest))
        except ClientError as e:
            code = e.response.get('Error', {}).get('Code', '')
            if str(code) == str(HTTPStatus.NOT_FOUND.value):
                logger.critical('{} is not found'.format(
                    str(remote_filename)))
            else:
                # Anything other than "not found" (permissions, throttling,
                # ...) used to vanish silently; keep going but leave a trace.
                logger.error('failed to download {}: {}'.format(
                    str(remote_filename), e))
        else:
            # Only report completion when the download actually succeeded.
            logger.debug('end downloading {}'.format(basename))
Exemplo n.º 2
0
def upload_prices_to_s3(bucket: ServiceResource, local_dir: Path,
                        remote_dir: Path, rics: List[str]) -> None:
    """Upload the gzipped price file of each RIC unless its key already exists.

    For every RIC the local file path is derived with ``ric2filename`` and
    the remote key is ``remote_dir`` joined with the local file's name. The
    bucket is listed with that key as prefix; when the first listed object
    has exactly that key the upload is skipped.

    Args:
        bucket: Object exposing ``objects.filter(Prefix=...)`` and
            ``put_object(Key=..., Body=...)``.
        local_dir: Local directory holding the price files.
        remote_dir: Directory part of the remote object keys.
        rics: RICs whose price files should be uploaded.
    """
    for ric in rics:
        source_path = ric2filename(local_dir, ric, 'csv.gz')
        object_key = str(remote_dir / Path(source_path.name))

        listed = list(bucket.objects.filter(Prefix=object_key).all())
        if listed and listed[0].key == object_key:
            # An object with this exact key is already in the bucket.
            continue

        with source_path.open(mode='rb') as payload:
            bucket.put_object(Key=object_key, Body=payload)
Exemplo n.º 3
0
def _push_front(window: List[float], value: float,
                max_len: int) -> List[float]:
    """Return a NEW list with ``value`` prepended to ``window``.

    Once ``window`` already holds ``max_len`` items the oldest (last) one is
    dropped. A fresh list is returned on purpose: earlier lists have already
    been handed to ``PriceSeq`` objects and must not be mutated afterwards.
    """
    if len(window) < max_len:
        return [value] + window
    return [value] + window[:-1]


def _normalize_seqs(ric: str, seqtype, source_seqs: list,
                    max_val: float, min_val: float) -> list:
    """Build insertable dicts of ``seqtype`` by rescaling ``source_seqs``.

    Every value ``v`` becomes ``(2 * v - (max + min)) / (max - min)``, which
    maps ``min_val`` to -1 and ``max_val`` to 1. When the two bounds are
    (almost) equal the division is skipped and the values are stored as
    ``None``.
    """
    if isclose(max_val, min_val):
        # NOTE(review): assumes PriceSeq.to_dict() accepts vals=None — confirm.
        return [PriceSeq(ric, seqtype, p['t'], None).to_dict()
                for p in source_seqs]

    total = max_val + min_val
    span = max_val - min_val
    return [PriceSeq(ric, seqtype, p['t'],
                     [(2 * v - total) / span for v in p['vals']]).to_dict()
            for p in source_seqs]


def insert_prices(session: Session, dir_prices: Path, missing_rics: List[str],
                  dir_resources: Path, logger: Logger) -> None:
    """Import the gzipped price CSV of each RIC into the database.

    Instruments are first loaded from ``dir_resources / 'ric.csv'``. Then,
    for every RIC in ``missing_rics``, its ``csv.gz`` file under
    ``dir_prices`` is read row by row and the following are bulk-inserted
    and committed:

    * one ``Price`` row per usable tick (rows without both a last price and
      a close bid, rows with an unparsable timestamp and rows repeating the
      previous timestamp are skipped);
    * one ``Close`` row for each tick that is the first one at or after the
      exchange closing time;
    * ``PriceSeq`` rows for every sequence type: raw values, standardised
      values, differences to a recent closing value ("mov ref") and their
      min-max normalised variants, each in a short-term (per tick) and a
      long-term (per close) flavour.

    Args:
        session: Database session used for queries, inserts and commit.
        dir_prices: Directory containing the gzipped price CSV files.
        missing_rics: RICs whose price files should be imported.
        dir_resources: Directory with ``ric.csv`` and the data read by
            ``ClosingTime``.
        logger: Logger receiving progress messages.
    """
    ct = ClosingTime(dir_resources)
    insert_instruments(session, dir_resources / Path('ric.csv'), logger)
    seqtypes = [
        SeqType.RawShort, SeqType.RawLong, SeqType.MovRefShort,
        SeqType.MovRefLong, SeqType.NormMovRefShort, SeqType.NormMovRefLong,
        SeqType.StdShort, SeqType.StdLong
    ]

    for ric in missing_rics:

        filename = ric2filename(dir_prices, ric, extension='csv.gz')

        price_seqs = {seqtype: [] for seqtype in seqtypes}
        with gzip.open(filename, mode='rt') as f:
            dataframe = pandas.read_table(f, delimiter=',')
            # Use 'Close Bid' when the file has no 'Last' values at all.
            # Series reductions return scalars directly, so no single-element
            # Series has to be coerced through int()/float().
            column = 'Close Bid' \
                if int(dataframe['Last'].count()) == 0 else 'Last'
            mean = float(dataframe[column].mean())
            std = float(dataframe[column].std())

            # Number of data rows: every line of the file except the header.
            f.seek(0)
            n_rows = sum(1 for _ in f) - 1

            # Peek at the first data row to learn the RIC as spelled in the
            # file; it is the key used for the exchange lookup below.
            f.seek(0)
            reader = csv.reader(f, delimiter=',')
            next(reader)
            fields = next(reader)
            ric = fields[0]

            stock_exchange = session \
                .query(Instrument.exchange) \
                .filter(Instrument.ric == ric) \
                .scalar()
            if stock_exchange is None:
                # No exchange recorded for this instrument: fall back to TSE.
                stock_exchange = 'TSE'
            get_close_utc = ct.func_get_close_t(stock_exchange)

            logger.info('start importing {}'.format(f.name))

            # Rewind once more and consume the header before the main pass.
            f.seek(0)
            column_names = next(reader)
            # Some indices contain an additional column
            shift = 1 if column_names[1] == 'Alias Underlying RIC' else 0

            prices = []
            close_prices = []
            raw_short_vals = []
            raw_long_vals = []
            raw_mov_ref_long_vals = []
            raw_mov_ref_short_vals = []
            std_long_vals = []
            std_short_vals = []
            prev_row_t = None
            max_mov_ref_long_val = float('-inf')
            min_mov_ref_long_val = float('inf')
            max_mov_ref_short_val = float('-inf')
            min_mov_ref_short_val = float('inf')

            for _ in tqdm(range(n_rows)):
                fields = next(reader)

                ric = fields[0]
                t = fields[2 + shift].replace('Z', '+0000')
                utc_offset = int(fields[3 + shift])
                if ric == Code.SPX.value:
                    # NOTE(review): SPX offsets are shifted by one hour here;
                    # the reason is not visible in this function — confirm.
                    utc_offset += 1
                last = fields[8 + shift].strip()
                close_bid = fields[14 + shift].strip()

                if last == '' and close_bid == '':
                    continue
                val = Decimal(close_bid if last == '' else last)
                fval = float(val)
                # NOTE(review): raises ZeroDivisionError when std is 0
                # (e.g. a constant price column).
                std_val = (fval - mean) / std
                try:
                    t = datetime.strptime(t, REUTERS_DATETIME_FORMAT)
                except ValueError:
                    logger.info('ValueError: {}, {}, {}'.format(ric, t, val))
                    continue

                if prev_row_t is not None:

                    if prev_row_t == t:
                        # Same timestamp as the previous kept row: skip it.
                        continue

                    close_time = get_close_utc(utc_offset)
                    close_datetime = datetime(t.year,
                                              t.month,
                                              t.day,
                                              close_time.hour,
                                              close_time.minute,
                                              tzinfo=UTC)

                    # First tick at or after today's close.
                    if prev_row_t < close_datetime <= t:
                        close_prices.append(Close(ric, t).to_dict())

                        if len(raw_long_vals) > 1:
                            # Change relative to the most recent close seen.
                            raw_mov_ref_long_val = fval - raw_long_vals[0]
                            raw_mov_ref_long_vals = _push_front(
                                raw_mov_ref_long_vals, raw_mov_ref_long_val,
                                N_LONG_TERM)
                            price_seqs[SeqType.MovRefLong].append(
                                PriceSeq(ric, SeqType.MovRefLong, t,
                                         raw_mov_ref_long_vals).to_dict())
                            if raw_mov_ref_long_val > max_mov_ref_long_val:
                                max_mov_ref_long_val = raw_mov_ref_long_val
                            if raw_mov_ref_long_val < min_mov_ref_long_val:
                                min_mov_ref_long_val = raw_mov_ref_long_val

                        raw_long_vals = _push_front(raw_long_vals, fval,
                                                    N_LONG_TERM)
                        price_seqs[SeqType.RawLong].append(
                            PriceSeq(ric, SeqType.RawLong, t,
                                     raw_long_vals).to_dict())

                        std_long_vals = _push_front(std_long_vals, std_val,
                                                    N_LONG_TERM)
                        price_seqs[SeqType.StdLong].append(
                            PriceSeq(ric, SeqType.StdLong, t,
                                     std_long_vals).to_dict())

                prices.append(Price(ric, t, utc_offset, val).to_dict())

                if len(raw_short_vals) > 1 and len(raw_long_vals) > 2:
                    # Change relative to a recent close. When this tick falls
                    # exactly on the close time, index 1 is used instead of
                    # index 0 (presumably because index 0 then holds this
                    # tick's own value — confirm).
                    raw_mov_ref_short_val = \
                        fval - raw_long_vals[1 if t == close_datetime else 0]
                    raw_mov_ref_short_vals = _push_front(
                        raw_mov_ref_short_vals, raw_mov_ref_short_val,
                        N_SHORT_TERM)
                    price_seqs[SeqType.MovRefShort].append(
                        PriceSeq(ric, SeqType.MovRefShort, t,
                                 raw_mov_ref_short_vals).to_dict())
                    if raw_mov_ref_short_val > max_mov_ref_short_val:
                        max_mov_ref_short_val = raw_mov_ref_short_val
                    if raw_mov_ref_short_val < min_mov_ref_short_val:
                        min_mov_ref_short_val = raw_mov_ref_short_val

                raw_short_vals = _push_front(raw_short_vals, fval,
                                             N_SHORT_TERM)
                price_seqs[SeqType.RawShort].append(
                    PriceSeq(ric, SeqType.RawShort, t,
                             raw_short_vals).to_dict())

                std_short_vals = _push_front(std_short_vals, std_val,
                                             N_SHORT_TERM)
                price_seqs[SeqType.StdShort].append(
                    PriceSeq(ric, SeqType.StdShort, t,
                             std_short_vals).to_dict())
                prev_row_t = t

            session.execute(Price.__table__.insert(), prices)
            session.execute(Close.__table__.insert(), close_prices)

            for seqtype in seqtypes:
                # The normalised sequences can only be built now that the
                # overall min/max of the mov-ref values are known.
                if seqtype == SeqType.NormMovRefShort:
                    price_seqs[seqtype] = _normalize_seqs(
                        ric, SeqType.NormMovRefShort,
                        price_seqs[SeqType.MovRefShort],
                        max_mov_ref_short_val, min_mov_ref_short_val)
                elif seqtype == SeqType.NormMovRefLong:
                    price_seqs[seqtype] = _normalize_seqs(
                        ric, SeqType.NormMovRefLong,
                        price_seqs[SeqType.MovRefLong],
                        max_mov_ref_long_val, min_mov_ref_long_val)
                session.execute(PriceSeq.__table__.insert(),
                                price_seqs[seqtype])
            session.commit()

            logger.info('end importing {}'.format(ric))
Exemplo n.º 4
0
def test_ric2filename_lowercase():
    """'JNIc1' must map to 'jni#c1.csv.gz' under the given directory."""
    wanted = Path('/somewhere/jni#c1.csv.gz')
    actual = ric2filename(Path('/somewhere/'), 'JNIc1', 'csv.gz')
    assert actual == wanted
Exemplo n.º 5
0
def test_ric2filename_equal():
    """'EUR=' must map to 'eur=.csv.gz' under the given directory."""
    wanted = Path('/somewhere/eur=.csv.gz')
    actual = ric2filename(Path('/somewhere/'), 'EUR=', 'csv.gz')
    assert actual == wanted
Exemplo n.º 6
0
def test_ric2filename_underscore():
    """'.IRAIL.T' must map to '_irail_t.csv.gz' under the given directory."""
    wanted = Path('/somewhere/_irail_t.csv.gz')
    actual = ric2filename(Path('/somewhere/'), '.IRAIL.T', 'csv.gz')
    assert actual == wanted
Exemplo n.º 7
0
def test_ric2filename_period():
    """'.JSD' must map to '_jsd.csv.gz' under the given directory."""
    wanted = Path('/somewhere/_jsd.csv.gz')
    actual = ric2filename(Path('/somewhere/'), '.JSD', 'csv.gz')
    assert actual == wanted
Exemplo n.º 8
0
 def test_ric2filename_lowercase(self):
     """'JNIc1' must map to 'jni#c1.csv.gz' under the given directory."""
     self.assertEqual(
         ric2filename(Path('/somewhere/'), 'JNIc1', 'csv.gz'),
         Path('/somewhere/jni#c1.csv.gz'))
Exemplo n.º 9
0
 def test_ric2filename_underscore(self):
     """'.IRAIL.T' must map to '_irail_t.csv.gz' under the given directory."""
     self.assertEqual(
         ric2filename(Path('/somewhere/'), '.IRAIL.T', 'csv.gz'),
         Path('/somewhere/_irail_t.csv.gz'))
Exemplo n.º 10
0
 def test_ric2filename_equal(self):
     """'EUR=' must map to 'eur=.csv.gz' under the given directory."""
     self.assertEqual(
         ric2filename(Path('/somewhere/'), 'EUR=', 'csv.gz'),
         Path('/somewhere/eur=.csv.gz'))
Exemplo n.º 11
0
 def test_ric2filename_period(self):
     """'.JSD' must map to '_jsd.csv.gz' under the given directory."""
     self.assertEqual(
         ric2filename(Path('/somewhere/'), '.JSD', 'csv.gz'),
         Path('/somewhere/_jsd.csv.gz'))