Пример #1
0
def __processCSHOC(command, keys, attributes, timestamp, source, backfill,
                   processCoid, processSecid):
    """Apply a CSHOC (shares outstanding) delta to the security attributes."""
    # Nothing to do unless the record actually carries a CSHOC value.
    if "CSHOC" not in attributes:
        return

    gvkey, iid = keys["GVKEY"], keys["IID"]
    security = database.getSecidFromCsid(gvkey, iid, timestamp)
    if not processSecid(security):
        return

    # Unknown GVKEY/IID pair: mint a brand-new secid for it.
    if security is None:
        security = database.createNewCsid(gvkey, iid, timestamp)
        util.warning("Created new secid: {}.{}=>{}".format(gvkey, iid,
                                                           security))

    effectiveDate = util.convert_date_to_millis(keys["DATADATE"])
    if backfill:
        # Backfilled rows are stamped as of the day after their data date.
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))

    if command in ("I", "C"):
        database.insertAttribute("sec", "n", security, effectiveDate, source,
                                 "CSHOC", attributes["CSHOC"], timestamp,
                                 None, backfill, False, True, __CSHOCEquals)
    elif command in ("R", "D"):
        database.deleteAttribute("sec", "n", security, effectiveDate, source,
                                 "CSHOC", timestamp, False)
Пример #2
0
def _parseFile(filepath):
    """Parse a splits csv file.

    Returns (data, startDate, timestamp) where data is a set of
    (secid, split date, split rate) tuples, startDate is the int YYYYMMDD
    prefix of the filename, and timestamp is the file acquisition time in
    millis.  For filepath None (the first file ever) returns
    (set(), None, None).
    """
    #this should only happen when we process the first file ever
    if filepath is None:
        return set(), None, None

    data = set()

    info = datafiles.read_info_file(filepath)
    if info['date_last_absent'] is None:
        timestamp = util.convert_date_to_millis(info['date_modified'])
    else:
        timestamp = util.convert_date_to_millis(info['date_first_present'])

    # BUGFIX: the original opened the file and never closed it; use a
    # context manager so the handle is released even on error.
    with open(filepath) as csvfile:
        dialect = csv.Sniffer().sniff(csvfile.read(1024))
        csvfile.seek(0)
        reader = csv.DictReader(csvfile, dialect=dialect)
        for row in reader:
            secid = database.getSecidFromCsid(row['GVKEY'], row['IID'], timestamp)
            if secid is None:
                secid = database.createNewCsid(row['GVKEY'], row['IID'], timestamp, None, None, True)
                util.warning("Created new secid: {}.{}=>{}".format(row['GVKEY'], row['IID'], secid))
            data.add((secid, int(row["SPLITDATE"]), float(row["SPLITRATE"])))

    #get the file start date from the filename (first 8 chars = YYYYMMDD)
    startDate = os.path.normpath(filepath).split("/")[-1][0:8] #split the filepath
    startDate = int(startDate)

    return data, startDate, timestamp
Пример #3
0
def __processMkt(command, keys, attributes, timestamp, source, backfill,
                 processCoid, processSecid):
    """Load market-data attributes (prices, market values) for a company.

    command: "I"/"C" insert or change, "R"/"D" remove or delete.
    Only rows with CFFLAG == 'F' (presumably final rows -- TODO confirm)
    are applied.
    """
    if keys['CFFLAG'] != 'F':
        return

    date = util.convert_date_to_millis(keys['DATADATE'])
    if backfill:
        # Backfilled rows are stamped as of the day after their data date.
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))
    coid = int(keys["GVKEY"])
    if not processCoid(coid):
        return

    for n, v in attributes.iteritems():
        # Numeric price / market-value attributes.
        if n in ('MKVALT', 'PRCC', 'PRCH', 'PRCL', 'MKVALTQ', 'PRCCQ', 'PRCHQ',
                 'PRCLQ'):
            datatype = "n"
            value = float(v) if v is not None else None
        # String-valued attributes.
        elif n in ('CLSM', 'CLSMQ'):
            datatype = "s"
            value = v
        else:
            continue  # anything else in the record is ignored

        if command in ("C", "I"):
            database.insertAttribute("co", datatype, coid, date, source, n,
                                     value, timestamp, None, backfill)
        elif command in ("R", "D"):
            database.deleteAttribute("co", datatype, coid, date, source, n,
                                     timestamp)
Пример #4
0
def __processFundamental(command, keys, attributes, timestamp, source,
                         backfill, global_cs):
    """Load fundamental (numeric) attributes for a company.

    global_cs selects the record filter: the global feed uses
    DATAFMT "HIST_STD" / POPSRC "I", the domestic one "STD" / "D"; both
    require industrial format (INDFMT 'INDL') and consolidated (CONSOL "C")
    rows.
    """
    if not global_cs and not (keys['INDFMT'] == 'INDL' and keys['DATAFMT']
                              == "STD" and keys['POPSRC'] == "D"
                              and keys["CONSOL"] == "C"):
        return
    elif global_cs and not (keys['INDFMT'] == 'INDL' and keys['DATAFMT']
                            == "HIST_STD" and keys['POPSRC'] == "I"
                            and keys["CONSOL"] == "C"):
        return

    date = util.convert_date_to_millis(keys['DATADATE'])
    if backfill:
        # Backfilled rows are stamped as of the day after their data date.
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))
    coid = int(keys["GVKEY"])

    for n, v in attributes.iteritems():
        # Skip "_DC"-suffixed variants of attributes.
        if n[-3:] == "_DC":
            continue
        # For the global universe only a small whitelist of items is kept.
        if global_cs and n not in ("ATQ", "IBQ", "SALEQ", "OANCFY"):
            continue
        #value=float(v) if v is not None else None
        if command in ("C", "I"):
            database.insertAttribute("co", "n", coid, date, source, n, v,
                                     timestamp, None, backfill)
        elif command in ("D", "R"):
            database.deleteAttribute("co", "n", coid, date, source, n,
                                     timestamp)
Пример #5
0
def __processDesind(command, keys, attributes, timestamp, source, backfill,
                    processCoid, processSecid):
    """Load descriptive-indicator attributes (adjustment factors, report
    dates, currency/format codes) for a company.

    command: "I"/"C" insert or change, "R"/"D" remove or delete.
    Only industrial-format (INDFMT 'INDL'), standard (DATAFMT "STD") rows
    are applied.
    """
    if keys['INDFMT'] != 'INDL' or keys['DATAFMT'] != "STD":
        return

    date = util.convert_date_to_millis(keys['DATADATE'])
    if backfill == 1:
        # Backfilled rows are stamped as of the day after their data date.
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))
    coid = int(keys["GVKEY"])
    if not processCoid(coid):
        return

    for n, v in attributes.iteritems():
        # Numeric adjustment factors.
        if n in ('AJEXQ', 'AJPQ', 'AJEX', 'AJP'):
            datatype = "n"
            value = float(v) if v is not None else None
        # Date-valued attributes, stored as millis.
        elif n in ('APDEDATEQ', 'FDATEQ', 'PDATEQ', 'RDQ', 'APDEDATE', 'FDATE',
                   'PDATE'):
            datatype = "d"
            value = util.convert_date_to_millis(v) if v is not None else None
        # String-valued codes.
        elif n in ('ACCTSTDQ', 'COMPSTQ', 'CURCDQ', 'CURNCDQ', 'DATACQTR',
                   'DATAFQTR', 'FQTR', 'FYEARQ', 'UPDQ', 'ACCTSTD', 'COMPST',
                   'CURCD', 'CURNCD', 'FYEAR', 'UPD'):
            datatype = "s"
            value = v
        else:
            continue  # everything else in the record is ignored

        if command in ("C", "I"):
            database.insertAttribute("co", datatype, coid, date, source, n,
                                     value, timestamp, None, backfill)
        elif command in ("R", "D"):
            database.deleteAttribute("co", datatype, coid, date, source, n,
                                     timestamp)
Пример #6
0
def __processHgic(command, keys, attributes, timestamp, source, backfill,
                  processCoid, processSecid):
    """Apply GICS industry-classification history rows to co attributes.

    Rows are valid from INDFROM until INDTHRU; the validity interval is
    carried in the dateFrom/dateTo arguments (passed in the born/died
    slots of insertAttribute), not in the date column.
    """
    if keys['INDTYPE'] != 'GICS':
        return

    # Placeholder date: the real validity range is dateFrom/dateTo below.
    date = -1L
    dateFrom = util.convert_date_to_millis(keys['INDFROM'])
    if "INDTHRU" in attributes and attributes["INDTHRU"] is not None:
        # End date is exclusive, hence the day-plus-one conversion.
        dateTo = util.convert_date_to_millis(
            __datePlusOne(attributes["INDTHRU"]))
    else:
        dateTo = None  # no thru-date: classification still in effect
    coid = int(keys["GVKEY"])
    if not processCoid(coid):
        return

    #born = dateval #XXX why do we do this???
    for n, v in attributes.iteritems():
        if n not in ("GSUBIND", "GGROUP", "GIND", "GSECTOR"):
            continue
        if command in ("C", "I"):
            # Delete any previous value effective at dateFrom, then insert
            # the new range; order matters here.
            database.deleteAttribute("co", "s", coid, date, source, n,
                                     dateFrom, True)
            database.insertAttribute("co", "s", coid, date, source, n, v,
                                     dateFrom, dateTo, backfill, True)
        elif command in ("R", "D"):
            database.deleteAttribute("co", "s", coid, date, source, n,
                                     dateFrom, True)
Пример #7
0
def singleDayReconcile(date, old):
    """Reconcile our positions against Morgan Stanley's for a single day."""
    brokerPositions = MorganStanleyPositions().getPositions(date)

    # Pick the position source matching the requested system generation.
    if old:
        ourSystem = OldSystemPositions()
    else:
        ourSystem = NewSystemPositions()
    ourPositions = ourSystem.getPositions(date)

    #maps=getIdMaps(date)
    dateMillis = util.convert_date_to_millis(date)
    reconciled = reconcile(ourPositions, brokerPositions, dateMillis)

    return __beautify(*reconciled, timestamp=dateMillis)
Пример #8
0
def __processExchange(command, keys, attributes, timestamp, source, backfill):
    """Apply an exchange-rate (EXRATD) record to the EXRATE timeline table.

    command: "I"/"C" insert the rate row, "D"/"R" kill or delete it.
    The row is keyed on (target currency, data date).
    """
    if backfill:
        # Backfilled rows are stamped no later than the day after the data date.
        timestamp = min(
            timestamp,
            util.convert_date_to_millis(__datePlusOne(keys["DATADATE"])))

    code = database.getAttributeType("EXRATE", source, "n", database.EXRATE)
    updates = (0, 0)
    if command in ("I", "C") and "EXRATD" in attributes:
        updates = database.insertTimelineRow(
            database.EXRATE, {
                "currency": database.getCurrencyType(keys["TOCURD"]),
                "date": int(keys["DATADATE"])
            }, {
                "backfill": backfill,
                "rate": float(attributes["EXRATD"])
            }, timestamp)
    elif command in ("D", "R"):
        updates = database.killOrDeleteTimelineRow(
            database.EXRATE, {
                "currency": database.getCurrencyType(keys["TOCURD"]),
                "date": int(keys["DATADATE"])
            }, timestamp)

    # Feed the row-count deltas into the attribute statistics.
    database.updateAttributeStats(code, *updates)
Пример #9
0
def __processDividend(command, keys, attributes, timestamp, source, backfill,
                      processCoid, processSecid):
    """Route a compustat dividend record into the DIVIDEND timeline table.

    command: "I" insert, "C"/"D" update, "R" kill or delete.  The row is
    keyed on (secid, data date).
    """
    secid = database.getSecidFromCsid(keys["GVKEY"], keys["IID"], timestamp)
    if not processSecid(secid):
        return

    # Unknown GVKEY/IID pair: mint a brand-new secid for it.
    if secid is None:
        secid = database.createNewCsid(keys["GVKEY"], keys["IID"], timestamp)
        util.warning("Created new secid: {}.{}=>{}".format(
            keys['GVKEY'], keys['IID'], secid))
    if backfill == 1:
        # Backfilled rows are stamped no later than the day after the data date.
        timestamp = min(
            timestamp,
            util.convert_date_to_millis(__datePlusOne(keys["DATADATE"])))

    code = database.getAttributeType("DIVIDEND", source, "n",
                                     database.DIVIDEND)
    updates = (0, 0)
    # BUGFIX: the original tested `command in ("I")` / `("R")`, which are
    # plain strings, so membership was a substring test (e.g. command == ""
    # would match).  Use 1-tuples for exact matching.
    if command in ("I",):
        data = {
            "backfill": backfill,
            "currency": database.getCurrencyType(keys["CURCDDV"])
        }
        #get the data that we track and translate them to our own names. make also sure that you get the attribute types right
        for k, v in __usToCsDividendTranslate.iteritems():
            value = attributes.get(v, None)
            if value is not None:
                data[k] = float(value)
            else:
                data[k] = value  #i.e., None
        #finally do the insertion
        updates = database.insertTimelineRow(database.DIVIDEND, {
            "secid": secid,
            "date": int(keys["DATADATE"])
        }, data, timestamp)
    elif command in ("R",):
        updates = database.killOrDeleteTimelineRow(
            database.DIVIDEND, {
                "secid": secid,
                "date": int(keys["DATADATE"])
            }, timestamp)
    elif command in ("C", "D"):
        data = {
            "backfill": backfill,
            "currency": database.getCurrencyType(keys["CURCDDV"])
        }
        for n, v in attributes.iteritems(
        ):  #for each attribute from the compustat line
            if n in __csToUsDividendTranslate:  #if it is among the ones we track
                ourName = __csToUsDividendTranslate[n]
                if v is not None:
                    data[ourName] = float(v)
                else:
                    data[ourName] = None  #i.e None
        updates = database.updateTimelineRow(database.DIVIDEND, {
            "secid": secid,
            "date": int(keys["DATADATE"])
        }, data, timestamp)

    # Feed the row-count deltas into the attribute statistics.
    database.updateAttributeStats(code, *updates)
Пример #10
0
def get_dist(date, secid2tickers, massive, overflow_mult):
    """Compute each ticker's fraction of total volume on *date* from the
    bars volsum.txt file."""
    global database
    dist = {}
    total = 0.0
    volsumFile = "/".join(
        (os.environ["DATA_DIR"], "bars", str(date), "volsum.txt"))
    if not os.path.isfile(volsumFile):
        raise Exception(
            "Problem finding historical volume data in file {}".format(
                volsumFile))

    with open(volsumFile, "r") as f:
        f.readline()  # skip the header line
        for line in f:
            fields = line.strip().split("|")
            secid = fields[0]
            # Prefer the preloaded map; fall back to a database xref lookup.
            ticker = secid2tickers.get(secid, None)
            if ticker is None:
                ticker = database.getXrefFromSecid(
                    "TIC", int(secid), util.convert_date_to_millis(date))
            if ticker is None:
                continue
            # Tickers outside the "massive" set have their volume scaled.
            mult = overflow_mult if ticker not in massive else 1
            vol = mult * sum(float(field) for field in fields[1:])
            total += vol
            if vol > 0:
                dist[ticker] = vol

    # Normalize volumes to fractions of the total.
    for ticker in dist.keys():
        dist[ticker] = dist[ticker] / total

    return dist
Пример #11
0
def __processCredit(command, keys, attributes, timestamp, source, backfill):
    """Load credit-rating attributes (SPLTICRM, SPSTICRM, SPSDRM) as co
    string attributes.

    command: "I"/"C" insert or change, "R"/"D" remove or delete.
    """
    date = util.convert_date_to_millis(keys["DATADATE"])
    if backfill:
        # Backfilled rows are stamped as of the day after their data date.
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))
    coid = int(keys["GVKEY"])

    for n, v in attributes.iteritems():
        if n not in ('SPLTICRM', 'SPSTICRM', 'SPSDRM'):
            continue
        if command in ("C", "I"):
            database.insertAttribute("co", "s", coid, date, source, n, v,
                                     timestamp, None, backfill)
        elif command in ("R", "D"):
            database.deleteAttribute("co", "s", coid, date, source, n,
                                     timestamp)
Пример #12
0
def process(filepath, source):
    """Load per-ticker numeric attribute rows from *filepath*.

    Rows whose Record_Date fails to parse are counted and skipped; more
    than 20 bad rows aborts the run with an exception.
    """
    info = datafiles.read_info_file(filepath)

    if info["date_last_absent"] is not None:
        backfill = 0
        timestamp = util.convert_date_to_millis(info["date_first_present"])
    else:
        backfill = 1
        timestamp = util.convert_date_to_millis(info["date_modified"])

    database.setAttributeAutoCreate(True)

    bad = 0
    data = util.csvdict(open(filepath))
    for row in data:
        ticker = row["Symbol"]
        secid = database.getSecidFromXref("TIC", ticker, timestamp,
                                          "compustat_idhist",
                                          newdb.xrefsolve.preferUS)
        if secid is None:
            continue

        try:
            date = util.convert_date_to_millis(row["Record_Date"])
        except Exception:
            # BUGFIX: the original fell through after a bad date and used a
            # stale (or, on the first row, undefined) `date`.  Count the bad
            # row, bail out if there are too many, otherwise skip it.
            util.warning("Bad date for row: " + str(row))
            bad += 1
            if bad > 20:
                util.error(
                    str(bad) +
                    " bad lines found. Raising excpeption. Go check file " +
                    filepath)
                raise Exception(
                    str(bad) +
                    " bad lines found. Raising excpeption. Go check file " +
                    filepath)
            continue

        for sqAtt, ourAtt in attributeMap.iteritems():
            name = ourAtt[0]
            compareWithRecent = ourAtt[1]
            value = row[sqAtt]
            if value == '': value = None
            database.insertAttribute("sec", "n", secid, date, source, name,
                                     value, timestamp, None, backfill, False,
                                     compareWithRecent, approximatelyEqual)
Пример #13
0
def __processIndustry(command, keys, attributes, timestamp, source, backfill,
                      processCoid, processSecid):
    """Load historical industry codes (NAICSH, SICH) as co string attributes.

    command: "I"/"C" insert or change, "R"/"D" remove or delete.
    """
    date = util.convert_date_to_millis(keys["DATADATE"])
    if backfill:
        # Backfilled rows are stamped as of the day after their data date.
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))
    coid = int(keys["GVKEY"])
    if not processCoid(coid):
        return

    for n, v in attributes.iteritems():
        if n not in ('NAICSH', 'SICH'):
            continue
        if command in ("C", "I"):
            database.insertAttribute("co", "s", coid, date, source, n, v,
                                     timestamp, None, backfill)
        elif command in ("R", "D"):
            database.deleteAttribute("co", "s", coid, date, source, n,
                                     timestamp)
Пример #14
0
def process(filepath, source):
    """Process an HTB (hard-to-borrow) file, one handle_htb call per line.

    The file date is the third dot-separated token of the filename.
    Records are born at the file acquisition time when available,
    otherwise at 09:30 EST on the file date (backfill).
    """
    date = os.path.basename(filepath).split('.')[2]
    date_millis = util.convert_date_to_millis(date)

    # If we have acquisition times, use these for the real born_millis time.
    # BUGFIX: the original assigned the recomputed value to `born` but kept
    # passing a stale `born_millis` (always 09:30 EST) to handle_htb, so
    # the acquisition-time branch had no effect.
    info = datafiles.read_info_file(filepath)
    if info['date_last_absent'] is not None:
        born_millis = util.convert_date_to_millis(info['date_first_present'])
        backfill = 0
    else:
        born_millis = util.convert_date_to_millis(date + " 09:30 EST")
        backfill = 1

    database.setAttributeAutoCreate(True)

    for line in file(filepath):
        handle_htb(line, date_millis, born_millis, backfill)
Пример #15
0
def xrefChanges2(date1=None, date2=None):
    """Report ticker and cusip xref changes between date1 and date2 for the
    secids in our universe file.  Returns the report text or None when
    nothing changed."""
    #read the secids we are interested in. we might want to change the source
    uni = set()
    #with open("/".join((os.environ["ROOT_DIR"],"run",os.environ["STRAT"],"old.secids.txt")),"r) as file:
    with open("/apps/ase/run/useq-live/old.secids.txt", "r") as fh:
        for line in fh:
            uni.add(line.strip().split("|")[2])

    if date2 is None:
        date2 = util.now()
    if date1 is None:
        # Default date1 to the previous trading day before date2.
        date1 = util.exchangeTradingOffset(
            os.environ["PRIMARY_EXCHANGE"],
            util.convert_millis_to_datetime(date2).strftime("%Y%m%d"), -1)
        date1 = util.convert_date_to_millis(str(date1))

    def lookup(secid, xtype, date):
        # Xref value of the given type alive at `date`, or None if absent.
        row = database.execute(
            "SELECT value FROM xref WHERE secid={secid} AND xref_type={xtype} AND source=2 AND born<={date} AND (died IS NULL OR died>{date})"
            .format(secid=secid, xtype=xtype, date=date)).fetchone()
        return row["value"] if row is not None else None

    tickerChanges = []
    cusipChanges = []
    for secid in uni:
        tickerOld = lookup(secid, 2, date1)
        tickerNew = lookup(secid, 2, date2)
        cusipOld = lookup(secid, 1, date1)
        cusipNew = lookup(secid, 1, date2)

        if tickerOld != tickerNew:
            tickerChanges.append((secid, tickerOld, tickerNew))
        if cusipOld != cusipNew:
            cusipChanges.append((secid, cusipOld, cusipNew))

    report = ["Xref changes between {} and {}".format(
        util.convert_millis_to_datetime(date1).strftime("%Y%m%d"),
        util.convert_millis_to_datetime(date2).strftime("%Y%m%d"))]
    for secid, x1, x2 in tickerChanges:
        report.append("{}: {} => {}".format(secid, x1, x2))
    for secid, x1, x2 in cusipChanges:
        report.append("{}: {} => {}".format(secid, x1, x2))

    return "\n".join(report) if len(report) > 1 else None
Пример #16
0
def __processFiledate(command, keys, attributes, timestamp, source, backfill,
                      global_cs):
    """Record 10-K/10-Q filing dates as a "FILEDATE" date attribute on the
    company.

    command: "I"/"C" insert or change, "R"/"D" remove or delete.
    """
    if keys['SRCTYPE'] not in ('10Q', '10K'):
        return

    date = util.convert_date_to_millis(keys['DATADATE'])
    if backfill:
        # Backfilled rows are stamped as of the day after their data date.
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))
    coid = int(keys["GVKEY"])

    for n, v in attributes.iteritems():
        # BUGFIX: the original tested `n not in ("FILEDATE")` -- a plain
        # string, so substring matching (e.g. n == "FILE" slipped through).
        if n != "FILEDATE":
            continue
        if command in ("C", "I"):
            database.insertAttribute("co", "d", coid, date, source, n,
                                     util.convert_date_to_millis(v), timestamp,
                                     None, backfill)
        elif command in ("R", "D"):
            database.deleteAttribute("co", "d", coid, date, source, n,
                                     timestamp)
Пример #17
0
def __getBorrows(uni, date):
    """Return {secid: borrow value} for every secid in *uni* that has a
    live type-2210 attribute on *date*."""
    borrows = {}
    params = {"secid": None, "date": util.convert_date_to_millis(date)}
    sql = ("SELECT value FROM sec_attr_n WHERE secid=%(secid)s AND "
           "date=%(date)s AND type=2210 AND died IS NULL ORDER BY date "
           "DESC LIMIT 1")
    for secid in uni:
        params["secid"] = secid
        result = database.execute(sql, params).fetchone()
        if result is not None:
            borrows[secid] = result["value"]

    return borrows
Пример #18
0
def __processIndustry(command, keys, attributes, timestamp, source, backfill,
                      global_cs):
    """Load historical industry codes (NAICSH, SICH) as co string attributes.

    global_cs selects the record filter: international (POPSRC "I") vs
    domestic (POPSRC "D"); only consolidated (CONSOL "C") rows pass.
    """
    if not global_cs and not (keys['POPSRC'] == "D" and keys["CONSOL"] == "C"):
        return
    elif global_cs and not (keys['POPSRC'] == "I" and keys["CONSOL"] == "C"):
        return

    date = util.convert_date_to_millis(keys["DATADATE"])
    if backfill:
        # Backfilled rows are stamped as of the day after their data date.
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))
    coid = int(keys["GVKEY"])

    for n, v in attributes.iteritems():
        if n not in ('NAICSH', 'SICH'):
            continue
        if command in ("C", "I"):
            database.insertAttribute("co", "s", coid, date, source, n, v,
                                     timestamp, None, backfill)
        elif command in ("R", "D"):
            database.deleteAttribute("co", "s", coid, date, source, n,
                                     timestamp)
Пример #19
0
def __processFundamental(command, keys, attributes, timestamp, source,
                         backfill, processCoid, processSecid):
    """Load fundamental (numeric) attributes for a company.

    Only industrial-format (INDFMT 'INDL'), standard (DATAFMT "STD") rows
    are applied; "_DC"-suffixed attribute variants are skipped.
    """
    if keys['INDFMT'] != 'INDL' or keys['DATAFMT'] != "STD":
        return
    date = util.convert_date_to_millis(keys['DATADATE'])
    if backfill:
        # Backfilled rows are stamped as of the day after their data date.
        timestamp = util.convert_date_to_millis(__datePlusOne(
            keys["DATADATE"]))
    coid = int(keys["GVKEY"])
    if not processCoid(coid):
        return

    for n, v in attributes.iteritems():
        if n[-3:] == "_DC":
            continue

        #value=float(v) if v is not None else None
        if command in ("C", "I"):
            database.insertAttribute("co", "n", coid, date, source, n, v,
                                     timestamp, None, backfill)
        elif command in ("D", "R"):
            database.deleteAttribute("co", "n", coid, date, source, n,
                                     timestamp)
Пример #20
0
def process(filepath, source):
    """Load buyback notes from an onlineinvestor file into co attributes.

    Each line is tab-separated: date, ticker, notes.  Historical
    ("hist") sources are backfilled with per-record born times.
    """
    sourceNameInDatabase = "onlineinvestor"
    info = datafiles.read_info_file(filepath)

    if "hist" in source:
        backfill = 1
        #timestamp will be data dependent
    else:
        backfill = 0
        timestamp = util.convert_date_to_millis(info["date_modified"])

    database.setAttributeAutoCreate(True)

    with open(filepath, "r") as file:
        for line in file:
            tokens = line.split("\t")
            date = util.convert_date_to_millis(tokens[0])
            ticker = tokens[1]
            notes = tokens[2]
            # Backfilled records are born on their own date; live ones at
            # the file modification time.
            if backfill == 1:
                born = date
            else:
                born = timestamp

            secid = database.getSecidFromXref("TIC", ticker, date, "compustat",
                                              newdb.xrefsolve.preferUS)
            if secid is None:
                util.warning("Failed to map ticker {},{}".format(
                    ticker, tokens[0]))
                # BUGFIX: this was `return`, which silently dropped every
                # remaining line of the file; skip just this record.
                continue

            coid, issueid = database.getCsidFromSecid(secid)
            assert coid is not None

            database.insertAttribute("co", "s", coid, date,
                                     sourceNameInDatabase, "BUYBACK", notes,
                                     born, None, backfill)
Пример #21
0
    def getPositions(self, date):
        """Read start-of-day positions for *date* from the next trading
        day's sodPort.txt and return them as a list of Position objects.

        Raises PositionSourceError when the file does not exist.
        """
        #for date the sod positions are in date+1
        #sodPortPath = os.environ["ROOT_DIR"] + "/run/" + os.environ["STRAT"] + "/" + str(util.exchangeTradingOffset(os.environ["PRIMARY_EXCHANGE"], date.strftime("%Y%m%d"), 1)) + "/sodPort.txt"
        sodPortPath = "/apps/ase/run/useq-live/" + str(
            util.exchangeTradingOffset(os.environ["PRIMARY_EXCHANGE"],
                                       date.strftime("%Y%m%d"),
                                       1)) + "/sodPort.txt"

        if not os.path.isfile(sodPortPath):
            raise PositionSourceError(
                "NewSystemPositions failed to locate sodPort.txt")

        positions = list()
        with open(sodPortPath, "r") as file:
            #skip header
            file.readline()
            for line in file:
                if len(line) == 0: continue
                # Pipe-delimited row: token[1]=secid, [2]=size, [3]=price.
                tokens = line.strip().split("|")
                secid = int(tokens[1])
                size = int(tokens[2])
                price = float(tokens[3])
                # Resolve identifiers for the secid as of `date`; note the
                # ISIN comes from the global (compustat_g) xref source.
                cusip = database.getXrefFromSecid(
                    "CUSIP", secid, util.convert_date_to_millis(date),
                    "compustat_idhist")
                ticker = database.getXrefFromSecid(
                    "TIC", secid, util.convert_date_to_millis(date),
                    "compustat_idhist")
                isin = database.getXrefFromSecid(
                    "ISIN", secid, util.convert_date_to_millis(date),
                    "compustat_g_idhist")

                position = Position(secid, cusip, ticker, isin, size, price)
                positions.append(position)

        return positions
Пример #22
0
def __processSecurity(command, keys, attributes, timestamp, source, backfill,
                      processCoid, processSecid):
    secid = database.getSecidFromCsid(keys["GVKEY"], keys["IID"], timestamp)

    if not processSecid(secid):
        return

    if secid is None:
        secid = database.createNewCsid(keys["GVKEY"], keys["IID"], timestamp)
        util.warning("Created new secid: {}.{}=>{}".format(
            keys['GVKEY'], keys['IID'], secid))

    for n, v in attributes.iteritems():
        if n in ("CUSIP", "ISIN", "SEDOL", "TIC"):
            if command in ("C", "I"):
                #database.insertTimelineRow(database.XREF_TABLE, {"secid":secid, "xref_type":database.getXrefType(n), "source":database.getSourceType(source)}, {"value":v}, timestamp)
                database.insertXref(secid, source, n, v, timestamp)
            elif command in ("D", "R"):
                #database.killOrDeleteTimelineRow(database.XREF_TABLE, {"secid":secid, "xref_type":database.getXrefType(n), "source":database.getSourceType(source)}, timestamp)
                database.deleteXref(secid, source, n, timestamp)
        elif n in ("SECSTAT", "TPCI", "EXCNTRY"):
            date = 0L
            if command in ("C", "I"):
                database.insertAttribute("sec", "s", secid, date, source, n, v,
                                         timestamp, None, backfill)
            elif command in ("D", "R"):
                database.deleteAttribute("sec", "s", secid, date, source, n,
                                         timestamp)
        elif n in ("EXCHG"):
            date = 0L
            if command in ("C", "I"):
                database.insertAttribute("sec", "n", secid, date, source, n, v,
                                         timestamp, None, backfill)
            elif command in ("D", "R"):
                database.deleteAttribute("sec", "n", secid, date, source, n,
                                         timestamp)
        elif n in ("DLDTEI"):
            date = 0L
            if command in ("C", "I"):
                database.insertAttribute("sec", "d", secid, date, source, n,
                                         util.convert_date_to_millis(v),
                                         timestamp, None, backfill)
            elif command in ("D", "R"):
                database.deleteAttribute("sec", "d", secid, date, source, n,
                                         timestamp)
Пример #23
0
def parseCostLog(data):
    """Extract "orderid|timestamp" strings from the REQ lines of a cost
    log; lines that fail to parse are warned about and skipped."""
    eastern = pytz.timezone('US/Eastern')
    parsed = []
    for line in data:
        tokens = line.strip().split()
        if tokens[3] != "REQ":
            continue
        # The timestamp is the first two tokens with the sub-second
        # digits (last 7 chars) trimmed off.
        stamp = (tokens[0] + " " + tokens[1])[0:-7]
        stamp = datetime.datetime.strptime(stamp, "%Y/%m/%d %H:%M:%S")
        millis = util.convert_date_to_millis(eastern.localize(stamp))

        match = re.match(r"\[orderID: (.*?)\]", tokens[-2] + " " + tokens[-1])
        if match is None:
            util.warning("Failed to parse REQ line: {}".format(line))
            continue
        orderid = long(match.group(1))
        if orderid > 0:
            parsed.append(str(orderid) + "|" + str(millis))

    return parsed
Пример #24
0
def __processSplit(command, keys, attributes, timestamp, source, backfill,
                   global_cs):
    """Apply a SPLIT record to the SPLIT timeline table.

    The universe filter is IID-based: the global feed keeps only
    "W"-suffixed IIDs, the domestic one excludes them (meaning of the
    "W" suffix -- TODO confirm).
    """
    if "SPLIT" not in attributes:
        return

    if global_cs and not keys["IID"].endswith("W"):
        return
    elif not global_cs and keys["IID"].endswith("W"):
        return

    secid = database.getSecidFromCsid(keys["GVKEY"], keys["IID"], timestamp)

    # Unknown GVKEY/IID pair: mint a brand-new secid for it.
    if secid is None:
        secid = database.createNewCsid(keys["GVKEY"], keys["IID"], timestamp)
        util.warning("Created new secid: {}.{}=>{}".format(
            keys['GVKEY'], keys['IID'], secid))
    if backfill:
        # Backfilled rows are stamped no later than the day after the data date.
        timestamp = min(
            timestamp,
            util.convert_date_to_millis(__datePlusOne(keys["DATADATE"])))

    code = database.getAttributeType("SPLIT", source, "n", database.SPLIT)
    updates = (0, 0)
    if command in ("I", "C"):
        updates = database.insertTimelineRow(database.SPLIT, {
            "secid": secid,
            "date": int(keys["DATADATE"])
        }, {
            "backfill": backfill,
            "rate": float(attributes["SPLIT"])
        }, timestamp)
    elif command in ("D", "R"):
        updates = database.killOrDeleteTimelineRow(
            database.SPLIT, {
                "secid": secid,
                "date": int(keys["DATADATE"])
            }, timestamp)

    # Feed the row-count deltas into the attribute statistics.
    database.updateAttributeStats(code, *updates)
Пример #25
0
                file_path = os.path.normpath(file_path_info[0:-5])
                #file_path_rel = file_path.replace("%s/%s/" % (os.environ["DATA_DIR"], sconfig['local_dir']), "")
                file_path_rel = os.path.relpath(
                    file_path, "/".join(
                        (os.environ["DATA_DIR"], sconfig["local_dir"])))
                if file_path_rel not in seen:
                    info = datafiles.read_info_file(file_path)
                    # If we don't have reliable acquisition times (first fetch), use modified timestamp
                    if info['date_last_absent'] is None:
                        date_released = info['date_modified']
                    else:
                        date_released = info['date_first_present']

                    #if we are processing using lag, do not add file
                    if options.lag is not None and (
                            util.now() - util.convert_date_to_millis(
                                datetime.timedelta(days=options.lag)) <
                            util.convert_date_to_millis(date_released)):
                        continue

                    util.info("Found new file:< %s" % file_path)
                    files.append({
                        'path': file_path,
                        'path_rel': file_path_rel,
                        'date': (date_released, info['date_modified'])
                    })

            util.info("Found %d files" % len(files))
            if len(files) == 0:
                util.warning("Done indexing, no files found")
                continue
Пример #26
0
def __processHistorical(filepath, source):
    """Apply historical xref/exchange deltas from *filepath* to the database.

    Computes the delta between this file and the previous load via
    __getDeltas, then applies removals first and insertions second.  Attribute
    auto-creation is enabled for the duration of the load and restored to its
    previous setting afterwards.
    """
    #set autocreate attributes; remember the old setting so we can revert
    autocreate = database.getAttributeAutoCreate()
    database.setAttributeAutoCreate(True)

    (newData, removedData, firstLoad) = __getDeltas(filepath, source)

    ######process the deltas##########
    ####Important!!!! To maintain consistency they should be processed in
    ####ascending effective date (pipe-delimited field 4 of each line)
    __processHistoricalDeltas(removedData, source, False)
    __processHistoricalDeltas(newData, source, True)

    #revert attribute autocreate
    database.setAttributeAutoCreate(autocreate)


def __processHistoricalDeltas(lines, source, insert):
    """Apply one set of delta lines, sorted by ascending effective date.

    Each line is pipe-delimited; __lineToDict turns it into a dict.  Rows with
    an empty attribute value or a death date not after the born date are
    skipped.  When *insert* is True the attribute is inserted, otherwise it is
    deleted; EXCHG attributes are routed to the exchange-specific helpers.
    """
    lines = list(lines)
    lines.sort(key=lambda x: x.split("|")[4])
    for line in lines:
        info = __lineToDict(line)

        born = util.convert_date_to_millis(info["start"])
        died = None if info["end"] is None else util.convert_date_to_millis(
            info["end"])

        #check row quality: skip inverted lifetimes and empty values
        if died is not None and died <= born:
            continue
        if info["attributeValue"] == "":
            continue

        util.debug("Getting/Inserting Security {}.{}, {}".format(
            info["coid"], info["issueid"], born))
        secid = __getOrCreateSecid(info["coid"], info["issueid"],
                                   info["country"], info["currency"], born)

        if insert:
            if info["attributeName"] == "EXCHG":
                __insertExchangeHistorical(secid, info["attributeName"],
                                           info["attributeValue"], source,
                                           born, died, 1)
            else:
                __insertXrefHistorical(secid, info["attributeName"],
                                       info["attributeValue"], source, born,
                                       died)
        else:
            if info["attributeName"] == "EXCHG":
                __deleteExchangeHistorical(secid, info["attributeName"],
                                           info["attributeValue"], source,
                                           born)
            else:
                __deleteXrefHistorical(secid, info["attributeName"], source,
                                       born)
Пример #27
0
        if len(subdirs) == 0:
            errors.append("{}: Never received a file".format(
                sourceConfigFile[:-3]))
            continue

        subdir = subdirs[-1]
        acquireTimestamp = 0L
        for node in os.walk(sourceLocalDir + "/" + subdir):
            dir = node[0]
            files = node[2]
            for file in files:
                if ".info" in file or ".time" in file or ".new" in file:
                    continue

                info = datafiles.read_info_file(dir + "/" + file)
                timestamp = util.convert_date_to_millis(
                    info["date_first_present"])
                if timestamp > acquireTimestamp:
                    acquireTimestamp = timestamp

        now = util.now()
        checkTimestamp = util.convert_date_to_millis(
            cPickle.load(open(timeFile, 'rb')))
        #get the frequency with which we expect new data
        expectedNewDataFrequency = sc.get("new_data_frequency",
                                          defaultNewDataFrequency)

        checkHours = (now - checkTimestamp) / (60 * 60 * 1000)
        checkMins = ((now - checkTimestamp) % (60 * 60 * 1000)) / (60 * 1000)
        acquireHours = (now - acquireTimestamp) / (60 * 60 * 1000)
        acquireMins = ((now - acquireTimestamp) %
                       (60 * 60 * 1000)) / (60 * 1000)
Пример #28
0
 for mufile in sorted(os.listdir(musdir)):
     util.info("Processing file {}/{}".format(musdir, mufile))
     tokens = mufile.split(".")
     if not tokens[0] == "mus": continue
     ts_token = 1
     mus_type = 0
     if tokens[1] == "FULL":
         ts_token += 1
         mus_type = 2
     elif tokens[1] == "SHORT":
         ts_token += 1
         mus_type = 1
         
     if mus_type != 2: continue
         
     mus_ts = util.convert_date_to_millis(datetime.datetime.strptime(tokens[ts_token], "%Y%m%d_%H%M"))
     
     if (not args.force) and (mus_ts <= max_ts_in_db):
         continue
     
     row = {}
     with open(musdir + "/" + mufile, "r") as file:
         for line in file:
             tokens = line.strip().split("|")
             secid = int(tokens[0])
             
             fc = tokens[1]
             fc_type = database.getAttributeType(fc, "mus", "n", "mus")                            
             value = float(tokens[2])
             
             buffer.append((secid, fc_type, mus_ts, value))
Пример #29
0
# Tally the ECN rebates earned on ISLD liquidity-adding fills over one
# calendar month of cost.log files.
# Usage: <script> YYYYMM mils_per_share
# NOTE(review): Python 2 script (print statement at the bottom).
fs = file_source.FileSource('/apps/exec/log/rts1/')
listing = fs.list_recursive('cost.log', sizes=False)
# argv[1] is a YYYYMM month; append "01" so it parses as the month's first day.
start = dateutil.parser.parse(sys.argv[1] + "01")
end = start + dateutil.relativedelta.relativedelta(months=1)
dt = start
rebates = 0.0
# Per-share rebate rate from argv[2]; name suggests mils (1/1000 dollar) -- TODO confirm.
mils = float(sys.argv[2])

# Walk the month one day at a time, rescanning the full listing for files
# whose path contains that day's YYYYMMDD stamp.
while dt < end:
    for row in listing:
        if row[0].find(dt.strftime("%Y%m%d")) == -1: continue
        for line in open(row[0]):
            if line.find("FILL") != -1:
                # NOTE(review): `type` is bound twice in this unpacking; the
                # second occurrence (5th field) wins.  Both shadow builtins.
                (date, type, sym, ecn, type, size, price, bid, ask,
                 liq) = line.split()
                date = util.convert_date_to_millis(date)
                type = int(type)
                size = int(size)
                price = float(price)
                bid = float(bid)
                ask = float(ask)

                # Only non-'remove' fills accrue a rebate; presumably 'remove'
                # marks liquidity-removing fills -- TODO confirm with log format.
                if liq != 'remove':
                    if ecn == "ISLD":
                        rebates += mils * abs(size)

    dt += datetime.timedelta(days=1)

print start.strftime('%Y%m'), rebates
Пример #30
0
def process(filePath, source, verifyOnly=False):
    """Load a barra daily .RSK file and insert its attributes into the database.

    Parameters:
        filePath: path to the candidate file; anything whose name does not
            contain ".RSK." is silently ignored.
        source: source tag passed through to database.insertAttribute.
        verifyOnly: when True, only verify the BARRID/CUSIP/TICKER mappings
            and return the list of inconsistent ones; nothing is inserted.

    Raises:
        Exception: if the file does not start with the PriceDate/ModelDate
            header lines of the barra daily format, or an attribute value has
            an unexpected type.
    """
    #process the RSK files for now
    if filePath.find(".RSK.") < 0:
        return

    #use a context manager so the handle is closed even if parsing raises
    #(the original leaked the handle on any exception path)
    with open(filePath, "r") as f:
        #The first 2 lines should be the pricedate and the modeldate
        tokens = f.readline().strip().split(":")
        if tokens[0] != "PriceDate":
            util.error("It doesn't seem like a barra daily format")
            raise Exception("It doesn't seem like a barra daily format")
        priceDate = __barraDateToCompact(tokens[1].strip())

        tokens = f.readline().strip().split(":")
        if tokens[0] != "ModelDate":
            util.error("It doesn't seem like a barra daily format")
            raise Exception("It doesn't seem like a barra daily format")
        modelDate = __barraDateToCompact(tokens[1].strip())

        # If we have acquisition times, use these for real born time.
        # Else, use the priceDate + 1 day
        fileInfo = datafiles.read_info_file(filePath)
        if fileInfo['date_last_absent'] is not None:
            timestamp = util.convert_date_to_millis(
                fileInfo['date_first_present'])
            backfill = 0
        else:
            date = priceDate + datetime.timedelta(days=1)
            timestamp = util.convert_date_to_millis(date.strftime("%Y%m%d"))
            backfill = 1
        #auto-create is wanted in both cases (was duplicated in each branch)
        database.setAttributeAutoCreate(True)

        #get the header names. comma separated, surrounded by double quotes
        headers = __getListFromBarraLine(f.readline())

        #######MAPPING VERIFICATION CODE########
        inconsistentMappings = []
        ########################################

        for line in f:
            data = __getListFromBarraLine(line)

            if len(data) != len(headers):
                util.warning("Skipping bad line: {}".format(line))
                continue

            data = dict(zip(headers, data))

            #######MAPPING VERIFICATION CODE########
            if verifyOnly:
                result = __verifyMapping(
                    data["BARRID"], util.cusip8to9(data["CUSIP"]),
                    data["TICKER"], source, timestamp,
                    newdb.xrefsolve.preferUS)  #mirror the getSecid call
                if result is not None:
                    inconsistentMappings.append(result)
                continue
            ########################################

            secid = __getSecId(data["BARRID"], util.cusip8to9(data["CUSIP"]),
                               data["TICKER"], source, timestamp,
                               newdb.xrefsolve.preferUS, filePath)
            if secid is None:
                continue

            #Now, insert barra attributes and attribute values
            __removeUnwantedAttributes(data)
            for attributeName, attributeValue in data.iteritems():
                if isinstance(attributeValue, str):
                    table = "s"
                elif isinstance(attributeValue, (int, float)):
                    table = "n"
                else:
                    util.error(
                        "Dude, attribute values should be either int,float or str")
                    #bugfix: a bare `raise` outside an except block does not
                    #raise the intended error (py2: TypeError, py3: RuntimeError
                    #"no active exception"); raise an explicit exception.
                    raise Exception(
                        "attribute values should be either int, float or str")

                #With the exception of capitalization and price, the other barra
                #attributes are evaluated monthly; for them, the date should be
                #the model date.  Price we ignore, while for capitalization we
                #only create a new tuple if it has changed more than a threshold
                #since the last date for which we have a tuple.
                if attributeName == "PRICE":
                    continue
                elif attributeName == "CAPITALIZATION":
                    database.insertAttribute(
                        "sec", "n", secid,
                        util.convert_date_to_millis(priceDate), source,
                        attributeName, attributeValue, timestamp, None,
                        backfill, False, True, __capEquals)
                else:
                    database.insertAttribute(
                        "sec", table, secid,
                        util.convert_date_to_millis(modelDate), source,
                        attributeName, attributeValue, timestamp, None,
                        backfill)

    #######MAPPING VERIFICATION CODE########
    if verifyOnly:
        return inconsistentMappings