Exemplo n.º 1
0
Arquivo: io.py Projeto: palmerc/lab
 def start(self, doc):
     """Copy the ARFF header template into the output stream.

     The file ``arff-header`` is looked up in ``__main__.prescript_dir`` and
     its lines are written to ``self.out``.  An ``IOError`` raised while
     opening, reading or writing is reported through ``msg`` instead of
     being propagated.  ``doc`` is accepted but not used here.
     """
     header_path = __main__.prescript_dir+'arff-header'
     try:
         # The header is only read, so plain 'r' is enough; 'r+' would fail
         # on a read-only installation for no reason.
         ahdr = open( header_path, 'r' )
         try:
             self.out.writelines( ahdr.readlines() )
         finally:
             # Release the handle even when the copy fails part-way.
             ahdr.close()
     except IOError:
         msg( 'WARNING: Header %s does not exist' % header_path )
Exemplo n.º 2
0
    def buy(self, symbol, shares, price, date):
        """Book a purchase of ``shares`` of ``symbol`` at ``price`` on ``date``.

        The order is logged, the commission and the purchase cost are taken
        out of the account's cash, the stored position grows by ``shares``
        (and remembers ``price`` and ``date``), and the account is refreshed.
        """
        #FIXME: output to log file
        msg('%s BUY %s %s@%s' % (date, symbol, shares, price), ind=1)

        account = self.account
        held, _last_price, _last_date = account.positions[symbol]

        # commission first, then the cost of the shares themselves
        account.cash -= cfg.commission
        cost = shares * price
        account.cash -= cost

        account.positions[symbol] = (held + shares, price, date)
        account.update(date)
Exemplo n.º 3
0
    def update_symbol(self, symbol):
        """Load the CSV data for ``symbol`` into its database tables.

        Three tables are created when missing -- ``<symbol>``,
        ``<symbol>_splits`` and ``<symbol>_dividends`` -- and filled from
        ``data/<symbol>.csv``, ``data/<symbol>_splits.csv`` and
        ``data/<symbol>_dividends.csv``.  The first row of every CSV is
        skipped as a header.  ``Date`` is declared unique and rows are added
        with ``insert or ignore``, so a row whose date is already stored is
        left alone.  Nothing is committed here.

        Raises whatever ``open`` raises when a CSV is missing, and
        ``ValueError`` when a row does not have the expected column count.

        NOTE(review): ``symbol`` is pasted into the SQL text as a table name
        (identifiers cannot be bound with ``?``); presumably it only ever
        comes from trusted configuration -- confirm.
        """
        msg('updating data: %s' % symbol)
        #FIXME: auto download csv and only add new data
        #add new table if necessary
        #get most recent weekday date
        #get most recent date in csv
        #add missing data
        #calc missing indicators

        #for now just remake the whole db
        #add quotes
        self.cur.execute((('create table if not exists %s (' % symbol) +
                          'id integer primary key autoincrement,'
                          'Date text unique not null,'
                          'Open real not null,'
                          'High real not null,'
                          'Low real not null,'
                          'Close real not null,'
                          'AdjClose real not null,'
                          'Volume real not null'
                          ')'))
        # 'rb' is the mode the Python 2 csv module asks for; the with-block
        # makes sure the file is closed again (it used to be left open).
        with open('data/%s.csv' % symbol, 'rb') as quotes_file:
            reader = csv.reader(quotes_file)
            next(reader)  #skip header
            for Date, Open, High, Low, Close, AdjClose, Volume in reader:
                self.cur.execute(
                    ('insert or ignore into %s ' % symbol +
                     '(Date, Open, High, Low, Close, AdjClose, Volume)'
                     'values (?,?,?,?,?,?,?)'),
                    (Date, Open, High, Low, Close, AdjClose, Volume))

        #add splits
        s = '_splits'
        self.cur.execute('create table if not exists %s%s (' % (symbol, s) +
                         'id integer primary key autoincrement,'
                         'Date text unique not null,'
                         'Split text not null'
                         ')')
        with open('data/%s%s.csv' % (symbol, s), 'rb') as splits_file:
            reader = csv.reader(splits_file)
            next(reader)  #skip header
            for Date, Split in reader:
                self.cur.execute(
                    ('insert or ignore into %s%s ' % (symbol, s) +
                     '(Date, Split)'
                     'values (?,?)'), (Date, Split))

        #add dividends
        s = '_dividends'
        self.cur.execute('create table if not exists %s%s (' % (symbol, s) +
                         'id integer primary key autoincrement,'
                         'Date text unique not null,'
                         'Dividend real not null'
                         ')')
        with open('data/%s%s.csv' % (symbol, s), 'rb') as dividends_file:
            reader = csv.reader(dividends_file)
            next(reader)  #skip header
            for Date, Dividend in reader:
                self.cur.execute(
                    ('insert or ignore into %s%s ' % (symbol, s) +
                     '(Date, Dividend)'
                     'values (?,?)'), (Date, Dividend))
Exemplo n.º 4
0
 def __init__(self, name='testaccount'):
     """Create the account ``name`` from the configured starting values.

     Cash and net worth both start at ``cfg.start_cash``; buying power is
     that cash scaled by ``cfg.risk``.  Positions and trades come from the
     instance's own ``init_positions`` / ``init_trades``.
     """
     self.name = name

     # money figures all derive from the configured starting cash
     self.cash = self.networth = cfg.start_cash
     self.buypower = self.cash * cfg.risk

     # positions -> symbol: shares
     self.positions = self.init_positions()
     # trades -> symbol: [(date_bought, date_sold, price_bought,
     #                     price_sold, shares), ...]
     self.trades = self.init_trades()

     msg('using account: %s' % name, '+')
Exemplo n.º 5
0
    def sell(self, symbol, shares, price, date):
        """Book a sale of ``shares`` of ``symbol`` at ``price`` on ``date``.

        The order is logged, the commission is charged, the proceeds are
        credited, the stored position shrinks by ``shares`` and a trade tuple
        ``(previous date, date, previous price, price, shares)`` is appended
        for the symbol before the account is refreshed.
        """
        #FIXME: output to log file
        msg('%s SELL %s %s@%s' % (date, symbol, shares, price), ind=1)

        account = self.account
        held, bought_price, bought_date = account.positions[symbol]

        # commission is paid on the way out as well
        account.cash -= cfg.commission
        proceeds = shares * price
        account.cash += proceeds

        account.positions[symbol] = (held - shares, price, date)
        closed_trade = (bought_date, date, bought_price, price, shares)
        account.trades[symbol].append(closed_trade)

        account.update(date)
Exemplo n.º 6
0
    def info(self, date):
        #DEBUG
        for symbol in self.trades:
            print '\n', symbol, 'trades'
            for trade in self.trades[symbol]:
                print '    ', trade

        self.update(date)
        stats = self.calc_stats()
        msg('account info for \'%s\'' % self.name)
        for stat in self.stat_listing:
            msg('%20s: %0.2f' % (stat, stats[stat]), ind=1)
Exemplo n.º 7
0
def assembleDocument(frags):
   """Assemble a document out of the collected fragments.

   A DocAssembler is wrapped by a PageAssembler, which in turn is fed by a
   LineAssembler created with ``frags.modeCharWidth()``.  Every item of
   ``frags.getFragData()`` is submitted to the line assembler; the result of
   ``getDocument()`` on the document assembler is returned.
   """
   msg( "Assembling document" )

   # build the chain from the outside in: document <- page <- line
   document_builder = DocAssembler()
   page_builder = PageAssembler( document_builder )
   char_width = frags.modeCharWidth()
   line_builder = LineAssembler( char_width, page_builder )

   for piece in frags.getFragData():
      line_builder.submit( piece )
   line_builder.done()

   return document_builder.getDocument()
Exemplo n.º 8
0
def backtest(weights):
    """Run the strategy day by day over the configured test window.

    Every day from ``cfg.test_begin`` to ``cfg.test_end`` that is a weekday
    and not listed in ``cfg.holidays`` is processed: each ticker in
    ``cfg.tickers`` is analyzed with ``weights``, the choices returned by
    ``analyze.best_eval`` are turned into trades (buy for a positive share
    count, flatten for a negative one, nothing for zero) and the account is
    updated.  Afterwards all positions are closed as of the end date and
    the account is updated once more.
    """
    end_date = str(cfg.test_end.date())

    # Fallback for the final account update below.  The loop normally
    # overwrites it, but with an empty date range ``date`` used to be unbound
    # and the last line raised NameError.
    date = end_date

    #iterate daily through weekdays
    for day in rrule.rrule(rrule.DAILY,
                           dtstart=cfg.test_begin,
                           until=cfg.test_end):

        #skip weekends, holidays (only for holidays 1995 onward)
        date = str(day.date())
        if day.weekday() > 4 or date in cfg.holidays:
            continue

        day_start = time.time()

        #analyze all symbols
        evals = {}
        for symbol in cfg.tickers:
            evals[symbol] = analyze.Analyze(symbol, date, weights)

        #choose best evaluations
        choices = analyze.best_eval(evals)
        for choice in choices:
            #create trade to buy/sell/pass number of shares at price
            symbol, price, shares = analyze.pick_trade(choice)
            if shares > 0:
                cfg.api.buy(symbol, shares, price, date)
            elif shares < 0:
                cfg.api.flatten(symbol, price, date)

        elapsed = round(time.time() - day_start, 3)
        msg('%s analyzed in %s (%s - %s)' %
            (date, elapsed, cfg.api.account_balance(),
             cfg.api.account_networth()))

        #update account at end of day
        cfg.api.update_account(date)

    #flatten any open positions at end
    cfg.api.close_all(end_date)

    #update account (last day iterated, or the end date if there was none)
    cfg.api.update_account(date)
Exemplo n.º 9
0
    def __init__(self, name):
        """Open the database ``data/<name>`` and load every configured ticker.

        On a connection failure the problem is reported through ``msg`` and
        the process exits with status 1.  After a successful connection
        ``update_symbol`` is called for each entry of ``cfg.tickers``.
        """
        self.name = 'data/%s' % name
        try:
            self.con = sql.connect(self.name)
            self.cur = self.con.cursor()
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not mistaken for a
            # connection failure.
            #error and exit for now
            msg('could not connect to database: %s' % self.name, '-')
            sys.exit(1)

        # Reported outside the try block: a failure while logging is not a
        # database connection error.
        msg('connected to db: %s' % self.name, '+')

        #make sure db has correct data / correct tables

        #add or update specified symbols
        for symbol in cfg.tickers:
            self.update_symbol(symbol)
Exemplo n.º 10
0
 def preprocess(self):
    """Compute the document-wide values and push them down to every page.

    After the polarity check, the modes of ``self.all_ls`` and
    ``self.all_len`` are stored as ``MOLS`` / ``MOLEN`` and logged, the
    margins are calculated, and each page's ``calcValues`` is called with
    those values plus a line carried over from the previous page.
    """
    self.checkPolarity()

    # calculate document wide mode values
    self.MOLS=statfunctions.Mode(self.all_ls)
    self.MOLEN=statfunctions.Mode(self.all_len)

    msg( '\tMOLS='+repr(self.MOLS) )
    msg( '\tMOLEN='+repr(self.MOLEN) )

    self.calcMargins()

    carried_line = None
    for page in self.pages:
       page.calcValues(self.MOLS,self.MOLEN,self.modertmargins, carried_line)
       # hand on the last line when it is Plain (or the page has fewer than
       # two lines), otherwise the one before it
       if page[-1].type != Line.Plain and len(page) >= 2:
          offset = -2
       else:
          offset = -1
       carried_line = page.lines[offset]
Exemplo n.º 11
0
def checkParams(argv):
   """Validate the command line and derive the format and file names.

   argv is the full argument vector: program name, format, input file and
   an optional output file.  '.ps' is appended to the input name when it is
   missing, and the format 'plain' is treated as 'txt'.

   Returns the tuple (format, inputFilename, outFilename).  The output name
   is argv[3] when exactly four arguments were given, otherwise it is
   derived from the input name with misc.MakeFilename.

   Reports through msg and calls sys.exit(1) when arguments are missing,
   the format is not in io.knownFormats, or the input file is not found.
   """
   # check we have all commandline parameters
   if len(argv) < 3:
      msg( "Usage: prescript <plain|html|arff> <input> [output]" )
      sys.exit(1)

   inputFilename, format = argv[2],argv[1]

   # append the .ps if it was omitted
   if not inputFilename.endswith(".ps"):
      inputFilename += '.ps'

   # plain is a more convenient word to use
   if format == 'plain':
      format = 'txt'

   # check that it's a valid format
   if not format in io.knownFormats:
      msg( "Unknown format %s." % format )
      sys.exit(1)

   # make sure it exists
   if not os.path.exists( inputFilename ):
      msg( "Can't find (or access) file '%s'" % inputFilename )
      sys.exit(1)

   # Read the optional output name from the argv that was validated above;
   # the original consulted sys.argv here, ignoring the parameter.
   if len(argv) == 4:
      outFilename = argv[3]
   else:
      outFilename = misc.MakeFilename(inputFilename, '.'+format)

   return format,inputFilename,outFilename
Exemplo n.º 12
0
def main():
    """Train and/or backtest according to the flags in ``cfg``.

    With ``cfg.train`` set, the weights come from ``train()``; otherwise an
    empty dict is used.  With ``cfg.backtest`` set, ``backtest`` runs with
    those weights, its duration is logged and the account info for
    ``cfg.today`` is shown.
    """
    #general init done on config import

    #train weights
    weights = {}
    if cfg.train:
        msg('beginning to train', '+')
        weights = train()
        msg('done training', '+')

    #backtest algorithm
    if not cfg.backtest:
        return

    msg('beginning backtest', '+')
    began = time.time()
    backtest(weights)
    elapsed = round(time.time() - began, 3)
    msg('finished testing in %s' % elapsed, '+')
    cfg.api.account_info(cfg.today)
Exemplo n.º 13
0
Arquivo: io.py Projeto: palmerc/lab
def readPostScriptDataFile(FH, worker):
    """Feed the tab-separated fragment lines read from FH to worker.

    Blank lines are skipped.  A line whose first field starts with "P"
    triggers worker.newPage().  A line whose first field starts with "S"
    and that has exactly eight fields is a text fragment: when its unquoted
    string is not empty, a Fragment built from x0, y0, the string, x1 and y1
    is passed to worker.textFragment().  Anything else is logged as a bad
    line; once maxBadLines of them were seen the process exits with
    status 1.  worker.done() is called when reading stops.
    """
    errs = 0
    while 1:
        line = FH.readline()
        if not line: break

        line = line.strip()
        if len(line) == 0: continue

        fields = line.split('\t')
        if fields[0][0] == "P":
            worker.newPage()

        elif fields[0][0] == "S" and len(fields) == 8:
            [tag, x0, y0, string, ytop, ybot, x1, y1] = fields

            # If x1 is 'S', then some funny recursive font stuff has happened.
            # Ignore the recursive stuff, and search for the rest of this line
            if x1 == "S":
                rest = FH.readline()
                while rest and rest[0] == "S":
                    rest = FH.readline()
                if not rest:
                    # readline() returns '' only at end of file; the original
                    # indexed that empty string and died with IndexError.
                    msg( 'Unexpected end of input inside a fragment, stopping.' )
                    break
                # drop the line terminator (if any) without eating a digit
                # when the final line has none
                [x1, y1] = rest.rstrip('\r\n').split('\t')[:2]

            string = unquote(string)
            if len(string) > 0:
                worker.textFragment( Fragment( int(x0), int(y0),
                                               string,
                                               int(x1), int(y1)))
        else:
            msg( "Bad fragment line: "+repr(fields))
            errs = errs + 1
            if errs == maxBadLines:
                msg( 'Error limit encounter, aborting.  Is this *really* a post script file?' )
                sys.exit(1)
    worker.done()
Exemplo n.º 14
0
Arquivo: io.py Projeto: palmerc/lab
def applyHandcheck(inputFilename, document):
    """Attach hand-made classifications from the '.handclass' side file.

    The side file name comes from misc.MakeFilename(inputFilename,
    '.handclass'); nothing happens when it does not exist.  One line of the
    file is consumed for every document line whose type is in
    ARFFFormatter.ARFFtypes, and the code on it ('0'..'5') is translated
    into the line's handclass.  An unknown code is logged and the line is
    left unchanged.  When the file ends before the document does, an error
    is logged and processing stops.
    """
    hcstring={'0':'linefeed','1':'paragraph','2':'pagebreaklinefeed','3':'pagebreakparagraph','4':'explicitlinefeed','5':'picnoise'}
    handcheckFN = misc.MakeFilename(inputFilename,'.handclass')
    if not os.path.exists( handcheckFN ):
        return

    msg( 'Found and applying handcheck file' )
    hcf = open( handcheckFN )
    try:
        for page in document:
            for line in page:
                if line.type not in ARFFFormatter.ARFFtypes:
                    continue

                raw = hcf.readline()
                # readline() returns '' only at end of file (a blank line is
                # '\n').  The original compared the KeyError object with '',
                # which is never equal, so this case was never detected.
                if not raw:
                    msg( 'ERROR: Handclass file ran out before the end of the document!' )
                    return

                key = raw.strip()
                if key in hcstring:
                    line.handclass = hcstring[key]
                else:
                    # otherwise it's a bad keyword
                    msg( 'Unknown classification: '+repr(key) )
    finally:
        # the handle used to be left open
        hcf.close()
Exemplo n.º 15
0
Arquivo: io.py Projeto: palmerc/lab
def readFragments(psFilename):
    """Run Ghostscript over psFilename and collect the emitted fragments.

    ``gs`` is started with the ``prescript.ps`` found in
    ``__main__.prescript_dir``, the input file and ``quit.ps``; its standard
    output is parsed by readPostScriptDataFile into a PSDatReader, which is
    returned.

    The command is passed as an argument list without a shell, so a file
    name containing spaces or shell metacharacters reaches ``gs`` unchanged.
    Raises OSError when the ``gs`` executable cannot be started.
    """
    import subprocess

    msg( "Reading PS fragments" )
    command = ['gs', '-q', '-dNODISPLAY', '-soutfile=%stdout',
               __main__.prescript_dir + 'prescript.ps',
               psFilename,
               'quit.ps']
    gs = subprocess.Popen(command, stdout=subprocess.PIPE,
                          universal_newlines=True)
    fragdata = PSDatReader()
    try:
        readPostScriptDataFile( gs.stdout, fragdata )
    finally:
        # close the pipe and reap the child; neither was done before
        gs.stdout.close()
        gs.wait()
    return fragdata
Exemplo n.º 16
0
   def findFreq( self, list ):
      """Summarise runs of entries with (nearly) equal first component.

      Each entry of ``list`` is indexed with [0] and [1] (presumably an x
      and a y value -- TODO confirm with the caller).  Consecutive entries
      whose [0] matches the first entry of the current run, according to
      misc.isEqual with the Xequalness tolerance, are collapsed into one
      record: (count, mean of [0] rounded to 1 decimal, mean of [1] rounded
      to 1 decimal, smallest [1], largest [1]).

      The records are then sorted, pruned around the first record whose
      count reaches MinConsecFreq (or the last record when none does), and
      neighbouring records with matching values are combined through
      self.mergeRecords.

      Returns the remaining records, or [] (after a warning) when ``list``
      is empty.

      NOTE(review): under Python 2 the ``/`` in the averages floors when
      both operands are ints -- confirm the inputs are floats.
      """
      if len(list) == 0:        # can't operate on nothing
         msg( "findFreq: WARNING: called with an empty input list!!" )
         return []

      output = []                       # output list
      f = 0                     # frequency of val
      val = list[0]             # start with the first element
      valcum = (0,0)            # cumulation of vals
      min = max = val[1]                # minimum and maximum y values
   
      # since val's aren't strictly the same, we'll average the val's to get a
      # more accurate view of what val actually is
   
      for l in list:
         # every entry is compared with the FIRST entry of the current run
         # (val), not with the running average
         if misc.isEqual(val[0],l[0],Xequalness):
            f = f + 1           # we've found another val
            valcum = (valcum[0] + l[0], valcum[1] + l[1])
            if l[1] < min:
               min = l[1]
            elif l[1] > max:
               max = l[1]
         else:
            # the run ended: store its record and start a new run at l
            output.append( (f, round(valcum[0]/f,1),round(valcum[1]/f,1), min, max ) )
            # reset
            val = l             # next val
            f = 1               # and we've already found one
            valcum = l
            min = max = val[1]

      # store the record of the final run
      output.append( (f, round(valcum[0]/f,1),round(valcum[1]/f,1), min, max) )
      output.sort()       # sort on frequency (pri) and x (sec) 

      # output now contains the list of consecutive frequencies.
   
      # I found the following few lines of code very difficult to comment,
      # so please excuse the bad explanation.
      #
      # 'output contains a list of (consecutive frequency,value) pairs.  That
      # means you can have several records that have the same value, but since
      # they weren't consecutive in the input list, they were not combined.
      #
      # for example, [(1,10),(2,8),(1,10),(1,9),(5,10)]
      #
      # in that example, 10 is the value with a sufficiently high frequency,
      # and so all values less than 10 must be deleted and all (x,10) records
      # must be combined.
   
      # this code finds the first record whose f is >= MinConsecFreq
      i = 0               # where to cut
      while i < len(output) and output[i][0] < MinConsecFreq:
         i = i + 1      # find first i where output[i] >= MinConsecFreq

      # if i points off the list, no eligible candidates were found.
      # When this happens, we take the largest frequency found, which will
      # be the last record in the list.
      if i == len(output):
         i = i - 1 

      # this code backtracks to ensure that any occurrences of the val
      # associated with the i'th record also gets included.  Without this,
      # the frequency reported for the val[i] might be too small because
      # some small groups (less than MinConsecFreq) were discounted
      # because their f's were too small.  Sort of.  Try and make sense
      # of that.
      #
      # NOTE(review): i is decremented on every pass, also when record j is
      # kept and j advances, so output[i] then names a different record --
      # confirm this is intended and not a lost indentation level.
      j = 0
      while j < i:
         if misc.isEqual(output[j][1],output[i][1],Xequalness):
            j = j + 1
         else:
            del output[j]
         i = i - 1

      # output now contains only eligible frequencies which must now be combined
      # ie, [(1,10),(2,10),(1,11)] ==> [(3,10),(1,11)]
      #
      # The positional comparison function is Python 2 only; int() truncates,
      # so records whose values differ by less than 1 compare as equal here.
      output.sort( lambda a,b: int(a[1] - b[1]) ) # sort on x value 
      i = 0
      while i <= len(output) - 2:       # go from 0 to second to last index
         if misc.isEqual( output[i][1], output[i+1][1],Xequalness ): #abs(output[i][1] - output[i+1][1]) <= 2:
            # fold the neighbour into record i and look at the new neighbour
            # next time round (i is deliberately not advanced)
            output[i] = self.mergeRecords( output[i], output[i+1], lambda a,b: round((a+b)/2) )
            del output[i+1]
         else:
            i = i + 1

      return output
Exemplo n.º 17
0
def preprocessDocument(document):
   """Announce the preprocessing step and run it on ``document``."""
   msg( 'Preprocessing document' )
   document.preprocess()
Exemplo n.º 18
0
Arquivo: io.py Projeto: palmerc/lab
def renderDocument(formatter, document):
    """Render ``document`` page by page through ``formatter``.

    ``formatter.start`` is called first, every page of the document is then
    passed to ``renderPage``, and ``formatter.end`` closes the output.
    """
    msg( 'Rendering document' )

    formatter.start(document)
    for page in document:
        renderPage(formatter, page)
    formatter.end(document)
Exemplo n.º 19
0
 def __init__(self):
     """Announce the API connection and attach a fresh ``Account``."""
     msg('connected to api', '+')

     # created with Account's default arguments
     account = Account()
     self.account = account