Exemplo n.º 1
0
def get_output(tg, path, root):
    """Walk a cyclic token path through the trade graph and estimate swap rates.

    Parameters:
        tg: nested mapping src -> dest -> edge dict containing a 'pair' entry
        path: sequence of token addresses forming a cycle
        root: passed to reorder() to rotate the path; if the root is not on
            the path (NotFound), the path is used as-is

    Returns a dict with the final rate (normalised by the first token's
    decimals), the path actually used, the pair addresses traversed, and the
    per-hop rate estimates.
    """
    info = []
    try:
        c = reorder(path, root)
    except NotFound:
        # root not on the path: keep the original ordering
        c = path
    wei = erc20address.get_token_info(c[0])['decimal']
    base = pow(10, wei)
    rate = common.switch_decimal(base)
    swap_estimate = [rate]
    n = len(c)
    for i in range(n):
        src = c[i]
        dest = c[(i + 1) % n]  # wrap around: the path is a cycle
        # Original read `tg[src][dest] or tg[src][dest]` — a redundant
        # self-`or` with identical operands; a single lookup is equivalent.
        pair = tg[src][dest]['pair']
        rate = Pair.exchange(pair, src, dest, rate)
        info.append(pair['address'])
        swap_estimate.append(rate)
    return {
        'rate': rate / base,
        'path': c,
        'info': info,
        'swap': swap_estimate
    }
Exemplo n.º 2
0
def test8():
    """Smoke test: build a Pair from live exmo and ethfinex ETH/USDT books."""
    sym = 'ETH/USDT'
    left = Exchange.Exchange(ccxt.exmo())
    left.add_book(left.Ex.fetch_order_book(sym), sym)
    right = Exchange.Exchange(ccxt.ethfinex())
    right.add_book(right.Ex.fetch_order_book(sym), sym)
    pair = Pair.Pair(left, right, 'ETH/USDT')
Exemplo n.º 3
0
 def sentence_length(self):
     '''
     Return statistics over the sentence lengths in the text file:
     [0] mean, [1] standard deviation, [2] coefficient of variation.

     NOTE(review): stdev() raises StatisticsError for fewer than two
     sentences, and cov divides by a zero mean for an empty file —
     both match the original behavior and are left unchanged.
     '''
     p = Pair.Pair(self.path)
     path = p.mcb2text()
     # Context manager guarantees the handle is closed even if read() fails
     # (the original left the file open on error).
     with open(path, 'r', encoding="utf-8") as f:
         text = f.read()
     # Split on the Japanese full stop; drop empty fragments.
     length_list = [len(s) for s in re.split('。', text) if len(s) != 0]
     snum = len(length_list)  # number of sentences
     cnum = sum(length_list)  # total number of characters
     try:
         mean_length = cnum / snum
     except ZeroDivisionError:
         print(path)
         mean_length = 0
     stdev_length = stdev(length_list)
     cov_length = stdev_length / mean_length  # coefficient of variation
     return mean_length, stdev_length, cov_length
Exemplo n.º 4
0
def test6():
    """Print ROI at 0.1-ETH steps across the tradable range of a binance/hitbtc pair."""
    sym = 'ETH/BTC'
    ex_a = Exchange.Exchange(ccxt.binance())
    ex_a.add_book(abook, sym)
    ex_b = Exchange.Exchange(ccxt.hitbtc())
    ex_b.add_book(bbook, 'ETH/BTC')
    pair = Pair.Pair(ex_a, ex_b, 'ETH/BTC')
    lo = int(10 * pair.min_trade())
    hi = int(10 * pair.max_trade())
    for step in range(lo, hi):
        print(pair.roi(step / 10.0))
Exemplo n.º 5
0
def test1():
    """Fetch live BCD/BTC books from binance and bitz and print pair stats."""
    # Renamed from `bin`, which shadowed the builtin bin().
    binance_ex = Exchange.Exchange(ccxt.binance())
    binance_ex.add_book(binance_ex.Ex.fetch_order_book('BCD/BTC'), 'BCD/BTC')
    # Renamed from `hit`, which misleadingly wrapped bitz, not hitbtc.
    bitz_ex = Exchange.Exchange(ccxt.bitz())
    bitz_ex.add_book(bitz_ex.Ex.fetch_order_book('BCD/BTC'), 'BCD/BTC')
    pair = Pair.Pair(bitz_ex, binance_ex, 'BCD/BTC')
    print(pair.max_trade())
    print(pair.min_trade())
    print(pair.margin())
Exemplo n.º 6
0
def get_pairs(congress, chamber):
    """Build a dict of Pair objects for every unordered member pair.

    Fetches the member list for the given congress/chamber from the
    ProPublica API and returns {Pair: pair.data} for each two-member
    combination, in the same order as the original index-based loops.
    """
    from itertools import combinations  # local import keeps the snippet self-contained
    uri = "https://api.propublica.org/congress/v1/{}/{}/members.json".format(congress, chamber)
    members = get_json(uri)['results'][0]['members']
    pairs = dict()
    # combinations() replaces the manual i < j double loop.
    for a, b in combinations(members, 2):
        pair = Pair(a['id'], b['id'],
                    a['first_name'] + ' ' + a['last_name'],
                    b['first_name'] + ' ' + b['last_name'])
        pairs[pair] = pair.data
    return pairs
Exemplo n.º 7
0
    def parse_greyware_behaviors(self):
        """Collect a (leak, privacy_details) Pair for each greyware behavior.

        Populates self.__behavior_privacy_details keyed by behavior name and
        returns the number of distinct behaviors recorded.
        """
        self.__behavior_privacy_details = dict()
        for risk in self.get_greyware_rating().library_list:
            for behavior in risk.behavior_list:
                self.__behavior_privacy_details[behavior.behavior_name] = \
                    Pair.Pair(behavior.leak, behavior.privacy_details)

        return len(self.__behavior_privacy_details.keys())
Exemplo n.º 8
0
def test4():
    """Print trade limits and fee attributes for a binance/bitz BCD/BTC pair."""
    sym = 'BCD/BTC'
    first = Exchange.Exchange(ccxt.binance())
    first.add_book(abook, sym)
    second = Exchange.Exchange(ccxt.bitz())
    second.add_book(cbook, 'BCD/BTC')
    pair = Pair.Pair(first, second, 'BCD/BTC')
    for value in (pair.max_trade(), pair.min_trade(),
                  pair.Margin, pair.FlatFees, pair.PercentFees):
        print(value)
Exemplo n.º 9
0
 def main(self):
     """Print each file in self.dirname whose mapped counterpart is missing."""
     for entry in os.listdir(self.dirname):
         mapper = Pair.Pair(entry)
         # Direction of the mapping depends on which side we are scanning.
         if self.dirname == "text/student":
             target = mapper.stu2rev()
         else:
             target = mapper.rev2stu()
         if not os.path.exists(target):
             print(target)
     print("以上")
Exemplo n.º 10
0
def calculate_metric(congress, chamber, metric_name, weights):
    """Compute weighted metric values for member pairs and write them to CSV."""
    source_file = str(congress) + '_' + chamber + '.csv'
    rows = from_csv(source_file)
    pairs = [Pair(None, None, None, None, tup) for tup in rows]
    # Evaluate every registered metric function over the pair list.
    per_metric = [m(pairs) for m in metrics]
    final_metric_values = weigh(pairs, per_metric, weights)
    out_file = metric_name + '_' + str(
        congress) + '_' + chamber + '_metric.csv'
    header = ('member_a_id', 'member_b_id', 'member_a_name',
              'member_b_name', 'metric')
    out_rows = [
        (p.id_a, p.id_b, p.name_a, p.name_b, value)
        for p, value in final_metric_values.items()
    ]
    to_csv(header, out_rows, out_file)
Exemplo n.º 11
0
def test7():
    """Exercise margin/ROI reporting for a binance/hitbtc ETH/BTC pair."""
    sym = 'ETH/BTC'
    ex1 = Exchange.Exchange(ccxt.binance())
    ex1.add_book(abook, sym)
    ex2 = Exchange.Exchange(ccxt.hitbtc())
    ex2.add_book(abook, 'ETH/BTC')
    pair = Pair.Pair(ex1, ex2, 'ETH/BTC')
    print(pair.max_trade())
    print(pair.min_trade())
    print(pair.Margin)
    print(pair.margin(pair.max_trade()))
    print(pair.margin(pair.min_trade()))
    usd_in = botutils.convert_to_USD(pair.max_trade(), 'ETH')
    usd_out = botutils.convert_to_USD(
        pair.max_trade() + pair.roi(pair.max_trade()), 'ETH')
    print('$', "%.2f" % usd_in, ' -> $', "%.2f" % usd_out)
Exemplo n.º 12
0
def update_pairs():
    """Filter pairtuples down to tradable Pair objects and sort by margin."""

    dump('Trying ' + yellow(str(len(pairtuples))) + ' arbitrable trade pairs.')

    for spec in pairtuples:
        candidate = Pair.Pair(spec[0], spec[1], spec[2])
        # Keep only pairs that have a sell side and a non-empty trade window.
        if candidate.ExSell is not None and candidate.min_trade() < candidate.max_trade():
            pairs.append(candidate)

    # Highest margin first.
    pairs.sort(key=lambda entry: entry.Margin, reverse=True)
Exemplo n.º 13
0
def pair_similarity(member_votes, pairs):#congress, chamber='both'):
    """Fill in votes_same / votes_total for every member pair.

    Parameters:
        member_votes: mapping member id -> records convertible to a
            two-column DataFrame of (bill_id, vote)
        pairs: mapping Pair -> mutable stats dict, keyed by member-id pair

    Mutates `pairs` in place; prints a warning for pairs missing from it.
    """
    # combinations() of distinct dict keys can never repeat a member, so the
    # original `if m[0] == m[1]: continue` guard was dead code and is removed.
    for m1_id, m2_id in it.combinations(member_votes.keys(), 2):

        d1 = pd.DataFrame(member_votes[m1_id], columns=['bill_id1', 'vote1'])
        d2 = pd.DataFrame(member_votes[m2_id], columns=['bill_id2', 'vote2'])

        # Inner join keeps only the bills both members voted on.
        merged = d1.merge(d2, how="inner", left_on='bill_id1', right_on='bill_id2')
        votes_same = merged.loc[merged['vote1'] == merged['vote2'], :].shape[0]
        votes_total = merged.shape[0]

        dummy_pair = Pair(m1_id, m2_id)  # used only to access the dictionary
        try:
            pairs[dummy_pair]['votes_same'] = votes_same
            pairs[dummy_pair]['votes_total'] = votes_total
        except KeyError:
            print("Discrepancy in ProPublica member data")
Exemplo n.º 14
0
def getArbitragePairs():
    """Register symbols tradable on at least two exchanges as arbitrage pairs.

    Reads the Exchanges/Symbols globals and appends to pairedSymbols and
    marketSymbols as a side effect.
    """
    from collections import Counter  # local import keeps the snippet self-contained

    ids = [*Exchanges]
    for ex_id in ids:  # `ex_id`: the original `id` shadowed the builtin
        marketSymbols[ex_id] = []
    allSymbols = [symbol for ex_id in ids for symbol in Exchanges[ex_id].symbols]

    # get all unique symbols
    uniqueSymbols = list(set(allSymbols))

    # Count once instead of calling allSymbols.count() per symbol (O(n^2)).
    symbolCounts = Counter(allSymbols)

    # filter out symbols that are not present on at least two exchanges
    arbitrableSymbols = sorted([
        symbol for symbol in uniqueSymbols
        if symbolCounts[symbol] > 1 and symbol in Symbols
    ])

    for symbol in arbitrableSymbols:
        pairedSymbols.append(symbol)
        pair = Pair.ArbPair()
        for ex_id in ids:
            if symbol in Exchanges[ex_id].symbols:
                # NOTE(review): lowercase `exchanges` differs from the
                # `Exchanges` mapping used above — confirm it is a distinct
                # global and not a typo.
                exchanges[ex_id].addPair()
                marketSymbols[ex_id].append(symbol)
Exemplo n.º 15
0
    def find_pairs_quick_k(self, image, k):
        """Pair points that are mutual k-nearest neighbours across two images.

        A pair (self.points[i], image.points[j]) is kept only when j is among
        the k smallest distances for point i AND i is among the k smallest for
        point j (symmetry check). Matching Pair objects are appended to
        self.pairs and the result is printed.

        Parameters:
            image: the other image; its .points are indexed by the rows of
                self.distances_to_another_image — assumes the matrix is
                (image points) x (self points), TODO confirm against caller
            k: neighbourhood size; presumably k < number of points, otherwise
                np.argpartition raises — verify
        """
        # Work on a copy so the shared distance matrix is never mutated.
        x = copy.deepcopy(self.distances_to_another_image)
        # cut features matrix to get k minimum values
        # for image_1 from rows
        min_dist_image_1 = np.argpartition(x, k, axis=0)
        min_dist_image_1 = min_dist_image_1[:k]
        # cut features matrix to get k minimum values
        # for image_2 from columns
        min_dist_image_2 = np.argpartition(x, k, axis=1)
        min_dist_image_2 = min_dist_image_2[:, :k]

        for i in range(0, self.points_size):
            # indices (into image.points) of the k nearest to point i
            img1 = min_dist_image_1[:, i]

            for b in range(0, k):
                # k nearest (into self.points) from the candidate's viewpoint
                img2 = min_dist_image_2[img1[b]]
                if i in img2[:]:
                    # add the pair to the list
                    # if nearest is symmetrical relation
                    self.pairs.append(Pair.Pair(self.points[i],
                                                image.points[img1[b]],
                                                self.distances_to_another_image[img1[b]][i]))
        print(self.pairs)
        print(len(self.pairs))
Exemplo n.º 16
0
Arquivo: arb2.py Projeto: mikem5/arb2
    def __init__(self, m, numCur):
        """Initialise the arbitrage engine over a set of market objects.

        Parameters:
            m: list of market objects; each must expose .balances.funds
            numCur: number of currency pairs tracked across all exchanges
        """

        # numCur is the amount of currencies we are
        # going to be tracking across all the exchanges
        # eg, ltc/btc, ftc/btc would be 2 etc.

        # keep track of running time
        self.__started = time.time()

        # an array of market objects
        self.markets = m

        # this is the default order for currency
        # NOTE(review): "lsd" looks like a typo (perhaps "btc", which the
        # discrep dict below tracks) — confirm before relying on this order.
        self.currencies = ["ltc", "usd", "lsd"]

        # Use the pair object to creat an array of the
        # best currency markets
        # we may need to put the market name/index number
        # in the Pair object so that we can later match

        # The form that the pair objects in ask/bid will take
        # inside this arbitrage object is as follows, and
        # note: IS DIFFERENT FROM DEFAULT PAIR.

        # [price, quantity, index, marketbalance, marketidx]
        self.pairs = []
        self.trades = []
        for x in range(0, numCur):
            self.pairs.append(Pair('arb', x, 50, [dq, dq, dq], 0))
            self.trades.append(Trades('arb', x, 50, 0))

        # Variables to keep track of overall profit / trade
        # volume....This will need to be corrected for multiple
        # currency pairs as the price/volume is drasticlly different
        self.tprofit = D('0')
        self.ttrades = []

        # one running trade-volume accumulator per tracked currency pair
        for x in range(0, numCur):
            self.ttrades.append(D('0'))

        # Strings for the logger
        self.last_arb = ""
        self.last_best = ""
        self.last_string = ""

        # This is for our draw screen function
        self.screen_update = time.time()

        # This sets up the balance sheets for all our currencies
        # we use this to tell if we are way off the mark and need
        # to buy/sell a currency to bring us back to a flat line
        self.market_balances = Balance()

        # aggregate per-currency funds across every market
        for m in self.markets:
            for k, v in m.balances.funds.items():
                self.market_balances.funds[k] += v

        # snapshot of the starting balances, used as the baseline for discrep
        self.__initial_market_balances = copy.deepcopy(self.market_balances)

        # Write out our initial balances to our log file
        self.logger(self.stringFormat(0, 0, 0, 2))

        # This lets our main loop know if we entered an arb state
        # so that we can run checkbooks/etc
        self.entered_arb = 0

        # This is a counter, to check our books a couple times
        # then stop so we don't just check all the time as
        # really, unless there is a change in 3-5 minutes
        # there shouldnt be any further change
        self.arb_counter = 0

        #
        # This is the discrepancy from the base balance we
        # are off in each market - should use to adjust order
        # trades in the future
        #
        # This is either + or - from the base
        # + means we should sell more (or buy less)
        # - means we should buy more (or sell less)
        #
        self.discrep = {'usd': D('0'), 'ltc': D('0'), 'btc': D('0')}
Exemplo n.º 17
0
    def KPPCluster(self, points, bufferedWriter=None):
        """Threshold the points, cluster the remainder with K-Means, and
        return per-cluster Pair(ratio, center, entropy) sorted by center.

        The thresholded-out fraction forms the first Pair; each K-Means
        cluster contributes one more. `bufferedWriter` is accepted for
        interface compatibility but unused (as in the original, where all
        writes to it were commented out).

        Parameters:
            points: numpy array of 1-D sample values
            bufferedWriter: unused

        Returns the list of Pair objects sorted ascending by .value.
        """
        # The original duplicated ~40 lines across the two branches; they
        # differed only in pctThreshold and the comparison direction.
        if np.amax(points) > 0:
            keep_below = True
        else:
            # Degenerate input (no positive points): loosen the percentile
            # and keep the points ABOVE the threshold instead.
            self.pctThreshold = 5
            keep_below = False

        pct = self.selectThreshold(points)
        self.threshold4Maximum = pct[0]
        pctEntropy = pct[1]

        if keep_below:
            clusterInput = np.extract(points < self.threshold4Maximum, points)
        else:
            clusterInput = np.extract(points > self.threshold4Maximum, points)

        # One cluster slot is reserved for the thresholded-out group.
        kmeansClusterNum = self.totalGroup - 1

        center = []
        totalPoints = float(points.size)
        clusterPoints = clusterInput.size
        # Fraction of points removed by the threshold; it forms its own group.
        groupPercent = (totalPoints - clusterPoints) / totalPoints
        center.append(
            Pair(groupPercent, float(self.threshold4Maximum), pctEntropy))

        # K-Means over the remaining 1-D values (reshaped to a column).
        kmeans = KMeans(n_clusters=kmeansClusterNum).fit(
            clusterInput.reshape(-1, 1))
        labels = kmeans.labels_
        centerV = kmeans.cluster_centers_.reshape(-1)

        # Gather the member values of each cluster.
        clusterResults = []
        for i in range(kmeansClusterNum):
            clusterResults.append(np.extract(labels == i, clusterInput))

        for i in range(kmeansClusterNum):
            ratio = float(clusterResults[i].size) / totalPoints
            entropy = self.entropy_bin(clusterResults[i])
            center.append(Pair(ratio, centerV[i], entropy))
            print('Pair:', ratio, centerV[i], entropy)

        # Sort the groups by their representative value.
        sorted_center = sorted(center,
                               key=lambda center_pair: center_pair.value)

        return sorted_center
Exemplo n.º 18
0
    def __init__(self,
                 cur,
                 url,
                 rang=50,
                 f=1.002,
                 thrp=.02,
                 thrm=0,
                 mtq=[0, 0]):
        """Initialise a market: order books, trade history, balances, nonce.

        Parameters:
            cur: array of currency pair names (0 entries are placeholders)
            url: [prefix, suffix] pieces; full URL = url[0] + pair + url[1]
            rang: orderbook depth (default 50)
            f: trade fee as a multiplier, e.g. 1.002 for 0.2%
            thrp: per-pair throttle
            thrm: time between calls for this market
            mtq: per-pair minimum trade quantities
                 NOTE(review): mutable default argument — shared across
                 calls if ever mutated; confirm callers always pass it.
        """

        # An array of currency names
        self.curPairs = cur

        # The url is an ARRAY [] where 0 is the first bit
        # then we stop and continue after the curPair would be
        # added so that we have [0] + curPair + [1] for the
        # complete url. but our defURL will only have [0] and [1]
        self.defURL = url

        # The range or orderbook depth default 50
        self.range = rang

        # Default fee is .2%
        # we write as 1.002
        # This is a decimal from this point on
        self.fee = D(str(f))

        # throttles should be deleted

        # Default throttle is 0
        # this is time between calls
        self.throttle = thrm

        # we set this so that we can keep track
        self.throttle_updated = time.time()

        # Main orderbook pairs initialization process
        self.pairs = []
        for i, x in enumerate(self.curPairs):
            if x != 0:
                # set the inverted bit
                if self.pairKey(x) == 'usd':
                    self.pairs.append(
                        Pair(self.mname, x, self.range, mtq[i], thrp, inv=1)
                    )  # we make the currency pairs, thrp is the default pair throttle
                # non inverted, "normal"
                else:
                    self.pairs.append(
                        Pair(self.mname, x, self.range, mtq[i], thrp, inv=0)
                    )  # we make the currency pairs, thrp is the default pair throttle
            else:
                self.pairs.append(x)

        # same process for trades history
        self.trades = []
        for x in self.curPairs:
            if x != 0:
                self.trades.append(
                    Trades(self.mname, x, self.range, thrp)
                )  # we make the currency pairs, thrp is the default pair throttle

                # now we open the trade file and get the last trade.
                # Best-effort: any failure just leaves last_trade unset.
                try:
                    line = subprocess.check_output(
                        ['tail', '-1', 'logs/trades/' + self.mname + '/' + x])
                    # now parse the trade
                    # NOTE(review): check_output returns bytes on Python 3,
                    # so split(',') raises and this block silently skips —
                    # confirm whether b',' / decode() is intended.
                    y = line.split(',')
                    self.trades[-1].last_trade = [
                        D(str(y[0])),
                        D(str(y[1])),
                        int(str(y[2])),
                        int(str(y[3])), 'bid',
                        str(time.time())
                    ]
                except Exception:
                    # narrowed from a bare except: no longer swallows
                    # KeyboardInterrupt/SystemExit
                    pass

            else:
                self.trades.append(x)

        # This is the balance sheet object
        self.balances = Balance()

        # this is the order array for an open "market maker" order
        self.open_order_mm = OrderList()

        # This indicates if the market is in some type of error
        # so we halt trading on it
        self.hold_trade = 0

        # This variable indicates that we interacted in a trade
        # so we should call the cancelorders/setBalance
        # to update our book.
        self.need_update = 0

        # Market was last alive at this point
        self.last_alive = time.time()

        # A way to count number of trades executed on this market
        self.totaltrades = 0

        # Last quantity is a way for threading execution to get
        # the last traded amount since python threads cant
        # return the value that well.
        self.last_quantity_traded = D('-1')

        # The initial nonce is loaded from a saved file. This SHOULD
        # be quite close to the last one, but just in case we
        # will increment by 100 at the start to make sure we are not off
        # this could change later.
        # nonces are stored in nonce/

        filename = "nonce/" + self.mname
        # Context manager closes the handle even if readline()/int() raises;
        # `fh` also avoids rebinding the fee parameter `f` as the original did.
        with open(filename, 'r') as fh:
            self.initial_nonce = int(fh.readline()) + 100
        self.current_nonce = 0

        self.writeNonce()

        # Function which gets initial balances for this market
        self.setBalance(initial=1)

        # skew for order placement, and order cancel
        self.skew_order = 10
        self.skew_cancel = 10

        # for competing on market making orders
        self.compete_price = []
        for x in self.curPairs:
            self.compete_price.append(D('0'))
Exemplo n.º 19
0
    def __init__(self, line, bg):
        """Render the shortest-path routes of one trip pair onto the map image.

        Builds grid indices for the four coordinates parsed from `line`,
        snaps any index that falls outside the network's main connected
        component, plots the three shortest-path legs onto bg.background,
        and stores the rasterised RGB image in self.x with label self.y.

        Parameters:
            line: raw record consumed by Pair.Pair — format defined there
            bg: object exposing .network (networkx graph), .map_df, and
                .background (a matplotlib figure)
        """
        prm = params.params()
        G = bg.network
        p = Pair.Pair(line)
        c = p.refinedCoords  # 8 values: four (lon, lat) points, flattened
        idx = p.Id
        color = p.colors

        # pickup 1, dropoff 1, pickup 2, dropoff 2
        p1 = c[0:2]
        p2 = c[2:4]
        p3 = c[4:6]
        p4 = c[6:]

        # convert (lon, lat) to grid indices
        indp1 = (utils.toInd(p1[0], prm.lonMin, prm.lonTick),
                 utils.toInd(p1[1], prm.latMin, prm.latTick))
        indd1 = (utils.toInd(p2[0], prm.lonMin, prm.lonTick),
                 utils.toInd(p2[1], prm.latMin, prm.latTick))
        indp2 = (utils.toInd(p3[0], prm.lonMin, prm.lonTick),
                 utils.toInd(p3[1], prm.latMin, prm.latTick))
        indd2 = (utils.toInd(p4[0], prm.lonMin, prm.lonTick),
                 utils.toInd(p4[1], prm.latMin, prm.latTick))

        # NOTE(review): assumes the first connected component is the main
        # one — connected_components() order is not size-sorted; confirm.
        target = nx.connected_components(G)
        target = list(target)
        target = target[0]

        # snap off-network points to the nearest node in the component
        if indp1 not in target:
            indp1 = utils.approximate(indp1, target)

        if indd1 not in target:
            indd1 = utils.approximate(indd1, target)

        if indp2 not in target:
            indp2 = utils.approximate(indp2, target)

        if indd2 not in target:
            indd2 = utils.approximate(indd2, target)

        # three legs: pickup1->dropoff1, dropoff1->pickup2, pickup2->dropoff2
        ashortest = nx.shortest_path(G, indp1, indd1)
        ashortest1 = nx.shortest_path(G, indd1, indp2)
        ashortest2 = nx.shortest_path(G, indp2, indd2)

        # map grid nodes back to plot coordinates
        zz = [utils.mapItOn(z, bg.map_df) for z in ashortest]
        zz = np.asarray(zz)
        zz1 = [utils.mapItOn(z, bg.map_df) for z in ashortest1]
        zz1 = np.asarray(zz1)
        zz2 = [utils.mapItOn(z, bg.map_df) for z in ashortest2]
        zz2 = np.asarray(zz2)

        # draw each leg in its own color on the shared background figure
        zp1 = bg.background.axes[0].plot(zz[:, 0],
                                         zz[:, 1],
                                         lw=20,
                                         c=color[0],
                                         alpha=1)
        zp2 = bg.background.axes[0].plot(zz1[:, 0],
                                         zz1[:, 1],
                                         lw=20,
                                         c=color[1],
                                         alpha=1)
        zp3 = bg.background.axes[0].plot(zz2[:, 0],
                                         zz2[:, 1],
                                         lw=20,
                                         c=color[2],
                                         alpha=1)

        # rasterise the figure and reload it as an image
        bg.background.savefig('out.png', bbox_inches='tight', pad_inches=0)
        img = Image.open('out.png')
        # resize() returns a NEW image (not in-place, despite the original
        # comment). NOTE(review): Image.ANTIALIAS was removed in Pillow 10;
        # Image.LANCZOS is the equivalent replacement.
        img = img.resize((prm.step, prm.step),
                         Image.ANTIALIAS)  # resizes image in-place

        # remove the three plotted legs so the background is clean for reuse
        bg.background.axes[0].lines.pop(0)
        bg.background.axes[0].lines.pop(0)
        bg.background.axes[0].lines.pop(0)

        # drop the alpha channel; label is the inverted first type flag
        self.x = np.asarray(img)[:, :, :-1]
        self.y = 1 - p.type[0]
Exemplo n.º 20
0
# Build a spam-token frequency list from the subjects in emails.csv.
common_words = []

email_fileName = 'emails.csv'
output = 'common_spam_tokens.txt'

# Open email file
with open(email_fileName) as f:
    reader = csv.reader(f, delimiter=',')
    for mail in reader:
        # Tokenize mail's subject (first two tokens skipped)
        tokens = nltk.wordpunct_tokenize(mail[0])[2:]

        for word in tokens:
            # Find word's index in common_words (-1 if absent)
            index = ult.find(common_words, word)

            if index == -1:
                # Unseen word: start tracking it only when it comes from a
                # spam mail. (The original fell through to the else-branch
                # for unseen non-spam words, wrongly mutating
                # common_words[-1] — or raising IndexError on an empty list.)
                if mail[1] == 'Spam':
                    common_words.append(ult.Pair(word))
            else:
                # Known word: bump its frequency up for spam, down otherwise
                common_words[index].addFreq(1 if mail[1] == 'Spam' else -1)

# Sort list in descending frequency order
common_words.sort(reverse=True)

# Write 2500 first items to output file
with open(output, 'w') as f:
    for i in range(0, 2500):