Example #1
 def send_to_db(self):
     conn = sqlite3.connect('data2.sqlite', timeout=30)
     c = conn.cursor()
     df = DataFrame(self.__dict__.items(), index=self.__dict__.keys())
     df = df.drop(0,1)
     df = df.transpose()
     df = df.sort(axis=1)
     df.to_sql('earnings_calendar', conn, if_exists='append', index=False)
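The snippet above relies on pandas APIs that no longer exist (`DataFrame.sort` was removed, and positional `drop(0, 1)` is gone). A minimal sketch of the same idea on current pandas, assuming the same `earnings_calendar` table and that the instance attributes are exactly the columns to store:

import sqlite3
from pandas import DataFrame

def send_to_db(self):
    # Method body for the same class as above: one row whose columns are
    # the instance attributes, sorted by name for a stable table schema.
    conn = sqlite3.connect('data2.sqlite', timeout=30)
    df = DataFrame([self.__dict__]).sort_index(axis=1)
    df.to_sql('earnings_calendar', conn, if_exists='append', index=False)
    conn.close()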
Example #2
    def testDataFrame(self):
        df = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"], columns=["x", "y", "z"])

        # column indexed
        outp = DataFrame(ujson.decode(ujson.encode(df)))
        self.assertTrue((df == outp).values.all())
        assert_array_equal(df.columns, outp.columns)
        assert_array_equal(df.index, outp.index)

        outp = DataFrame(**ujson.decode(ujson.encode(df, orient="split")))
        self.assertTrue((df == outp).values.all())
        assert_array_equal(df.columns, outp.columns)
        assert_array_equal(df.index, outp.index)

        outp = DataFrame(ujson.decode(ujson.encode(df, orient="records")))
        outp.index = df.index
        self.assertTrue((df == outp).values.all())
        assert_array_equal(df.columns, outp.columns)

        outp = DataFrame(ujson.decode(ujson.encode(df, orient="values")))
        outp.index = df.index
        self.assertTrue((df.values == outp.values).all())

        outp = DataFrame(ujson.decode(ujson.encode(df, orient="index")))
        self.assertTrue((df.transpose() == outp).values.all())
        assert_array_equal(df.transpose().columns, outp.columns)
        assert_array_equal(df.transpose().index, outp.index)
Example #3
    def testDataFrame(self):
        df = DataFrame([[1,2,3], [4,5,6]], index=['a', 'b'], columns=['x', 'y', 'z'])

        # column indexed
        outp = DataFrame(ujson.decode(ujson.encode(df)))
        self.assertTrue((df == outp).values.all())
        assert_array_equal(df.columns, outp.columns)
        assert_array_equal(df.index, outp.index)

        dec = _clean_dict(ujson.decode(ujson.encode(df, orient="split")))
        outp = DataFrame(**dec)
        self.assertTrue((df == outp).values.all())
        assert_array_equal(df.columns, outp.columns)
        assert_array_equal(df.index, outp.index)

        outp = DataFrame(ujson.decode(ujson.encode(df, orient="records")))
        outp.index = df.index
        self.assertTrue((df == outp).values.all())
        assert_array_equal(df.columns, outp.columns)

        outp = DataFrame(ujson.decode(ujson.encode(df, orient="values")))
        outp.index = df.index
        self.assertTrue((df.values == outp.values).all())

        outp = DataFrame(ujson.decode(ujson.encode(df, orient="index")))
        self.assertTrue((df.transpose() == outp).values.all())
        assert_array_equal(df.transpose().columns, outp.columns)
        assert_array_equal(df.transpose().index, outp.index)
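Examples #2 and #3 differ only in the `_clean_dict` helper applied to the `orient="split"` round trip (Example #13 uses it as well). The helper is not shown on this page; a minimal sketch consistent with how it is called, assuming its only job is to turn the decoded keys into plain strings so they can be splatted into the DataFrame constructor:

def _clean_dict(d):
    # Make the decoded keys plain str so DataFrame(**d) accepts them.
    return {str(k): v for k, v in d.items()}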
Example #4
File: check.py  Project: whouses/hardware
def network_perf(systems, unique_id, group_number, detail_options,
                 rampup_value=0, current_dir=""):
    have_net_data = False
    sets = search_item(systems, unique_id, "network", r"(.*)", [], [])
    modes = ['bandwidth', 'requests_per_sec']
    for mode in sorted(modes):
        results = {}
        for system in sets:
            net = []
            series = []
            global_perf = 0.0
            for perf in sets[system]:
                if perf[1] == mode:
                    if not perf[1] in net:
                        net.append(perf[1])
                    global_perf = global_perf + float(perf[3])

            series.append(global_perf)
            results[system] = Series(series, index=net)

        df = DataFrame(results)
        details = []
        matched_category = []
        for net in df.transpose().columns:
            if have_net_data is False:
                print()
                print("Group %d : Checking network disks perf" % group_number)
                have_net_data = True
            consistent = []
            curious = []
            unstable = []
            # How far the variance may deviate from the average (in %)
            tolerance_max = 15
            tolerance_min = 2

            print_perf(tolerance_min, tolerance_max, df.transpose()[net], df,
                       mode, net, consistent, curious, unstable, "",
                       rampup_value, current_dir)
            if mode == 'bandwidth':
                unit = "MB/sec"
            else:
                unit = "RRQ/sec"
            prepare_detail(detail_options, group_number, mode, net, details,
                           matched_category)
            print_summary("%-30s %s" % (mode, net), consistent, "consistent",
                          unit, df)
            print_summary("%-30s %s" % (mode, net), curious, "curious", unit,
                          df)
            print_summary("%-30s %s" % (mode, net), unstable, "unstable",
                          unit, df)

        print_detail(detail_options, details, df, matched_category)
Example #5
    def unMap(self,X, Y, ycat):
        newX = []
        newY = []

        for i in range(X.shape[0]):
            # x = self.cats[0].categories[X[i]]
            # y = ycat.categories[Y[i]]
            # x = "ISIN%d" % (X[i])
            y = "Stripe %d" % (Y[i])
            # newX.append(x)
            newY.append(y)

        dataFrame = DataFrame([X, newY])

        dataFrame.transpose().to_csv("output.csv", index=False, header=["ISIN", "Risk_Stripe"])
Example #6
def from_json_to_dataframe():
    results = json.load(open('./networks/first_level_analysis.json','r'))
    df = DataFrame(results)
    df.to_csv("panels.csv")
    dft = df.transpose()
    dft.to_csv("panels_trans.csv")
    return df
Example #7
def analyze_first_level_panels():
    results = {}
    
    for d in first_level_topic_list:
        print "\n*********DESCRIPTOR: " + first_level_topic_list[d] + "(" + str(d) + ")"
        G = build_panel_network_by_descriptor(d)
        print "\nDESCRIPTOR: " + first_level_topic_list[d] + "(" + str(d) + ")"
        print "Nodes:", G.number_of_nodes()
        print "Edges:", G.number_of_edges()
        res_clique = analize_cliques(G)
        res_degree = analize_degrees(G)
        res_weight = analize_edges(G)
        d_final = dict(res_clique)
        d_final.update(res_degree)
        d_final.update(res_weight)
        d_final['id'] = d
        d_final['avg_clustering'] = nx.average_clustering(G)
        results[first_level_topic_list[d]] = d_final
        
    print "Writing json..."
    json.dump(results, open('./networks/first_level_panels_analysis.json','w'), indent = 2)
    print "Writing csvs..."
    df = DataFrame(results)
    df.to_csv('./networks/first_level_panels_analysis.csv')
    dfinv = df.transpose()
    dfinv.to_csv('./networks/first_level_panels_analysis_inv.csv')
Example #8
File: PLM_plot.py  Project: marlonbetz/ML
def plot_phonemes(path):
    phoneme_embeddings = dict()
    for line in codecs.open(path,"r"):
        line = line.split(",")
        key= line[0][1:-1]
        emb = line[1:]
        emb[-1] = emb[-1][:-1]
        emb = np.array([float(e) for e in emb])
        phoneme_embeddings[key] = emb
    
    phoneme_embeddings = DataFrame(phoneme_embeddings,columns=phoneme_embeddings.keys())
    print(phoneme_embeddings.columns)
    
    m = TSNE()
    phoneme_embeddings_tsne = m.fit_transform(phoneme_embeddings.transpose())
    print(len(phoneme_embeddings_tsne))
    for p,emb in zip(phoneme_embeddings.columns, phoneme_embeddings_tsne):
        c = "black"
        if regex.search("^[aeiou3E][*]?$", p):
            c = "red"
            plt.annotate(p,(emb[0],emb[1]),color=c)
        if regex.search("^.*w~$", p):
            c = "blue"
            plt.annotate(p,(emb[0],emb[1]),color=c)
        if regex.search("^.*y~$", p):
            c = "yellow"
            plt.annotate(p,(emb[0],emb[1]),color=c)
        if regex.search("^.*h~$", p):
            c = "brown"
            plt.annotate(p,(emb[0],emb[1]),color=c)
        if regex.search("^.*\"$", p):
            c = "green"
            plt.annotate(p,(emb[0],emb[1]),color=c)
Example #9
 def diagnostic_table(self, num_years=5, base_calc=None):
     table = []
     row_years = []
     calc = copy.deepcopy(self)
     base_calc = copy.deepcopy(base_calc)
     for i in range(0, num_years):
         has_behavior = (calc.behavior.BE_sub or calc.behavior.BE_inc or
                         calc.behavior.BE_CG_per)
         if has_behavior:
             base_calc.calc_all()
             behavior_calc = behavior(base_calc, calc)
             behavior_calc.diagnostic_table_items(table)
         else:
             calc.calc_all()
             calc.diagnostic_table_items(table)
         row_years.append(calc.policy.current_year)
         if i < num_years - 1:
             calc.increment_year()
             if base_calc is not None:
                 base_calc.increment_year()
     df = DataFrame(table, row_years,
                    ["Returns (#m)", "AGI ($b)", "Itemizers (#m)",
                     "Itemized Deduction ($b)",
                     "Standard Deduction Filers (#m)",
                     "Standard Deduction ($b)", "Personal Exemption ($b)",
                     "Taxable income ($b)", "Regular Tax ($b)",
                     "AMT income ($b)", "AMT amount ($b)",
                     "AMT number (#m)", "Tax before credits ($b)",
                     "refundable credits ($b)",
                     "nonrefundable credits ($b)",
                     "Misc. Surtax ($b)",
                     "Ind inc tax ($b)", "Payroll tax ($b)"])
     df = df.transpose()
     pd.options.display.float_format = '{:8,.1f}'.format
     return df
Example #10
    def test_dataframe(self, orient, numpy):
        if orient == "records" and numpy:
            pytest.skip("Not idiomatic pandas")

        df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[
            "a", "b"], columns=["x", "y", "z"])
        encode_kwargs = {} if orient is None else dict(orient=orient)
        decode_kwargs = {} if numpy is None else dict(numpy=numpy)

        output = ujson.decode(ujson.encode(df, **encode_kwargs),
                              **decode_kwargs)

        # Ensure proper DataFrame initialization.
        if orient == "split":
            dec = _clean_dict(output)
            output = DataFrame(**dec)
        else:
            output = DataFrame(output)

        # Corrections to enable DataFrame comparison.
        if orient == "values":
            df.columns = [0, 1, 2]
            df.index = [0, 1]
        elif orient == "records":
            df.index = [0, 1]
        elif orient == "index":
            df = df.transpose()

        tm.assert_frame_equal(output, df, check_dtype=False)
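Example #10 is a parametrized pytest version of the same round trip; the `orient` and `numpy` arguments come from fixtures that are not shown here. One plausible way to supply them (the exact parameter lists are an assumption):

import pytest

@pytest.fixture(params=[None, "split", "records", "values", "index"])
def orient(request):
    # Each test case gets one of the supported encode orients.
    return request.param

@pytest.fixture(params=[None, True, False])
def numpy(request):
    # Whether ujson.decode should build numpy arrays directly.
    return request.param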
Example #11
 def diagnostic_table(self, num_years=5, base_calc=None):
     table = []
     row_years = []
     calc = copy.deepcopy(self)
     base_calc = copy.deepcopy(base_calc)
     for i in range(0, num_years):
         if calc.behavior.has_response():
             base_calc.calc_all()
             behavior_calc = Behavior.response(base_calc, calc)
             behavior_calc.diagnostic_table_items(table)
         else:
             calc.calc_all()
             calc.diagnostic_table_items(table)
         row_years.append(calc.policy.current_year)
         if i < num_years - 1:
             calc.increment_year()
             if base_calc is not None:
                 base_calc.increment_year()
     df = DataFrame(table, row_years,
                    ['Returns (#m)', 'AGI ($b)', 'Itemizers (#m)',
                     'Itemized Deduction ($b)',
                     'Standard Deduction Filers (#m)',
                     'Standard Deduction ($b)', 'Personal Exemption ($b)',
                     'Taxable income ($b)', 'Regular Tax ($b)',
                     'AMT income ($b)', 'AMT amount ($b)',
                     'AMT number (#m)', 'Tax before credits ($b)',
                     'refundable credits ($b)',
                     'nonrefundable credits ($b)',
                     'Misc. Surtax ($b)',
                     'Ind inc tax ($b)', 'Payroll tax ($b)',
                     'Combined liability ($b)'])
     df = df.transpose()
     pd.options.display.float_format = '{:8,.1f}'.format
     return df
Example #12
def make_league_df():
    from pandas import DataFrame
    from members import import_teams
    div_teams = import_teams()
    div_bins = load_all_substitute_bins()
    bins = sum(div_bins, [])
    targets = ['Jessica', 'Lexie']
    results = {}
    for target in targets:
        for bin_num, bin in enumerate(bins):
            for person in bin:
                if target in person:
                    break
        if target in results:
            break
    from pprint import pprint
    people = {}
    def find_team (name, teams):
        for team_idx, team in enumerate(teams):
            for person in team:
                if name in person:
                    return team_idx
        return -1
    for div_idx, (bins, teams) in enumerate(zip(div_bins, div_teams)):
        for bin_idx, bin in enumerate(bins):
            for person in bin:
                team_idx = find_team(person, teams)
                int_team = find_team(person, div_teams[1])
                people[person] = {'div': div_idx, 'bin': bin_idx,
                                  'team': team_idx, 'int_team': int_team}
    df = DataFrame(people)
    df = df.transpose()
    comp_busy = [4, 6, 7]  # really, 5, 7, 8
    int_busy = [11, 9, 1]  # really, 12, 10, 2

    print('\n\nbusy')
    brian_int = df.loc[df['bin'] == 0].loc[df['div'] == 2].loc[
        df['int_team'].isin(int_busy)]
    lexies = df.loc[df['bin'] == 1].loc[df['div'] == 2].loc[
        df['team'].isin(comp_busy)]
    jeses = df.loc[df['bin'] == 0].loc[df['div'] == 2].loc[
        df['team'].isin(comp_busy)]
    pprint(", ".join(brian_int.index.values))
    pprint(", ".join(lexies.index.values))
    pprint(", ".join(jeses.index.values))

    print('\n\nyes')
    brian_int = df.loc[df['bin']==0].loc[df['div']==2].loc[~df['int_team'].isin(int_busy)]
    lexies = df.loc[df['bin']==1].loc[df['div']==2].loc[~df['team'].isin(comp_busy)]
    jeses = df.loc[df['bin']==0].loc[df['div']==2].loc[~df['team'].isin(comp_busy)]
    pprint(len(brian_int))
    pprint(len(lexies))
    pprint(len(jeses))
    print(", ".join(brian_int.index.values))
    print(", ".join(lexies.index.values))
    print(", ".join(jeses.index.values))
Example #13
    def testDataFrameNumpy(self):
        df = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"], columns=["x", "y", "z"])

        # column indexed
        outp = DataFrame(ujson.decode(ujson.encode(df), numpy=True))
        self.assertTrue((df == outp).values.all())
        assert_array_equal(df.columns, outp.columns)
        assert_array_equal(df.index, outp.index)

        dec = _clean_dict(ujson.decode(ujson.encode(df, orient="split"), numpy=True))
        outp = DataFrame(**dec)
        self.assertTrue((df == outp).values.all())
        assert_array_equal(df.columns, outp.columns)
        assert_array_equal(df.index, outp.index)

        outp = DataFrame(ujson.decode(ujson.encode(df, orient="index"), numpy=True))
        self.assertTrue((df.transpose() == outp).values.all())
        assert_array_equal(df.transpose().columns, outp.columns)
        assert_array_equal(df.transpose().index, outp.index)
Example #14
 def parse_data_model(self, full_df):
     data_model = {}
     levels = ['specimens', 'samples', 'sites', 'locations',
               'ages', 'measurements', 'criteria', 'contribution']
     for level in levels:
         df = DataFrame(full_df['tables'][level]['columns'])
         data_model[level] = df.transpose()
     # replace np.nan with None
     data_model[level] = data_model[level].where((pd.notnull(data_model[level])), None)
     return data_model
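The transpose in Examples #14 and #17 is needed because constructing a DataFrame from a dict of dicts turns the outer keys (the data-model column names) into DataFrame columns; transposing yields one row per described column. A small illustration with hypothetical values:

from pandas import DataFrame

# Toy stand-in for full_df['tables'][level]['columns'].
column_defs = {'age': {'type': 'Number', 'group': 'Age'},
               'site': {'type': 'String', 'group': 'Names'}}
df = DataFrame(column_defs)    # outer keys 'age', 'site' become columns
print(df.transpose())          # after transpose: one row per described column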
Example #15
def append_village_areas(divname):
    im_vil = pd.read_csv('../data/%s_village_images.csv' % divname.lower())
    shape_helper = ShapeHelper('../data/shapefiles/fixed_village_shapefiles/%s/%s.shp' % (divname.lower(), divname.lower()),
                               lat_offset, lon_offset)
    areas = shape_helper.get_shape_areas('village')
    areas_df = DataFrame(areas, index=['area'])
    areas_df = areas_df.transpose()
    areas_df.reset_index(inplace=True)
    areas_df.rename(columns={'index': 'village'}, inplace=True)
    im_vil_areas = pd.merge(im_vil, areas_df, how='left')
    im_vil_areas.set_index('image', inplace=True)
    im_vil_areas.to_csv('../data/%s_village_areas_images.csv' % divname.lower())
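Here `areas` is assumed to be a flat `{village: area}` mapping; passing `index=['area']` broadcasts each scalar into a one-row frame, and the transpose plus `reset_index` produces a tidy two-column table. A tiny illustration with made-up numbers:

from pandas import DataFrame

areas = {'Alpha': 1.5, 'Beta': 2.25}                     # hypothetical villages
areas_df = DataFrame(areas, index=['area']).transpose()  # rows: villages
areas_df = areas_df.reset_index().rename(columns={'index': 'village'})
print(areas_df)   # columns: village, area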
Example #16
def annotate(ann, ccols, ocols, clust, c):
    to_add = open(ann, 'r')
    head = next(to_add)
    head = head.rstrip('\n')
    bids = head.split('\t')
    # SHOULD HAVE ROW HEADERS
    Cols = bids[1:]
    maps = ('Reds', 'Reds', 'Greys', 'Greens')
    k = 0
    annot = []
    for line in to_add:
        line = line.rstrip('\n')
        data = line.split('\t')
        to_map = data[1:]
        rmap = []
        newCols = []
        # reorg data to match cluster

        for i in ccols:
            rmap.append(to_map[Cols.index(ocols[i])])
            newCols.append(ocols[i])
        rmap = np.asarray(rmap)
        Rows = []
        Rows.append(data[0])
        # flag if qualitative
        q = 0
        if isint(rmap[0]):
            rmap = rmap.astype(np.float)
        else:
            q = 1
            qdict = {}
            j = 0
            for i in xrange(0, len(rmap), 1):
                if rmap[i] not in qdict:
                    qdict[rmap[i]] = j
                    sys.stderr.write(str(j) + ' ' + rmap[i] + '\n')
                    j += 1
                rmap[i] = qdict[rmap[i]]
            rmap = rmap.astype(np.float)
        df = DataFrame(rmap, index=ocols, columns=Rows)

        df = df.transpose()
        new, cur = plt.subplots()
        cur = sns.heatmap(df, cmap=maps[k], rasterized=True)

        new.set_figheight(2)
        new.set_figwidth(c)
        new.set_dpi(600)
        cur.set_xticklabels(newCols, rotation=90)
        new.savefig('test' + str(k) + '.pdf')
        annot.append(new)
        k += 1
    return annot
Example #17
 def parse_data_model(self, full_df):
     """
     Format the data model into a dictionary of DataFrames.
     """
     data_model = {}
     levels = ['specimens', 'samples', 'sites', 'locations',
               'ages', 'measurements', 'criteria', 'contribution',
               'images']
     criteria_map = DataFrame(full_df['criteria_map'])
     for level in levels:
         df = DataFrame(full_df['tables'][level]['columns'])
         data_model[level] = df.transpose()
     # replace np.nan with None
     data_model[level] = data_model[level].where((pd.notnull(data_model[level])), None)
     return data_model, criteria_map
Example #18
def file_prep(file):
    df = DataFrame(read_csv(file, sep='\t'))
    df.drop(df[df.apply(allele_count, axis=1) != 2].index, inplace=True)
    major_freqs = df.apply(major_prop_find, axis=1)
    major_alleles = df.apply(major_find, axis=1)
    df.insert(3, 'major_freqs', major_freqs)
    df.insert(3, 'major_alleles', major_alleles)
    df = df.transpose()
    
    
    chrom, chrom_idx = np.unique(df.loc['chrom'], return_index=True)
    
    super_missing_df = df == '.'
    
    chromosome_dict = {}
    for number in np.unique(df.loc['chrom']):
        chromosome_dict[number] = df.loc['chrom'][df.loc['chrom'] == number].index
    return df, super_missing_df, chromosome_dict
Example #19
def main():
    train_set = create_dataset(N)
    test_set = create_dataset(N)
    df_ws = DataFrame()

    fig = plt.figure()
    for c, m in enumerate(M):
        f, ws = resolve(train_set, m)
        df_ws = df_ws.append(Series(ws, name="M=%d" % m))

        subplot = fig.add_subplot(2, 2, c + 1)
        subplot.set_xlim(-0.05, 1.05)
        subplot.set_ylim(-1.5, 1.5)
        subplot.set_title("M=%d" % m)

        subplot.scatter(train_set.x, train_set.y, marker='o', color='blue')

        linex = np.linspace(0, 1, 101)
        liney = np.sin(2 * np.pi * linex)
        subplot.plot(linex, liney, color='green', linestyle='--')

        linex = np.linspace(0,1,101)
        # like probability density function
        liney = f(linex)
        label = "E(RMS)=%.2f" % rmse(train_set, f)
        subplot.plot(linex, liney, color='red', label=label)
        subplot.legend(loc=1)

    print("Table of the coeffcients")
    print(df_ws.transpose())
    fig.show()

    df = DataFrame()
    for m in range(0, 10):
        f, ws = resolve(train_set, m)
        train_error = rmse(train_set, f)
        test_error = rmse(test_set, f)
        df = df.append(
                Series([train_error, test_error], index=['Training set', 'Test set']),
                ignore_index=True)

    df.plot(title='RMS Error', style=['-', '--'], grid=True, ylim=(0, 0.9))
    plt.show()
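Example #19 (and the near-identical Examples #25 and #29) depend on helpers that are not shown. A minimal sketch of what they could look like, assuming the data is noisy samples of sin(2πx) and that `resolve` performs an ordinary least-squares polynomial fit; `N`, `M`, and the plotting globals are taken from the original script:

import numpy as np
from pandas import DataFrame, Series

def create_dataset(num):
    # num noisy samples of sin(2*pi*x) on [0, 1] (assumed helper).
    x = np.linspace(0, 1, num)
    y = np.sin(2 * np.pi * x) + np.random.normal(scale=0.3, size=num)
    return DataFrame({'x': x, 'y': y})

def resolve(dataset, m):
    # Least-squares fit of a degree-m polynomial (assumed helper).
    coeffs = np.polyfit(dataset.x, dataset.y, m)
    return np.poly1d(coeffs), Series(coeffs[::-1])  # coefficients w0..wm

def rmse(dataset, f):
    # Root-mean-square error of f over the dataset (assumed helper).
    return np.sqrt(np.mean((f(dataset.x) - dataset.y) ** 2))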
Example #20
    def apply(self, transforms, axis=0):
        if isinstance(transforms, Transform) \
        or \
        (isinstance(transforms, type) and issubclass(transforms, Transform)):
            transform = transforms #only a single object passed (not a list)
            return transform.__eapply__(self)

        elif isinstance(transforms, (types.FunctionType, types.BuiltinFunctionType, functools.partial)):
            func = transforms #only a single object passed (not a list)
            transformed_data_df = DataFrame(self.data_df.apply(func, axis=axis))

            # transpose to return the samples as column names rather than row names
            if axis == 0 : transformed_data_df = transformed_data_df.transpose()

            return self.with_data_df(transformed_data_df)

        elif isinstance(transforms, list):
            transformed_exp = self
            for transform in transforms:
                transformed_exp = transform.__eapply__(transformed_exp)
            return transformed_exp

        else:
            raise NotImplementedError
Example #21
def save(filename, results):
    filename += '.xlsx'
    df = DataFrame(data=results)
    # print(df)
    df = df.transpose()
    df.to_excel(filename, sheet_name='sheet1', index=False, header=False)
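A hypothetical call, just to show the shape of the input: because of the transpose, each dict key becomes one row of the sheet, and headers and index are suppressed:

# Writes report.xlsx with one row per key: 1 2 3 / 4 5 6 (made-up data).
save('report', {'alice': [1, 2, 3], 'bob': [4, 5, 6]})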
Example #22
    def buy6030(self, sym, direction="Bull", exp="", budget=500):

        if direction == "Bull":
            right = "Put"
        else:
            right = "Call"

        if exp == "":
            d = datetime.date.today()
            d += datetime.timedelta(10)
            while d.weekday() != 4:
                d += datetime.timedelta(1)
            exp = d.strftime("%Y%m%d")

        contract1 = IBcontract()
        contract1.secType = "STK"
        contract1.symbol = sym
        contract1.exchange = "ISLAND"

        contract2 = IBcontract()
        contract2.secType = "OPT"
        contract2.symbol = sym
        contract2.exchange = "SMART"
        contract2.lastTradeDateOrContractMonth = exp
        contract2.right = right
        contract2.multiplier = 100

        self.reqMktData(1032, contract1, "", False, False, [])
        contract1.exchange = "SMART"
        self.reqMktData(1033, contract1, "", False, False, [])
        d = self.reqContractDetails(1202, contract2)
        time.sleep(1)
        #print(d)

        print("=" * 40)
        print()
        print("{} Price Details:".format(sym))
        lastPrice = None
        try:
            for k in list(self._my_price_details[1032].queue):
                t = dict(k)
                if t['tickType'] == 4:
                    lastPrice = t['price']
                if t['tickType'] == 9 and lastPrice == None:
                    lastPrice = t['price']
                print(t)
        except:
            try:
                for k in list(self._my_price_details[1033].queue):
                    t = dict(k)
                    if t['tickType'] == 4:
                        lastPrice = t['price']
                    if t['tickType'] == 9 and lastPrice == None:
                        lastPrice = t['price']
                    print(t)
            except:
                print("No stock prices available for {} at this time.".format(
                    sym))
                return

        if lastPrice == None:
            print("No stock prices available for {} at this time.".format(sym))
            return

        # print()
        # print("{0} Last Price: ${1:4.2f}".format(sym, lastPrice))
        # print()

        rID = 1100
        df = DataFrame()
        print("Contract Details:")
        try:
            cDetails = self._my_contract_details[1202].queue
        except:
            print("Contract details for {} are not available at this time.".
                  format(sym))
            return
        for k in list(cDetails):
            t = list(str(k).split(','))
            # print(t)
            try:
                if lastPrice * 1.10 > float(t[4]) > lastPrice * 0.90:
                    df[rID] = t
                    contract3 = IBcontract()
                    contract3.secType = "OPT"
                    contract3.symbol = sym
                    contract3.exchange = "CBOE2"
                    contract3.lastTradeDateOrContractMonth = exp
                    contract3.strike = float(t[4])
                    contract3.right = right
                    contract3.multiplier = 100
                    self.reqMarketDataType(2)
                    self.reqMktData(rID, contract3, "", False, False, [])
                    rID = rID + 1
            except:
                pass
        if rID == 1100:
            print(
                "No option prices available for {} at this time.".format(sym))
            return

        df = df.transpose()
        # print(df)
        # print("Getting option details for {0:2d} strikes:".format(len(df)))
        # print()

        time.sleep(1)

        df['undPrice'] = [""] * len(df)
        df['optPrice'] = [""] * len(df)
        df['delta'] = [""] * len(df)
        df['strike'] = [""] * len(df)
        df['delta60'] = [""] * len(df)
        for s in df.index:
            #self.cancelMktData(s)
            try:
                for k in list(self._my_option_data[s].queue):
                    t = dict(k)
                    #print(s,t)
                    if t['delta']:
                        try:
                            df.loc[s, 'conId'] = int(df.loc[s, 0])
                            df.loc[s, 'strike'] = float(df.loc[s, 4])
                            df.loc[s, 'undPrice'] = t['undPrice']
                            df.loc[s, 'optPrice'] = t['optPrice']
                            df.loc[s, 'delta'] = abs(t['delta'])
                            df.loc[s, 'delta60'] = abs(abs(t['delta']) - 0.60)
                        except:
                            pass
            except:
                print("No option prices available for {} at this time.".format(
                    sym))
                return

        # print(df.loc[:,['conId',3,'strike','undPrice','delta','delta60']].sort_values(['strike']))
        # print()
        d60 = df.loc[df['delta60'] == df['delta60'].min()].index.min()
        # print("Sell a {} with the {:7.2f} strike".format(right,df.strike[d60]))

        t30 = (df.delta[d60] - 0.3)
        p = df.loc[df.delta > t30].delta.min()
        d30plus = df.loc[df.delta == p].index.min()
        m = df.loc[df.delta < t30].delta.max()
        d30min = df.loc[df.delta == m].index.min()
        if abs(df.delta[d30plus] - t30) > abs(df.delta[d30min] - t30):
            d30 = d30min
        else:
            d30 = d30plus

        # Order variables
        #####
        cdelta = df.delta[d60] - df.delta[d30]
        lim = abs(df.strike[d60] - df.strike[d30]) * 0.35
        try:
            cOptPrice = df.optPrice[d60] - df.optPrice[d30]
            if abs(cOptPrice) < abs(lim * 0.95):
                print("Spread Combo price for {} is too low.".format(sym))
                return True
            quantity = int(budget / 100 / cOptPrice)
            if quantity == 0:
                print("Spread Combo for {} is above the budget of ${}".format(
                    sym, budget))
                return True
        except:
            quantity = 1
        takeProfitLimitPrice = lim * 0.
        stopLossPrice = lim * 1.50
        action = "SELL"
        #parentOrderId = 101

        # print("Buy a {} with the  {:7.2f} strike ".format(right,df.strike[d30]))
        # print("Combo delta is {:5.3f}".format(cdelta))
        # print("Combo limit price is ${:7.2f}".format(lim))
        # print("Combo Expiry is {}".format(exp))
        # print()
        print(
            "{} - Price: ${:7.2f} - Sell a {} {:7.2f}/{:7.2f} {} Spread - Limit price: ${:5.2f} - Combo delta: {:5.3f}"
            .format(sym, lastPrice, exp, df.strike[d60], df.strike[d30], right,
                    lim, cdelta))

        #
        #  Send order for the Spread above
        ####

        contract3 = IBcontract()
        contract3.secType = "BAG"
        contract3.symbol = sym
        contract3.exchange = "SMART"
        contract3.currency = "USD"

        leg1 = IBcomboLeg()
        leg1.conId = int(df.conId[d60])  # Sell the delta 60 option
        leg1.ratio = 1
        leg1.action = "SELL" if action == "BUY" else "BUY"
        leg1.exchange = "SMART"

        leg2 = IBcomboLeg()
        leg2.conId = int(
            df.conId[d30])  # Buy the delta 30 option as protection
        leg2.ratio = 1
        leg2.action = "BUY" if action == "BUY" else "SELL"
        leg2.exchange = "SMART"

        contract3.comboLegs = []
        contract3.comboLegs.append(leg1)
        contract3.comboLegs.append(leg2)

        order3 = Order()
        order3.action = action
        order3.orderType = "LMT"
        order3.totalQuantity = quantity
        order3.lmtPrice = lim
        order3.tif = 'DAY'
        order3.transmit = False

        parentOrderId = self.place_new_IB_order(contract3,
                                                order3,
                                                orderid=None)

        takeProfit = Order()
        takeProfit.action = "SELL" if action == "BUY" else "BUY"
        takeProfit.orderType = "LMT"
        takeProfit.totalQuantity = quantity
        takeProfit.lmtPrice = takeProfitLimitPrice
        takeProfit.parentId = parentOrderId
        takeProfit.tif = 'GTC'
        takeProfit.transmit = False
        self.place_new_IB_order(contract3, takeProfit, orderid=None)

        stopLoss = Order()
        stopLoss.action = "SELL" if action == "BUY" else "BUY"
        stopLoss.orderType = "STP"
        # Stop trigger price
        stopLoss.auxPrice = stopLossPrice
        stopLoss.totalQuantity = quantity
        stopLoss.parentId = parentOrderId
        stopLoss.tif = 'GTC'
        # In this case, the low side order will be the last child being sent. Therefore, it needs to set this attribute to True
        # to activate all its predecessors
        stopLoss.transmit = True
        self.place_new_IB_order(contract3, stopLoss, orderid=None)
        time.sleep(1)
        return True
Example #23
def ComputeMetrics1(stats, filename):
    """
    DESCRIPTION
    
    :Parameters:
        NAME : TYPE
            DESCRIPTION
        
    :Return:
        DESCRIPTION
    """

    data = {}

    for article in stats:
        metrics = {}
        temp = {}
        
        title = article['article-title']
        
        # get metrics from data
        allActions = GetMetric(article, 'total-actions')
        number_tokens = GetMetric(article, 'number-tokens')
        maintainanceTag = GetMetric(article, 'tag-maintained')

        # split metrics between maintainer and others
        addsMaintainer, addsOthers = SplitMO(article, maintainers[index], 'tokens-added')
        deletesMaintainer, deletesOthers = SplitMO(article, maintainers[index], 'tokens-deleted')
        revertsMaintainer, revertsOthers = SplitMO(article, maintainers[index], 'tokens-reverted')
        antActionsMaintainer, antActionsOthers = SplitMO(article, maintainers[index], 'antagonistic-actions')
        reintroMaintainer, reintroOthers = SplitMO(article, maintainers[index], 'tokens-reintroduced')
        selfreintroMaintainer, selfreintroOthers = SplitMO(article, maintainers[index], 'tokens-self-reintroduced')
        talkpageMaintainer, talkpageOthers = SplitMO(article, maintainers[index], 'talkpage-edits')

        # ownership of article tokens held by the maintainer
        ownershipMaintainerAbs = GetOwnership(article, maintainers_id[index], 'tokens-absolute')
        ownershipMaintainerRel = GetOwnership(article, maintainers_id[index], 'tokens-relative')
        
        # get properties of article
        metrics['firstMaintRev'] = GetFirstMaintainedRev(maintainanceTag)
        metrics['maintainer-name'] = article['maintainer-name']
        metrics['maintainer-id'] = article['maintainer-id']
        metrics['all-actions'] = sum(allActions)
        metrics['edits-maintainer'] = len(addsMaintainer)
        metrics['edits-others'] = len(addsOthers)
        metrics['number-revisions'] = metrics['edits-maintainer'] + metrics['edits-others']
        
        # temporal comparison
        TempCompare()


        # relativizing by edit counts is just an assumption, to have some baseline.
        if talkpageOthers:
            #metrics['talkPageRatio'] = sum(talkpageMaintainer) / float(metrics['edits-maintainer']) / float( sum(talkpageOthers) / float(metrics['edits-others']) )
            metrics['talkPageRatio'] = sum(talkpageMaintainer) / float(sum(talkpageOthers))
        else:
            metrics['talkPageRatio'] = 0

        # if metrics['all-actions'] is 0:
        #     metrics['addsMaintainerAvg'] = 0
        #     metrics['addsOthersAvg'] = 0
        #     metrics['addsRatio'] = 0
            
        #     metrics['deletesMaintainerRel'] = 0
        #     metrics['deletesOthersRel'] = 0
        #     metrics['deletesRatio'] = 0
            
        #     metrics['revertsMaintainerRel'] = 0
        #     metrics['revertsOthersRel'] = 0
        #     metrics['revertsRatio'] = 0

        #     metrics['reintroMaintainerAvg'] = 0
        #     metrics['reintroOthersAvg'] = 0
        #     metrics['selfreintroMaintainerAvg'] = 0
        #     metrics['selfreintroOthersAvg'] = 0
        #     metrics['selfreintroRatio'] = 0

        #     metrics['antActionsMaintainerAvg'] = 0
        #     metrics['antActionsOthersAvg'] = 0
        #     metrics['negActionsRatio'] = 0

        #     metrics['targetedIntroRatio'] = 0
        # metrics['addsMaintainerRel'] = sum(addsMaintainer)/float(metrics['all-actions'])
        # metrics['addsOthersRel'] = sum(addsOthers)/float(metrics['all-actions'])
        # metrics['addsRatio'] = metrics['addsMaintainerRel'] / float(metrics['addsOthersRel'])
        # metrics['deletesMaintainerRel'] = sum(deletesMaintainer)/float(metrics['all-actions'])
        # metrics['deletesOthersRel'] = sum(deletesOthers)/float(metrics['all-actions'])
        # metrics['deletesRatio'] = metrics['deletesMaintainerRel'] / float(metrics['deletesOthersRel'])
        # metrics['revertsMaintainerRel'] = sum(revertsMaintainer)/float(metrics['all-actions'])
        # metrics['revertsOthersRel'] = sum(revertsOthers)/float(metrics['all-actions'])
        # metrics['revertsRatio'] = metrics['revertsMaintainerRel'] / float(metrics['revertsOthersRel'])
        # metrics['reintroMaintainerRel'] = sum(reintroMaintainer)/float(metrics['all-actions'])
        # metrics['reintroOthersRel'] = sum(reintroOthers)/float(metrics['all-actions']) 
        # metrics['selfreintroMaintainerRel'] = sum(selfreintroMaintainer)/float(metrics['all-actions'])
        # metrics['selfreintroOthersRel'] = sum(selfreintroOthers)/float(metrics['all-actions']) 
        # if metrics['selfreintroOthersAvg'] == 0:
        #     metrics['selfreintroRatio'] = 0
        # else:
        #     metrics['selfreintroRatio'] = metrics['selfreintroMaintainerAvg'] / float(metrics['selfreintroOthersAvg'])
        # if metrics['antActionsOthersAvg'] == 0:
        #     metrics['antActionsRatio'] = 0
        # else:
        #     metrics['antActionsRatio'] = metrics['antActionsMaintainerAvg'] / float(metrics['antActionsOthersAvg'])
        
        # if metrics['reintroMaintainerAvg'] == 0 or metrics['selfreintroOthersAvg'] == 0 or metrics['reintroOthersAvg'] == 0:
        #     metrics['targetedIntroRatio'] = 0
        #     metrics['targetedIntroRatio2Ownership'] = 0
        # else:
        #     metrics['targetedIntroRatio'] = (metrics['selfreintroMaintainerAvg'] / float(metrics['reintroMaintainerAvg'])) \
        #         / float((metrics['selfreintroOthersAvg'] / float(metrics['reintroOthersAvg'])))
        #     #metrics['targetedIntroRatio2Ownership'] = (metrics['selfreintroMaintainerRel'] / float(metrics['reintroMaintainerRel'])) \
        #     #    / float((metrics['selfreintroOthersRel'] / float(metrics['reintroOthersRel'])))

        metrics['addsMaintainerAvg'] = sum(addsMaintainer)/float(metrics['edits-maintainer'])
        metrics['addsOthersAvg'] = sum(addsOthers)/float(metrics['edits-others'])
        metrics['addsRatio'] = metrics['addsMaintainerAvg'] / float(metrics['addsOthersAvg'])
        metrics['reintroMaintainerAvg'] = sum(reintroMaintainer) / float(metrics['edits-maintainer'])
        metrics['reintroOthersAvg'] = sum(reintroOthers) / float(metrics['edits-others']) 
        metrics['reintroRatio'] = metrics['reintroMaintainerAvg'] / float(metrics['reintroOthersAvg'])
        metrics['selfreintroMaintainerAvg'] = sum(selfreintroMaintainer) / float(metrics['edits-maintainer'])
        metrics['selfreintroOthersAvg'] = sum(selfreintroOthers) / float(metrics['edits-others']) 
        metrics['selfreintroRatio'] = metrics['selfreintroMaintainerAvg'] / float(metrics['selfreintroOthersAvg'])
        metrics['antActionsMaintainerAvg'] = sum(antActionsMaintainer)/float(metrics['edits-maintainer']) 
        metrics['antActionsOthersAvg'] = sum(antActionsOthers)/float(metrics['edits-others'])
        
        # metrics['deletesMaintainerAvg'] = sum(deletesMaintainer)/float(metrics['edits-maintainer'])
        # metrics['deletesOthersAvg'] = sum(deletesOthers)/float(metrics['edits-others'])
        # metrics['deletesRatio'] = sum(metrics['deletesMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(temp['deletesOthersAvg']) / float(metrics['edits-others']))
        
        # metrics['revertsMaintainerAvg'] = sum(revertsMaintainer)/float(metrics['edits-maintainer'])
        # metrics['revertsOthersAvg'] = sum(revertsOthers)/float(metrics['edits-others'])
        # metrics['revertsRatio'] = sum(metrics['revertsMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['revertsOthersAvg']) / float(metrics['edits-others']))
        # metrics['revertsMaintainerPot'] = sum(revertsMaintainer)/float(metrics['edits-maintainer'])
        # metrics['revertsOthersPot'] = sum(revertsOthers)/float(metrics['edits-others'])
        # metrics['revertsPotRatio'] = sum(metrics['revertsMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['revertsOthersAvg']) / float(metrics['edits-others']))
        
        # metrics['reintroMaintainerAvg'] = sum(reintroMaintainer)/float(metrics['edits-maintainer'])
        # metrics['reintroOthersAvg'] = sum(reintroOthers)/float(metrics['edits-others'])
        # metrics['reintroRatio'] = sum(metrics['reintroMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['reintroOthersAvg']) / float(metrics['edits-others']))

        # metrics['selfreintroMaintainerAvg'] = sum(selfreintroMaintainer)/float(metrics['edits-maintainer'])
        # metrics['selfreintroOthersAvg'] = sum(selfreintroOthers)/float(metrics['edits-others'])
        # metrics['selfreintroRatio'] = sum(metrics['selfreintroMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['selfreintroOthersAvg']) / float(metrics['edits-others']))

        # share of selfreintroductions of potential own tokens
        # temp['selfreintroMaintainerPot'] = [(b/float(a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], selfreintroMaintainer[1:len(selfreintroMaintainer)-1])]
        # temp['selfreintroOthersPot'] = [(b/float(c-a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], selfreintroOthers[1:len(selfreintroOthers)-1], number_tokens[:len(number_tokens)-2) if a is not 0]
        # metrics['selfreintroPotRatio'] = sum(temp['selfreintroMaintainerPot']) / float(metrics['edits-maintainer']) / float(sum(temp['selfreintroOthersPot']) / float(metrics['edits-others']))
        
        # temp['antActionsMaintainerPot'] = [(b/float(a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], antActionsMaintainer[1:len(antActionsMaintainer)-1]) if a is not 0]
        # temp['antActionsOthersPot'] = [(b/float(c-a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], antActionsOthers[1:len(antActionsOthers)-1], number_tokens[:len(number_tokens)-2]) if a is not 0]
        # metrics['antActionsRatio'] = sum(temp['antActionsMaintainerPot']) / float(metrics['edits-maintainer']) / float(sum(temp['antActionsOthersPot']) / float(metrics['edits-others']))
            
        data[title] = metrics

    data = DataFrame(data)
    data = data.transpose()
    save2CSV(data, filename)

    return data
Example #24
path = "C:\\Users\\keriambermudez\\Dropbox\\David_Fenyos_Lab\\Image_Analysis\\Testing_Jacop\\Basal_bleomycin_masked_cvs\\"

#%%

basal = pd.read_csv(
    "C:\\Users\\keriambermudez\\Dropbox\\David_Fenyos_Lab\\Image_Analysis\\Testing_Jacop\\Basal\\Masked_log_files\\Basal_masked_jacop.csv"
)
bleomycin = pd.read_csv(
    "C:\\Users\\keriambermudez\\Dropbox\\David_Fenyos_Lab\\Image_Analysis\\Testing_Jacop\\Bleomycyn\\Masked_log_files\\Bleomycin_masked_jacop.csv"
)

#%%

icq = DataFrame([basal.ix[:, '43'], bleomycin.ix[:, '43']],
                index=['basal', 'bleomycin'])
icq = icq.transpose()
icq.to_csv(path + "ICQ.csv")

pearsons = DataFrame([basal.ix[:, '3'], bleomycin.ix[:, '3']],
                     index=['basal', 'bleomycin'])
pearsons = pearsons.transpose()
pearsons.to_csv(path + "pearsons.csv")

overlap_coef = DataFrame([basal.ix[:, '5'], bleomycin.ix[:, '5']],
                         index=['basal', 'bleomycin'])
overlap_coef = overlap_coef.transpose()
overlap_coef.to_csv(path + "overlap_coef.csv")

M1 = DataFrame([basal.ix[:, '16'], bleomycin.ix[:, '16']],
               index=['basal', 'bleomycin'])
M1 = M1.transpose()
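The `.ix` indexer used above was removed in pandas 1.0, so this snippet no longer runs as written. A minimal sketch of the first table with `.loc`, keeping the column label '43' and the `basal`/`bleomycin` frames from the original; the other tables follow the same pattern:

# Equivalent to building the two-row frame and then transposing it.
icq = DataFrame({'basal': basal.loc[:, '43'],
                 'bleomycin': bleomycin.loc[:, '43']})
icq.to_csv(path + "ICQ.csv")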
Example #25
        subplot.scatter(train_set.x, train_set.y, marker='o', color='blue')

        # Plot the true curve
        linex = np.linspace(0, 1, 101)
        liney = np.sin(2 * np.pi * linex)
        subplot.plot(linex, liney, color='green', linestyle='--')

        # Plot the polynomial approximation
        linex = np.linspace(0, 1, 101)
        liney = f(linex)
        label = "E(RMS)=%.2f" % rms_error(train_set, f)
        subplot.plot(linex, liney, color='red', label=label)
        subplot.legend(loc=1)

    # Show the coefficient values
    print "Table of the coefficients"
    print df_ws.transpose()
    fig.show()

    # Show how the error changes on the training set and the test set
    df = DataFrame(columns=['Training set', 'Test set'])
    for m in range(0, 10):  # polynomial degree
        f, ws = resolve(train_set, m)
        train_error = rms_error(train_set, f)
        test_error = rms_error(test_set, f)
        df = df.append(Series([train_error, test_error],
                              index=['Training set', 'Test set']),
                       ignore_index=True)
    df.plot(title='RMS Error', style=['-', '--'], grid=True, ylim=(0, 0.9))
    plt.show()
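Note that `DataFrame.append`, used in Examples #19, #25, #28, and #29, was deprecated in pandas 1.4 and removed in 2.0. A minimal sketch of the replacement pattern with made-up error values: collect the rows first and build the frame in one go (pd.concat works as well):

from pandas import DataFrame, Series

rows = []
for train_error, test_error in [(0.31, 0.32), (0.28, 0.30)]:   # made-up values
    rows.append(Series([train_error, test_error],
                       index=['Training set', 'Test set']))
df = DataFrame(rows)   # each Series becomes one row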
Example #26
##
##    a=np.zeros((len(c),len(d)))
##
##    keys=[]
##    keys2=[]
##
##    for key,values in pin.iteritems():
##        keys.append(key)
##        for key2, value in values.iteritems():
##            if key2 not in keys2:
##                keys2.append(key2)
##
##    for key, values in pin.iteritems():
##        index=keys.index(key)
##        for key2, value in values.iteritems():
##            index2=keys2.index(key2)
##            a[index][index2]=values.get(key2,0)
##
##    np.savetxt("pinmat.csv", a, delimiter=",",fmt='%2i')

# Write Jaccard index Matrix
    print "Dataframe"
    df2 = DataFrame(pinjac)
    print "transpose"
    df2t = df2.transpose()
    print "to csv"
    df2t.to_csv('pinsJac.csv')

    cur.close()
    conn.close()
Example #27
    def test_frame_from_json_to_json(self):
        def _check_orient(df, orient, dtype=None, numpy=False,
                          convert_axes=True, check_dtype=True, raise_ok=None):
            df = df.sort()
            dfjson = df.to_json(orient=orient)

            try:
                unser = read_json(dfjson, orient=orient, dtype=dtype,
                                  numpy=numpy, convert_axes=convert_axes)
            except Exception as detail:
                if raise_ok is not None:
                    if isinstance(detail, raise_ok):
                        return
                    raise

            unser = unser.sort()

            if dtype is False:
                check_dtype=False

            if not convert_axes and df.index.dtype.type == np.datetime64:
                unser.index = DatetimeIndex(
                    unser.index.values.astype('i8') * 1e6)
            if orient == "records":
                # index is not captured in this orientation
                assert_almost_equal(df.values, unser.values)
                self.assertTrue(df.columns.equals(unser.columns))
            elif orient == "values":
                # index and cols are not captured in this orientation
                assert_almost_equal(df.values, unser.values)
            elif orient == "split":
                # index and col labels might not be strings
                unser.index = [str(i) for i in unser.index]
                unser.columns = [str(i) for i in unser.columns]
                unser = unser.sort()
                assert_almost_equal(df.values, unser.values)
            else:
                if convert_axes:
                    assert_frame_equal(df, unser, check_dtype=check_dtype)
                else:
                    assert_frame_equal(df, unser, check_less_precise=False,
                                       check_dtype=check_dtype)

        def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None):

            # numpy=False
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype)
                _check_orient(df, "records", dtype=dtype)
                _check_orient(df, "split", dtype=dtype)
                _check_orient(df, "index", dtype=dtype)
                _check_orient(df, "values", dtype=dtype)

            _check_orient(df, "columns", dtype=dtype, convert_axes=False)
            _check_orient(df, "records", dtype=dtype, convert_axes=False)
            _check_orient(df, "split", dtype=dtype, convert_axes=False)
            _check_orient(df, "index", dtype=dtype, convert_axes=False)
            _check_orient(df, "values", dtype=dtype ,convert_axes=False)

            # numpy=True and raise_ok might be not None, so ignore the error
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, numpy=True,
                              raise_ok=raise_ok)
                _check_orient(df, "records", dtype=dtype, numpy=True,
                              raise_ok=raise_ok)
                _check_orient(df, "split", dtype=dtype, numpy=True,
                              raise_ok=raise_ok)
                _check_orient(df, "index", dtype=dtype, numpy=True,
                              raise_ok=raise_ok)
                _check_orient(df, "values", dtype=dtype, numpy=True,
                              raise_ok=raise_ok)

            _check_orient(df, "columns", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok)
            _check_orient(df, "records", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok)
            _check_orient(df, "split", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok)
            _check_orient(df, "index", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok)
            _check_orient(df, "values", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok)

        # basic
        _check_all_orients(self.frame)
        self.assertEqual(self.frame.to_json(),
                         self.frame.to_json(orient="columns"))

        _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)
        _check_all_orients(self.intframe, dtype=False)

        # big one
        # index and columns are strings as all unserialised JSON object keys
        # are assumed to be strings
        biggie = DataFrame(np.zeros((200, 4)),
                           columns=[str(i) for i in range(4)],
                           index=[str(i) for i in range(200)])
        _check_all_orients(biggie,dtype=False,convert_axes=False)

        # dtypes
        _check_all_orients(DataFrame(biggie, dtype=np.float64),
                           dtype=np.float64, convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int,
                           convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3',
                           convert_axes=False, raise_ok=ValueError)

        # empty
        _check_all_orients(self.empty_frame)

        # time series data
        _check_all_orients(self.tsframe)

        # mixed data
        index = pd.Index(['a', 'b', 'c', 'd', 'e'])
        data = {
            'A': [0., 1., 2., 3., 4.],
            'B': [0., 1., 0., 1., 0.],
            'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
            'D': [True, False, True, False, True]
        }
        df = DataFrame(data=data, index=index)
        _check_orient(df, "split", check_dtype=False)
        _check_orient(df, "records", check_dtype=False)
        _check_orient(df, "values", check_dtype=False)
        _check_orient(df, "columns", check_dtype=False)
        # index oriented is problematic as it is read back in in a transposed
        # state, so the columns are interpreted as having mixed data and
        # given object dtypes.
        # force everything to have object dtype beforehand
        _check_orient(df.transpose().transpose(), "index", dtype=False)
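The bare `df.sort()` / `unser.sort()` calls in this test come from a long-removed pandas API (sorting by the axis labels); on current pandas the equivalent would be spelled explicitly:

df = df.sort_index()          # DataFrame.sort() no longer exists
unser = unser.sort_index()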
Example #28
class Sensor():
    def __init__(self, sensor_id, distance, dbReader):
        self.sensor_id = sensor_id
        self.distance = distance
        #print "Sensor ID: ", sensor_id
        self.dbReader = dbReader
        self.spectrum_info = DataFrame()
        self.normal_info = DataFrame()
        self.calculated_info = DataFrame()
        self.count = 1
        self.state = "PASSIVE"
        self.last_active = 0
        self.clock_diff = 0
        self.unavailable_frequencies = []
        self.occupied_frequencies = []
        self.table_name = "channelinfo_" + str(self.sensor_id)
        self.channelInfo = DataFrame()
        self.log_count = 0
        self.last_active_time = 0
        self.potential = dict()
        self.cbsd_pu = dict()
        self.longitude = 0
        self.latitude = 0
        self.distUpdate = False
        self.last_loc_update = 0
        self.calc_psd = dict()
        self.spectrum_noise = DataFrame()
        self.avg_noise = dict()
        #self.fetch_pu_info()

    def fetch_channel_info(self):
        #print "Table Name: ",self.table_name
        conditions = {'startfreq': ' > 800e6 AND "startfreq" < 1000e6'}
        self.channelInfo = self.dbReader.fetch_data(
            ['startfreq', 'occ', 'noise_floor'], self.table_name, conditions,
            'ORDER BY startfreq')
        if self.channelInfo.size > 0:
            transposed_info = self.channelInfo.transpose()
            cols = None
            info = None
            noise_floor = None
            for row in transposed_info.itertuples():
                if (row[0] == 'startfreq'):
                    cols = list(row[1:-1])
                if (row[0] == 'occ'):
                    info = list(row[1:-1])
                if (row[0]) == 'noise_floor':
                    noise_floor = list(row[1:-1])

            #current_time = time.mktime(datetime.datetime.utcnow().timetuple())
            current_time = datetime.datetime.utcnow()
            ind = [current_time]
            temp = DataFrame(info, columns=ind, index=cols)
            temp = temp.transpose()
            temp_noise = DataFrame(noise_floor, columns=ind, index=cols)
            temp_noise = temp_noise.transpose()
            #print "Current Time: ",current_time
            #print temp_noise
            if self.spectrum_info.size == 0:
                self.spectrum_info = temp
                self.spectrum_noise = temp_noise
            else:
                try:
                    self.spectrum_info = self.spectrum_info.append(
                        temp, ignore_index=False)
                    self.spectrum_noise = self.spectrum_noise.append(
                        temp_noise, ignore_index=False)
                except Exception as e:
                    print "Error Appending:", e

            file_name = self.table_name + '.csv'
            if self.log_count == 0:
                temp.to_csv(file_name, mode='w', sep='\t')
                self.log_count = 1
            else:
                temp.to_csv(file_name, mode='a', header=False, sep='\t')

            spectrum_info_shape = self.spectrum_info.shape
            row_num = spectrum_info_shape[0]

            #print self.spectrum_info
            if (row_num > 3):
                #print self.spectrum_info
                ind_to_drop = self.spectrum_info.axes[0][0]
                self.spectrum_info = self.spectrum_info.drop(ind_to_drop)
                ind_to_drop = self.spectrum_noise.axes[0][0]
                self.spectrum_noise = self.spectrum_noise.drop(ind_to_drop)
            self.fetch_pu_info()

    def update_thresholds(self, nearest, near, furthest, startfreq):
        table_name = self.table_name + '_pu'
        input_data = {'nearest': nearest, 'near': near, 'furthest': furthest}
        self.dbReader.update_data(input_data, table_name,
                                  {'startfreq': startfreq})

    def fetch_pu_info(self):
        query = 'SELECT' + " pu_frequencies" + ',' + 'registered_cbsds."fccId"' + ', last_active, distance' + ' FROM sensorcbsdconnection INNER JOIN registered_cbsds ON sensorcbsdconnection."fccId" = registered_cbsds."fccId" WHERE  "nodeid"  = ' + str(
            self.sensor_id)
        self.potential_pu = self.dbReader.fetchQuery(query)
        self.potential_pu = self.potential_pu[
            self.potential_pu.last_active.notnull()]
        #self.potential_pu = self.potential_pu[self.potential_pu.pu_frequencies.notnull()]
        index_list = self.potential_pu.index.tolist()

        for i in index_list:
            fccId = self.potential_pu.loc[i]['fccId']
            if fccId not in self.cbsd_pu:
                self.cbsd_pu[fccId] = CBSD(
                    fccId, self.potential_pu.loc[i]['distance'], self.dbReader,
                    self.sensor_id)

            self.cbsd_pu[fccId].setLastActive(
                np.asscalar(self.potential_pu.loc[i]['last_active']))
            self.cbsd_pu[fccId].set_distance(
                self.potential_pu.loc[i]['distance'])
            #print np.asscalar(self.potential_pu.loc[i]['last_active'])

            freq_list = np.asarray(self.potential_pu.loc[i]['pu_frequencies'])
            freq_list = freq_list.tolist()
            self.cbsd_pu[fccId].addPotentialPuFrequencies(freq_list)

    def updatePerceivedPU(self, key, value):
        check = self.spectrum_info.to_dict()
        check2 = self.calculated_info.to_dict()
        if key in self.potential:
            #print key, " : ", check[key]
            #print "Chosen: ", check2[key]
            pass
        if value > 200 and key in self.potential:
            print key, " lost a potential -------------- dist ", value, " at ", self.sensor_id, " psd = ", self.calc_psd[
                key]
            #print check[key]
            self.potential.pop(key, None)
            self.distUpdate = True
        elif key not in self.potential and value < 200:
            self.potential[key] = value
            #print check[key]
            print key, " gained a potential ++++++++++++++ dist ", value, " at ", self.sensor_id, " psd = ", self.calc_psd[
                key]
            self.distUpdate = True
        elif key in self.potential and self.potential[key] != value:
            self.potential[key] = value
            #print check[key]
            print key, " Updated Distance: ", value, " at ", self.sensor_id, " psd = ", self.calc_psd[
                key]
            self.distUpdate = True

    def averageNoise(self):
        noise_dict = self.spectrum_noise.to_dict()
        if len(noise_dict) > 0:
            for freq, value in noise_dict.iteritems():
                if len(noise_dict[freq]) > 0:
                    self.avg_noise[freq] = 0
                    for time_stamp, noise_dbm in noise_dict[freq].iteritems():
                        self.avg_noise[freq] = self.avg_noise[
                            freq] + self.dbm_to_mw(noise_dbm)
                    self.avg_noise[freq] = self.avg_noise[freq] / len(
                        noise_dict[freq])
                    self.avg_noise[freq] = self.mw_to_dbm(self.avg_noise[freq])
        return self.avg_noise

    def mw_to_dbm(self, mW):
        return 10. * math.log10(mW)

    def dbm_to_mw(self, dBm):
        return 10**((dBm) / 10.)

    def test(self):
        pass
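The averageNoise method above converts each dBm reading to milliwatts, averages, and converts back; a minimal standalone sketch (with made-up readings, independent of the class) shows why the averaging has to happen in the linear domain:

import math

def dbm_to_mw(dbm):
    # 10^(dBm/10): invert the decibel-milliwatt definition
    return 10 ** (dbm / 10.0)

def mw_to_dbm(mw):
    # 10 * log10(mW): back to decibel-milliwatts
    return 10.0 * math.log10(mw)

readings_dbm = [-90.0, -100.0]   # hypothetical noise samples
avg_mw = sum(dbm_to_mw(r) for r in readings_dbm) / len(readings_dbm)
print(mw_to_dbm(avg_mw))         # ~ -92.6 dBm, not the naive midpoint of -95.0 dBm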
示例#29
0
def main():
    train_set = create_dataset(N)
    test_set = create_dataset(N)
    df_ws = DataFrame()

    # Fit the polynomial approximations and plot them
    fig = plt.figure()
    for c, m in enumerate(M):
        f, ws = lsm_resolve(train_set, m)
        df_ws = df_ws.append(Series(ws, name="M = %d" % m))

        subplot = fig.add_subplot(2, 2, c + 1)
        subplot.set_xlim(-0.05, 1.05)
        subplot.set_ylim(-1.5, 1.5)
        subplot.set_title("M = %d" % m)

        # Plot the training set
        subplot.scatter(train_set.x,
                        train_set.y,
                        marker='o',
                        color='blue',
                        label=None)

        # Plot the true curve
        linex = np.linspace(0, 1, 101)
        liney = np.sin(2 * np.pi * linex)
        subplot.plot(linex, liney, color='green', linestyle='--')

        # Plot the fitted polynomial curve
        linex = np.linspace(0, 1, 101)
        liney = f(linex)
        label = "E(RMS)=%.2f" % rms_error(train_set, f)
        subplot.plot(linex, liney, color='red', label=label)
        subplot.legend(loc=1)

    # Print the coefficient values
    # p.69, Fig. 2.3
    # For N = 100: p.80, Fig. 2.11
    print("Table of the coefficients")
    print(df_ws.transpose())
    # For N = 100: p.80, Fig. 2.12 (not much different from Fig. 2.2)
    # fig.savefig("out/021-p68_fig2.2.png")

    # Plot how the RMS error changes on the training and test sets
    df = DataFrame(columns=['Training set', 'Test set'])
    # Repeat for each polynomial degree
    for m in range(0, 10):
        f, ws = lsm_resolve(train_set, m)
        train_error = rms_error(train_set, f)
        test_error = rms_error(test_set, f)
        df = df.append(Series([train_error, test_error],
                              index=['Training set', 'Test set']),
                       ignore_index=True)

    # p.77, Fig. 2.8
    # Around degree M >= 3 the decrease in test-set error slows down (it settles near 0.3).
    # On the training set, with N = 10 the fit is exact at M = 9 (unsurprising, since the
    # model is evaluated against the very data it was fitted to).
    # -> Overfitting: the result is specialized to the training set and does not generalize.
    df.plot(title='RMS Error', style=['-', '--'], grid=True, ylim=(0, 0.9))
    # plt.savefig("out/021-p77_fig2.8.png")
    plt.show()
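The plots above rely on the snippet's rms_error helper (not shown here); for reference, a root-mean-square error sketch in plain NumPy, assuming y_true and y_pred are equal-length sequences of targets and predictions:

import numpy as np

def rms_error_sketch(y_true, y_pred):
    # Root-mean-square error over paired observations and predictions.
    diff = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    return float(np.sqrt(np.mean(diff ** 2)))

print(rms_error_sketch([0.0, 1.0, 0.0], [0.1, 0.8, -0.1]))  # ~0.141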
示例#30
0
def memory_perf(system_list,
                unique_id,
                group_number,
                detail_options,
                rampup_value=0,
                current_dir=""):
    have_memory_data = False
    modes = ['1K', '4K', '1M', '16M', '128M', '256M', '1G', '2G']
    sets = search_item(system_list, unique_id, "cpu", "(.*)", [], modes)
    for mode in sorted(modes):
        real_mode = "Memory benchmark %s" % mode
        results = {}
        threaded_perf = dict()
        forked_perf = dict()
        for system in sets:
            memory = []
            series = []
            found_data = ""
            threaded_perf[system] = 0
            forked_perf[system] = 0
            for perf in sets[system]:
                if mode in perf[2]:
                    # We shall split individual cpu benchmarking from
                    # the global one
                    if ("logical_" in perf[1]
                            and ("bandwidth_%s" % mode) in perf[2]):
                        if not perf[1] in memory:
                            memory.append(perf[1])
                        series.append(float(perf[3]))
                    elif "threaded_bandwidth_%s" % mode in perf[2]:
                        threaded_perf[system] = float(perf[3])
                        found_data = float(perf[3])
                    elif "forked_bandwidth_%s" % mode in perf[2]:
                        forked_perf[system] = float(perf[3])
                        found_data = float(perf[3])

            if found_data:
                # If no series are populated, it means that a single "All CPU"
                # run was done
                # If so, let's create a single run value
                if not series:
                    series.append(found_data)
                    memory.append("logical")

            results[system] = Series(series, index=memory)

        # No need to continue if no Memory data in this benchmark
        if not results:
            continue

        consistent = []
        curious = []
        unstable = []
        details = []
        matched_category = ''

        df = DataFrame(results)
        for memory in df.transpose().columns:
            if have_memory_data is False:
                print()
                print("Group %d : Checking Memory perf" % group_number)
                have_memory_data = True

            print_perf(1, 7,
                       df.transpose()[memory], df, real_mode, memory,
                       consistent, curious, unstable, "", rampup_value,
                       current_dir)
            matched_category = []
            prepare_detail(detail_options, group_number, mode, memory, details,
                           matched_category)

        print_detail(detail_options, details, df, matched_category)
        print_summary(mode, consistent, "consistent", "MB/s", df)
        print_summary(mode, curious, "curious", "MB/s", df)
        print_summary(mode, unstable, "unstable", "MB/s", df)

        for bench_type in ["threaded", "forked"]:
            efficiency = {}
            have_forked_or_threaded = False
            if "threaded" in bench_type:
                mode_text = "Thread effi."
            else:
                mode_text = "Forked Effi."
            for system in sets:
                host_efficiency_full_load = []
                host_perf = df[system].sum()
                if (host_perf > 0 and threaded_perf[system] > 0
                        and forked_perf[system] > 0):
                    have_forked_or_threaded = True
                    if "threaded" in bench_type:
                        host_efficiency_full_load.append(
                            threaded_perf[system] / host_perf * 100)
                    else:
                        host_efficiency_full_load.append(forked_perf[system] /
                                                         host_perf * 100)

                    efficiency[system] = Series(host_efficiency_full_load,
                                                index=[mode_text])

            details = []
            memory_eff = DataFrame(efficiency)
            if have_forked_or_threaded is True:
                consistent = []
                curious = []
                unstable = []

                for memory in memory_eff.transpose().columns:
                    print_perf(2, 10,
                               memory_eff.transpose()[memory], memory_eff,
                               real_mode, memory, consistent, curious,
                               unstable)
                    matched_category = []
                    prepare_detail(detail_options, group_number, mode, memory,
                                   details, matched_category)

                # Let's pad if its a thread or forked effi in addition
                # of the block size
                if matched_category:
                    matched_category[0] += " " + mode_text

                print_detail(detail_options, details, memory_eff,
                             matched_category)
                print_summary(mode + " " + mode_text, consistent, "consistent",
                              "%", memory_eff)
                print_summary(mode + " " + mode_text, curious, "curious", "%",
                              memory_eff)
                print_summary(mode + " " + mode_text, unstable, "unstable",
                              "%", memory_eff)
            else:
                utils.do_print(real_mode, utils.Levels.WARNING,
                               "%-12s : Benchmark not run on this group",
                               mode_text)
示例#31
0
def cpu_perf(system_list,
             unique_id,
             group_number,
             detail_options,
             rampup_value=0,
             current_dir=""):
    have_cpu_data = False
    host_cpu_list = search_item(system_list, unique_id, "cpu", "(.*)", [],
                                ['product'])
    host_cpu_number = search_item(system_list, unique_id, "cpu",
                                  "(.*logical.*)", [], ['number'])
    core_counts = 1
    for host in host_cpu_number:
        for item in host_cpu_number[host]:
            core_counts = item[3]
            break

    cpu_type = ''
    for host in host_cpu_list:
        for item in host_cpu_list[host]:
            cpu_type = item[3]
            break

    modes = ['bogomips', 'loops_per_sec']
    sets = search_item(system_list, unique_id, "cpu", "(.*)", [], modes)
    global_perf = dict()
    for mode in sorted(modes):
        results = {}
        for system in sets:
            cpu = []
            series = []
            found_data = False
            for perf in sets[system]:
                if perf[2] == mode:
                    # We shall split individual cpu benchmarking from
                    # the global one
                    if "_" in perf[1]:
                        if not perf[1] in cpu:
                            cpu.append(perf[1])
                        series.append(float(perf[3]))
                        found_data = True
                    elif "loops_per_sec" in mode:
                        global_perf[system] = float(perf[3])
                        found_data = True

            if found_data is True:
                # If no series are populated, it means that a single
                # "All CPU" run was done
                # If so, let's create a single run value
                if not series:
                    series.append(global_perf[system])
                    cpu.append("logical")

                results[system] = Series(series, index=cpu)

        # No need to continue if no CPU data in this benchmark
        if not results:
            continue

        df = DataFrame(results)
        consistent = []
        curious = []
        unstable = []
        details = []
        matched_category = []

        for cpu in df.transpose().columns:
            if have_cpu_data is False:
                print()
                print("Group %d : Checking CPU perf" % group_number)
                have_cpu_data = True
            print_perf(2, 7,
                       df.transpose()[cpu], df, mode, cpu, consistent, curious,
                       unstable, "", rampup_value, current_dir)
            prepare_detail(detail_options, group_number, mode, cpu, details,
                           matched_category)

        print_detail(detail_options, details, df, matched_category)

        print_summary(mode, consistent, "consistent", "", df, cpu_type)
        print_summary(mode, curious, "curious", "", df)
        print_summary(mode, unstable, "unstable", "", df)

        if mode == "loops_per_sec":
            efficiency = {}
            mode_text = 'CPU Effi.'
            consistent = []
            curious = []
            unstable = []
            details = []
            matched_category = []

            for system in sets:
                host_efficiency_full_load = []
                host_perf = (df[system].sum() *
                             (int(core_counts) / df[system].count()))
                host_efficiency_full_load.append(global_perf[system] /
                                                 host_perf * 100)
                efficiency[system] = Series(host_efficiency_full_load,
                                            index=[mode_text])

            cpu_eff = DataFrame(efficiency)
            print_perf(1, 2,
                       cpu_eff.transpose()[mode_text], cpu_eff, mode,
                       mode_text, consistent, curious, unstable)
            prepare_detail(detail_options, group_number, mode, mode_text,
                           details, matched_category)

            print_detail(detail_options, details, cpu_eff, matched_category)
            print_summary("CPU Efficiency", consistent, "consistent", '%',
                          cpu_eff)
            print_summary("CPU Efficiency", curious, "curious", '%', cpu_eff)
            print_summary("CPU Efficiency", unstable, "unstable", '%', cpu_eff)
示例#32
0
def logical_disks_perf(system_list,
                       unique_id,
                       group_number,
                       detail_options,
                       perf_unit,
                       rampup_value=0,
                       current_dir=""):
    have_disk_data = False
    sets = search_item(system_list, unique_id, "disk", r"[a-z]d(\S+)", [],
                       ['simultaneous', 'standalone'])
    modes = []

    # Searching for modes ran in this benchmark
    for system in sets:
        for perf in sets[system]:
            if perf[2] not in modes and perf_unit in perf[2]:
                modes.append(perf[2])

    if not modes:
        return

    for mode in sorted(modes):
        results = {}
        for system in sets:
            disks = []
            series = []
            for perf in sets[system]:
                if perf[2] == mode:
                    if not perf[1] in disks:
                        disks.append(perf[1])
                    series.append(int(perf[3]))
            results[system] = Series(series, index=disks)

        df = DataFrame(results)
        details = []
        matched_category = []
        for disk in df.transpose().columns:
            if have_disk_data is False:
                print()
                print("Group %d : Checking logical disks perf" % group_number)
                have_disk_data = True
            consistent = []
            curious = []
            unstable = []
            # How much the variance could be far from the average (in %)
            tolerance_max = 10
            tolerance_min = 2
            # In random mode, the variance could be higher as
            # we cannot insure the distribution pattern was similar
            if "rand" in mode:
                tolerance_min = 5
                tolerance_max = 15

            print_perf(tolerance_min, tolerance_max,
                       df.transpose()[disk], df, mode, disk, consistent,
                       curious, unstable, "-%s" % perf_unit, rampup_value,
                       current_dir)

            prepare_detail(detail_options, group_number, mode, disk, details,
                           matched_category)
            print_summary("%-30s %s" % (mode, disk), consistent, "consistent",
                          perf_unit, df)
            print_summary("%-30s %s" % (mode, disk), curious, "curious",
                          perf_unit, df)
            print_summary("%-30s %s" % (mode, disk), unstable, "unstable",
                          perf_unit, df)

        print_detail(detail_options, details, df, matched_category)
示例#33
0
def dataframe_transpose(df: pd.DataFrame):
    return df.transpose().reset_index()
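A quick, hypothetical usage note for the helper above (assuming it is in scope): transposing and then resetting the index turns the original column labels into a plain "index" column.

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=["r1", "r2"])
out = dataframe_transpose(df)
# out has columns ["index", "r1", "r2"]; its rows correspond to "a" and "b".
print(out)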
示例#34
0
								#print col_ind, " : col", dev_name, u' already exists, please check the source spreadsheet data!'
								sys.exit()

				else:
					 continue
			minn = min(map(len, ser)) - 1  # serr = ser[:]
			ddd = [ser[1][:minn][::-1], ser[2][:minn][::-1]]


			#file2=os.path.join(os.getcwd(),'Data_out.xlsx')



			##print '\nData extracted successfully! Now writing to new file……\n'
			oltdata = DataFrame(devs_Paravalue.values(), index=name)
			oltdata2 = oltdata.transpose()
			oltpara = DataFrame(ddd, index=['compareTypeInfo', 'compareItem'])
			oltpara2 = oltpara.transpose()
			oltall = oltpara2.join(oltdata2, how='outer')
			oltall2 = oltall.drop_duplicates()

			excelname=os.path.join(outdir,"Data_out.xlsx")
			setColWidth(excelname,'比较信息',oltall2.shape[1],200)
			if not os.path.exists(os.path.join(os.getcwd(),excelname)):
				setColWidth(excelname,'比较信息',oltall2.shape[1],200)
			else:
				ss=ExcelWriter(excelname)

				pattern=r'^(.*(?:[^\W\d_]*))\n(.+)$'
				reg=compile(pattern,re.M)
示例#35
0
文件: anal-gb.py 项目: jit9/cuts
#print Dgood.describe()
print ' '
#print Dbad.describe()
print ' '

Dx = DataFrame()
for k in keys_list:
    kgood = str(k+' good')
    kbad  = str(k+' bad')
    gD = Dgood[k]
    bD = Dbad[k]
    print k, '\nGood: ', gD.describe(), '\n Bad:', bD.describe(), '\n\n' 
    gD = np.array(Dgood[k]).reshape(-1,)
    bD = np.array(Dbad[k]).reshape(-1)
    Dx = DataFrame([gD, bD])
    DD = Dx.transpose()
    DD.columns = [kgood, kbad]
    DD.plot.hist()
    plt.show()

    
    

#print Dgood
print ' '
#print Dbad



Dgood.hist(grid=True, bins = 1000)
plt.title(" Good TODs" )
示例#36
0
文件: check.py 项目: whouses/hardware
def logical_disks_perf(systems, unique_id, group_number, detail_options,
                       perf_unit, rampup_value=0, current_dir=""):
    have_disk_data = False
    sets = search_item(systems, unique_id, "disk", r"[a-z]d(\S+)", [],
                       ['simultaneous', 'standalone'])
    modes = []

    # Searching for modes ran in this benchmark
    for system in sets:
        for perf in sets[system]:
            if perf[2] not in modes and perf_unit in perf[2]:
                modes.append(perf[2])

    if len(modes) == 0:
        return

    for mode in sorted(modes):
        results = {}
        for system in sets:
            disks = []
            series = []
            for perf in sets[system]:
                if perf[2] == mode:
                    if not perf[1] in disks:
                        disks.append(perf[1])
                    series.append(int(perf[3]))
            results[system] = Series(series, index=disks)

        df = DataFrame(results)
        details = []
        matched_category = []
        for disk in df.transpose().columns:
            if have_disk_data is False:
                print()
                print("Group %d : Checking logical disks perf" % group_number)
                have_disk_data = True
            consistent = []
            curious = []
            unstable = []
            # How much the variance could be far from the average (in %)
            tolerance_max = 10
            tolerance_min = 2
            # In random mode, the variance could be higher as
            # we cannot insure the distribution pattern was similar
            if "rand" in mode:
                tolerance_min = 5
                tolerance_max = 15

            print_perf(tolerance_min, tolerance_max, df.transpose()[disk], df,
                       mode, disk, consistent, curious, unstable, "-%s" % perf_unit,
                       rampup_value, current_dir)

            prepare_detail(detail_options, group_number, mode, disk, details,
                           matched_category)
            print_summary("%-30s %s" % (mode, disk), consistent, "consistent",
                          perf_unit, df)
            print_summary("%-30s %s" % (mode, disk), curious, "curious",
                          perf_unit, df)
            print_summary("%-30s %s" % (mode, disk), unstable, "unstable",
                          perf_unit, df)

        print_detail(detail_options, details, df, matched_category)
示例#37
0
    def test_frame_from_json_to_json(self):
        def _check_orient(df, orient, dtype=None, numpy=True):
            df = df.sort()
            dfjson = df.to_json(orient=orient)
            unser = DataFrame.from_json(dfjson,
                                        orient=orient,
                                        dtype=dtype,
                                        numpy=numpy)
            unser = unser.sort()
            if df.index.dtype.type == np.datetime64:
                unser.index = DatetimeIndex(unser.index.values.astype('i8'))
            if orient == "records":
                # index is not captured in this orientation
                assert_almost_equal(df.values, unser.values)
                self.assert_(df.columns.equals(unser.columns))
            elif orient == "values":
                # index and cols are not captured in this orientation
                assert_almost_equal(df.values, unser.values)
            elif orient == "split":
                # index and col labels might not be strings
                unser.index = [str(i) for i in unser.index]
                unser.columns = [str(i) for i in unser.columns]
                unser = unser.sort()
                assert_almost_equal(df.values, unser.values)
            else:
                assert_frame_equal(df, unser)

        def _check_all_orients(df, dtype=None):
            _check_orient(df, "columns", dtype=dtype)
            _check_orient(df, "records", dtype=dtype)
            _check_orient(df, "split", dtype=dtype)
            _check_orient(df, "index", dtype=dtype)
            _check_orient(df, "values", dtype=dtype)

            _check_orient(df, "columns", dtype=dtype, numpy=False)
            _check_orient(df, "records", dtype=dtype, numpy=False)
            _check_orient(df, "split", dtype=dtype, numpy=False)
            _check_orient(df, "index", dtype=dtype, numpy=False)
            _check_orient(df, "values", dtype=dtype, numpy=False)

        # basic
        _check_all_orients(self.frame)
        self.assertEqual(self.frame.to_json(),
                         self.frame.to_json(orient="columns"))

        _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)

        # big one
        # index and columns are strings as all unserialised JSON object keys
        # are assumed to be strings
        biggie = DataFrame(np.zeros((200, 4)),
                           columns=[str(i) for i in range(4)],
                           index=[str(i) for i in range(200)])
        _check_all_orients(biggie)

        # dtypes
        _check_all_orients(DataFrame(biggie, dtype=np.float64),
                           dtype=np.float64)
        _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int)
        _check_all_orients(DataFrame(biggie, dtype='<U3'), dtype='<U3')

        # empty
        _check_all_orients(self.empty_frame)

        # time series data
        _check_all_orients(self.tsframe)

        # mixed data
        index = pd.Index(['a', 'b', 'c', 'd', 'e'])
        data = {
            'A': [0., 1., 2., 3., 4.],
            'B': [0., 1., 0., 1., 0.],
            'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
            'D': [True, False, True, False, True]
        }
        df = DataFrame(data=data, index=index)
        _check_orient(df, "split")
        _check_orient(df, "records")
        _check_orient(df, "values")
        _check_orient(df, "columns")
        # index oriented is problematic as it is read back in in a transposed
        # state, so the columns are interpreted as having mixed data and
        # given object dtypes.
        # force everything to have object dtype beforehand
        _check_orient(df.transpose().transpose(), "index")
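The closing comment explains why the test round-trips df.transpose().transpose() for the index orientation: transposing a mixed-dtype frame coerces every column to object, and transposing back keeps it that way. A minimal sketch of that coercion, independent of the test harness:

import pandas as pd

df = pd.DataFrame({"A": [0.0, 1.0], "C": ["foo", "bar"]})
print(df.dtypes)                       # A: float64, C: object
coerced = df.transpose().transpose()   # transposing a mixed frame yields object dtype
print(coerced.dtypes)                  # both columns are now object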
示例#38
0
文件: check.py 项目: whouses/hardware
def memory_perf(systems, unique_id, group_number, detail_options,
                rampup_value=0, current_dir=""):
    have_memory_data = False
    modes = ['1K', '4K', '1M', '16M', '128M', '256M', '1G', '2G']
    sets = search_item(systems, unique_id, "cpu", "(.*)", [], modes)
    for mode in sorted(modes):
        real_mode = "Memory benchmark %s" % mode
        results = {}
        threaded_perf = dict()
        forked_perf = dict()
        for system in sets:
            memory = []
            series = []
            found_data = ""
            threaded_perf[system] = 0
            forked_perf[system] = 0
            for perf in sets[system]:
                if mode in perf[2]:
                    # We shall split individual cpu benchmarking from
                    # the global one
                    if ("logical_" in perf[1] and
                            ("bandwidth_%s" % mode) in perf[2]):
                        if not perf[1] in memory:
                            memory.append(perf[1])
                        series.append(float(perf[3]))
                    elif "threaded_bandwidth_%s" % mode in perf[2]:
                        threaded_perf[system] = float(perf[3])
                        found_data = float(perf[3])
                    elif "forked_bandwidth_%s" % mode in perf[2]:
                        forked_perf[system] = float(perf[3])
                        found_data = float(perf[3])

            if found_data:
                # If no series are populated, it means that a single "All CPU"
                # run was done
                # If so, let's create a single run value
                if not series:
                    series.append(found_data)
                    memory.append("logical")

            results[system] = Series(series, index=memory)

        # No need to continue if no Memory data in this benchmark
        if not results:
            continue

        consistent = []
        curious = []
        unstable = []
        details = []
        matched_category = ''

        df = DataFrame(results)
        for memory in df.transpose().columns:
            if have_memory_data is False:
                print()
                print("Group %d : Checking Memory perf" % group_number)
                have_memory_data = True

            print_perf(1, 7, df.transpose()[memory], df, real_mode, memory,
                       consistent, curious, unstable, "", rampup_value,
                       current_dir)
            matched_category = []
            prepare_detail(detail_options, group_number, mode, memory,
                           details, matched_category)

        print_detail(detail_options, details, df, matched_category)
        print_summary(mode, consistent, "consistent", "MB/s", df)
        print_summary(mode, curious, "curious", "MB/s", df)
        print_summary(mode, unstable, "unstable", "MB/s", df)

        for bench_type in ["threaded", "forked"]:
            efficiency = {}
            have_forked_or_threaded = False
            if "threaded" in bench_type:
                mode_text = "Thread effi."
            else:
                mode_text = "Forked Effi."
            for system in sets:
                host_efficiency_full_load = []
                host_perf = df[system].sum()
                if (host_perf > 0 and threaded_perf[system] > 0 and
                        forked_perf[system] > 0):
                    have_forked_or_threaded = True
                    if "threaded" in bench_type:
                        host_efficiency_full_load.append(
                            threaded_perf[system] / host_perf * 100)
                    else:
                        host_efficiency_full_load.append(
                            forked_perf[system] / host_perf * 100)

                    efficiency[system] = Series(host_efficiency_full_load,
                                                index=[mode_text])

            details = []
            memory_eff = DataFrame(efficiency)
            if have_forked_or_threaded is True:
                consistent = []
                curious = []
                unstable = []

                for memory in memory_eff.transpose().columns:
                    print_perf(2, 10, memory_eff.transpose()[memory],
                               memory_eff, real_mode, memory, consistent,
                               curious, unstable)
                    matched_category = []
                    prepare_detail(detail_options, group_number, mode,
                                   memory, details, matched_category)

                # Let's pad if its a thread or forked effi in addition
                # of the block size
                if matched_category:
                    matched_category[0] += " " + mode_text

                print_detail(detail_options, details, memory_eff,
                             matched_category)
                print_summary(mode + " " + mode_text, consistent,
                              "consistent", "%", memory_eff)
                print_summary(mode + " " + mode_text, curious,
                              "curious", "%", memory_eff)
                print_summary(mode + " " + mode_text, unstable,
                              "unstable", "%", memory_eff)
            else:
                utils.do_print(real_mode, utils.Levels.WARNING,
                               "%-12s : Benchmark not run on this group",
                               mode_text)
示例#39
0
def speichern(file, name):
    df = DataFrame(file)
    df = df.transpose()
    writer = pd.ExcelWriter("rsc/" + name + ".xlsx", engine="xlsxwriter")
    df.to_excel(writer, sheet_name="minVerbrauch", header=False, index=False)
    writer.save()
示例#40
0
def zonal_stats(vector_path,
                raster_path,
                class_attr=None,
                ret_vals=True,
                ret_stats=True,
                nodata_value=None,
                global_src_extent=False):
    rds = gdal.Open(raster_path, GA_ReadOnly)
    assert (rds)
    rb = rds.GetRasterBand(1)
    rgt = rds.GetGeoTransform()

    if nodata_value:
        nodata_value = float(nodata_value)
        rb.SetNoDataValue(nodata_value)

    vds = ogr.Open(
        vector_path,
        GA_ReadOnly)  # TODO maybe open update if we want to write stats
    assert (vds)
    vlyr = vds.GetLayer(0)

    # create an in-memory numpy array of the source raster data
    # covering the whole extent of the vector layer
    if global_src_extent:
        # use global source extent
        # useful only when disk IO or raster scanning inefficiencies are your limiting factor
        # advantage: reads raster data in one pass
        # disadvantage: large vector extents may have big memory requirements
        src_offset = bbox_to_pixel_offsets(rgt, vlyr.GetExtent())
        # src_array = rb.ReadAsArray(*src_offset)
        src_array = rds.ReadAsArray(*src_offset)

        # calculate new geotransform of the layer subset
        new_gt = ((rgt[0] + (src_offset[0] * rgt[1])), rgt[1], 0.0,
                  (rgt[3] + (src_offset[1] * rgt[5])), 0.0, rgt[5])

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    # Loop through vectors
    stats = []
    cols = ['b' + str(i + 1) for i in range(rds.RasterCount)]
    vals = DataFrame(columns=cols)
    feat = vlyr.GetNextFeature()
    while feat is not None:

        if not global_src_extent:
            # use local source extent
            # fastest option when you have fast disks and well indexed raster (ie tiled Geotiff)
            # advantage: each feature uses the smallest raster chunk
            # disadvantage: lots of reads on the source raster
            src_offset = bbox_to_pixel_offsets(rgt,
                                               feat.geometry().GetEnvelope())
            # src_array = rb.ReadAsArray(*src_offset)
            src_array = rds.ReadAsArray(*src_offset)

            # calculate new geotransform of the feature subset
            new_gt = ((rgt[0] + (src_offset[0] * rgt[1])), rgt[1], 0.0,
                      (rgt[3] + (src_offset[1] * rgt[5])), 0.0, rgt[5])

        # Create a temporary vector layer in memory
        mem_ds = mem_drv.CreateDataSource('out')
        mem_layer = mem_ds.CreateLayer('poly', None, ogr.wkbPolygon)
        mem_layer.CreateFeature(feat.Clone())

        # Rasterize it
        rvds = driver.Create('', src_offset[2], src_offset[3], 1,
                             gdal.GDT_Byte)
        rvds.SetGeoTransform(new_gt)
        gdal.RasterizeLayer(rvds, [1], mem_layer, burn_values=[1])
        rv_array = rvds.ReadAsArray()

        # Mask the source data array with our current feature
        # we take the logical_not to flip 0<->1 to get the correct mask effect
        # we also mask out nodata values explictly
        masked = np.ma.MaskedArray(src_array,
                                   mask=np.tile(
                                       np.logical_or(src_array == nodata_value,
                                                     np.logical_not(rv_array)),
                                       (rds.RasterCount, 1, 1)))

        try:
            class_val = feat.GetFieldAsString(class_attr)
        except Exception:
            class_val = None

        #import ipdb; ipdb.set_trace()

        if ret_vals:
            stacked = masked.reshape(rds.RasterCount, masked[0, :, :].size)
            stacked = DataFrame(columns=cols,
                                data=stacked.transpose()).dropna()
            if class_val is not None:
                stacked = stacked.set_index(
                    np.repeat(class_val, stacked.shape[0]))
            vals = vals.append(stacked)

        if ret_stats:
            feature_stats = {
                'min': float(masked.min()),
                'mean': float(masked.mean()),
                'max': float(masked.max()),
                'std': float(masked.std()),
                'sum': float(masked.sum()),
                'count': int(masked.count()),
                'fid': int(feat.GetFID())
            }

            stats.append(feature_stats)

        rvds = None
        mem_ds = None
        feat = vlyr.GetNextFeature()

    vds = None
    rds = None
    return (vals, stats)
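The masking step commented inside the loop (logical_or of the nodata test with the inverted rasterized footprint, tiled across bands) can be illustrated on a toy array; all values here are invented:

import numpy as np

nodata_value = -999
src_array = np.array([[[-999, 2], [3, 4]],     # band 1 of a hypothetical 2-band chunk
                      [[10, 20], [30, 40]]])   # band 2
rv_array = np.array([[1, 1], [0, 1]])          # rasterized feature: 1 = inside the polygon

mask2d = np.logical_or(src_array[0] == nodata_value, np.logical_not(rv_array))
masked = np.ma.MaskedArray(src_array, mask=np.tile(mask2d, (src_array.shape[0], 1, 1)))
print(float(masked.mean()), int(masked.count()))   # stats over valid, in-polygon pixels only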
示例#41
0
                if not wordtag[0] in stopwords_split and not wordtag[
                        0] in punctuation:
                    if not wordtag[1] in tag_to_int:
                        tag_to_int[wordtag[1]] = tag_number
                        tag_number += 1
                    indx = index + 1
                    term_ferquency.append(tf[wordtag[0]])
                    inverse_doc_frequency.append(idf[wordtag[0]])
                    positions.append(position(indx, title_len, context_len))
                    in_title.append(is_in_l(wordtag[0], title))
                    in_context.append(is_in_l(wordtag[0], context))
                    in_label.append(is_in_l(wordtag[0], tag))
                    pos_tag.append(tag_to_int[wordtag[1]])

            table = [
                term_ferquency, inverse_doc_frequency, positions, in_title,
                in_context, pos_tag, in_label
            ]
            df = DataFrame(table)
            df = df.transpose()
            cols = [
                'term_ferquency', 'inverse_doc_frequency', 'positions',
                'in_title', 'in_context', 'pos_tag', 'in_label'
            ]
            df.columns = cols
            df.to_csv(f, mode='a', index=False, header=False)
        print 'Data built : {}/6 ; Operation time : {:04.2f} minutes'.format(
            i + 1, (time.time() - starttime) / 60)
    f.close()
print 'Training data saved : training_data.csv'
示例#42
0
def read_lmw(admFile, datFile, kwaFile):
    with open(admFile) as f:
        administration = f.readlines()
    with open(datFile) as f:
        data = f.readlines()
    with open(kwaFile) as f:
        data_quality = f.readlines()

    if len(administration) != len(data):
        raise Exception("Input data is not of same length.")

    # LMW interval in minutes
    interval = 10
    val_series = []
    timestamp_series = []
    remoteid_series = []
    quality_series = []
    zom_win = []

    for i in range(len(administration)):
        values = administration[i].split(",")
        # Get the id of the timeserie
        timeseriesId = values[0].strip() +\
            "_" + values[1].strip() + "_" + values[3].strip()
        # Get the time of the first value
        if values[7].find('MET') == -1:
            zom_win = 'summer'

        values[7] = values[7].replace("JAN", "01")
        values[7] = values[7].replace("FEB", "02")
        values[7] = values[7].replace("MRT", "03")
        values[7] = values[7].replace("APR", "04")
        values[7] = values[7].replace("MEI", "05")
        values[7] = values[7].replace("JUN", "06")
        values[7] = values[7].replace("JUL", "07")
        values[7] = values[7].replace("AUG", "08")
        values[7] = values[7].replace("SEP", "09")
        values[7] = values[7].replace("OKT", "10")
        values[7] = values[7].replace("NOV", "11")
        values[7] = values[7].replace("DEC", "12")
        values[7] = values[7].replace("Z03", "")
        values[7] = values[7].replace("MET", "")
        values[7] = values[7].strip()

        if zom_win == 'summer':
            timeFirstValue = datetime.strptime(values[7], "%d-%m-%y %H:%M") -\
                timedelta(0, 0, 0, 0, 120)
        else:
            timeFirstValue = datetime.strptime(values[7], "%d-%m-%y %H:%M") -\
                timedelta(0, 0, 0, 0, 60)
        # Get all the measurements
        measurements = data[i].split(",")
        quality = data_quality[i].split(",")

        if len(measurements) != 7:
            raise Exception("Invalid number of measurements for timeserie.")

        if len(quality) != 7:
            raise Exception("Invalid number of quality flags for timeserie.")

        counter = 0
        for j in range(6):
            value = measurements[j].strip()
            value_flag = int(quality[j])
            if value != "f" and value != "n":
                TimeForValue = timeFirstValue +\
                    timedelta(0, 0, 0, 0, interval * j)
                val_series.append(float(value))
                timestamp_series.append(TimeForValue)
                remoteid_series.append(timeseriesId)
                counter += 1
                if value_flag in [10, 30, 50, 70]:
                    quality_series.append('0')
                elif value_flag in [2, 22, 24, 28, 42, 44, 48, 62, 68]:
                    quality_series.append('3')
                else:
                    quality_series.append('6')

    tsobj = DataFrame([remoteid_series,
                       val_series, quality_series])
    tsobj = tsobj.transpose()
    tsobj.columns = ['SensorID', 'value', 'flag']
    tstamp = DataFrame(timestamp_series, columns=['ts'])
    tsobj_indexed = tsobj.set_index(tstamp['ts'])
    return tsobj_indexed
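The chain of .replace() calls above maps Dutch month abbreviations to numbers and strips the timezone token; a dictionary-driven variant (a sketch, not part of the original reader) performs the same normalisation:

DUTCH_MONTHS = {"JAN": "01", "FEB": "02", "MRT": "03", "APR": "04", "MEI": "05",
                "JUN": "06", "JUL": "07", "AUG": "08", "SEP": "09", "OKT": "10",
                "NOV": "11", "DEC": "12"}

def normalize_lmw_timestamp(raw):
    # Drop the timezone markers and swap the month abbreviation for its number.
    cleaned = raw.replace("Z03", "").replace("MET", "").strip()
    for name, number in DUTCH_MONTHS.items():
        cleaned = cleaned.replace(name, number)
    return cleaned

print(normalize_lmw_timestamp("01-JAN-20 10:30MET"))   # "01-01-20 10:30"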
示例#43
0
def normalize_df(target: DataFrame,
                 normer: DataFrame,
                 ind_sep: Optional[str] = "-",
                 alphas: Optional[Iterable[float]] = None,
                 cv: float = 5,
                 **RidgeCV_kws) -> DataFrame:
    """ Used to normalize a dataset by another dataset, using a linear model with regularization
    chosen through cross validation (aka sklearn's RidgeCV). This is useful for normalizing,
    for example, RNA values by CNA, or phosphopeptide values by protein abundance. If target and
    normer dataframe row IDs (index) match 1:1, pass None for ind_sep.

    Args:
        target: Dataframe of values to normalize. Row IDs (index) before the sep (or whole ID
        if no sep) must match normer IDs. Row IDs must be unique.
        normer: Dataframe of values to use for normalization. Row IDs must match all or
        pre-ind_sep portions of target row IDs. Row IDs must be unique.
        ind_sep: If multiple rows in target map to 1 row in normer, the delimiter used to split
        the unique ID that matches the normer IDs. Default "-".
        alphas: Parameters to try for regularization. If None, tries powers of 2 from -10 to 10.
        cv: Folds for cross-validation. Also the minimum number of non-null values for each
        row. Default 5
        **RidgeCV_kws: kws to pass to sklearn's RidgeCV

    Returns: normed
        The target dataframe normalized by the normer dataframe. Only includes rows with
        sufficient non-null values from both dataframes.

    """

    if not alphas:
        alphas = [2**i for i in range(-10, 10, 1)]

    normer = normer[[col for col in target.columns if col in normer.columns]]
    target = target[normer.columns]
    if (len(normer.columns) < cv) or (len(target.columns) < cv):
        raise KeyError(
            "target and normer dataframes do not have at least %s columns in common"
            % cv)

    target = target.transpose()
    target["col0"] = 0
    target.set_index("col0", append=True, inplace=True)
    target = target.reorder_levels(
        [target.index.names[-1], target.index.names[0]]).transpose()

    normer = normer.transpose()
    normer["col0"] = 1
    normer.set_index("col0", append=True, inplace=True)
    normer = normer.reorder_levels(
        [normer.index.names[-1], normer.index.names[0]]).transpose()

    target["gene"] = [i.split(ind_sep)[0] for i in target.index]
    target = target.loc[target["gene"].isin(normer.index), :]
    if len(target) == 0:
        raise KeyError("No rows in common between target and normer")
    logging.info(
        "Normalizing %s common rows and %s common samples between target and normer"
        % (len(target), len(normer.columns)))
    data = target.merge(normer, how="left", left_on="gene", right_index=True)

    model = lm.RidgeCV(alphas=alphas, cv=cv, **RidgeCV_kws)
    normed = data.apply(
        (lambda row: _convert_to_residuals(row[0], row[1], model)), axis=1)

    return normed
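A hypothetical usage sketch for normalize_df (all names and values invented; it assumes the function and its _convert_to_residuals helper are importable): site-level rows share the pre-"-" gene ID with the rows used for normalization.

import numpy as np
from pandas import DataFrame

rng = np.random.default_rng(0)
samples = ["s%d" % i for i in range(8)]

# Normalizer: one row per gene-level measurement (e.g. protein abundance).
protein = DataFrame(rng.normal(size=(2, 8)), index=["EGFR", "TP53"], columns=samples)

# Target: several site-level rows map onto one normer row via the "-" separator.
phospho = DataFrame(rng.normal(size=(3, 8)),
                    index=["EGFR-S695", "EGFR-T693", "TP53-S15"], columns=samples)

normed = normalize_df(phospho, protein, ind_sep="-", cv=5)
print(normed.shape)   # residuals of each phospho row after regressing out its protein row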
示例#44
0
def ComputeMetrics1(stats, filename):
    """
    DESCRIPTION
    
    :Parameters:
        NAME : TYPE
            DESCRIPTION
        
    :Return:
        DESCRIPTION
    """

    data = {}

    for article in stats:
        metrics = {}
        temp = {}

        title = article['article-title']

        # get metrics from data
        allActions = GetMetric(article, 'total-actions')
        number_tokens = GetMetric(article, 'number-tokens')
        maintainanceTag = GetMetric(article, 'tag-maintained')

        # split metrics between maintainer and others
        addsMaintainer, addsOthers = SplitMO(article, maintainers[index],
                                             'tokens-added')
        deletesMaintainer, deletesOthers = SplitMO(article, maintainers[index],
                                                   'tokens-deleted')
        revertsMaintainer, revertsOthers = SplitMO(article, maintainers[index],
                                                   'tokens-reverted')
        antActionsMaintainer, antActionsOthers = SplitMO(
            article, maintainers[index], 'antagonistic-actions')
        reintroMaintainer, reintroOthers = SplitMO(article, maintainers[index],
                                                   'tokens-reintroduced')
        selfreintroMaintainer, selfreintroOthers = SplitMO(
            article, maintainers[index], 'tokens-self-reintroduced')
        talkpageMaintainer, talkpageOthers = SplitMO(article,
                                                     maintainers[index],
                                                     'talkpage-edits')

        # BLABLA
        ownershipMaintainerAbs = GetOwnership(article, maintainers_id[index],
                                              'tokens-absolute')
        ownershipMaintainerRel = GetOwnership(article, maintainers_id[index],
                                              'tokens-relative')

        # get properties of article
        metrics['firstMaintRev'] = GetFirstMaintainedRev(maintainanceTag)
        metrics['maintainer-name'] = article['maintainer-name']
        metrics['maintainer-id'] = article['maintainer-id']
        metrics['all-actions'] = sum(allActions)
        metrics['edits-maintainer'] = len(addsMaintainer)
        metrics['edits-others'] = len(addsOthers)
        metrics['number-revisions'] = metrics['edits-maintainer'] + metrics[
            'edits-others']

        # temporal comparison
        TempCompare()

        # to relativize with edits is just an assumptions to have something.
        if talkpageOthers:
            #metrics['talkPageRatio'] = sum(talkpageMaintainer) / float(metrics['edits-maintainer']) / float( sum(talkpageOthers) / float(metrics['edits-others']) )
            metrics['talkPageRatio'] = sum(talkpageMaintainer) / float(
                sum(talkpageOthers))
        else:
            metrics['talkPageRatio'] = 0

        # if metrics['all-actions'] is 0:
        #     metrics['addsMaintainerAvg'] = 0
        #     metrics['addsOthersAvg'] = 0
        #     metrics['addsRatio'] = 0

        #     metrics['deletesMaintainerRel'] = 0
        #     metrics['deletesOthersRel'] = 0
        #     metrics['deletesRatio'] = 0

        #     metrics['revertsMaintainerRel'] = 0
        #     metrics['revertsOthersRel'] = 0
        #     metrics['revertsRatio'] = 0

        #     metrics['reintroMaintainerAvg'] = 0
        #     metrics['reintroOthersAvg'] = 0
        #     metrics['selfreintroMaintainerAvg'] = 0
        #     metrics['selfreintroOthersAvg'] = 0
        #     metrics['selfreintroRatio'] = 0

        #     metrics['antActionsMaintainerAvg'] = 0
        #     metrics['antActionsOthersAvg'] = 0
        #     metrics['negActionsRatio'] = 0

        #     metrics['targetedIntroRatio'] = 0
        # metrics['addsMaintainerRel'] = sum(addsMaintainer)/float(metrics['all-actions'])
        # metrics['addsOthersRel'] = sum(addsOthers)/float(metrics['all-actions'])
        # metrics['addsRatio'] = metrics['addsMaintainerRel'] / float(metrics['addsOthersRel'])
        # metrics['deletesMaintainerRel'] = sum(deletesMaintainer)/float(metrics['all-actions'])
        # metrics['deletesOthersRel'] = sum(deletesOthers)/float(metrics['all-actions'])
        # metrics['deletesRatio'] = metrics['deletesMaintainerRel'] / float(metrics['deletesOthersRel'])
        # metrics['revertsMaintainerRel'] = sum(revertsMaintainer)/float(metrics['all-actions'])
        # metrics['revertsOthersRel'] = sum(revertsOthers)/float(metrics['all-actions'])
        # metrics['revertsRatio'] = metrics['revertsMaintainerRel'] / float(metrics['revertsOthersRel'])
        # metrics['reintroMaintainerRel'] = sum(reintroMaintainer)/float(metrics['all-actions'])
        # metrics['reintroOthersRel'] = sum(reintroOthers)/float(metrics['all-actions'])
        # metrics['selfreintroMaintainerRel'] = sum(selfreintroMaintainer)/float(metrics['all-actions'])
        # metrics['selfreintroOthersRel'] = sum(selfreintroOthers)/float(metrics['all-actions'])
        # if metrics['selfreintroOthersAvg'] == 0:
        #     metrics['selfreintroRatio'] = 0
        # else:
        #     metrics['selfreintroRatio'] = metrics['selfreintroMaintainerAvg'] / float(metrics['selfreintroOthersAvg'])
        # if metrics['antActionsOthersAvg'] == 0:
        #     metrics['antActionsRatio'] = 0
        # else:
        #     metrics['antActionsRatio'] = metrics['antActionsMaintainerAvg'] / float(metrics['antActionsOthersAvg'])

        # if metrics['reintroMaintainerAvg'] == 0 or metrics['selfreintroOthersAvg'] == 0 or metrics['reintroOthersAvg'] == 0:
        #     metrics['targetedIntroRatio'] = 0
        #     metrics['targetedIntroRatio2Ownership'] = 0
        # else:
        #     metrics['targetedIntroRatio'] = (metrics['selfreintroMaintainerAvg'] / float(metrics['reintroMaintainerAvg'])) \
        #         / float((metrics['selfreintroOthersAvg'] / float(metrics['reintroOthersAvg'])))
        #     #metrics['targetedIntroRatio2Ownership'] = (metrics['selfreintroMaintainerRel'] / float(metrics['reintroMaintainerRel'])) \
        #     #    / float((metrics['selfreintroOthersRel'] / float(metrics['reintroOthersRel'])))

        metrics['addsMaintainerAvg'] = sum(addsMaintainer) / float(
            metrics['edits-maintainer'])
        metrics['addsOthersAvg'] = sum(addsOthers) / float(
            metrics['edits-others'])
        metrics['addsRatio'] = metrics['addsMaintainerAvg'] / float(
            metrics['addsOthersAvg'])
        metrics['reintroMaintainerAvg'] = sum(reintroMaintainer) / float(
            metrics['edits-maintainer'])
        metrics['reintroOthersAvg'] = sum(reintroOthers) / float(
            metrics['edits-others'])
        metrics['reintroRatio'] = metrics['reintroMaintainerAvg'] / float(
            metrics['reintroOthersAvg'])
        metrics['selfreintroMaintainerAvg'] = sum(
            selfreintroMaintainer) / float(metrics['edits-maintainer'])
        metrics['selfreintroOthersAvg'] = sum(selfreintroOthers) / float(
            metrics['edits-others'])
        metrics[
            'selfreintroRatio'] = metrics['selfreintroMaintainerAvg'] / float(
                metrics['selfreintroOthersAvg'])
        metrics['antActionsMaintainerAvg'] = sum(antActionsMaintainer) / float(
            metrics['edits-maintainer'])
        metrics['antActionsOthersAvg'] = sum(antActionsOthers) / float(
            metrics['edits-others'])

        # metrics['deletesMaintainerAvg'] = sum(deletesMaintainer)/float(metrics['edits-maintainer'])
        # metrics['deletesOthersAvg'] = sum(deletesOthers)/float(metrics['edits-others'])
        # metrics['deletesRatio'] = sum(metrics['deletesMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(temp['deletesOthersAvg']) / float(metrics['edits-others']))

        # metrics['revertsMaintainerAvg'] = sum(revertsMaintainer)/float(metrics['edits-maintainer'])
        # metrics['revertsOthersAvg'] = sum(revertsOthers)/float(metrics['edits-others'])
        # metrics['revertsRatio'] = sum(metrics['revertsMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['revertsOthersAvg']) / float(metrics['edits-others']))
        # metrics['revertsMaintainerPot'] = sum(revertsMaintainer)/float(metrics['edits-maintainer'])
        # metrics['revertsOthersPot'] = sum(revertsOthers)/float(metrics['edits-others'])
        # metrics['revertsPotRatio'] = sum(metrics['revertsMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['revertsOthersAvg']) / float(metrics['edits-others']))

        # metrics['reintroMaintainerAvg'] = sum(reintroMaintainer)/float(metrics['edits-maintainer'])
        # metrics['reintroOthersAvg'] = sum(reintroOthers)/float(metrics['edits-others'])
        # metrics['reintroRatio'] = sum(metrics['reintroMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['reintroOthersAvg']) / float(metrics['edits-others']))

        # metrics['selfreintroMaintainerAvg'] = sum(selfreintroMaintainer)/float(metrics['edits-maintainer'])
        # metrics['selfreintroOthersAvg'] = sum(selfreintroOthers)/float(metrics['edits-others'])
        # metrics['selfreintroRatio'] = sum(metrics['selfreintroMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['selfreintroOthersAvg']) / float(metrics['edits-others']))

        # share of selfreintroductions of potential own tokens
        # temp['selfreintroMaintainerPot'] = [(b/float(a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], selfreintroMaintainer[1:len(selfreintroMaintainer)-1])]
        # temp['selfreintroOthersPot'] = [(b/float(c-a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], selfreintroOthers[1:len(selfreintroOthers)-1], number_tokens[:len(number_tokens)-2) if a is not 0]
        # metrics['selfreintroPotRatio'] = sum(temp['selfreintroMaintainerPot']) / float(metrics['edits-maintainer']) / float(sum(temp['selfreintroOthersPot']) / float(metrics['edits-others']))

        # temp['antActionsMaintainerPot'] = [(b/float(a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], antActionsMaintainer[1:len(antActionsMaintainer)-1]) if a is not 0]
        # temp['antActionsOthersPot'] = [(b/float(c-a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], antActionsOthers[1:len(antActionsOthers)-1], number_tokens[:len(number_tokens)-2]) if a is not 0]
        # metrics['antActionsRatio'] = sum(temp['antActionsMaintainerPot']) / float(metrics['edits-maintainer']) / float(sum(temp['antActionsOthersPot']) / float(metrics['edits-others']))

        data[title] = metrics

    data = DataFrame(data)
    data = data.transpose()
    save2CSV(data, filename)

    return data
示例#45
0
pp.ylabel('incidenten')
pp.tight_layout(pad=3.0)
pp.show()

groups = series['2014':'2019'].groupby(Grouper(freq='A'))
years = DataFrame()
for name, group in groups:
    years[name.year] = group.values
# Box and Whisker Plots
pp.figure(figsize=(6, 4), dpi=100, edgecolor='k')
years.boxplot()
pp.title('Trend')
pp.tight_layout(pad=3.0)
pp.show()

years = years.transpose()
pp.figure(figsize=(6, 4), dpi=100, edgecolor='k')
years.boxplot()
pp.tight_layout(pad=3.0)
pp.xticks([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [
    'jan', 'feb', 'mrt', 'apr', 'mei', 'jun', 'jul', 'aug', 'sep', 'okt',
    'nov', 'dec'
])
pp.title('Seizoen')
pp.show()

# isolate the last year (12 months) into a separate data/test set

split_point = len(series) - 12
dataset, validation = series[0:split_point], series[split_point:]
print('Dataset %d, Validation %d' % (len(dataset), len(validation)))
示例#46
0
data = {
    'name': ['박대성', '백상훈', '이권수', '이우현'],
    'money': [5000, 2000000, 5000000, 80000000],
    'sex': ['M', 'M', 'M', 'M']
}

print(data)
print('---------dict --> DataFrame conversion---')
df = DataFrame(data)
print(df)
print(type(df))
print('------------------')
df = DataFrame(data, columns=['money', 'sex', 'name'])
print(df)
print(type(df))
print('---------iloc / loc ------')
print(df.iloc[0:2])
print('-------------------------------------------')
print(df.loc[0])

print('--- add a column -----')
df['level'] = df['money'] > 5000
print(df)

print('--- extract data -----')
print(df[df.name == '박대성'])
print(df[(df.money > 10000) & (df.sex == 'M')])

print('---- transpose rows and columns ------')
print(df.transpose())
示例#47
0
    def test_frame_from_json_to_json(self):
        def _check_orient(df, orient, dtype=None, numpy=False,
                          convert_axes=True, check_dtype=True, raise_ok=None,
                          sort=None, check_index_type=True,
                          check_column_type=True, check_numpy_dtype=False):
            if sort is not None:
                df = df.sort_values(sort)
            else:
                df = df.sort_index()

            # if we are not unique, then check that we are raising ValueError
            # for the appropriate orients
            if not df.index.is_unique and orient in ['index', 'columns']:
                pytest.raises(
                    ValueError, lambda: df.to_json(orient=orient))
                return
            if (not df.columns.is_unique and
                    orient in ['index', 'columns', 'records']):
                pytest.raises(
                    ValueError, lambda: df.to_json(orient=orient))
                return

            dfjson = df.to_json(orient=orient)

            try:
                unser = read_json(dfjson, orient=orient, dtype=dtype,
                                  numpy=numpy, convert_axes=convert_axes)
            except Exception as detail:
                if raise_ok is not None:
                    if isinstance(detail, raise_ok):
                        return
                raise

            if sort is not None and sort in unser.columns:
                unser = unser.sort_values(sort)
            else:
                unser = unser.sort_index()

            if dtype is False:
                check_dtype = False

            if not convert_axes and df.index.dtype.type == np.datetime64:
                unser.index = DatetimeIndex(
                    unser.index.values.astype('i8') * 1e6)
            if orient == "records":
                # index is not captured in this orientation
                tm.assert_almost_equal(df.values, unser.values,
                                       check_dtype=check_numpy_dtype)
                tm.assert_index_equal(df.columns, unser.columns,
                                      exact=check_column_type)
            elif orient == "values":
                # index and cols are not captured in this orientation
                if numpy is True and df.shape == (0, 0):
                    assert unser.shape[0] == 0
                else:
                    tm.assert_almost_equal(df.values, unser.values,
                                           check_dtype=check_numpy_dtype)
            elif orient == "split":
                # index and col labels might not be strings
                unser.index = [str(i) for i in unser.index]
                unser.columns = [str(i) for i in unser.columns]

                if sort is None:
                    unser = unser.sort_index()
                tm.assert_almost_equal(df.values, unser.values,
                                       check_dtype=check_numpy_dtype)
            else:
                if convert_axes:
                    tm.assert_frame_equal(df, unser, check_dtype=check_dtype,
                                          check_index_type=check_index_type,
                                          check_column_type=check_column_type)
                else:
                    tm.assert_frame_equal(df, unser, check_less_precise=False,
                                          check_dtype=check_dtype)

        def _check_all_orients(df, dtype=None, convert_axes=True,
                               raise_ok=None, sort=None, check_index_type=True,
                               check_column_type=True):

            # numpy=False
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "records", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "split", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "index", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "values", dtype=dtype,
                          convert_axes=False, sort=sort)

            # numpy=True and raise_ok might not be None, so ignore the error
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "records", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "split", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "index", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "values", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)

        # basic
        _check_all_orients(self.frame)
        assert self.frame.to_json() == self.frame.to_json(orient="columns")

        _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)
        _check_all_orients(self.intframe, dtype=False)

        # big one
        # index and columns are strings as all unserialised JSON object keys
        # are assumed to be strings
        biggie = DataFrame(np.zeros((200, 4)),
                           columns=[str(i) for i in range(4)],
                           index=[str(i) for i in range(200)])
        _check_all_orients(biggie, dtype=False, convert_axes=False)

        # dtypes
        _check_all_orients(DataFrame(biggie, dtype=np.float64),
                           dtype=np.float64, convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int,
                           convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3',
                           convert_axes=False, raise_ok=ValueError)

        # categorical
        _check_all_orients(self.categorical, sort='sort', raise_ok=ValueError)

        # empty
        _check_all_orients(self.empty_frame, check_index_type=False,
                           check_column_type=False)

        # time series data
        _check_all_orients(self.tsframe)

        # mixed data
        index = pd.Index(['a', 'b', 'c', 'd', 'e'])
        data = {'A': [0., 1., 2., 3., 4.],
                'B': [0., 1., 0., 1., 0.],
                'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
                'D': [True, False, True, False, True]}
        df = DataFrame(data=data, index=index)
        _check_orient(df, "split", check_dtype=False)
        _check_orient(df, "records", check_dtype=False)
        _check_orient(df, "values", check_dtype=False)
        _check_orient(df, "columns", check_dtype=False)
        # index oriented is problematic as it is read back in in a transposed
        # state, so the columns are interpreted as having mixed data and
        # given object dtypes.
        # force everything to have object dtype beforehand
        _check_orient(df.transpose().transpose(), "index", dtype=False)
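
The orient="index" caveat in the comment above can be illustrated with a minimal round-trip sketch (values are illustrative; wrapping the JSON string in StringIO is only needed on recent pandas versions):

from io import StringIO

import pandas as pd

df = pd.DataFrame({"A": [0.0, 1.0], "C": ["foo1", "foo2"]}, index=["a", "b"])
payload = df.to_json(orient="index")
roundtrip = pd.read_json(StringIO(payload), orient="index")
# With mixed column types the round-tripped frame may come back with object
# dtypes, which is why the test forces object dtype up front via
# df.transpose().transpose() before comparing.
print(roundtrip.dtypes)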
Example #48
0
    def __init__(self, workbench, data_path = "/home/moritz/people/MoreData/genomes/TOBG/", clean = False):
        Database.__init__(self,workbench = workbench, data_path = data_path)

        wb = load_workbook("metadata/Table3_GenomeStats.xlsx")
        t_metadata = DataFrame([l for i,l in enumerate(wb['Sheet1'].values) if i >1], columns=[l for l in wb['Sheet1'].values][1])
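        # Map garbled or misspelled taxon labels from the spreadsheet to names
        # that resolve against the NCBI taxonomy.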
        corrected = { u'\xc2Gemmatimonadetes': 'Gemmatimonadetes' ,
        'marinegroup': 'Puniceicoccaceae',
        'Urania1B19': 'Phycisphaerae',
        'Thalassopira' : 'Thalassospira',
        'SM1A02': 'Phycisphaerae',
        'SAR324cluster': 'SAR324 cluster',
        'unclassifiedAlphaproteobacteria': 'Alphaproteobacteria',
        'SAR202-2': 'SAR202 cluster',
        'SAR202-1': 'SAR202 cluster',
        'SAR116cluster' : 'SAR116 cluster',
        'OPB35soil': 'unidentified Verrucomicrobium group OPB35',
        'Pla3': 'Planctomycetes',
        'OM190': 'Planctomycetes',
        'NovelClass_B': 'Ignavibacteriae',
        'Nitropelagicus': 'Candidatus Nitrosopelagicus' ,
        'Nanoarchaoeta': 'Nanoarchaeota',
        'Methylobacterum': 'Methylobacterium',
        'JL-ENTP-F27': 'Phycisphaerae',
        'FS140-16B-02marinegroup': 'Phycisphaerae',
        'Epsilonbacteraeota': 'Bacteria',
        'DEV007': 'Verrucomicrobiales',
        'CandidatusPuniceispirillum': 'Candidatus Puniceispirillum',
        'CandidatePhylaRadiation' : 'Bacteria candidate phyla',
        'CaThioglobus': 'Candidatus Thioglobus',
        'CaAtelocyanobacterium' : 'Candidatus Atelocyanobacterium',
        '0319-6G20': 'Bdellovibrionales',
        'Euryarcheota' : 'Euryarchaeota' ,
        'SBR1093' : 'Bacteria',
        'Euryarcheoata' : 'Euryarchaeota'
        }

        regions = { 'NP' : 'North_Pacific',
        'NAT' : 'North_Atlantic',
        'MED' : 'Mediterranean',
        'ARS' : 'Arabian_Sea',
        'RS'  : 'Red_Sea',
        'IN'  : 'Indian_Ocean',
        'EAC' : 'East_Africa_Coastal',
        'SAT' : 'South_Atlantic',
        'CPC' : 'Chile_Peru_Coastal',
        'SP'  : 'South_Pacific'
        }

        wb2 = load_workbook("metadata/Table4_Phylogeny.xlsx")
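        # For each genome keep the most specific taxonomic level that is neither
        # 'null' nor a 'novel...' placeholder, then map it to a corrected name.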
        taxos = {l[0]: [v for v in l[:-1] if v != 'null' and not v[0:4] == "nove"][-1] for l in wb2['Hug set'].values}
        taxos = {k: corrected.get(v, v) for k, v in taxos.items()}

        tax_2_id = self.taxDb.get_name_translator(taxos.values())
        tax_ids = {g: tax_2_id.get(taxos[g])[0] for g in t_metadata['Genome ID'] if g in taxos}
        t_metadata['species_taxid'] = [tax_ids.get(g, 131567) for g in t_metadata['Genome ID']]
        t_metadata.index = Index(t_metadata['Genome ID'])
        t_metadata['region'] = [regions[g.split("_")[1].split("-")[0]] for g in t_metadata['Genome ID']]
        self.metadata = t_metadata.transpose().to_dict()

        print "Loading genomes"
        if os.path.exists(pjoin(self.data_path , 'TOBGGENOMES.tar.gz')):
            os.system("tar xzvf " + pjoin(self.data_path , 'TOBGGENOMES.tar.gz'))
            os.remove(pjoin(self.data_path , 'TOBGGENOMES.tar.gz'))

        for k,v in tqdm(self.metadata.items()):
            genome_path = pjoin(self.data_path, v['region'], k)
            genome_file = pjoin(genome_path, k + ".fna")
            if not os.path.exists(genome_file):
                os.makedirs(pjoin(genome_path, 'original_files'))
                shutil.move(self.data_path + k + ".fna", pjoin(genome_path, 'original_files'))
            self.genomes += [Genome(k, genome_path, ref=pjoin(genome_path, 'original_files', k + ".fna"), manual_metadata = v, taxDb = self.taxDb, workbench = self.workbench)]
Example #49
0
    def test_frame_from_json_to_json(self):
        def _check_orient(
            df,
            orient,
            dtype=None,
            numpy=False,
            convert_axes=True,
            check_dtype=True,
            raise_ok=None,
            sort=None,
            check_index_type=True,
            check_column_type=True,
        ):
            if sort is not None:
                df = df.sort_values(sort)
            else:
                df = df.sort_index()

            # if we are not unique, then check that we are raising ValueError
            # for the appropriate orients
            if not df.index.is_unique and orient in ["index", "columns"]:
                self.assertRaises(ValueError, lambda: df.to_json(orient=orient))
                return
            if not df.columns.is_unique and orient in ["index", "columns", "records"]:
                self.assertRaises(ValueError, lambda: df.to_json(orient=orient))
                return

            dfjson = df.to_json(orient=orient)

            try:
                unser = read_json(dfjson, orient=orient, dtype=dtype, numpy=numpy, convert_axes=convert_axes)
            except Exception as detail:
                if raise_ok is not None:
                    if isinstance(detail, raise_ok):
                        return
                raise

            if sort is not None and sort in unser.columns:
                unser = unser.sort_values(sort)
            else:
                unser = unser.sort_index()

            if dtype is False:
                check_dtype = False

            if not convert_axes and df.index.dtype.type == np.datetime64:
                unser.index = DatetimeIndex(unser.index.values.astype("i8") * 1e6)
            if orient == "records":
                # index is not captured in this orientation
                assert_almost_equal(df.values, unser.values)
                self.assertTrue(df.columns.equals(unser.columns))
            elif orient == "values":
                # index and cols are not captured in this orientation
                if numpy is True and df.shape == (0, 0):
                    assert unser.shape[0] == 0
                else:
                    assert_almost_equal(df.values, unser.values)
            elif orient == "split":
                # index and col labels might not be strings
                unser.index = [str(i) for i in unser.index]
                unser.columns = [str(i) for i in unser.columns]

                if sort is None:
                    unser = unser.sort_index()
                assert_almost_equal(df.values, unser.values)
            else:
                if convert_axes:
                    assert_frame_equal(
                        df,
                        unser,
                        check_dtype=check_dtype,
                        check_index_type=check_index_type,
                        check_column_type=check_column_type,
                    )
                else:
                    assert_frame_equal(df, unser, check_less_precise=False, check_dtype=check_dtype)

        def _check_all_orients(
            df, dtype=None, convert_axes=True, raise_ok=None, sort=None, check_index_type=True, check_column_type=True
        ):

            # numpy=False
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype, convert_axes=False, sort=sort)
            _check_orient(df, "records", dtype=dtype, convert_axes=False, sort=sort)
            _check_orient(df, "split", dtype=dtype, convert_axes=False, sort=sort)
            _check_orient(df, "index", dtype=dtype, convert_axes=False, sort=sort)
            _check_orient(df, "values", dtype=dtype, convert_axes=False, sort=sort)

            # numpy=True and raise_ok might not be None, so ignore the error
            if convert_axes:
                _check_orient(
                    df,
                    "columns",
                    dtype=dtype,
                    numpy=True,
                    raise_ok=raise_ok,
                    sort=sort,
                    check_index_type=False,
                    check_column_type=False,
                )
                _check_orient(
                    df,
                    "records",
                    dtype=dtype,
                    numpy=True,
                    raise_ok=raise_ok,
                    sort=sort,
                    check_index_type=False,
                    check_column_type=False,
                )
                _check_orient(
                    df,
                    "split",
                    dtype=dtype,
                    numpy=True,
                    raise_ok=raise_ok,
                    sort=sort,
                    check_index_type=False,
                    check_column_type=False,
                )
                _check_orient(
                    df,
                    "index",
                    dtype=dtype,
                    numpy=True,
                    raise_ok=raise_ok,
                    sort=sort,
                    check_index_type=False,
                    check_column_type=False,
                )
                _check_orient(
                    df,
                    "values",
                    dtype=dtype,
                    numpy=True,
                    raise_ok=raise_ok,
                    sort=sort,
                    check_index_type=False,
                    check_column_type=False,
                )

            _check_orient(df, "columns", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "records", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "split", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "index", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "values", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort)

        # basic
        _check_all_orients(self.frame)
        self.assertEqual(self.frame.to_json(), self.frame.to_json(orient="columns"))

        _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)
        _check_all_orients(self.intframe, dtype=False)

        # big one
        # index and columns are strings as all unserialised JSON object keys
        # are assumed to be strings
        biggie = DataFrame(np.zeros((200, 4)), columns=[str(i) for i in range(4)], index=[str(i) for i in range(200)])
        _check_all_orients(biggie, dtype=False, convert_axes=False)

        # dtypes
        _check_all_orients(DataFrame(biggie, dtype=np.float64), dtype=np.float64, convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int, convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype="U3"), dtype="U3", convert_axes=False, raise_ok=ValueError)

        # categorical
        _check_all_orients(self.categorical, sort="sort", raise_ok=ValueError)

        # empty
        _check_all_orients(self.empty_frame, check_index_type=False, check_column_type=False)

        # time series data
        _check_all_orients(self.tsframe)

        # mixed data
        index = pd.Index(["a", "b", "c", "d", "e"])
        data = {
            "A": [0.0, 1.0, 2.0, 3.0, 4.0],
            "B": [0.0, 1.0, 0.0, 1.0, 0.0],
            "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
            "D": [True, False, True, False, True],
        }
        df = DataFrame(data=data, index=index)
        _check_orient(df, "split", check_dtype=False)
        _check_orient(df, "records", check_dtype=False)
        _check_orient(df, "values", check_dtype=False)
        _check_orient(df, "columns", check_dtype=False)
        # index oriented is problematic as it is read back in in a transposed
        # state, so the columns are interpreted as having mixed data and
        # given object dtypes.
        # force everything to have object dtype beforehand
        _check_orient(df.transpose().transpose(), "index", dtype=False)
Example #50
0
                        label=None)

        # Plot the true curve
        linex = np.linspace(0, 1, 101)
        liney = np.sin(2 * np.pi * linex)
        subplot.plot(linex, liney, color='green', linestyle='--')

        # Plot the polynomial approximation curve
        linex = np.linspace(0, 1, 101)
        liney = f(linex)
        label = "E(RMS)=%.2f" % rms_error(train_set, f)
        subplot.plot(linex, liney, color='red', label=label)
        subplot.legend(loc=1)

    # Print the coefficient values
    print("Table of the coefficients")
    print(df_ws.transpose())
    fig.show()

    # Plot how the error changes on the training set and the test set
    df = DataFrame(columns=['Training set', 'Test set'])
    for m in range(0, 10):  # 多項式の次数
        f, ws = resolve(train_set, m)
        train_error = rms_error(train_set, f)
        test_error = rms_error(test_set, f)
        df = df.append(Series([train_error, test_error],
                              index=['Training set', 'Test set']),
                       ignore_index=True)
    df.plot(title='RMS Error', style=['-', '--'], grid=True, ylim=(0, 0.9))
    plt.show()
Example #51
0
    def test_frame_from_json_to_json(self):
        def _check_orient(df, orient, dtype=None, numpy=False,
                          convert_axes=True, check_dtype=True, raise_ok=None,
                          sort=None, check_index_type=True,
                          check_column_type=True, check_numpy_dtype=False):
            if sort is not None:
                df = df.sort_values(sort)
            else:
                df = df.sort_index()

            # if we are not unique, then check that we are raising ValueError
            # for the appropriate orients
            if not df.index.is_unique and orient in ['index', 'columns']:
                self.assertRaises(
                    ValueError, lambda: df.to_json(orient=orient))
                return
            if (not df.columns.is_unique and
                    orient in ['index', 'columns', 'records']):
                self.assertRaises(
                    ValueError, lambda: df.to_json(orient=orient))
                return

            dfjson = df.to_json(orient=orient)

            try:
                unser = read_json(dfjson, orient=orient, dtype=dtype,
                                  numpy=numpy, convert_axes=convert_axes)
            except Exception as detail:
                if raise_ok is not None:
                    if isinstance(detail, raise_ok):
                        return
                raise

            if sort is not None and sort in unser.columns:
                unser = unser.sort_values(sort)
            else:
                unser = unser.sort_index()

            if dtype is False:
                check_dtype = False

            if not convert_axes and df.index.dtype.type == np.datetime64:
                unser.index = DatetimeIndex(
                    unser.index.values.astype('i8') * 1e6)
            if orient == "records":
                # index is not captured in this orientation
                assert_almost_equal(df.values, unser.values,
                                    check_dtype=check_numpy_dtype)
                self.assert_index_equal(df.columns, unser.columns,
                                        exact=check_column_type)
            elif orient == "values":
                # index and cols are not captured in this orientation
                if numpy is True and df.shape == (0, 0):
                    assert unser.shape[0] == 0
                else:
                    assert_almost_equal(df.values, unser.values,
                                        check_dtype=check_numpy_dtype)
            elif orient == "split":
                # index and col labels might not be strings
                unser.index = [str(i) for i in unser.index]
                unser.columns = [str(i) for i in unser.columns]

                if sort is None:
                    unser = unser.sort_index()
                assert_almost_equal(df.values, unser.values,
                                    check_dtype=check_numpy_dtype)
            else:
                if convert_axes:
                    assert_frame_equal(df, unser, check_dtype=check_dtype,
                                       check_index_type=check_index_type,
                                       check_column_type=check_column_type)
                else:
                    assert_frame_equal(df, unser, check_less_precise=False,
                                       check_dtype=check_dtype)

        def _check_all_orients(df, dtype=None, convert_axes=True,
                               raise_ok=None, sort=None, check_index_type=True,
                               check_column_type=True):

            # numpy=False
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "records", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "split", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "index", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "values", dtype=dtype,
                          convert_axes=False, sort=sort)

            # numpy=True and raise_ok might not be None, so ignore the error
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "records", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "split", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "index", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "values", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)

        # basic
        _check_all_orients(self.frame)
        self.assertEqual(self.frame.to_json(),
                         self.frame.to_json(orient="columns"))

        _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)
        _check_all_orients(self.intframe, dtype=False)

        # big one
        # index and columns are strings as all unserialised JSON object keys
        # are assumed to be strings
        biggie = DataFrame(np.zeros((200, 4)),
                           columns=[str(i) for i in range(4)],
                           index=[str(i) for i in range(200)])
        _check_all_orients(biggie, dtype=False, convert_axes=False)

        # dtypes
        _check_all_orients(DataFrame(biggie, dtype=np.float64),
                           dtype=np.float64, convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int,
                           convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3',
                           convert_axes=False, raise_ok=ValueError)

        # categorical
        _check_all_orients(self.categorical, sort='sort', raise_ok=ValueError)

        # empty
        _check_all_orients(self.empty_frame, check_index_type=False,
                           check_column_type=False)

        # time series data
        _check_all_orients(self.tsframe)

        # mixed data
        index = pd.Index(['a', 'b', 'c', 'd', 'e'])
        data = {'A': [0., 1., 2., 3., 4.],
                'B': [0., 1., 0., 1., 0.],
                'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
                'D': [True, False, True, False, True]}
        df = DataFrame(data=data, index=index)
        _check_orient(df, "split", check_dtype=False)
        _check_orient(df, "records", check_dtype=False)
        _check_orient(df, "values", check_dtype=False)
        _check_orient(df, "columns", check_dtype=False)
        # index oriented is problematic as it is read back in in a transposed
        # state, so the columns are interpreted as having mixed data and
        # given object dtypes.
        # force everything to have object dtype beforehand
        _check_orient(df.transpose().transpose(), "index", dtype=False)
Example #52
0
def fig_graph(
    data: DataFrame,
    title: str,
    percentile_cutoff: float = 0.5,
    percentile_bins: int = 100,
) -> Figure:
    data = data.copy()
    assert 0.0 <= percentile_cutoff <= 1.0
    assert percentile_bins > 0
    assert list(data.index) == list(data.columns)
    assert np.allclose(data.values, data.transpose().values, equal_nan=True)

    diag = data.values.diagonal().copy()
    np.fill_diagonal(data.values, np.nan)
    print(f"Creating figure for dataframe of shape {data.shape}")

    drop_labels = data[
        data.sum().rank(method="first", pct=True) <= percentile_cutoff].index
    print(f"\t{percentile_cutoff=}, dropping {len(drop_labels)} entries")
    data.drop(drop_labels, axis=0, inplace=True)
    data.drop(drop_labels, axis=1, inplace=True)

    G = ig.Graph.Adjacency(
        np.where(
            np.logical_and(np.isfinite(data.values),
                           np.greater(data.values, 0)),
            True,
            False,
        ).tolist(),
        mode=ig.ADJ_UNDIRECTED,
    )

    # clustering = G.community_fastgreedy().as_clustering()
    # G = clustering.graph
    # clusters = clustering.membership

    # layout = G.layout('sphere', dim=3)
    layout = G.layout_auto(dim=3)
    x_n, y_n, z_n = list(zip(*layout))

    min_size, max_size = 5, 10  # in px
    if data.shape[0] > 1:
        sizes = diag
        sizes /= sizes.max()  # in [0, 1]
        sizes = (min_size + (max_size - min_size) * sizes
                 )  # in [min_size, max_size]
    else:
        sizes = [max_size for _ in range(data.shape[0])]

    weights = list(
        data.fillna(0).values[e.source][e.target].item() for e in G.es)
    if len(weights) > 0:
        centralities = np.array(G.betweenness(weights=weights))
        if centralities.min() != centralities.max():
            centralities = (centralities - centralities.min()) / (
                centralities.max() - centralities.min())
    else:
        centralities = []

    node_trace = go.Scatter3d(
        x=x_n,
        y=y_n,
        z=z_n,
        mode="markers+text",
        marker=dict(
            size=sizes,
            sizemode="diameter",
            color=centralities,
            opacity=1,
            colorscale="Jet",
            colorbar=dict(title="vertex betweenness", thickness=5),
        ),
        hovertext=data.columns,
        hoverinfo="text",
        hoverlabel=dict(bgcolor="white"),
        showlegend=False,
    )
    print(f"\tcreated scatter plot with {len(x_n)} nodes")

    percentiles = (data.unstack().rank(method="first",
                                       pct=True).values.reshape(data.shape))
    x_e = {i: [] for i in range(percentile_bins)}
    y_e = {i: [] for i in range(percentile_bins)}
    z_e = {i: [] for i in range(percentile_bins)}

    min_width, max_width = min_size / 2, max_size / 2  # in px
    linspace_width = np.linspace(min_width, max_width, num=percentile_bins)
    w_e = dict(enumerate(linspace_width))

    min_alpha, max_alpha = 0.01, 0.2
    linspace_alpha = np.linspace(min_alpha, max_alpha, num=percentile_bins)
    a_e = dict(enumerate(linspace_alpha))

    quantiles = []
    for e in G.es:
        s = e.source
        t = e.target
        quantile = min(percentile_bins - 1,
                       int(percentile_bins * percentiles[s][t]))
        quantiles.append(quantile)
        x_e[quantile] += [layout[s][0], layout[t][0], None]
        y_e[quantile] += [layout[s][1], layout[t][1], None]
        z_e[quantile] += [layout[s][2], layout[t][2], None]

    edge_traces = [
        go.Scatter3d(
            x=x_e[i],
            y=y_e[i],
            z=z_e[i],
            mode="lines",
            line=dict(width=w_e[i], color=f"rgba(0,0,0,{a_e[i]})"),
            showlegend=False,
            hoverinfo="none",
        ) for i in range(percentile_bins)
    ]
    print(
        f"\tcreated scatter plot with {len(G.es)} edges across {percentile_bins} bins"
    )

    layout = go.Layout(
        title=title,
        scene=dict(
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            zaxis=dict(visible=False),
        ),
        hovermode="closest",
    )

    fig = go.Figure(data=edge_traces + [node_trace], layout=layout)
    return fig
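
A minimal usage sketch for fig_graph with a small symmetric toy matrix (labels and values are illustrative; it assumes plotly and python-igraph are installed, and percentile_cutoff=0.0 keeps every node):

import numpy as np
import pandas as pd

labels = ["a", "b", "c", "d"]
rng = np.random.default_rng(0)
m = rng.random((4, 4))
sym = (m + m.T) / 2            # fig_graph asserts the matrix is symmetric
np.fill_diagonal(sym, 1.0)     # the diagonal drives the node sizes
toy = pd.DataFrame(sym, index=labels, columns=labels)

fig = fig_graph(toy, title="toy similarity graph", percentile_cutoff=0.0)
fig.write_html("toy_graph.html")  # or fig.show()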
# Feature ranking with recursive feature elimination and cross-validated
# selection of the best number of features

# Use linear regression as the base estimator
lin_reg = LinearRegression()

# RFECV picks the number of features via 20-fold cross-validation; to fix the
# count instead, use e.g. RFE(lin_reg, n_features_to_select=4, step=1)
mod = RFECV(lin_reg, step=1, cv=20)

mod_fit = mod.fit(X, y)  # fit on the feature matrix X and target y

# ranking_[i] is the ranking position of the i-th feature; features estimated
# as best are assigned rank 1.

rankings = DataFrame(mod_fit.ranking_, index=names)  # wrap the ranking in a DataFrame
rankings.rename(columns={0: 'Rank'}, inplace=True)
print(rankings.transpose())  # show the features across the columns

# Select the features to be used for training and prediction

columns = ['others_cr', 'trend_macd', 'trend_ichimoku_a', 'trend_ichimoku_conv', 'trend_ichimoku_base', 'close']
df = btc[columns]

train = df.astype(float)

print('Shape of training set == {}.'.format(train.shape))
print()
print(df.tail())
print()
print('Observations: %d' % len(df))

fig, ax = plt.subplots(figsize=(10, 6))

values = df.close
def _measure_cos_sim(columns_set: pd.DataFrame, rows_set: pd.DataFrame):
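    # Note: this is a plain dot product; it equals cosine similarity only when
    # the rows of both frames are already L2-normalised (assumed by the caller).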
    similarity_matrix = rows_set.dot(columns_set.transpose())
    return similarity_matrix
Example #55
0
class InsertAndTable(QWidget):
    def __init__(
        self,
        Tablename: str,
        FeildForm: dict,
        database: QSqlDatabase,
        InsertQuery: str,
        parent=None,
    ):
        super().__init__(parent=parent)
        self.Tablename = Tablename
        self.database = database
        self.FeildForm = FeildForm
        self.setInsertAndLayout()
        self.InsertQuery = InsertQuery

    def setInsertAndLayout(self):
        layout = QVBoxLayout(self)

        layout1 = QHBoxLayout()
        layout2 = QVBoxLayout()
        button = QPushButton("Input Data", self)
        button2 = QPushButton("Delete")
        button3 = QPushButton("Printer!")

        button.clicked.connect(self.InsertShow)
        button2.clicked.connect(self.DeleteRow)
        button3.clicked.connect(self.Print)
        self.table = Table("projects.db", self.Tablename, self.database, self)

        layout1.addWidget(button)
        layout1.addWidget(button2)
        layout1.addWidget(button3)
        layout2.addWidget(self.table)
        layout.addLayout(layout1)
        layout.addLayout(layout2)

    def InsertShow(self):
        FormButton = FormDialog(self.Tablename, self.FeildForm, self)
        result = FormButton.exec_()
        if result:
            if self.InsertQuery != "":
                ExecQuery = self.InsertQuery.format(
                    *FormButton.GetAllFeildResponses())
                result = self.database.exec_(ExecQuery)
                self.table.refresh()

    def DeleteRow(self):
        self.table.model.removeRow(self.table.currentIndex().row())
        self.table.refresh()

    def Print(self):

        Page = QPrintDialog(self)
        dec = Page.exec_()

        ###Code added for creating the excel file
        if dec == 1:
            model = self.table.model
            data = []
            for row in range(model.rowCount()):
                data.append([])
                for column in range(model.columnCount()):
                    index = model.index(row, column)
                    data[row].append(str(model.data(index)))

            # map 1..26 to Excel column letters (A, B, ...) for width formatting
            dp = dict(enumerate(string.ascii_uppercase, 1))
            xlsFilepath = "./testing1.xlsx"

            ### create a DataFrame from the table data and attach the column headers
            self.df = DataFrame(data)
            self.attr = list(self.FeildForm.keys())
            self.df.columns = self.attr
            self.no_of_attr = len(self.df.columns)

            # 7 or more attributes: transpose so the attributes run down the rows
            if self.no_of_attr >= 7:

                self.df = self.df.transpose()

                self.df.to_excel("testing1.xlsx")

                wb = openpyxl.load_workbook("testing1.xlsx")
                sheet = wb.active

                column_len = max(len(str(i)) for i in self.df.index)  # longest row label
                sheet.column_dimensions[dp[1]].width = column_len + 5

                for x, y in enumerate(self.df.columns):

                    column_len = self.df[x].astype(str).str.len().max()
                    print(dp[x + 2])
                    sheet.column_dimensions[dp[x + 2]].width = column_len + 5

                wb.save(xlsFilepath)
            # fewer than 7 attributes
            else:

                self.df.to_excel("testing1.xlsx")

                wb = openpyxl.load_workbook("testing1.xlsx")
                sheet = wb.active

                for x, y in enumerate(self.df.columns):

                    column_len = self.df[self.df.columns[x]].astype(str).str.len().max()  # longest cell in the column

                    column_attr_len = len(self.df.columns[x])

                    print("cl=", column_len, " attr =", column_attr_len)

                    column_len = (column_len if column_len >= column_attr_len
                                  else column_attr_len)

                    sheet.column_dimensions[dp[x + 2]].width = column_len + 5
                    print(dp[x + 1], column_len + 5)

                sheet.column_dimensions[dp[x + 2]].width = column_len + 5
                print(dp[x + 2], column_len + 5)

                wb.save(xlsFilepath)
Example #56
0
def pseudobulk(adata,
               outpath=None,
               column='celltype0',
               label='celltype0',
               split_condition='donor',
               todrop=[
                   'CELL', 'input.path', 'percent_mito', 'n_counts', 'n_genes',
                   'leiden', 'celltype0', 'celltype1', 'celltype2',
                   'celltype3', 'dblabel'
               ],
               main_condition='CONDITION'):
    """export pseudobulk profiles of cells to .gct files

    This is a function with which any type of labeling (i.e. celltype annotation, louvain 
    clustering, etc.) can be written out to several .gct files as well as a single metadata file. 

    To ensure FAIR compatbility label, and file name should not be changed.

    parameters
    ----------
    adata: `AnnData`
        the AnnData object containing the labeling 
    outpath `str` | default = current working directory
        filepath to the directory in which the results should be outputed, if no directory is 
        specified it outputs the results to the current working directory.
    column: `str` | default = 'celltype0'
        Name of the column in adata.obs that is to be mapped to cell barcodes and written out to file.
    label: `str` | default = 'celltype0'
        label above the column when it is written out to several files
    split_condition: `str` | default = 'experiment'
        the experimental unit, e.g. sample ID
    todrop: `list` 
        Several column headers to be excluded from metadata
    main_condition: `str` | default = 'CONDITION'
        main condition to be outputed in the metadata file
    returns
    -------
    dfmerge: `pd.DataFrame`
        merged dataframe

    """
    if outpath is None:
        outpath = os.getcwd()

    data = adata.obs.get(column)
    if data is None:
        sys.exit('please specify a column name that is present in adata.obs')

    data = adata.obs.get(column).to_frame(name=label)

    data = adata.obs.get(main_condition)
    if data is None:
        sys.exit(
            'please specify a condition name that is present in adata.obs')

    ### create the output directory if it does not exist
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    ### create adata subsets for each column value
    adata.obs[split_condition] = adata.obs[split_condition].astype('str')
    adata.obs[split_condition] = adata.obs[split_condition].astype('category')
    adata.obs[column] = adata.obs[column].astype('category')

    bulks = {}
    myset = list(set(adata.obs[column]))
    for i in myset:
        ii = i.replace(" ", "_")  ## to avoid spaces in cell names
        bulks[ii] = adata[adata.obs[column].isin([i])].copy()
    bulks['all'] = adata.copy()

    ### go through each adata subset and export pseudobulk
    dfbulks = {}
    for x in bulks.keys():
        # sum expression
        auxdata = bulks[x].copy()
        myexp = list(auxdata.obs[split_condition].cat.categories
                     )  ### these are all different levels for experiments
        mysums = zeros((len(auxdata.raw.var.index), len(myexp)))
        for i in range(len(myexp)):
            mysums[:, i] = expm1(
                auxdata[auxdata.obs[split_condition] == myexp[i]].raw.X).sum(
                    axis=0)
        mysums = DataFrame(mysums)
        mysums.index = adata.raw.var.index
        mysums.columns = [x + '.' + y for y in myexp]
        dfbulks[x] = mysums

        mydat = auxdata.raw.var.loc[:, ['SYMBOL', 'ENSEMBL']]
        mydat.rename(columns={'SYMBOL': 'Description'}, inplace=True)
        gct = mydat.merge(dfbulks[x],
                          how='right',
                          left_index=True,
                          right_index=True)
        gct.set_index('ENSEMBL', inplace=True)
        gct.index.names = ['NAME']
        gct.columns = ['Description'] + myexp

        # write out the pseudobulk expression: first the gct header...
        gctFile_pseudo = outpath + 'Pseudobulk-' + label + '-' + x + '.gct'
        with open(gctFile_pseudo, "w") as fp:
            fp.write("#1.2" + "\n")
            fp.write(str(gct.shape[0]) + '\t' + str(gct.shape[1] - 1) +
                     '\n')  # "description" already merged in as a column
        #...and then the matrix
        gct.to_csv(gctFile_pseudo,
                   sep='\t',
                   index=True,
                   index_label='NAME',
                   header=True,
                   mode='a',
                   float_format='%.3f')
        print('Pseudobulk-' + label + '-' + x +
              '.gct exported successfully to file')

    #### Output into single .tsv file
    dfmerge = concat(dfbulks, axis=1)
    dfmerge.columns = dfmerge.columns.droplevel()
    dfmerge.to_csv(outpath + 'Pseudobulk-' + label + '.tsv',
                   sep='\t',
                   index_label=False)

    ### Export one metadata file
    myexp = list(adata.obs[split_condition].cat.categories)
    colindex = range(0, len(adata.obs.columns)
                     )  ### replace if only a subset of metadata should be used
    mysums = []
    for i in range(len(myexp)):
        mysums.append(
            list(adata[adata.obs[split_condition] ==
                       myexp[i]].obs.iloc[:, colindex].iloc[0, :]))
    mysums = DataFrame(mysums).transpose()
    mysums.index = adata[adata.obs[split_condition] ==
                         myexp[i]].obs.iloc[:, colindex].columns
    mysums.columns = myexp
    mysums = mysums.transpose().drop(labels=todrop, axis=1, errors='ignore')
    mysums['ID'] = list(mysums.index)
    colorder = ['ID', main_condition] + (mysums.columns.drop(
        ['ID', main_condition]).tolist())
    mysums.loc[:, colorder].to_csv(outpath + 'Pseudobulk.meta',
                                   sep='\t',
                                   index=False)

    return dfmerge
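
A minimal usage sketch (hypothetical file name and column names; it assumes adata.obs carries 'celltype0', 'donor' and 'CONDITION' columns, adata.raw is populated, and adata.raw.var has 'SYMBOL' and 'ENSEMBL' columns):

import scanpy as sc

adata = sc.read_h5ad("annotated_cells.h5ad")  # hypothetical annotated dataset
summary = pseudobulk(adata,
                     outpath="./pseudobulk_out/",
                     column="celltype0",
                     label="celltype0",
                     split_condition="donor",
                     main_condition="CONDITION")
print(summary.shape)  # genes x (label.sample) pseudobulk matrix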
Example #57
0
File: check.py Project: whouses/hardware
def cpu_perf(systems, unique_id, group_number, detail_options,
             rampup_value=0, current_dir=""):
    have_cpu_data = False
    host_cpu_list = search_item(systems, unique_id, "cpu", "(.*)", [],
                                ['product'])
    host_cpu_number = search_item(systems, unique_id, "cpu",
                                  "(.*logical.*)", [], ['number'])
    core_counts = 1
    for host in host_cpu_number:
        for item in host_cpu_number[host]:
            core_counts = item[3]
            break

    cpu_type = ''
    for host in host_cpu_list:
        for item in host_cpu_list[host]:
            cpu_type = item[3]
            break

    modes = ['bogomips', 'loops_per_sec']
    sets = search_item(systems, unique_id, "cpu", "(.*)", [], modes)
    global_perf = dict()
    for mode in sorted(modes):
        results = {}
        for system in sets:
            cpu = []
            series = []
            found_data = False
            for perf in sets[system]:
                if perf[2] == mode:
                    # We shall split individual cpu benchmarking from
                    # the global one
                    if "_" in perf[1]:
                        if not perf[1] in cpu:
                            cpu.append(perf[1])
                        series.append(float(perf[3]))
                        found_data = True
                    elif "loops_per_sec" in mode:
                        global_perf[system] = float(perf[3])
                        found_data = True

            if found_data is True:
                # If no series are populated, it means that a single
                # "All CPU" run was done
                # If so, let's create a single run value
                if not series:
                    series.append(global_perf[system])
                    cpu.append("logical")

                results[system] = Series(series, index=cpu)

        # No need to continue if no CPU data in this benchmark
        if not results:
            continue

        df = DataFrame(results)
        consistent = []
        curious = []
        unstable = []
        details = []
        matched_category = []

        for cpu in df.transpose().columns:
            if have_cpu_data is False:
                print()
                print("Group %d : Checking CPU perf" % group_number)
                have_cpu_data = True
            print_perf(2, 7, df.transpose()[cpu], df, mode, cpu, consistent,
                       curious, unstable, "", rampup_value, current_dir)
            prepare_detail(detail_options, group_number, mode, cpu, details,
                           matched_category)

        print_detail(detail_options, details, df, matched_category)

        print_summary(mode, consistent, "consistent", "", df, cpu_type)
        print_summary(mode, curious, "curious", "", df)
        print_summary(mode, unstable, "unstable", "", df)

        if mode == "loops_per_sec":
            efficiency = {}
            mode_text = 'CPU Effi.'
            consistent = []
            curious = []
            unstable = []
            details = []
            matched_category = []

            for system in sets:
                host_efficiency_full_load = []
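                # Ideal throughput if every core matched the measured per-core
                # average; efficiency is the all-core run as a percentage of it.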
                host_perf = (df[system].sum() *
                             (int(core_counts) / df[system].count()))
                host_efficiency_full_load.append(
                    global_perf[system] / host_perf * 100)
                efficiency[system] = Series(host_efficiency_full_load,
                                            index=[mode_text])

            cpu_eff = DataFrame(efficiency)
            print_perf(1, 2, cpu_eff.transpose()[mode_text], cpu_eff, mode,
                       mode_text, consistent, curious, unstable)
            prepare_detail(detail_options, group_number, mode, mode_text,
                           details, matched_category)

            print_detail(detail_options, details, cpu_eff, matched_category)
            print_summary("CPU Efficiency", consistent, "consistent", '%',
                          cpu_eff)
            print_summary("CPU Efficiency", curious, "curious", '%', cpu_eff)
            print_summary("CPU Efficiency", unstable, "unstable", '%', cpu_eff)
    "2016-09-02T00:00:00.000000000":"2016-09-08T00:00:00.000000000"].idxmax()
JuliaPeak_t18 = df_t18.loc[
    "2016-09-18T00:00:00.000000000":"2016-09-25T00:00:00.000000000"].max()
JuliaPeak_t18_time = df_t18.loc[
    "2016-09-18T00:00:00.000000000":"2016-09-25T00:00:00.000000000"].idxmax()
MatthewPeak_t18 = df_t18.loc[
    "2016-10-07T00:00:00.000000000":"2016-10-14T00:00:00.000000000"].max()
MatthewPeak_t18_time = df_t18.loc[
    "2016-10-07T00:00:00.000000000":"2016-10-14T00:00:00.000000000"].idxmax()

peaks_values = DataFrame([
    HerminePeak_t1, JuliaPeak_t1, MatthewPeak_t1, HerminePeak_t9, JuliaPeak_t9,
    MatthewPeak_t9, HerminePeak_t18, JuliaPeak_t18, MatthewPeak_t18
])

peaks_values = peaks_values.transpose()
peaks_values.columns = [
    'HerminePeak_t1', 'JuliaPeak_t1', 'MatthewPeak_t1', 'HerminePeak_t9',
    'JuliaPeak_t9', 'MatthewPeak_t9', 'HerminePeak_t18', 'JuliaPeak_t18',
    'MatthewPeak_t18'
]

peak_times = DataFrame([
    HerminePeak_t1_time, JuliaPeak_t1_time, MatthewPeak_t1_time,
    HerminePeak_t9_time, JuliaPeak_t9_time, MatthewPeak_t9_time,
    HerminePeak_t18_time, JuliaPeak_t18_time, MatthewPeak_t18_time
])

peak_times = peak_times.transpose()
peak_times.columns = [
    'HermineTime_t1', 'JuliaTime_t1', 'MatthewTime_t1', 'HermineTime_t9',
Example #59
0
    def test_frame_from_json_to_json(self):

        def _check_orient(df, orient, dtype=None, numpy=True):
            df = df.sort()
            dfjson = df.to_json(orient=orient)
            unser = DataFrame.from_json(dfjson, orient=orient, dtype=dtype,
                                        numpy=numpy)
            unser = unser.sort()
            if df.index.dtype.type == np.datetime64:
                unser.index = DatetimeIndex(unser.index.values.astype('i8'))
            if orient == "records":
                # index is not captured in this orientation
                assert_almost_equal(df.values, unser.values)
                self.assert_(df.columns.equals(unser.columns))
            elif orient == "values":
                # index and cols are not captured in this orientation
                assert_almost_equal(df.values, unser.values)
            elif orient == "split":
                # index and col labels might not be strings
                unser.index = [str(i) for i in unser.index]
                unser.columns = [str(i) for i in unser.columns]
                unser = unser.sort()
                assert_almost_equal(df.values, unser.values)
            else:
                assert_frame_equal(df, unser)

        def _check_all_orients(df, dtype=None):
            _check_orient(df, "columns", dtype=dtype)
            _check_orient(df, "records", dtype=dtype)
            _check_orient(df, "split", dtype=dtype)
            _check_orient(df, "index", dtype=dtype)
            _check_orient(df, "values", dtype=dtype)

            _check_orient(df, "columns", dtype=dtype, numpy=False)
            _check_orient(df, "records", dtype=dtype, numpy=False)
            _check_orient(df, "split", dtype=dtype, numpy=False)
            _check_orient(df, "index", dtype=dtype, numpy=False)
            _check_orient(df, "values", dtype=dtype, numpy=False)

        # basic
        _check_all_orients(self.frame)
        self.assertEqual(self.frame.to_json(),
                         self.frame.to_json(orient="columns"))

        _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)

        # big one
        # index and columns are strings as all unserialised JSON object keys
        # are assumed to be strings
        biggie = DataFrame(np.zeros((200, 4)),
                           columns=[str(i) for i in range(4)],
                           index=[str(i) for i in range(200)])
        _check_all_orients(biggie)

        # dtypes
        _check_all_orients(DataFrame(biggie, dtype=np.float64),
                           dtype=np.float64)
        _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int)
        _check_all_orients(DataFrame(biggie, dtype='<U3'), dtype='<U3')

        # empty
        _check_all_orients(self.empty_frame)

        # time series data
        _check_all_orients(self.tsframe)

        # mixed data
        index = pd.Index(['a', 'b', 'c', 'd', 'e'])
        data = {
            'A': [0., 1., 2., 3., 4.],
            'B': [0., 1., 0., 1., 0.],
            'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
            'D': [True, False, True, False, True]
        }
        df = DataFrame(data=data, index=index)
        _check_orient(df, "split")
        _check_orient(df, "records")
        _check_orient(df, "values")
        _check_orient(df, "columns")
        # index oriented is problematic as it is read back in in a transposed
        # state, so the columns are interpreted as having mixed data and
        # given object dtypes.
        # force everything to have object dtype beforehand
        _check_orient(df.transpose().transpose(), "index")
Example #60
0
def pfa_coef_counts(coef: pd.DataFrame):
    coef = coef.drop(columns=['factor'])
    coef = coef.transpose()
    coef = coef.reindex(["correct_coef", "incorrect_coef", "intercept"])
    coef.insert(0, column="cor", value=[1., 0., 1.])
    return coef.to_numpy()
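
A minimal usage sketch with hypothetical coefficient values, assuming coef holds one row per skill with the column names pfa_coef_counts expects:

import pandas as pd

coef = pd.DataFrame({
    "factor": ["skill_a", "skill_b"],
    "correct_coef": [0.12, 0.30],
    "incorrect_coef": [0.05, 0.10],
    "intercept": [-1.2, -0.8],
}, index=["skill_a", "skill_b"])

# 3 x (1 + n_skills) array: the "cor" column followed by one column per skill.
print(pfa_coef_counts(coef))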