Example #1
    def reward_holders(TOKENS,EXCEPTIONS,MEMOS,POT,PUB=False,MULT=0,FIXED_NUM=0):
        print("rewarding holders...")
        COLLECTABLES = [t.split(":") for t in TOKENS]
        EXCEPT = list(EXCEPTIONS)

        MEMO = MEMOS[rand(0,len(MEMOS)-1)]

        holders_totals, percent_stakes = Functions.get_percent_holders(COLLECTABLES,EXCEPT,public=PUB)

        if not (MULT or FIXED_NUM):
            sendpot, receipt = Functions.send_payments(percent_stakes,POT,MEMO,public=PUB)
        elif MULT:
            sendpot, receipt = Functions.send_payments(holders_totals,POT,MEMO,public=PUB,multiplier=MULT)
        else:
            sendpot, receipt = Functions.send_payments(percent_stakes,POT,MEMO,public=PUB,fixed_amount=FIXED_NUM)

        Receipt_String = "\n\t Totals " + DF(holders_totals).to_string()
        Receipt_String += "\n\n\t Totals by % " + DF(percent_stakes).to_string()
        Receipt_String += '\n\n\t Sending ... ' + DF(sendpot).to_string() + receipt

        # log transaction data locally
        receipt_file = "Receipts/" + time.strftime("%Y%m%d%H%M") + ".txt"
        with open(receipt_file, 'w') as log:
            log.write(Receipt_String)

        return Receipt_String,receipt_file
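
A hedged invocation sketch for the routine above (the token ids, address and memo are placeholders; Functions.get_percent_holders and Functions.send_payments are assumed to be supplied by the surrounding module, and the call is shown as a plain function call):

summary, receipt_path = reward_holders(
    TOKENS=["policy_id:asset_a", "policy_id:asset_b"],  # colon-separated collectable ids
    EXCEPTIONS=["addr_excluded"],                        # holders to skip
    MEMOS=["thanks for holding!"],                       # one memo is picked at random
    POT=100,                                             # total amount split by percentage stake
)
print(summary)
print("receipt written to", receipt_path)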
Example #2
def get_category_count(name, deal_now, train_data, start_date, end_date):
    # For each value i of the categorical column `name`, add a per-user count
    # column 'see_<name>_<i>' to train_data and, in a second pass, its ratio
    # of that user's total across all values of `name`.
    count = DF(deal_now.groupby(['user_id', name]).size()
               .reset_index().rename(columns={0: 'times'}))
    count_size = deal_now.groupby([name]).size().shape[0]
    sum_data = 0
    for i in range(0, count_size):
        new_name = 'see_' + name + '_' + str(i)
        temp = pd.merge(train_data, count[count[name] == i],
                        on=['user_id']).rename(columns={'times': new_name})
        train_have = pd.merge(train_data,
                              temp[['user_id', new_name]],
                              on=['user_id'])
        train_have = train_have[['user_id', new_name]]
        not_have_name = list(
            set(train_data['user_id'].values) -
            set(train_have['user_id'].values))
        train_not_have = DF()
        train_not_have['user_id'] = train_data[train_data['user_id'].isin(
            not_have_name)]['user_id']
        train_not_have['see_' + name + '_' + str(i)] = 0
        temp = pd.concat([train_have, train_not_have], axis=0)
        train_data = pd.merge(train_data, temp, on=['user_id'], how='left')
        sum_data += train_data[new_name].values

    for i in range(0, count_size):
        new_name = 'see_' + name + '_' + str(i)
        train_data[new_name +
                   '_ratio'] = train_data[new_name].values / sum_data

    return train_data
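
A small illustrative call (column names and values are made up; note the loop over range(count_size) assumes the category values are the integers 0..k-1):

import pandas as pd

deal_now = pd.DataFrame({'user_id': [1, 1, 2, 2, 2, 3],
                         'page': [0, 1, 0, 0, 1, 1]})
train_data = pd.DataFrame({'user_id': [1, 2, 3]})

# adds see_page_0 / see_page_1 count columns plus their _ratio columns
train_data = get_category_count('page', deal_now, train_data, None, None)
print([c for c in train_data.columns if c.startswith('see_page_')])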
Example #3
    def __init__(self, debug=0):
        self.data_url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
        x = requests.get(self.data_url).json()
        data = json.loads(x['data'])
        if debug: self.data = data
        a = data['areaTree'][0]['children'][0]['today'].keys()
        b = ['total_' + i for i in data['areaTree'][0]['total'].keys()]
        self.__header = ['city'] + b + list(a)
        self.chinaDayList = DF([i.values() for i in data['chinaDayList']],
                               columns=data['chinaDayList'][0].keys())
        self.chinaDayADD = DF([i.values() for i in data['chinaDayAddList']],
                              columns=data['chinaDayAddList'][0].keys())
        print(
            f'As of {data["lastUpdateTime"]}; 2019nCoV has spread to {len(data["areaTree"])} countries/regions'
        )
        print(
            f'China cumulative confirmed cases: {data["chinaTotal"]["confirm"]}, new since 00:00 yesterday: {data["chinaAdd"]["confirm"]}'
        )

        # per-region data
        areaTree = data['areaTree']
        self.area_dict = {}  # records every region
        self.total_rec = [self._detail_area(i) for i in areaTree]  # parse every record
        self.all_area = set(self.area_dict.keys())  # all place names
        self.global_area = DF(self._country(areaTree),
                              columns=['country'] +
                              self.__header[1:])  # worldwide infection status
        self.china = DF(self._country(areaTree[0]['children']),
                        columns=self.__header)  # infection status within China
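
A hedged usage sketch; the snippet above is only the constructor, so the class name nCoV below is a placeholder for whatever the enclosing class is called:

tracker = nCoV()                   # fetches and parses the Tencent feed
print(tracker.china.head())        # per-city rows for China
print(tracker.global_area.head())  # per-country rows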
Example #4
    def permLabels(self, isBoolLabel=1, isSavePerms=None):
        if isBoolLabel == 1:
            L = self.boolLabelsDF
        elif isBoolLabel == 0:
            L = self.contLabelsDF

        self.permedBoolLabelsDF = DF(index=L.index, columns=L.columns)
        for col in L.columns:
            self.permedBoolLabelsDF[col] = np.random.permutation(
                L[col]
            )  # TODO - test what happens if the rand perm is on each label separately
        try:
            L = self.contLabelsDF
            self.permedContLabelsDF = DF(index=L.index, columns=L.columns)
            for col in L.columns:
                self.permedContLabelsDF[col] = np.random.permutation(L[col])
        except AttributeError:
            pass

        if isSavePerms is None:
            isSavePerms = int(raw_input('save permed Labels? '))
        if isSavePerms:
            print('\nsaving to pickle...')
            pickle.dump(self, open(self.LabelsPath + '.pickle', 'wb'))
            print('saving to csv...')
            self.boolLabelsDF.to_csv(self.LabelsPath + 'DF.csv')
            print('All Labels Data successfully saved to ' + self.LabelsPath)
Example #5
def main():

    # Clean up the data from the previous analysis
    clean_up()

    # Fetch the exchange tickers and load them into a DataFrame for each exchange
    exchange_data = get_exchange_data(my_exchange_list)
    exchange_data_CB = DF(exchange_data['tickers'][0])
    exchange_data_KC = DF(exchange_data['tickers'][1])

    # This cleans the data so that it can be more easily worked with
    clean_KC_data = clean_exchange_data(exchange_data_KC)
    clean_CB_data = clean_exchange_data(exchange_data_CB)

    # This separates the information into the trading pairs
    trading_pairs_KC = trading_pair_df(clean_KC_data)
    trading_pairs_CB = trading_pair_df(clean_CB_data)

    # Create the CSV files for each exchange and send the data they have for analysis
    records = int(input("How Many Records Would you like> "))
    days = input("How Many Days (ie. 1, 7, 14, 30)> ")

    trading_pairs = [trading_pairs_CB, trading_pairs_KC]
    for x in range(len(trading_pairs)):
        ohlc_to_csv(trading_pairs[x], my_exchange_list[x], records, days)
Example #6
 def __init__(self, DetailsDF, LabelsPath):
     self.SubjectsDetails = DetailsDF
     self.SubjectsList = list(DetailsDF.index.unique())
     self.boolLabelsDF = DF(index=self.SubjectsList)
     self.contLabelsDF = DF(index=self.SubjectsList)
     self.LabelingDetails = {}
     self.LabelingMethod = None
     self.isSave = int(raw_input('save Labels? '))
     self.LabelsPath = LabelsPath
Example #7
def PredictSales(pdate=dt.date.today()):
    # get all salesmen details in the system
    #tsm.trainandsavemodel()

    data = pd.read_excel("Salesmandata.xlsx")
    productdata = pd.read_excel("Productdata.xlsx")
    productdata = productdata.values

    print("Weekday :", (calendar.day_name[pdate.weekday()]))
    # getting the day from the date
    day = calendar.day_name[pdate.weekday()]
    year = pdate.year
    month = pdate.month
    weekofYear = pdate.isocalendar()[1]
    print(weekofYear)

    # selecting the salesmen details for the day
    predicteddetailsDF = DF()
    salesmandata = data[data['Day'] == day]
    print(data['Day'])
    print(salesmandata)
    #
    salesmandata = salesmandata.values
    fileName = 'PredictedSales_' + '.xlsx'
    print(fileName)
    wo = pd.ExcelWriter(fileName)
    for salesman in salesmandata:
        # Prints the salesman data
        print("Salesman:" + str(salesman))
        predicteddetailsDF = DF()

        predicteddict = dict({'Salesman': [], 'Product': [], 'Sale': []})
        for product in productdata:
            predicteddict['Salesman'].append(salesman[2])
            predicteddict['Product'].append(product[1])
            # prints the product details
            print(product)

            predictsalelist = [
                product[0], year, month, weekofYear, salesman[1], product[2],
                salesman[0], salesman[3]
            ]
            # predictsale gets the value present inside predictsalelist
            predictsale = np.array([predictsalelist])
            # predictedValue gets the value returned by the function predictsale
            predictedValue = ps.predictsale(predictsale)
            # Prints the value returned by the function predictsale
            print("PredictedValue[0]", predictedValue[0])
            predicteddict['Sale'].append(predictedValue[0])

        predicteddetailsDF = pd.DataFrame(predicteddict)
        print("==================================")
        predicteddetailsDF.to_excel(wo, salesman[2])

    print(predicteddetailsDF)
    wo.save()
Example #8
def extract_features(filepath, model='VGG16', write_to=None):
    """ Reads an input image file, or directory containing images, and returns
    resulting extracted features. Use write_to=<some_filepath> to save the
    features somewhere. """

    # print('Extracting features')

    # Get the model
    # print('Acquiring model "{}"'.format(model), end='')
    m = named_model(model)
    # print('\rAcquired model\t\t\t\t\t')

    # Get the image filepaths
    filepath = filepath.replace('\\', '/')
    img_fps = []

    assert os.path.exists(filepath), \
        'Filepath does not exist: "{}"'.format(filepath)

    if os.path.isfile(filepath):
        ext = filepath.lower().rsplit('.', 1)[-1]
        assert ext in IMG_EXTS, \
            'Specified file "{}" is not in recognised image formats'.format(filepath)
        img_fps.append(filepath)

    elif os.path.isdir(filepath):
        for fn in os.listdir(filepath):
            ext = fn.rsplit('.', 1)[-1]
            if ext in IMG_EXTS:
                img_fps.append(os.path.join(filepath, fn))

    else:
        raise ValueError(
            'Filepath should be an image, or a directory containing images')

    # And the image filenames
    img_fns = [fp.replace('\\', '/').rsplit('/', 1)[-1] for fp in img_fps]

    # print('Found {} images'.format(len(img_fns)))

    # Run the extraction over each image
    features = []
    for (i, fp) in enumerate(img_fps):
        # print('\rProcessing: {:.2f}%\t\t'.format((i + 1) / len(img_fps) * 100), end='', flush=True)
        features.append(_extract(fp, m))

    # print('\nSuccess')

    # Make into a DataFrame and add an ID column
    features_df = DF(features, dtype=object)
    id_col = DF(img_fns, dtype=str)
    features_df.insert(0, 'ID', id_col)
    if write_to:
        features_df.to_csv(write_to, index=False)

    return features_df
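
A short, hypothetical call of the extractor above (the directory and output path are placeholders; IMG_EXTS, named_model and _extract are assumed to exist in the same module):

# extract VGG16 features for every recognised image in a folder and
# save them, with a filename ID column, to a CSV
features = extract_features('images/', model='VGG16', write_to='features.csv')
print(features.shape)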
Example #9
def matrix_scatter_pcs(names=None, tks=True, **kwargs):
    import itertools
    from pandas import DataFrame as DF
    import pandas as pd
    to_plot = []
    for key, value in kwargs.items():
        if key != "tks":
            if value.shape[0] > value.shape[1]:
                to_plot.append((key, DF(value.T)))
            else:
                to_plot.append((key, DF(value)))
    numvar = to_plot[0][1].shape[0]
    s = range(numvar)
    if names is None:
        names = ["PC " + str(x + 1) for x in s]
    fgr, axs = plt.subplots(numvar, numvar)
    # colors = cm.rainbow(np.linspace(0, 1, numvar))
    colors = ["blue", "red", "green"]
    for xidx, yidx in itertools.product(s, repeat=2):
        ax = axs[yidx, xidx]
        # Hide all ticks and labels
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        # Set up ticks only on one side for the "edge" subplots...
        if xidx == 0:
            ax.set_ylabel(names[yidx])
            ax.yaxis.set_ticks([])
            ax.yaxis.set_visible(True)
        if xidx == max(s):
            if tks:
                ax.yaxis.set_ticks_position('right')
                ax.yaxis.set_visible(True)
        if yidx == 0:
            ax.set_xlabel(names[xidx])
            ax.xaxis.set_label_position("top")
            ax.xaxis.set_visible(True)
            ax.xaxis.set_ticks([])
        if yidx == max(s):
            if tks:
                ax.xaxis.set_ticks_position('bottom')
                ax.xaxis.set_visible(True)
        xs = []
        ys = []
        i = 0
        for name, plot in to_plot:
            x, y = plot.iloc[xidx, :], plot.iloc[yidx, :]
            xs.append(x)
            ys.append(y)
            ax.scatter(x, y, s=3, color=colors[i], label=name)
            i += 1
        allxs = pd.concat(xs)
        allys = pd.concat(ys)
        ax.set_xlim([min(allxs), max(allxs)])
        ax.set_ylim([min(allys), max(allys)])
    return fgr, axs
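
An illustrative call with two synthetic sets of principal components (the keyword names groupA/groupB are arbitrary; matplotlib.pyplot is assumed to be imported as plt in the defining module):

import numpy as np

pcs_a = np.random.randn(3, 50)   # 3 PCs x 50 samples
pcs_b = np.random.randn(3, 50)

fgr, axs = matrix_scatter_pcs(tks=True, groupA=pcs_a, groupB=pcs_b)
fgr.savefig('pc_scatter_matrix.png')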
Example #10
def _read_tns_json(data, discrete, instantaneous, merge):
    instantaneous = ('tf' not in data
                     if instantaneous is None else instantaneous)
    if instantaneous:
        return ITemporalNodeSetDF(DF(data),
                                  discrete=discrete,
                                  no_duplicates=merge)
    else:
        return TemporalNodeSetDF(DF(data),
                                 discrete=discrete,
                                 disjoint_intervals=merge)
Example #11
def basis_corr(frame, algo='SparCC', **kwargs):
    '''
    Compute correlations between all columns of a counts frame.
    This is a wrapper around pysurvey.analysis.basis_correlations.main

    Parameters
    ----------
    frame : DataFrame
        2D array of counts. Columns are components, rows are samples.
    algo : str {SparCC (default)| clr| pearson| spearman| kendall}
        The algorithm to use for computing correlation.

    Returns
    -------
    cor_med: frame
        Estimated correlation matrix.
        Labels are column labels of input frame.
    cov_med: frame/None
        If method in {SparCC, clr}: estimated covariance matrix.
        Labels are column labels of input frame.
        Otherwise: None.

    =======   ============   =========   ================================================
    kwarg     Accepts        Default     Description
    =======   ============   =========   ================================================
    iter      int            20          number of estimation iterations to average over.
    oprint    bool           True        print iteration progress?
    th        0<th<1         0.1         exclusion threshold for SparCC.
    xiter     int            10          number of exclusion iterations for SparCC.
    norm      str            dirichlet   method used to normalize the counts to fractions.
    log       bool           True        log-transform fractions? used if method != SparCC/CLR
    =======   ============   =========   ================================================
    '''
    import SparCC
    comps  = frame.columns
    cor_med, cov_med, pval = SparCC.main(frame, algo=algo, **kwargs)
    #print cor_med.shape
    cor = DF(cor_med, index=comps, columns=comps)
    if cov_med is None:
        cov = None
    else:
        cov  = DF(cov_med, index=comps, columns=comps)
    #print "***********************************************************"
    #print "BASIS_CORR FUNCTION"
    #print "COR"
    #print cor
    #print "COV"
    #print cov
    #print "PVAL"
    #print pval
    #print "***********************************************************"
    return cor, cov, pval
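
A minimal usage sketch, assuming a counts DataFrame with samples as rows and components as columns (the data here is synthetic):

import numpy as np
import pandas as pd

counts = pd.DataFrame(np.random.randint(1, 100, size=(20, 5)),
                      columns=['otu_%d' % i for i in range(5)])

# estimate basis correlations with SparCC, averaging over 20 iterations
cor, cov, pval = basis_corr(counts, algo='SparCC', iter=20, th=0.1)
print(cor.shape)   # (5, 5), labelled by the columns of `counts`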
Example #12
def column_types_table(data):
    print('Number of columns of each type:')
    count_dtype = DF(data.dtypes.value_counts()).reset_index()
    count_dtype.columns = ['name', 'total']
    print(count_dtype)

    print('\nNumber of unique classes in each column:')
    for i in count_dtype['name'].values:
        print('Type: ', i)
        print(
            DF(data.select_dtypes(i).apply(
                pd.Series.nunique, axis=0)).sort_values(
                    by=[0], ascending=False).rename(columns={0: 'NUNIQUE'}))
Example #13
def label_analysis(data, label_name=None, feature_name=[]):
    print('LABEL CATEGORY Analysis')
    count_label = DF(data[label_name].value_counts()).reset_index()
    count_label.columns = ['cate', 'total']
    print(count_label)
    try:
        data[label_name].astype(int).plot.hist()
        plt.show()
    except:
        data[label_name].fillna(-1).astype(int).plot.hist()
        plt.show()

    # Describe 01
    if len(feature_name) == 0:
        feature_name = [i for i in data.columns if i not in [label_name]]
    print('Want To Watch: ', len(feature_name))
    print(feature_name)
    print('Describe in each columns: ')
    for i in count_label['cate'].values:
        print('Cate: ', i)
        print(data[data[label_name].astype(int) == i][feature_name].describe())

    print('CALC CORR')
    correlations = data.corr()[label_name].sort_values()
    print('Most Positive Correlations:\n', correlations.tail(15))
    print('\nMost Negative Correlations:\n', correlations.head(15))
Example #14
 def saveNoticeToCSV(self):
     result = DF(self.data)
     result.columns = ['title', 'url']
     file = '학교_학과_공지사항.csv'  # "school_department_notices.csv"
     if path.isfile(file):
         remove(file)
     result.to_csv(file, encoding='cp949')
Example #15
def get_pic(model, feature_name):
    ans = DF()
    ans['name'] = feature_name
    ans['score'] = model.feature_importances_
    #     print(ans[ans['score']>0].shape)
    return ans.sort_values(by=['score'],
                           ascending=False).reset_index(drop=True)
Example #16
def CollectCompanies(br):
    companyList = []
    for i in tqdm(range(1, 10)):
        url = f"http://www.annualreports.com/Companies?exch={i}"
        log.info(url)

        br.get(url)

        try:
            tbody = WebDriverWait(br, 30).until(
                EC.presence_of_element_located((By.XPATH, "//table/tbody")))
        except Exception as e:
            log.info(f"{url} ==> {e}")
            continue

        tbody.get_attribute('innerHTML')
        for tr in tbody.find_elements_by_xpath(".//tr"):
            row = {}
            td = tr.find_elements_by_xpath(".//td")
            row['CompanyNameAr'] = td[0].text
            row['UrlAr'] = td[0].find_element_by_xpath(".//a").get_attribute(
                'href')
            companyList.append(row)

        if debug:
            break

    return DF(companyList)
Example #17
def get_division_feature(data, feature_name):
    new_feature = []
    new_feature_name = []
    for i in range(len(data[feature_name].columns) - 1):
        for j in range(i + 1, len(data[feature_name].columns)):
            new_feature_name.append(data[feature_name].columns[i] + '/' +
                                    data[feature_name].columns[j])
            new_feature_name.append(data[feature_name].columns[i] + '*' +
                                    data[feature_name].columns[j])
            new_feature_name.append(data[feature_name].columns[i] + '+' +
                                    data[feature_name].columns[j])
            new_feature_name.append(data[feature_name].columns[i] + '-' +
                                    data[feature_name].columns[j])
            new_feature.append(data[data[feature_name].columns[i]] /
                               data[data[feature_name].columns[j]])
            new_feature.append(data[data[feature_name].columns[i]] *
                               data[data[feature_name].columns[j]])
            new_feature.append(data[data[feature_name].columns[i]] +
                               data[data[feature_name].columns[j]])
            new_feature.append(data[data[feature_name].columns[i]] -
                               data[data[feature_name].columns[j]])

    temp_data = DF(pd.concat(new_feature, axis=1))
    temp_data.columns = new_feature_name
    data = pd.concat([data, temp_data], axis=1).reset_index(drop=True)

    print(data.shape)

    return data.reset_index(drop=True)
Example #18
def get_division_feature(data, feature_name):
    # Build pairwise arithmetic features: for every pair of listed columns apply
    # /, *, + and -, and record each new feature's name as it is created
    # (unlike before, where the derived feature names were lost entirely).
    new_feature = []
    new_feature_name = []
    for i in range(len(data[feature_name].columns) - 1):
        for j in range(i + 1, len(data[feature_name].columns)):
            # save the newly created feature names and values
            new_feature_name.append(data[feature_name].columns[i] + '/' + data[feature_name].columns[j])
            new_feature_name.append(data[feature_name].columns[i] + '*' + data[feature_name].columns[j])
            new_feature_name.append(data[feature_name].columns[i] + '+' + data[feature_name].columns[j])
            new_feature_name.append(data[feature_name].columns[i] + '-' + data[feature_name].columns[j])
            new_feature.append(data[data[feature_name].columns[i]] / data[data[feature_name].columns[j]])
            new_feature.append(data[data[feature_name].columns[i]] * data[data[feature_name].columns[j]])
            new_feature.append(data[data[feature_name].columns[i]] + data[data[feature_name].columns[j]])
            new_feature.append(data[data[feature_name].columns[i]] - data[data[feature_name].columns[j]])

    temp_data = DF(pd.concat(new_feature, axis=1))
    temp_data.columns = new_feature_name
    data = pd.concat([data ,temp_data] ,axis=1).reset_index(drop=True)

    print(data.shape)

    return data.reset_index(drop=True)
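
An illustrative call on a tiny numeric frame (column names are made up); each pair of listed columns yields four new columns named with /, *, + and -:

import pandas as pd

data = pd.DataFrame({'f1': [1.0, 2.0, 3.0],
                     'f2': [4.0, 5.0, 6.0],
                     'f3': [7.0, 8.0, 9.0]})

data = get_division_feature(data, ['f1', 'f2', 'f3'])
print(data.columns.tolist())   # 3 original + 12 derived columns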
Example #19
def read_results_text_file(results_text_file, labels, instrument_type,
                           **kwargs):
    labels = pd.read_csv(labels, index_col=0, header=0)
    ret_DF = DF()
    if instrument_type == "BioTek1":
        data_split, Time_points = read_results_biotek1(results_text_file)
    elif instrument_type == "BioTek2":
        data_split, Time_points = read_results_biotek2(results_text_file)
    elif instrument_type == "BioAnalyzer":
        data_split, Time_points = read_results_bioanalyzer(results_text_file)
    for n, zz in enumerate(data_split):
        if zz[0] in labels.index:
            ret_DF.at[n, "Well"] = zz[0]
            ret_DF.at[n, "Drug"] = labels.loc[zz[0]].Drug
            ret_DF.at[n, "Concentration"] = labels.loc[zz[0]].Concentration
            ret_DF.at[n, "Incubation_time"] = labels.loc[zz[0]].Incubation_time
            ret_DF.at[n, "Sample_type"] = labels.loc[zz[0]].Type
            ret_DF.at[n, "Experiment"] = labels.loc[zz[0]].Experiment
            ret_DF.at[n, "Time_points"] = ";".join(Time_points)
            ret_DF.at[n, "OD"] = ";".join(blank_zero_hour(zz[1:]))
        else:
            print("%s well is not labelled correctly and will be skipped" % zz[0])
    if kwargs.get("seg") is True:
        ret_DF = ret_DF.loc[ret_DF["Experiment"].isin([kwargs["exp"]])]
    return ret_DF
Example #20
def permute_w_replacement(frame, axis=0):
    '''
    Permute the frame values across the given axis.
    Create a simulated dataset where the counts of each component (column)
    in each sample (row) are randomly sampled from all the
    counts of that component across all samples.
    
    Parameters
    ----------
    frame : DataFrame
        Frame to permute.
    axis : {0, 1}
        - 0 - Permute row values across columns
        - 1 - Permute column values across rows    
    
    Returns
    -------
    Permuted DataFrame (new instance).
    '''
    from numpy.random import randint 
    axis = 1-_get_axis(axis)
    s = frame.shape[axis]
    fun = lambda x: x.values[randint(0,s,(1,s))][0]
    #print "FRAME: ", frame
    #print "TYPE OF FRAME: ", type(frame)
    perm = DF(frame.apply(fun, axis=axis, result_type='broadcast'))
    #print "PERM: ", perm
    #print "TYPE OF PERM: ", type(perm)
    return perm
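
A short usage sketch with a toy frame (values are made up; axis=1 resamples each column's values across the rows, per the docstring above):

import pandas as pd

frame = pd.DataFrame({'otu_1': [1, 2, 3, 4],
                      'otu_2': [10, 20, 30, 40]})

# resample each column with replacement, keeping the original shape and labels
perm = permute_w_replacement(frame, axis=1)
print(perm.shape)   # (4, 2), same as the input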
Example #21
def generate(path) -> DF:
    files = os.listdir(path)

    dfs = []
    for f in files:
        if re.findall(r'\.json$', f):
            tab_col_name = filename_to_colname(f)
            with open(os.path.join(path, f), 'r') as fp:
                data = json.load(fp)
                metric = [round(d[2], 4) for d in data]
                step = [d[1] for d in data]
                df = DF(data=metric, index=step, columns=[tab_col_name])
                dfs.append(df)

    first = DF(dfs[0])
    newdf = first.join(dfs[1:], how='left')
    return newdf
Example #22
def get_label(start_date, end_date):
    merge_name = ['user_id', 'day']
    all_log = pd.concat(
        [action_log[merge_name], app_log[merge_name], video_log[merge_name]],
        axis=0)
    train_label = get_transform(all_log, start_date, end_date)
    train_1 = DF(list(set(
        train_label['user_id']))).rename(columns={0: 'user_id'})
    train_1['label'] = 1
    reg_temp = get_transform(register_log, 1, start_date - 1)
    train_1 = train_1[train_1['user_id'].isin(reg_temp['user_id'])]
    train_0 = DF(list(set(reg_temp['user_id']) -
                      set(train_1['user_id']))).rename(columns={0: 'user_id'})
    train_0['label'] = 0
    del train_label
    gc.collect()
    return pd.concat([train_1, train_0], axis=0)
Example #23
 def df(self):
     """
     convert sampler.chain into pandas.DataFrame for convenience.
     """
     _df = DF(self.sampler.flatchain)
     _df = _df.rename(columns={i: key for i, key in enumerate(self.keys)})
     _df["lnpost"] = self.sampler.flatlnprobability
     return _df
Example #24
def write_mag_diff():
    print('Writing: ', OFILE)
    # Create data frames
    # Note: 7*43 = 301
    lst1, lst2 = np.arange(301), mag_diff_f6_f8(f606, f814, 301)
    arr1, arr2 = np.array_split(lst1, NCOLS), np.array_split(lst2, NCOLS)
    df1, df2 = DF(arr1).T, DF(arr2).T

    # Assign column names
    clm = [ 'Galaxy_%d'%i for i in range(NCOLS)]
    df1.columns, df2.columns = clm, clm

    # Combine respective columns and create new df.
    for i in range(7):
        df1.insert(i*2+1,'Diff_%d'%i,df2['Galaxy_%d'%i])

    # Print and write df
    df1.columns = ['Galaxy', 'Diff'] * 7
    df1.to_csv(OFILE, float_format='%.3f', sep='\t', index=False)
Example #25
    def update(self):
        name = self.name
        period = self.period
        start = self.start
        data = DF(self.api.returnChartData(name, period, start, time()))

        out = {}
        for col in data.keys():
            out[col] = np.array(data[col]).astype('double')

        return out
Example #26
 def parse_soup(self):
     '''Insert the values in the original dataframe. Convert time to
        numerical format to make calculations easier.'''
     self.df = DF()
     self.df['start_date'] = [d['startDate'] for d in self.soup_obj]
     self.df['end_date'] = [d['endDate'] for d in self.soup_obj]
     self.df[self.measure_unit] = [d['value'] for d in self.soup_obj]
     self.df['source'] = [d['sourceName'] for d in self.soup_obj]
     self.df[self.measure_unit] = self.df[self.measure_unit].astype(float)
     return self.df
Example #27
 def _detail_area(self, area):
     city_row = self._city_row(area)
     name = city_row[0][0]
     # if the area can be subdivided further
     sp = 'children' in area
     if sp:
         for city in area['children']:
             city_row = city_row + self._detail_area(city)
     df = DF(city_row, columns=self.__header)
     self.area_dict[name] = df
     return (city_row)
Example #28
    def to_dataframe(self, **kwargs):
        """
        Convert the system into a dataframe, from the
        `py:meth:~System.serialize` method.

        Args:
            **kwargs: arguments to be passed to
                `py:meth:System.serialize` method
        """
        from pandas import DataFrame as DF
        df = DF(self.serialize(**kwargs))
        validate_dataframe_representation(self, df)
        return df
Example #29
 def _dict2DF(self, d, noneval, dropna=False):
     df = DF(noneval, index=self.row_labels, columns=self.col_labels, dtype=object)
     for k, res in d.items():
         i, j = self._positions[k]
         df[j][i] = res
     try:
         df = df.astype(float)
     except:
         pass
     if dropna:
         return df.dropna(axis=0, how='all').dropna(axis=1, how='all')
     else:
         return df
Example #30
def plot_cm(cm,
            title='Confusion Matrix',
            display_labels=None,
            output_file=None):
    cmap = 'PuRd'
    cm = np.array(cm)
    cmpp.pretty_plot_confusion_matrix(DF(cm,
                                         index=display_labels,
                                         columns=display_labels),
                                      cmap=cmap,
                                      title=title,
                                      output_file=results_path + 'graphs/' +
                                      output_file)
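
A hypothetical call (cmpp, results_path and the graphs/ directory are module-level assumptions of the function above):

# 2x2 confusion matrix for a binary classifier
plot_cm([[50, 3], [5, 42]],
        title='Binary Confusion Matrix',
        display_labels=['negative', 'positive'],
        output_file='binary_cm.png')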