Example #1
def print_full(x):
    """Print the entire Dataframe / Series."""
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    print(x)
    pd.reset_option('display.max_columns')
    pd.reset_option('display.max_rows')
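A minimal usage sketch for this helper (the sample DataFrame is hypothetical; pandas is assumed to be imported as pd):

import pandas as pd

df = pd.DataFrame({'value': range(100)})  # longer than the default 60-row display limit
print_full(df)  # prints all 100 rows, then restores the display defaults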
Example #2
def print_full(df): 
    '''
    print all rows of pd.DataFrame
    '''
    pd.set_option('display.max_rows', len(df))
    print(df)
    pd.reset_option('display.max_rows')
Example #3
def print_full(x):
    '''
    Helper function to plot the *full* dataframe.
    '''
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
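The same effect can be written with pd.option_context, which restores the option automatically when the block exits (a sketch equivalent to the helpers above):

def print_full(df):
    # option_context restores 'display.max_rows' on exit, even on error
    with pd.option_context('display.max_rows', len(df)):
        print(df)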
Example #4
def dataset_bucket_analysis_by_field(field):
    # Set the dataset
    hotel_dataset_properties = {Constants.BUSINESS_TYPE_FIELD: 'fourcity_hotel'}
    Constants.update_properties(hotel_dataset_properties)

    records = ETLUtils.load_json_file(Constants.PROCESSED_RECORDS_FILE)

    print('Loaded %d records' % len(records))

    user_frequency_map = {}

    for record in records:

        user_id = record[field]
        if user_id not in user_frequency_map:
            user_frequency_map[user_id] = 0
        user_frequency_map[user_id] += 1

    print('There is a total of %d %ss' % (len(user_frequency_map), field))
    sorted_x = sorted(user_frequency_map.items(), key=operator.itemgetter(1), reverse=True)
    print(sorted_x[0])
    print(sorted_x[1])
    print(sorted_x[2])
    # print(user_frequency_map)

    # Number of reviews per user
    rda = ReviewsDatasetAnalyzer(records)
    users_summary = rda.summarize_reviews_by_field(field)
    print('Average number of reviews per %s: %f' % (field,
          float(rda.num_reviews) / rda.num_users))
    users_summary.plot(kind='line', rot=0)

    pandas.set_option('display.max_rows', len(users_summary))
    print(users_summary)
    pandas.reset_option('display.max_rows')
Example #5
def genes_from_features_index(column_wildcard):
    corr_type = 'whole-gene from features'
    usage = 'Example Usage: top_genes_from_features/T1T3?n=10&min_exprs=100&sort_col=1'
    result_elements = list() 

    (n, expression_threshold, sort_col, thr) = get_top_list_args( request.args )  
    some_results = gene_from_features_results.filter(regex=column_wildcard)[gene_from_features_results['mean']>expression_threshold]

    some_results = some_results.loc[some_results.filter(regex='_df$').min(axis=1) > 1]

    if not n:
        n = some_results.shape[0]

    selected_column = some_results.columns[np.abs(sort_col)]
    some_results = some_results.sort_values(selected_column, ascending=n > 0)
    n = abs(n)
    if thr:
        n = min(n, sum(some_results[selected_column]<thr) )

    some_results = some_results.head( n=n ) 
    some_results.columns = [c.replace('_', ' ') for c in some_results.columns] 
    selected_column = some_results.columns[sort_col]

    gene_set_for_search = '['+''.join( ["{'gene':'"+c+"'}," for c in some_results.index])+']'

    scatterize_all_data = ';'.join(some_results.index)
    scatterize_all_link = '<a href="../scatterize_list/genes?list='+scatterize_all_data+'">Scatterize these genes.</a>'
    scatterize_link_notes = 'This will link to a Scatterize page with all these genes along with various behavioral and physiological measures of interest, including AT.'

    enrichr_all_data = ';'.join(some_results.index)
    enrichr_all_label = selected_column.replace(' ','_')+'_top_'+str(n)+'_genes_over_'+str(expression_threshold)+'_reads'
    enrichr_all_link = '<a href="../enrichr_list/'+enrichr_all_data+'?analysis_name='+enrichr_all_label+'">Enrichr these genes.</a>'
    enrichr_link_notes = 'This will link to Enrichr for gene enrichment analyses of the genes listed on this page.'

    export_all_data = ';'.join(some_results.index)
    export_all_label = selected_column.replace(' ','_')+'_top_'+str(n)+'_genes_over_'+str(expression_threshold)+'_reads'
    export_all_link = '<a href="../export_list/'+export_all_label+'.txt?list='+export_all_data+'">Export this list</a>' 
    export_link_notes = 'This will return a .txt with the genes on this page.' 

    some_results['Gene Name'] = ["<a href=\"/results/"+c+"\">"+c+"</a>" for c in some_results.index ]

    cols = some_results.columns.values
    cols = list(cols[-1:]) + list(cols[:-1])
    some_results = some_results[cols]

    pd.set_option('display.max_colwidth', None)
    gene_list = some_results.to_html( classes='table table-striped', escape=False, index=False)
    pd.reset_option('display.max_colwidth')


    gene_list_notes = 'The "Gene Name" links to more info on the gene.'
    result_elements.append( {'title': 'Top Gene List ('+column_wildcard+')', 'notes': gene_list_notes, 'content': Markup(gene_list) } )
    result_elements.append( {'title': 'Scatterize feature list', 'notes': scatterize_link_notes, 'content': Markup(scatterize_all_link) } )
    result_elements.append( {'title': 'Enrichr feature list', 'notes': enrichr_link_notes, 'content': Markup(enrichr_all_link) } )
    result_elements.append( {'title': 'Export feature list', 'notes': export_link_notes, 'content': Markup(export_all_link) } )


    this_title = selected_column
    return render_template('top_list.html', **locals())
Example #6
def print_flux_bounds(model):
    """ Prints flux bounds for all reactions. """
    info = []
    for r in model.reactions:
        info.append([r.id, r.lower_bound, r.upper_bound])
    df = DataFrame(info, columns=['id', 'lb', 'ub'])
    pd.set_option('display.max_rows', len(df))
    print(df)
    pd.reset_option('display.max_rows')
Example #7
 def __str__(self):
     """
     Currently displays Notations as a very long pandas.Series
     """
     import pandas as pd
     pd.options.display.max_rows=9999
     string = pd.Series(self.__dict__).__str__()
     pd.reset_option('max_rows')
     return string
Example #8
def pd_display_size_set(max_columns=None, max_rows=None):
    if max_columns:
        pandas.set_option('display.max_columns', max_columns)
    else:
        pandas.reset_option('display.max_columns')
    if max_rows:
        pandas.set_option('display.max_rows', max_rows)
    else:
        pandas.reset_option('display.max_rows')
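Example calls (the values are illustrative); note that because the checks are truthiness-based, passing 0 also resets the corresponding option:

pd_display_size_set(max_columns=50, max_rows=500)  # widen both display limits
pd_display_size_set()                              # restore the pandas defaults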
Example #9
    def test_repr_chop_threshold(self):
        df = DataFrame([[0.1, 0.5],[0.5, -0.1]])
        pd.reset_option("display.chop_threshold") # default None
        self.assertEqual(repr(df), '     0    1\n0  0.1  0.5\n1  0.5 -0.1')

        with option_context("display.chop_threshold", 0.2 ):
            self.assertEqual(repr(df), '     0    1\n0  0.0  0.5\n1  0.5  0.0')

        with option_context("display.chop_threshold", 0.6 ):
            self.assertEqual(repr(df), '   0  1\n0  0  0\n1  0  0')

        with option_context("display.chop_threshold", None ):
            self.assertEqual(repr(df),  '     0    1\n0  0.1  0.5\n1  0.5 -0.1')
Example #10
    def test_isnull_for_inf_deprecated(self):
        # gh-17115
        s = Series(['a', np.inf, np.nan, 1.0])
        with tm.assert_produces_warning(DeprecationWarning,
                                        check_stacklevel=False):
            pd.set_option('mode.use_inf_as_null', True)
            r = s.isna()
            dr = s.dropna()
            pd.reset_option('mode.use_inf_as_null')

        e = Series([False, True, True, False])
        de = Series(['a', 1.0], index=[0, 3])
        tm.assert_series_equal(r, e)
        tm.assert_series_equal(dr, de)
Example #11
def print_fullPandas(x):
    """print_fullPandas is a helper function for printing a full dataframe

    :param x: the pandas dataframe to be printed
    :type x: a pandas dataframe
    :returns: no return, simply prints the full dataframe and then resets the
        default pandas print values.
    
    """
    pd.set_option('display.max_rows', len(x))
    pd.set_option('display.max_columns', 200)
    print(x)
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
Example #12
def full_print(df):
    '''
    Routine to fully print pandas.core.frame.DataFrame or pandas.core.series.Series objects.

    Inputs:
        1. df  ::  pandas.core.frame.DataFrame object containing tabular data, of size-fractionated zooplankton biomass
    '''
    import pandas as pd

    assert isinstance(df, (pd.DataFrame, pd.Series)), \
        '\'df\' is not of either pandas.core.frame.DataFrame or pandas.core.series.Series types.'

    pd.set_option('display.max_rows', len(df))
    print(df)
    pd.reset_option('display.max_rows')
Example #13
    def test_info_wide(self):
        from pandas import set_option, reset_option
        io = StringIO()
        df = DataFrame(np.random.randn(5, 101))
        df.info(buf=io)

        io = StringIO()
        df.info(buf=io, max_cols=101)
        rs = io.getvalue()
        self.assertTrue(len(rs.splitlines()) > 100)
        xp = rs

        set_option('display.max_info_columns', 101)
        io = StringIO()
        df.info(buf=io)
        self.assertEqual(rs, xp)
        reset_option('display.max_info_columns')
Example #14
 def execute(cls, ctx, op: "DataFrameAggregate"):
     try:
         pd.set_option('mode.use_inf_as_na', op.use_inf_as_na)
         if op.stage == OperandStage.map:
             cls._execute_map(ctx, op)
         elif op.stage == OperandStage.combine:
             cls._execute_combine(ctx, op)
         elif op.stage == OperandStage.agg:
             cls._execute_agg(ctx, op)
         elif op.raw_func == 'size':
             xp = cp if op.gpu else np
             ctx[op.outputs[0].key] = xp.array(ctx[op.inputs[0].key].agg(op.raw_func, axis=op.axis)) \
                 .reshape(op.outputs[0].shape)
         else:
             ctx[op.outputs[0].key] = ctx[op.inputs[0].key].agg(op.raw_func, axis=op.axis)
     finally:
         pd.reset_option('mode.use_inf_as_na')
Example #15
def print_full_dataframe(df, file_path=None):
    """
    :param df: pandas Dataframe to print
    :param file_path: path where to save the Dataframe printing (optional)
    :return: None, but prints the DataFrame and, optionally, saves the same output to disk
    """
    pd.set_option('display.max_rows', len(df))

    print(df)

    if file_path is not None:
        orig_stdout = sys.stdout
        with open(file_path, 'w') as f:
            sys.stdout = f
            print(df)
        sys.stdout = orig_stdout
    pd.reset_option('display.max_rows')
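A short usage sketch (the file name is illustrative):

import sys  # required by the helper when file_path is given
import pandas as pd

df = pd.DataFrame({'a': range(100)})
print_full_dataframe(df, file_path='df_dump.txt')  # prints to the console and writes the same text to disk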
Example #16
    def execute_2D_binning(vis: Vis):
        pd.reset_option("mode.chained_assignment")
        with pd.option_context("mode.chained_assignment", None):
            x_attr = vis.get_attr_by_channel("x")[0].attribute
            y_attr = vis.get_attr_by_channel("y")[0].attribute

            vis._vis_data["xBin"] = pd.cut(vis._vis_data[x_attr],
                                           bins=lux.config.heatmap_bin_size)
            vis._vis_data["yBin"] = pd.cut(vis._vis_data[y_attr],
                                           bins=lux.config.heatmap_bin_size)

            color_attr = vis.get_attr_by_channel("color")
            if len(color_attr) > 0:
                color_attr = color_attr[0]
                groups = vis._vis_data.groupby(
                    ["xBin", "yBin"], history=False)[color_attr.attribute]
                if color_attr.data_type == "nominal":
                    # Compute mode and count. Mode aggregates each cell by taking the majority vote for the category variable. In cases where there are ties across categories, pick the first item (.iat[0])
                    result = groups.agg([
                        ("count", "count"),
                        (color_attr.attribute,
                         lambda x: pd.Series.mode(x).iat[0]),
                    ]).reset_index()
                elif color_attr.data_type == "quantitative" or color_attr.data_type == "temporal":
                    # Compute the average of all values in the bin
                    result = groups.agg([("count", "count"),
                                         (color_attr.attribute, "mean")
                                         ]).reset_index()
                result = result.dropna()
            else:
                groups = vis._vis_data.groupby(["xBin", "yBin"],
                                               history=False)[x_attr]
                result = groups.count().reset_index(name=x_attr)
                result = result.rename(columns={x_attr: "count"})
                result = result[result["count"] != 0]

            # convert type to facilitate weighted correlation interestingness calculation
            result["xBinStart"] = result["xBin"].apply(
                lambda x: x.left).astype("float")
            result["xBinEnd"] = result["xBin"].apply(lambda x: x.right)

            result["yBinStart"] = result["yBin"].apply(
                lambda x: x.left).astype("float")
            result["yBinEnd"] = result["yBin"].apply(lambda x: x.right)

            vis._vis_data = result.drop(columns=["xBin", "yBin"])
Example #17
def get_print_full(x):
    """Same as print_full, but returns string."""

    # Change the output to capture the string instead of sending it to the console
    old_stdout = sys.stdout
    sys.stdout = newstdout = StringIO()  # capture all print
    try:
        pd.set_option("display.max_rows", len(x))
        print(x)
        string = newstdout.getvalue()  # Get string output
    finally:
        sys.stdout = old_stdout
        pd.reset_option("display.max_rows")

    return string[:-1]
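Usage sketch: the captured string can be logged or written elsewhere instead of going to the console:

df = pd.DataFrame({'a': range(100)})
s = get_print_full(df)
assert '99' in s  # every row appears in the captured output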
Example #18
def print_full(data):
    """Prints a dataframe at full width.
    Useful for peeking at the data for debugging.

    Parameters
    ----------
    data : Pandas dataframe
        The experimental results

    Returns
    -------
    Nothing
    """
    pd.set_option('display.max_rows', len(data), "display.max_columns", 500,
                  "display.width", 1000)
    print(data)
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
    pd.reset_option('display.width')
Example #20
def dfn(*x):
    pd.reset_option('display.max_columns')
    pd.reset_option('display.max_rows')
    df_concat = []
    for i in range(len(x)):
        row = len(x[0])
        blank = [''] * row
        tabn = '{' + str(i + 1) + '}'
        blank = pd.DataFrame(blank, columns=[tabn])
        xx = pd.DataFrame(x[i])
        if i == 0:
            df_concat = pd.concat([xx, blank], axis=1)
        else:
            df_concat = pd.concat([df_concat, xx, blank], axis=1)
    df_concat.replace(np.nan, '', inplace=True)
    display(df_concat)
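A usage sketch (the frames are hypothetical; display here is IPython's, which the helper itself relies on):

import numpy as np
import pandas as pd
from IPython.display import display

a = pd.DataFrame({'x': [1, 2, 3]})
b = pd.DataFrame({'y': [4, 5]})
dfn(a, b)  # renders the two frames side by side, separated by '{1}' and '{2}' spacer columns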
Example #21
    def test_info_wide(self):
        from pandas import set_option, reset_option

        io = StringIO()
        df = DataFrame(np.random.randn(5, 101))
        df.info(buf=io)

        io = StringIO()
        df.info(buf=io, max_cols=101)
        rs = io.getvalue()
        assert len(rs.splitlines()) > 100
        xp = rs

        set_option("display.max_info_columns", 101)
        io = StringIO()
        df.info(buf=io)
        assert rs == xp
        reset_option("display.max_info_columns")
Example #22
def Hitting_league_leaders():
    con = sqlite3.connect("MLB_Stats.sqlite") #connect to database
    print("Hitting leaders from around the league:\n")
    AVG = pd.read_sql_query("SELECT Name, AVG, Team FROM Batting_Stats WHERE AB >45 ORDER BY AVG DESC LIMIT 1",con)
    H = pd.read_sql_query("SELECT Name, H, Team FROM Batting_Stats ORDER BY H DESC LIMIT 1",con)
    Dbl = pd.read_sql_query('SELECT Name, "2B", Team FROM Batting_Stats ORDER BY "2B" DESC LIMIT 1',con)
    Trip = pd.read_sql_query('SELECT Name, "3B", Team FROM Batting_Stats ORDER BY "3B" DESC LIMIT 1',con)
    HR = pd.read_sql_query("SELECT Name, HR, Team FROM Batting_Stats ORDER BY HR DESC LIMIT 1",con)
    RBI = pd.read_sql_query("SELECT Name, RBI, Team FROM Batting_Stats ORDER BY RBI DESC LIMIT 1",con)
    pd.set_option('display.max_colwidth', 40)
    pd.options.display.float_format = '{:,.3f}'.format
    print("Average:\n",AVG.to_string(index=False),"\n")
    pd.reset_option('display.float_format')
    print("Hits:\n",H.to_string(index=False),"\n")
    print("Doubles:\n",Dbl.to_string(index=False),"\n")
    print("Triples:\n",Trip.to_string(index=False),"\n")
    print("Home Runs:\n",HR.to_string(index=False),"\n")
    print("Runs Batted In:\n",RBI.to_string(index=False),"\n")
Example #23
def abundance(valueTable, numbins):
    queryCounts = valueTable['qseqid'].value_counts()
    binsize = int(math.ceil(float(queryCounts.max()) / numbins))

    print(queryCounts[queryCounts > 1200].index.values)

    abundanceDict = {}
    lastAmount = 0
    for i in range(1, (numbins + 1)):
        currentBin = binsize * i
        currentAmount = queryCounts[queryCounts <= currentBin].count()
        abundanceDict[currentBin] = currentAmount - lastAmount
        lastAmount = currentAmount
    x = pd.Series(abundanceDict).sort_index()
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
    return x
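A usage sketch with toy BLAST-style hits (the qseqid column name follows the function's expectation):

import math
import pandas as pd

valueTable = pd.DataFrame({'qseqid': ['q1'] * 6 + ['q2'] * 3 + ['q3']})
abundance(valueTable, numbins=3)  # prints and returns the per-bin query counts as a Series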
Example #24
def print_full(df: pd.DataFrame, num_rows: int = 100) -> None:
    '''Print the first num_rows rows of dataframe in full

    Resets display options back to default after printing
    '''
    pd.set_option('display.max_rows', len(df))
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 2000)
    pd.set_option('display.float_format', '{:20,.2f}'.format)
    pd.set_option('display.max_colwidth', None)
    display(df.iloc[0:num_rows])
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
    pd.reset_option('display.width')
    pd.reset_option('display.float_format')
    pd.reset_option('display.max_colwidth')

    return None
Example #25
def value_iteration(a=0.8, b=0.1):
    R = gridWorld()
    discount = 0.99
    values = [[] for i in range(17)]
    actions = [[] for i in range(17)]

    for i in range(17):
        values[i] += [R[i]]

    for t in range(1, 40):
        terminate = True
        for i in range(17):
            if i == 16:
                values[i] += [0]
                actions[i] += [None]
                break
            maxreward = -1 * float('inf')
            bestaction = None
            for action in [up, down, left, right]:
                actionsum = 0
                for transition in get_new_stage_and_probs(i, action, a, b):
                    prob = transition[1]
                    new_state = transition[0]
                    reward = R[i]
                    actionsum += prob * (reward +
                                         discount * values[new_state][t - 1])
                if actionsum >= maxreward:
                    maxreward = actionsum
                    bestaction = action
            values[i] += [maxreward]
            if values[i][t] - values[i][t - 1] > 0.01:
                terminate = False
            actions[i] += [bestaction]
        if terminate:  #terminate check
            break

    values = np.array(values)
    actions = np.array(actions)
    import pandas as pd
    pd.set_option('display.max_columns', 28)
    pd.set_option('display.max_rows', 28)
    print(pd.DataFrame(values))
    print(pd.DataFrame(actions))
    pd.reset_option('display.max_columns')
    pd.reset_option('display.max_rows')
Example #26
def main():
    Options.load()

    if Options.files['metadatajson'] is None:
        qStr = 'select ' + Options.query['select'] + ' where ' + Options.query['where']
        data = VavDataReader.importVavData(server=Options.query['client'],
                                           query=qStr)
    else:
        with open(Options.files['metadatajson']) as data_file:
            data = json.load(data_file)

    for key in data:
        data[key] = rename_sensors(data[key])
    
    if Options.files['outputjson'] is not None:
        VavDataReader.dictToJson(data, Options.files['outputjson'])

    if Options.files['outputcsv'] is not None or Options.output['printtoscreen']:
        print "Preprocessing finished. Processing now."
        if Options.output['vav'] is None:
            processed = processdata(data, Options.query['client'])
        else:
            processed = processdata(data, Options.query['client'], Options.output['vav'])
        print "Done processing."
        if Options.output['printtoscreen']:
            pd.set_option('display.max_rows', len(processed))
            print(processed)
            pd.reset_option('display.max_rows')
        if Options.files['outputcsv'] is not None:
            processed.to_csv(Options.files['outputcsv'])
    elif Options.files['outputjson'] is None:
        sys.stderr.write("ERROR: No output specified.\n"
                         "In config file, at least one of the following should"
                         " be true:\n"
                         "- outputJSON is set to something other than None\n"
                         "- outputCSV is set to something other than None\n"
                         "- printToScreen is set to True.\n"
                         "Please modify the config file to satisfy at least "
                         "one of these.\n")
        sys.stderr.flush()
        sys.exit(1)

    print('Done.')
Example #27
def print_full(x):
    pd.set_option('display.max_rows', len(x))
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 2000)
    # pd.set_option('display.float_format', '{:20,.2f}'.format)
    pd.set_option('display.max_colwidth', None)
    print(x)
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
    pd.reset_option('display.width')
    # pd.reset_option('display.float_format')
    pd.reset_option('display.max_colwidth')
Example #28
def finish_log():
    global log

    pd.set_option('display.max_colwidth', 1000)

    add_line_log('INFO', error_message='Finishing logging session')

    if (log.error_level >= MINIMUM_ERROR_LEVEL_TO_SEND_EMAIL
        ).any() and mc_email.IS_SENDING_EMAIL:
        date_yesterday = (dt.datetime.now() -
                          dt.timedelta(days=1)).strftime('%Y-%m-%d')

        mc_email.send_email(
            body=log.to_html(),
            subject='Failure in meteo station : {}'.format(date_yesterday),
            list_figures=log.figure.iteritems())

        add_line_log('INFO',
                     error_message='E-mail sent to: {}'.format(
                         mc_email.RECIPIENTS_EMAIL))
    else:
        add_line_log('INFO', error_message='E-mail not sent')

    if (log.error_level == 'INFO').all():
        print(
            '\n>> ALL THE LOG ISSUES ARE INFORMATIONAL - NO WARNING EMAIL SHOULD BE SENT'
        )

    working_path = os.getcwd()  # tries to read config file from the Current Working Directory where meteocheck is invoked

    log.to_csv(str(Path(working_path, FILENAME_SESSION_LOG)),
               sep='\t',
               index=False,
               header=False,
               mode='w')
    log.to_csv(str(Path(working_path, FILENAME_HISTORY_LOG)),
               sep='\t',
               index=False,
               header=False,
               mode='a')

    pd.reset_option('display.max_colwidth')
Example #29
    def full_preprocessing(self,
                           normalize,
                           missing_strat,
                           process_strat,
                           label_age,
                           label_gender,
                           label_id,
                           print_missing=True,
                           print_columns=False,
                           poly_degree=2):

        self.preprocessing(label_id)

        if normalize:
            self.normalize_age(label_age, label_gender)

        # We print the distribution of missing values (8)
        if print_missing:
            print('printing missing')
            missing_val_columns = self.apply(pd.value_counts)[8:]
            pd.set_option('display.max_columns', missing_val_columns.shape[1])
            print(missing_val_columns)
            pd.reset_option('display.max_columns')

        if missing_strat == 'Binary':
            # We create the binary columns
            self.create_missing_data_col()

        if missing_strat in ['Replacement', 'Binary']:
            # We replace missing values in the ADOS answers (8) by 0
            self.replace(8, 0, inplace=True)

        if print_columns:
            print(self.columns)
        if process_strat in ['pca_comp']:
            self.create_components_feat()
        if process_strat in ['indicator', 'interaction_ind']:
            self.create_indicators_columns()
        if process_strat in ['poly', 'interaction_ind']:
            self.create_poly_columns(poly_degree)
        if print_columns:
            print(self.columns)
        self.drop_constant_columns()
Example #30
def CompareXlsx(filename):
    #Program begin
    Gou = []
    for fid, fname in enumerate(filename):
        df = pd.read_excel(fname,
                           sheet_name='Sheet1',
                           header=None,
                           usecols=[8, 9])
        df.columns = ['ID', 'name']
        df.sort_values(by='ID', inplace=True)
        df.dropna(inplace=True)
        df.drop_duplicates(subset='ID', inplace=True)
        df.reset_index(drop=True, inplace=True)
        for idnum in range(len(df['ID']) - 1, -1, -1):
            if isinstance(df['ID'][idnum], int):
                break
            df.drop(idnum, inplace=True)
        excel = df.reset_index(drop=True)
        Gou.append(excel['ID'])
        del excel
        del df

    com1 = Gou[1][Gou[1].isin(Gou[0])]
    for comnum in range(2, len(filename)):
        com2 = Gou[comnum].isin(com1)
        if 0 == sum(com2):
            com1 = []
            print('Output:')
            print('No Same data')
            return
        com1 = com1[com2]

    del Gou
    com1.reset_index(drop=True, inplace=True)
    samenum = len(com1)
    print('Output:')
    print('SAME:')
    pd.set_option('display.max_rows', samenum)
    print(com1)
    pd.reset_option('display.max_rows')
    print('NUM:%d' % samenum)
    com1.to_csv('compare.csv', index=False)
Example #31
 def execute(cls, ctx, op):
     try:
         pd.set_option('mode.use_inf_as_na', op.use_inf_as_na)
         if op.stage == OperandStage.map:
             cls._execute_map(ctx, op)
         elif op.stage == OperandStage.combine:
             cls._execute_combine(ctx, op)
         else:
             input_data = ctx[op.inputs[0].key]
             value = getattr(op, 'value', None)
             if isinstance(op.value, (Base, Entity)):
                 value = ctx[op.value.key]
             ctx[op.outputs[0].key] = input_data.fillna(
                 value=value,
                 method=op.method,
                 axis=op.axis,
                 limit=op.limit,
                 downcast=op.downcast)
     finally:
         pd.reset_option('mode.use_inf_as_na')
Example #32
    def pprint_parts(self):
        """Pretty print the parts of the energy system to the console."""
        df_parts = self.list_parts()  # Get DataFrame with all parts

        A = self.calc_investment()
        A_funding = self.calc_investment(include_funding=True)

        pd.set_option('precision', 2)  # Set the number of decimal points
        pd.set_option('display.float_format', self.f_space)
        print('------------- List of parts -------------')
        print(df_parts.to_string())
        print('-----------------------------------------')
        print('Total investment costs:   ', self.f_space(A))
        if A != A_funding:
            print('Investment after funding: ', self.f_space(A_funding))
        print('-----------------------------------------')
        pd.reset_option('precision')  # ...and reset the setting from above
        pd.reset_option('display.float_format')

        return df_parts
Example #33
def print_results(results):
    """Print GRAFIMO results on terminal without storing them on 
    the three files (TSV, HTML, GFF3)

    Parameters
    ----------
    results : pandas.DataFrame
        GRAFIMO results
    """

    if not isinstance(results, pd.DataFrame):
        errmsg: str = "\n\nERROR: the results must be stored in a pandas DataFrame"
        raise NoDataFrameException(errmsg)

    # temporarily raise the pandas row limit to avoid the default
    # truncated DataFrame printing (which cuts the majority of lines)
    pd.set_option("display.max_rows", len(results))
    print()  # newline
    print(results)
    pd.reset_option("display.max_rows")
Example #34
    def write_to_html(self):
        pandas.set_option('display.max_colwidth', None)
        header = '{!s}'.format(self.df.index.tolist()[0])
        df = self.df.reset_index(level=['Clf.', 'Set_Type', 'Eval.'])
        if '#Rep.' in df:
            df.drop('#Rep.', axis=1, inplace=True)

        df.drop('Eval.', axis=1, inplace=True)
        df.drop('Set_Size', axis=1, inplace=True)
        df.drop('Set_Type', axis=1, inplace=True)
        df.drop('f1', axis=1, inplace=True)
        df.drop('precision', axis=1, inplace=True)
        df.columns = [
            'Clf', '\\ac{DGA} Type', '\\ac{ACC}', '\\ac{TPR}', '\\ac{TNR}',
            '\\ac{FNR}', '\\ac{FPR}'
        ]
        fname = settings.ANALYSIS_FOLDER + '/eval_full.html'
        with open(fname, 'w') as f:
            f.write(df.to_html())
        pandas.reset_option('display.max_colwidth')
Example #35
def generatestockfile(stockslist): 
    # Fetch the data
    date1 = '2000-01-01'
    date2 = '2020-12-01'
    data = yf.download(stockslist, date1, date2)
    '''data['Adj Close'].plot()
    plt.show()'''
    # Print first 5 rows of the data
    #print(data.head(5))

    for company in stockslist:
        print(company)  
        datas = yf.download(company, date1, date2)  #gets the data of the company between the given dates.
        print("datas:\n", datas)  
        pd.set_option('display.max_rows', len(datas))  #this is here to get all of the data shown in the text file. 

        #the following section is to put the data into a text file:
        with open('rawdata/' + company + '.stock.txt', 'w') as stocks: 
            print(datas, file=stocks)
        pd.reset_option('display.max_rows') #reset the length 
Example #36
        def restructure_data():
            r"""
            Restructures data read from a csv file.

            Method creates a two-dimensional DataFrame containing the power
            coefficient curve or power curve of the requested wind turbine.

            Returns
            -------
            Tuple (pandas.DataFrame, float)
                Power curve or power coefficient curve (pandas.DataFrame)
                and nominal power (float).
                Power (coefficient) curve DataFrame contains power coefficient
                curve values (dimensionless) or power curve values in W with
                the corresponding wind speeds in m/s.

            """
            df = read_turbine_data(filename=filename)
            wpp_df = df[df.turbine_id == self.turbine_name]
            # if turbine not in data file
            if wpp_df.shape[0] == 0:
                pd.set_option('display.max_rows', len(df))
                logging.info('Possible types: \n{0}'.format(df.turbine_id))
                pd.reset_option('display.max_rows')
                sys.exit('Cannot find the wind converter type: {0}'.format(
                    self.turbine_name))
            # if turbine in data file write power (coefficient) curve values
            # to 'data' array
            ncols = ['turbine_id', 'p_nom', 'source', 'modificationtimestamp']
            data = np.array([0, 0])
            for col in wpp_df.keys():
                if col not in ncols:
                    if wpp_df[col].iloc[0] is not None and not np.isnan(
                            float(wpp_df[col].iloc[0])):
                        data = np.vstack(
                            (data, np.array([float(col),
                                             float(wpp_df[col])])))
            data = np.delete(data, 0, 0)
            df = pd.DataFrame(data, columns=['wind_speed', 'values'])
            nominal_power = wpp_df['p_nom'].iloc[0]
            return df, nominal_power
Example #37
def order_table(orders, name):

    asks = orders['asks']
    asks = pd.DataFrame(asks, columns=['price', 'amount'])
    asks.index = asks.index + 1
    asks = asks.sort_index()

    bids = orders['bids']
    bids = pd.DataFrame(bids, columns=['price', 'amount'])
    bids.index = bids.index + 1
    bids = bids.sort_index()

    print(name + ':')

    table = pd.concat([asks, bids], axis=1)
    #set display option
    pd.set_option('display.max_rows', len(table))
    print(table)
    #reset
    pd.reset_option('display.max_rows')
    print()
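A usage sketch with a hypothetical order book (price/amount pairs):

orders = {'asks': [[101.5, 2.0], [101.6, 1.2]],
          'bids': [[101.4, 3.1], [101.3, 0.7]]}
order_table(orders, 'BTC/USD')  # prints asks and bids side by side with 1-based row numbers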
Example #38
def write_clean_tokens_to_file(data, pos, multiword, punc_marks, stopwords,
                               window):
    path_extras = get_path_extras(multiword, punc_marks, stopwords, window)
    pd.set_option('display.max_rows', len(data))
    df = pd.DataFrame([data])
    df.to_csv(PREP_TRAINING_DATA_PATH + path_extras + '.csv',
              mode='a',
              float_format='%.5f',
              na_rep="NAN!",
              header=False,
              index=False,
              line_terminator="")
    df = pd.DataFrame([pos])
    df.to_csv(POS_TRAINING_DATA_PATH + path_extras + '.csv',
              mode='a',
              float_format='%.5f',
              na_rep="NAN!",
              header=False,
              index=False,
              line_terminator="")
    pd.reset_option('display.max_rows')
Example #39
def display(X, rows=None, where="inline", name="df"):
    if (rows == 'all'):
        rows = 2000
    elif (type(rows) is int):
        rows *= 2
    else:
        rows = 10

    if isinstance(X, pd.DataFrame) or isinstance(
            X, pd.Series) or (isinstance(X, np.ndarray) and X.ndim <= 2):
        X = pd.DataFrame(X)
        if (where == "popup"):
            filename = name + ".html"
            X.to_html(filename)
            webbrowser.open(filename, new=2)
        else:
            pd.set_option('display.max_rows', rows)
            ipd.display(X)
            pd.reset_option('display.max_rows')
    else:
        print(X)
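Illustrative calls (row counts follow the helper's own doubling logic):

df = pd.DataFrame({'a': range(100)})
display(df)                     # default: about 10 rows
display(df, rows='all')         # raise the display limit to 2000 rows
display(df, rows=25)            # integer arguments are doubled, so roughly 50 rows
display(df, where='popup', name='results')  # write results.html and open it in a browser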
Example #40
    def execute_2D_binning(vis: Vis):
        pd.reset_option('mode.chained_assignment')
        with pd.option_context('mode.chained_assignment', None):
            x_attr = vis.get_attr_by_channel("x")[0]
            y_attr = vis.get_attr_by_channel("y")[0]

            vis._vis_data.loc[:,"xBin"] = pd.cut(vis._vis_data[x_attr.attribute], bins=30)
            vis._vis_data.loc[:,"yBin"] = pd.cut(vis._vis_data[y_attr.attribute], bins=30)
            groups = vis._vis_data.groupby(['xBin','yBin'])[x_attr.attribute]
            result = groups.agg("count").reset_index() # .agg in this line throws SettingWithCopyWarning 
            result = result.rename(columns={x_attr.attribute:"z"})
            result = result[result["z"]!=0]

            # convert type to facilitate weighted correlation interestingness calculation
            result.loc[:,"xBinStart"] = result["xBin"].apply(lambda x: x.left).astype('float') 
            result.loc[:,"xBinEnd"] = result["xBin"].apply(lambda x: x.right)

            result.loc[:,"yBinStart"] = result["yBin"].apply(lambda x: x.left).astype('float')
            result.loc[:,"yBinEnd"] = result["yBin"].apply(lambda x: x.right)

            vis._vis_data = result.drop(columns=["xBin","yBin"])
Example #41
def print_predictions_comparison(df, predictions, label_name, num_of_rows=10):
    """ Print predictions next to actual values

    :param df: Pandas DataFrame containing the data
    :type df: pandas.DataFrame
    :param predictions: Array holding the predictions
    :type predictions: array
    :param label_name: Target label
    :type label_name: str
    :param num_of_rows: Number of rows to display
    :type num_of_rows: int
    """
    if len(df) != len(predictions):
        print("\n### Error: Length of values does not match\n")
        return
    pd.set_option('display.max_rows', num_of_rows)
    print("\n\n### Compare predictions to actual: ###\n")
    df['predictions'] = predictions
    print(df[["predictions", label_name]][0:num_of_rows])
    print("###########\n\n")
    pd.reset_option('display.max_rows')
Example #42
 def __append_dataframe(self,
                        data: PandasDataFrame,
                        max_line_width: int = 100000000,
                        **kwargs: Any):
     """Append to CSV file using dataframe.
     :param data: the dataframe to write.
     :param max_line_width: max line width (PANDAS: display.width).
     :param kwargs: any other arguments that the selected writer may accept.
     """
     self.__logger.debug("Append a Dataframe to the CSV file.")
     kwargs["header"] = False if "header" not in kwargs.keys(
     ) else kwargs["header"]
     kwargs["index"] = False if "index" not in kwargs.keys(
     ) else kwargs["index"]
     try:
         with open(self.__path, 'a') as f:
             pd.set_option("display.width", max_line_width)
             pd.set_option("display.max_rows", data.shape[0])
             pd.set_option("display.max_columns", data.shape[1])
             data.to_csv(f, header=kwargs["header"], index=kwargs["index"])
             pd.reset_option("display.width")
             pd.reset_option("display.max_rows")
             pd.reset_option("display.max_columns")
     except OSError:
         self.__logger.error(__name__ +
                             " - Can not append dataframe to file: \n" +
                             self.__path)
         sys.exit()
Example #43
def combine_results_stratified(var: str, outputs_dir: str, BCN: str, duration: int,
                               hydrology_IDs: list, run_dur_dic: dict = None,
                               remove_ind_dur: bool = True) -> dict:
    '''Combines the excess rainfall *.csv files for each duration into a
       single dictionary for all durations. A small value of 0.0001 is
       added so the result is not printed in scientific notation.
    '''
    pd.reset_option('^display.', silent=True)
    assert var in ['Excess_Rainfall', 'Weights'], 'Cannot combine results'
    dic = {}
    df_lst = []
    for ID in hydrology_IDs:
        scen = '{0}_Dur{1}_Hydro{2}'.format(BCN, duration, ID)
        file = outputs_dir/'{}_{}.csv'.format(var, scen)
        df = pd.read_csv(file, index_col = 0)
        if var == 'Excess_Rainfall':
            df_dic = df.to_dict()
            dates = list(df.index)
            ordin = df.index.name.title()
            events = {}
            for k, v in df_dic.items():
                if 'E' in k:
                    m = list(v.values())
                    m1 = [float(i) + 0.0001 if 0 < float(i) < 0.0001 else float(i) for i in m]
                    events[k] = m1
            key = 'H{0}'.format(str(ID).zfill(2))
            val = {'time_idx_ordinate': ordin,
                   'run_duration_days': run_dur_dic[str(duration)],
                   'time_idx': dates,
                   'pluvial_BC_units': 'inch/ts',
                   'BCName': {BCN: events}}
            dic[key] = val
        elif var == 'Weights':
            df_lst.append(df)
        if remove_ind_dur:
            os.remove(file)    
    if var == 'Weights':
        all_dfs = pd.concat(df_lst)
        weights_dic = all_dfs.to_dict()
        dic = {'BCName': {BCN: weights_dic['Weight']}}
        #print('Total Weight:', all_dfs['Weight'].sum())
    return dic
Example #44
def get_tweets(search_term, items = 1000, incl_retweets = False):
    """Uses the tweepy api to get tweets

    Parameters
    ----------
    search_term (string)
        The hashtag that you want to search Twitter for
    items (int)
        The amount of tweets to return, if available. Max set to 1000
    incl_retweets (boolean)
        Whether or not to include retweets

    Returns
    -------
    Returns an object of tweets
    """
    
    pd.reset_option('^display.', silent=True)
    
    # Clean input
    search_term = search_term.strip()
    
    if len(search_term.split()) > 1:
        search_term = search_term.replace(' ','+')
        
    if items > 1000:
        items = 1000
        print('Using max items of 1000.')

    if incl_retweets:
        q_string = '#' + search_term
    else:
        q_string = '#' + search_term + ' -filter:retweets'
        
    tweets = tweepy.Cursor(api.search,
                           q=q_string,
                           lang="en",
                           since=str(now.year)+'-01-01').items(items)
    
    return tweets
Example #45
def CompareXlsx(filename):
    #Program begin
    Gou = []
    for fid,fname in enumerate(filename):
        df = pd.read_excel(fname, sheet_name='Sheet1', header=None, usecols=[8, 9])
        df.columns=['ID','name']
        df.sort_values(by='ID',inplace=True)    
        df.dropna(inplace=True)
        df.drop_duplicates(subset='ID',inplace=True)
        df.reset_index(drop=True,inplace=True)
        for idnum in range(len(df['ID'])-1,-1,-1):
            if isinstance(df['ID'][idnum], int):
                break
            df.drop(idnum,inplace=True)
        excel = df.reset_index(drop=True)
        Gou.append(excel['ID'])
        del excel
        del df
    
    com1=Gou[1][Gou[1].isin(Gou[0])]
    for comnum in range(2,len(filename)):
        com2=Gou[comnum].isin(com1)
        if 0==sum(com2):
            com1=[]
            print('Output:')
            print('No Same data')
            return
        com1=com1[com2]
    
    del Gou
    com1.reset_index(drop=True,inplace=True)
    samenum=len(com1)
    print('Output:')
    print('SAME:')
    pd.set_option('display.max_rows',samenum)
    print(com1)
    pd.reset_option('display.max_rows')
    print('NUM:%d' %samenum)
    com1.to_csv('compare.csv',index=False)
Example #46
def get_cheapest_spot_instance(client):
    prices = get(client, 'describe_spot_price_history', 
            {#'InstanceTypes': instance_types,
             'StartTime' : datetime.now() - timedelta(days=1)
            })

    #convert the list of lists into a list of prices
    prices = list(itertools.chain(*[pl['SpotPriceHistory'] for pl in prices] ))

    #prices = list(prices)
    prices_df = pd.DataFrame.from_dict(prices)
    prices_df = prices_df[ prices_df.ProductDescription == 'Linux/UNIX' ]

    grouped = prices_df.groupby(['AvailabilityZone', 'InstanceType', 'ProductDescription'])
    first = grouped.first()
    first.sort_values(by=["SpotPrice"], inplace=True)

    first.reset_index(inplace=True)
    pd.set_option('display.max_rows', len(first))
    print(first)
    pd.reset_option('display.max_rows')
    return first.to_dict(orient='records')
Example #47
def main(market_plate=u'创业板', filter_ruihua=True):
    stocks = query_market_plate_stock(market_plate, filter_ruihua)

    plate_stocks = []
    for i in stocks:
        sdt = query_latest_trading(i.stock_number)
        if sdt.today_closing_price > 0:
            item = {'stock_number': i.stock_number, 'stock_name': i.stock_name, 'increase_rate': sdt.increase_rate,
                    'today_closing_price': sdt.today_closing_price}
            plate_stocks.append(item)

    plate_stocks = sorted(plate_stocks, key=lambda stock: float(stock.get('increase_rate').replace('%', '')),
                          reverse=True)

    print(market_plate)
    print(len(plate_stocks))
    print('---------------------------------------------------')
    frame = DataFrame(plate_stocks).set_index('stock_number').reindex(columns=['stock_name', 'today_closing_price',
                                                                               'increase_rate'])
    pd.set_option('display.max_rows', len(plate_stocks))
    print(frame)
    pd.reset_option('display.max_rows')
Example #48
def print_full_dataframe(df):
    pd.set_option('display.max_rows', len(df))
    pd.set_option('max_colwidth', 50)
    pd.set_option('display.width', 0)
    print(df)
    pd.reset_option('display.max_rows')
    pd.reset_option('max_colwidth')
    pd.reset_option('display.width')
Example #49
def print_full(x):
    "Print a dataframe in full, i.e. without skipping rows and inserting some (...)."
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
Example #50
drinks.assign(servings = drinks.beer + drinks.spirit + drinks.wine)

# limit which rows are read when reading in a file
pd.read_csv('drinks.csv', nrows=10)           # only read first 10 rows
pd.read_csv('drinks.csv', skiprows=[1, 2])    # skip the first two rows of data

# write a DataFrame out to a CSV
drinks.to_csv('drinks_updated.csv')                 # index is used as first column
drinks.to_csv('drinks_updated.csv', index=False)    # ignore index

# save a DataFrame to disk (aka 'pickle') and read it from disk (aka 'unpickle')
drinks.to_pickle('drinks_pickle')
pd.read_pickle('drinks_pickle')

# randomly sample a DataFrame
train = drinks.sample(frac=0.75, random_state=1)    # will contain 75% of the rows
test = drinks[~drinks.index.isin(train.index)]      # will contain the other 25%

# change the maximum number of rows and columns printed ('None' means unlimited)
pd.set_option('max_rows', None)     # default is 60 rows
pd.set_option('max_columns', None)  # default is 20 columns
print(drinks)

# reset options to defaults
pd.reset_option('max_rows')
pd.reset_option('max_columns')

# change the options temporarily (settings are restored when you exit the 'with' block)
with pd.option_context('max_rows', None, 'max_columns', None):
    print(drinks)
Example #51
drinks.loc[drinks.beer_servings.between(201, 400), "beer_level"] = "high"  # change 201-400 to 'high'

# display a cross-tabulation of two Series
pd.crosstab(drinks.continent, drinks.beer_level)

# convert 'beer_level' into the 'category' data type (new in pandas 0.15.0)
drinks["beer_level"] = pd.Categorical(drinks.beer_level, categories=["low", "med", "high"])
drinks.sort_values("beer_level")  # sorts by the categorical ordering (low to high)

# create dummy variables for 'continent' and add them to the DataFrame
cont_dummies = pd.get_dummies(drinks.continent, prefix="cont").iloc[:, 1:]  # exclude first column
drinks = pd.concat([drinks, cont_dummies], axis=1)  # axis=0 for rows, axis=1 for columns

# randomly sample a DataFrame
mask = np.random.rand(len(drinks)) < 0.66  # create a Series of booleans
train = drinks[mask]  # will contain about 66% of the rows
test = drinks[~mask]  # will contain the remaining rows

# change the maximum number of rows and columns printed ('None' means unlimited)
pd.set_option("max_rows", None)  # default is 60 rows
pd.set_option("max_columns", None)  # default is 20 columns
print(drinks)

# reset options to defaults
pd.reset_option("max_rows")
pd.reset_option("max_columns")

# change the options temporarily (settings are restored when you exit the 'with' block)
with pd.option_context("max_rows", None, "max_columns", None):
    print(drinks)

# widen the column display
pd.set_option('max_colwidth', 500)


# negative sentiment in a 5-star review
yelp[(yelp.stars == 5) & (yelp.sentiment < -0.3)].head(1)


# positive sentiment in a 1-star review
yelp[(yelp.stars == 1) & (yelp.sentiment > 0.5)].head(1)


# reset the column display width
pd.reset_option('max_colwidth')


# ## Bonus: Adding Features to a Document-Term Matrix

# create a DataFrame that only contains the 5-star and 1-star reviews
yelp_best_worst = yelp[(yelp.stars==5) | (yelp.stars==1)]

# define X and y
feature_cols = ['text', 'sentiment', 'cool', 'useful', 'funny']
X = yelp_best_worst[feature_cols]
y = yelp_best_worst.stars

# split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
Example #53
def print_full(x,y):
    pandas.set_option('display.max_rows', y)
    print(x)
    pandas.reset_option('display.max_rows')
Example #54
def full_print(df):
    import pandas as pd
    pd.set_option('display.max_rows', len(df))
    print(df)
    pd.reset_option('display.max_rows')
Example #55
def print_full(x):
    pd.set_option('display.max_rows', len(x), 'display.max_columns', len(x.columns))
    print(x)
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
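pd.reset_option accepts a single pattern per call, so the two options above are reset separately; pd.option_context is a convenient alternative that restores both automatically (a sketch):

def print_full(x):
    # both options are restored when the block exits
    with pd.option_context('display.max_rows', len(x),
                           'display.max_columns', len(x.columns)):
        print(x)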
Example #56
            item = {u'stock_number': i.stock_number, u'stock_name': i.stock_name.encode('utf-8'),
                    u'increase_rate': sdt.increase_rate, u'today_closing_price': sdt.today_closing_price}
            plate_stocks.append(item)

    plate_stocks = sorted(plate_stocks, key=lambda stock: float(stock.get('increase_rate').replace('%', '')),
                          reverse=True)

    print(market_plate)
    print(len(plate_stocks))
    if len(plate_stocks):
        print('---------------------------------------------------')
        frame = DataFrame(plate_stocks).set_index('stock_number').reindex(columns=['stock_name', 'today_closing_price',
                                                                                   'increase_rate'])
        pd.set_option('display.max_rows', len(plate_stocks))
        print(frame)
        pd.reset_option('display.max_rows')


def setup_argparse():
    parser = argparse.ArgumentParser(description=u'Query the stocks belonging to a given market plate')
    parser.add_argument(u'-m', action=u'store', dest='market_plate', required=True, help=u'the market plate to query')
    parser.add_argument(u'-f', action=u'store_true', dest='filter_rh',
                        help=u'if given, Ruihua clients are filtered out of the results')

    args = parser.parse_args()
    return args.market_plate, args.filter_rh

if __name__ == '__main__':
    setup_logging(__file__, logging.WARNING)
    market_plate, filter_rh = setup_argparse()
    if isinstance(market_plate, str):
Example #57
def print_full(x):
    # print all rows of a pandas DataFrame
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
Example #58
def print_full(x):
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
Example #59
def print_full(x):
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    print(x)
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
Example #60
def main(args=None):
    config = xe.get_config(args.config)
    session = xe.get_xnat_session(config)
    if args.update:
        # Update the cache of XNAT Experiment XML files
        xe.extract_experiment_xml(config, session,
                                  args.experimentsdir, args.num_extract)

    # extract info from the experiment XML files
    experiment = xe.get_experiments_dir_info(args.experimentsdir)
    experiment_df = xe.experiments_to_dataframe(experiment)
    reading = xe.get_experiments_dir_reading_info(args.experimentsdir)
    reading_df = xe.reading_to_dataframe(reading)
    experiment_reading = inner_join_dataframes(experiment_df, reading_df)

    # exclude phantoms, but include the traveling human phantoms
    site_id_pattern = '[A-EX]-[0-9]{5}-[MFT]-[0-9]'
    df = experiment_reading[experiment_reading.site_id.str.contains(site_id_pattern)]

    result = None
    if args.report_type == 'no_findings_date':
        # Findings are listed without a findings date
        result = findings_date_empty(df)
        if args.set_findings_date:
            # Update the findings date to equal the date to dvd
            update_findings_date(args.config, result)

    elif args.report_type == 'no_findings':
        # Findings is empty but a date is listed
        result = findings_empty(df)

    elif args.report_type == 'no_findings_or_date':
        # Both the findings and findings date are empty
        result = findings_and_date_empty(df)
        if args.reset_datetodvd:
            record = result[result.experiment_id == experiment]
            project = record.project.values[0]
            subject = record.subject_id.values[0]
            experiment = args.reset_datetodvd
            set_experiment_attrs(args.config, project, subject, experiment, 'datetodvd', 'none')

    elif args.report_type == 'correct_dvd_date':
        dates_df = pd.read_csv(args.file_to_reset_datetodvd)
        result = pd.DataFrame(index=['Subject'], columns=['project', 'subject_id', 'experiment_id',
                 'site_experiment_id', 'datetodvd', 'findingsdate'])
        result = result.fillna(0)
        for subject in df['subject_id'].tolist():
            if subject in dates_df['mri_xnat_sid'].tolist():
                if args.verbose:
                    print "Checking for {}".format(subject)
                eids = dates_df[dates_df['mri_xnat_sid'] == subject]['mri_xnat_eids'].tolist()
                date = dates_df[dates_df['mri_xnat_sid'] == subject]['mri_datetodvd'].tolist()
                if eids != []:
                    if len(eids[0]) == 13:
                        experiment = eids[0]
                        record = df[df.experiment_id == experiment]
                        record_date = record['datetodvd'].tolist()
                        if date != [] and record_date != []:
                            if record_date[0] != date[0] or not isinstance(record_date[0], str):
                                project = record.project.values[0]
                                subject = record.subject_id.values[0]
                                experiment = record.experiment_id.values[0]
                                set_experiment_attrs(args.config, project, subject, experiment, 'datetodvd', date[0])
                    elif eids is None or len(eids[0]) == 27:
                        experiment = eids[0].split(" ")
                        for e in experiment:
                            record_date = record['datetodvd'].tolist()
                            record = df[df.experiment_id == e]
                            if date != [] and record_date != []:
                                if record_date[0] != date[0] or isinstance(record_date[0], str):
                                    project = record.project.values[0]
                                    subject = record.subject_id.values[0]
                                    set_experiment_attrs(args.config, project, subject, e, 'datetodvd', date[0])

    elif args.report_type == 'no_findings_before_date':
        # Findings and Findings Date is empty before a given date
        if not args.before_date:
            raise(Exception("Please set --before-date YYYY-MM-DD when running the no_findings_before_date report."))
        has_dvd_before_date = check_dvdtodate_before_date(df, before_date=args.before_date)
        result = findings_and_date_empty(has_dvd_before_date)
        result.to_csv(args.outfile, index=False)
    else:
        raise(NotImplementedError("The report you entered is not in the list."))

    result.to_csv(args.outfile,
                  columns=['project', 'subject_id', 'experiment_id',
                           'site_experiment_id', 'datetodvd', 'findingsdate'],
                  index=False)
    if args.verbose:
        pd.set_option('display.max_rows', len(result))
        print("Total records found: {}".format(len(result)))
        print(result[['experiment_id', 'site_experiment_id']])
        pd.reset_option('display.max_rows')
        print("Finished!")