def run_sqlite_convert(result_file, result_view_name, column_types, index_columns, output_folder):
    print("Input TSV result file = [" + str(result_file.resolve()) + "]")
    db = output_folder.joinpath(result_file_name(result_file, result_view_name))
    print("Output SQLite database file = [" + str(db.resolve()) + "]")

    # all the usual options are supported
    delimiter = '\t'
    if 'csv' in result_file.name:
        delimiter = ','

    if column_types is not None:
        options = csv_to_sqlite.CsvOptions(
            delimiter=delimiter,
            bracket_style='backtick',
            typing_style='manual',
            column_types=column_types,
            column_select_func=lambda x: x in column_types.keys())
    else:
        options = csv_to_sqlite.CsvOptions(delimiter=delimiter,
                                           bracket_style='backtick')

    print("Converting...")
    sys.stdout.flush()
    start = current_time_ms()
    csv_to_sqlite.write_csv([str(result_file)], str(db), options)
    end = current_time_ms()
    print("  Completed in " + format_ms(end - start))

    print("Post-processing database...")
    sys.stdout.flush()
    start = end
    create_indexes(db, index_columns)
    end = current_time_ms()
    print("  Completed in " + format_ms(end - start))
    sys.stdout.flush()
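A minimal invocation sketch for the function above, assuming its helpers (result_file_name, create_indexes, current_time_ms, format_ms) are in scope. The column_types mapping and index_columns list are hypothetical placeholders, not values from the original project:

from pathlib import Path

# Hypothetical example values: SQLite type names keyed by column header.
column_types = {"id": "integer", "score": "real", "label": "text"}
index_columns = ["id"]  # columns passed through to create_indexes (assumed)

run_sqlite_convert(
    result_file=Path("results/run1.tsv"),
    result_view_name="run1",
    column_types=column_types,
    index_columns=index_columns,
    output_folder=Path("out"))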
def CreateCovidDB(input_directory, db_directory, db_name):
    """Convert every CSV file in input_directory into one SQLite database."""
    # get a list of input csv files
    # (note: input_directory must end with a path separator for this pattern)
    input_files = glob(input_directory + "*.csv")
    # create database
    options = csv_to_sqlite.CsvOptions(typing_style="full")
    csv_to_sqlite.write_csv(input_files, db_directory + db_name, options)
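Because the paths above are joined by plain string concatenation, callers must include trailing separators themselves. A sketch of the same function using os.path.join instead, which works either way (the directory and file names in the usage line are hypothetical):

import os
from glob import glob

import csv_to_sqlite

def create_covid_db(input_directory, db_directory, db_name):
    # os.path.join is correct whether or not the caller adds a trailing slash
    input_files = glob(os.path.join(input_directory, "*.csv"))
    options = csv_to_sqlite.CsvOptions(typing_style="full")
    csv_to_sqlite.write_csv(input_files, os.path.join(db_directory, db_name), options)

create_covid_db("data/covid", "db", "covid.sqlite")  # hypothetical paths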
def init_db():
    currentDir = os.getcwd()
    dataPath = currentDir + "/mapping_heat/fixtures/pitching_data.csv"
    instance_path = currentDir + "/instance/mapping_heat.sqlite"
    db = get_db()
    with current_app.open_resource('schema.sql') as f:
        db.executescript(f.read().decode('utf8'))
    # pandas alternative:
    # df = pandas.read_csv(dataPath)
    # df.to_sql("pitching_data", con=db, if_exists='replace', index=False)
    options = csv_to_sqlite.CsvOptions(typing_style="full", delimiter=",",
                                       drop_tables=True)
    csv_to_sqlite.write_csv([dataPath], instance_path, options)
def test_csv_basic():
    options = csv_to_sqlite.CsvOptions(typing_style="quick", drop_tables=True)
    input_files = [
        "tests\\data\\abilities.csv",
        "tests\\data\\moves.csv",
        "tests\\data\\natures.csv"
    ]
    total = csv_to_sqlite.write_csv(input_files, "multiple.sqlite", options)
    assert total == 1064
    assert path.exists("multiple.sqlite")

    connection = sqlite3.connect("multiple.sqlite")
    tables = connection.execute("SELECT * FROM sqlite_master;").fetchall()
    assert len(tables) == 3

    naturesSql = connection.execute(
        "SELECT sql FROM sqlite_master WHERE name = 'natures';").fetchall()
    assert len(naturesSql) == 1
    assert naturesSql[0][0] == (
        "CREATE TABLE [natures] (\n\t[id] integer,\n\t[identifier] text,"
        "\n\t[decreased_stat_id] integer,\n\t[increased_stat_id] integer,"
        "\n\t[hates_flavor_id] integer,\n\t[likes_flavor_id] integer,"
        "\n\t[game_index] integer\n)")
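A quick way to eyeball the inferred schema outside the test is SQLite's PRAGMA table_info. A minimal sketch, assuming multiple.sqlite was produced by the test above:

import sqlite3

connection = sqlite3.connect("multiple.sqlite")
# PRAGMA table_info returns (cid, name, type, notnull, dflt_value, pk) per column
for cid, name, col_type, *_ in connection.execute("PRAGMA table_info(natures);"):
    print(cid, name, col_type)
connection.close()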
def save(self, data: dict, output="output"):
    captures = glob.glob(output + "/*")
    try:
        latest = max(captures, key=os.path.getmtime)
    except ValueError:  # captures is empty on the first run
        latest = "0"
    createdat = data["data"]["birdwatch_latest_public_data_file_bundle"][
        "notes"]["created_at"]
    if int(createdat) > int(latest.replace(output + "/", "")):
        os.mkdir("{}/{}".format(output, createdat))
        filenames = []
        # download both the notes and the ratings bundles
        for section in ("notes", "ratings"):
            for url in data["data"]["birdwatch_latest_public_data_file_bundle"][
                    section]["urls"]:
                a = urlparse(url)
                r = requests.get(url, allow_redirects=True)
                filename = "{}/{}/{}".format(output, createdat,
                                             os.path.basename(a.path))
                filenames.append(filename)
                with open(filename, "wb") as f:
                    f.write(r.content)
        # note: the original passed typing_style="utf8", which is not a typing
        # style; "full" (type inference) appears to be what was intended
        options = csv_to_sqlite.CsvOptions(delimiter="\t",
                                           typing_style="full",
                                           encoding="windows-1250")
        csv_to_sqlite.write_csv(
            filenames,
            "{}/{}/{}".format(output, createdat, "database.sqlite"),
            options)
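For reference, the CsvOptions keyword arguments exercised across these snippets are delimiter, encoding, typing_style ("full", "quick", and "manual" all appear), drop_tables, bracket_style, column_types, and column_select_func. A minimal sketch combining the manual-typing options the same way run_sqlite_convert above does; the file and column names are hypothetical, and it is an assumption that these arguments combine as they appear separately:

import csv_to_sqlite

# Hypothetical tab-separated export with two known columns.
column_types = {"id": "integer", "note": "text"}
options = csv_to_sqlite.CsvOptions(
    delimiter="\t",
    encoding="utf-8",
    typing_style="manual",        # take types from column_types
    column_types=column_types,
    column_select_func=lambda c: c in column_types,  # keep only typed columns
    drop_tables=True)             # recreate tables when rerun
csv_to_sqlite.write_csv(["export.tsv"], "export.sqlite", options)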
import csv, sqlite3

connection = sqlite3.connect(":memory:")  # use a file path like "your_filename.db" to persist
cur = connection.cursor()
cur.execute("CREATE TABLE t (col1, col2);")  # use your column names here

with open('data.csv', 'r') as fin:
    # csv.DictReader uses the first line in the file for column headings by default
    dr = csv.DictReader(fin)  # comma is the default delimiter
    to_db = [(i['col1'], i['col2']) for i in dr]

cur.executemany("INSERT INTO t (col1, col2) VALUES (?, ?);", to_db)
connection.commit()
connection.close()

# Alternatively, with the csv-to-sqlite package:
#   pip install csv-to-sqlite
import csv_to_sqlite

# all the usual options are supported
options = csv_to_sqlite.CsvOptions(typing_style="full", encoding="windows-1250")
input_files = ["abilities.csv", "moves.csv"]  # pass in a list of CSV files
csv_to_sqlite.write_csv(input_files, "output.sqlite", options)
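The hand-rolled variant above hardcodes two columns. A sketch that derives the column list from the CSV header instead, so the same loader works for any file; the table and file names in the usage line are hypothetical:

import csv, sqlite3

def load_csv(db_path, csv_path, table):
    with open(csv_path, newline='') as fin:
        reader = csv.DictReader(fin)
        columns = reader.fieldnames  # taken from the header row
        rows = [tuple(r[c] for c in columns) for r in reader]

    col_list = ", ".join('"%s"' % c for c in columns)
    placeholders = ", ".join("?" for _ in columns)
    connection = sqlite3.connect(db_path)
    with connection:  # commits on success
        connection.execute('CREATE TABLE IF NOT EXISTS "%s" (%s)' % (table, col_list))
        connection.executemany(
            'INSERT INTO "%s" (%s) VALUES (%s)' % (table, col_list, placeholders),
            rows)
    connection.close()

load_csv("data.sqlite", "data.csv", "t")  # hypothetical paths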
# https://zblesk.net/blog/csv-to-sqlite-2-1/
import csv_to_sqlite

# all the usual options are supported
options = csv_to_sqlite.CsvOptions(typing_style="full", encoding="utf-8")
input_files = ["CSV/mostpopulartweets.csv"]  # pass in a list of CSV files
csv_to_sqlite.write_csv(input_files, "Databases/mostpopulartweets.db", options)
print("Success: most popular tweets written to SQLite")
def make_db_from_csv_files(self):
    options = csv_to_sqlite.CsvOptions(typing_style="full")
    input_files = ("ui_templates/" + table + ".csv"
                   for table in self.tables_scheme)
    csv_to_sqlite.write_csv(input_files, self.name, options)
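One caveat with passing a generator expression as above: it can only be consumed once, and a missing file surfaces only mid-conversion. A defensive sketch that materializes the list and checks existence first, assuming the same tables_scheme attribute:

import os

import csv_to_sqlite

def make_db_from_csv_files(self):
    input_files = [os.path.join("ui_templates", table + ".csv")
                   for table in self.tables_scheme]
    missing = [f for f in input_files if not os.path.exists(f)]
    if missing:
        raise FileNotFoundError("missing CSV templates: " + ", ".join(missing))
    options = csv_to_sqlite.CsvOptions(typing_style="full")
    csv_to_sqlite.write_csv(input_files, self.name, options)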
def main():
    print("Initializing data...")
    # creating object of TwitterClient Class
    api = TwitterClient()

    # calling function to get tweets
    print("Performing Processes...")
    path = 'query.txt'
    with open(path, 'r') as fileOpen:
        query = fileOpen.read()
    # query = input("Enter hashtag: ")  # e.g. Biden

    tweets = api.get_tweets(query, count=200)

    # picking positive tweets from tweets && percentage of positive tweets
    ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive']
    PP = format(100 * len(ptweets) / len(tweets))
    # picking negative tweets from tweets && percentage of negative tweets
    ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative']
    PN = format(100 * len(ntweets) / len(tweets))
    # picking neutral tweets from tweets && percentage of neutral tweets
    # (equivalently: 100 * (len(tweets) - (len(ntweets) + len(ptweets))) / len(tweets))
    ytweets = [tweet for tweet in tweets if tweet['sentiment'] == 'neutral']
    PY = format(100 * len(ytweets) / len(tweets))

    #############################################
    # GET TWEETS AND CLASSIFY THEM INTO THE THREE CATEGORIES:
    # first 10 tweets of each sentiment class
    PT = [tweet['text'] for tweet in ptweets[:10]]
    YT = [tweet['text'] for tweet in ytweets[:10]]
    NT = [tweet['text'] for tweet in ntweets[:10]]

    #############################################
    # Put the gathered data into JSON format for dumping.
    # (The original keyed the negative percentage under "neutralsenti" and
    # the neutral one under "negativesenti"; the labels are corrected here.)
    d = {
        "SENTIMENTS": {
            "positivesenti": PP,
            "neutralsenti": PY,
            "negativesenti": PN
        },
        "PICKEDTWEETS": {
            "positivetweet": PT,
            "neutraltweets": YT,
            "negativetweets": NT
        }
    }
    json.dumps(d)

    # percentage sentiment first:
    d1 = {
        "SENTIMENTS": [{
            "positivesenti": PP,
            "neutralsenti": PY,
            "negativesenti": PN
        }]
    }
    # print(json.dumps(d1, indent=1))

    ##############################################
    # Pad the picked-tweet list of each class out to 10 entries; 0 marks an
    # empty slot, matching the original pre-initialized matrices.
    Positive = PT + [0] * (10 - len(PT))
    Neutral = YT + [0] * (10 - len(YT))
    Negative = NT + [0] * (10 - len(NT))

    positivetweets = {
        "POSITIVETWEETS": {"p%d" % (i + 1): Positive[i] for i in range(10)}
    }
    neutraltweets = {
        "NEUTRALTWEETS": {"y%d" % (i + 1): Neutral[i] for i in range(10)}
    }
    negativetweets = {
        "NEGATIVETWEETS": {"n%d" % (i + 1): Negative[i] for i in range(10)}
    }

    ###############################################
    # ALL DUMPER
    pickedtweets = [{
        "POSITIVETWEETS": Positive[i],
        "NEUTRALTWEETS": Neutral[i],
        "NEGATIVETWEETS": Negative[i]
    } for i in range(10)]

    #############################################################
    # Second Analysis
    CONSUMER_KEY = 'kqUMGE1g9Ye6mZ9IdIxWNamTK'
    CONSUMER_SECRET = 'ffMqQa8dlAsd4zysRFW47JM7Vk0M5MwSP0QKVOLjrufLSHsEMA'
    OAUTH_TOKEN = '1156902832269774848-w0E7G6H024TGXg7ef6aM8KmCezVwoK'
    OAUTH_TOKEN_SECRET = 'Bvos2WEu0uTzszmTsdiPS1l3PLu9n1uSfIZJd7FjpYAEr'

    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                               CONSUMER_KEY, CONSUMER_SECRET)
    twitter_api = twitter.Twitter(auth=auth)
    # print(twitter_api)

    # Trending hashtags by WOE ID.
    # The Yahoo! Where On Earth ID for the entire world is 1.
    # See https://dev.twitter.com/docs/api/1.1/get/trends/place and
    # http://developer.yahoo.com/geo/geoplanet/
    PH_WOE_ID = 23424934

    # Prefix ID with the underscore for query string parameterization.
    # Without the underscore, the twitter package appends the ID value
    # to the URL itself as a special case keyword argument.
    ph_trends = twitter_api.trends.place(_id=PH_WOE_ID)

    # Strip the JSON scaffolding around the trend list with string surgery
    # (fragile, but preserved from the original).
    content = json.dumps(ph_trends, indent=1).replace("trends", "")
    content = content[10:]  # same as the original chain of [0::] ... [4::] slices
    s = list(content)
    size = len(s)
    del s[size - 162:size - 1]
    ph_trends = ''.join(str(elem) for elem in s)

    # Collecting search results
    q = query
    count = 100
    # See https://dev.twitter.com/docs/api/1.1/get/search/tweets
    search_results = twitter_api.search.tweets(q=q, count=count)
    statuses = search_results['statuses']

    # Iterate through 5 more batches of results by following the cursor
    for _ in range(5):
        try:
            next_results = search_results['search_metadata']['next_results']
        except KeyError:
            # No more results when next_results doesn't exist
            break
        # Create a dictionary from next_results, which has the following form:
        # ?max_id=313519052523986943&q=NCAA&include_entities=1
        kwargs = dict(kv.split('=') for kv in next_results[1:].split("&"))
        search_results = twitter_api.search.tweets(**kwargs)
        statuses += search_results['statuses']

    # Show one sample search result by slicing the list...
    # print(json.dumps(statuses[0], indent=1))

    ### second analysis - 1: extracting tweet entities
    status_texts = [status['text'] for status in statuses]
    screen_names = [
        user_mention['screen_name'] for status in statuses
        for user_mention in status['entities']['user_mentions']
    ]
    hashtags = [
        hashtag['text'] for status in statuses
        for hashtag in status['entities']['hashtags']
    ]
    # Compute a collection of all words from all tweets
    words = [w for t in status_texts for w in t.split()]

    # Take the first ten hashtags and strip JSON quoting and brackets.
    # (The original did this with ten copies of the same dumps/replace chain.)
    cleaned_hashtags = []
    for h in hashtags[:10]:
        cleaned = json.dumps(h).replace('"', "").replace("[", "").replace("]", "")
        cleaned_hashtags.append(cleaned)
    dataHH = [{"relatedhashtags": c} for c in cleaned_hashtags]
    # print(json.dumps(dataHH, indent=1))

    from collections import Counter

    # all dumper txt and json: create the output directories...
    os.mkdir("Dumps")
    os.mkdir("CSV")
    os.mkdir("Databases")

    # ...and touch every output file up front (the original leaked one open
    # handle per file; close them instead)
    placeholder_files = [
        "Dumps/SENTIMENTS.json", "Dumps/PICKEDTWEETS.json", "Dumps/PH_trends.json",
        "Dumps/related_hashtags_no_count.json", "Dumps/Freq_Words.txt",
        "Dumps/ScreenNames.txt", "Dumps/related_hashtags_with_count.txt",
        "Dumps/mostpopulartweets.txt", "Dumps/LexicalDiversity.json",
        "CSV/SENTIMENTS.csv", "CSV/PICKEDTWEETS.csv", "CSV/PH_trends.csv",
        "CSV/related_hashtags_no_count.csv", "CSV/Freq_Words.csv",
        "CSV/ScreenNames.csv", "CSV/related_hashtags_with_count.csv",
        "CSV/mostpopulartweets.csv", "CSV/LexicalDiversity.csv",
        "Databases/SENTIMENTS.db", "Databases/PICKEDTWEETS.db",
        "Databases/PH_trends.db", "Databases/related_hashtags_no_count.db",
        "Databases/Freq_Words.db", "Databases/ScreenNames.db",
        "Databases/related_hashtags_with_count.db",
        "Databases/mostpopulartweets.db", "Databases/LexicalDiversity.db",
    ]
    for name in placeholder_files:
        open(name, "w+", encoding='utf-8').close()

    ###############################################
    # Save the JSON dumps
    with open('Dumps/SENTIMENTS.json', 'w', encoding='utf-8') as json_file:
        json.dump(d1, json_file)
    with open('Dumps/PICKEDTWEETS.json', 'w', encoding='utf-8') as json_file:
        json.dump(pickedtweets, json_file)
    with open('Dumps/PH_trends.json', 'w', encoding='utf-8') as f:
        print(ph_trends, file=f)
    with open('Dumps/related_hashtags_no_count.json', 'w', encoding='utf-8') as f:
        print(json.dumps(dataHH, indent=1), file=f)

    # Pretty tables: top-10 counts for words, screen names and hashtags
    # (replaces the original counter/break bookkeeping with a name lookup)
    table_outputs = {
        'Word': 'Dumps/Freq_Words.txt',
        'Screen Name': 'Dumps/ScreenNames.txt',
        'Hashtag': 'Dumps/related_hashtags_with_count.txt',
    }
    for label, data in (('Word', words), ('Screen Name', screen_names),
                        ('Hashtag', hashtags)):
        pt = PrettyTable(field_names=[label, 'Count'])
        c = Counter(data)
        for kv in c.most_common()[:10]:
            pt.add_row(kv)
        pt.align[label], pt.align['Count'] = 'l', 'r'  # Set column alignment
        with open(table_outputs[label], 'w', encoding='utf-8') as f:
            print(pt, file=f)

    # A function for computing lexical diversity
    def lexical_diversity(tokens):
        return 1.0 * len(set(tokens)) / len(tokens)

    # A function for computing the average number of words per tweet
    def average_words(statuses):
        total_words = sum(len(s.split()) for s in statuses)
        return 1.0 * total_words / len(statuses)

    # (The original rebound `words` and the file handle `f` here; distinct
    # names are used instead.)
    lex_words = lexical_diversity(words)
    lex_screen_names = lexical_diversity(screen_names)
    lex_hashtags = lexical_diversity(hashtags)
    ave_words = average_words(status_texts)
    data = [{
        "Words": lex_words,
        "ScreenNames": lex_screen_names,
        "LexHashtags": lex_hashtags,
        "AveWords": ave_words
    }]
    with open('Dumps/LexicalDiversity.json', 'w', encoding='utf-8') as f:
        print(json.dumps(data, indent=1), file=f)

    retweets = [
        # Store out a tuple of these three values ...
        (status['retweet_count'],
         status['retweeted_status']['user']['screen_name'],
         status['text'])
        # ... for each status ...
        for status in statuses
        # ... so long as the status is a retweet.
        if 'retweeted_status' in status
    ]

    # Slice off the first 5 from the sorted results and display each item in the tuple
    pt = PrettyTable(field_names=['Count', 'Screen Name', 'Text'])
    for row in sorted(retweets, reverse=True)[:5]:
        pt.add_row(row)
    pt.max_width['Text'] = 50
    pt.align = 'l'
    with open('Dumps/mostpopulartweets.txt', 'w', encoding='utf-8') as f:
        print(pt, file=f)

    #### Conversion from dumps to csv
    # Flatten each JSON dump with CherryPicker and write it out as CSV.
    # SENTIMENTS is the only dump whose top-level key is picked first.
    json_to_csv = [
        ('Dumps/SENTIMENTS.json', 'CSV/SENTIMENTS.csv', 'SENTIMENTS'),
        ('Dumps/PICKEDTWEETS.json', 'CSV/PICKEDTWEETS.csv', None),
        ('Dumps/PH_trends.json', 'CSV/PH_trends.csv', None),
        ('Dumps/related_hashtags_no_count.json',
         'CSV/related_hashtags_no_count.csv', None),
        ('Dumps/LexicalDiversity.json', 'CSV/LexicalDiversity.csv', None),
    ]
    for json_path, csv_path, key in json_to_csv:
        with open(json_path, encoding='utf-8') as file:
            data = json.load(file)
        picker = CherryPicker(data)
        if key is not None:
            picker = picker[key]
        flat = picker.flatten().get()
        pd.DataFrame(flat).to_csv(csv_path, encoding='utf-8')

    def pretty_table_to_tuples(input_str):
        lines = input_str.split("\n")
        num_columns = len(re.findall(r"\+", lines[0])) - 1
        line_regex = r"\|" + (r" +(.*?) +\|" * num_columns)
        for line in lines:
            m = re.match(line_regex, line.strip())
            if m:
                yield m.groups()

    # Convert the pretty-table text dumps to CSV as well
    for name in ("Freq_Words", "ScreenNames", "related_hashtags_with_count",
                 "mostpopulartweets"):
        with open("Dumps/{}.txt".format(name), encoding="utf8") as fp:
            input_string = fp.read()
        with open("CSV/{}.csv".format(name), 'w', encoding="utf8") as outcsv:
            writer = csv.writer(outcsv, delimiter=",", lineterminator='\n')
            writer.writerows(pretty_table_to_tuples(input_string))

    #### Conversion from csv to sqlite
    # all the usual options are supported
    options = csv_to_sqlite.CsvOptions(typing_style="full", encoding="utf-8")
    for name in ("SENTIMENTS", "PICKEDTWEETS", "PH_trends",
                 "related_hashtags_no_count", "LexicalDiversity", "Freq_Words",
                 "ScreenNames", "related_hashtags_with_count",
                 "mostpopulartweets"):
        input_files = ["CSV/{}.csv".format(name)]  # pass in a list of CSV files
        csv_to_sqlite.write_csv(input_files, "Databases/{}.db".format(name),
                                options)

    print("\nSUCCESS")
    print("Database was created successfully!")
def test_csv_script():
    options = csv_to_sqlite.CsvOptions(typing_style="quick", drop_tables=True)
    input_files = ["tests\\data\\abilities.csv"]
    total = csv_to_sqlite.write_csv(input_files, "test_out.sqlite", options)
    assert total == 293
    assert path.exists("test_out.sqlite")
def init():
    options = csv_to_sqlite.CsvOptions(typing_style="full",
                                       encoding="windows-1250")
    input_files = ["alcohol_prices.csv"]
    csv_to_sqlite.write_csv(input_files, "alcohol.sqlite", options)
        input_id.text,
        output.find('relaSource').text,
        output.find('classId').text,
        output.find('name').text,
        output.find('classType').text
    ], index=dfcols), ignore_index=True)

rxclass_df = rxclass_df.drop_duplicates()

# output
rxclass_df.to_csv(rxn_dir + "VACLASS.csv", index=False)

# write to sqlite db
options = csv_to_sqlite.CsvOptions(typing_style="full")
csv_to_sqlite.write_csv([rxn_dir + "VACLASS.csv"],
                        sqlite_dir + "covid_train.sqlite", options)
csv_to_sqlite.write_csv([rxn_dir + "VACLASS.csv"],
                        sqlite_dir + "covid_test.sqlite", options)

### update medications_covid_epochs tables
# set vars
input_dict = {
    "test_db": ["./data/sqlite/covid_test.sqlite"],
    "train_db": ["./data/sqlite/covid_train.sqlite"]
}

### sql
sql_text = open("./code/sql/TABLE_rxclass_statements.sql", "r").read()
def convertToSqlite(filename):
    csv_name = filename + ".csv"
    options = csv_to_sqlite.CsvOptions(typing_style="full",
                                       encoding="windows-1250")
    input_files = [csv_name]  # pass in a list of CSV files
    csv_to_sqlite.write_csv(input_files, filename + ".sqlite", options)
    return filename + ".sqlite"
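A usage sketch for the helper above, assuming a hypothetical report.csv sits next to the script and that, as the tests earlier in this collection suggest, csv_to_sqlite names the table after the file stem:

import sqlite3

db_path = convertToSqlite("report")  # converts report.csv -> report.sqlite

# Spot-check: count the rows that landed in the generated table.
connection = sqlite3.connect(db_path)
(count,) = connection.execute('SELECT COUNT(*) FROM "report";').fetchone()
print(db_path, "contains", count, "rows")
connection.close()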