Exemplo n.º 1
0
def main():
    """Fit an ElasticNet on per-name features and print its cross-validated R^2.

    Loads the pickled name records from ``nameDict.dat``, keeps the records
    whose namespace is ``"d"``, builds one target value per name (``price``
    applied to five rank tables) and one feature row per name, then prints
    the scores returned by ``cross_val_score``.

    Boolean features are encoded as 1 (false) / 2 (true) via ``int(flag) + 1``.
    """
    # NOTE(security): pickle.load executes arbitrary code embedded in the
    # file; only feed it a nameDict.dat that comes from a trusted source.
    with open("nameDict.dat", "rb") as name_file:
        nameDict = pickle.load(name_file)

    bitNames = getDictSubset(nameDict,
                             lambda record: record.namespace() == "d")
    max_height = getMaxHeight(nameDict)

    dotBitAlexa = alexaRanks()

    # Lines containing a space (multi-word entries) are skipped. Blank lines
    # are skipped too: an empty string is a substring of every name and would
    # otherwise flag every single name as "dirty".
    with open('dirty.txt', 'r') as dirty_file:
        dirtyWords = [
            word.strip() for word in dirty_file
            if " " not in word and word.strip()
        ]
    bitWordList = {
        "coin", "satoshi", "wallet", "crypto", "currency", "btc", "nmc",
        "blockchain"
    }

    valueChangeRank = rankNumberOfValueChanges(bitNames, max_height)
    aliveRank = rankIsAlive(bitNames, max_height)
    validJSONRank = rankJSONDict(bitNames, max_height)
    validDNSRank = rankValidDNSDict(bitNames, max_height)
    timeActiveRank = rankByTimeActive(bitNames, max_height)

    maxRank = len(bitNames)
    digits = set("0123456789")
    letters = set("abcdefghijklmnopqrstuvwxyz")
    xData = []
    yData = []
    for name in bitNames:
        lowered = name.lower()
        # Drop the first two characters (presumably the "d/" namespace
        # prefix -- confirm against the name format).
        label = name[2:]
        label_chars = set(label)

        yData.append(
            price([
                valueChangeRank[name], aliveRank[name], validJSONRank[name],
                validDNSRank[name], timeActiveRank[name]
            ], maxRank))
        xData.append([
            int(dotBitAlexa[name]) + 1,  # alexaRank
            int(len(wordnet.synsets(label)) >= 1) + 1,  # inDict
            int(any(dirtyWord in lowered
                    for dirtyWord in dirtyWords)) + 1,  # inDirty
            int(label_chars.issubset(digits)) + 1,  # isNumber
            len(name),  # length
            int(
                any(word in lowered for word in bitWordList)
                or name.startswith("d/bit")) + 1,  # coinRelated
            int(label_chars.issubset(letters)) + 1,  # only a-z characters
            # presumably the number of segments the label splits into
            SegmentString().string_segments(label),
        ])

    alpha = 0.1
    enet = ElasticNet(alpha=alpha)

    # scikit-learn names its R^2 scorer "r2"; "r2_score" is the metric
    # function's name and is rejected as a scoring string.
    score = cross_val_score(enet, xData, yData, scoring='r2')
    print(score)
Exemplo n.º 2
0
def main():
    """Cross-validate an ElasticNet that predicts a name's price from features.

    Steps, in order:
      1. Unpickle the name records from ``nameDict.dat`` and keep those whose
         namespace is ``"d"``.
      2. Compute five rank tables for those names; ``price`` combines the five
         per-name ranks into the regression target.
      3. Build one feature row per name (see ``features`` below).
      4. Print the scores returned by ``cross_val_score``.
    """
    # NOTE(security): unpickling runs arbitrary code from the file, so
    # nameDict.dat must come from a trusted source.
    with open("nameDict.dat", "rb") as name_file:
        nameDict = pickle.load(name_file)

    bitNames = getDictSubset(nameDict, lambda record: record.namespace() == "d")
    max_height = getMaxHeight(nameDict)

    dotBitAlexa = alexaRanks()

    with open('dirty.txt', 'r') as dirty_file:
        # Entries containing a space are ignored.
        stripped = (line.strip() for line in dirty_file if " " not in line)
        # Drop empty entries: "" is contained in every string, so a single
        # blank line would mark every name as dirty.
        dirtyWords = [word for word in stripped if word]
    bitWordList = {"coin", "satoshi", "wallet", "crypto", "currency", "btc", "nmc", "blockchain"}

    # Order matters: price() receives the ranks in exactly this order.
    rank_tables = [
        rankNumberOfValueChanges(bitNames, max_height),
        rankIsAlive(bitNames, max_height),
        rankJSONDict(bitNames, max_height),
        rankValidDNSDict(bitNames, max_height),
        rankByTimeActive(bitNames, max_height),
    ]

    digits = set("0123456789")
    letters = set("abcdefghijklmnopqrstuvwxyz")

    def flag(condition):
        # Binary features are encoded as 1 (false) / 2 (true).
        return int(condition) + 1

    def features(name):
        lowered = name.lower()
        # First two characters are dropped (presumably the "d/" prefix --
        # confirm against the name format).
        label = name[2:]
        label_chars = set(label)
        return [
            int(dotBitAlexa[name]) + 1,                                  # alexaRank
            flag(len(wordnet.synsets(label)) >= 1),                      # inDict
            flag(any(dirtyWord in lowered for dirtyWord in dirtyWords)), # inDirty
            flag(label_chars.issubset(digits)),                          # isNumber
            len(name),                                                   # length
            flag(any(word in lowered for word in bitWordList)
                 or name.startswith("d/bit")),                           # coinRelated
            flag(label_chars.issubset(letters)),                         # only a-z characters
            SegmentString().string_segments(label),                      # presumably a segment count
        ]

    maxRank = len(bitNames)
    xData = []
    yData = []
    for name in bitNames:
        yData.append(price([table[name] for table in rank_tables], maxRank))
        xData.append(features(name))

    alpha = 0.1
    enet = ElasticNet(alpha=alpha)

    # The scorer string for the coefficient of determination is "r2";
    # "r2_score" is only the name of the metric function and is not accepted
    # by the `scoring` parameter.
    score = cross_val_score(enet, xData, yData, scoring='r2')
    print(score)
#         upper = int((4/3) * (i + 1))
#         ret[i] = (cumulative[upper] - cumulative[lower]) / (upper - lower)
#     return ret

# Alexa data; every entry yielded by iterating it is checked below against
# the set of currently valid "d" names.
alexa_ranks = alexaRanks()
# Disabled: swap keys and values, then flatten into a rank-ordered list.
# alexa_ranks = {rank: intern(name) for name, rank in alexa_ranks.items()}
# pdb.set_trace()
# alexa_list = []
# for i in range(1, len(alexa_ranks) + 1):
#     alexa_list.append(alexa_ranks[i])

# NOTE: pickle.load runs arbitrary code from the file -- trusted input only.
with open("nameDict.dat", "rb") as pickle_file:
    name_history = pickle.load(pickle_file)

max_height = getMaxHeight(name_history)

# Filter in two stages; each intermediate mapping is rebound to None as soon
# as the next stage has been derived from it.
valid_names = getDictSubset(
    name_history, lambda record: record.isValidAtHeight(max_height))
name_history = None
active_bit_names = getDictSubset(
    valid_names, lambda record: record.namespace() == "d")
valid_names = None

names = set(active_bit_names.keys())
active_bit_names = None

# One boolean per Alexa entry, in iteration order: is it a registered name?
registered = [entry in names for entry in alexa_ranks]

averaged = variable_window_moving_average(registered)

# Plot text settings: serif font family entry and LaTeX text rendering.
rc('font', serif='Helvetica Neue')
rc('text', usetex='true')
#         ret[i] = (cumulative[upper] - cumulative[lower]) / (upper - lower)
#     return ret
        

# Source of the Alexa entries that are tested for registration further down.
alexa_ranks = alexaRanks()
# Disabled: swap keys and values, then build a list ordered by rank.
# alexa_ranks = {rank: intern(name) for name, rank in alexa_ranks.items()}
# pdb.set_trace()
# alexa_list = []
# for i in range(1, len(alexa_ranks) + 1):
#     alexa_list.append(alexa_ranks[i])

# NOTE: unpickling executes code stored in the file; use trusted data only.
with open("nameDict.dat", "rb") as pickle_file:
    name_history = pickle.load(pickle_file)

max_height = getMaxHeight(name_history)

# Stage 1: records valid at the maximum height. The full history is rebound
# to None right after, dropping this reference to it.
valid_names = getDictSubset(
    name_history,
    lambda record: record.isValidAtHeight(max_height),
)
name_history = None

# Stage 2: of those, only the records in the "d" namespace.
active_bit_names = getDictSubset(
    valid_names,
    lambda record: record.namespace() == "d",
)
valid_names = None

# Only the keys are needed from here on.
names = set(active_bit_names.keys())
active_bit_names = None

# Membership flag for every Alexa entry, preserving iteration order.
registered = list(map(names.__contains__, alexa_ranks))


averaged = variable_window_moving_average(registered)

rc('font', serif='Helvetica Neue')
import pickle
import pdb

from nltk.util import ngrams

from common import getDictSubset
from nameHistory import getMaxHeight
from segment_string import SegmentString

DOMAIN_REGEX = "^[a-z]([a-z0-9-]{0,62}[a-z0-9])?$"


def valid_domain_name(name, has_prefix=True):
    """Return whether a record's name is a well-formed domain label.

    Args:
        name: Record object exposing a ``name()`` method that returns the
            full name string.
        has_prefix: When true (the default), the first two characters of the
            name are dropped before validation (presumably the ``"d/"``
            namespace prefix -- confirm against callers). When false, the
            whole string is validated as-is.

    Returns:
        bool: True if the (optionally stripped) string matches
        ``DOMAIN_REGEX``: a lowercase letter, optionally followed by
        letters, digits or hyphens and ending in a letter or digit.
    """
    # Imported locally so the helper does not rely on a bare `match` name
    # being imported somewhere else in the module.
    import re

    label = name.name()
    if has_prefix:
        label = label[2:]
    return bool(re.match(DOMAIN_REGEX, label))


# Load the pickled name records.
# NOTE: pickle.load runs arbitrary code from the file -- trusted input only.
with open("nameDict.dat", "rb") as pickle_file:
    names_dict = pickle.load(pickle_file)

max_height = getMaxHeight(names_dict)

# Narrow the records in three passes, applied in this order: valid at the
# maximum height, in the "d" namespace, and passing valid_domain_name.
for _record_filter in (
        lambda record: record.isValidAtHeight(max_height),
        lambda record: record.namespace() == "d",
        valid_domain_name,
):
    names_dict = getDictSubset(names_dict, _record_filter)

# One string_segments() result per remaining name (first two characters of
# the key dropped), sorted ascending.
segment_counts = []
for name in names_dict.keys():
    segment_counts.append(SegmentString().string_segments(name[2:]))
segment_counts.sort()

with open("segment_counts.pickle", "wb") as output_file:
    pickle.dump(segment_counts, output_file)

from nltk.util import ngrams

from common import getDictSubset
from nameHistory import getMaxHeight
from segment_string import SegmentString

DOMAIN_REGEX = "^[a-z]([a-z0-9-]{0,62}[a-z0-9])?$"


def valid_domain_name(name, has_prefix=True):
    """Check a record's name against ``DOMAIN_REGEX``.

    Args:
        name: Record object whose ``name()`` method returns the name string.
        has_prefix: If true (default), the leading two characters are
            removed before matching (presumably the ``"d/"`` namespace
            prefix -- confirm against callers); if false, the string is
            matched unchanged.

    Returns:
        bool: True when the resulting string matches ``DOMAIN_REGEX``.
    """
    # Local import: avoids depending on a module-level `match` import.
    import re

    full_name = name.name()
    label = full_name[2:] if has_prefix else full_name
    return re.match(DOMAIN_REGEX, label) is not None


# NOTE: unpickling executes code stored in the file; use trusted data only.
with open("nameDict.dat", "rb") as pickle_file:
    names_dict = pickle.load(pickle_file)

max_height = getMaxHeight(names_dict)

# Three nested filters, evaluated innermost first:
#   1. records valid at the maximum height,
#   2. records in the "d" namespace,
#   3. records accepted by valid_domain_name.
names_dict = getDictSubset(
    getDictSubset(
        getDictSubset(
            names_dict,
            lambda record: record.isValidAtHeight(max_height),
        ),
        lambda record: record.namespace() == "d",
    ),
    valid_domain_name,
)

# string_segments() result for every remaining key with its first two
# characters removed, in ascending order.
segment_counts = sorted(
    SegmentString().string_segments(key[2:]) for key in names_dict.keys()
)

with open("segment_counts.pickle", "wb") as output_file:
    pickle.dump(segment_counts, output_file)