Example 1
def on_message(client, userdata, msg):
    """Mock on_message method.

    Arguments:
        client {[type]} -- [description]
        userdata {[type]} -- [description]
        msg {[type]} -- [description]
    """
    client = client
    userdata = userdata
    data = json.loads(msg.payload.decode())
    data_point = DataPoint(identifier=data['id'],
                           date=datetime.fromisoformat(
                               data['content']['time_of_measurement']),
                           temperature_f=data['content']['temperature_f'],
                           temperature_c=data['content']['temperature_c'],
                           type=data['type'])
    session.add(data_point)
    session.commit()
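
For context, a minimal sketch of how such a callback is typically registered with paho-mqtt; the broker host and topic below are placeholders, not taken from the original snippet:

    import paho.mqtt.client as mqtt

    client = mqtt.Client()
    client.on_message = on_message           # route incoming messages to the handler above
    client.connect("broker.example.com")     # placeholder broker host
    client.subscribe("sensors/temperature")  # placeholder topic
    client.loop_forever()                    # block and dispatch messages
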
Example 2
def submit_metric():
    """
    Submits the metric data for a specific computer
    if the computer doesn't exist it is created.
    """

    gson = json.loads(request.get_json())

    new_point = DataPoint(computer_name=gson["computer_name"],
                          cpu_percentage=gson["cpu_percentage"],
                          memory_percentage=gson["memory_percentage"],
                          timestamp=gson["timestamp"])

    with lock:
        # Lazily create a bounded Timeline for this computer; note that
        # int() raises if COLLECTOR_BUFFER_SIZE is not set in the environment.
        if not instances.get(new_point.computer_name):
            instances[new_point.computer_name] = Timeline(
                maxsize=int(os.environ.get("COLLECTOR_BUFFER_SIZE")))
        instances[new_point.computer_name].append(new_point)

    return Response(status=200)
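
A minimal client-side sketch for exercising this endpoint; the URL and route path are placeholders, since the snippet does not include its @app.route decorator. The payload is serialized twice to match the json.loads(request.get_json()) call above:

    import json
    import requests

    payload = {"computer_name": "host-01", "cpu_percentage": 12.5,
               "memory_percentage": 48.0, "timestamp": "2021-01-01T00:00:00"}
    # json= serializes the string again, producing the double-encoded body
    requests.post("http://localhost:5000/metrics", json=json.dumps(payload))
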
Example 3
def get_post_data():
    if request.method == "GET":
        # We're sending all of our data to the user!
        data = session.query(DataPoint).all()
        return jsonify([i.serialize for i in data])

    elif request.method == "POST":
        # There's a new data point in town!
        name = request.form.get("name") or "Anonymous"
        Rstar = request.form.get("Rstar", type=float)
        fp = request.form.get("fp", type=float)
        ne = request.form.get("ne", type=float)
        fl = request.form.get("fl", type=float)
        fi = request.form.get("fi", type=float)
        fc = request.form.get("fc", type=float)
        L = request.form.get("L", type=float)

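        # Drake equation: N estimates the number of detectable civilizations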
        N = Rstar * fp * ne * fl * fi * fc * L

        new_data = DataPoint(name=name,
                             N=N,
                             Rstar=Rstar,
                             fp=fp,
                             ne=ne,
                             fl=fl,
                             fi=fi,
                             fc=fc,
                             L=L)

        session.add(new_data)
        session.commit()

        return jsonify(new_data.serialize)

    else:
        # The only two HTTP methods we support are GET and POST.
        return "Unsupported HTTP request", 400
Example 4
def main(csvfile):
    # TODO use Pandas
    reader = csv.reader(csvfile, delimiter=',', quotechar='"')

    points = []
    feature_vectors = []

    idxs = set()
    names = set()
    preexist = set(FeatureVector.objects.all().values_list("exact_name",
                                                           flat=True))

    now = timezone.now()

    count = 0
    for i, row in enumerate(reader):
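        # The first row is the header: build the column-name -> index mapping.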
        if not i:
            mapping = get_mapping(row)
            continue
        if row == [] or len(row) < max(mapping.values()):
            # skip blank or truncated rows
            continue
        try:
            try:
                exact_name = get_exact_name(row[mapping["Name"]])
                try:
                    # A name that parses may still fail feature extraction.
                    decay_feature = get_decay_feature_vector(exact_name)
                    feature_vector = True
                    if exact_name not in names and exact_name not in preexist:
                        temp = FeatureVector(exact_name=exact_name,
                                             type=FeatureVector.DECAY,
                                             vector=decay_feature,
                                             created=now)

                        temp.clean_fields()
                        feature_vectors.append(temp)
                        names.add(exact_name)

                        if len(feature_vectors) > 150:
                            FeatureVector.objects.bulk_create(feature_vectors)
                            feature_vectors = []

                except Exception:
                    # feature extraction failed; keep the data point anyway
                    feature_vector = None
            except Exception:
                # the name itself could not be parsed
                feature_vector = None
                exact_name = None

            band_gap = row[mapping["BandGap"]]
            data = {
                "name": row[mapping["Name"]],
                "options": row[mapping["Options"]],
                "h**o": row[mapping["H**O"]],
                "lumo": row[mapping["LUMO"]],
                "homo_orbital": row[mapping["HomoOrbital"]],
                "dipole": row[mapping["Dipole"]],
                "energy": row[mapping["Energy"]],
                "band_gap": band_gap if band_gap != '---' else None,
                "exact_name": exact_name,
                "created": now,
            }

            point = DataPoint(**data)
            point.clean_fields()
            points.append(point)
            if len(points) > 50:
                DataPoint.objects.bulk_create(points)
                points = []
            if feature_vector is not None:
                idxs.add(count)

            count += 1
        except Exception:
            # skip rows that fail parsing or validation entirely
            pass

    DataPoint.objects.bulk_create(points)
    FeatureVector.objects.bulk_create(feature_vectors)

    # Link each new DataPoint to its FeatureVector via the M2M through table.
    Through = DataPoint.vectors.through

    temp = DataPoint.objects.filter(created=now).values_list(
        "pk", "exact_name")
    temp2 = FeatureVector.objects.all().values_list("exact_name", "pk")
    groups = dict(temp2)

    final = []
    for i, (pk, name) in enumerate(temp):
        if i in idxs:
            final.append(
                Through(datapoint_id=pk, featurevector_id=groups[name]))

            if len(final) > 200:
                Through.objects.bulk_create(final)
                final = []
    Through.objects.bulk_create(final)

    return count
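
A minimal invocation sketch, assuming this runs in a Django context (e.g. a management command) where the models and helpers are importable; the file name is a placeholder:

    with open("molecules.csv", newline="") as csvfile:
        imported = main(csvfile)
    print(f"imported {imported} data points")
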
Example 5
    def get_latest_data_point(self, stocks: List[str],
                              current_time: datetime) -> Dict[str, DataPoint]:

        spark_data_frame_for_stock = self.spark.createDataFrame([],
                                                                self.schema)
        pandas_data_frame = pandas.DataFrame(
            columns=['Datetime', 'Open', 'High', 'Low', 'Close',
                     'AdjustedClose', 'Volume', 'Symbol'])

        download_list = []

        stocks_dict = {}

        start_time = current_time.strftime("%Y-%m-%d")
        end_time = (current_time + timedelta(days=1)).strftime("%Y-%m-%d")

        historical_data_path = "./datasets/historical_data/"
        folder_path = historical_data_path + start_time + "/"

        for stock in stocks:
            try:
                Path(folder_path).mkdir(parents=True, exist_ok=True)
            except OSError:
                print(f'Creation of the directory {folder_path} failed')

            stock_file = Path(folder_path + stock + ".csv")
            if stock_file.is_file() and \
                    current_time < datetime.now() - timedelta(hours=24):
                # if stock data already downloaded, just load it
                # stock_data_spark_df = self.spark.read .csv(str(stock_file), schema=self.schema, timestampFormat="yyyy-MM-dd HH:mm:ss", header=True)
                # spark_data_frame_for_stock = spark_data_frame_for_stock.union(stock_data_spark_df)
                stock_data = pandas.read_csv(str(stock_file))
                stock_data['Datetime'] = pandas.to_datetime(
                    stock_data['Datetime'], format='%Y-%m-%d')
                pandas_data_frame = pandas.concat(
                    [pandas_data_frame, stock_data])

            else:
                # add stock to download list
                download_list.append(stock)

        if len(download_list) > 0:
            stocks_data = yf.download(download_list,
                                      start=start_time,
                                      end=end_time,
                                      interval="1m")
            if len(stocks_data) > 0:
                for stock in download_list:

                    stock_data = stocks_data if len(
                        download_list) == 1 else stocks_data[stock]
                    stock_data = stock_data.rename(
                        columns={"Adj Close": "AdjustedClose"})
                    stock_data = stock_data.reset_index()
                    stock_data.dropna(inplace=True)
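                    # Strip the UTC-offset suffix (e.g. "-04:00") to get naive timestamps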
                    stock_data["Datetime"] = stock_data["Datetime"].astype(
                        str).str[:-6].astype('datetime64[ns]')
                    stock_data["Volume"] = stock_data["Volume"].astype(float)
                    stock_data["Symbol"] = stock

                    # Cache to CSV only when the requested time is safely in the past.
                    if current_time < datetime.now() - timedelta(hours=24):
                        stock_file = Path(folder_path + stock + ".csv")
                        stock_data.to_csv(path_or_buf=stock_file, index=False)

                    pandas_data_frame = pandas.concat(
                        [pandas_data_frame, stock_data])

            # date_filter_string = "Datetime < '" + current_time.strftime("%Y-%m-%d %H:%M:%S") + "'"
            # spark_data_frame_for_stock = spark_data_frame_for_stock.union(self.spark.createDataFrame(stock_data, self.schema))
            # data_frame = self.spark.createDataFrame(stock_data, self.schema)

        spark_data_frame_for_stock = self.spark.createDataFrame(
            pandas_data_frame, self.schema)
        for stock in stocks:
            last_point_row = spark_data_frame_for_stock \
                .where(spark_data_frame_for_stock.Datetime <= current_time.strftime("%Y-%m-%d %H:%M:%S")) \
                .where(spark_data_frame_for_stock.Symbol == stock)\
                .sort("Datetime", ascending=False) \
                .limit(1) \
                .select("*") \
                .first()

            data_point = DataPoint(last_point_row.Open, last_point_row.Close,
                                   last_point_row.High, last_point_row.Low,
                                   last_point_row.Volume,
                                   last_point_row.Datetime)
            stocks_dict[stock] = data_point

        return stocks_dict
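
A minimal call sketch, assuming loader is an instance of the class this method belongs to (the class definition is not part of the snippet):

    from datetime import datetime
    latest = loader.get_latest_data_point(["AAPL", "MSFT"],
                                          datetime(2021, 3, 1, 15, 30))
    print(latest["AAPL"])
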
Example 6
    def get_historical_data(self,
                            stock: str,
                            current_time: datetime,
                            number_of_days: int = 10) -> List[DataPoint]:
        spark_data_frame_for_stock = self.spark.createDataFrame([],
                                                                self.schema)

        for i in range(number_of_days + 1):
            start_time = (current_time -
                          timedelta(days=i)).strftime("%Y-%m-%d")
            end_time = (current_time -
                        timedelta(days=i - 1)).strftime("%Y-%m-%d")

            historical_data_path = "./datasets/historical_data/"
            folder_path = historical_data_path + start_time + "/"
            try:
                Path(folder_path).mkdir(parents=True, exist_ok=True)
            except OSError:
                print(f'Creation of the directory {folder_path} failed')
            # else:
            # print(f'Successfully created the directory {folder_path}')

            stock_file = Path(folder_path + stock + ".csv")
            if stock_file.is_file():
                # if stock data already downloaded, just load it
                stock_data_spark_df = self.spark.read \
                    .csv(str(stock_file), schema=self.schema, timestampFormat="yyyy-MM-dd HH:mm:ss", header=True)
                spark_data_frame_for_stock = spark_data_frame_for_stock.union(
                    stock_data_spark_df)
            else:
                # download if not downloaded
                stock_data = yf.download(stock,
                                         start=start_time,
                                         end=end_time,
                                         interval="1m")
                if len(stock_data) < 1:
                    print(f'stock data not found on yahoo finance: {stock}')
                    continue

                stock_data = stock_data.rename(
                    columns={"Adj Close": "AdjustedClose"})
                stock_data = stock_data.reset_index()
                stock_data.dropna(inplace=True)
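                # Strip the UTC-offset suffix (e.g. "-04:00") to get naive timestamps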
                stock_data["Datetime"] = stock_data["Datetime"].astype(
                    str).str[:-6].astype('datetime64[ns]')
                stock_data["Volume"] = stock_data["Volume"].astype(float)
                stock_data["Symbol"] = stock
                # Cache to CSV only for days safely in the past.
                if (current_time - timedelta(days=i)
                        < datetime.now() - timedelta(days=1)):
                    stock_data.to_csv(path_or_buf=stock_file, index=False)
                stock_data_spark_df = self.spark.createDataFrame(
                    stock_data, self.schema)
                spark_data_frame_for_stock = spark_data_frame_for_stock.union(
                    stock_data_spark_df)

        spark_data_frame_for_stock_sorted = spark_data_frame_for_stock\
            .where(spark_data_frame_for_stock.Datetime <= current_time.strftime("%Y-%m-%d %H:%M:%S"))\
            .sort("Datetime")\
            .collect()

        list_of_data_points = [
            DataPoint(row.Open, row.Close, row.High, row.Low, row.Volume,
                      row.Datetime)
            for row in spark_data_frame_for_stock_sorted
        ]

        return list_of_data_points
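
And a matching call sketch for the historical loader, under the same assumption about loader:

    from datetime import datetime
    history = loader.get_historical_data("AAPL", datetime(2021, 3, 1),
                                         number_of_days=5)
    print(f"{len(history)} one-minute data points")
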