{ "cells": [ { "cell_type": "markdown", "id": "cc72af0d", "metadata": {}, "source": [ "This is the basic environment. stumpy is the main library we're working with, numpy and pandas are for all intents and purposes required to work with datasets and matplotlib allows us nice output." ] }, { "cell_type": "code", "execution_count": 1, "id": "ecf98d07", "metadata": {}, "outputs": [], "source": [ "import pandas\n", "import stumpy\n", "import numpy\n", "import matplotlib.pyplot as plot\n", "import matplotlib.dates as dates\n", "from matplotlib.patches import Rectangle\n", "import datetime as dt" ] }, { "cell_type": "markdown", "id": "a42ce3b2", "metadata": {}, "source": [ "Basic utilities used to output the time it takes to compute a matrix profile over a given number of windows." ] }, { "cell_type": "code", "execution_count": 2, "id": "961bcc44", "metadata": {}, "outputs": [], "source": [ "from functools import wraps\n", "import time\n", "def timeit(fn):\n", " def timed(*args, **kw):\n", " print (f'>>> {fn.__name__} >>>')\n", "\n", " ts = time.time()\n", " result = fn(*args, **kw)\n", " te = time.time()\n", "\n", " print (f'<<< {fn.__name__} <<< {(te-ts):.3f}s')\n", " return result\n", " return timed\n", "\n", "@timeit\n", "@wraps(stumpy.stump)\n", "def timed_stump(dataset, w):\n", " l = len(dataset)\n", " ws = l - w + 1\n", " print(f'Processing a matrix profile over {ws} windows...')\n", " result = stumpy.stump(dataset, w)\n", " return result" ] }, { "cell_type": "markdown", "id": "e9e52f62", "metadata": {}, "source": [ "More utilities to encapsulate the process of plotting our data." ] }, { "cell_type": "code", "execution_count": 3, "id": "44481736", "metadata": {}, "outputs": [], "source": [ "from string import capwords\n", "plot.style.use(\"data/stumpy.mplstyle\")\n", "def plot_pandas_import (plot_, data, axis_name):\n", " plot_.set_ylabel(capwords(axis_name), fontsize='15')\n", " plot_.plot(data[axis_name].values)" ] }, { "cell_type": "code", "execution_count": 4, "id": "50431b57", "metadata": {}, "outputs": [], "source": [ "def plot_matrix_profile(plot_, profile):\n", " plot_.set_xlabel('Time', fontsize='15')\n", " plot_.set_ylabel('Distance', fontsize='15')\n", " plot_.plot(profile[:, 0])" ] }, { "cell_type": "markdown", "id": "07c68fbe", "metadata": {}, "source": [ "Load the data!" ] }, { "cell_type": "code", "execution_count": 5, "id": "da75be3d", "metadata": {}, "outputs": [], "source": [ "steam_data = pandas.read_csv(\"data/STUMPY_Basics_steamgen.csv\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "b6e0f03c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | drum pressure | \n", "excess oxygen | \n", "water level | \n", "steam flow | \n", "
|---|---|---|---|---|
| 0 | \n", "320.08239 | \n", "2.506774 | \n", "0.032701 | \n", "9.302970 | \n", "
| 1 | \n", "321.71099 | \n", "2.545908 | \n", "0.284799 | \n", "9.662621 | \n", "
| 2 | \n", "320.91331 | \n", "2.360562 | \n", "0.203652 | \n", "10.990955 | \n", "
| 3 | \n", "325.00252 | \n", "0.027054 | \n", "0.326187 | \n", "12.430107 | \n", "