parent
e01bf41df9
commit
ff24cab33b
@ -0,0 +1,18 @@
|
|||||||
|
|
||||||
|
# 1 - STUMPY Basics
|
||||||
|
Beyond what was covered in the tutorial, I started on a general-purpose motif
|
||||||
|
function which takes the dataset and a computed matrix profile and returns the
|
||||||
|
top motif it discovered and all locations where that motif appears. It does not
|
||||||
|
return overlapping motifs.
|
||||||
|
|
||||||
|
## TODO
|
||||||
|
I remember reading that the distances returned by a matrix profile have an upper
|
||||||
|
bound. I should see what calculating my distance cutoff with that upper bound
|
||||||
|
looks like rather than calculating off the top motif like I do now.
|
||||||
|
If two motifs overlap it could be that the window is too small. I don't know what
|
||||||
|
other criteria to test yet to tell me if that is the case or not. I do know that
|
||||||
|
this can be determined in discovery stages using a Pan-matrix profile (PMP).
|
||||||
|
Some form of reference following should occur to ensure that I'm grabbing motifs
|
||||||
|
which actually relate to one another.
|
||||||
|
Currently the function is quite primitive. It should be packaged and iterated on
|
||||||
|
using the data in further sections of the tutorial.
|
||||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,2 @@
|
|||||||
|
figure.figsize : 20, 6
|
||||||
|
xtick.direction : out
|
||||||
@ -0,0 +1,85 @@
|
|||||||
|
# Anything vaguely portable from the ipython notebook is here in plaintext
|
||||||
|
import pandas
|
||||||
|
import stumpy
|
||||||
|
import numpy
|
||||||
|
import matplotlib.pyplot as plot
|
||||||
|
import matplotlib.dates as dates
|
||||||
|
from matplotlib.patches import Rectangle
|
||||||
|
import datetime as dt
|
||||||
|
|
||||||
|
## Process wrapper, time reporting
|
||||||
|
from functools import wraps
|
||||||
|
import time
|
||||||
|
def timeit(fn):
    """Decorate ``fn`` so every call prints entry/exit banners and its duration.

    The wrapper prints ``>>> name >>>`` before calling ``fn`` and
    ``<<< name <<< N.NNNs`` after it returns, where ``name`` is
    ``fn.__name__``. Arguments and the return value pass through untouched.
    If ``fn`` raises, the exception propagates and no exit banner is printed.

    Parameters
    ----------
    fn : callable
        The function to time.

    Returns
    -------
    callable
        A wrapper carrying the metadata of ``fn`` (name, docstring, ...).
    """
    # Preserve fn's name/docstring on the wrapper so stacked decorators and
    # introspection keep seeing the wrapped function rather than "timed".
    @wraps(fn)
    def timed(*args, **kw):
        print(f'>>> {fn.__name__} >>>')

        # perf_counter is monotonic and high resolution, so the measured
        # interval cannot be skewed by system clock adjustments.
        ts = time.perf_counter()
        result = fn(*args, **kw)
        te = time.perf_counter()

        print(f'<<< {fn.__name__} <<< {(te-ts):.3f}s')
        return result
    return timed
|
||||||
|
|
||||||
|
# NOTE: wraps(stumpy.stump) copies stumpy.stump's metadata onto this function,
# so the banners printed by timeit show that name and any docstring written
# here would be replaced; the contract is therefore described in comments.
#
# Announces how many sliding windows of width `w` fit in `dataset`, then
# computes and returns the matrix profile via stumpy.stump(dataset, w).
@timeit
@wraps(stumpy.stump)
def timed_stump(dataset, w):
    # A series of length n holds n - w + 1 windows of width w.
    window_count = len(dataset) - w + 1
    print(f'Processing a matrix profile over {window_count} windows...')
    return stumpy.stump(dataset, w)
|
||||||
|
|
||||||
|
# Basic feature extraction
|
||||||
|
def threshold_extraction(op, cmp, profile, motif_order, window_width, threshold):
    """Collect non-overlapping profile indices that fall within a cutoff.

    The first entry of ``motif_order`` is taken as the reference; its profile
    value (``profile[idx, 0]``) and ``threshold`` define the cutoff as
    ``op(best, best * threshold)``. Candidates are then visited in the order
    given and kept while ``cmp(value, cutoff)`` holds. The scan stops at the
    first candidate that fails ``cmp``, so ``motif_order`` is expected to be
    ordered from best to worst.

    Parameters
    ----------
    op : callable(best, margin) -> cutoff
        Combines the reference value with its scaled margin.
    cmp : callable(value, cutoff) -> bool
        True while a candidate is still inside the cutoff.
    profile : 2-D indexable
        Read as ``profile[index, 0]``.
    motif_order : sequence of int
        Candidate indices into ``profile``, best first.
    window_width : int
        Two indices closer than this are considered overlapping.
    threshold : float
        Fraction of the reference value used as margin; must be in (0, 1).

    Returns
    -------
    list
        Accepted indices, in the order they were encountered. Empty when
        ``motif_order`` is empty.

    Raises
    ------
    ValueError
        If ``threshold`` is not strictly between 0 and 1.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not 0 < threshold < 1.0:
        raise ValueError(
            f'threshold must be strictly between 0 and 1, got {threshold!r}'
        )

    # Nothing to rank: avoid indexing motif_order[0] on an empty sequence.
    if len(motif_order) == 0:
        return []

    closest = profile[motif_order[0], 0]
    cutoff = op(closest, (closest * threshold))
    rv = []

    for obs in motif_order:
        if not cmp(profile[obs, 0], cutoff):
            # Candidates are ordered, so everything after this one fails too.
            break
        # If two motifs overlap it's possible the window is too small?
        # Further criteria needed for that... for now overlapping
        # candidates are simply skipped.
        if not any(abs(kept - obs) < window_width for kept in rv):
            rv.append(obs)
    return rv
|
||||||
|
|
||||||
|
def get_motifs(profile, motif_order, window_width, threshold):
    """Return non-overlapping indices whose profile value is near the minimum.

    Delegates to ``threshold_extraction`` with a cutoff placed *above* the
    first candidate's value (value + value * threshold) and keeps candidates
    strictly below that cutoff. ``motif_order`` is passed through unchanged.
    """
    def raise_cutoff(best, margin):
        return best + margin

    def is_under(value, cutoff):
        return value < cutoff

    return threshold_extraction(
        raise_cutoff,
        is_under,
        profile,
        motif_order,
        window_width,
        threshold,
    )
|
||||||
|
|
||||||
|
def get_discords(profile, motif_order, window_width, threshold):
    """Return non-overlapping indices whose profile value is near the maximum.

    Delegates to ``threshold_extraction`` with ``motif_order`` reversed, a
    cutoff placed *below* the first (reversed) candidate's value
    (value - value * threshold), and keeps candidates strictly above it.
    """
    def lower_cutoff(best, margin):
        return best - margin

    def is_over(value, cutoff):
        return value > cutoff

    # Reverse the ordering so the scan starts from the other end.
    reversed_order = numpy.flip(motif_order)
    return threshold_extraction(
        lower_cutoff,
        is_over,
        profile,
        reversed_order,
        window_width,
        threshold,
    )
|
||||||
|
|
||||||
|
def mark_discovered(motif_list, data_fig, profile_fig, window_width, fig_height):
    """Highlight each discovered location on the data and profile plots.

    For every start position in ``motif_list`` a light grey rectangle of
    ``window_width`` by ``fig_height`` anchored at ``(start, 0)`` is added to
    ``data_fig``, and a dashed vertical line is drawn at ``start`` on
    ``profile_fig``.
    """
    for start in motif_list:
        highlight = Rectangle(
            (start, 0),
            window_width,
            fig_height,
            facecolor='lightgrey',
        )
        data_fig.add_patch(highlight)
        profile_fig.axvline(x=start, linestyle='dashed')
|
||||||
|
|
||||||
|
# Basic helper functions
|
||||||
|
def plot_matrix_profile(plot_, profile):
    """Label the axes 'Time' / 'Distance' and plot column 0 of ``profile``.

    ``plot_`` only needs ``set_xlabel``, ``set_ylabel`` and ``plot`` methods.
    """
    label_style = {'fontsize': '15'}
    plot_.set_xlabel('Time', **label_style)
    plot_.set_ylabel('Distance', **label_style)

    distances = profile[:, 0]
    plot_.plot(distances)
|
||||||
|
|
||||||
|
from string import capwords
|
||||||
|
# Apply the style sheet at data/stumpy.mplstyle when this module is loaded.
# The path is relative, so it resolves against the current working directory.
plot.style.use("data/stumpy.mplstyle")
|
||||||
|
def plot_pandas_import(plot_, data, axis_name):
    """Plot ``data[axis_name].values`` and label the y axis with the name.

    The y label is ``axis_name`` with each word capitalised. ``plot_`` only
    needs ``set_ylabel`` and ``plot`` methods.
    """
    y_label = capwords(axis_name)
    plot_.set_ylabel(y_label, fontsize='15')

    series = data[axis_name]
    plot_.plot(series.values)
|
||||||
@ -1,14 +1,27 @@
|
|||||||
## Matrix Profiles Are Cool
|
## Matrix Profiles Are Cool
|
||||||
... and I've been interested in them going on a year now. The mathematics involved are deceptively
|
... and I've been interested in them going on a year now. The mathematics involved are deceptively
|
||||||
simple, relying only on a z-normalized Euclidean distance comparison between Fourier transformed
|
simple, relying only on a z-normalized Euclidean distance comparison between Fourier transformed
|
||||||
subsequences. After wrapping my head around that basic primitive I dove into [the literature][1]. Much
|
subsequences. After wrapping my head around that primitive I dove into [the literature][1]. Much
|
||||||
of the early academic literature regarding the MP is devoted to speeding up the calculation of
|
of the early academic literature regarding the MP is devoted to speeding up the calculation of
|
||||||
Fourier transformations on a sliding window of subsequences.
|
Fourier transformations on a sliding window of subsequences and other novel improvements and
|
||||||
|
shortcuts in the mathematics.
|
||||||
|
|
||||||
This repository is a collection of code relating to the [stumpy tutorial][2]. Some of it is simply
|
This repository is a collection of code relating to the [stumpy tutorial][2]. Some of it is simply
|
||||||
copied in as I follow along; I have tried to mark as clearly as possible where I make my own
|
copied in as I follow along; I have tried to mark as clearly as possible where I make my own
|
||||||
extrapolations. All data related to the tutorial is also mirrored in a data directory for each entry.
|
extrapolations. All data related to the tutorial is also mirrored in a data directory for each entry.
|
||||||
|
|
||||||
|
## 1 - STUMPY Basics
|
||||||
|
Beyond what was covered in the tutorial, I started on a general-purpose motif function which takes the dataset
|
||||||
|
and a computed matrix profile and returns possibly multiple motif groups. It takes 2 threshold parameters,
|
||||||
|
one for the absolute value of the matrix profile at the given point and the other for a percentage of the
|
||||||
|
maximum data magnitude.
|
||||||
|
|
||||||
|
### TODO
|
||||||
|
I remember reading somewhere about an upper bound on matrix profile values. I should find that again
|
||||||
|
and calculate a percentage of the upper bound rather than having mp_thresh be an absolute value.
|
||||||
|
The Motifs class and function should be broken off into their own module for re-use elsewhere, including
|
||||||
|
Jupyter.
|
||||||
|
|
||||||
|
|
||||||
[1]: https://www.cs.ucr.edu/%7Eeamonn/MatrixProfile.html "Resources and papers on the Matrix Profile"
|
[1]: https://www.cs.ucr.edu/%7Eeamonn/MatrixProfile.html "Resources and papers on the Matrix Profile"
|
||||||
[2]: https://stumpy.readthedocs.io/en/latest/tutorials.html "stumpy tutorial"
|
[2]: https://stumpy.readthedocs.io/en/latest/tutorials.html "stumpy tutorial"
|
||||||
Loading…
Reference in new issue