parent
e01bf41df9
commit
ff24cab33b
@ -0,0 +1,18 @@
|
||||
|
||||
# 1 - STUMPY Basics
|
||||
Beyond what was covered in the tutorial, I started on a general-purpose motif
|
||||
function which takes the dataset and a computed matrix profile and returns the
|
||||
top motif it discovered and all locations where that motif appears. It does not
|
||||
return overlapping motifs.
|
||||
|
||||
## TODO
|
||||
I remember reading that the distances returned by a matrix profile have an upper
|
||||
bound. I should see what calculating my distance cutoff with that upper bound
|
||||
looks like rather than calculating off the top motif like I do now.
|
||||
If two motifs overlap it could be that the window is too small. I don't know what
|
||||
other criteria to test yet to tell me if that is the case or not. I do know that
|
||||
this can be determined in discovery stages using a Pan-matrix profile (PMP).
|
||||
Some form of reference following should occur to ensure that I'm grabbing motifs
|
||||
which actually relate to one another.
|
||||
Currently the function is quite primitive. It should be packaged and iterated on
|
||||
using the data in further sections of the tutorial.
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,2 @@
|
||||
figure.figsize : 20, 6
|
||||
xtick.direction : out
|
||||
@ -0,0 +1,85 @@
|
||||
# Anything vaguely portable from the ipython notebook is here in plaintext
|
||||
import pandas
|
||||
import stumpy
|
||||
import numpy
|
||||
import matplotlib.pyplot as plot
|
||||
import matplotlib.dates as dates
|
||||
from matplotlib.patches import Rectangle
|
||||
import datetime as dt
|
||||
|
||||
## Process wrapper, time reporting
|
||||
from functools import wraps
|
||||
import time
|
||||
def timeit(fn):
    """Decorate ``fn`` so every call prints entry/exit banners and its duration.

    A ``>>> name >>>`` line is printed before the call and a
    ``<<< name <<< N.NNNs`` line after it returns. If ``fn`` raises, the
    exception propagates and the closing line is not printed.

    Args:
        fn: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``fn`` and returns its result unchanged.
    """
    @wraps(fn)  # keep fn's __name__/__doc__ visible on the wrapper
    def timed(*args, **kw):
        print (f'>>> {fn.__name__} >>>')

        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time.time() can.
        ts = time.perf_counter()
        result = fn(*args, **kw)
        te = time.perf_counter()

        print (f'<<< {fn.__name__} <<< {(te-ts):.3f}s')
        return result
    return timed
|
||||
|
||||
# Timed front-end for stumpy.stump: announces how many sliding windows of
# width ``w`` fit in ``dataset`` (len(dataset) - w + 1), then delegates.
# Documented with comments rather than a docstring because wraps() replaces
# this function's __doc__ with the one copied from stumpy.stump.
@timeit
@wraps(stumpy.stump)
def timed_stump(dataset, w):
    window_count = len(dataset) - w + 1
    print(f'Processing a matrix profile over {window_count} windows...')
    return stumpy.stump(dataset, w)
|
||||
|
||||
# Basic feature extraction
|
||||
def threshold_extraction(op, cmp, profile, motif_order, window_width, threshold):
    """Collect non-overlapping indices whose profile value is close to the best.

    The value at ``profile[motif_order[0], 0]`` is the reference. A cutoff is
    derived as ``op(reference, reference * threshold)`` and ``motif_order`` is
    walked in order, keeping each index whose value satisfies
    ``cmp(value, cutoff)``. The walk stops at the first index that fails, so
    ``motif_order`` is expected to be sorted best-first.

    Args:
        op: Binary callable combining the reference value with its margin.
        cmp: Binary predicate ``cmp(value, cutoff)`` deciding membership.
        profile: 2-D indexable (``profile[i, 0]``) holding the profile values.
        motif_order: Sequence of indices into ``profile``, best candidate first.
        window_width: Minimum distance between two kept indices; a candidate
            closer than this to an already-kept index is dropped.
        threshold: Fraction of the reference value used as margin; must be
            strictly between 0 and 1.

    Returns:
        List of kept indices, in the order they were encountered. Empty when
        ``motif_order`` is empty.

    Raises:
        AssertionError: If ``threshold`` is not in the open interval (0, 1).
    """
    assert 0 < threshold < 1.0, 'threshold must be strictly between 0 and 1'

    # Nothing to extract; avoids an IndexError on motif_order[0] below.
    if len(motif_order) == 0:
        return []

    closest = profile[motif_order[0], 0]
    cutoff = op(closest, (closest * threshold))
    rv = []

    for obs in motif_order:
        value = profile[obs, 0]
        # A value equal to the reference always qualifies. Without this, a
        # reference of exactly 0 gives cutoff == 0 and a strict comparison
        # would reject the reference itself, returning nothing at all.
        if value != closest and not cmp(value, cutoff):
            break
        if all(abs(kept - obs) >= window_width for kept in rv):
            rv.append(obs)
        # Otherwise the candidate overlaps one already kept and is skipped.
        # If two motifs overlap it's possible the window is too small?
        # Further criteria needed for that...
    return rv
|
||||
|
||||
def get_motifs(profile, motif_order, window_width, threshold):
    """Extract indices whose profile value lies just above the best (lowest) one.

    Delegates to ``threshold_extraction`` with a cutoff of
    ``best + best * threshold`` and keeps values strictly below that cutoff.
    Presumably ``motif_order`` is sorted by ascending profile value; confirm
    against the caller.
    """
    def raise_by_margin(best, margin):
        return best + margin

    def is_below(value, cutoff):
        return value < cutoff

    return threshold_extraction(
        raise_by_margin,
        is_below,
        profile,
        motif_order,
        window_width,
        threshold,
    )
|
||||
|
||||
def get_discords(profile, motif_order, window_width, threshold):
    """Extract indices whose profile value lies just below the largest one.

    ``motif_order`` is reversed with ``numpy.flip`` before being handed to
    ``threshold_extraction``, the cutoff is ``top - top * threshold``, and
    values strictly above that cutoff are kept. Presumably ``motif_order``
    arrives sorted by ascending profile value; confirm against the caller.
    """
    def lower_by_margin(top, margin):
        return top - margin

    def is_above(value, cutoff):
        return value > cutoff

    reversed_order = numpy.flip(motif_order)
    return threshold_extraction(
        lower_by_margin,
        is_above,
        profile,
        reversed_order,
        window_width,
        threshold,
    )
|
||||
|
||||
def mark_discovered(motif_list, data_fig, profile_fig, window_width, fig_height):
    """Highlight each discovered position on the data and profile plots.

    For every start position in ``motif_list`` a light grey rectangle of
    ``window_width`` by ``fig_height`` anchored at ``(position, 0)`` is added
    to ``data_fig``, and a dashed vertical line is drawn at the same x on
    ``profile_fig``.
    """
    for start in motif_list:
        anchor = (start, 0)
        highlight = Rectangle(anchor, window_width, fig_height, facecolor='lightgrey')
        data_fig.add_patch(highlight)
        profile_fig.axvline(x=start, linestyle='dashed')
|
||||
|
||||
# Basic helper functions
|
||||
def plot_matrix_profile(plot_, profile):
    """Label ``plot_`` as Time vs. Distance and draw column 0 of ``profile``."""
    label_style = {'fontsize': '15'}
    plot_.set_xlabel('Time', **label_style)
    plot_.set_ylabel('Distance', **label_style)

    distances = profile[:, 0]
    plot_.plot(distances)
|
||||
|
||||
from string import capwords
|
||||
# Apply the matplotlib style sheet at data/stumpy.mplstyle when this module is
# loaded; the relative path is resolved against the current working directory.
plot.style.use("data/stumpy.mplstyle")
|
||||
def plot_pandas_import(plot_, data, axis_name):
    """Plot the ``axis_name`` column of ``data`` on ``plot_``.

    The y-axis label is ``axis_name`` with each word capitalised; the plotted
    series is ``data[axis_name].values``.
    """
    y_label = capwords(axis_name)
    plot_.set_ylabel(y_label, fontsize='15')

    series = data[axis_name].values
    plot_.plot(series)
|
||||
@ -1,14 +1,27 @@
|
||||
## Matrix Profiles Are Cool
|
||||
... and I've been interested in them going on a year now. The mathematics involved are deceptively
|
||||
simple, relying only on a z-normalized Euclidean distance comparison between Fourier-transformed
|
||||
subsequences. After wrapping my head around that basic primitive I dove into [the literature][1]. Much
|
||||
subsequences. After wrapping my head around that primitive I dove into [the literature][1]. Much
|
||||
of the early academic literature regarding the MP is devoted to speeding up the calculation of
|
||||
Fourier transformations on a sliding window of subsequences.
|
||||
Fourier transformations on a sliding window of subsequences and other novel improvements and
|
||||
shortcuts in the mathematics.
|
||||
|
||||
This repository is a collection of code relating to the [stumpy tutorial][2]. Some of it is simply
|
||||
copied in as I follow along; I have tried to mark as clearly as possible where I make my own
|
||||
extrapolations. All data related to the tutorial is also mirrored in a data directory for each entry.
|
||||
|
||||
## 1 - STUMPY Basics
|
||||
Beyond what was covered in the tutorial, I started on a general-purpose motif function which takes the dataset
|
||||
and a computed matrix profile and returns possibly multiple motif groups. It takes 2 threshold parameters,
|
||||
one for the absolute value of the matrix profile at the given point and the other for a percentage of the
|
||||
maximum data magnitude.
|
||||
|
||||
### TODO
|
||||
I remember reading somewhere about an upper bound on matrix profile values. I should find that again
|
||||
and calculate a percentage of the upper bound rather than having mp_thresh be an absolute value.
|
||||
The Motifs class and function should be broken off into their own module for re-use elsewhere, including
|
||||
Jupyter.
|
||||
|
||||
|
||||
[1]: https://www.cs.ucr.edu/%7Eeamonn/MatrixProfile.html "Resources and papers on the Matrix Profile"
|
||||
[2]: https://stumpy.readthedocs.io/en/latest/tutorials.html "stumpy tutorial"
|
||||
Loading…
Reference in new issue