← Back to HMM Trader

Lesson 08

The Full System

Everything from the previous seven lessons assembled into a running Python application. Three modular files: a data loader, a backtesting engine, and a Streamlit dashboard.

System architecture

yfinance

Download 730d hourly OHLCV data for BTC-USD

data_loader.py

Feature Engineering

returns, range, volume_change — clipped at the 1st/99th percentiles, not rescaled

data_loader.py

Gaussian HMM

7 states, full covariance, Baum-Welch training

backtester.py

Auto-Label

Rank by mean return → Bull / Bear / Chop

backtester.py

Indicators

8 technical columns via ta library

backtester.py

Signal + Backtest

2-factor entry (confirmed Bull regime + at least 7 of 8 confirmations), exit on confirmed Bear, 48-hour cooldown, 2.5× leverage

backtester.py

Dashboard

Streamlit: regime, confirmations, charts, trade log

dashboard.py

Setup and running

Install dependencies:

$ pip install -r requirements.txt

Run the dashboard:

$ streamlit run dashboard.py

Or run the backtester directly (no UI):

$ python backtester.py
First run note. On first launch, the app downloads ~17,000 hourly bars (~20 MB) and trains the HMM (20–60 seconds depending on hardware). Results are cached for 1 hour, so subsequent loads within that window are served instantly from the Streamlit cache.
requirements.txt — hmmlearn>=0.3.0, yfinance>=0.2.40, ta>=0.10.2, streamlit>=1.30.0, plotly>=5.18.0, pandas>=2.0.0, numpy>=1.24.0, scikit-learn>=1.3.0

Source code

Three files, fully commented. Expand each to read the implementation.

"""
data_loader.py — Market data download and feature engineering.
"""

import numpy as np
import pandas as pd
import yfinance as yf

def load_data(ticker="BTC-USD", period="730d", interval="1h"):
    """Download OHLCV bars and derive the three features the HMM is trained on.

    Args:
        ticker: Symbol passed to ``yf.download``.
        period: Look-back window passed to ``yf.download``.
        interval: Bar size passed to ``yf.download``.

    Returns:
        DataFrame with Open/High/Low/Close/Volume plus ``returns``
        (bar-over-bar percentage change of Close), ``range``
        ((High - Low) / Close) and ``volume_change`` (percentage change of
        Volume). Rows containing NaN or ±inf in any column are dropped.

    Raises:
        ValueError: If the download returns no rows.
    """
    raw = yf.download(ticker, period=period, interval=interval, progress=False)

    # Fail here with a clear message instead of letting an empty frame
    # surface later as an unrelated error during clipping or HMM fitting.
    if raw is None or raw.empty:
        raise ValueError(
            f"No market data returned for ticker {ticker!r} "
            f"(period={period!r}, interval={interval!r})"
        )

    # Flatten MultiIndex columns (yfinance quirk)
    if isinstance(raw.columns, pd.MultiIndex):
        raw.columns = raw.columns.droplevel(1)

    df = raw[["Open", "High", "Low", "Close", "Volume"]].copy()

    # Feature engineering — three axes for the HMM
    df["returns"] = df["Close"].pct_change()
    df["range"] = (df["High"] - df["Low"]) / df["Close"]
    df["volume_change"] = df["Volume"].pct_change()

    # pct_change yields NaN on the first row and ±inf after a zero-volume bar;
    # both are removed so downstream code only sees finite values.
    return df.replace([np.inf, -np.inf], np.nan).dropna()

def get_features(df, clip_pct=0.01):
    """Return the HMM input matrix with each feature clipped to its own quantiles.

    Every one of ``returns``, ``range`` and ``volume_change`` is clipped to
    the [clip_pct, 1 - clip_pct] quantile band of that column, which tames
    flash-crash outliers. ``df`` itself is not modified.

    Returns:
        2-D NumPy array with one row per bar and one column per feature,
        in the order returns, range, volume_change.
    """
    upper_q = 1 - clip_pct
    clipped_columns = [
        df[name].clip(df[name].quantile(clip_pct), df[name].quantile(upper_q))
        for name in ("returns", "range", "volume_change")
    ]
    return pd.concat(clipped_columns, axis=1).values
"""
backtester.py — HMM training, indicators, trade loop.

The Gaussian HMM:
  P(x_t | S_t=k) = N(x_t; μ_k, Σ_k)      ← emission
  P(S_t | S_{t-1}) = A[S_{t-1}, S_t]       ← transition
  Training: Baum-Welch EM (up to 200 iterations)
  Decoding: Viterbi → state sequence
"""

import pandas as pd
from hmmlearn.hmm import GaussianHMM
from ta.momentum import RSIIndicator
from ta.trend import MACD, ADXIndicator, EMAIndicator
from ta.volatility import AverageTrueRange
from ta.volume import OnBalanceVolumeIndicator

# Number of hidden states the HMM is fitted with (default for train_hmm).
N_STATES = 7
# Multiplier applied to each bar's return while a position is open.
LEVERAGE = 2.5
# Hours to stay flat after a position is closed on a confirmed Bear regime.
COOLDOWN_HOURS = 48
# Minimum number of passing confirmation checks required to open a position.
CONFIRM_THRESH = 7
# Consecutive bars a regime must persist before it counts as confirmed.
HYSTERESIS_LAG = 3

def train_hmm(features, n_states=N_STATES):
    """Fit a Gaussian HMM on the feature matrix and return the fitted model.

    Args:
        features: Observation matrix handed to ``GaussianHMM.fit``.
        n_states: Number of hidden states (``n_components``).

    Returns:
        The fitted ``GaussianHMM`` instance.
    """
    hmm_settings = {
        "n_components": n_states,
        "covariance_type": "full",  # one full covariance matrix per state
        "n_iter": 200,
        "random_state": 42,  # fixed seed so repeated runs give the same fit
    }
    hmm = GaussianHMM(**hmm_settings)
    hmm.fit(features)
    return hmm

def label_states(model, df, features):
    """Decode the state sequence and name each state by its mean return.

    The state with the highest mean ``returns`` becomes "Bull Run", the one
    with the lowest becomes "Bear/Crash", and every other state (including
    states that never appear in the decoded sequence) becomes "Chop/Noise".

    Note: ``df`` is modified in place — ``state`` and ``regime`` columns are
    added — and the same object is returned.

    Returns:
        Tuple ``(labels, df)`` where ``labels`` maps state index to its name.
    """
    decoded = model.predict(features)
    df["state"] = decoded

    mean_return = df.groupby("state")["returns"].mean()
    best_state = mean_return.idxmax()
    worst_state = mean_return.idxmin()

    labels = {}
    for state in range(model.n_components):
        # Bull is tested first, so it wins if one state is both best and worst.
        if state == best_state:
            labels[state] = "Bull Run"
        elif state == worst_state:
            labels[state] = "Bear/Crash"
        else:
            labels[state] = "Chop/Noise"

    df["regime"] = df["state"].map(labels)
    return labels, df

def check_confirmations(row):
    """Evaluate the eight entry confirmations for a single bar.

    Args:
        row: Mapping-like bar (e.g. a DataFrame row) providing ``rsi``,
            ``momentum``, ``atr_pct``, ``vol_ratio``, ``adx``, ``Close``,
            ``ema50``, ``macd``, ``macd_signal`` and ``obv_slope``.

    Returns:
        Dict mapping each confirmation's display name to whether it passed,
        in a fixed order.
    """
    rules = (
        ("RSI < 90", lambda bar: bar["rsi"] < 90),
        ("Positive Momentum", lambda bar: bar["momentum"] > 0),
        ("Volatility OK", lambda bar: bar["atr_pct"] > 0.005),
        ("Volume Surge", lambda bar: bar["vol_ratio"] > 1.10),
        ("ADX Trending", lambda bar: bar["adx"] > 25),
        ("Above EMA50", lambda bar: bar["Close"] > bar["ema50"]),
        ("MACD Bullish", lambda bar: bar["macd"] > bar["macd_signal"]),
        ("OBV Rising", lambda bar: bar["obv_slope"] > 0),
    )
    return {label: passes(row) for label, passes in rules}

def run_backtest(df, leverage=LEVERAGE, confirm_thresh=CONFIRM_THRESH):
    """Simulate the long-only regime strategy bar by bar.

    Rules applied on every bar after the first:

    * Hysteresis — a regime only becomes "confirmed" once it has been seen
      on ``HYSTERESIS_LAG`` consecutive bars.
    * Entry — when flat, outside the cooldown window, the confirmed regime is
      "Bull Run" and at least ``confirm_thresh`` confirmations pass.
    * Exit — when the confirmed regime becomes "Bear/Crash"; a cooldown of
      ``COOLDOWN_HOURS`` starts from that bar's timestamp.
    * P&L — while a position is held, equity compounds by
      ``1 + returns * leverage``; otherwise it stays flat.

    Args:
        df: Bars with ``returns`` and ``regime`` columns plus every column
            read by ``check_confirmations``; the index must support adding a
            ``pd.Timedelta`` (e.g. a DatetimeIndex).
        leverage: Multiplier applied to each bar's return while in a trade.
        confirm_thresh: Minimum number of passing confirmations for entry.

    Returns:
        Copy of ``df`` without its first row, with ``equity``, ``bh_equity``
        (unleveraged buy-and-hold) and ``position`` columns added.
        ``position`` is the state at the bar's close (1 = long, 0 = flat).
    """
    equity = [1.0]
    bh_equity = [1.0]
    positions = []
    in_trade = False
    cooldown_until = None

    # The first bar seeds the confirmed regime; counting starts on the next bar.
    pending = confirmed = df["regime"].iloc[0]
    count = 0

    for i in range(1, len(df)):
        row = df.iloc[i]
        r = row["returns"]
        bh_equity.append(bh_equity[-1] * (1 + r))

        # Hysteresis: track how long the current raw regime has persisted.
        if row["regime"] == pending:
            count += 1
        else:
            pending = row["regime"]
            count = 1
        if count >= HYSTERESIS_LAG:
            confirmed = pending

        if in_trade:
            # The position was held through this bar, so its return is earned
            # even if the exit triggers at this bar's close.
            equity.append(equity[-1] * (1 + r * leverage))
            if confirmed == "Bear/Crash":
                in_trade = False
                cooldown_until = row.name + pd.Timedelta(hours=COOLDOWN_HOURS)
            positions.append(int(in_trade))
            continue

        # Flat during this bar. The entry decision below uses this bar's own
        # close and indicators, so crediting this bar's return to the trade
        # would be look-ahead; P&L starts on the following bar.
        equity.append(equity[-1])

        if cooldown_until is not None and row.name < cooldown_until:
            positions.append(0)
            continue

        passed = sum(check_confirmations(row).values()) if confirmed == "Bull Run" else 0
        if confirmed == "Bull Run" and passed >= confirm_thresh:
            in_trade = True
        positions.append(int(in_trade))

    # The first bar only seeds the state, so it is dropped from the result.
    result = df.iloc[1:].copy()
    result["equity"] = equity[1:]
    result["bh_equity"] = bh_equity[1:]
    result["position"] = positions
    return result
"""
dashboard.py — Streamlit web dashboard.
Run: streamlit run dashboard.py
"""

import streamlit as st
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from backtester import *
from data_loader import *

st.set_page_config(layout="wide", page_title="Regime Terminal")

# Sidebar controls: symbol to analyse, leverage multiplier, and a button that
# triggers a cache flush further down the script.
ticker = st.sidebar.text_input("Ticker", value="BTC-USD")
leverage = st.sidebar.slider("Leverage", min_value=1.0, max_value=5.0, value=2.5)
refresh = st.sidebar.button("🔄 Refresh")

@st.cache_data(ttl=3600)
def run_pipeline(ticker, leverage):
    """Run download → features → HMM → labels → indicators → backtest → metrics.

    Cached by Streamlit with a 3600-second TTL.

    NOTE(review): ``compute_indicators`` and ``compute_metrics`` are expected
    to come from the star imports; they are not part of the listings shown
    with this script. This function also unpacks two values from
    ``run_backtest`` — confirm that the backtester in use returns
    ``(df, trades)`` rather than a single DataFrame.

    Returns:
        Tuple ``(df, trades, metrics)``.
    """
    prices = load_data(ticker)
    feature_matrix = get_features(prices)
    hmm = train_hmm(feature_matrix)
    _, labelled = label_states(hmm, prices, feature_matrix)
    with_indicators = compute_indicators(labelled)
    result, trades = run_backtest(with_indicators, leverage=leverage)
    metrics = compute_metrics(result, trades)
    return result, trades, metrics

# Flush every cached pipeline result so the call below recomputes from scratch.
if refresh:
    st.cache_data.clear()
df, trades, metrics = run_pipeline(ticker, leverage)

# Header metrics
st.title("📊 Regime Terminal")
c1, c2, c3, c4 = st.columns(4)
c1.metric("Regime", df.iloc[-1]["regime"])
c2.metric("Return", f"{metrics['total_return']}%")
c3.metric("Alpha", f"{metrics['alpha']}%")
c4.metric("Max Drawdown", f"{metrics['max_drawdown']}%")

# Confirmations breakdown
st.subheader("Confirmations (latest bar)")
confirms = check_confirmations(df.iloc[-1])
# One column per check, so the layout follows the confirmation set instead of
# a hard-coded count.
for col, (name, passed) in zip(st.columns(len(confirms)), confirms.items()):
    col.markdown(f"{'✅' if passed else '❌'} {name}")

# Charts
tab1, tab2, tab3 = st.tabs(["Price", "Equity", "Trades"])
with tab1:
    fig = make_subplots(rows=2, shared_xaxes=True)
    fig.add_trace(
        go.Candlestick(
            x=df.index,
            open=df["Open"], high=df["High"],
            low=df["Low"], close=df["Close"],
            name="Price",
        ),
        row=1, col=1,
    )
    fig.add_trace(go.Scatter(x=df.index, y=df["rsi"], name="RSI"), row=2, col=1)
    # Candlestick traces enable a range slider by default; it is drawn under
    # the first row and collides with the RSI panel, so it is switched off.
    fig.update_layout(
        template="plotly_dark",
        height=600,
        xaxis_rangeslider_visible=False,
    )
    st.plotly_chart(fig, use_container_width=True)
with tab2:
    # Both curves start at 1.0, so (value - 1) * 100 is the percentage return.
    fig2 = go.Figure()
    fig2.add_trace(go.Scatter(x=df.index, y=(df["equity"] - 1) * 100, name="Strategy"))
    fig2.add_trace(go.Scatter(x=df.index, y=(df["bh_equity"] - 1) * 100, name="B&H"))
    fig2.update_layout(template="plotly_dark")
    st.plotly_chart(fig2, use_container_width=True)
with tab3:
    st.dataframe(trades, use_container_width=True)

Extending the system

Aggressive mode. In the dashboard sidebar, tick "Aggressive mode" to switch to 4× leverage and a 5-of-8 confirmation threshold (the defaults are 2.5× and 7 of 8). This increases trade frequency and amplifies returns — at the cost of higher drawdown risk.
Other assets. Change the ticker to "ETH-USD", "SPY", "GLD", or any Yahoo Finance symbol. The HMM will find regimes specific to that asset's volatility and return distribution. Lower-volatility assets (SPY) will show tighter emission Gaussians and more time in "Chop/Noise" regimes.
More states. Increase N_STATES from 7 to 9 or 10 for finer regime granularity (e.g., "Strong Bull" vs "Weak Bull"). Diminishing returns kick in — more states require more data to estimate reliably and can overfit to noise.