Kalman Filter
This is a note from my study of the Kalman filter. It does NOT derive the Kalman filter but explains it to the best of my ability. Handwritten Python code for estimating the Kalman filter is provided because I'd like to practice its implementation, but I recommend using established software for better numerical stability and performance.
Intuition
Imagine we are estimating the true inflation rate. At any point in time, we have two sources of information:

- A prediction using past information, $\hat{x}_{\text{pred}}$: based on a model that projects inflation trends.
- A new observation, $z$: a noisy indicator like a market survey or a bond yield linked to inflation.

But both have limitations. The model's prediction can be uncertain, and the observation includes noise. A simple yet elegant solution is to combine the two by weighting them based on their accuracy, thereby updating the estimate of the true inflation rate:

$$\hat{x} = (1 - k)\,\hat{x}_{\text{pred}} + k\,z \tag{1}$$

where $k \in [0, 1]$ is the weight placed on the observation.

We will almost surely place more weight on whichever component is more precise! $k \to 1$ if our measurement is known to be very good, but the model's prediction is not; $k \to 0$ if our measurement is very noisy, but the model's prediction is quite accurate. In this way, $k$ balances how much we trust the model against how much we trust the measurement.

Also, we can rearrange Equation 1 to get

$$\hat{x} = \hat{x}_{\text{pred}} + k\,(z - \hat{x}_{\text{pred}}) \tag{2}$$

This means that the updated estimate is our best model estimate based on past information plus an adjustment for the difference between the observation and the model estimate.
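To make this concrete, here is a toy illustration with made-up numbers showing that Equation 1 and its rearranged form (Equation 2) give the same update:

```python
# Toy illustration of Equations 1 and 2 (all numbers are made up):
# a model prediction of 2.0% inflation, a survey reading of 3.0%,
# and a weight k = 0.3 on the observation.
x_pred = 2.0   # model prediction
z = 3.0        # noisy observation
k = 0.3        # weight on the observation

x_upd_eq1 = (1 - k) * x_pred + k * z    # Equation 1: weighted average
x_upd_eq2 = x_pred + k * (z - x_pred)   # Equation 2: prediction + correction

print(x_upd_eq1, x_upd_eq2)  # both give 2.3 -- the two forms are identical
```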
Note, however, that the measurement $z$ does not necessarily live on the same scale as the state. In our example, the true inflation rate may be an annualized percentage, but our measurement could be, for instance, the proportion of survey responses that believe inflation is higher than target, or even the trading volume of Bitcoin! They are therefore not directly comparable, and the raw difference $z - \hat{x}_{\text{pred}}$ is not meaningful until the state is mapped onto the measurement's scale. We will return to this point when the scaling factor is introduced below.
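To see why the scales matter, here is a tiny sketch with a hypothetical scaling factor of 10 between the state and the indicator (this is the role the factor $H$ plays below); all numbers are made up:

```python
# The state is an annualized percentage; suppose the indicator reads roughly
# 10x the state (H = 10, a made-up scaling). The raw gap z - x_pred mixes two
# scales, while z - H * x_pred compares like with like.
x_pred = 2.0           # predicted inflation, in percent
H = 10.0               # hypothetical scaling from state to indicator
z = 21.5               # observed indicator reading

print(z - x_pred)      # 19.5 -- meaningless: mixes two different scales
print(z - H * x_pred)  # 1.5  -- a meaningful, measurement-space discrepancy
```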
A more elaborate presentation

Now let's move on to a more elaborate presentation and proceed to the basic Kalman filter. For pedagogical simplicity, we start with the univariate (or 1-dimensional) case, i.e., there is only one unobservable state variable and one measurement.

The setup

First, there are two equations, namely the state transition equation (Equation 3) and the measurement equation (Equation 4).

State transition equation:

$$x_t = F x_{t-1} + w_t \tag{3}$$

This equation reflects our model, or how we perceive the unobservable state to evolve over time. However, there is some uncertainty $w_t$ in this evolution.

- $x_t$: the unobserved true state (e.g., the true inflation rate).
- $F$: the factor linking the previous state to the current state.
- $w_t$: process noise (uncertainty in the model), $w_t \sim N(0, Q)$.

Measurement equation:

$$z_t = H x_t + v_t \tag{4}$$

This equation describes how our observed measurement is related to the unobservable true state. Because it is a measurement, there is also uncertainty or noise $v_t$.

- $z_t$: the observed measurement (e.g., a survey or bond yield).
- $H$: the factor linking the true state to the observation.
- $v_t$: measurement noise (error in the observed data), $v_t \sim N(0, R)$.

We assume the process noise $w_t$ and the measurement noise $v_t$ are independent of each other.
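As a quick sanity check, the two equations can be simulated directly. A minimal sketch, assuming the same illustrative values ($F = 1$, $H = 1$, $Q = 0.5$, $R = 1$) as the full Python example later:

```python
import numpy as np

rng = np.random.default_rng(0)
F, H, Q, R = 1.0, 1.0, 0.5, 1.0   # illustrative scalar parameters
n_steps = 100

x = np.zeros(n_steps)  # unobserved true state
z = np.zeros(n_steps)  # noisy measurements
for t in range(1, n_steps):
    x[t] = F * x[t-1] + rng.normal(0.0, np.sqrt(Q))  # state transition (Equation 3)
    z[t] = H * x[t] + rng.normal(0.0, np.sqrt(R))    # measurement (Equation 4)
```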
Predicting the state

At time $t-1$, suppose our best estimate of the state is $\hat{x}_{t-1|t-1}$ with uncertainty $P_{t-1|t-1}$.1 When making the model prediction at time $t$, we push this estimate through the state transition equation:

$$\hat{x}_{t|t-1} = F\,\hat{x}_{t-1|t-1} \tag{5}$$

This should not be confused with the model estimate before updating at time $t-1$: the subscript $t|t-1$ reads "the estimate for time $t$ given information up to time $t-1$."

1 Note that the uncertainty here is expressed as a variance; $P_{t-1|t-1}$ is the variance of the estimate $\hat{x}_{t-1|t-1}$.

While the state evolves predictably through $F$, the process noise adds uncertainty to the prediction. Let $P_{t|t-1}$ be the variance of the predicted state:2

$$P_{t|t-1} = F^2 P_{t-1|t-1} + Q \tag{6}$$

2 Note that $P_{t|t-1} \geq Q$: even a perfectly known previous state leaves us with at least the process noise variance.
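A quick numeric sketch of Equation 6: with $F = 1$ and no measurements arriving, each prediction adds $Q$ to the variance (values are illustrative):

```python
# Repeated predictions without measurements: with F = 1, Equation 6 applied
# recursively lets the variance grow by Q at every step.
F, Q = 1.0, 0.5
P = 1.0
for _ in range(3):
    P = F**2 * P + Q
    print(P)  # 1.5, 2.0, 2.5 -- uncertainty accumulates until data arrives
```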
State update

Also at time $t$, a new measurement $z_t$ arrives. When the measurement comes in, we compare it with what the model expects, $H\,\hat{x}_{t|t-1}$, and form the innovation:3

$$y_t = z_t - H\,\hat{x}_{t|t-1} \tag{7}$$

3 As discussed earlier, because the measurement does not necessarily have the same scale as the state, we cannot directly use $z_t - \hat{x}_{t|t-1}$; the predicted state must first be mapped into the measurement space through $H$. For example, the estimated inflation is an annualized percentage, but our measurement could be the frequency of newspapers mentioning the word "inflation" or even the trading volume of Bitcoin.

This term quantifies how much the measurement differs from what the model expects. However, it is noisy, and its uncertainty is:

$$S_t = H^2 P_{t|t-1} + R \tag{8}$$

From the intuition section, we know that the updated state estimate should be the prediction plus a weighted adjustment for the innovation:

$$\hat{x}_{t|t} = \hat{x}_{t|t-1} + K_t\,y_t \tag{9}$$

This $K_t$ is called the Kalman gain. It is very much like the weight $k$ in Equation 2, except that it must also convert the innovation from the measurement scale back to the state scale.
Kalman gain

The Kalman gain is:

$$K_t = \frac{P_{t|t-1}\,H}{S_t} = \frac{P_{t|t-1}\,H}{H^2 P_{t|t-1} + R} \tag{10}$$

- $P_{t|t-1}$ is the variance of the predicted state as in Equation 6.
- $H$ is the scaling factor that maps the state to the measurement space, which appears in Equation 4.
- $S_t$ is the variance of the innovation as in Equation 8.

So, the numerator $P_{t|t-1}\,H$ captures how much of the prediction uncertainty shows up in the measurement space, while the denominator $S_t$ is the total uncertainty of the innovation. Therefore,

- If the measurement noise $R$ is small, the Kalman gain is larger, so we trust the measurement more.
- If the predicted uncertainty $P_{t|t-1}$ is large, $K_t$ is also larger, prioritizing the new measurement.
- Conversely, if $R$ is large (noisy measurement), $K_t$ is small, favoring the prediction.
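A minimal numeric sketch of these three cases (the scalar formula from Equation 10, with made-up values):

```python
def kalman_gain(P_pred, H, R):
    """Scalar Kalman gain K = P_pred * H / (H**2 * P_pred + R) (Equation 10)."""
    return P_pred * H / (H**2 * P_pred + R)

P_pred, H = 1.0, 1.0
print(kalman_gain(P_pred, H, R=0.01))  # ~0.99: precise measurement, trust it
print(kalman_gain(P_pred, H, R=100))   # ~0.01: noisy measurement, trust the model
print(kalman_gain(10.0, H, R=1.0))     # ~0.91: uncertain prediction, lean on data
```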
Variance update

Further, a new measurement not only helps us update and refine our estimate of the state; it also reduces the uncertainty of our estimate. Once the Kalman gain $K_t$ is computed, the variance is updated as:

$$P_{t|t} = (1 - K_t H)\,P_{t|t-1} \tag{11}$$

In the state update (Equation 9), the correction uses $K_t$ alone; here it uses $K_t H$:

- State update: the correction is applied directly to the predicted state. $K_t$ alone is sufficient because it determines how much of the innovation (in measurement space) should influence the state estimate.
- Variance update: the correction adjusts the uncertainty in the predicted state. $K_t H$ is needed because uncertainty is transformed differently: it depends on both the Kalman gain and how strongly the state affects the measurement (via $H$).
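Putting Equations 5 to 11 together, one predict/update cycle in the scalar case can be sketched as follows (illustrative numbers; note the posterior variance ends up smaller than the predicted one):

```python
F, H, Q, R = 1.0, 1.0, 0.5, 1.0   # illustrative scalar parameters
x, P = 0.0, 1.0                   # previous estimate and its variance
z = 1.2                           # a new (made-up) measurement

# Predict (Equations 5 and 6)
x_pred = F * x
P_pred = F**2 * P + Q

# Update (Equations 7 to 11)
y = z - H * x_pred                # innovation
S = H**2 * P_pred + R             # innovation variance
K = P_pred * H / S                # Kalman gain
x = x_pred + K * y                # updated state
P = (1 - K * H) * P_pred          # updated variance

print(P < P_pred)  # True: the measurement reduced our uncertainty
```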
Kalman filter
Now I'm switching to matrix notation to allow for multiple states and measurements.

To begin with, a system's internal dynamics and its observed outputs are treated separately in systems theory and in the Kalman filter.
State space
The state space represents the system's internal, unobserved dynamics: the variables we want to estimate but cannot directly measure. These are often called hidden states or latent states.4

4 Suppose we are tracking the true inflation rate. This is the "state" of the system. It evolves over time due to economic forces, but we cannot observe it directly.

The state equation describes how the state evolves over time, often including random noise to reflect uncertainty:

$$\mathbf{x}_t = \mathbf{F}\,\mathbf{x}_{t-1} + \mathbf{w}_t$$

- $\mathbf{x}_t$: the vector of true states at time $t$.
- $\mathbf{F}$: the transition matrix describing how the states evolve.
- $\mathbf{w}_t$: process noise with zero mean and covariance matrix $\mathbf{Q}$.

The state space contains all the information needed to describe the system's internal behavior.
Measurement space
The measurement space is the observable part of the system: the variables we can measure directly, but which may be noisy or only indirectly related to the state.5

5 Suppose we measure inflation using a price index. This is the "measurement" and is influenced by the true inflation rate, but it also includes random noise.

The measurement equation describes how observed measurements are related to the unobservable states:

$$\mathbf{z}_t = \mathbf{H}\,\mathbf{x}_t + \mathbf{v}_t$$

- $\mathbf{z}_t$: the observed measurement at time $t$.
- $\mathbf{H}$: the matrix relating the state to the measurement (e.g., a scaling factor).
- $\mathbf{v}_t$: measurement noise with zero mean and covariance matrix $\mathbf{R}$.

The measurement space is what we observe directly, but it may not fully capture the true state due to noise or incomplete representation.
The Kalman filter works by reconciling the state space (hidden, unobservable variables) with the measurement space (noisy observations) to improve our estimate of the state. It does so through two main steps:
- Prediction: Uses the state equation to estimate the next state and its uncertainty, based on prior information.
- Update: Refines this estimate by incorporating new observations from the measurement equation, weighing them based on their accuracy.
Prediction step
In the prediction step, the Kalman filter estimates the state at time $t$ using only past information:

$$\hat{\mathbf{x}}_{t|t-1} = \mathbf{F}\,\hat{\mathbf{x}}_{t-1|t-1}$$

- $\hat{\mathbf{x}}_{t|t-1}$: the predicted state based on the system's dynamics, or a prior state estimate before incorporating the new measurement.

The associated uncertainty (covariance) of the prediction is updated as:

$$\mathbf{P}_{t|t-1} = \mathbf{F}\,\mathbf{P}_{t-1|t-1}\,\mathbf{F}^\top + \mathbf{Q}$$

- $\mathbf{P}_{t|t-1}$: the predicted covariance matrix of the state.
Update step
The update step adjusts the predicted state using the new measurement:

$$\hat{\mathbf{x}}_{t|t} = \hat{\mathbf{x}}_{t|t-1} + \mathbf{K}_t\,\mathbf{y}_t, \qquad \mathbf{y}_t = \mathbf{z}_t - \mathbf{H}\,\hat{\mathbf{x}}_{t|t-1}$$

- $\mathbf{y}_t$: the innovation or measurement residual, representing the difference between the actual and predicted measurement.
- $\mathbf{K}_t$: the Kalman gain, which determines how much weight is given to the innovation.

The Kalman gain is computed as:

$$\mathbf{K}_t = \mathbf{P}_{t|t-1}\,\mathbf{H}^\top \mathbf{S}_t^{-1}, \qquad \mathbf{S}_t = \mathbf{H}\,\mathbf{P}_{t|t-1}\,\mathbf{H}^\top + \mathbf{R}$$
Updated covariance
Once the state is updated, the covariance of the state estimate is updated as well:

$$\mathbf{P}_{t|t} = (\mathbf{I} - \mathbf{K}_t \mathbf{H})\,\mathbf{P}_{t|t-1}$$

Here, $\mathbf{I}$ is the identity matrix.
This ensures that the updated covariance reflects the combined effect of the prediction and measurement steps.
The Kalman filter is recursive, meaning it processes data one step at a time, making it computationally efficient. Each new measurement refines the state estimate and covariance, which are then used for the next prediction.
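For reference, here is a minimal NumPy sketch of the two steps in matrix form, applied to a hypothetical two-state model (a level plus a trend) observed through a single noisy measurement of the level. The functions mirror, in matrix notation, the scalar steps above; the model and its numbers are assumptions for illustration only:

```python
import numpy as np

def kf_predict(x, P, F, Q):
    """Prediction step: propagate the state estimate and its covariance."""
    x_pred = F @ x
    P_pred = F @ P @ F.T + Q
    return x_pred, P_pred

def kf_update(x_pred, P_pred, z, H, R):
    """Update step: fold a new measurement z into the prediction."""
    y = z - H @ x_pred                    # innovation
    S = H @ P_pred @ H.T + R              # innovation covariance
    K = P_pred @ H.T @ np.linalg.inv(S)   # Kalman gain
    x_upd = x_pred + K @ y
    P_upd = (np.eye(P_pred.shape[0]) - K @ H) @ P_pred
    return x_upd, P_upd

# Hypothetical two-state model: level + trend, observing only the level
F = np.array([[1.0, 1.0],
              [0.0, 1.0]])
H = np.array([[1.0, 0.0]])
Q = 0.1 * np.eye(2)
R = np.array([[1.0]])

x, P = np.zeros((2, 1)), np.eye(2)               # initial state and covariance
x, P = kf_predict(x, P, F, Q)                    # prediction step
x, P = kf_update(x, P, np.array([[0.8]]), H, R)  # update with one observation
```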
Estimation of the Kalman filter
General steps
After we have parameterized the system, using the Kalman filter to estimate states means that we are to estimate:

- $\mathbf{F}$: state transition matrix
- $\mathbf{H}$: measurement matrix
- $\mathbf{Q}$: covariance of process noise
- $\mathbf{R}$: covariance of measurement noise

Additionally, we need to set initial conditions:

- Initial state estimate $\hat{\mathbf{x}}_0$: the starting point of the state variable, chosen based on prior knowledge or historical averages.
- Initial covariance matrix $\mathbf{P}_0$.

In some cases, $\mathbf{F}$ and $\mathbf{H}$ are fixed by the model, and the noise covariances $\mathbf{Q}$ and $\mathbf{R}$ must be estimated from the data, typically by maximum likelihood:

- Forward filtering:
  - Apply the Kalman filter to compute the likelihood of the observed data at each time step.
  - The likelihood depends on the measurement residual $\mathbf{y}_t$ and its variance $\mathbf{S}_t$ (see the numeric check after this list):

$$\log L = -\frac{1}{2} \sum_{t=1}^{T} \left( d \log 2\pi + \log \det \mathbf{S}_t + \mathbf{y}_t^\top \mathbf{S}_t^{-1}\,\mathbf{y}_t \right)$$

where $d$ is the dimension of the observation.6

6 For example, a system's unobservable true states may be 10-dimensional, i.e., it has 10 state variables at any given time. Our measurement may be 5-dimensional, i.e., we observe 5 different metrics of the system. In this case, $d = 5$.
- Parameter optimization:
  - Adjust $\mathbf{Q}$ and $\mathbf{R}$ to maximize $\log L$.
  - Numerical optimization methods like gradient ascent or expectation-maximization (EM) are commonly used.
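Each term in the log-likelihood above is just a Gaussian log-density of the innovation, so the formula can be cross-checked against `scipy.stats.multivariate_normal`. A small sketch with made-up numbers for a single time step:

```python
import numpy as np
from scipy.stats import multivariate_normal

# One time step, 2-D observation (made-up values)
y = np.array([0.5, -0.2])               # innovation z_t - H @ x_pred
S = np.array([[1.0, 0.3],
              [0.3, 2.0]])              # innovation covariance

d = len(y)
manual = -0.5 * (d * np.log(2 * np.pi)
                 + np.log(np.linalg.det(S))
                 + y @ np.linalg.solve(S, y))
reference = multivariate_normal.logpdf(y, mean=np.zeros(d), cov=S)
print(np.isclose(manual, reference))    # True
```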
Example and code
Suppose we model the 1-D true state as a random walk:

$$x_t = x_{t-1} + w_t, \qquad w_t \sim N(0, Q)$$

And the 1-D observation as the state plus noise:

$$z_t = x_t + v_t, \qquad v_t \sim N(0, R)$$

Steps to estimate $Q$ and $R$:

- Start with initial guesses for $Q$ and $R$.
- Run the Kalman filter to compute the likelihood of the observed $z_t$ values.
- Adjust $Q$ and $R$ to maximize the likelihood.

Python

Here's a simple handwritten example of MLE in Python:
```python
import numpy as np
import matplotlib.pyplot as plt
from numpy.linalg import det, inv
from scipy.optimize import minimize

rng = np.random.default_rng(1024)

# Simulate data
dim_x = 1      # Number of unobservable true states
dim_z = 1      # Dimension of observations
n_steps = 100  # Number of time steps
F = np.array([[1.0]])  # State transition
H = np.array([[1.0]])  # Observation model
Q = np.array([[0.5]])  # Process noise covariance
R = np.array([[1.0]])  # Measurement noise covariance

# Generate true states and noisy observations
X = np.zeros((dim_x, n_steps))
Z = np.zeros((dim_z, n_steps))
for t in range(1, n_steps):
    X[:, t] = F @ X[:, t-1] + rng.multivariate_normal(np.zeros(dim_x), Q)
    Z[:, t] = H @ X[:, t] + rng.multivariate_normal(np.zeros(dim_z), R)

# Define the Kalman filter functions
def kalman_predict(x, P, F, Q):
    x_pred = F @ x
    P_pred = F @ P @ F.T + Q
    return x_pred, P_pred

def kalman_update(x_pred, P_pred, z, H, R):
    S = H @ P_pred @ H.T + R
    K = P_pred @ H.T @ inv(S)
    x_upd = x_pred + K @ (z - H @ x_pred)
    P_upd = (np.eye(len(P_pred)) - K @ H) @ P_pred
    return x_upd, P_upd, S

def run_kalman_filter(Q, R, F, H, x0, P0, Z):
    Q = np.array([[Q]])  # Process noise covariance
    R = np.array([[R]])  # Measurement noise covariance
    x, P = x0, P0

    log_likelihood = 0
    log_2pi = np.log(2 * np.pi)
    for z in Z[0, :]:
        x_pred, P_pred = kalman_predict(x, P, F, Q)
        x, P, S = kalman_update(x_pred, P_pred, z, H, R)
        innovation = z - H @ x_pred
        l = -0.5 * (dim_z * log_2pi + np.log(det(S)) + innovation.T @ inv(S) @ innovation)
        log_likelihood += l
    # Negative log-likelihood to minimize (as a scalar for scipy.optimize)
    return -log_likelihood.item()

# Define objective function for parameter estimation
def objective(params, F, H, x, P, Z):
    Q, R = params
    return run_kalman_filter(Q, R, F, H, x, P, Z)

# Initial guess for the parameters: [Q, R]
initial_params = [0.1, 0.1]
x0 = np.array([[0]])  # Initial state estimate
P0 = np.array([[1]])  # Initial covariance

# Minimize the negative log-likelihood with bounds to ensure Q and R are positive
result = minimize(
    objective,
    initial_params,
    args=(F, H, x0, P0, Z,),
    method='L-BFGS-B',
    bounds=[(1e-5, None), (1e-5, None)]
)
estimated_Q, estimated_R = result.x

# Run the Kalman filter with the estimated parameters
estimated_Q = np.array([[estimated_Q]])
estimated_R = np.array([[estimated_R]])
x_est = np.zeros((dim_x, n_steps))
P_est = np.zeros((dim_x, dim_x, n_steps))
x, P = x0, P0
for t in range(n_steps):
    x_pred, P_pred = kalman_predict(x, P, F, estimated_Q)
    x, P, _ = kalman_update(x_pred, P_pred, Z[:, t], H, estimated_R)
    x_est[:, t] = x
    P_est[:, :, t] = P

# Plot the true states and estimates
plt.figure(figsize=(9, 6))
plt.plot(Z[0], 'o', label='Observations', color='b', alpha=0.3)
plt.plot(X[0], label='True State', color='b', linestyle='dashed')
plt.plot(x_est[0], label='Estimated State', color='r')
plt.xlabel('Time step')
plt.ylabel('Value')
plt.legend()
plt.title('Kalman Filter: True State, Observations, and Estimated State')
plt.annotate(f'True Q: {Q[0,0]:.2f}', xy=(0.05, 0.95), xycoords='axes fraction', fontsize=12, verticalalignment='top')
plt.annotate(f'True R: {R[0,0]:.2f}', xy=(0.05, 0.90), xycoords='axes fraction', fontsize=12, verticalalignment='top')
plt.annotate(f'Estimated Q: {estimated_Q[0,0]:.2f}', xy=(0.05, 0.85), xycoords='axes fraction', fontsize=12, verticalalignment='top')
plt.annotate(f'Estimated R: {estimated_R[0,0]:.2f}', xy=(0.05, 0.80), xycoords='axes fraction', fontsize=12, verticalalignment='top')
plt.show()
```
Matlab
I also tested three ways of estimating the parameters of the Kalman filter in Matlab:

- Handwritten.
- `ss` model with a handwritten log-likelihood.
- `ssm` model without a handwritten log-likelihood.

The resulting estimates are basically the same, so I can comfortably use my handwritten version, which is easier to understand and extend (for example, if the noises are correlated).
```matlab
%% Kalman Filter Parameter Estimation Comparison with Log-Likelihood
% Clear workspace and set random seed for reproducibility
clear; clc; close all;
rng(1024);

%% Data Generation (Used by All Sections)
% Parameters
dim_x = 1;     % Number of unobservable true states
dim_z = 1;     % Dimension of observations
n_steps = 100; % Number of time steps
F = [1.0];     % State transition matrix
H = [1.0];     % Observation matrix
Q_true = [1.5]; % True process noise covariance
R_true = [2.8]; % True measurement noise covariance

% Generate true states and noisy observations
X = zeros(dim_x, n_steps); % True states
Z = zeros(dim_z, n_steps); % Observations
for t = 2:n_steps
    % True state evolution
    X(:, t) = F * X(:, t-1) + mvnrnd(zeros(1, dim_x), Q_true)';
    % Observation
    Z(:, t) = H * X(:, t) + mvnrnd(zeros(1, dim_z), R_true)';
end

% Initial state and covariance
x0 = zeros(dim_x, 1); % Initial state estimate
P0 = eye(dim_x);      % Initial covariance estimate

%% Section 1: Handwritten Kalman Filter and Parameter Estimation
% Handwritten Kalman Filter functions
% Prediction step
function [x_pred, P_pred] = kalman_predict(x, P, F, Q)
    x_pred = F * x;
    P_pred = F * P * F' + Q;
end

% Update step
function [x_upd, P_upd, S] = kalman_update(x_pred, P_pred, z, H, R)
    S = H * P_pred * H' + R;
    K = P_pred * H' / S;
    x_upd = x_pred + K * (z - H * x_pred);
    P_upd = (eye(size(P_pred)) - K * H) * P_pred;
end

% Negative log-likelihood function with explicit log(2*pi)
function nll = kalman_neg_log_likelihood_handwritten(params, Z, F, H, x0, P0)
    Q = params(1);
    R = params(2);
    if Q <= 0 || R <= 0
        nll = Inf;
        return;
    end
    dim_z = size(Z, 1); % Dimension of observations
    n_steps = size(Z, 2);
    x = x0;
    P = P0;
    nll = 0;
    for t = 1:n_steps
        % Prediction
        [x_pred, P_pred] = kalman_predict(x, P, F, Q);
        % Innovation
        y = Z(:, t) - H * x_pred;
        S = H * P_pred * H' + R;
        % Update negative log-likelihood (including log(2*pi))
        nll = nll + 0.5 * (dim_z * log(2 * pi) + log(det(S)) + y' * (S \ y));
        % Update state
        [x, P, ~] = kalman_update(x_pred, P_pred, Z(:, t), H, R);
    end
end

% Parameter estimation using fminsearch
initial_params = [1; 1]; % Initial guesses for [Q; R]
options = optimset('Display', 'off', 'TolX', 1e-6);
[estimated_params_handwritten, nll_handwritten] = fminsearch(@(params) kalman_neg_log_likelihood_handwritten(params, Z, F, H, x0, P0), initial_params, options);

% Extract estimated Q and R
Q_est_handwritten = estimated_params_handwritten(1);
R_est_handwritten = estimated_params_handwritten(2);

% Run Kalman filter with estimated parameters
x_est_handwritten = zeros(dim_x, n_steps);
P_est_handwritten = zeros(dim_x, dim_x, n_steps);
x = x0;
P = P0;
for t = 1:n_steps
    % Prediction
    [x_pred, P_pred] = kalman_predict(x, P, F, Q_est_handwritten);
    % Update
    [x, P, ~] = kalman_update(x_pred, P_pred, Z(:, t), H, R_est_handwritten);
    % Store estimates
    x_est_handwritten(:, t) = x;
    P_est_handwritten(:, :, t) = P;
end

%% Section 2: Kalman Filter Using ss Model and Handwritten Log-Likelihood
% Define the state-space system using ss
sys = ss(F, [], H, [], 1); % Discrete-time system with sample time 1

% Negative log-likelihood function with explicit log(2*pi)
function nll = kalman_neg_log_likelihood_ss(params, sys, x0, P0, Z)
    Q = params(1);
    R = params(2);
    if Q <= 0 || R <= 0
        nll = Inf;
        return;
    end
    dim_z = size(Z, 1); % Dimension of observations
    x = x0;
    P = P0;
    nll = 0;
    for t = 1:size(Z, 2)
        % Prediction
        x_pred = sys.A * x;
        P_pred = sys.A * P * sys.A' + Q;
        % Innovation
        y = Z(:, t) - sys.C * x_pred;
        S = sys.C * P_pred * sys.C' + R;
        % Update negative log-likelihood (including log(2*pi))
        nll = nll + 0.5 * (dim_z * log(2 * pi) + log(det(S)) + y' * (S \ y));
        % Update state
        K = P_pred * sys.C' / S;
        x = x_pred + K * y;
        P = (eye(size(P)) - K * sys.C) * P_pred;
    end
end

% Parameter estimation using fminsearch
initial_params = [1; 1]; % Initial guesses for [Q; R]
[estimated_params_ss, nll_ss] = fminsearch(@(params) kalman_neg_log_likelihood_ss(params, sys, x0, P0, Z), initial_params, options);

% Extract estimated Q and R
Q_est_ss = estimated_params_ss(1);
R_est_ss = estimated_params_ss(2);

% Run Kalman filter with estimated parameters
x_est_ss = zeros(dim_x, n_steps);
P_est_ss = zeros(dim_x, dim_x, n_steps);
x = x0;
P = P0;
for t = 1:n_steps
    % Prediction
    x_pred = sys.A * x;
    P_pred = sys.A * P * sys.A' + Q_est_ss;
    % Innovation
    y = Z(:, t) - sys.C * x_pred;
    S = sys.C * P_pred * sys.C' + R_est_ss;
    % Update state
    K = P_pred * sys.C' / S;
    x = x_pred + K * y;
    P = (eye(size(P)) - K * sys.C) * P_pred;
    % Store estimate
    x_est_ss(:, t) = x;
    P_est_ss(:, :, t) = P;
end

%% Section 3: Kalman Filter Using ssm and Built-in Functions
% Define parameter-to-matrix mapping function
function [A, B, C, D, Mean0, Cov0, StateType] = LocalStateSpaceModel(params)
    Q = params(1);
    R = params(2);
    % State-space matrices
    A = 1;      % State transition matrix
    B = sqrt(Q); % Process noise effect
    C = 1;      % Observation matrix
    D = sqrt(R); % Measurement noise effect
    % Initial state
    Mean0 = 0;
    Cov0 = 1;
    % State type (0 for stationary)
    StateType = 0;
end

% Initial parameter guesses
Param0 = [1; 1]; % Initial guesses for [Q; R]

% Create ssm model using the mapping function
Mdl = ssm(@LocalStateSpaceModel);

% Estimate the parameters using built-in functions
Z_transpose = Z';
[EstMdl, EstParams, EstParamCov, logL] = estimate(Mdl, Z_transpose, Param0, 'Display', 'off');

% Extract estimated Q and R
Q_est_ssm = EstParams(1);
R_est_ssm = EstParams(2);

% Extract log-likelihood value
nll_ssm = -logL; % Negative log-likelihood

% Run Kalman filter with estimated parameters
x_est_ssm = zeros(dim_x, n_steps);
P_est_ssm = zeros(dim_x, dim_x, n_steps);
x = EstMdl.Mean0;
P = EstMdl.Cov0;

% Extract estimated state-space matrices
A = EstMdl.A;
C = EstMdl.C;
Q_est_matrix = EstMdl.B * EstMdl.B'; % Process noise covariance
R_est_matrix = EstMdl.D * EstMdl.D'; % Measurement noise covariance
for t = 1:n_steps
    % Prediction
    x_pred = A * x;
    P_pred = A * P * A' + Q_est_matrix;
    % Innovation
    y = Z(:, t) - C * x_pred;
    S = C * P_pred * C' + R_est_matrix;
    % Update state
    K = P_pred * C' / S;
    x = x_pred + K * y;
    P = (eye(dim_x) - K * C) * P_pred;
    % Store estimates
    x_est_ssm(:, t) = x;
    P_est_ssm(:, :, t) = P;
end

%% Comparison of Results
% Plot the true state, observations, and estimated states from all methods
figure;
plot(1:n_steps, X(1, :), 'k-', 'LineWidth', 2, 'DisplayName', 'True State');
hold on;
plot(1:n_steps, Z(1, :), 'ko', 'MarkerSize', 4, 'DisplayName', 'Observations');
plot(1:n_steps, x_est_handwritten(1, :), 'r--', 'LineWidth', 1.5, 'DisplayName', 'Estimated State (Handwritten)');
plot(1:n_steps, x_est_ss(1, :), 'b-.', 'LineWidth', 1.5, 'DisplayName', 'Estimated State (ss Model)');
plot(1:n_steps, x_est_ssm(1, :), 'g:', 'LineWidth', 1.5, 'DisplayName', 'Estimated State (ssm Model)');
xlabel('Time Step');
ylabel('State Value');
title('Kalman Filter: Parameter Estimation with Different Methods');
legend('Location', 'best');
grid on;
hold off;

% Display estimated parameters and negative log-likelihoods from all methods
fprintf('--- Parameter Estimation Results ---\n');
fprintf('True Process Noise Covariance Q_true: %.4f\n', Q_true);
fprintf('Estimated Q (Handwritten): %.4f\n', Q_est_handwritten);
fprintf('Estimated Q (ss Model): %.4f\n', Q_est_ss);
fprintf('Estimated Q (ssm Model): %.4f\n\n', Q_est_ssm);
fprintf('True Measurement Noise Covariance R_true: %.4f\n', R_true);
fprintf('Estimated R (Handwritten): %.4f\n', R_est_handwritten);
fprintf('Estimated R (ss Model): %.4f\n', R_est_ss);
fprintf('Estimated R (ssm Model): %.4f\n\n', R_est_ssm);
fprintf('--- Negative Log-Likelihoods ---\n');
fprintf('Negative Log-Likelihood (Handwritten): %.4f\n', nll_handwritten);
fprintf('Negative Log-Likelihood (ss Model): %.4f\n', nll_ss);
fprintf('Negative Log-Likelihood (ssm Model): %.4f\n', nll_ssm);
```