A simple liquidity measure based on spreads.

## Definition

The realized spread is the temporary component of the effective spread. It measures the revenue to liquidity providers assuming that the liquidity provider is able to close her position at the midpoint prevailing five minutes after the trade.

For a given stock $i$ and day $t$, the realized spread on the $k$th trade is defined as:

$$rspread_{i,t,k}=2q_{i,t,k} \left( \ln(P_{i,t,k}) - \ln(M_{i,t,k+5}) \right)$$

where $P_{i,t,k}$ is the price of the $k$th trade, $M_{i,t,k+5}$ is the midpoint of the consolidated BBO prevailing five minutes after the $k$th trade, and $q_{i,t,k}$ is the buy-sell indicator (+1 for buys, –1 for sells). Aggregating over day $t$, a stock’s realized spread $rspread_{i,t}$ is the dollar-volume-weighted average of the realized spread $rspread_{i,t,k}$ computed over all trades on day $t$.

## Source Code

This example Python code has been optimized for speed, but it is intended for demonstration purposes only and may contain errors.

# RealizedSpread.py
from numba import jit
import numpy as np

# Human-readable summary of what the realized spread measures.
description = """
It measures the revenue to liquidity providers assuming that the liquidity provider is able to close
her position at the midpoint prevailing five minutes after the trade.
"""
# Column names that estimate__ reads from its input data.
vars_needed = ['Price', 'Volume', 'Mid Point', 'Direction']

# Minimum time gap between a trade and the later observation whose midpoint is matched to it.
_5min = np.timedelta64(5, 'm')

@jit(nopython=True, nogil=True, cache=True)
def _estimate(log_midpt, price, timestamps, volume, direction):
    """Compute the dollar-volume-weighted realized spread of a series of trades.

    Each trade is paired with the log midpoint attached to the first later
    observation whose timestamp is at least ``_5min`` after the trade. Trades
    that have no such observation are excluded from the average.

    Args:
        log_midpt: Natural log of the quote midpoint, one entry per trade.
        price: Trade prices, one entry per trade.
        timestamps: Trade times. Assumed to be sorted in ascending order: the
            matched trades are treated as a prefix of the arrays, which only
            holds for sorted input.
        volume: Trade sizes, one entry per trade.
        direction: Buy-sell indicator per trade (+1 for buys, -1 for sells).

    Returns:
        The average of ``2 * direction * (log(price) - matched_log_midpt)``
        weighted by ``volume * price`` over the matched trades, or ``None``
        when no trade could be matched, the total dollar volume is zero, or
        the result is NaN.
    """
    T = len(timestamps)
    log_price = np.log(price)
    assert T == len(log_price) == len(log_midpt)

    # Find the quote midpoint at least 5 min later than each trade. With
    # ascending timestamps the matching index never moves backwards, so one
    # forward-only pointer replaces a fresh scan per trade.
    matched_log_midpt = np.empty(T)
    matched = 0
    j = 0
    for i in range(T):
        if j <= i:
            j = i + 1
        while j < T and timestamps[j] - timestamps[i] < _5min:
            j += 1
        if j >= T:
            # No later observation is far enough away; the same is true for
            # every subsequent trade, so the matched prefix is complete.
            break
        matched_log_midpt[i] = log_midpt[j]
        matched += 1

    # Daily realized spread is the dollar-volume-weighted average
    # of the realized spread computed over all matched trades in the day.
    dollar_volume = volume[:matched] * price[:matched]
    total_dollar_volume = np.sum(dollar_volume)
    if total_dollar_volume == 0:
        # Under Numba's default error model a zero divisor raises
        # ZeroDivisionError instead of yielding NaN, so bail out explicitly.
        return None
    # Only the matched prefix is used; the remaining slots of
    # matched_log_midpt are uninitialised memory.
    rspread = 2 * direction[:matched] * \
        (log_price[:matched] - matched_log_midpt[:matched])
    rsprd = np.sum(rspread * dollar_volume) / total_dollar_volume
    return None if np.isnan(rsprd) else rsprd

def estimate__(data):
    """Pull the required arrays out of ``data`` and pass them to ``_estimate``.

    ``data`` is indexed by trade time and provides the 'Mid Point', 'Price',
    'Volume' and 'Direction' columns. The midpoint is log-transformed here;
    the other columns are forwarded unchanged as NumPy arrays.

    Returns whatever ``_estimate`` returns for those arrays.
    """
    # Built in the positional order expected by _estimate:
    # (log_midpt, price, timestamps, volume, direction).
    arrays = (
        np.log(data['Mid Point'].to_numpy()),
        data['Price'].to_numpy(),
        np.array(data.index, dtype='datetime64'),
        data['Volume'].to_numpy(),
        data['Direction'].to_numpy(),
    )
    return _estimate(*arrays)


For comparison, below is a pure-Python implementation, which is very slow (about 10 minutes for ~600,000 trades).

def estimate_python(data):
    """Pure-Python reference computation of the daily realized spread.

    Each trade is paired with the log midpoint attached to the first later
    row whose timestamp is at least five minutes after the trade. Trades
    without such a row are excluded.

    Args:
        data: Table indexed by trade time with 'Mid Point', 'Price',
            'Volume' and 'Direction' columns. The index is assumed to be
            sorted in ascending order, so the unmatched trades are exactly
            the trailing ones.

    Returns:
        The average of ``2 * direction * (log(price) - matched log midpoint)``
        weighted by ``volume * price`` over the matched trades, or ``None``
        when no trade could be matched, the total dollar volume is zero, or
        the result is NaN.
    """
    log_midpt = np.log(data['Mid Point'].to_numpy())
    price = data['Price'].to_numpy()
    log_price = np.log(price)
    timestamps = np.array(data.index, dtype='datetime64')
    # Loop-invariant; build it once rather than on every comparison.
    five_minutes = np.timedelta64(5, 'm')

    matched_log_midpt = []
    for idx, ts1 in enumerate(timestamps):
        for j in range(idx + 1, len(timestamps)):
            if timestamps[j] - ts1 >= five_minutes:
                matched_log_midpt.append(log_midpt[j])
                break
        else:
            # This trade has no match. Stop here so that matched_log_midpt[k]
            # always belongs to trade k, which the prefix slices below rely on.
            break
    matched = len(matched_log_midpt)

    rspread = 2 * data['Direction'].to_numpy()[:matched] * \
        (log_price[:matched] - np.array(matched_log_midpt))
    volume = data['Volume'].to_numpy()
    dollar_volume = np.multiply(volume, price)[:matched]
    total_dollar_volume = np.sum(dollar_volume)
    if total_dollar_volume == 0:
        # Weighted average is undefined (e.g. nothing matched); avoid 0/0.
        return None
    rsprd = np.sum(rspread * dollar_volume) / total_dollar_volume
    return None if np.isnan(rsprd) else rsprd