The strategy uses daily stock price data from 1999 through March 2024. For each period, we compute the SSD (Sum of Squared Deviations) over a one-year lookback window, identifying the top 20 most similar pairs. These pairs are then traded over a six-month horizon. Positions are opened based on specific Z-score thresholds: a pair is bought or sold when its Z-score crosses ±2, and the position is closed once the Z-score reverts to 0.
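As a point of reference, the sketch below shows one way the ±2 entry and 0 exit thresholds can be turned into trading signals from a pair's spread. The function name, the spread Series, and the signal column names are illustrative assumptions; the actual code later in this section uses columns such as buy and long_exit.

import pandas as pd

def zscore_signals(spread, entry=2.0, exit_=0.0):
    # Standardize the spread, then flag entries at +/-2 and exits at 0.
    z = (spread - spread.mean()) / spread.std()
    signals = pd.DataFrame(index=spread.index)
    signals['long_entry'] = z < -entry    # spread unusually low: buy the pair
    signals['short_entry'] = z > entry    # spread unusually high: sell the pair
    signals['long_exit'] = z >= exit_     # close longs once z reverts to 0
    signals['short_exit'] = z <= exit_    # close shorts once z reverts to 0
    return signals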
The implementation remains similar to the cryptocurrency version we discussed previously, but let's review each component for clarity.
First, we normalize the price data and calculate the SSD using the following functions:
import numpy as np
from itertools import combinations

def normalize(df, min_vals, max_vals):
    # Min-max scale each column to the [0, 1] range.
    return (df - min_vals) / (max_vals - min_vals)

def calculate_ssd(df):
    # Sum of squared differences between every pair of normalized price series.
    filtered_df = df.dropna(axis=1)
    return {f'{c1}-{c2}': np.sum((filtered_df[c1] - filtered_df[c2]) ** 2)
            for c1, c2 in combinations(filtered_df.columns, 2)}
def top_x_pairs(df, start, end):
    ssd_results_dict = calculate_ssd(df)
    sorted_ssd_dict = dict(sorted(ssd_results_dict.items(), key=lambda item: item[1]))
    most_similar_pairs = {}
    coins = set()
    for pair, ssd in sorted_ssd_dict.items():
        coin1, coin2 = pair.split('-')
        # Each asset may appear in at most one selected pair.
        if coin1 not in coins and coin2 not in coins:
            most_similar_pairs[coin1] = (pair, ssd)
            coins.add(coin1)
            coins.add(coin2)
        if len(most_similar_pairs) == PORTFOLIO_SIZE:
            break
    sorted_ssd = dict(sorted(most_similar_pairs.items(), key=lambda item: item[1][1]))
    topx_pairs = list(sorted_ssd.values())[:PORTFOLIO_SIZE]
    return topx_pairs
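As a usage sketch, the formation-window wiring looks roughly like this; the date strings are just examples, and historical_data is the price DataFrame used later in this section.

# Hypothetical formation window; normalize prices, then pick the closest pairs.
formation_prices = historical_data.loc['2023-03-31':'2024-03-31']
normalized = normalize(formation_prices, formation_prices.min(), formation_prices.max())
top_pairs = top_x_pairs(normalized, '2023-03-31', '2024-03-31')
# top_pairs is a list of (pair_name, ssd) tuples, smallest SSD first.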
We set PORTFOLIO_SIZE to 20, selecting the top 20 pairs with the smallest SSD in each period. Several additional utility functions support date-based operations:
from datetime import datetime, timedelta

def get_previous_date(dates_list, target_date_str):
    # Return the latest date in dates_list that falls strictly before target_date_str.
    dates = [datetime.strptime(date, '%Y-%m-%d') for date in dates_list]
    target_date = datetime.strptime(target_date_str, '%Y-%m-%d')
    dates.sort()
    previous_date = None
    for date in dates:
        if date >= target_date:
            break
        previous_date = date
    return previous_date.strftime('%Y-%m-%d') if previous_date else None

def one_day_after(date_str):
    date_format = "%Y-%m-%d"
    date_obj = datetime.strptime(date_str, date_format)
    return (date_obj + timedelta(days=1)).strftime(date_format)

def one_year_before(date_str):
    date_format = "%Y-%m-%d"
    original_date = datetime.strptime(date_str, date_format)
    try:
        return original_date.replace(year=original_date.year - 1).strftime(date_format)
    except ValueError:
        # February 29 has no counterpart in the previous year; fall back to February 28.
        return original_date.replace(month=2, day=28, year=original_date.year - 1).strftime(date_format)
We calculate the strategy return over each holding period:
def strategy_return(data, fee=0.001):
    pnl = 0
    for df in data.values():
        # Handle long positions.
        long_entries = df[df['buy'] == 1].index
        for idx in long_entries:
            exit_idx = df[(df.index > idx) & (df['long_exit'])].index
            # Position details omitted here for clarity.
    return pnl / len(data)
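The position accounting is intentionally omitted above. Purely as an illustration of what that inner loop might do, the sketch below assumes each pair's DataFrame also carries a spread column and that the fee is charged on both entry and exit; this is an assumption, not the author's exact implementation.

def pair_long_pnl(df, fee=0.001):
    # Illustrative accounting for the long side of one pair.
    pnl = 0.0
    for idx in df[df['buy'] == 1].index:
        exits = df[(df.index > idx) & (df['long_exit'])].index
        if len(exits) == 0:
            continue  # no exit signal before the end of the trading window
        exit_idx = exits[0]
        gross = df.loc[exit_idx, 'spread'] - df.loc[idx, 'spread']  # assumed 'spread' column
        pnl += gross - 2 * fee  # fee paid on entry and exit (assumption)
    return pnl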
We apply additional filtering to exclude low-liquidity stocks:
def filter_stocks(date):
    nearest_date = get_previous_date(dates_list, date)
    stock_list = tickers[nearest_date]
    formation_start_date = one_year_before(date)
    stocks_data = historical_data.loc[formation_start_date:date]
    # Remove stocks with missing data or low liquidity.
    return filtered_stocks
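The filtering details are not shown; a plausible version of that last step, assuming a hypothetical volume_data DataFrame of daily dollar volume and an arbitrary threshold, could look like this.

def filter_stocks_sketch(stocks_data, volume_data, min_dollar_volume=1e6):
    # Keep stocks with a complete price history in the formation window...
    complete = stocks_data.dropna(axis=1).columns
    # ...and a median daily dollar volume above the (assumed) threshold.
    liquid = volume_data[complete].median() >= min_dollar_volume
    return [ticker for ticker in complete if liquid[ticker]]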