I've written a Python program that back-tests a strategy on equities data and compares the strategy's results against a benchmark.
I would like someone to review it to confirm that the program works as intended, and to suggest any changes that would improve its efficiency.
import numpy as np
import pandas as pd
import time as t
import datetime
from datetime import datetime
from datetime import timedelta
import datetime as dt
# Wall-clock reference taken first so the final report covers the whole run.
t0 = t.time()

# Back-test window as ISO date strings, and the starting capital.
start_date, end_date = '2006-01-01', '2016-12-31'
allocation = 100000

# Ticker list to load (the S&P 500 constituents).
symbols = ['SPY 500 Stocks']

# How many of the filtered symbols are bought each period (top 100).
number_of_stocks_selected = 100
def converter(start_date):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime`` object.

    Args:
        start_date: date string in ``%Y-%m-%d`` format.

    Returns:
        The ``datetime`` produced by ``datetime.strptime`` for that string.
    """
    return datetime.strptime(start_date, "%Y-%m-%d")
def delta_time(converter, n_days):
    """Shift a date by a number of calendar days.

    Args:
        converter: the date to shift (anything that supports ``+ timedelta``).
        n_days: number of days to add; may be negative.

    Returns:
        ``converter`` moved by ``n_days`` days.
    """
    shift = timedelta(days=n_days)
    return converter + shift
def data(symbols):
    """Load the daily closing prices of every symbol into one DataFrame.

    For each entry of ``symbols`` the file ``<symbol>.csv`` (columns ``Date``
    and ``Close``) is read from the local S&P500 folder. The closes are
    aligned on a calendar-daily index spanning the module-level
    ``start_date`` .. ``end_date``.

    Args:
        symbols: iterable of ticker symbols; each becomes one column.

    Returns:
        DataFrame indexed by every calendar day of the window, one column per
        symbol, with gaps forward-filled and then back-filled.
    """
    dates = pd.date_range(start_date, end_date)
    df = pd.DataFrame(index=dates)

    # Raw string: the previous literal depended on invalid escape sequences
    # (\F, \D, \S, \{), which newer Python versions warn about. The resulting
    # path is character-for-character the same.
    path_template = r'C:\Users\Furqan\Desktop\S&P500\{}.csv'

    frames = []
    for symbol in symbols:
        df_temp = pd.read_csv(
            path_template.format(str(symbol)),
            usecols=['Date', 'Close'],
            parse_dates=True,
            index_col='Date',
            na_values=['nan'],
        )
        frames.append(df_temp.rename(columns={'Close': symbol}))

    # One left join over all frames instead of one join per symbol: joining
    # inside the loop re-copies the growing frame on every iteration.
    if frames:
        df = df.join(frames)

    # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
    # NOTE(review): the back-fill copies the first available price backwards,
    # i.e. it uses future data for dates before a symbol's first quote.
    df = df.ffill()
    df = df.bfill()
    return df
def compute_daily_returns(df):
    """Compute and return the daily return values.

    Each value is the simple return ``price[t] / price[t-1] - 1``, computed
    column by column.

    Args:
        df: DataFrame of prices, one row per observation.

    Returns:
        DataFrame of returns with the same columns as ``df`` and without the
        first row (which has no previous observation). ``df`` itself is not
        modified.
    """
    daily_returns = (df / df.shift(1)) - 1
    # The former ``df = df.fillna(value=0)`` was dead code: it copied the whole
    # price frame after the returns were already computed and the copy was
    # never used, so it has been removed.
    return daily_returns.iloc[1:]
def mat_alloc_auto(symbols):
    """Build the equal-weight allocation matrix for the given symbols.

    Args:
        symbols: sequence of selected symbols; only its length is used.

    Returns:
        ``len(symbols) x len(symbols)`` float array that is zero everywhere
        except the diagonal, where every entry is the module-level
        ``allocation`` divided by the number of symbols.
    """
    count = len(symbols)
    weights = np.zeros((count, count), dtype='float')
    if count == 0:
        # Nothing to allocate; also avoids dividing by zero below.
        return weights
    per_stock = allocation / count
    diagonal = np.arange(count)
    weights[diagonal, diagonal] = per_stock
    return weights
def var_calculator(data_frame, start_date, end_date):
    """Compute a historical value-at-risk figure for every symbol.

    Daily returns are derived from ``data_frame``. For each entry of the
    module-level ``symbols`` list, the low-tail percentile ``100 * (1 - 0.99)``
    of its returns between ``start_date`` and ``end_date`` (both inclusive,
    label-based) is taken.

    Args:
        data_frame: price DataFrame with one column per symbol.
        start_date: first date of the estimation window.
        end_date: last date of the estimation window.

    Returns:
        DataFrame indexed by ``symbols`` with a single column ``0`` holding
        the percentile of each symbol's daily returns.
    """
    returns_daily = compute_daily_returns(data_frame)

    # ``.loc`` replaces ``.ix``, which was removed in pandas 1.0. Selecting all
    # symbol columns at once lets NumPy compute every percentile in one call
    # instead of looping over the symbols in Python.
    columns = ['{}'.format(symbol) for symbol in symbols]
    window = returns_daily.loc[start_date:end_date, columns]

    # Same percentile expression as before, evaluated column-wise (axis=0).
    value_at_risk = np.percentile(window.values, 100 * (1 - 0.99), axis=0)

    var_df = pd.DataFrame(data=value_at_risk, index=symbols)
    return var_df
def stock_picker(data_frame, start_date, start_date_test, end_date_test):
    """Select the symbols to invest in and return their test-period returns.

    Symbols are ranked by the figure from ``var_calculator`` over
    ``start_date`` .. ``start_date_test``, highest value first, and the first
    ``number_of_stocks_selected`` (module-level) are kept.

    Args:
        data_frame: price DataFrame with one column per symbol.
        start_date: start of the history used for ranking.
        start_date_test: end of the ranking window and start of the test period.
        end_date_test: end of the test period.

    Returns:
        Tuple ``(symbols_to_invest, symbols_to_invest_returns)``: the list of
        chosen symbols and the DataFrame of their daily returns over the test
        period.
    """
    ranking = var_calculator(data_frame, start_date, start_date_test)
    ranking = ranking.sort_values(by=0, axis=0, ascending=False)

    symbols_to_invest = ranking.index[:number_of_stocks_selected].tolist()

    test_prices = data_frame.loc[start_date_test:end_date_test, symbols_to_invest]
    return symbols_to_invest, compute_daily_returns(test_prices)
def sp500():
    """Load the benchmark (^GSPC) and return its cumulative daily returns.

    The ``Close`` column of ``^GSPC.csv`` is aligned on a calendar-daily index
    spanning the module-level ``start_date`` .. ``end_date``, forward- and
    back-filled, converted to daily returns, and restricted to dates from the
    module-level ``start_rng[0]`` onward (inclusive).

    Returns:
        DataFrame with a single column ``GSPC`` holding the running sum of the
        daily returns.
    """
    dates = pd.date_range(start_date, end_date)
    df = pd.DataFrame(index=dates)

    # Raw string: the previous literal depended on invalid escape sequences
    # (\F, \D, \S, \^); the resulting path is unchanged.
    df_temp = pd.read_csv(
        r'C:\Users\Furqan\Desktop\S&P500\^GSPC.csv',
        usecols=['Date', 'Close'],
        parse_dates=True,
        index_col='Date',
        na_values=['nan'],
    )
    df = df.join(df_temp.rename(columns={'Close': 'GSPC'}))

    # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
    df = df.ffill()
    df = df.bfill()

    sp500_return = compute_daily_returns(df)
    # ``.loc`` replaces ``.ix``, which was removed in pandas 1.0.
    sp500_return = sp500_return.loc[start_rng[0]:]

    # NOTE(review): a running sum of simple returns is not a compounded
    # return; it is kept because the portfolio series is built the same way.
    sp500_cumilative_returns = sp500_return.cumsum()
    return sp500_cumilative_returns
# Starting allocation amount
allocation = 100000

# Convert start and end date to datetime objects
start_date = converter(start_date)
end_date = converter(end_date)

# Data feed: closing prices of every symbol on a daily index
data_frame = data(symbols)

# Year-end dates inside the back-test window; each one starts a holding period
start_rng = pd.date_range(start_date, end_date, freq = 'A')
if len(start_rng) < 2:
    # Previously this surfaced as an opaque IndexError on start_rng[1].
    raise ValueError("The back-test window must contain at least two year-end dates")

# End of each holding period, i.e. the year-end that follows its start
end_rng = pd.date_range(start_rng[1], end_date, freq = 'A')

amount_matrix = [allocation]
dfs_returns = []
dfs_cum_sum = []

# zip() stops at the shorter range, so every pair is (year-end, next year-end).
# The two dates of a pair can never be equal, which is why the former
# ``if start_date_test != end_date_test ... else: break`` guard was unreachable
# and has been removed.
for start_date_test, end_date_test in zip(start_rng, end_rng):
    # Filtered list of stocks to invest in, and the returns of those stocks
    stock_list_invest, stock_list_invest_returns = stock_picker(data_frame, start_date, start_date_test, end_date_test)

    # Equal weighting: the same amount is invested in every selected stock
    allocation_matrix = mat_alloc_auto(stock_list_invest)

    # Daily profit per stock, summed across stocks, then expressed as a
    # fraction of the capital invested at the start of the period
    valuation = np.dot(stock_list_invest_returns, allocation_matrix)
    valuation = np.sum(valuation, axis=1, keepdims=True)
    valuation = np.divide(valuation, amount_matrix)

    # The first day of the period is dropped by the return calculation, so the
    # return series starts one day after the period start
    dates_portfolio = pd.date_range(delta_time(start_date_test, 1), end_date_test)

    # Daily portfolio returns for this period (t=0 to t=1, t=1 to t=2, ...)
    portfolio_returns = pd.DataFrame(data=valuation, index=dates_portfolio, columns = ['Portfolio Returns'])

    # Collected here and concatenated after the loop
    dfs_returns.append(portfolio_returns)

    # Capital for the next period grows with gains and shrinks with losses
    allocation = allocation + portfolio_returns.cumsum()['Portfolio Returns'].iloc[-1]*allocation
    amount_matrix = [allocation]

# Concatenate the per-period frames into one return series
result_df = pd.concat(dfs_returns)

# Cumulative sum of the daily portfolio returns
cum_result = result_df.cumsum()

# Read the benchmark data
sp500_data = sp500()

# Daily cumulative returns of both the benchmark and the portfolio, side by side
frames = [cum_result, sp500_data]
portfolio = pd.concat(frames, axis=1)

print("Portfolio returns are ", cum_result['Portfolio Returns'].iloc[-1] * 100,'%')
print("SPY returns are ", sp500_data['GSPC'].iloc[-1] * 100,'%')

t1 = t.time()
print('Exec time is ',t1-t0)
I presume the above program works on a monthly or yearly frequency. We create a data feed from locally stored files. We then filter the top 100 stocks from it and invest equally in them. A returns data frame is generated and appended to a list, and all 100 stocks are sold at the end of the period. When the next period starts, our allocation changes depending on our performance in the prior period. In the next period, the filter is recalculated with the historical data extended by an additional year or month. At the end, all the returns data frames are concatenated and a series of cumulative returns is generated.