For passive long-term investing, we usually prefer well-established ETFs. However, there may be circumstances in which one wants to avoid ETFs for some reason but can still purchase individual stocks.
Here I want to show that we do not have to reproduce a world-market index to emulate its growth. It is actually enough to pick 20 random companies and invest the same amount of money into each of them, so that the risk of underperforming is less than 4% over an 8-year period. By underperforming we mean underperforming inflation.
Strategy | Rel. 1/2 year | Rel. 3 years | Rel. 8 years | Growth |
---|---|---|---|---|
Nq 400 | .668 | .789 | .958 | .10-.13 |
U 400 | .692 | .837 | 1.0 | .10-.14 |
UP 20 | .679 | .799 | .965 | .10-.13 |
UP 5 | .617 | .716 | .826 | .09-.13 |
It seems that the strategy of buying 20 random stocks may work in the real world, and that its risks are comparable to the risks of index ETFs. However, I cannot conclude this confidently, given all the problems with the research.
import pandas as pd
import yfinance as yf
Install yfinance
# !pip install yfinance --upgrade
Link to the ticker list: http://www.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=nasdaq&render=download
# Load the NASDAQ screener export (one row per listed ticker).
ticker_info = pd.read_csv(
    "../nasdaq_screener_1652817332355.csv",
    sep=",",
)
ticker_info

# Keep only the companies whose IPO year is before 2000
# (rows with a missing IPO year fail the comparison and are dropped too).
ipo_before_2000 = ticker_info["IPO Year"] < 2000
old_companies = ticker_info[ipo_before_2000]
old_companies

# The tickers are passed to yf.download as one space-separated string;
# symbols containing "^" are left out.
tickers_as_one = " ".join(
    symbol for symbol in old_companies["Symbol"] if "^" not in symbol
)
tickers_as_one[:100]

# Download the full available history for the selected tickers and cache it on disk.
all_nasdaq_2000 = yf.download(tickers_as_one, period="max")
# all_nasdaq = pd.read_csv("../all_history_all_nasdaq.csv", )
all_nasdaq_2000.to_csv("../all_history_2000IPO_caps_nasdaq.csv")
all_nasdaq_2000
# all_nasdaq.to_csv("../all_history_all_nasdaq.csv")

ticker_info.head(5)
# Tickers excluded from the index (the reason is not recorded in this file).
# The 2000 list extends the 2010 list.
companies_to_drop_2010 = ["CVT", "LAND", "ATAI", "MDH", "THRX", "LJPC"]
companies_to_drop_2000 = [*companies_to_drop_2010, "CLM", "CSII", "CTIB", "CTIC"]

# Upper bound on the number of companies taken into the cap-weighted index.
COUNT = 1000

# Largest market caps first, truncated to at most COUNT rows.
tickers_largest_caps = old_companies.sort_values("Market Cap", ascending=False).head(COUNT)

# Remove the excluded tickers.
is_kept = ~tickers_largest_caps["Symbol"].isin(companies_to_drop_2000)
index_large = tickers_largest_caps.loc[is_kept]
index_large

# Built-in sum (not Series.sum) on purpose: a NaN market cap propagates into the total.
sum_cap = sum(index_large["Market Cap"])
sum_cap

# Share of each company in the total market cap of the index.
cap_share = index_large["Market Cap"].map(lambda cap: cap / sum_cap)
props = pd.concat(
    [index_large["Symbol"], cap_share],
    axis=1,
    keys=["Symbol", "Share"],
)
props
# Keep roughly the last 20 years of rows (5 trading days out of every 7).
# NOTE: -20 * 365 * 5 // 7 floors towards minus infinity and evaluates to -5215,
# i.e. one row more than 20 * 365 * 5 // 7 == 5214.
last_20_years = all_nasdaq_2000.iloc[-20 * 365 * 5 // 7:]

# Daily price per ticker: midpoint of the "Close" and "Open" columns.
closes = last_20_years["Close"]
opens = last_20_years["Open"]
avg_price = (closes + opens) / 2

# Restrict the price table to the tickers that belong to the index.
stock_history_big_index = avg_price.filter(items=index_large["Symbol"])
stock_history_big_index

# Start and end dates of the back-test; both must be present in the price index.
date_from = "2002-01-22"
date_to = "2022-05-17"
price_from = stock_history_big_index.loc[date_from]
price_to = stock_history_big_index.loc[date_to]
price_from
price_to

# Current market cap as a Series indexed by ticker symbol.
market_cap_li_indexed = index_large.set_index("Symbol")["Market Cap"]
market_cap_li_indexed

# Scale the current market cap by the price ratio between the two dates.
price_ratio = price_from / price_to
market_cap_from = price_ratio * market_cap_li_indexed
market_cap_from
index_large["Symbol"]

# Tickers for which the scaled market cap could not be computed.
market_cap_from[market_cap_from.isna()]
# Total amount of money to distribute across the index.
budget = 1000.0
# Series.sum skips NaN entries, so tickers without a scaled market cap are ignored here.
market_sum = market_cap_from.sum()
print("Total market cap", market_sum)

# Split the budget proportionally to each company's scaled market cap.
# `budget` and `market_sum` must be defined before this expression is evaluated;
# evaluating it earlier raises NameError when the file is run top to bottom.
money_to_be_spent_on_company = market_cap_from.map(lambda x: x / market_sum * budget)
money_to_be_spent_on_company
market_cap_from

# Number of shares bought per ticker: money allocated / price at date_from.
number_of_stocks = pd.concat(
    [
        index_large["Symbol"],
        index_large["Symbol"].map(
            lambda ticker: money_to_be_spent_on_company[ticker] / price_from[ticker]
        ),
    ],
    axis=1,
    keys=["Symbol", "Stock count"],
)
number_of_stocks

# matplotlib is imported here because this is the first use of `plt` in the file.
import matplotlib.pyplot as plt
plt.pie(number_of_stocks["Stock count"]);
def compute_portfolio_for_date(date, portfolio):
    """Return the total value of `portfolio` on `date`.

    `portfolio` is a DataFrame with a "Symbol" and a "Stock count" column.
    Each position is valued with the row of the module-level `avg_price`
    table selected by `date`. The final sum uses the pandas default, which
    skips NaN position values.
    """
    prices_on_date = avg_price.loc[date]

    def position_value(position):
        # Price of the ticker on that date times the number of shares held.
        return prices_on_date[position["Symbol"]] * position["Stock count"]

    position_values = portfolio.apply(position_value, axis=1)
    return position_values.sum()
# Sanity check: value of the cap-weighted portfolio at both ends of the period.
date_from, date_to
compute_portfolio_for_date(date_from, number_of_stocks), compute_portfolio_for_date(date_to, number_of_stocks)

# Portfolio value sampled at every 30th row of the price history.
dates = stock_history_big_index.index[::30]
portfolio_value = dates.map(lambda x: compute_portfolio_for_date(x, number_of_stocks))
import matplotlib.pyplot as plt
plt.plot(dates, portfolio_value)

# Cap-weighted budget split as a table with a "Symbol" and a "Cost" column.
# The rename only takes effect when reset_index produced a column called "index".
nasdaq_index_costs = (
    pd.concat([money_to_be_spent_on_company], axis=1, keys=["Cost"])
    .reset_index()
    .rename(columns={"index": "Symbol"})
)

# Full date index of the downloaded history. The frame defined in this file is
# `all_nasdaq_2000`; `all_nasdaq` exists only in commented-out code, so reading
# it raised NameError.
dates = all_nasdaq_2000.index
dates
def build_graph_given_distribution(distribution, how_long_ago, dates_selector):
    """Buy `distribution` `how_long_ago` rows before the end of the history and value it over time.

    Args:
        distribution: DataFrame with a "Symbol" column and a "Cost" column
            (money spent on each ticker).
        how_long_ago: number of rows, counted from the end of
            `all_nasdaq_2000`, at which the purchase is made.
        dates_selector: callable that receives the date index of the last
            `how_long_ago` rows and returns the dates to evaluate. Its result
            must provide `.map` (e.g. a pandas Index or Series).

    Returns:
        A tuple `(selected_dates, portfolio_values)` where `portfolio_values`
        is a Series holding the portfolio value for each selected date.
    """
    # Use one date index for both the purchase date and the evaluation window.
    # The module-level `dates` variable is rebound several times in this file,
    # so relying on it could desynchronise the two.
    history_dates = all_nasdaq_2000.index
    date_from = history_dates[-how_long_ago]
    price_from = avg_price.loc[date_from]

    # Shares bought per ticker: money spent / price on the purchase date.
    stock_counts = distribution.apply(
        lambda s: s["Cost"] / price_from[s["Symbol"]], axis=1
    )
    portfolio = pd.concat(
        [distribution["Symbol"], stock_counts],
        axis=1,
        keys=["Symbol", "Stock count"],
    )

    selected_dates = dates_selector(history_dates[-how_long_ago:])
    portfolio_values = pd.Series(
        selected_dates.map(lambda x: compute_portfolio_for_date(x, portfolio))
    )
    return selected_dates, portfolio_values
# Back-test horizon: about 20 years of rows (5 trading days out of every 7).
backtest_years = 20
backtest_rows = backtest_years * 365 * 5 // 7

# Cap-weighted portfolio, valued at every 30th row of the horizon.
dates_ndq_index, values_ndq_index = build_graph_given_distribution(
    nasdaq_index_costs, backtest_rows, lambda x: x[::30]
)
plt.plot(dates_ndq_index, values_ndq_index)

# Uniform portfolio: the budget is split evenly across all `old_companies`.
uniform_distr = pd.concat(
    [
        old_companies["Symbol"],
        old_companies["Symbol"].map(lambda _: budget / old_companies.shape[0]),
    ],
    keys=["Symbol", "Cost"],
    axis=1,
)
uniform_distr
dates_uni_distr, values_uni_distr = build_graph_given_distribution(
    uniform_distr, backtest_rows, lambda x: x[::30]
)
plt.plot(dates_uni_distr, values_uni_distr)

# Total and annualized growth, first for the cap-weighted and then for the
# uniform portfolio. The horizon is `backtest_years` years, so the annual
# factor is the `backtest_years`-th root of the total growth (the 10th root
# used previously overstated it). The last sample of x[::30] is not exactly
# the last row, so the span is only approximately `backtest_years` years.
for values in (values_ndq_index, values_uni_distr):
    total_growth = values.iloc[-1] / values.iloc[0]
    print("Total growth:", round(total_growth, 3))
    annual_growth = total_growth ** (1 / backtest_years)
    print("Annual growth:", round(annual_growth, 3))
import seaborn as sns
def plot_investing_growth_distribution(costs_array, window):
    """Collect annualized growth factors over sliding windows of `window` rows.

    For every cost distribution in `costs_array` and every offset
    0, 8, 16, ... below `epoch - window`, the portfolio is bought
    `offset + window + 1` rows before the end of the history and valued on
    the purchase row and `window` rows later. The value ratio is annualized
    with the exponent (365 * 5 / 7) / window.

    Despite its name, this function draws nothing: it returns the growth
    factors as a list sorted in ascending order.
    """
    epoch = 19 * 365 * 5 // 7

    def window_ends(dates):
        # First row of the window and the row `window` positions later.
        return pd.Series([dates[0], dates[window]])

    def annualized_growth(costs, offset):
        _, values = build_graph_given_distribution(costs, offset + window + 1, window_ends)
        return (values[1] / values[0]) ** (1 / window * (365 * 5 / 7))

    offsets = range(0, epoch - window, 8)
    return sorted(
        annualized_growth(costs, offset)
        for costs in costs_array
        for offset in offsets
    )
# Annual growth factor that a portfolio has to exceed; the annualized growth
# factors below are compared against this threshold.
inflation = 1.03
import scipy.stats.mstats as mstats
import math


def _report_growths(growths):
    """Plot `growths`, print the summary statistics and return them.

    Returns a tuple `(probability, growths_without_nan)`:
    `probability` is the fraction of entries strictly greater than
    `inflation` (NaN entries fail the comparison but still count in the
    denominator), and `growths_without_nan` is `growths` with NaN removed,
    which is what the geometric mean is computed from.
    """
    sns.displot(growths)
    outperforming = [growth for growth in growths if growth > inflation]
    probability = len(outperforming) / len(growths)
    print("Probabily that saving in cash is outperformed:", round(probability, 3))
    growths_without_nan = [growth for growth in growths if not math.isnan(growth)]
    print("Average growth:", mstats.gmean(growths_without_nan))
    return probability, growths_without_nan


# Cap-weighted index: 8-year, 3-year and half-year windows.
gr_nq_8 = plot_investing_growth_distribution([nasdaq_index_costs], 8 * 365 * 5 // 7)
prob_nq_8, gr_nq_8_no_nan = _report_growths(gr_nq_8)

gr_nq_3 = plot_investing_growth_distribution([nasdaq_index_costs], 3 * 365 * 5 // 7)
prob_nq_3, gr_nq_3_no_nan = _report_growths(gr_nq_3)

gr_nq_05 = plot_investing_growth_distribution([nasdaq_index_costs], 3 * 365 * 5 // 7 // 6)
prob_nq_05, gr_nq_05_no_nan = _report_growths(gr_nq_05)

# Uniform distribution over all old companies: same three windows.
gr_up_8 = plot_investing_growth_distribution([uniform_distr], 8 * 365 * 5 // 7)
prob_up_8, gr_up_8_no_nan = _report_growths(gr_up_8)

gr_up_3 = plot_investing_growth_distribution([uniform_distr], 3 * 365 * 5 // 7)
prob_up_3, gr_up_3_no_nan = _report_growths(gr_up_3)
uniform_distr

gr_up_05 = plot_investing_growth_distribution([uniform_distr], 3 * 365 * 5 // 7 // 6)
prob_up_05, gr_up_05_no_nan = _report_growths(gr_up_05)
Take N random companies and invest the same amount of money into each of them.
# Number of randomly picked companies per portfolio.
N = 20

# Every company is assigned a cost of budget / N, so any sample of N rows
# spends the whole budget.
uniform_distr_partial = pd.concat(
    [
        old_companies["Symbol"],
        old_companies["Symbol"].map(lambda _: budget / N),
    ],
    keys=["Symbol", "Cost"],
    axis=1,
)
uniform_distr_partial.sample(n=N)

# 100 independent random portfolios of N companies each.
uniform_distr_partial_array = [uniform_distr_partial.sample(n=N) for _ in range(100)]

# 8-year windows.
growths_uni_partial_8 = plot_investing_growth_distribution(uniform_distr_partial_array, 8 * 365 * 5 // 7)
sns.displot(growths_uni_partial_8)
prob_8 = len([g for g in growths_uni_partial_8 if g > inflation]) / len(growths_uni_partial_8)
print("Probabily that saving in cash is outperformed:", round(prob_8, 3))
growths_uni_partial_8_no_nan = [g for g in growths_uni_partial_8 if not math.isnan(g)]
print("Average growth:", mstats.gmean(growths_uni_partial_8_no_nan))

# 3-year windows.
growths_uni_partial_3 = plot_investing_growth_distribution(uniform_distr_partial_array, 3 * 365 * 5 // 7)
sns.displot(growths_uni_partial_3)
prob_3 = len([g for g in growths_uni_partial_3 if g > inflation]) / len(growths_uni_partial_3)
print("Probabily that saving in cash is outperformed:", round(prob_3, 3))
growths_uni_partial_3_no_nan = [g for g in growths_uni_partial_3 if not math.isnan(g)]
print("Average growth:", mstats.gmean(growths_uni_partial_3_no_nan))

# Half-year windows (one sixth of the 3-year window).
growths_uni_partial_05 = plot_investing_growth_distribution(uniform_distr_partial_array, 3 * 365 * 5 // 7 // 6)
sns.displot(growths_uni_partial_05)
prob_05 = len([g for g in growths_uni_partial_05 if g > inflation]) / len(growths_uni_partial_05)
print("Probabily that saving in cash is outperformed:", round(prob_05, 3))
growths_uni_partial_05_no_nan = [g for g in growths_uni_partial_05 if not math.isnan(g)]
print("Average growth:", mstats.gmean(growths_uni_partial_05_no_nan))
If for some reason you cannot or do not want to buy ETFs and want individual shares that are still more or less reliable, you can start by investing the same amount of money into each of 5-10 random companies.
Strategy | Rel. 1/2 year | Rel. 3 years | Rel. 8 years | Growth |
---|---|---|---|---|
Nq 400 | .668 | .789 | .958 | .10-.13 |
U 400 | .692 | .837 | 1.0 | .10-.14 |
UP 20 | .679 | .799 | .965 | .10-.13 |
UP 5 | .617 | .716 | .826 | .09-.13 |
Rel. (reliability): the probability that, after the given number of years, a portfolio built with the given strategy is worth more than the initial investment adjusted for inflation.
Growth: the average annual growth.