import pandas as pd
import numpy as np
import yfinance as yf
from tqdm import notebook
from bs4 import BeautifulSoup
from datetime import timedelta
from scipy.stats.mstats import winsorize
import requests
import os
# Set current directory
os.chdir('/home/jovyan/work')
# Pandas settings
pd.set_option("display.max_columns", None)
from sqlalchemy import create_engine
engine = create_engine('postgresql://postgres:headband@192.168.0.151:5432/mydb')
sql_tables = ["Info","Income","BalanceSheet","CashFlow","Recommendation"]
dfs = []
for i in range(len(sql_tables)):
df = pd.read_sql_table(f"SP500{sql_tables[i]}", con=engine)
dfs.append(df)
info_df = dfs[0].copy()
income_df = dfs[1].copy()
balance_df = dfs[2].copy()
cash_df = dfs[3].copy()
recomm_df = dfs[4].copy()
lm_df = info_df[['symbol','priceToSalesTrailing12Months','priceToBook','dividendYield','debtToEquity','marketCap']].sort_values(by='symbol')
lm_df.reset_index(drop=True, inplace=True)
# Compound Annual Revenue Growth
idx_income_years = income_df.groupby(['Ticker'])['Date'].agg(['idxmin', 'idxmax'])
first_income_years = income_df.loc[idx_income_years['idxmin']]
last_income_years = income_df.loc[idx_income_years['idxmax']]
merged_income_years = first_income_years.merge(
last_income_years,
on='Ticker',
suffixes=('_first', '_last')
)
merged_income_years['diff'] = ((merged_income_years['Date_last'] - merged_income_years['Date_first']) / np.timedelta64(1, 'M')) / 12
lm_df['CARG'] = np.power(
merged_income_years['Total Revenue_last'] / merged_income_years['Total Revenue_first'],
(1 / merged_income_years['diff'])
) - 1
# Compound Annual Number of Share Growth
idx_balance_years = balance_df.groupby(['Ticker'])['Date'].agg(['idxmin', 'idxmax'])
first_balance_years = balance_df.loc[idx_balance_years['idxmin']]
last_balance_years = balance_df.loc[idx_balance_years['idxmax']]
merged_balance_years = first_balance_years.merge(
last_balance_years,
on='Ticker',
suffixes=('_first', '_last')
)
merged_balance_years['diff'] = ((merged_balance_years['Date_last'] - merged_balance_years['Date_first']) / np.timedelta64(1, 'M')) / 12
lm_df['CANSG'] = np.power(
merged_balance_years['Common Stock_last'] / merged_balance_years['Common Stock_first'],
(1 / merged_balance_years['diff'])
) - 1
# 4 yr Average Operating Margin
merged_finance = pd.merge(cash_df, income_df, how='left', left_on=['Ticker', 'Date'], right_on = ['Ticker','Date'])
merged_finance['Oper Margin'] = merged_finance['Total Cash From Operating Activities']/merged_finance['Total Revenue']
lm_df['opMarg'] = merged_finance.groupby('Ticker')['Oper Margin'].mean().reset_index(drop=True)
# Interest Coverage
merged_finance['Interest Coverage'] = merged_finance['Total Cash From Operating Activities']/np.absolute(merged_finance['Interest Expense'])
idx_years = merged_finance.groupby(['Ticker'])['Date'].agg(['idxmax'])
lm_df['intCov'] = merged_finance.loc[idx_years['idxmax']]['Interest Coverage'].reset_index(drop=True)
# Test
merged_finance[merged_finance['Ticker'] == 'MNST'][['Date','Total Cash From Operating Activities','Interest Expense']]
Date | Total Cash From Operating Activities | Interest Expense | |
---|---|---|---|
1284 | 2021-12-31 | 1.155741e+09 | -19000.0 |
1285 | 2020-12-31 | 1.364163e+09 | -39000.0 |
1286 | 2019-12-31 | 1.113762e+09 | -56000.0 |
1287 | 2018-12-31 | 1.161881e+09 | -60000.0 |
# Sector
lm_df['sector'] = info_df.sort_values('symbol')['sector'].reset_index(drop=True)
def set_recommendation(score):
if score in ['Strong Buy','Buy']:
return 1
elif score in ['Market Outperform','Overweight','Perform','Positive','Sector Outperform','Outperform']:
return 2
elif score in ['Peer Perform','In-Line','Sector Perform','Sector Weight','Market Perform','Equal-Weight','Hold','Neutral','Mixed']:
return 3
elif score in ['Sector Underperform','Underperform','Underweight']:
return 4
else:
return 5
recomm_df['Rating'] = recomm_df['To Grade'].apply(set_recommendation)
# Check for missing values and set recommendation score
test = recomm_df.groupby('Ticker')['Rating'].mean()
test_set = set(test.reset_index()['Ticker'].unique())
lm_set = set(lm_df['symbol'].unique())
missingRecs = list(lm_set.difference(test_set))
if len(missingRecs) > 0:
print('Fixing missing recommendations...')
df_fix = recomm_df.groupby('Ticker')['Rating'].mean()
for i in range(len(missingRecs)):
df_fix.loc[missingRecs[i]] = 3
df_fix = df_fix.reset_index()
lm_df['rating'] = df_fix.sort_values('Ticker')['Rating'].reset_index(drop=True)
print('Fix complete')
else:
lm_df['rating'] = recomm_df['Rating']
print('Looks good')
Fixing missing recommendations... Fix complete
# rename col and zero out nan values
lm_df.rename(columns={'priceToSalesTrailing12Months':'priceToSales'},inplace = True)
lm_df.fillna(0,inplace = True)
# winsorize outliers
win_lim = (0.01,0.02)
lm_df['intCov'] = winsorize(lm_df['intCov'],win_lim)
lm_df['CANSG'] = winsorize(lm_df['CANSG'],win_lim)
lm_df['CARG'] = winsorize(lm_df['CARG'],win_lim)
lm_df['debtToEquity'] = winsorize(lm_df['debtToEquity'],win_lim)
lm_df['priceToBook'] = winsorize(lm_df['priceToBook'],win_lim)
lm_df['opMarg'] = winsorize(lm_df['opMarg'],win_lim)
lm_df['dividendYield'] = winsorize(lm_df['dividendYield'],win_lim)
lm_df.tail()
symbol | priceToSales | priceToBook | dividendYield | debtToEquity | marketCap | CARG | CANSG | opMarg | intCov | sector | rating | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
497 | YUM | 4.847612 | 0.000000 | 0.0186 | 0.000 | 3.221238e+10 | 0.049957 | 0.0 | 0.232926 | 3.096189 | Consumer Cyclical | 1.857143 |
498 | ZBH | 2.855887 | 1.838131 | 0.0087 | 53.098 | 2.255580e+10 | -0.004079 | 0.0 | 0.195432 | 7.193858 | Healthcare | 2.500000 |
499 | ZBRA | 2.560271 | 6.823679 | 0.0000 | 43.221 | 1.462427e+10 | 0.100812 | 0.0 | 0.186273 | 213.800000 | Technology | 2.666667 |
500 | ZION | 2.823184 | 1.163843 | 0.0279 | 0.000 | 8.190056e+09 | 0.041367 | 0.0 | 0.291518 | 0.000000 | Financial Services | 2.666667 |
501 | ZTS | 9.156736 | 19.260940 | 0.0071 | 144.912 | 7.225581e+10 | 0.101057 | 0.0 | 0.299283 | 9.879464 | Healthcare | 1.250000 |
# Load
def update_server(table_name,dataframe):
try:
dataframe.to_sql(table_name,con=engine,index=False,if_exists='replace')
print(f'Successfully imported {table_name} to server')
except Exception as err:
print('Got an error while updating server')
print('Error: ', str(err))
update_server('SP500LM',lm_df)
Successfully imported SP500LM to server