728x90
해당 데이터를 활용한 3가지 변수선택법 실습 코드
In [1]:
import os
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
In [2]:
# 데이터 불러오기
df= pd.read_csv('C:/Users/설위준/Desktop/05-11--machine-learning/Part 05~11) Machine Learning/06. 회귀분석/실습코드/ToyotaCorolla.csv')
df.head()
Out[2]:
Id | Model | Price | Age_08_04 | Mfg_Month | Mfg_Year | KM | Fuel_Type | HP | Met_Color | ... | Central_Lock | Powered_Windows | Power_Steering | Radio | Mistlamps | Sport_Model | Backseat_Divider | Metallic_Rim | Radio_cassette | Tow_Bar | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | TOYOTA Corolla 2.0 D4D HATCHB TERRA 2/3-Doors | 13500 | 23 | 10 | 2002 | 46986 | Diesel | 90 | 1 | ... | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
1 | 2 | TOYOTA Corolla 2.0 D4D HATCHB TERRA 2/3-Doors | 13750 | 23 | 10 | 2002 | 72937 | Diesel | 90 | 1 | ... | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
2 | 3 | ?TOYOTA Corolla 2.0 D4D HATCHB TERRA 2/3-Doors | 13950 | 24 | 9 | 2002 | 41711 | Diesel | 90 | 1 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
3 | 4 | TOYOTA Corolla 2.0 D4D HATCHB TERRA 2/3-Doors | 14950 | 26 | 7 | 2002 | 48000 | Diesel | 90 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
4 | 5 | TOYOTA Corolla 2.0 D4D HATCHB SOL 2/3-Doors | 13750 | 30 | 3 | 2002 | 38500 | Diesel | 90 | 0 | ... | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 |
5 rows × 37 columns
In [3]:
# 데이터 수와 변수의 수 확인하기
df.shape
Out[3]:
(1436, 37)
범주형 변수를 이진형 변수로 변환¶
In [4]:
# 명목형 변수 Fuel_Type 유형 확인하고
## 가변수 생성
df.Fuel_Type.unique()
Out[4]:
array(['Diesel', 'Petrol', 'CNG'], dtype=object)
In [5]:
# 0으로 1436개 있는 벡터 생성
dummy_p = np.repeat(0,df.shape[0])
dummy_d = np.repeat(0,df.shape[0])
dummy_c = np.repeat(0,df.shape[0])
In [6]:
# 인덱스 슬라이싱 후 (binary = 1) 대입
p_idx = np.array(df.Fuel_Type == "Petrol")
d_idx = np.array(df.Fuel_Type == "Diesel")
c_idx = np.array(df.Fuel_Type == "CNG")
In [7]:
dummy_p[p_idx]=1
dummy_d[d_idx]=1
dummy_c[c_idx]=1
불필요한 변수 제거 및 가변수 추가¶
In [8]:
# 3개의 변수를 포함한 데이터프레임 만들기
Fuel = pd.DataFrame({'Petrol': dummy_p,'Diesel':dummy_d,'CNG':dummy_c})
In [9]:
Fuel.head()
Out[9]:
Petrol | Diesel | CNG | |
---|---|---|---|
0 | 0 | 1 | 0 |
1 | 0 | 1 | 0 |
2 | 0 | 1 | 0 |
3 | 0 | 1 | 0 |
4 | 0 | 1 | 0 |
In [10]:
# 불필요한 id, Model, Fuel_Type 제거하기
df = df.drop(['Id','Model','Fuel_Type'],axis=1)
In [11]:
# 합치기
mir_data = pd.concat((df,Fuel),1)
mir_data.head()
Out[11]:
Price | Age_08_04 | Mfg_Month | Mfg_Year | KM | HP | Met_Color | Automatic | cc | Doors | ... | Radio | Mistlamps | Sport_Model | Backseat_Divider | Metallic_Rim | Radio_cassette | Tow_Bar | Petrol | Diesel | CNG | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 13500 | 23 | 10 | 2002 | 46986 | 90 | 1 | 0 | 2000 | 3 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
1 | 13750 | 23 | 10 | 2002 | 72937 | 90 | 1 | 0 | 2000 | 3 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
2 | 13950 | 24 | 9 | 2002 | 41711 | 90 | 1 | 0 | 2000 | 3 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
3 | 14950 | 26 | 7 | 2002 | 48000 | 90 | 0 | 0 | 2000 | 3 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
4 | 13750 | 30 | 3 | 2002 | 38500 | 90 | 0 | 0 | 2000 | 3 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
5 rows × 37 columns
In [12]:
# bias 추가
mir_data = sm.add_constant(mir_data, has_constant='add')
mir_data.head()
Out[12]:
const | Price | Age_08_04 | Mfg_Month | Mfg_Year | KM | HP | Met_Color | Automatic | cc | ... | Radio | Mistlamps | Sport_Model | Backseat_Divider | Metallic_Rim | Radio_cassette | Tow_Bar | Petrol | Diesel | CNG | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | 13500 | 23 | 10 | 2002 | 46986 | 90 | 1 | 0 | 2000 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
1 | 1.0 | 13750 | 23 | 10 | 2002 | 72937 | 90 | 1 | 0 | 2000 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
2 | 1.0 | 13950 | 24 | 9 | 2002 | 41711 | 90 | 1 | 0 | 2000 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
3 | 1.0 | 14950 | 26 | 7 | 2002 | 48000 | 90 | 0 | 0 | 2000 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
4 | 1.0 | 13750 | 30 | 3 | 2002 | 38500 | 90 | 0 | 0 | 2000 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
5 rows × 38 columns
설명변수(X), 타켓변수(Y) 분리 및 학습데이터와 평가데이터 분할¶
In [13]:
feature_columns = list(mir_data.columns.difference(['Price'])) # price변수 제외한 모든 변수 반환
X = mir_data[feature_columns]
y = mir_data.Price
train_x, test_x, train_y, test_y = train_test_split(X, y, train_size=0.7, test_size=0.3)
print(train_x.shape, test_x.shape, train_y.shape, test_y.shape) # 7:3 비율로 데이터 분할
(1005, 37) (431, 37) (1005,) (431,)
In [14]:
# Train the MLR / 회귀모델적합
full_model = sm.OLS(train_y,train_x)
fitted_full_model=full_model.fit()
In [15]:
## R2가 높고, 대부분의 변수들이 유의
fitted_full_model.summary()
Out[15]:
Dep. Variable: | Price | R-squared: | 0.918 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.915 |
Method: | Least Squares | F-statistic: | 327.4 |
Date: | Sat, 21 Aug 2021 | Prob (F-statistic): | 0.00 |
Time: | 18:19:12 | Log-Likelihood: | -8404.0 |
No. Observations: | 1005 | AIC: | 1.688e+04 |
Df Residuals: | 971 | BIC: | 1.704e+04 |
Df Model: | 33 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
ABS | -97.9613 | 135.724 | -0.722 | 0.471 | -364.307 | 168.384 |
Age_08_04 | -118.9924 | 3.939 | -30.211 | 0.000 | -126.722 | -111.263 |
Airbag_1 | 84.2737 | 250.181 | 0.337 | 0.736 | -406.684 | 575.231 |
Airbag_2 | -285.5649 | 135.858 | -2.102 | 0.036 | -552.174 | -18.956 |
Airco | 354.5659 | 93.329 | 3.799 | 0.000 | 171.417 | 537.715 |
Automatic | 500.6287 | 152.806 | 3.276 | 0.001 | 200.761 | 800.497 |
Automatic_airco | 2433.9233 | 198.304 | 12.274 | 0.000 | 2044.769 | 2823.077 |
BOVAG_Guarantee | 513.3755 | 132.584 | 3.872 | 0.000 | 253.191 | 773.560 |
Backseat_Divider | -183.8144 | 131.426 | -1.399 | 0.162 | -441.725 | 74.096 |
Boardcomputer | -311.6825 | 118.632 | -2.627 | 0.009 | -544.487 | -78.878 |
CD_Player | 102.8262 | 101.144 | 1.017 | 0.310 | -95.659 | 301.312 |
CNG | -2202.1398 | 300.621 | -7.325 | 0.000 | -2792.082 | -1612.198 |
Central_Lock | 14.8695 | 143.733 | 0.103 | 0.918 | -267.193 | 296.932 |
Cylinders | -0.0266 | 0.002 | -11.023 | 0.000 | -0.031 | -0.022 |
Diesel | 2131.6619 | 331.412 | 6.432 | 0.000 | 1481.296 | 2782.028 |
Doors | 70.9976 | 40.317 | 1.761 | 0.079 | -8.121 | 150.117 |
Gears | -360.3911 | 229.802 | -1.568 | 0.117 | -811.357 | 90.575 |
Guarantee_Period | 62.5922 | 13.517 | 4.630 | 0.000 | 36.065 | 89.119 |
HP | 54.9771 | 6.148 | 8.942 | 0.000 | 42.912 | 67.042 |
KM | -0.0162 | 0.001 | -12.186 | 0.000 | -0.019 | -0.014 |
Met_Color | 23.0455 | 75.647 | 0.305 | 0.761 | -125.404 | 171.495 |
Metallic_Rim | 112.2521 | 95.607 | 1.174 | 0.241 | -75.368 | 299.872 |
Mfg_Month | -103.1347 | 10.497 | -9.825 | 0.000 | -123.734 | -82.535 |
Mfg_Year | 5.1581 | 0.917 | 5.625 | 0.000 | 3.359 | 6.958 |
Mfr_Guarantee | 202.2356 | 74.182 | 2.726 | 0.007 | 56.661 | 347.810 |
Mistlamps | 75.7436 | 110.297 | 0.687 | 0.492 | -140.705 | 292.192 |
Petrol | 70.4712 | 278.349 | 0.253 | 0.800 | -475.763 | 616.706 |
Power_Steering | -210.6612 | 285.013 | -0.739 | 0.460 | -769.974 | 348.651 |
Powered_Windows | 298.2411 | 145.506 | 2.050 | 0.041 | 12.699 | 583.783 |
Quarterly_Tax | 13.6632 | 2.209 | 6.186 | 0.000 | 9.329 | 17.998 |
Radio | 1222.5104 | 764.239 | 1.600 | 0.110 | -277.240 | 2722.261 |
Radio_cassette | -1317.3420 | 762.502 | -1.728 | 0.084 | -2813.684 | 179.000 |
Sport_Model | 339.5571 | 88.684 | 3.829 | 0.000 | 165.522 | 513.592 |
Tow_Bar | -142.9095 | 79.912 | -1.788 | 0.074 | -299.730 | 13.911 |
Weight | 8.6224 | 1.192 | 7.234 | 0.000 | 6.283 | 10.962 |
cc | -4.3384 | 0.560 | -7.740 | 0.000 | -5.438 | -3.239 |
const | -0.0067 | 0.001 | -11.023 | 0.000 | -0.008 | -0.005 |
Omnibus: | 93.503 | Durbin-Watson: | 1.942 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 503.817 |
Skew: | 0.207 | Prob(JB): | 3.96e-110 |
Kurtosis: | 6.444 | Cond. No. | 1.31e+16 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 3.38e-20. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
VIF를 통한 다중공선성 확인¶
In [17]:
# 다중공선성 확인 코드
from statsmodels.stats.outliers_influence import variance_inflation_factor
vif = pd.DataFrame()
vif["VIF Factor"] = [variance_inflation_factor(
mir_data.values, i) for i in range(mir_data.shape[1])]
vif["features"] = mir_data.columns
vif
C:\work\envs\datascience\lib\site-packages\statsmodels\regression\linear_model.py:1715: RuntimeWarning: divide by zero encountered in double_scalars return 1 - self.ssr/self.centered_tss C:\work\envs\datascience\lib\site-packages\statsmodels\stats\outliers_influence.py:193: RuntimeWarning: divide by zero encountered in double_scalars vif = 1. / (1. - r_squared_i)
Out[17]:
VIF Factor | features | |
---|---|---|
0 | 0.000000 | const |
1 | 10.953474 | Price |
2 | inf | Age_08_04 |
3 | inf | Mfg_Month |
4 | inf | Mfg_Year |
5 | 2.400334 | KM |
6 | 2.621514 | HP |
7 | 1.143778 | Met_Color |
8 | 1.121303 | Automatic |
9 | 1.258641 | cc |
10 | 1.352288 | Doors |
11 | 0.000000 | Cylinders |
12 | 1.271814 | Gears |
13 | 5.496805 | Quarterly_Tax |
14 | 4.487491 | Weight |
15 | 1.210815 | Mfr_Guarantee |
16 | 1.392485 | BOVAG_Guarantee |
17 | 1.573026 | Guarantee_Period |
18 | 2.276617 | ABS |
19 | 1.612758 | Airbag_1 |
20 | 3.106933 | Airbag_2 |
21 | 1.846429 | Airco |
22 | 2.009866 | Automatic_airco |
23 | 2.647036 | Boardcomputer |
24 | 1.564446 | CD_Player |
25 | 4.593157 | Central_Lock |
26 | 4.676311 | Powered_Windows |
27 | 1.582829 | Power_Steering |
28 | 62.344621 | Radio |
29 | 2.076846 | Mistlamps |
30 | 1.510131 | Sport_Model |
31 | 2.702141 | Backseat_Divider |
32 | 1.349642 | Metallic_Rim |
33 | 62.172860 | Radio_cassette |
34 | 1.153760 | Tow_Bar |
35 | inf | Petrol |
36 | inf | Diesel |
37 | inf | CNG |
In [18]:
# 학습데이터의 잔차 확인
res = fitted_full_model.resid
# q-q plot # 정규분포 확인
fig = sm.qqplot(res, fit=True, line='45')
In [19]:
# residual pattern 확인
pred_y = fitted_full_model.predict(train_x)
import matplotlib.pyplot as plt
fig = plt.scatter(pred_y,res, s=4)
plt.xlim(4000,30000)
plt.xlabel('Fitted values')
plt.ylabel('Residual')
Out[19]:
Text(0, 0.5, 'Residual')
In [20]:
## 검증 데이터에 대한 예측
pred_y2 = fitted_full_model.predict(test_x)
In [22]:
## 잔차 plot
plt.plot(np.array(test_y-pred_y2),label="pred_full")
plt.legend()
plt.show()
In [23]:
from sklearn.metrics import mean_squared_error
In [24]:
## MSE 성능
mean_squared_error(y_true = test_y,y_pred = pred_y2)
Out[24]:
10489299.6822015
변수선택법¶
In [25]:
def processSubset(X,y, feature_set):
model = sm.OLS(y,X[list(feature_set)]) # Modeling
regr = model.fit() # 모델 학습
AIC = regr.aic # 모델의 AIC
return {"model":regr, "AIC":AIC}
print(processSubset(X=train_x, y=train_y, feature_set = feature_columns[0:5]))
{'model': <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855B7B0FA0>, 'AIC': 19114.13716660702}
In [26]:
# 전체에 대한 AIC
processSubset(X=train_x, y=train_y, feature_set = feature_columns)
Out[26]:
{'model': <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x2855a60f2b0>, 'AIC': 16875.965015916092}
In [27]:
import time
import itertools
# getBest: 가장 낮은 AIC를 가지는 모델 선택 및 저장
def getBest(X,y,k):
tic = time.time() # 시작시간
results = [] # 결과 저장공간
for combo in itertools.combinations(X.columns.difference(['const']), k): # 각 변수조합을 고려한 경우의 수
combo=(list(combo)+['const'])
results.append(processSubset(X,y,feature_set=combo)) # 모델링된 것들을 저장
models = pd.DataFrame(results) # 데이터 프레임으로 변환
# 가장 낮은 AIC를 가지는 모델 선택 및 저장
best_model = models.loc[models['AIC'].argmin()] # index
toc = time.time() # 종료시간
print("Processed ", models.shape[0], "models on", k, "predictors in", (toc - tic),
"seconds.")
return best_model
print(getBest(X=train_x, y=train_y,k=2))
Processed 630 models on 2 predictors in 1.6394476890563965 seconds. model <statsmodels.regression.linear_model.Regressio... AIC 17445.355614 Name: 211, dtype: object
In [28]:
# 변수 선택에 따른 학습시간과 저장
models = pd.DataFrame(columns=["AIC", "model"])
tic = time.time()
for i in range(1,4):
models.loc[i] = getBest(X=train_x,y=train_y,k=i)
toc = time.time()
print("Total elapsed time:", (toc-tic), "seconds.")
Processed 36 models on 1 predictors in 0.14760828018188477 seconds. Processed 630 models on 2 predictors in 1.455152988433838 seconds. Processed 7140 models on 3 predictors in 17.38446044921875 seconds. Total elapsed time: 19.276965141296387 seconds.
In [29]:
models
Out[29]:
AIC | model | |
---|---|---|
1 | 17732.252483 | <statsmodels.regression.linear_model.Regressio... |
2 | 17445.355614 | <statsmodels.regression.linear_model.Regressio... |
3 | 17306.539141 | <statsmodels.regression.linear_model.Regressio... |
In [30]:
# 변수가 3개일때 최적 모형
models.loc[3, "model"].summary()
Out[30]:
Dep. Variable: | Price | R-squared: | 0.866 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.865 |
Method: | Least Squares | F-statistic: | 2150. |
Date: | Sat, 21 Aug 2021 | Prob (F-statistic): | 0.00 |
Time: | 18:29:33 | Log-Likelihood: | -8649.3 |
No. Observations: | 1005 | AIC: | 1.731e+04 |
Df Residuals: | 1001 | BIC: | 1.733e+04 |
Df Model: | 3 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
KM | -0.0229 | 0.001 | -16.470 | 0.000 | -0.026 | -0.020 |
Mfg_Year | 1519.9659 | 36.717 | 41.397 | 0.000 | 1447.915 | 1592.017 |
Weight | 18.1798 | 0.914 | 19.884 | 0.000 | 16.386 | 19.974 |
const | -3.047e+06 | 7.3e+04 | -41.753 | 0.000 | -3.19e+06 | -2.9e+06 |
Omnibus: | 159.628 | Durbin-Watson: | 2.032 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 1887.400 |
Skew: | -0.292 | Prob(JB): | 0.00 |
Kurtosis: | 9.688 | Cond. No. | 1.32e+08 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.32e+08. This might indicate that there are
strong multicollinearity or other numerical problems.
In [31]:
# 모든 변수들 모델링 한것과 비교
print("full model Rsquared: ","{:.5f}".format(fitted_full_model.rsquared))
print("full model AIC: ","{:.5f}".format(fitted_full_model.aic))
print("full model MSE: ","{:.5f}".format(fitted_full_model.mse_total))
print("selected model Rsquared: ","{:.5f}".format(models.loc[3, "model"].rsquared))
print("selected model AIC: ","{:.5f}".format(models.loc[3, "model"].aic))
print("selected model MSE: ","{:.5f}".format(models.loc[3, "model"].mse_total))
full model Rsquared: 0.91755 full model AIC: 16875.96502 full model MSE: 13032667.79378 selected model Rsquared: 0.86566 selected model AIC: 17306.53914 selected model MSE: 13032667.79378
In [32]:
# Plot the result
plt.figure(figsize=(20,10))
plt.rcParams.update({'font.size': 18, 'lines.markersize': 10})
## Mallow Cp
plt.subplot(2, 2, 1)
Cp= models.apply(lambda row: (row[1].params.shape[0]+(row[1].mse_total-
fitted_full_model.mse_total)*(train_x.shape[0]-
row[1].params.shape[0])/fitted_full_model.mse_total
), axis=1)
plt.plot(Cp)
plt.plot(Cp.argmin(), Cp.min(), "or")
plt.xlabel('# Predictors')
plt.ylabel('Cp')
# adj-rsquared plot
# adj-rsquared = Explained variation / Total variation
adj_rsquared = models.apply(lambda row: row[1].rsquared_adj, axis=1)
plt.subplot(2, 2, 2)
plt.plot(adj_rsquared)
plt.plot(adj_rsquared.argmax(), adj_rsquared.max(), "or")
plt.xlabel('# Predictors')
plt.ylabel('adjusted rsquared')
# aic
aic = models.apply(lambda row: row[1].aic, axis=1)
plt.subplot(2, 2, 3)
plt.plot(aic)
plt.plot(aic.argmin(), aic.min(), "or")
plt.xlabel('# Predictors')
plt.ylabel('AIC')
# bic
bic = models.apply(lambda row: row[1].bic, axis=1)
plt.subplot(2, 2, 4)
plt.plot(bic)
plt.plot(bic.argmin(), bic.min(), "or")
plt.xlabel(' # Predictors')
plt.ylabel('BIC')
Out[32]:
Text(0, 0.5, 'BIC')
전진선택법¶
In [33]:
########전진선택법(step=1)
def forward(X, y, predictors):
# 데이터 변수들이 미리정의된 predictors에 있는지 없는지 확인 및 분류
remaining_predictors = [p for p in X.columns.difference(['const']) if p not in predictors]
tic = time.time()
results = []
for p in remaining_predictors:
results.append(processSubset(X=X, y= y, feature_set=predictors+[p]+['const']))
# 데이터프레임으로 변환
models = pd.DataFrame(results)
# AIC가 가장 낮은 것을 선택
best_model = models.loc[models['AIC'].argmin()] # index
toc = time.time()
print("Processed ", models.shape[0], "models on", len(predictors)+1, "predictors in", (toc-tic))
print('Selected predictors:',best_model['model'].model.exog_names,' AIC:',best_model[0] )
return best_model
In [34]:
#### 전진선택법 모델
def forward_model(X,y):
Fmodels = pd.DataFrame(columns=["AIC", "model"])
tic = time.time()
# 미리 정의된 데이터 변수
predictors = []
# 변수 1~10개 : 0~9 -> 1~10
for i in range(1, len(X.columns.difference(['const'])) + 1):
Forward_result = forward(X=X,y=y,predictors=predictors)
if i > 1:
if Forward_result['AIC'] > Fmodel_before:
break
Fmodels.loc[i] = Forward_result
predictors = Fmodels.loc[i]["model"].model.exog_names
Fmodel_before = Fmodels.loc[i]["AIC"]
predictors = [ k for k in predictors if k != 'const']
toc = time.time()
print("Total elapsed time:", (toc - tic), "seconds.")
return(Fmodels['model'][len(Fmodels['model'])])
In [35]:
Forward_best_model = forward_model(X=train_x, y= train_y)
Processed 36 models on 1 predictors in 0.11967658996582031 Selected predictors: ['Mfg_Year', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFEF040> Processed 35 models on 2 predictors in 0.07280683517456055 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFB5EE0> Processed 34 models on 3 predictors in 0.08078312873840332 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFEF6D0> Processed 33 models on 4 predictors in 0.08876252174377441 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002857FC1EAC0> Processed 32 models on 5 predictors in 0.083770751953125 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFEF430> Processed 31 models on 6 predictors in 0.09575510025024414 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFED1C0> Processed 30 models on 7 predictors in 0.08278203010559082 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFB5670> Processed 29 models on 8 predictors in 0.10666012763977051 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD52190> Processed 28 models on 9 predictors in 0.09275436401367188 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD52A90> Processed 27 models on 10 predictors in 0.07379984855651855 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD520D0> Processed 26 models on 11 predictors in 0.07870364189147949 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002857FC1E0D0> Processed 25 models on 12 predictors in 0.07978606224060059 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFB5AC0> Processed 24 models on 13 predictors in 0.0827791690826416 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFB50D0> Processed 23 models on 14 predictors in 0.08129620552062988 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFEDFD0> Processed 22 models on 15 predictors in 0.06682085990905762 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFC4070> Processed 21 models on 16 predictors in 0.08676528930664062 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFB5A60> Processed 20 models on 17 predictors in 0.08377671241760254 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFEF5E0> Processed 19 models on 18 predictors in 0.07779383659362793 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFC4B20> Processed 18 models on 19 predictors in 0.07580852508544922 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD81220> Processed 17 models on 20 predictors in 0.07081079483032227 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFED3A0> Processed 16 models on 21 predictors in 0.07180380821228027 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD52820> Processed 15 models on 22 predictors in 0.061836957931518555 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFEDDC0> Processed 14 models on 23 predictors in 0.04787421226501465 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFC43A0> Total elapsed time: 2.0838100910186768 seconds.
In [36]:
Forward_best_model.aic
Out[36]:
16864.8106523611
In [37]:
Forward_best_model.summary()
Out[37]:
Dep. Variable: | Price | R-squared: | 0.917 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.915 |
Method: | Least Squares | F-statistic: | 490.9 |
Date: | Sat, 21 Aug 2021 | Prob (F-statistic): | 0.00 |
Time: | 18:36:35 | Log-Likelihood: | -8409.4 |
No. Observations: | 1005 | AIC: | 1.686e+04 |
Df Residuals: | 982 | BIC: | 1.698e+04 |
Df Model: | 22 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
Mfg_Year | 1234.2430 | 125.256 | 9.854 | 0.000 | 988.443 | 1480.043 |
Automatic_airco | 2533.8930 | 184.775 | 13.713 | 0.000 | 2171.293 | 2896.493 |
Weight | 8.6821 | 1.180 | 7.356 | 0.000 | 6.366 | 10.998 |
KM | -0.0164 | 0.001 | -12.454 | 0.000 | -0.019 | -0.014 |
Powered_Windows | 343.9188 | 83.346 | 4.126 | 0.000 | 180.361 | 507.476 |
HP | 53.2948 | 5.823 | 9.152 | 0.000 | 41.868 | 64.722 |
Guarantee_Period | 66.4777 | 12.921 | 5.145 | 0.000 | 41.121 | 91.834 |
BOVAG_Guarantee | 468.3240 | 126.943 | 3.689 | 0.000 | 219.213 | 717.435 |
Quarterly_Tax | 13.1804 | 2.072 | 6.362 | 0.000 | 9.115 | 17.246 |
cc | -4.2820 | 0.548 | -7.812 | 0.000 | -5.358 | -3.206 |
CNG | -4221.1675 | 558.615 | -7.556 | 0.000 | -5317.384 | -3124.951 |
Airco | 373.8062 | 88.400 | 4.229 | 0.000 | 200.332 | 547.281 |
Petrol | -2052.2727 | 507.945 | -4.040 | 0.000 | -3049.056 | -1055.489 |
Boardcomputer | -324.0029 | 114.709 | -2.825 | 0.005 | -549.105 | -98.901 |
Automatic | 489.6334 | 151.480 | 3.232 | 0.001 | 192.371 | 786.895 |
Airbag_2 | -376.7919 | 96.394 | -3.909 | 0.000 | -565.953 | -187.631 |
Sport_Model | 286.5235 | 83.527 | 3.430 | 0.001 | 122.612 | 450.435 |
Mfr_Guarantee | 202.5188 | 72.843 | 2.780 | 0.006 | 59.573 | 345.464 |
Tow_Bar | -152.2879 | 77.285 | -1.970 | 0.049 | -303.950 | -0.626 |
Doors | 77.6576 | 39.386 | 1.972 | 0.049 | 0.367 | 154.948 |
Age_08_04 | -15.1787 | 10.464 | -1.451 | 0.147 | -35.713 | 5.356 |
CD_Player | 141.2613 | 98.300 | 1.437 | 0.151 | -51.640 | 334.163 |
const | -2.464e+06 | 2.51e+05 | -9.821 | 0.000 | -2.96e+06 | -1.97e+06 |
Omnibus: | 94.457 | Durbin-Watson: | 1.941 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 520.088 |
Skew: | 0.202 | Prob(JB): | 1.16e-113 |
Kurtosis: | 6.501 | Cond. No. | 5.72e+08 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 5.72e+08. This might indicate that there are
strong multicollinearity or other numerical problems.
후진소거법¶
In [38]:
######## 후진선택법(step=1)
def backward(X,y,predictors):
tic = time.time()
results = []
# 데이터 변수들이 미리정의된 predictors 조합 확인
for combo in itertools.combinations(predictors, len(predictors) - 1):
results.append(processSubset(X=X, y= y,feature_set=list(combo)+['const']))
models = pd.DataFrame(results)
# 가장 낮은 AIC를 가진 모델을 선택
best_model = models.loc[models['AIC'].argmin()]
toc = time.time()
print("Processed ", models.shape[0], "models on", len(predictors) - 1, "predictors in",
(toc - tic))
print('Selected predictors:',best_model['model'].model.exog_names,' AIC:',best_model[0] )
return best_model
In [39]:
# 후진 소거법 모델
def backward_model(X, y):
Bmodels = pd.DataFrame(columns=["AIC", "model"], index = range(1,len(X.columns)))
tic = time.time()
predictors = X.columns.difference(['const'])
Bmodel_before = processSubset(X,y,predictors)['AIC']
while (len(predictors) > 1):
Backward_result = backward(X=train_x, y= train_y, predictors = predictors)
if Backward_result['AIC'] > Bmodel_before:
break
Bmodels.loc[len(predictors) - 1] = Backward_result
predictors = Bmodels.loc[len(predictors) - 1]["model"].model.exog_names
Bmodel_before = Backward_result['AIC']
predictors = [ k for k in predictors if k != 'const']
toc = time.time()
print("Total elapsed time:", (toc - tic), "seconds.")
return (Bmodels['model'].dropna().iloc[0])
In [40]:
Backward_best_model = backward_model(X=train_x,y=train_y)
Processed 36 models on 35 predictors in 0.27528953552246094 Selected predictors: ['ABS', 'Age_08_04', 'Airbag_1', 'Airbag_2', 'Airco', 'Automatic', 'Automatic_airco', 'BOVAG_Guarantee', 'Backseat_Divider', 'Boardcomputer', 'CD_Player', 'CNG', 'Cylinders', 'Diesel', 'Doors', 'Gears', 'Guarantee_Period', 'HP', 'KM', 'Met_Color', 'Metallic_Rim', 'Mfg_Month', 'Mfg_Year', 'Mfr_Guarantee', 'Mistlamps', 'Petrol', 'Power_Steering', 'Powered_Windows', 'Quarterly_Tax', 'Radio', 'Radio_cassette', 'Sport_Model', 'Tow_Bar', 'Weight', 'cc', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD700A0> Processed 35 models on 34 predictors in 0.17553305625915527 Selected predictors: ['ABS', 'Age_08_04', 'Airbag_1', 'Airbag_2', 'Airco', 'Automatic', 'Automatic_airco', 'BOVAG_Guarantee', 'Backseat_Divider', 'Boardcomputer', 'CD_Player', 'CNG', 'Cylinders', 'Diesel', 'Doors', 'Gears', 'Guarantee_Period', 'HP', 'KM', 'Metallic_Rim', 'Mfg_Month', 'Mfg_Year', 'Mfr_Guarantee', 'Mistlamps', 'Petrol', 'Power_Steering', 'Powered_Windows', 'Quarterly_Tax', 'Radio', 'Radio_cassette', 'Sport_Model', 'Tow_Bar', 'Weight', 'cc', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFB0FA0> Processed 34 models on 33 predictors in 0.1884934902191162 Selected predictors: ['ABS', 'Age_08_04', 'Airbag_2', 'Airco', 'Automatic', 'Automatic_airco', 'BOVAG_Guarantee', 'Backseat_Divider', 'Boardcomputer', 'CD_Player', 'CNG', 'Cylinders', 'Diesel', 'Doors', 'Gears', 'Guarantee_Period', 'HP', 'KM', 'Metallic_Rim', 'Mfg_Month', 'Mfg_Year', 'Mfr_Guarantee', 'Mistlamps', 'Petrol', 'Power_Steering', 'Powered_Windows', 'Quarterly_Tax', 'Radio', 'Radio_cassette', 'Sport_Model', 'Tow_Bar', 'Weight', 'cc', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD736D0> Processed 33 models on 32 predictors in 0.1685488224029541 Selected predictors: ['ABS', 'Age_08_04', 'Airbag_2', 'Airco', 'Automatic', 'Automatic_airco', 'BOVAG_Guarantee', 'Backseat_Divider', 'Boardcomputer', 'CD_Player', 'CNG', 'Cylinders', 'Diesel', 'Doors', 'Gears', 'Guarantee_Period', 'HP', 'KM', 'Metallic_Rim', 'Mfg_Month', 'Mfg_Year', 'Mfr_Guarantee', 'Mistlamps', 'Petrol', 'Powered_Windows', 'Quarterly_Tax', 'Radio', 'Radio_cassette', 'Sport_Model', 'Tow_Bar', 'Weight', 'cc', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD70C10> Processed 32 models on 31 predictors in 0.17054438591003418 Selected predictors: ['ABS', 'Age_08_04', 'Airbag_2', 'Airco', 'Automatic', 'Automatic_airco', 'BOVAG_Guarantee', 'Backseat_Divider', 'Boardcomputer', 'CD_Player', 'CNG', 'Cylinders', 'Diesel', 'Doors', 'Gears', 'Guarantee_Period', 'HP', 'KM', 'Metallic_Rim', 'Mfg_Month', 'Mfg_Year', 'Mfr_Guarantee', 'Petrol', 'Powered_Windows', 'Quarterly_Tax', 'Radio', 'Radio_cassette', 'Sport_Model', 'Tow_Bar', 'Weight', 'cc', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855DFB00A0> Processed 31 models on 30 predictors in 0.15112566947937012 Selected predictors: ['Age_08_04', 'Airbag_2', 'Airco', 'Automatic', 'Automatic_airco', 'BOVAG_Guarantee', 'Backseat_Divider', 'Boardcomputer', 'CD_Player', 'CNG', 'Cylinders', 'Diesel', 'Doors', 'Gears', 'Guarantee_Period', 'HP', 'KM', 'Metallic_Rim', 'Mfg_Month', 'Mfg_Year', 'Mfr_Guarantee', 'Petrol', 'Powered_Windows', 'Quarterly_Tax', 'Radio', 'Radio_cassette', 'Sport_Model', 'Tow_Bar', 'Weight', 'cc', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD7A6A0> Processed 30 models on 29 predictors in 0.1570906639099121 Selected predictors: ['Age_08_04', 'Airbag_2', 'Airco', 'Automatic', 'Automatic_airco', 'BOVAG_Guarantee', 'Backseat_Divider', 'Boardcomputer', 'CNG', 'Cylinders', 'Diesel', 'Doors', 'Gears', 'Guarantee_Period', 'HP', 'KM', 'Metallic_Rim', 'Mfg_Month', 'Mfg_Year', 'Mfr_Guarantee', 'Petrol', 'Powered_Windows', 'Quarterly_Tax', 'Radio', 'Radio_cassette', 'Sport_Model', 'Tow_Bar', 'Weight', 'cc', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD707C0> Processed 29 models on 28 predictors in 0.12566542625427246 Selected predictors: ['Airbag_2', 'Airco', 'Automatic', 'Automatic_airco', 'BOVAG_Guarantee', 'Backseat_Divider', 'Boardcomputer', 'CNG', 'Cylinders', 'Diesel', 'Doors', 'Gears', 'Guarantee_Period', 'HP', 'KM', 'Metallic_Rim', 'Mfg_Month', 'Mfg_Year', 'Mfr_Guarantee', 'Petrol', 'Powered_Windows', 'Quarterly_Tax', 'Radio', 'Radio_cassette', 'Sport_Model', 'Tow_Bar', 'Weight', 'cc', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD7AA30> Processed 28 models on 27 predictors in 0.1406254768371582 Selected predictors: ['Airbag_2', 'Airco', 'Automatic', 'Automatic_airco', 'BOVAG_Guarantee', 'Backseat_Divider', 'Boardcomputer', 'CNG', 'Cylinders', 'Diesel', 'Doors', 'Gears', 'Guarantee_Period', 'HP', 'KM', 'Metallic_Rim', 'Mfg_Month', 'Mfg_Year', 'Mfr_Guarantee', 'Powered_Windows', 'Quarterly_Tax', 'Radio', 'Radio_cassette', 'Sport_Model', 'Tow_Bar', 'Weight', 'cc', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD895E0> Total elapsed time: 1.638221263885498 seconds.
In [41]:
Backward_best_model.aic
Out[41]:
16865.19260343656
In [42]:
Backward_best_model.summary()
Out[42]:
Dep. Variable: | Price | R-squared: | 0.917 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.915 |
Method: | Least Squares | F-statistic: | 417.1 |
Date: | Sat, 21 Aug 2021 | Prob (F-statistic): | 0.00 |
Time: | 18:36:53 | Log-Likelihood: | -8405.6 |
No. Observations: | 1005 | AIC: | 1.687e+04 |
Df Residuals: | 978 | BIC: | 1.700e+04 |
Df Model: | 26 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
Airbag_2 | -340.3583 | 105.018 | -3.241 | 0.001 | -546.445 | -134.272 |
Airco | 376.9936 | 89.022 | 4.235 | 0.000 | 202.297 | 551.691 |
Automatic | 491.0042 | 152.182 | 3.226 | 0.001 | 192.363 | 789.645 |
Automatic_airco | 2493.5066 | 185.621 | 13.433 | 0.000 | 2129.246 | 2857.767 |
BOVAG_Guarantee | 509.2730 | 130.752 | 3.895 | 0.000 | 252.686 | 765.860 |
Backseat_Divider | -178.4346 | 125.428 | -1.423 | 0.155 | -424.574 | 67.705 |
Boardcomputer | -282.9175 | 116.550 | -2.427 | 0.015 | -511.635 | -54.201 |
CNG | -5.712e+04 | 1735.127 | -32.918 | 0.000 | -6.05e+04 | -5.37e+04 |
Cylinders | -6.593e+05 | 2.05e+04 | -32.155 | 0.000 | -7e+05 | -6.19e+05 |
Diesel | -5.289e+04 | 1761.075 | -30.033 | 0.000 | -5.63e+04 | -4.94e+04 |
Doors | 66.4206 | 39.867 | 1.666 | 0.096 | -11.813 | 144.655 |
Gears | -360.9511 | 225.800 | -1.599 | 0.110 | -804.060 | 82.158 |
Guarantee_Period | 62.5243 | 13.450 | 4.649 | 0.000 | 36.130 | 88.919 |
HP | 53.5720 | 6.063 | 8.836 | 0.000 | 41.674 | 65.470 |
KM | -0.0161 | 0.001 | -12.218 | 0.000 | -0.019 | -0.014 |
Metallic_Rim | 135.2385 | 91.523 | 1.478 | 0.140 | -44.366 | 314.843 |
Mfg_Month | 15.9832 | 10.469 | 1.527 | 0.127 | -4.561 | 36.528 |
Mfg_Year | 1430.0774 | 44.515 | 32.126 | 0.000 | 1342.721 | 1517.434 |
Mfr_Guarantee | 210.9072 | 72.997 | 2.889 | 0.004 | 67.659 | 354.156 |
Petrol | -5.482e+04 | 1708.057 | -32.093 | 0.000 | -5.82e+04 | -5.15e+04 |
Powered_Windows | 326.3924 | 85.217 | 3.830 | 0.000 | 159.163 | 493.621 |
Quarterly_Tax | 14.0695 | 2.142 | 6.568 | 0.000 | 9.866 | 18.273 |
Radio | 1131.2404 | 749.266 | 1.510 | 0.131 | -339.113 | 2601.594 |
Radio_cassette | -1250.2742 | 749.233 | -1.669 | 0.095 | -2720.563 | 220.014 |
Sport_Model | 324.5078 | 86.720 | 3.742 | 0.000 | 154.329 | 494.687 |
Tow_Bar | -139.9642 | 78.488 | -1.783 | 0.075 | -293.989 | 14.061 |
Weight | 8.8000 | 1.179 | 7.462 | 0.000 | 6.486 | 11.114 |
cc | -4.2822 | 0.557 | -7.691 | 0.000 | -5.375 | -3.190 |
const | -1.648e+05 | 5125.896 | -32.155 | 0.000 | -1.75e+05 | -1.55e+05 |
Omnibus: | 93.517 | Durbin-Watson: | 1.937 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 508.785 |
Skew: | 0.201 | Prob(JB): | 3.30e-111 |
Kurtosis: | 6.462 | Cond. No. | 1.38e+16 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 3.04e-20. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
단계적 선택법¶
In [43]:
def Stepwise_model(X,y):
Stepmodels = pd.DataFrame(columns=["AIC", "model"])
tic = time.time()
predictors = []
Smodel_before = processSubset(X,y,predictors+['const'])['AIC']
# 변수 1~10개 : 0~9 -> 1~10
for i in range(1, len(X.columns.difference(['const'])) + 1):
Forward_result = forward(X=X, y=y, predictors=predictors) # constant added
print('forward')
Stepmodels.loc[i] = Forward_result
predictors = Stepmodels.loc[i]["model"].model.exog_names
predictors = [ k for k in predictors if k != 'const']
Backward_result = backward(X=X, y=y, predictors=predictors)
if Backward_result['AIC']< Forward_result['AIC']:
Stepmodels.loc[i] = Backward_result
predictors = Stepmodels.loc[i]["model"].model.exog_names
Smodel_before = Stepmodels.loc[i]["AIC"]
predictors = [ k for k in predictors if k != 'const']
print('backward')
if Stepmodels.loc[i]['AIC']> Smodel_before:
break
else:
Smodel_before = Stepmodels.loc[i]["AIC"]
toc = time.time()
print("Total elapsed time:", (toc - tic), "seconds.")
return (Stepmodels['model'][len(Stepmodels['model'])])
In [44]:
Stepwise_best_model=Stepwise_model(X=train_x,y=train_y)
Processed 36 models on 1 predictors in 0.1077113151550293 Selected predictors: ['Mfg_Year', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD07970> forward Processed 1 models on 0 predictors in 0.0019958019256591797 Selected predictors: ['const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD07EE0> Processed 35 models on 2 predictors in 0.07679557800292969 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD83A90> forward Processed 2 models on 1 predictors in 0.005984067916870117 Selected predictors: ['Mfg_Year', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCECB50> Processed 34 models on 3 predictors in 0.07280635833740234 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCF3610> forward Processed 3 models on 2 predictors in 0.013959884643554688 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCF3190> Processed 33 models on 4 predictors in 0.08676719665527344 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD72940> forward Processed 4 models on 3 predictors in 0.01296544075012207 Selected predictors: ['Mfg_Year', 'Weight', 'KM', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD0FFD0> Processed 32 models on 5 predictors in 0.1361548900604248 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD07370> forward Processed 5 models on 4 predictors in 0.016953229904174805 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD07CA0> Processed 31 models on 6 predictors in 0.07879066467285156 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCEC130> forward Processed 6 models on 5 predictors in 0.014961719512939453 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCF31C0> Processed 30 models on 7 predictors in 0.07380342483520508 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCECB50> forward Processed 7 models on 6 predictors in 0.02293992042541504 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD07EE0> Processed 29 models on 8 predictors in 0.0718071460723877 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD83D60> forward Processed 8 models on 7 predictors in 0.022939443588256836 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCF3DC0> Processed 28 models on 9 predictors in 0.07679390907287598 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD07820> forward Processed 9 models on 8 predictors in 0.02892303466796875 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD83E80> Processed 27 models on 10 predictors in 0.08328819274902344 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCF30A0> forward Processed 10 models on 9 predictors in 0.025931119918823242 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'Quarterly_Tax', 'cc', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCF3C40> Processed 26 models on 11 predictors in 0.10124802589416504 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD0F1F0> forward Processed 11 models on 10 predictors in 0.06633639335632324 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'Quarterly_Tax', 'cc', 'CNG', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD07490> Processed 25 models on 12 predictors in 0.10272574424743652 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD83F70> forward Processed 12 models on 11 predictors in 0.04986834526062012 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD7ADF0> Processed 24 models on 13 predictors in 0.08377742767333984 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD07EE0> forward Processed 13 models on 12 predictors in 0.04487919807434082 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD834C0> Processed 23 models on 14 predictors in 0.07480001449584961 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCEC220> forward Processed 14 models on 13 predictors in 0.055845022201538086 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD0F070> Processed 22 models on 15 predictors in 0.08078646659851074 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD83970> forward Processed 15 models on 14 predictors in 0.058841705322265625 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCF3A30> Processed 21 models on 16 predictors in 0.07579851150512695 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD0F550> forward Processed 16 models on 15 predictors in 0.057848215103149414 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD72F40> Processed 20 models on 17 predictors in 0.09674334526062012 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD07250> forward Processed 17 models on 16 predictors in 0.0578458309173584 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Automatic', 'Airbag_2', 'Sport_Model', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD83490> Processed 19 models on 18 predictors in 0.09235358238220215 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCF3E80> forward Processed 18 models on 17 predictors in 0.07180905342102051 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCF39D0> Processed 18 models on 19 predictors in 0.061835527420043945 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD071C0> forward Processed 19 models on 18 predictors in 0.06183600425720215 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD72A00> Processed 17 models on 20 predictors in 0.06781792640686035 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCEC6D0> forward Processed 20 models on 19 predictors in 0.07830309867858887 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD075B0> Processed 16 models on 21 predictors in 0.06582379341125488 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCE4100> forward Processed 21 models on 20 predictors in 0.09773707389831543 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD7A3A0> Processed 15 models on 22 predictors in 0.05684065818786621 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCF3CD0> forward Processed 22 models on 21 predictors in 0.0797874927520752 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD7AF10> Processed 14 models on 23 predictors in 0.04587578773498535 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD83400> forward Processed 23 models on 22 predictors in 0.08875894546508789 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCE4040> backward Processed 14 models on 23 predictors in 0.05585336685180664 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD0FDC0> forward Processed 23 models on 22 predictors in 0.0947411060333252 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD075B0> backward Processed 14 models on 23 predictors in 0.08277750015258789 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCE41F0> forward Processed 23 models on 22 predictors in 0.09275245666503906 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCEC760> backward Processed 14 models on 23 predictors in 0.06382966041564941 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCD50D0> forward Processed 23 models on 22 predictors in 0.0837869644165039 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCE4AC0> backward Processed 14 models on 23 predictors in 0.08377814292907715 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD0FBE0> forward Processed 23 models on 22 predictors in 0.09275412559509277 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCF36A0> backward Processed 14 models on 23 predictors in 0.052858829498291016 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD83F40> forward Processed 23 models on 22 predictors in 0.08975982666015625 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD0FF40> backward Processed 14 models on 23 predictors in 0.05186057090759277 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCD55B0> forward Processed 23 models on 22 predictors in 0.07696700096130371 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCEC730> backward Processed 14 models on 23 predictors in 0.05585050582885742 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD7A0D0> forward Processed 23 models on 22 predictors in 0.08577132225036621 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCD5E20> backward Processed 14 models on 23 predictors in 0.049866676330566406 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCE4F10> forward Processed 23 models on 22 predictors in 0.1296546459197998 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD7AF10> backward Processed 14 models on 23 predictors in 0.0947413444519043 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCD52B0> forward Processed 23 models on 22 predictors in 0.10671472549438477 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD7A2E0> backward Processed 14 models on 23 predictors in 0.06083965301513672 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855CE615B0> forward Processed 23 models on 22 predictors in 0.09175443649291992 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BD83040> backward Processed 14 models on 23 predictors in 0.06582474708557129 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCE4D30> forward Processed 23 models on 22 predictors in 0.12566256523132324 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855CE61BE0> backward Processed 14 models on 23 predictors in 0.07280635833740234 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCD5760> forward Processed 23 models on 22 predictors in 0.11980557441711426 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCE44F0> backward Processed 14 models on 23 predictors in 0.07631063461303711 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'Cylinders', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855CE61400> forward Processed 23 models on 22 predictors in 0.08976173400878906 Selected predictors: ['Mfg_Year', 'Automatic_airco', 'Weight', 'KM', 'Powered_Windows', 'HP', 'Guarantee_Period', 'BOVAG_Guarantee', 'Quarterly_Tax', 'cc', 'CNG', 'Airco', 'Petrol', 'Boardcomputer', 'Automatic', 'Airbag_2', 'Sport_Model', 'Mfr_Guarantee', 'Tow_Bar', 'Doors', 'Age_08_04', 'CD_Player', 'const'] AIC: <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000002855BCD53D0> backward Total elapsed time: 5.594517707824707 seconds.
In [45]:
Stepwise_best_model.aic
Out[45]:
16864.8106523611
In [46]:
Stepwise_best_model.summary()
Out[46]:
Dep. Variable: | Price | R-squared: | 0.917 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.915 |
Method: | Least Squares | F-statistic: | 490.9 |
Date: | Sat, 21 Aug 2021 | Prob (F-statistic): | 0.00 |
Time: | 18:42:23 | Log-Likelihood: | -8409.4 |
No. Observations: | 1005 | AIC: | 1.686e+04 |
Df Residuals: | 982 | BIC: | 1.698e+04 |
Df Model: | 22 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
Mfg_Year | 1234.2430 | 125.256 | 9.854 | 0.000 | 988.443 | 1480.043 |
Automatic_airco | 2533.8930 | 184.775 | 13.713 | 0.000 | 2171.293 | 2896.493 |
Weight | 8.6821 | 1.180 | 7.356 | 0.000 | 6.366 | 10.998 |
KM | -0.0164 | 0.001 | -12.454 | 0.000 | -0.019 | -0.014 |
Powered_Windows | 343.9188 | 83.346 | 4.126 | 0.000 | 180.361 | 507.476 |
HP | 53.2948 | 5.823 | 9.152 | 0.000 | 41.868 | 64.722 |
Guarantee_Period | 66.4777 | 12.921 | 5.145 | 0.000 | 41.121 | 91.834 |
BOVAG_Guarantee | 468.3240 | 126.943 | 3.689 | 0.000 | 219.213 | 717.435 |
Quarterly_Tax | 13.1804 | 2.072 | 6.362 | 0.000 | 9.115 | 17.246 |
cc | -4.2820 | 0.548 | -7.812 | 0.000 | -5.358 | -3.206 |
CNG | -4221.1675 | 558.615 | -7.556 | 0.000 | -5317.384 | -3124.951 |
Airco | 373.8062 | 88.400 | 4.229 | 0.000 | 200.332 | 547.281 |
Petrol | -2052.2727 | 507.945 | -4.040 | 0.000 | -3049.056 | -1055.489 |
Boardcomputer | -324.0029 | 114.709 | -2.825 | 0.005 | -549.105 | -98.901 |
Automatic | 489.6334 | 151.480 | 3.232 | 0.001 | 192.371 | 786.895 |
Airbag_2 | -376.7919 | 96.394 | -3.909 | 0.000 | -565.953 | -187.631 |
Sport_Model | 286.5235 | 83.527 | 3.430 | 0.001 | 122.612 | 450.435 |
Mfr_Guarantee | 202.5188 | 72.843 | 2.780 | 0.006 | 59.573 | 345.464 |
Tow_Bar | -152.2879 | 77.285 | -1.970 | 0.049 | -303.950 | -0.626 |
Doors | 77.6576 | 39.386 | 1.972 | 0.049 | 0.367 | 154.948 |
Age_08_04 | -15.1787 | 10.464 | -1.451 | 0.147 | -35.713 | 5.356 |
CD_Player | 141.2613 | 98.300 | 1.437 | 0.151 | -51.640 | 334.163 |
const | -2.464e+06 | 2.51e+05 | -9.821 | 0.000 | -2.96e+06 | -1.97e+06 |
Omnibus: | 94.457 | Durbin-Watson: | 1.941 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 520.088 |
Skew: | 0.202 | Prob(JB): | 1.16e-113 |
Kurtosis: | 6.501 | Cond. No. | 5.72e+08 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 5.72e+08. This might indicate that there are
strong multicollinearity or other numerical problems.
성능평가¶
In [47]:
# the number of params
print(Forward_best_model.params.shape, Backward_best_model.params.shape, Stepwise_best_model.params.shape)
(23,) (29,) (23,)
In [48]:
# 모델에 의해 예측된/추정된 값 <-> test_y
pred_y_full = fitted_full_model.predict(test_x)
pred_y_forward = Forward_best_model.predict(test_x[Forward_best_model.model.exog_names])
pred_y_backward = Backward_best_model.predict(test_x[Backward_best_model.model.exog_names])
pred_y_stepwise = Stepwise_best_model.predict(test_x[Stepwise_best_model.model.exog_names])
In [49]:
perf_mat = pd.DataFrame(columns=["ALL", "FORWARD", "BACKWARD", "STEPWISE"],
index =['MSE', 'RMSE','MAE', 'MAPE'])
In [50]:
def mean_absolute_percentage_error(y_true, y_pred):
y_true, y_pred = np.array(y_true), np.array(y_pred)
return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
from sklearn import metrics
In [51]:
# 성능지표
perf_mat.loc['MSE']['ALL'] = metrics.mean_squared_error(test_y,pred_y_full)
perf_mat.loc['MSE']['FORWARD'] = metrics.mean_squared_error(test_y,pred_y_forward)
perf_mat.loc['MSE']['BACKWARD'] = metrics.mean_squared_error(test_y,pred_y_backward)
perf_mat.loc['MSE']['STEPWISE'] = metrics.mean_squared_error(test_y,pred_y_stepwise)
perf_mat.loc['RMSE']['ALL'] = np.sqrt(metrics.mean_squared_error(test_y, pred_y_full))
perf_mat.loc['RMSE']['FORWARD'] = np.sqrt(metrics.mean_squared_error(test_y, pred_y_forward))
perf_mat.loc['RMSE']['BACKWARD'] = np.sqrt(metrics.mean_squared_error(test_y, pred_y_backward))
perf_mat.loc['RMSE']['STEPWISE'] = np.sqrt(metrics.mean_squared_error(test_y, pred_y_stepwise))
perf_mat.loc['MAE']['ALL'] = metrics.mean_absolute_error(test_y, pred_y_full)
perf_mat.loc['MAE']['FORWARD'] = metrics.mean_absolute_error(test_y, pred_y_forward)
perf_mat.loc['MAE']['BACKWARD'] = metrics.mean_absolute_error(test_y, pred_y_backward)
perf_mat.loc['MAE']['STEPWISE'] = metrics.mean_absolute_error(test_y, pred_y_stepwise)
perf_mat.loc['MAPE']['ALL'] = mean_absolute_percentage_error(test_y, pred_y_full)
perf_mat.loc['MAPE']['FORWARD'] = mean_absolute_percentage_error(test_y, pred_y_forward)
perf_mat.loc['MAPE']['BACKWARD'] = mean_absolute_percentage_error(test_y, pred_y_backward)
perf_mat.loc['MAPE']['STEPWISE'] = mean_absolute_percentage_error(test_y, pred_y_stepwise)
print(perf_mat)
ALL FORWARD BACKWARD STEPWISE MSE 10489299.682201 10238833.370929 10265702.069152 10238833.370929 RMSE 3238.718833 3199.817709 3204.013431 3199.817709 MAE 995.325527 984.477736 996.256642 984.477736 MAPE 9.639251 9.530094 9.648756 9.530094
In [52]:
# 선택된 모델 갯수
print(len(fitted_full_model.params))
print(len(Forward_best_model.params))
print(len(Backward_best_model.params))
print(len(Stepwise_best_model.params))
37 23 29 23
In [55]:
from IPython.core.display import display, HTML
display(HTML("<style>.container {width:60% !important;}</style>"))
728x90
'Data scientist > Machine Learning' 카테고리의 다른 글
PCA + Python_Code (0) | 2021.08.23 |
---|---|
회귀분석(4)_로지스틱 회귀분석 (0) | 2021.08.21 |
회귀분석(2)_Code (0) | 2021.08.19 |
회귀분석(1) (0) | 2021.08.19 |
수학적 개념 이해(2) (0) | 2021.08.19 |