Kaggle - Titanic 생존 모델 예측
import numpy as np
import pandas as pd
import random as rd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the CSV files used in this notebook from the Kaggle input directory:
# the training set, the test set, and the sample submission file.
train_set, test_set, gender_submission = map(
    pd.read_csv,
    (
        "/kaggle/input/titanic/train.csv",
        "/kaggle/input/titanic/test.csv",
        "/kaggle/input/titanic/gender_submission.csv",
    ),
)
# Show the first five samples of the training frame (head() defaults to n=5).
train_set.head()
| PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
데이터셋 기본 정보를 확인한 결과, 전체 891개 샘플 중 Age 특성에 177개, Cabin 특성에 687개, Embarked 특성에 2개의 null 값이 존재한다.
# Print basic dataset information: column names, non-null counts, and dtypes.
train_set.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 891 entries, 0 to 890 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 PassengerId 891 non-null int64 1 Survived 891 non-null int64 2 Pclass 891 non-null int64 3 Name 891 non-null object 4 Sex 891 non-null object 5 Age 714 non-null float64 6 SibSp 891 non-null int64 7 Parch 891 non-null int64 8 Ticket 891 non-null object 9 Fare 891 non-null float64 10 Cabin 204 non-null object 11 Embarked 889 non-null object dtypes: float64(2), int64(5), object(5) memory usage: 83.7+ KB
Age 특성은 수치형 특성임과 동시에 null 값이 비교적 적으므로 평균값으로 대체한다.
# Impute missing ages with the column mean.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selection train_set["Age"]: that chained
# in-place form emits a FutureWarning in pandas 2.x and leaves train_set
# unchanged once Copy-on-Write is enabled.
train_set["Age"] = train_set["Age"].fillna(train_set["Age"].mean())
데이터 탐색 및 시각화
# Work on an independent (deep) copy of the training set during exploration
# so that plotting-related edits do not touch train_set.
titanic = train_set.copy(deep=True)
# Survival rate by passenger class (Pclass, used here as a proxy for wealth).
# Author's observation: the higher the class, the higher the survival rate.
sns.barplot(data=titanic, x="Pclass", y="Survived")
<AxesSubplot:xlabel='Pclass', ylabel='Survived'>
# Same Pclass survival plot, additionally split by sex via `hue`.
# Author's observation: women survived at a higher rate than men.
sns.barplot(data=titanic, x="Pclass", y="Survived", hue="Sex")
<AxesSubplot:xlabel='Pclass', ylabel='Survived'>
# Survival rate by port of embarkation.
# Author's observation: passengers who boarded at C had the highest rate.
sns.barplot(data=titanic, x="Embarked", y="Survived")
<AxesSubplot:xlabel='Embarked', ylabel='Survived'>
# Print the oldest and the youngest passenger age, one value per line.
print(max(titanic["Age"]), min(titanic["Age"]), sep="\n")
80.0 0.42
# Age takes many distinct values, so bucket it into a few coarse groups first.
# Each entry is (inclusive upper bound in years, group label), ascending.
_AGE_GROUPS = (
    (6, "Baby"),
    (13, "Kids"),
    (19, "Teen"),
    (38, "Young Adult"),
    (60, "Middle-aged"),
    (80, "Senior"),
)


def _age_group(age):
    """Return the label of the first group whose upper bound covers ``age``.

    Returns None when no group matches: an age above 80, or NaN (every
    comparison against NaN is False).
    """
    for upper_bound, label in _AGE_GROUPS:
        if age <= upper_bound:
            return label
    return None


def change_age(age):
    """Map an age, or a Series of ages, to a coarse age-group label.

    Groups (upper bounds inclusive): Baby <= 6, Kids <= 13, Teen <= 19,
    Young Adult <= 38, Middle-aged <= 60, Senior <= 80.

    Args:
        age: A single numeric age or a ``pd.Series`` of ages.

    Returns:
        For a Series, a list of labels with exactly one entry per input value,
        so it can be assigned back as a column without misalignment. For a
        scalar, a single label. Values that fall in no group (NaN or > 80)
        yield None rather than being dropped.
    """
    if isinstance(age, pd.Series):
        return [_age_group(a) for a in age]
    return _age_group(age)
# Replace the numeric Age column of the exploration copy with the age-group
# labels returned by change_age, then display the resulting frame.
titanic["Age"] = change_age(titanic["Age"])
titanic
| PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | Young Adult | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | Young Adult | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | Young Adult | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | Young Adult | 1 | 0 | 113803 | 53.1000 | C123 | S |
| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | Young Adult | 0 | 0 | 373450 | 8.0500 | NaN | S |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 886 | 887 | 0 | 2 | Montvila, Rev. Juozas | male | Young Adult | 0 | 0 | 211536 | 13.0000 | NaN | S |
| 887 | 888 | 1 | 1 | Graham, Miss. Margaret Edith | female | Teen | 0 | 0 | 112053 | 30.0000 | B42 | S |
| 888 | 889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | Young Adult | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
| 889 | 890 | 1 | 1 | Behr, Mr. Karl Howell | male | Young Adult | 0 | 0 | 111369 | 30.0000 | C148 | C |
| 890 | 891 | 0 | 3 | Dooley, Mr. Patrick | male | Young Adult | 0 | 0 | 370376 | 7.7500 | NaN | Q |
891 rows × 12 columns
# Survival rate per age group.
# Author's observation: younger passengers generally survived at a higher rate.
sns.barplot(data=titanic, x="Age", y="Survived")
<AxesSubplot:xlabel='Age', ylabel='Survived'>
데이터 전처리
현재 학습할 데이터의 특성 중 Embarked와 Cabin에 결측값이 존재한다.
Age 특성에도 결측값이 존재했으나, 앞에서 미리 평균값으로 대체하였다.
# Count the missing values in each column (isna is the same check as isnull).
train_set.isna().sum()
PassengerId 0 Survived 0 Pclass 0 Name 0 Sex 0 Age 0 SibSp 0 Parch 0 Ticket 0 Fare 0 Cabin 687 Embarked 2 dtype: int64
Embarked의 결측값에 해당하는 샘플을 삭제하여 결측값을 제거한다.
# Drop the rows whose Embarked value is missing.
train_set = train_set.dropna(subset=["Embarked"])
이제 학습에 사용할 특성을 준비한다. 주로 살펴본 특성은 Pclass, Sex, Age, Embarked이지만, 최종 데이터셋에는 PassengerId, SibSp, Parch, Fare 특성도 함께 남는다.
해당 특성들 중 Sex, Embarked 특성의 값들은 object, 즉 문자열로 되어있는 데이터이므로 이를 처리한다.
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the Sex column. The input is kept as a one-column DataFrame
# (double brackets) because the encoder expects 2-D input.
train_set_sex = train_set[["Sex"]]
sex_encoder = OneHotEncoder()
sex_1hot = sex_encoder.fit(train_set_sex).transform(train_set_sex)

# One-hot encode the Embarked column the same way.
train_set_embarked = train_set[["Embarked"]]
embarked_encoder = OneHotEncoder()
embarked_1hot = embarked_encoder.fit(train_set_embarked).transform(train_set_embarked)

# Wrap the encoded matrix in a DataFrame that reuses the original row index,
# so it can later be concatenated with train_set column-wise.
embarked_output = pd.DataFrame(
    data=embarked_1hot.toarray(),
    index=train_set_embarked.index,
    columns=embarked_encoder.get_feature_names_out(),
)
embarked_output
| Embarked_C | Embarked_Q | Embarked_S | |
|---|---|---|---|
| 0 | 0.0 | 0.0 | 1.0 |
| 1 | 1.0 | 0.0 | 0.0 |
| 2 | 0.0 | 0.0 | 1.0 |
| 3 | 0.0 | 0.0 | 1.0 |
| 4 | 0.0 | 0.0 | 1.0 |
| ... | ... | ... | ... |
| 886 | 0.0 | 0.0 | 1.0 |
| 887 | 0.0 | 0.0 | 1.0 |
| 888 | 0.0 | 0.0 | 1.0 |
| 889 | 1.0 | 0.0 | 0.0 |
| 890 | 0.0 | 1.0 | 0.0 |
889 rows × 3 columns
# Wrap the one-hot encoded Sex matrix in a DataFrame that reuses the original
# row index, then display it.
sex_output = pd.DataFrame(
    data=sex_1hot.toarray(),
    index=train_set_sex.index,
    columns=sex_encoder.get_feature_names_out(),
)
sex_output
| Sex_female | Sex_male | |
|---|---|---|
| 0 | 0.0 | 1.0 |
| 1 | 1.0 | 0.0 |
| 2 | 1.0 | 0.0 |
| 3 | 1.0 | 0.0 |
| 4 | 0.0 | 1.0 |
| ... | ... | ... |
| 886 | 0.0 | 1.0 |
| 887 | 1.0 | 0.0 |
| 888 | 1.0 | 0.0 |
| 889 | 0.0 | 1.0 |
| 890 | 0.0 | 1.0 |
889 rows × 2 columns
# Append the one-hot encoded Embarked and Sex columns, then remove the
# original string columns they replace.
train_set = pd.concat(
    [train_set, embarked_output, sex_output], axis=1
).drop(columns=["Embarked", "Sex"])
# Remove the features that are not used for training: Name and Ticket,
# plus Cabin, which has a large number of missing values.
train_set = train_set.drop(columns=["Name", "Ticket", "Cabin"])
최종적으로 훈련에 사용될 데이터셋이다.
# Display the preprocessed training frame.
train_set
| PassengerId | Survived | Pclass | Age | SibSp | Parch | Fare | Embarked_C | Embarked_Q | Embarked_S | Sex_female | Sex_male | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 3 | 22.000000 | 1 | 0 | 7.2500 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 |
| 1 | 2 | 1 | 1 | 38.000000 | 1 | 0 | 71.2833 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 2 | 3 | 1 | 3 | 26.000000 | 0 | 0 | 7.9250 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 |
| 3 | 4 | 1 | 1 | 35.000000 | 1 | 0 | 53.1000 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 |
| 4 | 5 | 0 | 3 | 35.000000 | 0 | 0 | 8.0500 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 886 | 887 | 0 | 2 | 27.000000 | 0 | 0 | 13.0000 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 |
| 887 | 888 | 1 | 1 | 19.000000 | 0 | 0 | 30.0000 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 |
| 888 | 889 | 0 | 3 | 29.699118 | 1 | 2 | 23.4500 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 |
| 889 | 890 | 1 | 1 | 26.000000 | 0 | 0 | 30.0000 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 890 | 891 | 0 | 3 | 32.000000 | 0 | 0 | 7.7500 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 |
889 rows × 12 columns
모델 선택 및 훈련
from sklearn.model_selection import train_test_split

# Separate the input features from the target column.
# NOTE(review): X_titanic still contains PassengerId, which is a row
# identifier rather than a passenger attribute; confirm that keeping it as a
# model feature is intended.
X_titanic = train_set.drop(columns="Survived")
y_titanic = train_set["Survived"]

# Hold out 20% of the samples for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X_titanic,
    y_titanic,
    test_size=0.2,
    random_state=15,
)
사용할 모델로 로지스틱 회귀, 랜덤 포레스트, 결정 트리를 사용한다.
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Candidate models, each seeded for reproducibility. Logistic regression gets
# a raised iteration cap (max_iter) for the solver.
LR_clf = LogisticRegression(random_state=10, max_iter=10_000)
RF_clf = RandomForestClassifier(random_state=10)
DT_clf = DecisionTreeClassifier(random_state=10)
# Logistic regression: fit on the training split, then report the score on
# the held-out split (fit returns the estimator, so the calls can be chained).
LR_clf.fit(X_train, y_train).score(X_test, y_test)
0.8258426966292135
# Random forest: fit on the training split, then report the score on the
# held-out split (fit returns the estimator, so the calls can be chained).
RF_clf.fit(X_train, y_train).score(X_test, y_test)
0.8426966292134831
# Decision tree: fit on the training split, then report the score on the
# held-out split (fit returns the estimator, so the calls can be chained).
DT_clf.fit(X_train, y_train).score(X_test, y_test)
0.8033707865168539
교차 검증
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the logistic regression on the training split.
# No `scoring` is passed, so each fold value comes from the estimator's own
# score method; despite the "rmses" name these are not RMSE values.
LR_rmses = cross_val_score(estimator=LR_clf, X=X_train, y=y_train, cv=10)
LR_rmses.mean()
0.7905712050078246
# 10-fold cross-validation of the random forest on the training split.
# No `scoring` is passed, so each fold value comes from the estimator's own
# score method; despite the "rmses" name these are not RMSE values.
RF_rmses = cross_val_score(estimator=RF_clf, X=X_train, y=y_train, cv=10)
RF_rmses.mean()
0.8115610328638498
# 10-fold cross-validation of the decision tree on the training split.
# No `scoring` is passed, so each fold value comes from the estimator's own
# score method; despite the "rmses" name these are not RMSE values.
DT_rmses = cross_val_score(estimator=DT_clf, X=X_train, y=y_train, cv=10)
DT_rmses.mean()
0.7355829420970267
모델 미세 조정
from sklearn.model_selection import GridSearchCV

# Hyperparameter search for the decision tree: every combination of the
# values below is evaluated with 5-fold cross-validated accuracy.
DT_param_grid = dict(
    max_depth=[2, 3, 5, 10],
    min_samples_split=[2, 3, 5],
    min_samples_leaf=[1, 5, 8],
)
DT_grid_search = GridSearchCV(
    estimator=DT_clf,
    param_grid=DT_param_grid,
    scoring="accuracy",
    cv=5,
)
DT_grid_search.fit(X_train, y_train)
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(random_state=10),
param_grid={'max_depth': [2, 3, 5, 10],
'min_samples_leaf': [1, 5, 8],
'min_samples_split': [2, 3, 5]},
scoring='accuracy')
# Best hyperparameter combination found by the decision-tree grid search.
DT_grid_search.best_params_
{'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 2}
# Cross-validated accuracy achieved by that best combination.
DT_grid_search.best_score_
0.7961095242785384
from sklearn.model_selection import GridSearchCV

# Hyperparameter search for the random forest: every combination of the
# values below is evaluated with 5-fold cross-validated accuracy.
RF_param_grid = dict(
    max_depth=[2, 3, 5, 10],
    min_samples_split=[2, 3, 5],
    min_samples_leaf=[1, 5, 8],
)
RF_grid_search = GridSearchCV(
    estimator=RF_clf,
    param_grid=RF_param_grid,
    scoring="accuracy",
    cv=5,
)
RF_grid_search.fit(X_train, y_train)
GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=10),
param_grid={'max_depth': [2, 3, 5, 10],
'min_samples_leaf': [1, 5, 8],
'min_samples_split': [2, 3, 5]},
scoring='accuracy')
# Best hyperparameter combination found by the random-forest grid search.
RF_grid_search.best_params_
{'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2}
# Cross-validated accuracy achieved by that best combination.
RF_grid_search.best_score_
0.8017334777898159
# Hyperparameter search for the logistic regression, evaluated with 5-fold
# cross-validated accuracy.
from sklearn.model_selection import GridSearchCV

# `l1_ratio` is deliberately left out of the grid: it is only used when
# penalty='elasticnet', so with the 'l1'/'l2' penalties searched here it
# tripled the number of fits without changing any result and made every fit
# emit a "l1_ratio parameter is only used when penalty is 'elasticnet'"
# UserWarning. The 'liblinear' solver handles both 'l1' and 'l2'.
LR_param_grid = {
    "penalty": ["l1", "l2"],
    "C": [0.1, 1, 10, 100],
    "solver": ["liblinear"],
}
LR_grid_search = GridSearchCV(LR_clf, LR_param_grid, cv=5, scoring="accuracy")
LR_grid_search.fit(X_train, y_train)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)
"(penalty={})".format(self.penalty)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:1479: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)
"(penalty={})".format(self.penalty)
GridSearchCV(cv=5,
estimator=LogisticRegression(max_iter=10000, random_state=10),
param_grid={'C': [0.1, 1, 10, 100], 'l1_ratio': [0.2, 0.5, 0.8],
'penalty': ['l1', 'l2'], 'solver': ['liblinear']},
scoring='accuracy')
# Show the hyperparameter combination selected by the logistic-regression grid search.
LR_grid_search.best_params_
{'C': 10, 'l1_ratio': 0.2, 'penalty': 'l1', 'solver': 'liblinear'}
# Show the best mean cross-validated score of the logistic-regression grid search
# (the search above was configured with scoring='accuracy' and cv=5).
LR_grid_search.best_score_
0.7933221707869595
평가
from sklearn.metrics import accuracy_score
# Decision tree: take the grid search's best estimator, predict on X_test,
# and measure accuracy against y_test.
best_DT = DT_grid_search.best_estimator_
DT_predict = best_DT.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=DT_predict)
accuracy
0.848314606741573
# Random forest: take the grid search's best estimator, predict on X_test,
# and measure accuracy against y_test.
best_RF = RF_grid_search.best_estimator_
RF_predict = best_RF.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=RF_predict)
accuracy
0.8539325842696629
# Logistic regression: take the grid search's best estimator, predict on
# X_test, and measure accuracy against y_test.
best_LR = LR_grid_search.best_estimator_
LR_predict = best_LR.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=LR_predict)
accuracy
0.8202247191011236
test.csv 파일 적용
# Count the missing values in each column of test.csv.
test_set.isna().sum()
PassengerId 0 Pclass 0 Name 0 Sex 0 Age 86 SibSp 0 Parch 0 Ticket 0 Fare 1 Cabin 327 Embarked 0 dtype: int64
# Print the column dtypes and non-null counts of test.csv.
test_set.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 418 entries, 0 to 417 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 PassengerId 418 non-null int64 1 Pclass 418 non-null int64 2 Name 418 non-null object 3 Sex 418 non-null object 4 Age 332 non-null float64 5 SibSp 418 non-null int64 6 Parch 418 non-null int64 7 Ticket 418 non-null object 8 Fare 417 non-null float64 9 Cabin 91 non-null object 10 Embarked 418 non-null object dtypes: float64(2), int64(4), object(5) memory usage: 36.0+ KB
# Preprocess test.csv
# Fill the missing Age values with the column mean.  The result is assigned
# back to the column rather than calling fillna(inplace=True) on the selected
# column: the chained in-place form is deprecated in pandas 2.x and no longer
# updates the frame once copy-on-write is enabled.
# NOTE(review): the fill values here are computed from test.csv itself and the
# encoders below are fitted on test.csv; reusing the statistics / encoders
# fitted on the training data would guarantee identical transforms for both
# sets -- confirm against the training cells.
test_set["Age"] = test_set["Age"].fillna(test_set["Age"].mean())
# Fill the missing Fare value with the column mean.
test_set["Fare"] = test_set["Fare"].fillna(test_set["Fare"].mean())
# Drop the features that are not used.
test_set = test_set.drop(["Name", "Ticket", "Cabin"], axis=1)
# One-hot encode Sex
test_set_sex = test_set[["Sex"]]
test_sex_encoder = OneHotEncoder()
test_sex_1hot = test_sex_encoder.fit_transform(test_set_sex)
# One-hot encode Embarked
test_set_embarked = test_set[["Embarked"]]
test_embarked_encoder = OneHotEncoder()
test_embarked_1hot = test_embarked_encoder.fit_transform(test_set_embarked)
# Append the one-hot encoded columns to the data (Embarked first, then Sex),
# keeping the original row index so the concat aligns row by row.
test_embarked_output = pd.DataFrame(
    test_embarked_1hot.toarray(),
    columns=test_embarked_encoder.get_feature_names_out(),
    index=test_set_embarked.index,
)
test_sex_output = pd.DataFrame(
    test_sex_1hot.toarray(),
    columns=test_sex_encoder.get_feature_names_out(),
    index=test_set_sex.index,
)
test_set = pd.concat([test_set, test_embarked_output, test_sex_output], axis=1)
# The raw categorical columns are replaced by their one-hot columns.
test_set = test_set.drop(["Embarked", "Sex"], axis=1)
test_set
| PassengerId | Pclass | Age | SibSp | Parch | Fare | Embarked_C | Embarked_Q | Embarked_S | Sex_female | Sex_male | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 892 | 3 | 34.50000 | 0 | 0 | 7.8292 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 |
| 1 | 893 | 3 | 47.00000 | 1 | 0 | 7.0000 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 |
| 2 | 894 | 2 | 62.00000 | 0 | 0 | 9.6875 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 |
| 3 | 895 | 3 | 27.00000 | 0 | 0 | 8.6625 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 |
| 4 | 896 | 3 | 22.00000 | 1 | 1 | 12.2875 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 413 | 1305 | 3 | 30.27259 | 0 | 0 | 8.0500 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 |
| 414 | 1306 | 1 | 39.00000 | 0 | 0 | 108.9000 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 415 | 1307 | 3 | 38.50000 | 0 | 0 | 7.2500 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 |
| 416 | 1308 | 3 | 30.27259 | 0 | 0 | 8.0500 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 |
| 417 | 1309 | 3 | 30.27259 | 1 | 1 | 22.3583 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 |
418 rows × 11 columns
# 결측치 처리 확인
# Verify that no missing values remain after preprocessing.
test_set.isna().sum()
PassengerId 0 Pclass 0 Age 0 SibSp 0 Parch 0 Fare 0 Embarked_C 0 Embarked_Q 0 Embarked_S 0 Sex_female 0 Sex_male 0 dtype: int64
# Predict survival for the preprocessed test.csv rows with the tuned random forest.
test_RF = best_RF.predict(test_set)
# Passenger ids as an integer array; they become the index of the submission.
PassengerId = test_set["PassengerId"].to_numpy().astype(int)
# NOTE: despite the "dt_" prefix, this frame holds the random-forest predictions.
dt_solution = pd.DataFrame(
    {"Survived": test_RF},
    index=pd.Index(PassengerId, name="PassengerId"),
)
# Write the submission file (PassengerId index column + Survived column).
dt_solution.to_csv("my_solution_titanic.csv")
dt_solution
| Survived | |
|---|---|
| PassengerId | |
| 892 | 0 |
| 893 | 0 |
| 894 | 0 |
| 895 | 0 |
| 896 | 0 |
| ... | ... |
| 1305 | 0 |
| 1306 | 1 |
| 1307 | 0 |
| 1308 | 0 |
| 1309 | 1 |
418 rows × 1 columns