from sklearn import datasets
from sklearn.naive_bayes import GaussianNB


import pandas as pd


iris = datasets.load_iris()
df_X=pd.DataFrame(iris.data)
df_Y=pd.DataFrame(iris.target)


df_X.head()


df_Y.head()


gnb=GaussianNB()
fitted=gnb.fit(iris.data,iris.target)
y_pred=fitted.predict(iris.data)


fitted.predict_proba(iris.data)[[1,48,51,100]] # 0,1,2를 맞출 각각의 확률

array([[1.00000000e+000, 1.51480769e-017, 2.34820051e-025],
       [1.00000000e+000, 2.63876217e-018, 2.79566024e-025],
       [7.27347795e-102, 9.45169639e-001, 5.48303606e-002],
       [3.23245181e-254, 6.35381031e-011, 1.00000000e+000]])


fitted.predict(iris.data)[[1,48,51,100]]

array([0, 0, 1, 2])


from sklearn.metrics import confusion_matrix


confusion_matrix(iris.target,y_pred)

array([[50,  0,  0],
       [ 0, 47,  3],
       [ 0,  3, 47]], dtype=int64)


gnb2=GaussianNB(priors=[1/100,1/100,98/100]) #특정 범주일 확률 설정
fitted2= gnb2.fit(iris.data,iris.target)
y_pred2=fitted2.predict(iris.data)
confusion_matrix(iris.target,y_pred2)
# 2번쨰 범주의 오차가 커짐을 확인할 수 있다

array([[50,  0,  0],
       [ 0, 33, 17],
       [ 0,  0, 50]], dtype=int64)


gnb2=GaussianNB(priors=[1/100,98/100,1/100])
fitted2= gnb2.fit(iris.data,iris.target)
y_pred2=fitted2.predict(iris.data)
confusion_matrix(iris.target,y_pred2)
# 3번쨰 범주의 오차가 커짐을 확인할 수 있다

array([[50,  0,  0],
       [ 0, 50,  0],
       [ 0, 14, 36]], dtype=int64)


from sklearn.naive_bayes import MultinomialNB


import numpy as np


X = np.random.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])

X

array([[2, 1, 4, 3, 1, 2, 0, 4, 4, 0, 0, 3, 3, 4, 4, 4, 2, 4, 4, 2, 3, 0,
        3, 1, 0, 3, 3, 0, 1, 2, 1, 0, 4, 1, 0, 0, 0, 3, 2, 2, 0, 0, 4, 3,
        3, 3, 2, 0, 1, 4, 3, 0, 2, 1, 0, 3, 4, 2, 4, 3, 2, 3, 4, 4, 0, 4,
        2, 1, 4, 4, 3, 4, 0, 2, 2, 0, 2, 2, 2, 2, 0, 0, 0, 3, 4, 1, 2, 4,
        3, 1, 4, 3, 1, 1, 3, 3, 3, 1, 4, 2],
       [1, 2, 1, 3, 3, 1, 4, 0, 4, 1, 2, 2, 3, 4, 1, 1, 3, 4, 2, 4, 1, 3,
        2, 2, 1, 2, 1, 0, 2, 2, 2, 2, 0, 3, 1, 3, 1, 2, 2, 1, 0, 2, 3, 1,
        1, 0, 4, 1, 0, 2, 0, 4, 2, 1, 4, 2, 1, 3, 2, 4, 3, 2, 0, 0, 2, 2,
        4, 4, 0, 0, 4, 0, 3, 0, 1, 3, 1, 4, 4, 1, 0, 1, 2, 0, 1, 4, 4, 3,
        0, 1, 3, 3, 4, 2, 2, 2, 0, 2, 3, 1],
       [4, 3, 4, 3, 3, 3, 2, 1, 3, 4, 3, 0, 2, 0, 0, 1, 1, 0, 0, 3, 4, 2,
        0, 2, 1, 2, 4, 2, 4, 1, 3, 4, 1, 3, 1, 0, 0, 2, 1, 2, 3, 0, 2, 3,
        2, 3, 1, 4, 0, 2, 2, 1, 4, 2, 4, 0, 4, 4, 1, 4, 4, 4, 1, 1, 0, 0,
        3, 4, 4, 4, 4, 0, 4, 3, 4, 2, 0, 0, 2, 4, 4, 3, 3, 4, 2, 0, 0, 3,
        3, 0, 3, 0, 3, 4, 0, 1, 1, 4, 3, 2],
       [1, 1, 1, 1, 2, 4, 0, 3, 0, 2, 4, 1, 0, 0, 1, 2, 0, 2, 0, 3, 1, 4,
        1, 4, 0, 1, 3, 4, 4, 0, 2, 3, 2, 3, 0, 2, 3, 0, 1, 0, 3, 4, 3, 4,
        0, 0, 2, 3, 3, 3, 4, 3, 2, 3, 0, 3, 0, 4, 0, 2, 1, 3, 1, 0, 0, 4,
        4, 2, 1, 4, 4, 4, 3, 0, 1, 2, 3, 0, 3, 0, 3, 2, 4, 0, 4, 4, 3, 1,
        4, 0, 2, 1, 2, 4, 4, 2, 3, 0, 0, 4],
       [0, 2, 4, 2, 2, 2, 4, 4, 4, 2, 2, 4, 0, 3, 3, 3, 0, 3, 4, 2, 2, 2,
        2, 3, 4, 0, 4, 4, 0, 4, 0, 4, 0, 0, 4, 3, 4, 2, 3, 2, 4, 4, 3, 0,
        3, 3, 3, 3, 3, 1, 1, 2, 4, 4, 2, 1, 3, 0, 1, 2, 0, 0, 1, 2, 0, 0,
        4, 1, 3, 1, 0, 3, 3, 2, 0, 0, 2, 4, 0, 0, 4, 0, 3, 4, 0, 1, 1, 2,
        1, 0, 3, 1, 4, 4, 4, 1, 4, 1, 0, 0],
       [3, 4, 2, 4, 4, 2, 0, 4, 3, 0, 1, 1, 1, 1, 4, 3, 4, 2, 2, 2, 1, 0,
        0, 2, 4, 0, 2, 2, 4, 1, 4, 1, 4, 2, 3, 4, 3, 0, 2, 3, 4, 3, 3, 2,
        1, 0, 3, 3, 4, 3, 3, 3, 1, 3, 0, 0, 3, 3, 1, 3, 4, 4, 0, 3, 1, 0,
        2, 4, 0, 4, 0, 0, 2, 4, 1, 2, 1, 3, 3, 2, 3, 1, 0, 1, 1, 4, 1, 1,
        2, 3, 4, 2, 2, 4, 2, 0, 2, 3, 2, 1]])

y

array([1, 2, 3, 4, 5, 6])


clf = MultinomialNB()
clf.fit(X, y)

MultinomialNB()


print(clf.predict(X[2:3]))

[3]


clf.predict_proba(X[2:3])

array([[1.50000380e-41, 4.53274426e-37, 1.00000000e+00, 1.14271194e-36,
        4.86645418e-44, 1.06114129e-33]])


clf2 = MultinomialNB(class_prior=[0.1,0.1999,0.0001,0.1,0.1,0.1])
clf2.fit(X, y)

MultinomialNB(class_prior=[0.1, 0.1999, 0.0001, 0.1, 0.1, 0.1])


clf2.predict_proba(X[2:3]) # 2번째에 prior를 높게 주어 값이 높아졌다.

array([[1.50000380e-38, 9.06095578e-34, 1.00000000e+00, 1.14271194e-33,
        4.86645418e-41, 1.06114129e-30]])


from IPython.core.display import display, HTML
display(HTML("<style>.container {width:80% !important;}</style>"))

	0	1	2	3
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

LDA + Python_Code (0)	2021.08.25
K-NN + Python_Code (0)	2021.08.25
PCA + Python_Code (0)	2021.08.23
회귀분석(4)_로지스틱 회귀분석 (0)	2021.08.21
회귀분석(3)_변수선택법 Code (0)	2021.08.21

#wannabeeeeeee the best DataScientist

#wannabeeeeeee the best DataScientist

Naive Bayes + Python_Code 본문

Naive Bayes + Python_Code

◎ 수학적 개념 이해

◎ Naive Bayes Classifier

Naive Bayes 실습¶

1. Gaussian Naive Bayes¶

2. Multinomial naive bayes¶

'Data scientist > Machine Learning' 카테고리의 다른 글

티스토리툴바

	0	1	2	3
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

« 2025/08 »
일	월	화	수	목	금	토
					1	2
3	4	5	6	7	8	9
10	11	12	13	14	15	16
17	18	19	20	21	22	23
24	25	26	27	28	29	30
31

	0	1	2	3
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	0	1	2	3
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2