In short:
How do I write this expression correctly?
[(self._mean,self._var,self._priors)] = [ ([X[y==c].mean(axis=0)] , [X[y==c].var(axis=0)],[X[y==c].shape[0] / n_samples ]) for c in self.classes]
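For reference, the left-hand pattern [(a, b, c)] = seq only succeeds when seq contains exactly one item; my comprehension yields one tuple per class, so with more than one class it blows up. A tiny standalone demo of that failure mode:

[(a, b, c)] = [(1, 2, 3)]               # fine: the outer list has exactly one item
print(a, b, c)                          # 1 2 3
# [(a, b, c)] = [(1, 2, 3), (4, 5, 6)]  # ValueError: too many values to unpack (expected 1)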
A minimal reproducible example that generates the same error:
import numpy as np
from sklearn import datasets

X, y = datasets.make_classification(n_samples=1000, n_classes=2, n_features=10, random_state=1234)
Classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[[_mean, _var]] = [[(np.mean(X[i % 10 == c]), np.var(X[i % 10 == c])) for c in Classes] for i in range(len(X))]
print(_mean)
print(_var)
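As an aside, the RuntimeWarnings in this MRE are a separate bug: i % 10 == c is a plain Python bool, and indexing a NumPy array with a scalar boolean either prepends a length-1 axis (True) or selects nothing at all (False), hence the "Mean of empty slice" warnings. A small demo of that indexing behavior:

import numpy as np

A = np.arange(6).reshape(3, 2)
print(A[True].shape)   # (1, 3, 2) -- scalar True prepends a length-1 axis
print(A[False].shape)  # (0, 3, 2) -- scalar False selects nothing -> empty-slice warnings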
Running it produces this output:
/bin/python3 "/home/vivek/Documents/GitHub/ML-Coding-Playground/LecturesSeries1/Lecture 5 - Naive Bayes/CodeSample.py"
/home/vivek/.local/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3474: RuntimeWarning: Mean of empty slice.
return _methods._mean(a, axis=axis, dtype=dtype,
/home/vivek/.local/lib/python3.8/site-packages/numpy/core/_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars
ret = ret.dtype.type(ret / rcount)
/home/vivek/.local/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3757: RuntimeWarning: Degrees of freedom <= 0 for slice
return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
/home/vivek/.local/lib/python3.8/site-packages/numpy/core/_methods.py:222: RuntimeWarning: invalid value encountered in true_divide
arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
/home/vivek/.local/lib/python3.8/site-packages/numpy/core/_methods.py:256: RuntimeWarning: invalid value encountered in double_scalars
ret = ret.dtype.type(ret / rcount)
Traceback (most recent call last):
File "/home/vivek/Documents/GitHub/ML-Coding-Playground/LecturesSeries1/Lecture 5 - Naive Bayes/CodeSample.py", line 12, in <module>
[[_mean, _var]] = [[ (np.mean(X[i%10==c]),np.var(X[i%10==c])) for c in Classes ] for i in range(len(X)) ]
ValueError: too many values to unpack (expected 1)
Context for the line of code:
I am writing a naive Bayes classifier from scratch, and the following script drives it:
#script.py
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from NaiveBayes import *

def accuracy(y_true, y_pred):
    return np.sum(y_true == y_pred) / len(y_true)

X, y = datasets.make_classification(n_samples=1000, n_classes=2, n_features=10, random_state=1234)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=69420)

nb = NaiveBayes()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

print("Accuracy:", accuracy(y_test, y_pred))
print("Confusion Matrix:")
# rows = true class, columns = predicted class (my first version printed
# per-array class counts, which is not a confusion matrix)
print(np.array([[np.sum((y_test == 0) & (y_pred == 0)), np.sum((y_test == 0) & (y_pred == 1))],
                [np.sum((y_test == 1) & (y_pred == 0)), np.sum((y_test == 1) & (y_pred == 1))]]))
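As a sanity check on the hand-rolled matrix above, sklearn ships an equivalent helper; a minimal sketch, assuming the same y_test and y_pred as in the script:

from sklearn.metrics import confusion_matrix

# rows = true class, columns = predicted class
print(confusion_matrix(y_test, y_pred))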
I have made a few attempts at the NaiveBayes class itself:
- Using a plain for loop (working):
#NaiveBayes.py
import numpy as np

class NaiveBayes:
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.classes = np.unique(y)
        n_classes = len(self.classes)
        # init mean, var, priors
        self._mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self._var = np.zeros((n_classes, n_features), dtype=np.float64)
        self._priors = np.zeros(n_classes, dtype=np.float64)
        for c in self.classes:
            X_c = X[y == c]
            self._mean[c] = X_c.mean(axis=0)
            self._var[c] = X_c.var(axis=0)
            self._priors[c] = X_c.shape[0] / n_samples
        # debugging
        print(self._mean)
        print(self._var)
        print(self._priors)

    def predict(self, X):
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)

    def _predict(self, x):
        posteriors = [self._posterior(x, c, idx) for (idx, c) in enumerate(self.classes)]
        return self.classes[np.argmax(posteriors)]

    def _posterior(self, x, c, idx):
        prior = np.log(self._priors[idx])
        likelihood = np.prod(self._likelihood(idx, x))
        return prior + np.log(likelihood)

    def _likelihood(self, class_idx, x):
        # Gaussian PDF of the single sample x under class class_idx, using that
        # class's fitted mean and variance (the _pdf function from the video).
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        coeff = 1.0 / np.sqrt(2 * np.pi * var)
        exp = np.exp(-(x - mean) ** 2 / (2 * var))
        return coeff * exp
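An aside on this version: _posterior takes log(prod(pdf)), which underflows easily once there are many features. A numerically safer sketch (my own tweak, not from the video) sums per-feature log-densities instead; written as a standalone function for clarity:

import numpy as np

def log_posterior(x, mean, var, prior):
    # log(prior) + sum of per-feature Gaussian log-densities --
    # algebraically equal to log(prior) + log(prod(pdf)), minus the underflow.
    log_pdf = -0.5 * np.log(2 * np.pi * var) - (x - mean) ** 2 / (2 * var)
    return np.log(prior) + np.sum(log_pdf)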
- Using three list comprehensions (working). Only fit() changes from here on; predict, _predict, _posterior, and _likelihood stay exactly as above:
#NaiveBayes.py (fit only)
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.classes = np.unique(y)
        # one list comprehension per statistic, replacing the loop
        self._mean = [X[y == c].mean(axis=0) for c in self.classes]
        self._var = [X[y == c].var(axis=0) for c in self.classes]
        self._priors = [X[y == c].shape[0] / n_samples for c in self.classes]
        # debugging
        print(self._mean)
        print(self._var)
        print(self._priors)
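One note on this variant: it leaves the three attributes as plain Python lists, whereas the loop version stores ndarrays. Wrapping each comprehension in np.array keeps the types consistent with the loop version; a sketch of the same three lines inside fit():

self._mean = np.array([X[y == c].mean(axis=0) for c in self.classes])
self._var = np.array([X[y == c].var(axis=0) for c in self.classes])
self._priors = np.array([X[y == c].shape[0] / n_samples for c in self.classes])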
- Using one list comprehension plus NumPy array manipulation (not working; I did not understand the failure -- see the note after the code). Again, fit only:
#NaiveBayes.py (fit only)
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.classes = np.unique(y)
        # one comprehension building [mean, var, prior] per class, then flatten
        print(np.array([[np.array([X[y == c].mean(axis=0)]).flatten(),
                         np.array([X[y == c].var(axis=0)]).flatten(),
                         np.array([X[y == c].shape[0] / n_samples]).flatten()]
                        for c in self.classes], dtype=object).flatten())  # debugging
        TempArray = np.array([[np.array([X[y == c].mean(axis=0)]).flatten(),
                               np.array([X[y == c].var(axis=0)]).flatten(),
                               np.array([X[y == c].shape[0] / n_samples]).flatten()]
                              for c in self.classes]).flatten()
        self._mean = TempArray[0]
        self._var = TempArray[1]
        self._priors = TempArray[2]
        # debugging
        print(self._mean)
        print(self._var)
        print(self._priors)
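If I read the shapes correctly, what goes wrong here is this: with ragged rows (two length-10 vectors plus a length-1 prior per class), NumPy builds a (n_classes, 3) object array, and .flatten() walks it row-major. TempArray[0], TempArray[1], and TempArray[2] are therefore the mean, var, and prior of class 0 alone, not the per-statistic collections. A small standalone demo, with the object dtype made explicit:

import numpy as np

temp = np.empty((2, 3), dtype=object)  # 2 classes x (mean, var, prior)
for row in range(2):
    temp[row, 0] = np.full(10, row)        # stand-in "mean" of class `row`
    temp[row, 1] = np.full(10, row + 0.5)  # stand-in "var"
    temp[row, 2] = 0.5                     # stand-in "prior"

flat = temp.flatten()  # row-major: 6 elements
print(flat[0])  # class 0's mean  -- not the stack of all means
print(flat[1])  # class 0's var   -- not the stack of all vars
print(flat[2])  # class 0's prior -- not the list of all priors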
And the one that raises the error:
- Using one list comprehension and iterable unpacking. It fails on the unpacking line (line 33 in my file) with:
ValueError: too many values to unpack (expected 1)
Again, fit only:
#NaiveBayes.py (fit only)
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.classes = np.unique(y)
        # one comprehension plus iterable unpacking -- this is the line that raises
        [(self._mean, self._var, self._priors)] = [([X[y == c].mean(axis=0)],
                                                    [X[y == c].var(axis=0)],
                                                    [X[y == c].shape[0] / n_samples])
                                                   for c in self.classes]
        # debugging
        print(self._mean)
        print(self._var)
        print(self._priors)
- Other failed attempts:
(self._mean, self._var, self._priors) = ([X[y==c].mean(axis=0)], [X[y==c].var(axis=0)], [X[y==c].shape[0] / n_samples] for c in self.classes)
This one does not even run: the trailing for clause binds only to the last tuple element, so Python rejects it with "SyntaxError: Generator expression must be parenthesized".
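Even with the generator fully parenthesized, I believe the unpacking would still be wrong: a generator of per-class tuples unpacked into three names hands each name one class's tuple (or raises if the class count is not exactly three), rather than giving each name one statistic. A small demo with three stand-in classes:

gen = ((c, 10 * c, 100 * c) for c in range(3))  # one (mean, var, prior)-like tuple per class
a, b, c = gen
print(a)  # (0, 0, 0)    -- all of class 0's stats, not all the means
print(b)  # (1, 10, 100) -- all of class 1's stats
print(c)  # (2, 20, 200)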
Can you explain the correct way to do this, and why these other approaches of mine fail?
Thank you for your time.
For reference:
[(i, 2*i) for i in range(5)]
produces [(0, 0), (1, 2), (2, 4), (3, 6), (4, 8)] -- one tuple per iteration, which is exactly the shape my comprehension over self.classes has.
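For what it's worth, the transposition I think I am after can be spelled with zip(*...), which flips a list of per-class tuples into one sequence per statistic. A minimal standalone sketch, assuming the same dataset as above (this is my guess at the working form, not a confirmed answer):

import numpy as np
from sklearn import datasets

X, y = datasets.make_classification(n_samples=1000, n_classes=2, n_features=10, random_state=1234)
classes = np.unique(y)
n_samples = X.shape[0]

# one (mean, var, prior) tuple per class ...
stats = [(X[y == c].mean(axis=0),
          X[y == c].var(axis=0),
          X[y == c].shape[0] / n_samples) for c in classes]
# ... transposed by zip(*...) into one sequence per statistic
_mean, _var, _priors = (np.array(s) for s in zip(*stats))
print(_mean.shape, _var.shape, _priors.shape)  # (2, 10) (2, 10) (2,)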