0

I have the following code, I'm just trying to teach myself how to use a machine learning model.

import ast
import csv
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data = pd.read_csv('datos_actualizadosPipes.csv', delimiter=";")


#data['nombreCaballo'] = data['nombreCaballo'].apply(lambda x: ast.literal_eval(x))
d=data['nombreCaballo'].apply(ast.literal_eval).tolist()
data['nombreCaballo']=d
data['fuerza']=data['fuerza'].apply(ast.literal_eval).tolist()
data['premios']=data['premios'].apply(ast.literal_eval).tolist()
data['5Carreras']=data['5Carreras'].apply(ast.literal_eval).tolist()
data['porcentaje']=data['porcentaje'].apply(ast.literal_eval).tolist()
data['multiplicador']=data['multiplicador'].apply(ast.literal_eval).tolist()

data['ganadorNom']=data['ganadorNom'].apply(ast.literal_eval).tolist()
data['segundo_puestoNom']=data['segundo_puestoNom'].apply(ast.literal_eval).tolist()



nc = np.asarray(data['nombreCaballo'])
f = np.asarray(data['fuerza'])
p = np.asarray(data['premios'])
c = np.asarray(data['5Carreras'])
po= np.asarray(data['porcentaje'])
m = np.asarray(data['multiplicador'])

ga= np.asarray(data['ganadorNom'])
seg = np.asarray(data['segundo_puestoNom'])


#df=csv.reader('datos_actualizados.csv', dialect='excel')


X = data[['numeroCarrera', 'porcentaje']]
X['nombreCaballo']=nc
X['fuerza']=f
X['premios']=p
X['5Carreras']=c
#X['multiplicador']=m
X['porcentaje']=po
X['multiplicador']=m

y = data[['ganadorNum', 'segundo_puestoNum']]#,'tercerPuesto','cuartoPuesto','quintoPuesto','sextoPuesto']]
y['ganadorNom']=ga
y['segundo_puestoNom']=seg
#print(data['nombreCaballo'])


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)

print(X_train)
print(y_train)
print(X_test)
print(y_test)

print(X_train.ndim  )
print(y_train.ndim )
print(X_test.ndim  )
print(y_test.ndim )

model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy}')

new_data = pd.DataFrame({})
predictions = model.predict(new_data)
print(predictions)

I already load the csv information into "accepteable format" to the model. in the csv there are some columns with lists of lists of ints; lists of its or just ints. I dont know if the problem is in the lists of lists or what. But I get the following error:

Exception has occurred: ValueError
setting an array element with a sequence.
  File "C:\Users\Malelizarazo\OneDrive - Universidad de los Andes\U\Cosas random\python\Caballos\hjk.py", line 68, in <module>
    model.fit(X_train, y_train)
ValueError: setting an array element with a sequence.

X_train, X_test, Y_test and Y_train are all 2 dimentions. Honestly dont know what to do.

Print outputs of the code :

print(X_train)

:

numeroCarrera ...
multiplicador 3 149 ... [50.5, 29.9, 26.9, 5.0, 4.5, 193.7, 226.4, 41....


print(y_train)

ganadorNum ... segundo_puestoNom 3 6 ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...

[1 rows x 4 columns]


print(X_test)

numeroCarrera ...
multiplicador 0 145 ... [43.5, 11.8, 8.2, 8.2, 11.5, 60.3, 41.6, 41.7,... 1 146 ... [19.9, 14.6, 16.4, 15.3, 14.6, 15.1, 17.0, 15.... 5 153 ... [4.8, 53.5, 28.1, 27.2, 3.9, 48.7, 25.6, 24.8,... 2 148 ... [50.0, 295.6, 71.4, 63.7, 59.0, 27.6, 6.7, 6.0... 4 151 ... [74.9, 51.6, 69.4, 51.7, 352.0, 7.1, 9.5, 7.2,...

[5 rows x 7 columns]


print(y_test)

ganadorNum ... segundo_puestoNom 0 5 ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... 1 4 ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... 5 6 ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ... 2 5 ... [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... 4 3 ... [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...

[5 rows x 4 columns]


print(X_train.ndim ) print(y_train.ndim ) print(X_test.ndim ) print(y_test.ndim )

Output:

2 2 2 2

4

0

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.