Набор данных можно найти здесь: https://gofile.io/d/f8nBLL.
Я пытаюсь запустить модель K-ближайших соседей, но столкнулся со следующей проблемой.
KeyError: 'Передача списочных объектов (list-like) в .loc или [] с отсутствующими метками больше не поддерживается, см. https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike'
Код, который я использую, следующий:
def knn_train_test(variable_columns, target_columns, df, hyp_test=False):
    """Fit a k-nearest-neighbours regressor on a 75/25 split and score it.

    The rows of ``df`` are shuffled with a fixed seed, the first 75% are
    used for training and the remainder for testing.

    Args:
        variable_columns: Feature column name, or a list of column names.
        target_columns: Target column name(s) to predict.
        df: DataFrame holding both the feature and target columns.
        hyp_test: When true, try ``n_neighbors`` from 1 to 20 and report
            the best one instead of scoring the default model.

    Returns:
        If ``hyp_test`` is true, the string ``'k:<best k>, RMSE:<rmse>'``
        for the k with the lowest test RMSE. Otherwise the test RMSE of a
        default ``KNeighborsRegressor`` as a float.
    """
    # A bare column name would select a 1-D Series; the regressor needs a
    # 2-D feature table, so wrap a single name in a list.
    if isinstance(variable_columns, str):
        variable_columns = [variable_columns]

    # Randomizing the Data.
    # The permutation holds row *positions*, so index with .iloc; .loc
    # would look them up as labels and raise KeyError whenever the index
    # is not exactly 0..len(df)-1 (e.g. after rows were dropped).
    np.random.seed(1)
    df = df.iloc[np.random.permutation(len(df))].reset_index(drop=True)

    # Dividing Training 75% / Test 25% Dataset
    rows = round(df.shape[0] * 0.75)
    training = df[:rows]
    test = df[rows:]
    x_training = training[variable_columns]
    y_training = training[target_columns]
    x_test = test[variable_columns]
    y_test = test[target_columns]

    # Testing Multiple Hyper Parameters and Accuracy
    if hyp_test:
        best_k = None
        best_rmse = None
        for k in range(1, 21):
            knn = KNeighborsRegressor(n_neighbors=k)  # Selecting the Model
            knn.fit(x_training, y_training)
            predictions = knn.predict(x_test)
            # Take the square root explicitly rather than relying on the
            # `squared=False` keyword of mean_squared_error.
            rmse = float(np.sqrt(mean_squared_error(y_test, predictions)))
            # Strict `<` keeps the smallest k when several k tie.
            if best_rmse is None or rmse < best_rmse:
                best_rmse = rmse
                best_k = k
        return f'k:{best_k}, RMSE:{best_rmse}'

    knn = KNeighborsRegressor()  # Selecting the Model
    knn.fit(x_training, y_training)
    predictions = knn.predict(x_test)
    # mean_squared_error yields MSE; callers expect RMSE.
    return float(np.sqrt(mean_squared_error(y_test, predictions)))
# Feature columns are every column of `numeric_cars` except `price`.
variables = numeric_cars.drop('price', axis=1)
target = numeric_cars['price']

# For each column (minus `price`), train a model, return RMSE value
# and add to the dictionary `rmse_results`.
rmse_results = {}
for col in variables.columns:
    # Pass the column as a one-element list so the features are selected
    # as a 2-D DataFrame rather than a 1-D Series.
    rmse_val = knn_train_test([col], 'price', numeric_cars)
    rmse_results[col] = rmse_val

# Create a Series object from the dictionary so
# we can easily view the results, sort, etc
rmse_results_series = pd.Series(rmse_results)
rmse_results_series.sort_values()