Модель K-ближайших соседей не работает. Передача спископодобных объектов (list-likes) в .loc или [] с отсутствующими метками больше не поддерживается.

Набор данных можно найти здесь: https://gofile.io/d/f8nBLL.

Я пытаюсь запустить модель K-ближайших соседей, но столкнулся со следующей проблемой.

KeyError: 'Передача спископодобных объектов (list-likes) в .loc или [] с отсутствующими метками больше не поддерживается, см. https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike'

Код, который я использую, следующий:

def knn_train_test(variable_columns,target_columns,df,hyp_test=False):
# Shuffle `df`, split it 75/25 into training and test rows, fit a
# KNeighborsRegressor on the training rows and score it on the test rows.
# NOTE(review): the function body's indentation was lost when this snippet
# was pasted; everything down to the final `return rmse` is meant to be
# the body of this function.

# Randomizing the Data
np.random.seed(1)
# NOTE(review): `.loc` selects by index *label*. The permutation holds the
# positions 0..len(df)-1, so this raises the quoted KeyError whenever the
# index of `df` does not contain exactly those labels.
df=df.loc[np.random.permutation(len(df))]
df = df.reset_index(drop=True)

# Dividing Training 75% / Test 25% Dataset
rows=round(df.shape[0]*0.75)
training=df[:rows]
test=df[rows:]
x_training=training[variable_columns]
y_training=training[target_columns]
x_test=test[variable_columns]
y_test=test[target_columns]

# Testing Multiple Hyper Parameters and Accuracy
# NOTE(review): `test` is the DataFrame slice assigned above, not the
# `hyp_test` parameter, which is never read in this version.
if test==True:
    # Candidate values for n_neighbors: 1 through 20.
    hyper_params = [x for x in range(1,21)]
    # Collects one score per candidate k, in the order of `hyper_params`.
    two_mse_values = list()
    # Dict that ends up holding only the selected k -> score pair.
    two_hyp_mse = dict()
    for hp in hyper_params:
        knn = KNeighborsRegressor(n_neighbors=hp) #Selecting the Model
        knn.fit(x_training, y_training)
        predictions = knn.predict(x_test)
        # squared=False asks for the root of the mean squared error.
        rmse = mean_squared_error(y_test, predictions,squared=False)
        two_mse_values.append(rmse)

    # Start the search for the lowest score from the k=1 entry.
    two_lowest_mse = two_mse_values[0]
    two_lowest_k = 1

    for k,mse in enumerate(two_mse_values):
        # NOTE(review): this compares `rmse`, the last value left over from
        # the loop above, rather than the loop variable `mse`.
        if rmse < two_lowest_mse:
            two_lowest_mse = rmse
            # enumerate() counts from 0 while k values start at 1.
            two_lowest_k = k + 1
    two_hyp_mse[two_lowest_k] = two_lowest_mse
    return (f'k:{two_lowest_k}, RMSE:{two_lowest_mse}')
else:
    knn = KNeighborsRegressor() #Selecting the Model
    knn.fit(x_training, y_training)
    predictions = knn.predict(x_test)
    # NOTE(review): no squared=False here, unlike the branch above, so the
    # value named `rmse` is presumably the plain MSE -- confirm.
    rmse = mean_squared_error(y_test, predictions)
    return rmse

# Separate the `price` target from the remaining candidate feature columns.
target = numeric_cars['price']
variables = numeric_cars.drop(columns='price')

# Score each feature column on its own against `price` and record the
# returned value in `rmse_results` under that column's name.
for col in variables.columns:
    rmse_val = knn_train_test(col, 'price', numeric_cars)
    rmse_results[col] = rmse_val

# Wrap the collected scores in a Series so they are easy to inspect and
# order from lowest to highest.
rmse_results_series = pd.Series(rmse_results)
rmse_results_series.sort_values()

person ciro henrique    schedule 19.01.2021    source источник


Ответы (1)


Этот код работает для меня

import pandas as pd
import numpy as np
import sklearn

from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Library versions this snippet was run against; fail fast on any other.
assert pd.__version__ == '1.2.1'
assert np.__version__ == '1.18.5'
assert sklearn.__version__ == '0.23.1'

def knn_train_test(variable_columns, target_columns, df, hyp_test=False):
    """Fit a k-nearest-neighbours regressor on a 75/25 split and score it.

    The rows of ``df`` are shuffled with a fixed seed, the first 75% are
    used for training and the remaining 25% for testing.

    Args:
        variable_columns: Feature column label, or a list of labels.
        target_columns: Target column label, or a list of labels.
        df: DataFrame containing the feature and target columns.
        hyp_test: When truthy, evaluate ``n_neighbors`` from 1 to 20 and
            report the best value instead of scoring a single model.

    Returns:
        If ``hyp_test`` is falsy, the test-set RMSE of a model built with
        the default ``n_neighbors``. Otherwise the string
        ``'k:<best k>, RMSE:<its RMSE>'``.
    """
    # A bare column label would select a 1-D Series, which scikit-learn
    # rejects as a feature matrix; wrap it so X is always 2-D.
    if isinstance(variable_columns, str):
        variable_columns = [variable_columns]

    # Shuffle by position, not by label: ``.loc`` raises KeyError as soon
    # as the index is not exactly 0..len(df)-1 (e.g. after dropping rows).
    np.random.seed(1)
    shuffled = df.iloc[np.random.permutation(len(df))].reset_index(drop=True)

    split_at = round(len(shuffled) * 0.75)
    training, test = shuffled.iloc[:split_at], shuffled.iloc[split_at:]
    x_training, y_training = training[variable_columns], training[target_columns]
    x_test, y_test = test[variable_columns], test[target_columns]

    if not hyp_test:
        return _knn_rmse(x_training, y_training, x_test, y_test)

    rmse_by_k = {
        k: _knn_rmse(x_training, y_training, x_test, y_test, n_neighbors=k)
        for k in range(1, 21)
    }
    # min() returns the first minimum, so ties go to the smallest k.
    best_k = min(rmse_by_k, key=rmse_by_k.get)
    return f'k:{best_k}, RMSE:{rmse_by_k[best_k]}'


def _knn_rmse(x_training, y_training, x_test, y_test, **knn_kwargs):
    """Fit a KNeighborsRegressor and return its RMSE on the test split."""
    knn = KNeighborsRegressor(**knn_kwargs)
    knn.fit(x_training, y_training)
    predictions = knn.predict(x_test)
    # squared=False yields the root of the MSE, i.e. the RMSE.
    return mean_squared_error(y_test, predictions, squared=False)


def knn_rmse_per_column(df, target_column='price'):
    """Score every non-target column as a lone feature.

    Returns a Series mapping column name to RMSE, sorted ascending.
    """
    rmse_results = {
        col: knn_train_test([col], target_column, df)
        for col in df.drop(target_column, axis=1).columns
    }
    return pd.Series(rmse_results).sort_values()

# Columns used for this demonstration run.
target_columns = ['price']
variable_columns = ['normalized-losses', 'wheel-base']

# Load the data set from the local CSV copy.
file = '~/Downloads/numeric_cars.csv'
df = pd.read_csv(file)

# Fit and score with the default settings, then show the result.
print(knn_train_test(variable_columns, target_columns, df))

person nithish08    schedule 21.01.2021