使用遗传算法选择重要特征时出现了几个错误:

wvyml7n5  于 2021-07-13  发布在  Java
关注(0)|答案(0)|浏览(213)

代码:

import numpy as np
import pandas as pd
import math
import target as target
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

dataset = pd.read_csv('Tehran_hies98.csv')

to_drop = ['Address', 'weight', 'WH', 'inc3', 'Income', 'exp1', 'exp4', 'exp5', 'exp6', 'exp7', 
          'exp8', 'exp11'
        , 'exp12', 'exp13', 'Income_Mis', 'exp2', 'exp3', 'exp9', 'inc2', 'inc1', 'exp14']

dataset.drop(to_drop, inplace=True, axis=1)

dataset = pd.get_dummies(dataset,
                     columns=['HSize', 'SSex', 'SAge', 'SMadrak', 'SActivity', 'SMarital', 
                              'Tasarrof', 'Otagh',
                              'ZirBana'], drop_first=True)

target, feature_list = 'DV', [i for i in dataset.columns if i not in target]

def init_population(n, c):
     return np.array([[math.ceil(e) for e in pop] for pop in (np.random.rand(n, c) - 0.5)]), 
     np.zeros((2, c))-1

def single_poin_crossover(population):
       r, c, n = population.shape[0], population.shape[1], np.random.randint(1, population.shape[1])
       for i in range(0, r, 2):
             population[i], population[i + 1] = np.append(population[i][0:n], population[i + 1] 
             [n:c]), np.append(
             population[i + 1][0:n], population[i][n:c])
       return population

def flip_mutation(population):
      return population.max() - population

def random_selection(population):
   r = population.shape[0]
   new_population = population.copy()
   for i in range(r):
       new_population[i] = population[np.random.randint(0, r)]
   return new_population

def get_fitness(data, feature_list, target, population):
   fitness = []
   for i in range(population.shape[0]):
       columns = [feature_list[j] for j in range(population.shape[1]) if population[i, j] == 1]
       fitness.append(predictive_model(data[columns], data[target]))
   return fitness

def predictive_model(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)
    lr = LogisticRegression(solver='liblinear', max_iter=100, random_state=7)
    lr.fit(X_train, y_train)
    return accuracy_score(y_test, lr.predict(X_test))

def genetic_algorithm(data, feature_list, target, n, max_iter):
  c = len(feature_list)

  population, memory = init_population(n, c)
  temp1 = population
  temp2 = memory
  population, memory = temp2, temp1

  fitness = get_fitness(data, feature_list, target, population)

  optimal_value = max(fitness)
  optimal_solution = population[np.where(fitness == optimal_value)][0]

  for i in range(max_iter):
      population = random_selection(population)
      population = single_poin_crossover(population)
      if np.random.rand() < 0.3:
          population = flip_mutation(population)
      temp1 = population
      temp2 = memory
      population, memory = temp2, temp1

      fitness = get_fitness(data, feature_list, target, population)

      if max(fitness) > optimal_value:
          optimal_value = max(fitness)
          optimal_solution = population[np.where(fitness == optimal_value)][0]

  return optimal_solution, optimal_value

feature_set, acc_score = genetic_algorithm(dataset, feature_list, target, 10, 1000)
feature_set = [feature_list[i] for i in range(len(feature_list)) if feature_set[i] == 1]

print('Optimal Feature Set\n', feature_set, '\nOptimal Accuracy = ', round(acc_score * 100), '%')

第一个错误:target,feature_list='dv',[i代表dataset.columns中的i,如果我不在target中]typeerror:类型'module'的参数不可iterable
对于代码:

target, feature_list = 'DV', [i for i in dataset.columns if i not in target]

第二个错误:如果删除上述代码的这部分:

if i not in target

出现以下错误:
raise keyerror(key)from err keyerror:'dv'
我写了这个代码,到目前为止这个问题已经出现了。
如果你看到任何其他的错误,请帮我修复它。
如果可能的话,请帮助我或输入正确的代码
谢谢。

暂无答案!

目前还没有任何答案,快来回答吧!

相关问题