如何将scipy minimize与 Dataframe 一起使用

o7jaxewo  于 2022-11-10  发布在  其他
关注(0)|答案(1)|浏览(138)

我尝试从 Dataframe 中选择一个行子集,以满足某些约束条件,同时最大化目标。这是我目前所拥有的:

import pandas as pd
import numpy as np
from scipy.optimize import minimize

df = pd.DataFrame({'a': np.random.randint(0, 100, size=100),
                   'b': np.random.randint(0, 100, size=100),
                   'c': np.random.randint(0, 10, size=100),
                   'd': np.random.randint(0, 5, size=100)})

# constraint functions

def a_constraint(x0, df):
    return 1000 - df[x0].a.sum()

def c_constraint(x0, df):
    return 3 - df[x0].c.value_counts().max()

def d_constraint(x0, df):
    return 3 - df[(x0) & (df.d == 1)].shape[0]

def n_constraint(x0, df):
    return 15 - df[x0].shape[0]

# objective function

def objective(x0, df):
    return -df[x0].b.sum()

# initial guess

x0 = np.full((df.shape[0]), True, dtype=bool)

# optimization connstraints as list of dicts

constraints = [{'type': 'ineq', 'fun': a_constraint, 'args': (df,)},
               {'type': 'ineq', 'fun': d_constraint, 'args': (df,)},
               {'type': 'eq', 'fun': c_constraint, 'args': (df,)},
               {'type': 'eq', 'fun': n_constraint, 'args': (df,)},
               ]

result = minimize(objective, x0=x0, method='SLSQP', args=(df,), constraints=constraints)

然而,我得到了一个KeyError。看起来它把我的布尔数组当作列标签而不是索引掩码。有人能告诉我哪里出错了吗?
为了回应下面hpaulj的评论,回溯为:

KeyError                                  Traceback (most recent call last)
d:\Users\github\fpl\fpl_stackoverflow.py in <module>
     38                ]
     39 
---> 40 result = minimize(objective, x0=x0, method='SLSQP', args=(df,), constraints=constraints)

d:\Users\.conda\envs\test_env\lib\site-packages\scipy\optimize\_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
    626     elif meth == 'slsqp':
    627         return _minimize_slsqp(fun, x0, args, jac, bounds,
--> 628                                constraints, callback=callback,**options)
    629     elif meth == 'trust-constr':
    630         return _minimize_trustregion_constr(fun, x0, args, jac, hess, hessp,

d:\Users\.conda\envs\test_env\lib\site-packages\scipy\optimize\slsqp.py in _minimize_slsqp(func, x0, args, jac, bounds, constraints, maxiter, ftol, iprint, disp, eps, callback, finite_diff_rel_step,**unknown_options)
    327     # meq, mieq: number of equality and inequality constraints
    328     meq = sum(map(len, [atleast_1d(c['fun'](x, *c['args']))
--> 329               for c in cons['eq']]))
    330     mieq = sum(map(len, [atleast_1d(c['fun'](x, *c['args']))
    331                for c in cons['ineq']]))

d:\Users\.conda\envs\test_env\lib\site-packages\scipy\optimize\slsqp.py in <listcomp>(.0)
    327     # meq, mieq: number of equality and inequality constraints
    328     meq = sum(map(len, [atleast_1d(c['fun'](x, *c['args']))
--> 329               for c in cons['eq']]))
...
-> 1308                 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
   1309 
   1310             ax = self.obj._get_axis(axis)

KeyError: "None of [Float64Index([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,\n              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,\n              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,\n              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,\n              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,\n              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,\n              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,\n              1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n             dtype='float64')] are in the [columns]"

该错误在目标函数中的print(x0)之前抛出。

sh7euo9m

sh7euo9m1#

首先,只有当你能保证x0总是整数或布尔值dtype时,你的公式才有效。因为scipy.optimize.minimize是针对连续优化问题的,所以它使用浮点数dtype作为当前迭代x的底层,所以第一次迭代已经失败了。也就是说,你的约束条件不是连续可微的,因此违反了底层算法的数学假设。
简而言之,您尝试使用连续优化问题的求解器来解决离散优化问题。将您的问题公式化为离散优化问题是一种更方便的方法。python-mip中可能的公式化和实现如下所示:

import pandas as pd
import numpy as np
from mip import Model, BINARY, maximize

df = pd.DataFrame({'a': np.random.randint(0, 100, size=100),
                   'b': np.random.randint(0, 100, size=100),
                   'c': np.random.randint(0, 10, size=100),
                   'd': np.random.randint(0, 5, size=100)})

# extract the numpy arrays for easier access

a = df.a.values
b = df.b.values
c = df.c.values
d = df.d.values

N = 100

# Indices at which d[i] == 1

Td = np.argwhere(d == 1).flatten()

# For each unique value k in c we save each index i such that c[i] == k

Tc = {k : np.argwhere(c == k).flatten() for k in np.unique(c)}

# model object

mdl = Model()

# optimization / decision variables

# x[i] == 1 iff we select the i-th element in a,b,c,d and 0 otherwise

x = [mdl.add_var(var_type=BINARY) for i in range(N)]

# objective: maximize the sum of the selected elements in b

mdl.objective = maximize(sum(b[i]*x[i] for i in range(N)))

# constraint (a): the selected elements in a sum up to less-than 1000

mdl.add_constr(sum(a[i]*x[i] for i in range(N)) <= 1000)

# constraint (n): we select at most 15 elements

mdl.add_constr(sum(x[i] for i in range(N)) == 15)

# constraint (d): we select at most 3 elements for which d[i] == 1

mdl.add_constr(sum(x[i] for i in Td) <= 3)

# constraint (c): we select at most 3 elements of c with the same value

for value, Indices in Tc.items():
    mdl.add_constr(sum(x[i] for i in Indices) <= 3)

# disable the solver output

mdl.verbose = 0

# solve the optimization problem

mdl.optimize()

# extract the indices of the solution

selected = np.array([i for i in range(N) if x[i].x > 0.99])

print(selected)

相关问题