scipy TypeError:在两个 Pandas DataFrame 上执行 Wilcoxon 秩和检验时,'int' 和 'str' 的实例之间不支持 '<'

njthzxwz  于 2022-11-10  发布在  其他
关注(0)|答案(1)|浏览(162)

我尝试在两个 DataFrame(kirc 和 normal)之间执行 Wilcoxon 秩和检验。我想在对应的列之间执行检验。我的代码引发了 TypeError: '<' not supported between instances of 'int' and 'str'。

from scipy.stats import ranksums
import pandas as pd

kirc = mrna.loc[mrna['subtype'] == "KIRC"].iloc[:,:-2]
normal = mrna.loc[mrna['subtype'] == "normal"].iloc[:,:-2]

对于两个 Dataframe 的每一行,我希望执行Wilcoxon秩和检验,以找到相应列之间的差值。

for i in normal.T.iterrows():
    for j in kirc.T.iterrows():
        ranksums(i, j)

追溯:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args,**kwds)
     56     try:
---> 57         return bound(*args,**kwds)
     58     except TypeError:

/opt/conda/lib/python3.7/site-packages/pandas/core/ops/common.py in new_method(self, other)
     68 
---> 69         return method(self, other)
     70 

/opt/conda/lib/python3.7/site-packages/pandas/core/arraylike.py in __lt__(self, other)
     39     def __lt__(self, other):
---> 40         return self._cmp_method(other, operator.lt)
     41 

/opt/conda/lib/python3.7/site-packages/pandas/core/series.py in _cmp_method(self, other, op)
   5501         with np.errstate(all="ignore"):
-> 5502             res_values = ops.comparison_op(lvalues, rvalues, op)
   5503 

/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comparison_op(left, right, op)
    283     elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
--> 284         res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
    285 

/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comp_method_OBJECT_ARRAY(op, x, y)
     72     else:
---> 73         result = libops.scalar_compare(x.ravel(), y, op)
     74     return result.reshape(x.shape)

/opt/conda/lib/python3.7/site-packages/pandas/_libs/ops.pyx in pandas._libs.ops.scalar_compare()

TypeError: '<' not supported between instances of 'int' and 'str'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-44-ea48324537c3> in <module>
      3 for i in normal.T.iterrows():
      4     for j in kirc.T.iterrows():
----> 5         ranksums(i, j)
      6 

/opt/conda/lib/python3.7/site-packages/scipy/stats/stats.py in ranksums(x, y)
   6469     n2 = len(y)
   6470     alldata = np.concatenate((x, y))
-> 6471     ranked = rankdata(alldata)
   6472     x = ranked[:n1]
   6473     s = np.sum(x, axis=0)

/opt/conda/lib/python3.7/site-packages/scipy/stats/stats.py in rankdata(a, method)
   7380     arr = np.ravel(np.asarray(a))
   7381     algo = 'mergesort' if method == 'ordinal' else 'quicksort'
-> 7382     sorter = np.argsort(arr, kind=algo)
   7383 
   7384     inv = np.empty(sorter.size, dtype=np.intp)

<__array_function__ internals> in argsort(*args,**kwargs)

/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in argsort(a, axis, kind, order)
   1112 
   1113     """
-> 1114     return _wrapfunc(a, 'argsort', axis=axis, kind=kind, order=order)
   1115 
   1116 

/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args,**kwds)
     64         # Call _wrapit from within the except clause to ensure a potential
     65         # exception has a traceback chain.
---> 66         return _wrapit(obj, method, *args,**kwds)
     67 
     68 

/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapit(obj, method, *args,**kwds)
     41     except AttributeError:
     42         wrap = None
---> 43     result = getattr(asarray(obj), method)(*args,**kwds)
     44     if wrap:
     45         if not isinstance(result, mu.ndarray):

/opt/conda/lib/python3.7/site-packages/pandas/core/ops/common.py in new_method(self, other)
     67         other = item_from_zerodim(other)
     68 
---> 69         return method(self, other)
     70 
     71     return new_method

/opt/conda/lib/python3.7/site-packages/pandas/core/arraylike.py in __lt__(self, other)
     38     @unpack_zerodim_and_defer("__lt__")
     39     def __lt__(self, other):
---> 40         return self._cmp_method(other, operator.lt)
     41 
     42     @unpack_zerodim_and_defer("__le__")

/opt/conda/lib/python3.7/site-packages/pandas/core/series.py in _cmp_method(self, other, op)
   5500 
   5501         with np.errstate(all="ignore"):
-> 5502             res_values = ops.comparison_op(lvalues, rvalues, op)
   5503 
   5504         return self._construct_result(res_values, name=res_name)

/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comparison_op(left, right, op)
    282 
    283     elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
--> 284         res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
    285 
    286     else:

/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comp_method_OBJECT_ARRAY(op, x, y)
     71         result = libops.vec_compare(x.ravel(), y.ravel(), op)
     72     else:
---> 73         result = libops.scalar_compare(x.ravel(), y, op)
     74     return result.reshape(x.shape)
     75 

/opt/conda/lib/python3.7/site-packages/pandas/_libs/ops.pyx in pandas._libs.ops.scalar_compare()

TypeError: '<' not supported between instances of 'int' and 'str'

kirc数据

pd.DataFrame({'A1CF': {'TCGA-A3-3307-01': 750,
  'TCGA-A3-3308-01': 579,
  'TCGA-A3-3311-01': 2186,
  'TCGA-A3-3313-01': 220},
 'A2BP1': {'TCGA-A3-3307-01': 0,
  'TCGA-A3-3308-01': 7,
  'TCGA-A3-3311-01': 6,
  'TCGA-A3-3313-01': 43},
 'A2LD1': {'TCGA-A3-3307-01': 460,
  'TCGA-A3-3308-01': 433,
  'TCGA-A3-3311-01': 692,
  'TCGA-A3-3313-01': 1534},
 'A2ML1': {'TCGA-A3-3307-01': 64,
  'TCGA-A3-3308-01': 177,
  'TCGA-A3-3311-01': 134,
  'TCGA-A3-3313-01': 693}})

normal数据

pd.DataFrame({'A1CF': {'TCGA-A3-3387-11': 2728,
  'TCGA-B0-4700-11': 434,
  'TCGA-B0-4712-11': 11,
  'TCGA-B0-5402-11': 640},
 'A2BP1': {'TCGA-A3-3387-11': 45,
  'TCGA-B0-4700-11': 14,
  'TCGA-B0-4712-11': 74,
  'TCGA-B0-5402-11': 60},
 'A2LD1': {'TCGA-A3-3387-11': 6614,
  'TCGA-B0-4700-11': 1178,
  'TCGA-B0-4712-11': 1201,
  'TCGA-B0-5402-11': 1058},
 'A2ML1': {'TCGA-A3-3387-11': 46,
  'TCGA-B0-4700-11': 26,
  'TCGA-B0-4712-11': 78,
  'TCGA-B0-5402-11': 47}})
omqzjyyz

omqzjyyz1#

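因为你用错了iterrows()函数。iterrows()每次迭代返回的是一个(索引标签, Series)元组,而不是单独的一行数据;把整个元组直接传给ranksums时,字符串类型的索引标签会和整数值一起参与比较,从而引发这个TypeError。下面是一个i值的例子: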

for i in normal.T[0:1].iterrows():
    print(i)

# output

'''
('A1CF', TCGA-A3-3387-11    2728
TCGA-B0-4700-11     434
TCGA-B0-4712-11      11
TCGA-B0-5402-11     640
Name: A1CF, dtype: int64)
'''

请在循环中把元组解包为(索引标签, Series),然后使用其中的Series,例如:

for i,j in normal.T.iterrows():
    print(j['TCGA-A3-3387-11']) #you have to enter the name of the column you want to use here:

# output

'''
2728
45
6614
46
'''

另外,不推荐使用iterrows(),因为它非常慢。你可以用df.to_dict('records')代替。你应该看一下这篇文章。
下面是一个用法示例:

for j in normal.T.to_dict('records'):
    print(j['TCGA-A3-3387-11'])

# output

'''
2728
45
6614
46
'''

相关问题