我尝试在两个 DataFrame(`kirc` 和 `normal`)之间执行 Wilcoxon 秩和检验,并且希望逐列进行检验。但我的代码引发了 `TypeError: '<' not supported between instances of 'int' and 'str'`。
from scipy.stats import ranksums
import pandas as pd
kirc = mrna.loc[mrna['subtype'] == "KIRC"].iloc[:,:-2]
normal = mrna.loc[mrna['subtype'] == "normal"].iloc[:,:-2]
对于两个 DataFrame 中的每一列,我希望执行 Wilcoxon 秩和检验,以检验相应列之间的差异。
for i in normal.T.iterrows():
for j in kirc.T.iterrows():
ranksums(i, j)
错误回溯(Traceback):
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args,**kwds)
56 try:
---> 57 return bound(*args,**kwds)
58 except TypeError:
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/common.py in new_method(self, other)
68
---> 69 return method(self, other)
70
/opt/conda/lib/python3.7/site-packages/pandas/core/arraylike.py in __lt__(self, other)
39 def __lt__(self, other):
---> 40 return self._cmp_method(other, operator.lt)
41
/opt/conda/lib/python3.7/site-packages/pandas/core/series.py in _cmp_method(self, other, op)
5501 with np.errstate(all="ignore"):
-> 5502 res_values = ops.comparison_op(lvalues, rvalues, op)
5503
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comparison_op(left, right, op)
283 elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
--> 284 res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
285
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comp_method_OBJECT_ARRAY(op, x, y)
72 else:
---> 73 result = libops.scalar_compare(x.ravel(), y, op)
74 return result.reshape(x.shape)
/opt/conda/lib/python3.7/site-packages/pandas/_libs/ops.pyx in pandas._libs.ops.scalar_compare()
TypeError: '<' not supported between instances of 'int' and 'str'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-44-ea48324537c3> in <module>
3 for i in normal.T.iterrows():
4 for j in kirc.T.iterrows():
----> 5 ranksums(i, j)
6
/opt/conda/lib/python3.7/site-packages/scipy/stats/stats.py in ranksums(x, y)
6469 n2 = len(y)
6470 alldata = np.concatenate((x, y))
-> 6471 ranked = rankdata(alldata)
6472 x = ranked[:n1]
6473 s = np.sum(x, axis=0)
/opt/conda/lib/python3.7/site-packages/scipy/stats/stats.py in rankdata(a, method)
7380 arr = np.ravel(np.asarray(a))
7381 algo = 'mergesort' if method == 'ordinal' else 'quicksort'
-> 7382 sorter = np.argsort(arr, kind=algo)
7383
7384 inv = np.empty(sorter.size, dtype=np.intp)
<__array_function__ internals> in argsort(*args,**kwargs)
/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in argsort(a, axis, kind, order)
1112
1113 """
-> 1114 return _wrapfunc(a, 'argsort', axis=axis, kind=kind, order=order)
1115
1116
/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args,**kwds)
64 # Call _wrapit from within the except clause to ensure a potential
65 # exception has a traceback chain.
---> 66 return _wrapit(obj, method, *args,**kwds)
67
68
/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapit(obj, method, *args,**kwds)
41 except AttributeError:
42 wrap = None
---> 43 result = getattr(asarray(obj), method)(*args,**kwds)
44 if wrap:
45 if not isinstance(result, mu.ndarray):
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/common.py in new_method(self, other)
67 other = item_from_zerodim(other)
68
---> 69 return method(self, other)
70
71 return new_method
/opt/conda/lib/python3.7/site-packages/pandas/core/arraylike.py in __lt__(self, other)
38 @unpack_zerodim_and_defer("__lt__")
39 def __lt__(self, other):
---> 40 return self._cmp_method(other, operator.lt)
41
42 @unpack_zerodim_and_defer("__le__")
/opt/conda/lib/python3.7/site-packages/pandas/core/series.py in _cmp_method(self, other, op)
5500
5501 with np.errstate(all="ignore"):
-> 5502 res_values = ops.comparison_op(lvalues, rvalues, op)
5503
5504 return self._construct_result(res_values, name=res_name)
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comparison_op(left, right, op)
282
283 elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
--> 284 res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
285
286 else:
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comp_method_OBJECT_ARRAY(op, x, y)
71 result = libops.vec_compare(x.ravel(), y.ravel(), op)
72 else:
---> 73 result = libops.scalar_compare(x.ravel(), y, op)
74 return result.reshape(x.shape)
75
/opt/conda/lib/python3.7/site-packages/pandas/_libs/ops.pyx in pandas._libs.ops.scalar_compare()
TypeError: '<' not supported between instances of 'int' and 'str'
`kirc` 数据:
pd.DataFrame({'A1CF': {'TCGA-A3-3307-01': 750,
'TCGA-A3-3308-01': 579,
'TCGA-A3-3311-01': 2186,
'TCGA-A3-3313-01': 220},
'A2BP1': {'TCGA-A3-3307-01': 0,
'TCGA-A3-3308-01': 7,
'TCGA-A3-3311-01': 6,
'TCGA-A3-3313-01': 43},
'A2LD1': {'TCGA-A3-3307-01': 460,
'TCGA-A3-3308-01': 433,
'TCGA-A3-3311-01': 692,
'TCGA-A3-3313-01': 1534},
'A2ML1': {'TCGA-A3-3307-01': 64,
'TCGA-A3-3308-01': 177,
'TCGA-A3-3311-01': 134,
'TCGA-A3-3313-01': 693}})
`normal` 数据:
pd.DataFrame({'A1CF': {'TCGA-A3-3387-11': 2728,
'TCGA-B0-4700-11': 434,
'TCGA-B0-4712-11': 11,
'TCGA-B0-5402-11': 640},
'A2BP1': {'TCGA-A3-3387-11': 45,
'TCGA-B0-4700-11': 14,
'TCGA-B0-4712-11': 74,
'TCGA-B0-5402-11': 60},
'A2LD1': {'TCGA-A3-3387-11': 6614,
'TCGA-B0-4700-11': 1178,
'TCGA-B0-4712-11': 1201,
'TCGA-B0-5402-11': 1058},
'A2ML1': {'TCGA-A3-3387-11': 46,
'TCGA-B0-4700-11': 26,
'TCGA-B0-4712-11': 78,
'TCGA-B0-5402-11': 47}})
1条答案
按热度按时间omqzjyyz1#
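这是因为 iterrows() 的用法不对:它每次迭代返回的是一个 (索引标签, Series) 元组,而不是单独的 Series。因此 ranksums(i, j) 收到的是同时包含字符串标签和数值数据的元组,在排序比较时就会出现 int 与 str 之间的 '<' 比较错误。下面是一个 i 值的例子(以问题中的 normal 示例数据为例,normal.T.iterrows() 产生的第一个 i):
('A1CF', TCGA-A3-3387-11    2728
TCGA-B0-4700-11     434
TCGA-B0-4712-11      11
TCGA-B0-5402-11     640
Name: A1CF, dtype: int64)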
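请改用以下写法:把 iterrows() 返回的 (索引标签, Series) 元组解包,并且只比较两个 DataFrame 中同名的列:
for gene, normal_values in normal.T.iterrows():
    print(gene, ranksums(normal_values, kirc[gene]))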
另外,不推荐使用 iterrows(),因为它非常慢。你可以用 df.to_dict() 代替,建议阅读一下相关的文章。
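下面是一个用法示例(to_dict('list') 会返回 {列名: [该列的值, ...]} 形式的字典):
normal_dict = normal.to_dict('list')
kirc_dict = kirc.to_dict('list')
for gene, normal_values in normal_dict.items():
    print(gene, ranksums(normal_values, kirc_dict[gene]))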