我试图在大数据平台上使用 PySpark 的 KMeans 构建聚类模型,但在调用 kmeans.fit() 时出现了下面的错误,应该如何解决?
File "C:\Users\knwafor\run_scripts\bigdata.py", line 473, in <module>
kmeans_model = kmeans.fit(data_with_pca)
File "C:\Users\knwafor\run_scripts\runscripts_env\lib\site-packages\pyspark\ml\base.py", line 205, in fit
return self._fit(dataset)
File "C:\Users\knwafor\run_scripts\runscripts_env\lib\site-packages\pyspark\ml\wrapper.py", line 381, in _fit
java_model = self._fit_java(dataset)
File "C:\Users\knwafor\run_scripts\runscripts_env\lib\site-packages\pyspark\ml\wrapper.py", line 377, in _fit_java
self._transfer_params_to_java()
File "C:\Users\knwafor\run_scripts\runscripts_env\lib\site-packages\pyspark\ml\wrapper.py", line 174, in _transfer_params_to_java
pair = self._make_java_param_pair(param, self._defaultParamMap[param])
File "C:\Users\knwafor\run_scripts\runscripts_env\lib\site-packages\pyspark\ml\wrapper.py", line 158, in _make_java_param_pair
java_param = self._java_obj.getParam(param.name)
File "C:\Users\knwafor\run_scripts\runscripts_env\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
return_value = get_return_value(
File "C:\Users\knwafor\run_scripts\runscripts_env\lib\site-packages\pyspark\errors\exceptions\captured.py", line 169, in deco
return f(*a, **kw)
File "C:\Users\knwafor\run_scripts\runscripts_env\lib\site-packages\py4j\protocol.py", line 326, in get_return_value
raise Py4JJavaError(
py4j.protocol.Py4JJavaError: An error occurred while calling o1468.getParam.
: java.util.NoSuchElementException: Param maxBlockSizeInMB does not exist.
at org.apache.spark.ml.param.Params.$anonfun$getParam$2(params.scala:705)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.ml.param.Params.getParam(params.scala:705)
at org.apache.spark.ml.param.Params.getParam$(params.scala:703)
at org.apache.spark.ml.PipelineStage.getParam(Pipeline.scala:41)
at sun.reflect.GeneratedMethodAccessor41.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.lang.Thread.run(Thread.java:748)
SUCCESS: The process with PID 13988 (child process of PID 17724) has been terminated.
SUCCESS: The process with PID 17724 (child process of PID 16860) has been terminated.
SUCCESS: The process with PID 16860 (child process of PID 7256) has been terminated.
字符串
1条答案
按热度按时间mxg2im7a1#
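我后来绕过了这个问题:没有使用 PySpark 的 KMeans,而是改用 BisectingKMeans 算法,它同样可以得到聚类结果。(注:"Param maxBlockSizeInMB does not exist" 这类错误通常说明本地安装的 pyspark 包版本比集群上实际运行的 Spark(JVM 端)版本新——Python 端尝试传递 JVM 端的 KMeans 尚不支持的参数;如果仍想使用 KMeans,可以检查并让两者的版本保持一致。)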