Я написал собственное хранилище состояний и поставщик хранилища состояний для Apache Spark 2.3.0 и попытался развернуть задание, используя дополнительный аргумент:
--conf spark.sql.streaming.stateStore.providerClass=com.sample.state.CustomStateStoreProvider
Для запуска заданий Spark я использую Marathon и Mesos, и задание завершается с ошибкой сразу после запуска с исключением:
java.lang.ClassNotFoundException: com.sample.state.CustomStateStoreProvider
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.util.Utils$.classForName(Utils.scala:235)
at org.apache.spark.sql.execution.streaming.state.StateStoreProvider$.create(StateStore.scala:213)
at org.apache.spark.sql.execution.streaming.StateStoreWriter$class.stateStoreCustomMetrics(statefulOperators.scala:121)
at org.apache.spark.sql.execution.streaming.StateStoreWriter$class.metrics(statefulOperators.scala:86)
at org.apache.spark.sql.execution.streaming.StateStoreSaveExec.metrics$lzycompute(statefulOperators.scala:251)
at org.apache.spark.sql.execution.streaming.StateStoreSaveExec.metrics(statefulOperators.scala:251)
at org.apache.spark.sql.execution.SparkPlanInfo$.fromSparkPlan(SparkPlanInfo.scala:58)
at org.apache.spark.sql.execution.SparkPlanInfo$$anonfun$fromSparkPlan$1.apply(SparkPlanInfo.scala:62)
at org.apache.spark.sql.execution.SparkPlanInfo$$anonfun$fromSparkPlan$1.apply(SparkPlanInfo.scala:62)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:285)
at org.apache.spark.sql.execution.SparkPlanInfo$.fromSparkPlan(SparkPlanInfo.scala:62)
at org.apache.spark.sql.execution.SparkPlanInfo$$anonfun$fromSparkPlan$1.apply(SparkPlanInfo.scala:62)
at org.apache.spark.sql.execution.SparkPlanInfo$$anonfun$fromSparkPlan$1.apply(SparkPlanInfo.scala:62)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:285)
at org.apache.spark.sql.execution.SparkPlanInfo$.fromSparkPlan(SparkPlanInfo.scala:62)
at org.apache.spark.sql.execution.SparkPlanInfo$$anonfun$fromSparkPlan$1.apply(SparkPlanInfo.scala:62)
at org.apache.spark.sql.execution.SparkPlanInfo$$anonfun$fromSparkPlan$1.apply(SparkPlanInfo.scala:62)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:285)
at org.apache.spark.sql.execution.SparkPlanInfo$.fromSparkPlan(SparkPlanInfo.scala:62)
at org.apache.spark.sql.execution.SparkPlanInfo$$anonfun$fromSparkPlan$1.apply(SparkPlanInfo.scala:62)
at org.apache.spark.sql.execution.SparkPlanInfo$$anonfun$fromSparkPlan$1.apply(SparkPlanInfo.scala:62)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:285)
at org.apache.spark.sql.execution.SparkPlanInfo$.fromSparkPlan(SparkPlanInfo.scala:62)
at org.apache.spark.sql.execution.SparkPlanInfo$$anonfun$fromSparkPlan$1.apply(SparkPlanInfo.scala:62)
at org.apache.spark.sql.execution.SparkPlanInfo$$anonfun$fromSparkPlan$1.apply(SparkPlanInfo.scala:62)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:285)
at org.apache.spark.sql.execution.SparkPlanInfo$.fromSparkPlan(SparkPlanInfo.scala:62)
at org.apache.spark.sql.execution.SparkPlanInfo$$anonfun$fromSparkPlan$1.apply(SparkPlanInfo.scala:62)
at org.apache.spark.sql.execution.SparkPlanInfo$$anonfun$fromSparkPlan$1.apply(SparkPlanInfo.scala:62)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:285)
at org.apache.spark.sql.execution.SparkPlanInfo$.fromSparkPlan(SparkPlanInfo.scala:62)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch$3.apply(MicroBatchExecution.scala:475)
at org.apache.spark.sql.execution.streaming.ProgressReporter$class.reportTimeTaken(ProgressReporter.scala:271)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:58)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch(MicroBatchExecution.scala:474)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply$mcV$sp(MicroBatchExecution.scala:133)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply(MicroBatchExecution.scala:121)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply(MicroBatchExecution.scala:121)
at org.apache.spark.sql.execution.streaming.ProgressReporter$class.reportTimeTaken(ProgressReporter.scala:271)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:58)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1.apply$mcZ$sp(MicroBatchExecution.scala:121)
at org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:56)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:117)
at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:279)
at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:189)
Вот команда для запуска задания:
/spark/bin/spark-submit \
--repositories "http://127.0.0.1:80/sbt-all" \
--packages com.sample:pipelines:0.1.0 \
--class com.sample.TestApplication \
--conf spark.sql.streaming.stateStore.providerClass=com.sample.state.CustomStateStoreProvider \
/spark/examples/jars/spark-examples_2.11-2.3.0.jar
Оба класса com.sample.TestApplication
и com.sample.state.CustomStateStoreProvider
находятся в пакете com.sample:pipelines:0.1.0
, и я уже проверял это несколько раз. Без параметра spark.sql.streaming.stateStore.providerClass
приложения запускаются и работают нормально.
Я уже пытался отправить задание, используя дополнительные пути к классам для драйвера и исполнителей и используя параметр --jars
с JAR, расположенным в HDFS или через HTTP.
P.S.: У меня нет проблем, когда я пытаюсь запустить задание локально, и в этом случае все работает хорошо.