Load the dependency jar by starting spark-shell with it on the classpath: ./spark-shell --jars ./jpmml-sparkml-executable-1.2.13.jar
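Once the shell is up, you can optionally confirm that the jar really made it onto the driver classpath before running the pipeline code below; this minimal check (my own addition, not part of the original steps) simply resolves the PMMLBuilder class by name:

// Returns the class object if the jar was picked up; throws ClassNotFoundException otherwise.
Class.forName("org.jpmml.sparkml.PMMLBuilder")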
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.PipelineStage
import org.apache.spark.ml.feature.RFormula
import org.jpmml.sparkml.PMMLBuilder
import java.io.File

// Read the CSV data, inferring column types from the header row and the values.
val df = spark.read.format("csv")
  .option("sep", ",")
  .option("inferSchema", "true")
  .option("header", "true")
  .load("/user/spark/security/Wholesale_customers_data.csv")

// "target ~ ." uses the "target" column as the label and all remaining columns as features.
val formula = new RFormula().setFormula("target ~ .")
val lr = new LogisticRegression()
val pipeline = new Pipeline().setStages(Array(formula, lr))

// Fit the pipeline, then export it (together with the input schema) as a PMML file.
val schema = df.schema
val pipelineModel = pipeline.fit(df)
val pmml = new PMMLBuilder(schema, pipelineModel)
val file = pmml.buildFile(new File("/data/data2/tmp/logit_pipeline.pmml"))
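To make the formula step easier to follow: RFormula with "target ~ ." assembles every column except target into a single features vector and copies target into a label column, which is exactly what LogisticRegression expects. A minimal sketch on hypothetical toy data (the column names Fresh/Milk/target are only illustrative), run in the same spark-shell session:

import org.apache.spark.ml.feature.RFormula

// Tiny illustrative DataFrame; the real data comes from the CSV loaded above.
val toy = spark.createDataFrame(Seq(
  (12669, 9656, 0),
  (7057,  9810, 1)
)).toDF("Fresh", "Milk", "target")

val fitted = new RFormula().setFormula("target ~ .").fit(toy)
// Adds two columns: features = [Fresh, Milk] as a numeric vector, label = target cast to double.
fitted.transform(toy).show(false)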