package ai.tripl.arc.extract;

import ai.tripl.arc.api.API;
import ai.tripl.arc.util.CloudUtils$;
import ai.tripl.arc.util.ExtractUtils$;
import ai.tripl.arc.util.MetadataUtils$;
import ai.tripl.arc.util.log.logger.Logger;
import com.databricks.spark.xml.XmlReader;
import java.io.StringReader;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.SchemaFactory;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StructType;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Product;
import scala.Serializable;
import scala.Some;
import scala.Tuple15;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.package$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.util.Either;
import scala.util.Left;
import scala.util.Right;

/* compiled from: XMLExtract.scala */
/* loaded from: input_file:ai/tripl/arc/extract/XMLExtractStage$.class */
public final class XMLExtractStage$ implements Serializable {
    /** Singleton instance of this object; assigned by the private constructor. */
    public static XMLExtractStage$ MODULE$;

    // Instantiating the class once here runs the private constructor, which
    // stores the new instance in MODULE$.
    static {
        new XMLExtractStage$();
    }

    /**
     * Executes an XML extract stage and registers the result as a temp view.
     *
     * <p>Overall flow, as visible in this method:
     * <ol>
     *   <li>Resolve the optional schema through {@code ExtractUtils.getSchema}.</li>
     *   <li>Obtain the raw XML strings: when {@code input} is a {@code Right}, its value is
     *       read with {@code read().option("wholetext", true).text(...)}; when it is a
     *       {@code Left}, its value is used as a table name ({@code sparkSession.table(...)}),
     *       optionally selecting {@code inputField}.</li>
     *   <li>If an XSD is configured, validate every record against it.</li>
     *   <li>Parse the strings with {@code XmlReader}, using the schema when one is present.</li>
     *   <li>Fall back to an empty DataFrame (or throw) when nothing usable was read.</li>
     *   <li>Add internal columns, apply metadata, repartition, register the output view and
     *       record statistics in {@code stageDetail}.</li>
     * </ol>
     *
     * @param xMLExtractStage the stage configuration
     * @param sparkSession    the active Spark session
     * @param logger          logger passed through to the utility helpers
     * @param aRCContext      Arc context (supplies {@code immutableViews()} and {@code storageLevel()})
     * @return the resulting dataset wrapped with {@code Option.apply}
     */
    public Option<Dataset<Row>> execute(XMLExtractStage xMLExtractStage, SparkSession sparkSession, Logger logger, API.ARCContext aRCContext) {
        Right apply;
        Dataset createDataFrame;
        Dataset dataset;
        Dataset repartition;
        RDD rdd;
        // JVM-wide setting: selects the XMLInputFactory implementation by system property.
        System.setProperty("javax.xml.stream.XMLInputFactory", "com.sun.xml.internal.stream.XMLInputFactoryImpl");
        try {
            // Optional schema; every later use distinguishes Some (schema given) from None.
            Some schema = ExtractUtils$.MODULE$.getSchema(xMLExtractStage.schema(), sparkSession, logger);
            try {
                Right input = xMLExtractStage.input();
                if (input instanceof Right) {
                    // Right input: the value is handed to the text reader below.
                    String str = (String) input.value();
                    CloudUtils$.MODULE$.setHadoopConfiguration(xMLExtractStage.authentication(), sparkSession, logger, aRCContext);
                    Dataset as = sparkSession.read().option("wholetext", true).text(str).as(sparkSession.implicits().newStringEncoder());
                    // When an XSD is configured, validate each record against it before parsing.
                    xMLExtractStage.xsd().foreach(str2 -> {
                        $anonfun$execute$1(as, str2);
                        return BoxedUnit.UNIT;
                    });
                    XmlReader xmlReader = new XmlReader();
                    if (schema instanceof Some) {
                        // Parse with the explicitly supplied schema.
                        apply = package$.MODULE$.Right().apply(xmlReader.withSchema((StructType) schema.value()).xmlRdd(sparkSession, as.rdd()));
                    } else {
                        if (!None$.MODULE$.equals(schema)) {
                            throw new MatchError(schema);
                        }
                        // No schema supplied: parse without calling withSchema.
                        apply = package$.MODULE$.Right().apply(xmlReader.xmlRdd(sparkSession, as.rdd()));
                    }
                } else {
                    if (!(input instanceof Left)) {
                        throw new MatchError(input);
                    }
                    // Left input: the value is used as the name of an existing table/view.
                    String str3 = (String) ((Left) input).value();
                    XmlReader xmlReader2 = new XmlReader();
                    Some inputField = xMLExtractStage.inputField();
                    if (inputField instanceof Some) {
                        // Read only the configured column as strings.
                        rdd = sparkSession.table(str3).select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col((String) inputField.value())})).as(sparkSession.implicits().newStringEncoder()).rdd();
                    } else {
                        if (!None$.MODULE$.equals(inputField)) {
                            throw new MatchError(inputField);
                        }
                        // No inputField: the whole table is decoded with the String encoder.
                        rdd = sparkSession.table(str3).as(sparkSession.implicits().newStringEncoder()).rdd();
                    }
                    RDD rdd2 = rdd;
                    // When an XSD is configured, validate each record against it before parsing.
                    xMLExtractStage.xsd().foreach(str4 -> {
                        $anonfun$execute$3(rdd2, str4);
                        return BoxedUnit.UNIT;
                    });
                    if (schema instanceof Some) {
                        apply = package$.MODULE$.Right().apply(xmlReader2.withSchema((StructType) schema.value()).xmlRdd(sparkSession, rdd2));
                    } else {
                        if (!None$.MODULE$.equals(schema)) {
                            throw new MatchError(schema);
                        }
                        apply = package$.MODULE$.Right().apply(xmlReader2.xmlRdd(sparkSession, rdd2));
                    }
                }
            } catch (Throwable th) {
                // Only an AnalysisException whose message contains "Path does not exist" is
                // converted into a Left error value; any other Exception is wrapped in
                // XMLExtractStage$$anon$2 and non-Exception throwables are rethrown unchanged.
                if (!(th instanceof AnalysisException) || !th.getMessage().contains("Path does not exist")) {
                    if (th instanceof Exception) {
                        throw new XMLExtractStage$$anon$2((Exception) th, xMLExtractStage);
                    }
                    throw th;
                }
                Right input2 = xMLExtractStage.input();
                if (input2 instanceof Right) {
                    apply = package$.MODULE$.Left().apply(new API.PathNotExistsExtractError(Option$.MODULE$.apply((String) input2.value())));
                } else {
                    if (!(input2 instanceof Left)) {
                        throw new MatchError(input2);
                    }
                    apply = package$.MODULE$.Left().apply(new API.FileNotFoundExtractError(None$.MODULE$));
                }
            }
            // From here on `right` is either Right(parsed dataset) or Left(extract error).
            Right right = apply;
            try {
                if (right instanceof Right) {
                    Dataset dataset2 = (Dataset) right.value();
                    if (dataset2.schema().length() != 0) {
                        createDataFrame = dataset2;
                    } else {
                        // Parsed dataset has no columns: without a supplied schema this is an
                        // error; with one, substitute an empty DataFrame of that schema.
                        if (!(schema instanceof Some)) {
                            if (!None$.MODULE$.equals(schema)) {
                                throw new MatchError(schema);
                            }
                            Right input3 = xMLExtractStage.input();
                            if (input3 instanceof Right) {
                                throw new Exception(new API.EmptySchemaExtractError(new Some((String) input3.value())).getMessage());
                            }
                            if (input3 instanceof Left) {
                                throw new Exception(new API.EmptySchemaExtractError(None$.MODULE$).getMessage());
                            }
                            throw new MatchError(input3);
                        }
                        createDataFrame = sparkSession.createDataFrame(sparkSession.sparkContext().emptyRDD(ClassTag$.MODULE$.apply(Row.class)), (StructType) schema.value());
                    }
                } else {
                    if (!(right instanceof Left)) {
                        throw new MatchError(right);
                    }
                    // Extract error path: record zero rows, then either substitute an empty
                    // DataFrame of the supplied schema or fail with the error's message.
                    API.ExtractError extractError = (Product) ((Left) right).value();
                    xMLExtractStage.stageDetail().put("records", 0);
                    if (!(schema instanceof Some)) {
                        if (None$.MODULE$.equals(schema)) {
                            throw new Exception(extractError.getMessage());
                        }
                        throw new MatchError(schema);
                    }
                    createDataFrame = sparkSession.createDataFrame(sparkSession.sparkContext().emptyRDD(ClassTag$.MODULE$.apply(Row.class)), (StructType) schema.value());
                }
                Dataset addInternalColumns = ExtractUtils$.MODULE$.addInternalColumns(createDataFrame, xMLExtractStage.contiguousIndex(), sparkSession, aRCContext);
                // Metadata is applied only when a schema was supplied.
                if (schema instanceof Some) {
                    dataset = MetadataUtils$.MODULE$.setMetadata(addInternalColumns, (StructType) schema.value(), sparkSession, logger);
                } else {
                    if (!None$.MODULE$.equals(schema)) {
                        throw new MatchError(schema);
                    }
                    dataset = addInternalColumns;
                }
                Dataset dataset3 = dataset;
                // Repartitioning: four cases from the combination of partitionBy (empty or
                // not) and numPartitions (Some or None). With neither, the dataset is unchanged.
                List<String> partitionBy = xMLExtractStage.partitionBy();
                if (Nil$.MODULE$.equals(partitionBy)) {
                    Some numPartitions = xMLExtractStage.numPartitions();
                    if (numPartitions instanceof Some) {
                        repartition = dataset3.repartition(BoxesRunTime.unboxToInt(numPartitions.value()));
                    } else {
                        if (!None$.MODULE$.equals(numPartitions)) {
                            throw new MatchError(numPartitions);
                        }
                        repartition = dataset3;
                    }
                } else {
                    // Resolve each partitionBy name to a Column of the dataset.
                    List list = (List) partitionBy.map(str5 -> {
                        return dataset3.apply(str5);
                    }, List$.MODULE$.canBuildFrom());
                    Some numPartitions2 = xMLExtractStage.numPartitions();
                    if (numPartitions2 instanceof Some) {
                        repartition = dataset3.repartition(BoxesRunTime.unboxToInt(numPartitions2.value()), list);
                    } else {
                        if (!None$.MODULE$.equals(numPartitions2)) {
                            throw new MatchError(numPartitions2);
                        }
                        repartition = dataset3.repartition(list);
                    }
                }
                Dataset dataset4 = repartition;
                // immutableViews selects createTempView instead of createOrReplaceTempView.
                if (aRCContext.immutableViews()) {
                    dataset4.createTempView(xMLExtractStage.outputView());
                } else {
                    dataset4.createOrReplaceTempView(xMLExtractStage.outputView());
                }
                // Statistics are recorded only for non-streaming datasets.
                if (dataset4.isStreaming()) {
                    BoxedUnit boxedUnit = BoxedUnit.UNIT;
                } else {
                    xMLExtractStage.stageDetail().put("inputFiles", Integer.valueOf(dataset4.inputFiles().length));
                    xMLExtractStage.stageDetail().put("outputColumns", Integer.valueOf(dataset4.schema().length()));
                    xMLExtractStage.stageDetail().put("numPartitions", Integer.valueOf(dataset4.rdd().partitions().length));
                    if (xMLExtractStage.persist()) {
                        // Persist first, then count, and record the row count.
                        dataset4.persist(aRCContext.storageLevel());
                        xMLExtractStage.stageDetail().put("records", Long.valueOf(dataset4.count()));
                    } else {
                        BoxedUnit boxedUnit2 = BoxedUnit.UNIT;
                    }
                }
                return Option$.MODULE$.apply(dataset4);
            } catch (Exception e) {
                // Failures after the read/parse step are wrapped in XMLExtractStage$$anon$3.
                throw new XMLExtractStage$$anon$3(e, xMLExtractStage);
            }
        } catch (Exception e2) {
            // NOTE(review): as decompiled, this handler encloses the whole method body; in the
            // original Scala it may only have guarded the schema lookup — confirm against source.
            throw new XMLExtractStage$$anon$1(e2, xMLExtractStage);
        }
    }

    /**
     * Factory method: builds an {@link XMLExtractStage} by forwarding all fifteen arguments,
     * in the same order, to its constructor.
     *
     * @return the newly constructed stage
     */
    public XMLExtractStage apply(XMLExtract xMLExtract, Option<String> option, String str, Option<String> option2, Either<String, List<API.ExtractColumn>> either, String str2, Either<String, String> either2, Option<String> option3, Option<API.Authentication> option4, Map<String, String> map, boolean z, Option<Object> option5, List<String> list, boolean z2, Option<String> option6) {
        XMLExtractStage stage = new XMLExtractStage(
                xMLExtract, option, str, option2, either,
                str2, either2, option3, option4, map,
                z, option5, list, z2, option6);
        return stage;
    }

    /**
     * Extractor counterpart of {@code apply}: decomposes a stage into a 15-tuple of its fields.
     *
     * @param xMLExtractStage the stage to decompose; may be {@code null}
     * @return {@code None} when the argument is {@code null}, otherwise {@code Some} of a
     *         {@code Tuple15} holding the stage's fields (booleans boxed)
     */
    public Option<Tuple15<XMLExtract, Option<String>, String, Option<String>, Either<String, List<API.ExtractColumn>>, String, Either<String, String>, Option<String>, Option<API.Authentication>, Map<String, String>, Object, Option<Object>, List<String>, Object, Option<String>>> unapply(XMLExtractStage xMLExtractStage) {
        Option extracted;
        if (xMLExtractStage == null) {
            extracted = None$.MODULE$;
        } else {
            // Accessors are read in the same order as the original single expression.
            extracted = new Some(new Tuple15(
                    xMLExtractStage.m0plugin(),
                    xMLExtractStage.id(),
                    xMLExtractStage.name(),
                    xMLExtractStage.description(),
                    xMLExtractStage.schema(),
                    xMLExtractStage.outputView(),
                    xMLExtractStage.input(),
                    xMLExtractStage.inputField(),
                    xMLExtractStage.authentication(),
                    xMLExtractStage.params(),
                    BoxesRunTime.boxToBoolean(xMLExtractStage.persist()),
                    xMLExtractStage.numPartitions(),
                    xMLExtractStage.partitionBy(),
                    BoxesRunTime.boxToBoolean(xMLExtractStage.contiguousIndex()),
                    xMLExtractStage.xsd()));
        }
        return extracted;
    }

    /**
     * Java serialization hook: always returns {@code MODULE$}, so a deserialized copy of
     * this class resolves to the existing singleton instance.
     */
    private Object readResolve() {
        return MODULE$;
    }

    /**
     * Validates one XML document against an XSD.
     *
     * <p>A new schema factory, compiled schema and validator are created on every call.
     * Nothing is caught here, so any failure from schema compilation or validation
     * propagates to the caller.
     *
     * @param str  the XSD source text
     * @param str2 the XML document text to validate
     */
    public static final /* synthetic */ void $anonfun$execute$2(String str, String str2) {
        SchemaFactory factory = SchemaFactory.newInstance("http://www.w3.org/2001/XMLSchema");
        StreamSource xsdSource = new StreamSource(new StringReader(str));
        javax.xml.validation.Validator validator = factory.newSchema(xsdSource).newValidator();
        // The document source is created only after the validator exists, as in the original chain.
        StreamSource documentSource = new StreamSource(new StringReader(str2));
        validator.validate(documentSource);
    }

    /**
     * Validates every record of {@code dataset} against the XSD text {@code str} by
     * invoking {@code $anonfun$execute$2} for each record via {@code foreach}.
     *
     * @param dataset the dataset of XML strings to validate
     * @param str     the XSD source text
     */
    public static final /* synthetic */ void $anonfun$execute$1(Dataset dataset, String str) {
        dataset.foreach(xmlDocument -> {
            // First argument is the XSD, second the record being checked.
            $anonfun$execute$2(str, xmlDocument);
            return BoxedUnit.UNIT;
        });
    }

    /**
     * Validates one XML document against an XSD.
     *
     * <p>A new schema factory, compiled schema and validator are created on every call.
     * Nothing is caught here, so any failure from schema compilation or validation
     * propagates to the caller.
     *
     * @param str  the XSD source text
     * @param str2 the XML document text to validate
     */
    public static final /* synthetic */ void $anonfun$execute$4(String str, String str2) {
        SchemaFactory factory = SchemaFactory.newInstance("http://www.w3.org/2001/XMLSchema");
        StreamSource xsdSource = new StreamSource(new StringReader(str));
        javax.xml.validation.Validator validator = factory.newSchema(xsdSource).newValidator();
        // The document source is created only after the validator exists, as in the original chain.
        StreamSource documentSource = new StreamSource(new StringReader(str2));
        validator.validate(documentSource);
    }

    /**
     * Validates every element of {@code rdd} against the XSD text {@code str} by
     * invoking {@code $anonfun$execute$4} for each element via {@code foreach}.
     *
     * @param rdd the RDD of XML strings to validate
     * @param str the XSD source text
     */
    public static final /* synthetic */ void $anonfun$execute$3(RDD rdd, String str) {
        rdd.foreach(xmlDocument -> {
            // First argument is the XSD, second the element being checked.
            $anonfun$execute$4(str, xmlDocument);
            return BoxedUnit.UNIT;
        });
    }

    /**
     * Private constructor: publishes the newly created instance as the {@code MODULE$}
     * singleton. It is invoked from the static initialiser of this class.
     */
    private XMLExtractStage$() {
        MODULE$ = this;
    }
}
