package org.apache.iceberg.actions;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.iceberg.BaseMetastoreTableOperations;
import org.apache.iceberg.BaseTable;
import org.apache.iceberg.CatalogUtil;
import org.apache.iceberg.ManifestFiles;
import org.apache.iceberg.MetadataTableType;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.StaticTableOperations;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.common.DynMethods;
import org.apache.iceberg.io.ClosingIterator;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.spark.SparkUtil;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:org/apache/iceberg/actions/BaseSparkAction.class */
/**
 * Base class for Spark-backed actions that need Datasets of file paths belonging to a table.
 *
 * <p>The helpers here produce single-column DataFrames (column {@code file_path}) listing
 * the paths read from manifests, the manifests themselves, the manifest lists and the
 * remaining metadata files (version hint plus current and previous metadata files).
 *
 * @param <R> the result type of the action
 */
public abstract class BaseSparkAction<R> implements Action<R> {
    /**
     * Reflective handle to {@code Spark3Util.loadCatalogMetadataTable(SparkSession, String,
     * MetadataTableType)}. It is built with {@code orNoop()}, so callers must check
     * {@code isNoop()} before invoking it.
     */
    private static final DynMethods.UnboundMethod LOAD_CATALOG = DynMethods.builder("loadCatalogMetadataTable")
            .hiddenImpl("org.apache.iceberg.spark.Spark3Util", SparkSession.class, String.class, MetadataTableType.class)
            .orNoop()
            .build();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/iceberg/actions/BaseSparkAction$ReadManifest.class */
    /**
     * Spark flat-map function that expands one manifest into the paths returned by
     * {@code ManifestFiles.readPaths} for it, using a broadcast {@link FileIO}.
     */
    public static class ReadManifest implements FlatMapFunction<ManifestFileBean, String> {
        private final Broadcast<FileIO> io;

        /**
         * @param broadcast broadcast holding the {@link FileIO} used to open manifests
         */
        ReadManifest(Broadcast<FileIO> broadcast) {
            this.io = broadcast;
        }

        /**
         * Reads the paths listed in the given manifest.
         *
         * @param manifestFileBean the manifest to read
         * @return an iterator over the paths, wrapped in a {@link ClosingIterator}
         */
        @Override
        public Iterator<String> call(ManifestFileBean manifestFileBean) {
            return new ClosingIterator<>(ManifestFiles.readPaths(manifestFileBean, io.getValue()).iterator());
        }
    }

    /**
     * Returns the table this action works on; its name, location and {@link FileIO} are used
     * by the helper methods of this class.
     */
    protected abstract Table table();

    /**
     * Collects the manifest list location of every snapshot that has one.
     *
     * @param snapshots snapshots to inspect
     * @return the non-null manifest list locations, in iteration order
     */
    private List<String> getManifestListPaths(Iterable<Snapshot> snapshots) {
        List<String> manifestLists = Lists.newArrayList();
        for (Snapshot snapshot : snapshots) {
            String manifestListLocation = snapshot.manifestListLocation();
            // Snapshots without a manifest list location are skipped.
            if (manifestListLocation != null) {
                manifestLists.add(manifestListLocation);
            }
        }
        return manifestLists;
    }

    /**
     * Collects the version hint file location, the current metadata file location and the
     * locations of all previous metadata files recorded in the current metadata.
     *
     * @param ops table operations used to resolve the locations
     * @return the collected locations, in the order described above
     */
    private List<String> getOtherMetadataFilePaths(TableOperations ops) {
        List<String> otherMetadataFiles = Lists.newArrayList();
        otherMetadataFiles.add(ops.metadataFileLocation("version-hint.text"));

        TableMetadata current = ops.current();
        otherMetadataFiles.add(current.metadataFileLocation());
        for (TableMetadata.MetadataLogEntry previous : current.previousFiles()) {
            otherMetadataFiles.add(previous.file());
        }
        return otherMetadataFiles;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds the valid data file DataFrame using the name of {@link #table()}.
     *
     * @param sparkSession the Spark session to use
     * @return a DataFrame with a single {@code file_path} column
     */
    public Dataset<Row> buildValidDataFileDF(SparkSession sparkSession) {
        return buildValidDataFileDF(sparkSession, table().name());
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds a DataFrame of the paths read from every distinct manifest in the
     * {@code ALL_MANIFESTS} metadata table.
     *
     * @param sparkSession the Spark session to use
     * @param str the table name (or path) passed to {@link #loadMetadataTable}
     * @return a DataFrame with a single {@code file_path} column
     */
    public Dataset<Row> buildValidDataFileDF(SparkSession sparkSession, String str) {
        // Manifests are de-duplicated by path and spread over the configured number of
        // shuffle partitions before being read.
        Dataset<ManifestFileBean> allManifests = loadMetadataTable(sparkSession, str, table().location(), MetadataTableType.ALL_MANIFESTS)
                .selectExpr("path", "length", "partition_spec_id as partitionSpecId", "added_snapshot_id as addedSnapshotId")
                .dropDuplicates("path")
                .repartition(sparkSession.sessionState().conf().numShufflePartitions())
                .as(Encoders.bean(ManifestFileBean.class));

        JavaSparkContext context = new JavaSparkContext(sparkSession.sparkContext());
        Broadcast<FileIO> ioBroadcast = context.broadcast(SparkUtil.serializableFileIO(table()));

        return allManifests.flatMap(new ReadManifest(ioBroadcast), Encoders.STRING()).toDF("file_path");
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds a DataFrame of the manifest paths listed in the {@code ALL_MANIFESTS} metadata table.
     *
     * @param sparkSession the Spark session to use
     * @param str the table name (or path) passed to {@link #loadMetadataTable}
     * @return a DataFrame with a single {@code file_path} column
     */
    public Dataset<Row> buildManifestFileDF(SparkSession sparkSession, String str) {
        return loadMetadataTable(sparkSession, str, table().location(), MetadataTableType.ALL_MANIFESTS)
                .selectExpr("path as file_path");
    }

    /**
     * Builds a DataFrame of the manifest list locations of all snapshots of the given table.
     *
     * @param sparkSession the Spark session to use
     * @param table the table whose snapshots are inspected
     * @return a DataFrame with a single {@code file_path} column
     */
    protected Dataset<Row> buildManifestListDF(SparkSession sparkSession, Table table) {
        List<String> manifestLists = getManifestListPaths(table.snapshots());
        return sparkSession.createDataset(manifestLists, Encoders.STRING()).toDF("file_path");
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds the manifest list DataFrame for a table backed by {@link StaticTableOperations}.
     *
     * @param sparkSession the Spark session to use
     * @param str the value handed to {@link StaticTableOperations} together with the
     *     {@link FileIO} of {@link #table()} (presumably a metadata file location; confirm
     *     against {@code StaticTableOperations})
     * @return a DataFrame with a single {@code file_path} column
     */
    public Dataset<Row> buildManifestListDF(SparkSession sparkSession, String str) {
        StaticTableOperations staticOps = new StaticTableOperations(str, table().io());
        return buildManifestListDF(sparkSession, new BaseTable(staticOps, table().name()));
    }

    /**
     * Builds a DataFrame of the version hint file and the current and previous metadata files.
     *
     * @param sparkSession the Spark session to use
     * @param tableOperations table operations used to resolve the file locations
     * @return a DataFrame with a single {@code file_path} column
     */
    protected Dataset<Row> buildOtherMetadataFileDF(SparkSession sparkSession, TableOperations tableOperations) {
        List<String> otherMetadataFiles = getOtherMetadataFilePaths(tableOperations);
        return sparkSession.createDataset(otherMetadataFiles, Encoders.STRING()).toDF("file_path");
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds the union of the manifest, other-metadata and manifest-list DataFrames.
     *
     * @param sparkSession the Spark session to use
     * @param table the table whose name and snapshots are used
     * @param tableOperations table operations used to resolve the other metadata files
     * @return a DataFrame with a single {@code file_path} column
     */
    public Dataset<Row> buildValidMetadataFileDF(SparkSession sparkSession, Table table, TableOperations tableOperations) {
        Dataset<Row> manifests = buildManifestFileDF(sparkSession, table.name());
        Dataset<Row> manifestsAndOther = manifests.union(buildOtherMetadataFileDF(sparkSession, tableOperations));
        return manifestsAndOther.union(buildManifestListDF(sparkSession, table));
    }

    /**
     * Loads a metadata table by reflectively calling {@code Spark3Util.loadCatalogMetadataTable}.
     *
     * @param spark the Spark session to use
     * @param tableName the table name to resolve
     * @param type the metadata table to load
     * @return whatever the reflective call returns; the caller treats {@code null} as "not found"
     * @throws IllegalArgumentException if the {@code Spark3Util} method could not be located
     */
    private static Dataset<Row> loadCatalogMetadataTable(SparkSession spark, String tableName, MetadataTableType type) {
        Preconditions.checkArgument(!LOAD_CATALOG.isNoop(), "Cannot find Spark3Util class but Spark3 is in use");
        // The reflective call is untyped; the target method is expected to return Dataset<Row>.
        @SuppressWarnings("unchecked")
        Dataset<Row> catalogTable = (Dataset<Row>) LOAD_CATALOG.asStatic().invoke(spark, tableName, type);
        return catalogTable;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Loads a metadata table of the given type, trying several resolution strategies in order:
     * <ol>
     *   <li>if {@code str} contains {@code "/"}, load {@code str#type} directly;</li>
     *   <li>if the Spark version starts with {@code "3"}, try {@code Spark3Util} and use its
     *       result when it is not {@code null};</li>
     *   <li>if {@code str} starts with {@code "hadoop."}, load {@code str2#type};</li>
     *   <li>if {@code str} starts with {@code CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE}, load
     *       {@code str} with a leading {@code "hive."} removed, followed by {@code .type}.</li>
     * </ol>
     *
     * @param sparkSession the Spark session to use
     * @param str the table name, or a path when it contains {@code "/"}
     * @param str2 the table location, used only for names starting with {@code "hadoop."}
     * @param metadataTableType the metadata table to load
     * @return the loaded metadata table
     * @throws IllegalArgumentException if none of the strategies applies, or if Spark 3 is in
     *     use but the {@code Spark3Util} method could not be located
     */
    public static Dataset<Row> loadMetadataTable(SparkSession sparkSession, String str, String str2, MetadataTableType metadataTableType) {
        DataFrameReader reader = sparkSession.read().format(BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE);

        if (str.contains("/")) {
            return reader.load(str + "#" + metadataTableType);
        }

        if (sparkSession.version().startsWith("3")) {
            Dataset<Row> catalogTable = loadCatalogMetadataTable(sparkSession, str, metadataTableType);
            if (catalogTable != null) {
                return catalogTable;
            }
            // A null result falls through to the prefix-based resolution below.
        }

        if (str.startsWith("hadoop.")) {
            return reader.load(str2 + "#" + metadataTableType);
        }

        if (str.startsWith(CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE)) {
            return reader.load(str.replaceFirst("hive\\.", "") + "." + metadataTableType);
        }

        throw new IllegalArgumentException(String.format("Cannot find the metadata table for %s of type %s", str, metadataTableType));
    }
}
