package org.apache.hudi.io.storage;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.SparkAdapterSupport$;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieSparkRecord;
import org.apache.hudi.common.util.FileFormatUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ParquetReaderIterator;
import org.apache.hudi.common.util.ParquetUtils;
import org.apache.hudi.common.util.TypeUtils;
import org.apache.hudi.common.util.collection.ClosableIterator;
import org.apache.hudi.common.util.collection.CloseableMappingIterator;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StoragePath;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.schema.MessageType;
import org.apache.spark.sql.HoodieInternalRowUtils;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.UnsafeProjection;
import org.apache.spark.sql.catalyst.expressions.UnsafeRow;
import org.apache.spark.sql.execution.datasources.parquet.ParquetReadSupport;
import org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter;
import org.apache.spark.sql.execution.datasources.parquet.SparkBasicSchemaEvolution;
import org.apache.spark.sql.internal.SQLConf;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StructType;

/* loaded from: input_file:org/apache/hudi/io/storage/HoodieSparkParquetReader.class */
/**
 * {@link HoodieSparkFileReader} implementation that reads a single Parquet file and exposes its
 * rows as Spark {@link UnsafeRow}s (optionally wrapped in {@link HoodieSparkRecord}s).
 *
 * <p>Every iterator handed out by {@link #getUnsafeRowIterator(StructType)} (and the methods that
 * delegate to it) is tracked by this reader and closed by {@link #close()}.
 *
 * <p>No synchronization is performed here; the file schema cache, the storage configuration and
 * the list of open iterators are plain mutable state.
 */
public class HoodieSparkParquetReader implements HoodieSparkFileReader {
    private final StoragePath path;
    private final HoodieStorage storage;
    private final FileFormatUtils parquetUtils;
    /** Iterators created by this reader that still have to be closed by {@link #close()}. */
    private final List<ClosableIterator<?>> readerIterators = new ArrayList<>();
    /** Spark schema of the file; populated together with {@link #schemaOption} on first use. */
    private Option<StructType> structTypeOption = Option.empty();
    /** Avro schema of the file; populated on the first call to {@link #getSchema()}. */
    private Option<Schema> schemaOption = Option.empty();

    /**
     * Creates a reader for {@code storagePath}.
     *
     * <p>The reader works on its own storage instance, built from a new instance of the caller's
     * configuration, because the read methods below write read-specific keys into that
     * configuration.
     *
     * @param hoodieStorage storage used to derive this reader's private storage instance and to
     *                      look up the Parquet format utilities
     * @param storagePath   location of the Parquet file
     */
    public HoodieSparkParquetReader(HoodieStorage hoodieStorage, StoragePath storagePath) {
        this.path = storagePath;
        this.storage = hoodieStorage.newInstance(storagePath, hoodieStorage.getConf().newInstance());
        // NOTE(review): the value comes from an unrelated realtime-reader constant; this looks like
        // a constant substitution and the intended literal should be confirmed.
        this.storage.getConf().set(AvroSchemaConverter.ADD_LIST_ELEMENT_RECORDS, HoodieRealtimeRecordReader.DEFAULT_REALTIME_SKIP_MERGE);
        this.parquetUtils = HoodieIOFactory.getIOFactory(hoodieStorage).getFileFormatUtils(HoodieFileFormat.PARQUET);
    }

    /** Delegates to the Parquet format utilities to read the min/max record keys of the file. */
    @Override
    public String[] readMinMaxRecordKeys() {
        return this.parquetUtils.readMinMaxRecordKeys(this.storage, this.path);
    }

    /** Delegates to the Parquet format utilities to read the bloom filter from the file metadata. */
    @Override
    public BloomFilter readBloomFilter() {
        return this.parquetUtils.readBloomFilterFromMetadata(this.storage, this.path);
    }

    /**
     * Delegates to the Parquet format utilities to filter the file's row keys.
     *
     * @param set candidate keys passed through to the format utilities
     * @return the pairs produced by the format utilities for this file
     */
    @Override
    public Set<Pair<String, Long>> filterRowKeys(Set<String> set) {
        return this.parquetUtils.filterRowKeys(this.storage, this.path, set);
    }

    /**
     * Returns an iterator over the file's records.
     *
     * @param schema  not used by this implementation
     * @param schema2 schema the returned records are read with
     * @throws IOException if the underlying Parquet reader cannot be created
     */
    @Override
    public ClosableIterator<HoodieRecord<InternalRow>> getRecordIterator(Schema schema, Schema schema2) throws IOException {
        return getRecordIterator(schema2);
    }

    /**
     * Returns an iterator over the file's records, each row wrapped in a {@link HoodieSparkRecord}.
     *
     * @param schema Avro schema the rows are read with
     * @throws IOException if the underlying Parquet reader cannot be created
     */
    @Override
    public ClosableIterator<HoodieRecord<InternalRow>> getRecordIterator(Schema schema) throws IOException {
        ClosableIterator<UnsafeRow> rows = getUnsafeRowIterator(schema);
        return new CloseableMappingIterator<>(rows, row -> TypeUtils.unsafeCast(new HoodieSparkRecord(row)));
    }

    /**
     * Returns an iterator over the record keys of the file, reading with the record-key schema
     * supplied by {@link HoodieAvroUtils#getRecordKeySchema()}.
     *
     * @throws IOException if the underlying Parquet reader cannot be created
     */
    @Override
    public ClosableIterator<String> getRecordKeyIterator() throws IOException {
        ClosableIterator<UnsafeRow> rows = getUnsafeRowIterator(HoodieAvroUtils.getRecordKeySchema());
        return new CloseableMappingIterator<>(rows, row -> {
            HoodieSparkRecord record = TypeUtils.unsafeCast(new HoodieSparkRecord(row));
            return record.getRecordKey();
        });
    }

    /**
     * Converts {@code schema} to its cached Spark {@link StructType} and delegates to
     * {@link #getUnsafeRowIterator(StructType)}.
     *
     * @param schema Avro schema the rows are read with
     * @throws IOException if the underlying Parquet reader cannot be created
     */
    public ClosableIterator<UnsafeRow> getUnsafeRowIterator(Schema schema) throws IOException {
        return getUnsafeRowIterator(HoodieInternalRowUtils.getCachedSchema(schema));
    }

    /**
     * Opens a Parquet reader on the file and returns its rows projected to {@link UnsafeRow}.
     *
     * <p>The request schema and the projection are both derived from a
     * {@link SparkBasicSchemaEvolution} built from the file schema, {@code structType} and the
     * session time zone. The returned iterator is registered with this reader so that
     * {@link #close()} releases it.
     *
     * @param structType Spark schema requested by the caller
     * @return iterator over the projected rows; closing it (or this reader) releases the reader
     * @throws IOException if the underlying Parquet reader cannot be created
     */
    public ClosableIterator<UnsafeRow> getUnsafeRowIterator(StructType structType) throws IOException {
        SparkBasicSchemaEvolution schemaEvolution =
                new SparkBasicSchemaEvolution(getStructSchema(), structType, SQLConf.get().sessionLocalTimeZone());
        // Built before the Parquet reader so that a failure here cannot orphan an open reader.
        UnsafeProjection projection = schemaEvolution.generateUnsafeProjection();

        String requestSchemaJson = schemaEvolution.getRequestSchema().json();
        this.storage.getConf().set("parquet.read.schema", requestSchemaJson);
        this.storage.getConf().set(ParquetReadSupport.SPARK_ROW_REQUESTED_SCHEMA(), requestSchemaJson);
        // Copy the session's Parquet decoding flags into the configuration handed to the reader.
        this.storage.getConf().set(
                SQLConf.PARQUET_BINARY_AS_STRING().key(),
                SQLConf.get().getConf(SQLConf.PARQUET_BINARY_AS_STRING()).toString());
        this.storage.getConf().set(
                SQLConf.PARQUET_INT96_AS_TIMESTAMP().key(),
                SQLConf.get().getConf(SQLConf.PARQUET_INT96_AS_TIMESTAMP()).toString());

        // Kept raw on purpose: the element type is dictated by ParquetReadSupport, which is not
        // declared in this file.
        @SuppressWarnings("rawtypes")
        ParquetReader parquetReader = ParquetReader.builder(new ParquetReadSupport(), new Path(this.path.toUri()))
                .withConf((Configuration) this.storage.getConf().unwrapAs(Configuration.class))
                .build();
        // Rows are treated as InternalRow, which is what the projection consumes.
        @SuppressWarnings("unchecked")
        ParquetReaderIterator<InternalRow> rowIterator = new ParquetReaderIterator<InternalRow>(parquetReader);

        // A bound method reference null-checks its receiver, so no explicit check is needed.
        CloseableMappingIterator<InternalRow, UnsafeRow> projectedIterator =
                new CloseableMappingIterator<>(rowIterator, projection::apply);
        this.readerIterators.add(projectedIterator);
        return projectedIterator;
    }

    /**
     * Returns the Avro schema of the file.
     *
     * <p>On the first call the Parquet schema is read from the file, converted to a Spark
     * {@link StructType} and then to Avro; both results are cached for later calls.
     */
    @Override
    public Schema getSchema() {
        if (this.schemaOption.isEmpty()) {
            MessageType messageType = ((ParquetUtils) this.parquetUtils).readSchema(this.storage, this.path);
            StructType structType =
                    new ParquetToSparkSchemaConverter((Configuration) this.storage.getConf().unwrapAs(Configuration.class))
                            .convert(messageType);
            this.structTypeOption = Option.of(structType);
            this.schemaOption = Option.of(
                    SparkAdapterSupport$.MODULE$.sparkAdapter()
                            .getAvroSchemaConverters()
                            .toAvroType(structType, true, messageType.getName(), ""));
        }
        return this.schemaOption.get();
    }

    /**
     * Returns the Spark schema of the file, loading it through {@link #getSchema()} if it has not
     * been cached yet.
     */
    protected StructType getStructSchema() {
        if (this.structTypeOption.isEmpty()) {
            // getSchema() fills structTypeOption as a side effect.
            getSchema();
        }
        return this.structTypeOption.get();
    }

    /**
     * Closes every iterator handed out by this reader.
     *
     * <p>All iterators are attempted even if one of them fails; the first failure is rethrown
     * afterwards with any later failures attached as suppressed exceptions. The tracked list is
     * cleared, so a repeated call does not close the same iterators again.
     */
    @Override
    public void close() {
        RuntimeException firstFailure = null;
        for (ClosableIterator<?> iterator : this.readerIterators) {
            try {
                iterator.close();
            } catch (RuntimeException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    firstFailure.addSuppressed(e);
                }
            }
        }
        this.readerIterators.clear();
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /** Delegates to the Parquet format utilities to obtain the row count of the file. */
    @Override
    public long getTotalRecords() {
        return this.parquetUtils.getRowCount(this.storage, this.path);
    }
}
