package org.apache.spark.sql.hudi;

import io.hops.hudi.com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.Opcodes;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.exception.HoodieException;
import org.apache.log4j.Logger;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import scala.Enumeration;
import scala.MatchError;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.IterableLike;
import scala.collection.JavaConversions$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Iterable$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.Buffer;
import scala.collection.mutable.Buffer$;
import scala.collection.mutable.HashMap;
import scala.collection.mutable.HashSet;
import scala.collection.mutable.HashSet$;
import scala.collection.mutable.ListBuffer;
import scala.collection.mutable.Set;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.LongRef;

/* compiled from: DedupeSparkJob.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005%c\u0001B\t\u0013\u0001uA\u0001\u0002\n\u0001\u0003\u0002\u0003\u0006I!\n\u0005\ta\u0001\u0011\t\u0011)A\u0005K!A\u0011\u0007\u0001B\u0001B\u0003%Q\u0005\u0003\u00053\u0001\t\u0005\t\u0015!\u00034\u0011!9\u0004A!A!\u0002\u0013A\u0004\u0002C \u0001\u0005\u0003\u0005\u000b\u0011\u0002!\t\u000b%\u0003A\u0011\u0001&\t\u000fI\u0003!\u0019!C\u0001'\"1q\u000b\u0001Q\u0001\nQCq\u0001\u0017\u0001C\u0002\u0013\u0005\u0011\f\u0003\u0004a\u0001\u0001\u0006IA\u0017\u0005\u0006C\u0002!\tA\u0019\u0005\u0006i\u0002!I!\u001e\u0005\b\u0003\u0007\u0001A\u0011BA\u0003\u0011\u001d\ti\u0002\u0001C\u0001\u0003?A\u0011\"!\r\u0001#\u0003%\t!a\r\u0003\u001d\u0011+G-\u001e9f'B\f'o\u001b&pE*\u00111\u0003F\u0001\u0005QV$\u0017N\u0003\u0002\u0016-\u0005\u00191/\u001d7\u000b\u0005]A\u0012!B:qCJ\\'BA\r\u001b\u0003\u0019\t\u0007/Y2iK*\t1$A\u0002pe\u001e\u001c\u0001a\u0005\u0002\u0001=A\u0011qDI\u0007\u0002A)\t\u0011%A\u0003tG\u0006d\u0017-\u0003\u0002$A\t1\u0011I\\=SK\u001a\f\u0001BY1tKB\u000bG\u000f\u001b\t\u0003M5r!aJ\u0016\u0011\u0005!\u0002S\"A\u0015\u000b\u0005)b\u0012A\u0002\u001fs_>$h(\u0003\u0002-A\u00051\u0001K]3eK\u001aL!AL\u0018\u0003\rM#(/\u001b8h\u0015\ta\u0003%A\fekBd\u0017nY1uK\u0012\u0004\u0016M\u001d;ji&|g\u000eU1uQ\u0006\u0001\"/\u001a9bSJ|U\u000f\u001e9viB\u000bG\u000f[\u0001\u000bgFd7i\u001c8uKb$\bC\u0001\u001b6\u001b\u0005!\u0012B\u0001\u001c\u0015\u0005)\u0019\u0016\u000bT\"p]R,\u0007\u0010^\u0001\u0003MN\u0004\"!O\u001f\u000e\u0003iR!aN\u001e\u000b\u0005qB\u0012A\u00025bI>|\u0007/\u0003\u0002?u\tQa)\u001b7f'f\u001cH/Z7\u0002\u0015\u0011,G-\u001e9f)f\u0004X\r\u0005\u0002B\u000b:\u0011!iQ\u0007\u0002%%\u0011AIE\u0001\u000b\t\u0016$U\u000f]3UsB,\u0017B\u0001$H\u0005\u00151\u0016\r\\;f\u0013\tA\u0005EA\u0006F]VlWM]1uS>t\u0017A\u0002\u001fj]&$h\bF\u0004L\u00196su\nU)\u0011\u0005\t\u0003\u0001\"\u0002\u0013\b\u0001\u0004)\u0003\"\u0002\u0019\b\u0001\u0004)\u0003\"B\u0019\b\u0001\u0004)\u0003\"\u0002\u001a\b\u0001\u0004\u0019\u0004\"B\u001c
\b\u0001\u0004A\u0004\"B \b\u0001\u0004\u0001\u0015aC:qCJ\\\u0007*\u001a7qKJ,\u0012\u0001\u0016\t\u0003\u0005VK!A\u0016\n\u0003\u0017M\u0003\u0018M]6IK2\u0004XM]\u0001\rgB\f'o\u001b%fYB,'\u000fI\u0001\u0004\u0019>;U#\u0001.\u0011\u0005msV\"\u0001/\u000b\u0005uC\u0012!\u00027pORR\u0017BA0]\u0005\u0019aunZ4fe\u0006!AjT$!\u000319W\r\u001e#va\u0016\\U-\u001f#G)\t\u0019'\u000f\u0005\u0002e_:\u0011Q-\u001c\b\u0003M2t!aZ6\u000f\u0005!TgB\u0001\u0015j\u0013\u0005Y\u0012BA\r\u001b\u0013\t9\u0002$\u0003\u0002\u0016-%\u0011a\u000eF\u0001\ba\u0006\u001c7.Y4f\u0013\t\u0001\u0018OA\u0005ECR\fgI]1nK*\u0011a\u000e\u0006\u0005\u0006g2\u0001\r!J\u0001\bi\ndg*Y7f\u0003A\u0001H.\u00198EkBd\u0017nY1uK\u001aK\u0007\u0010F\u0001w!\u00119H0\n@\u000e\u0003aT!!\u001f>\u0002\u000f5,H/\u00192mK*\u00111\u0010I\u0001\u000bG>dG.Z2uS>t\u0017BA?y\u0005\u001dA\u0015m\u001d5NCB\u00042a^@&\u0013\r\t\t\u0001\u001f\u0002\b\u0011\u0006\u001c\bnU3u\u000359W\r\u001e#fIV\u0004X\r\u00157b]R\u0019a/a\u0002\t\u000f\u0005%a\u00021\u0001\u0002\f\u00059A-\u001e9f\u001b\u0006\u0004\bC\u0002\u0014\u0002\u000e\u0015\n\t\"C\u0002\u0002\u0010=\u00121!T1q!\u00159\u00181CA\f\u0013\r\t)\u0002\u001f\u0002\u0007\u0005V4g-\u001a:\u0011\u0007Q\nI\"C\u0002\u0002\u001cQ\u00111AU8x\u000351\u0017\u000e\u001f#va2L7-\u0019;fgR!\u0011\u0011EA\u0014!\ry\u00121E\u0005\u0004\u0003K\u0001#\u0001B+oSRD\u0011\"!\u000b\u0010!\u0003\u0005\r!a\u000b\u0002\r\u0011\u0014\u0018PU;o!\ry\u0012QF\u0005\u0004\u0003_\u0001#a\u0002\"p_2,\u0017M\\\u0001\u0018M&DH)\u001e9mS\u000e\fG/Z:%I\u00164\u0017-\u001e7uIE*\"!!\u000e+\t\u0005-\u0012qG\u0016\u0003\u0003s\u0001B!a\u000f\u0002F5\u0011\u0011Q\b\u0006\u0005\u0003\u007f\t\t%A\u0005v]\u000eDWmY6fI*\u0019\u00111\t\u0011\u0002\u0015\u0005tgn\u001c;bi&|g.\u0003\u0003\u0002H\u0005u\"!E;oG\",7m[3e-\u0006\u0014\u0018.\u00198dK\u0002")
/* loaded from: input_file:org/apache/spark/sql/hudi/DedupeSparkJob.class */
/*
 * Repairs duplicate record keys found in the latest base files of one partition of a Hudi table.
 *
 * Work is done on copies: the partition's latest base files are copied into repairOutputPath,
 * files holding records scheduled for removal are rewritten without those records, the result
 * is verified (no duplicate keys left, no record key lost) and only then - and only when not
 * running in dry-run mode - the repaired files are copied back into the partition.
 */
public class DedupeSparkJob {
    /** Column positions in the rows produced by the duplicate-lookup query in {@link #planDuplicateFix()}. */
    private static final int FILE_NAME_COLUMN = 2;
    private static final int COMMIT_TIME_COLUMN = 3;

    /** Base path of the table. */
    private final String basePath;
    /** Path of the partition to repair, relative to {@link #basePath}. */
    private final String duplicatedPartitionPath;
    /** Directory that receives the copied / rewritten files. */
    private final String repairOutputPath;
    private final SQLContext sqlContext;
    private final FileSystem fs;
    /** One of the {@code DeDupeType} values; selects which duplicate rows are scheduled for deletion. */
    private final Enumeration.Value dedupeType;
    private final SparkHelper sparkHelper;
    private final Logger LOG = Logger.getLogger(getClass());

    /** @return the helper built from the SQL context and file system passed to the constructor */
    public SparkHelper sparkHelper() {
        return this.sparkHelper;
    }

    /** @return the logger of this job */
    public Logger LOG() {
        return this.LOG;
    }

    /**
     * Finds the record keys that occur more than once in a table.
     *
     * @param str name of a table registered with the SQL context; it is spliced into the SQL text
     *            unescaped, so it must be a trusted identifier
     * @return one row per duplicated key with the columns {@code dupe_key} and {@code dupe_cnt}
     */
    public Dataset<Row> getDupeKeyDF(String str) {
        String dupeKeySql = "\n      select  `" + HoodieRecord.RECORD_KEY_METADATA_FIELD + "` as dupe_key,\n      count(*) as dupe_cnt\n      from " + str
                + "\n      group by `" + HoodieRecord.RECORD_KEY_METADATA_FIELD + "`\n      having dupe_cnt > 1\n      ";
        return this.sqlContext.sql(dupeKeySql);
    }

    /**
     * Builds a file-system view over the files currently listed under the duplicated partition,
     * restricted to completed instants of the commit timeline.
     */
    private HoodieTableFileSystemView latestBaseFileView() {
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(this.fs.getConf()).setBasePath(this.basePath).build();
        Path partitionPath = new Path(this.basePath + "/" + this.duplicatedPartitionPath);
        return new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants(), this.fs.listStatus(partitionPath));
    }

    /**
     * Loads the partition's latest base files into a temp table, looks up every row whose record
     * key is duplicated and turns those rows into a deletion plan.
     *
     * @return file id -&gt; record keys to drop from that file
     */
    private HashMap<String, HashSet<String>> planDuplicateFix() {
        String tmpTableName = "htbl_" + System.currentTimeMillis();
        String dupeKeysTableName = tmpTableName + "_dupeKeys";
        Buffer buffer = (Buffer) JavaConversions$.MODULE$.deprecated$u0020asScalaBuffer((List) latestBaseFileView().getLatestBaseFiles().collect(Collectors.toList())).map(hoodieBaseFile -> {
            return hoodieBaseFile.getPath();
        }, Buffer$.MODULE$.canBuildFrom());
        // Previously an empty interpolation was logged here, which rendered as "()".
        LOG().info(" List of files under partition: " + this.duplicatedPartitionPath + " =>  " + buffer.mkString(" "));
        this.sqlContext.parquetFile(buffer).registerTempTable(tmpTableName);
        getDupeKeyDF(tmpTableName).registerTempTable(dupeKeysTableName);
        // Column order matters: FILE_NAME_COLUMN and COMMIT_TIME_COLUMN index into this select list.
        String dupeRowsSql = "\n        SELECT `_hoodie_record_key`, `_hoodie_partition_path`, `_hoodie_file_name`, `_hoodie_commit_time`\n        FROM " + tmpTableName + " h\n        JOIN " + dupeKeysTableName
                + " d\n        ON h.`_hoodie_record_key` = d.dupe_key\n                      ";
        return getDedupePlan(JavaConversions$.MODULE$.deprecated$u0020asScalaBuffer(this.sqlContext.sql(dupeRowsSql).collectAsList()).groupBy(row -> {
            return row.getString(0);
        }));
    }

    /**
     * Converts the duplicated rows, grouped by record key, into a deletion plan.
     *
     * @param map record key -&gt; all rows carrying that key
     * @return file id -&gt; record keys to drop from that file
     * @throws IllegalArgumentException if the configured dedupe type is not one of the known types
     */
    private HashMap<String, HashSet<String>> getDedupePlan(Map<String, Buffer<Row>> map) {
        HashMap<String, HashSet<String>> hashMap = new HashMap<>();
        map.foreach(tuple2 -> {
            $anonfun$getDedupePlan$1(this, hashMap, tuple2);
            return BoxedUnit.UNIT;
        });
        // The plan can be large; only render it when it will actually be logged.
        if (LOG().isDebugEnabled()) {
            LOG().debug("fileToDeleteKeyMap size: " + hashMap.size() + ", map: " + hashMap);
        }
        return hashMap;
    }

    /**
     * Runs the repair.
     *
     * @param z dry-run flag: when {@code true} the repaired files are produced and verified in the
     *          repair output path but are not copied back into the partition
     * @throws HoodieException if duplicates remain after the rewrite, or if a record key present in
     *                         the source files is missing from the repaired files
     */
    public void fixDuplicates(boolean z) {
        Map map = ((TraversableOnce) JavaConversions$.MODULE$.deprecated$u0020asScalaBuffer((List) latestBaseFileView().getLatestBaseFiles().collect(Collectors.toList())).map(hoodieBaseFile -> {
            return new Tuple2(hoodieBaseFile.getFileId(), new Path(hoodieBaseFile.getPath()));
        }, Buffer$.MODULE$.canBuildFrom())).toMap(Predef$.MODULE$.$conforms());
        HashMap<String, HashSet<String>> planDuplicateFix = planDuplicateFix();

        // 1. Copy every latest base file to the repair location (".bad" suffix for files in the plan).
        map.foreach(fileEntry -> {
            return BoxesRunTime.boxToBoolean($anonfun$fixDuplicates$2(this, planDuplicateFix, (Tuple2) fileEntry));
        });

        // 2. Rewrite each ".bad" copy without the keys scheduled for deletion.
        planDuplicateFix.foreach(planEntry -> {
            return BoxesRunTime.boxToBoolean($anonfun$fixDuplicates$3(this, map, planDuplicateFix, (Tuple2) planEntry));
        });

        // 3. The repaired files must not contain any duplicated key.
        this.sqlContext.read().parquet(this.repairOutputPath + "/*.parquet").registerTempTable("fixedTbl");
        Dataset<Row> dupeKeyDF = getDupeKeyDF("fixedTbl");
        if (dupeKeyDF.count() != 0) {
            dupeKeyDF.show();
            throw new HoodieException("Still found some duplicates!!.. Inspect output");
        }

        // 4. Every distinct key of the source files must still exist in the repaired files.
        Dataset sourceKeys = sparkHelper().getDistinctKeyDF(((TraversableOnce) map.map(fileEntry -> {
            return ((Path) ((Tuple2) fileEntry)._2()).toString();
        }, Iterable$.MODULE$.canBuildFrom())).toList());
        Dataset repairedKeys = sparkHelper().getDistinctKeyDF(((TraversableOnce) map.map(fileEntry -> {
            return this.repairOutputPath + "/" + ((Path) ((Tuple2) fileEntry)._2()).getName();
        }, Iterable$.MODULE$.canBuildFrom())).toList());
        Dataset except = sourceKeys.except(repairedKeys);
        if (except.count() != 0) {
            except.show();
            throw new HoodieException("Some records in source are not found in fixed files. Inspect output!!");
        }
        Predef$.MODULE$.println("No duplicates found & counts are in check!!!! ");

        // 5. Copy the repaired files back over the partition (only logged when in dry-run mode).
        map.foreach(fileEntry -> {
            copyRepairedFileBack((Tuple2) fileEntry, z);
            return BoxedUnit.UNIT;
        });
    }

    /** Default value of the {@code fixDuplicates} argument: dry run. */
    public boolean fixDuplicates$default$1() {
        return true;
    }

    /**
     * Copies one repaired file from the repair output path back into the partition, overwriting
     * the existing file; in dry-run mode only the intended copy is logged.
     */
    private void copyRepairedFileBack(Tuple2 fileEntry, boolean dryRun) {
        if (fileEntry == null) {
            throw new MatchError(fileEntry);
        }
        Path originalFile = (Path) fileEntry._2();
        Path repairedFile = new Path(this.repairOutputPath + "/" + originalFile.getName());
        Path destination = new Path(this.basePath + "/" + this.duplicatedPartitionPath + "/" + originalFile.getName());
        if (dryRun) {
            LOG().info("[JUST KIDDING!!!] Copying from " + repairedFile + " to " + destination);
            return;
        }
        LOG().info("[FOR REAL!!!] Copying from " + repairedFile + " to " + destination);
        FileUtil.copy(this.fs, repairedFile, this.fs, destination, false, true, this.fs.getConf());
    }

    /**
     * Schedules {@code recordKey} for deletion from the file the given row was read from. The file
     * is identified by the part of its file-name column that precedes the first underscore.
     *
     * @return the result of adding the key to that file's set
     */
    private static boolean markKeyForDeletion(HashMap hashMap, String recordKey, Row row) {
        String fileId = ((String) row.apply(FILE_NAME_COLUMN)).split("_")[0];
        if (!hashMap.contains(fileId)) {
            hashMap.update(fileId, HashSet$.MODULE$.apply(Nil$.MODULE$));
        }
        return ((HashSet) hashMap.apply(fileId)).add(recordKey);
    }

    /** Parses the commit-time column of a row as a long. */
    private static long commitTimeOf(Row row) {
        return Long.parseLong((String) row.apply(COMMIT_TIME_COLUMN));
    }

    /** Raises {@code maxCommitTime} to the row's commit time when that one is larger. */
    private static void trackMaxCommitTime(LongRef maxCommitTime, Row row) {
        long commitTime = commitTimeOf(row);
        if (commitTime > maxCommitTime.elem) {
            maxCommitTime.elem = commitTime;
        }
    }

    /** Determines the largest commit time among the given rows (-1 when there are none). */
    private static LongRef maxCommitTimeOf(Buffer rows) {
        LongRef maxCommitTime = LongRef.create(-1L);
        rows.foreach(row -> {
            trackMaxCommitTime(maxCommitTime, (Row) row);
            return BoxedUnit.UNIT;
        });
        return maxCommitTime;
    }

    /** UPDATE_TYPE: every row of the group except the last one is scheduled for deletion. */
    private static void planForUpdateType(HashMap hashMap, String recordKey, Buffer rows) {
        ((IterableLike) rows.init()).foreach(row -> {
            return BoxesRunTime.boxToBoolean(markKeyForDeletion(hashMap, recordKey, (Row) row));
        });
    }

    /** INSERT_TYPE: every row whose commit time differs from the group's maximum is scheduled for deletion. */
    private static void planForInsertType(HashMap hashMap, String recordKey, Buffer rows) {
        LongRef maxCommitTime = maxCommitTimeOf(rows);
        rows.foreach(row -> {
            if (commitTimeOf((Row) row) == maxCommitTime.elem) {
                return BoxedUnit.UNIT;
            }
            return BoxesRunTime.boxToBoolean(markKeyForDeletion(hashMap, recordKey, (Row) row));
        });
    }

    /**
     * UPSERT_TYPE: rows whose commit time differs from the group's maximum are scheduled for
     * deletion; of the rows at the maximum commit time, all but the last are scheduled as well.
     */
    private static void planForUpsertType(HashMap hashMap, String recordKey, Buffer rows) {
        LongRef maxCommitTime = maxCommitTimeOf(rows);
        ListBuffer rowsAtMaxCommit = new ListBuffer();
        rows.foreach(row -> {
            if (commitTimeOf((Row) row) == maxCommitTime.elem) {
                return rowsAtMaxCommit.$plus$eq(row);
            }
            return BoxesRunTime.boxToBoolean(markKeyForDeletion(hashMap, recordKey, (Row) row));
        });
        ((scala.collection.immutable.List) rowsAtMaxCommit.toList().init()).foreach(row -> {
            return BoxesRunTime.boxToBoolean(markKeyForDeletion(hashMap, recordKey, (Row) row));
        });
    }

    // The $anonfun$... methods below are compiler-generated lambda bodies. Their signatures are
    // kept as they are; the duplicated bodies now delegate to the shared private helpers.

    public static final /* synthetic */ boolean $anonfun$getDedupePlan$2(HashMap hashMap, String str, Row row) {
        return markKeyForDeletion(hashMap, str, row);
    }

    public static final /* synthetic */ void $anonfun$getDedupePlan$3(LongRef longRef, Row row) {
        trackMaxCommitTime(longRef, row);
    }

    public static final /* synthetic */ void $anonfun$getDedupePlan$5(LongRef longRef, Row row) {
        trackMaxCommitTime(longRef, row);
    }

    public static final /* synthetic */ boolean $anonfun$getDedupePlan$7(HashMap hashMap, String str, Row row) {
        return markKeyForDeletion(hashMap, str, row);
    }

    /**
     * Plans the deletions for one group of duplicated rows according to the job's dedupe type.
     *
     * @param tuple2 (record key, rows carrying that key)
     * @throws IllegalArgumentException if the dedupe type is none of UPDATE, INSERT or UPSERT
     */
    public static final /* synthetic */ void $anonfun$getDedupePlan$1(DedupeSparkJob dedupeSparkJob, HashMap hashMap, Tuple2 tuple2) {
        if (tuple2 == null) {
            throw new MatchError(tuple2);
        }
        String recordKey = (String) tuple2._1();
        Buffer rows = (Buffer) tuple2._2();
        Enumeration.Value type = dedupeSparkJob.dedupeType;
        if (java.util.Objects.equals(DeDupeType$.MODULE$.UPDATE_TYPE(), type)) {
            planForUpdateType(hashMap, recordKey, rows);
        } else if (java.util.Objects.equals(DeDupeType$.MODULE$.INSERT_TYPE(), type)) {
            planForInsertType(hashMap, recordKey, rows);
        } else if (java.util.Objects.equals(DeDupeType$.MODULE$.UPSERT_TYPE(), type)) {
            planForUpsertType(hashMap, recordKey, rows);
        } else {
            throw new IllegalArgumentException("Please provide valid type for deduping!");
        }
    }

    /**
     * Copies one latest base file into the repair output path. Files that appear in the deletion
     * plan get a ".bad" suffix so that they can be rewritten under their real name afterwards.
     *
     * @param tuple2 (file id, path of the base file)
     */
    public static final /* synthetic */ boolean $anonfun$fixDuplicates$2(DedupeSparkJob dedupeSparkJob, HashMap hashMap, Tuple2 tuple2) {
        if (tuple2 == null) {
            throw new MatchError(tuple2);
        }
        String fileId = (String) tuple2._1();
        Path sourceFile = (Path) tuple2._2();
        String badSuffix = hashMap.contains(fileId) ? ".bad" : "";
        Path repairCopy = new Path(dedupeSparkJob.repairOutputPath + "/" + sourceFile.getName() + badSuffix);
        dedupeSparkJob.LOG().info("Copying from " + sourceFile + " to " + repairCopy);
        return FileUtil.copy(dedupeSparkJob.fs, sourceFile, dedupeSparkJob.fs, repairCopy, false, true, dedupeSparkJob.fs.getConf());
    }

    /**
     * Rewrites the ".bad" copy of one planned file under its real name, skipping the record keys
     * scheduled for deletion, and then removes the ".bad" copy.
     *
     * @param map     file id -&gt; path of the latest base file
     * @param hashMap file id -&gt; record keys to drop
     * @param tuple2  one entry of {@code hashMap}
     */
    public static final /* synthetic */ boolean $anonfun$fixDuplicates$3(DedupeSparkJob dedupeSparkJob, Map map, HashMap hashMap, Tuple2 tuple2) {
        if (tuple2 == null) {
            throw new MatchError(tuple2);
        }
        String fileId = (String) tuple2._1();
        String fileName = ((Path) map.apply(fileId)).getName();
        String commitTime = FSUtils.getCommitTime(fileName);
        Path badFile = new Path(dedupeSparkJob.repairOutputPath + "/" + fileName + ".bad");
        Path rewrittenFile = new Path(dedupeSparkJob.repairOutputPath + "/" + fileName);
        dedupeSparkJob.LOG().info(" Skipping and writing new file for : " + fileId);
        SparkHelpers$.MODULE$.skipKeysAndWriteNewFile(commitTime, dedupeSparkJob.fs, badFile, rewrittenFile, (Set) hashMap.apply(fileId));
        return dedupeSparkJob.fs.delete(badFile, true);
    }

    /**
     * @param str        base path of the table
     * @param str2       path of the partition to repair, relative to the base path
     * @param str3       directory that receives the copied / rewritten files
     * @param sQLContext SQL context used to read the parquet files and run the checks
     * @param fileSystem file system holding both the table and the repair output path
     * @param value      dedupe type, one of the {@code DeDupeType} values
     */
    public DedupeSparkJob(String str, String str2, String str3, SQLContext sQLContext, FileSystem fileSystem, Enumeration.Value value) {
        this.basePath = str;
        this.duplicatedPartitionPath = str2;
        this.repairOutputPath = str3;
        this.sqlContext = sQLContext;
        this.fs = fileSystem;
        this.dedupeType = value;
        this.sparkHelper = new SparkHelper(sQLContext, fileSystem);
    }
}
