/*
 * Copyright (C) 2022 HopsWorks AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode.cloud.sync;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CloudProvider;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.CloudTestHelper;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HopsFilesTestHelper;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous;
import org.apache.hadoop.hdfs.server.blockmanagement.ProvidedBlocksChecker;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.CloudFsDatasetImpl;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.CloudPersistenceProviderAzureImpl;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.CloudPersistenceProviderGCSImpl;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.CloudPersistenceProviderS3Impl;
import org.apache.hadoop.hdfs.server.namenode.CloudBlockReportTestHelper;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import static org.apache.hadoop.hdfs.HopsFilesTestHelper.verifyFile;
import static org.apache.hadoop.hdfs.HopsFilesTestHelper.writeFile;
import static org.apache.hadoop.hdfs.server.namenode.cloud.failures.TestCloudDNFailures.waitDNCount;
import static org.junit.Assert.fail;

/**
 * Shared driver for the cloud append + hsync/hflush tests that stop datanodes
 * while a file is open for append.
 *
 * <p>The helper starts a {@link MiniDFSCluster} configured for cloud
 * persistence, appends to a file stored under a directory with the "CLOUD"
 * storage policy, stops datanodes one at a time and verifies that the data
 * written so far can still be read back.
 */
public class AppendAndSyncDNFailureHelper {
  static final Log LOG = LogFactory.getLog(AppendAndSyncDNFailureHelper.class);

  /** Path of the directory that gets the CLOUD storage policy. */
  private static final String TEST_DIR = "/dir";
  /** Path of the file that is written, appended to and verified. */
  private static final String TEST_FILE = "/dir/file1";

  /**
   * Purges existing cloud data for the bucket prefix, creates a fresh bucket and
   * starts a formatted mini cluster with cloud persistence enabled.
   *
   * @param testname             name of the running test, passed to the bucket creation helper
   * @param defaultCloudProvider cloud provider to configure the cluster with
   * @param testBucketPrefix     prefix used for purging and creating the test bucket
   * @param numDNs               number of datanodes to start
   * @param blkSize              value for {@code DFS_BLOCK_SIZE_KEY}
   * @return the started cluster, after {@code waitActive()} has returned
   * @throws IOException if purging, bucket creation or cluster start-up fails
   */
  MiniDFSCluster startCluster(String testname, CloudProvider defaultCloudProvider,
                              String testBucketPrefix,
                              int numDNs,
                              long blkSize) throws IOException {

    CloudTestHelper.purgeCloudData(defaultCloudProvider, testBucketPrefix);

    Logger.getRootLogger().setLevel(Level.INFO);
    Logger.getLogger(CloudFsDatasetImpl.class).setLevel(Level.ALL);
    Logger.getLogger(CloudPersistenceProviderS3Impl.class).setLevel(Level.DEBUG);
    Logger.getLogger(CloudPersistenceProviderGCSImpl.class).setLevel(Level.DEBUG);
    Logger.getLogger(CloudPersistenceProviderAzureImpl.class).setLevel(Level.DEBUG);
    Logger.getLogger(ProvidedBlocksChecker.class).setLevel(Level.DEBUG);
    Logger.getLogger(FSNamesystem.class).setLevel(Level.DEBUG);
    Logger.getLogger(DFSClient.class).setLevel(Level.DEBUG);

    Configuration conf = new Configuration();
    conf.setBoolean(DFSConfigKeys.DFS_CLOUD_STORE_SMALL_FILES_IN_DB_KEY, false);

    // Keep client retries/failover short so that a stopped datanode is noticed quickly.
    conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, /*default 15*/ 1);
    conf.setInt(DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY, /*default 10*/ 1);
    conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY, /*default 500*/ 500);
    conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY, /*default 15000*/ 1000);
    conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_KEY, /*default 0*/ 0);
    conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
      /*default 0*/ 0);
    conf.setInt(DFSConfigKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
      /*default 45*/ 2);
    conf.setInt(DFSConfigKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, /*default 10*/ 1);
    conf.set(HdfsClientConfigKeys.Retry.POLICY_SPEC_KEY, "1000,2");

    conf.setBoolean(DFSConfigKeys.DFS_ENABLE_CLOUD_PERSISTENCE, true);
    conf.set(DFSConfigKeys.DFS_CLOUD_PROVIDER, defaultCloudProvider.name());
    conf.setInt(DFSConfigKeys.DFS_BR_LB_MAX_CONCURRENT_BR_PER_NN, numDNs);
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blkSize);

    conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 500);
    // If a datanode fails then the unfinished block report entry will linger for some time
    // before it is reclaimed. Until the entry is reclaimed other datanodes will not be
    // able to block report. Reducing the BR max processing time to quickly reclaim
    // unfinished block reports.
    conf.setLong(DFSConfigKeys.DFS_BR_LB_MAX_BR_PROCESSING_TIME, 5 * 1000);
    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L);
    conf.setLong(DFSConfigKeys.DFS_CLOUD_MARK_BLOCKS_CORRUPT_OR_MISSING_AFTER_KEY, 0);

    // This key used to be set twice (99, then 100); only the last value was ever effective.
    conf.setInt(DFSConfigKeys.DFS_DN_CLOUD_CACHE_DELETE_ACTIVATION_PRECENTAGE_KEY, 100);

    CloudTestHelper.createRandomBucket(conf, testBucketPrefix, testname);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs)
      .storageTypes(CloudTestHelper.genStorageTypes(numDNs)).format(true).build();
    cluster.waitActive();
    return cluster;
  }

  /**
   * Runs the append / sync / datanode-failure scenario on a three datanode cluster:
   * <ol>
   *   <li>write {@code initSize} bytes to a file under a CLOUD policy directory;</li>
   *   <li>open the file for append, write {@code appendSize} bytes and hsync/hflush;</li>
   *   <li>stop the first datanode that reports open replicas, verify the file,
   *       append and sync again;</li>
   *   <li>stop a datanode that holds a replica of the last block, append once more,
   *       close the stream and verify the file;</li>
   *   <li>trigger a provided-blocks report, wait, then verify the file and match
   *       the metadata.</li>
   * </ol>
   * Any exception raised by the scenario is logged and converted into an
   * {@link AssertionError} that carries the original exception as its cause. The
   * cluster is always shut down before the method returns.
   *
   * @param testname             name of the running test
   * @param defaultCloudProvider cloud provider to configure the cluster with
   * @param testBucketPrefix     prefix of the cloud bucket used by the test
   * @param isSync               {@code true} to call {@code hsync()}, {@code false} for {@code hflush()}
   * @param blkSize              block size used to configure the cluster
   * @param initSize             number of bytes written when the file is created
   * @param appendSize           number of bytes written by each of the three appends
   * @throws IOException declared for compatibility with existing callers
   */
  public void testAppendAndHsyncDNFailure(String testname, CloudProvider defaultCloudProvider,
                                          String testBucketPrefix, boolean isSync, int blkSize,
                                          int initSize, int appendSize) throws IOException {
    MiniDFSCluster cluster = null;
    try {
      final int NUM_DN = 3;

      cluster = startCluster(testname, defaultCloudProvider, testBucketPrefix, NUM_DN, blkSize);
      DistributedFileSystem dfs = cluster.getFileSystem();
      dfs.mkdirs(new Path(TEST_DIR));
      dfs.setStoragePolicy(new Path(TEST_DIR), "CLOUD");

      writeFile(dfs, TEST_FILE, initSize);  // write to cloud
      int dataWritten = initSize;
      CloudTestHelper.matchMetadata(cluster.getConfiguration(0));

      LOG.info("Opening the file for append");
      FSDataOutputStream out = dfs.append(new Path(TEST_FILE));
      HopsFilesTestHelper.writeData(out, dataWritten, appendSize);
      syncOrFlush(out, isSync);
      dataWritten += appendSize;
      LOG.info("Appended " + appendSize + " bytes");

      // ----------- kill the datanode that has open replicas -----------
      String poolId = cluster.getNamesystem().getBlockPoolId();
      int dnToKill = findDataNodeWithOpenReplicas(cluster, poolId, NUM_DN);
      killDataNode(cluster, dnToKill, NUM_DN - 1);

      // ----------- read test and write some more data -----------
      // first we should be able to read all the data written
      verifyFile(dfs, TEST_FILE, dataWritten);
      LOG.info("Verified file. data written " + dataWritten);

      // and write some more
      HopsFilesTestHelper.writeData(out, dataWritten, appendSize);
      syncOrFlush(out, isSync);
      dataWritten += appendSize;
      verifyFile(dfs, TEST_FILE, dataWritten);
      LOG.info("Written " + appendSize + " bytes. Total data written " + dataWritten);

      // ----------- kill a datanode again -----------
      List<BlockInfoContiguous> blks = new ArrayList<>(CloudTestHelper.findAllBlocks().values());
      // Explicit check instead of a Java `assert`, which is skipped when the JVM runs without -ea.
      if (blks.isEmpty()) {
        fail("Expected at least one block for " + TEST_FILE + " but found none");
      }
      Collections.sort(blks);

      BlockInfoContiguous lastBlockInfo = blks.get(blks.size() - 1);
      Block blk = new Block(lastBlockInfo.getBlockId(),
        lastBlockInfo.getNumBytes(),
        lastBlockInfo.getGenerationStamp(),
        lastBlockInfo.getCloudBucket());
      ExtendedBlock eb = new ExtendedBlock(poolId, blk);
      LOG.info("Last block is " + blk);

      // Only NUM_DN - 1 datanodes are left in the cluster's datanode list at this point.
      dnToKill = findDataNodeWithReplica(cluster, eb, NUM_DN - 1);
      killDataNode(cluster, dnToKill, NUM_DN - 2);

      // ----------- read and write some more data -----------
      HopsFilesTestHelper.writeData(out, dataWritten, appendSize);
      dataWritten += appendSize;
      LOG.info("Written " + appendSize + " bytes. Total data written " + dataWritten);

      out.close();
      verifyFile(dfs, TEST_FILE, dataWritten);

      ProvidedBlocksChecker pbc =
        cluster.getNamesystem().getBlockManager().getProvidedBlocksChecker();
      long count = pbc.getProvidedBlockReportsCount();
      pbc.scheduleBlockReportNow();
      CloudBlockReportTestHelper.waitForBRCompletion(pbc, count + 1);
      Thread.sleep(5000);

      verifyFile(dfs, TEST_FILE, dataWritten);
      CloudTestHelper.matchMetadata(cluster.getConfiguration(0));

    } catch (InterruptedException e) {
      // Restore the interrupt flag so the test runner can observe the interruption.
      Thread.currentThread().interrupt();
      LOG.error("Test " + testname + " was interrupted", e);
      throw new AssertionError("Test " + testname + " was interrupted", e);
    } catch (Exception e) {
      // Keep the original exception as the cause; fail(e.getMessage()) would drop the stack
      // trace and produce an empty failure message when getMessage() is null.
      LOG.error("Test " + testname + " failed", e);
      throw new AssertionError(e.getMessage(), e);
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  /** Calls {@code hsync()} when {@code isSync} is true, otherwise {@code hflush()}. */
  private static void syncOrFlush(FSDataOutputStream out, boolean isSync) throws IOException {
    if (isSync) {
      out.hsync();
    } else {
      out.hflush();
    }
  }

  /**
   * Returns the index of the first of the {@code dnCount} datanodes whose dataset
   * reports a non-zero open replica count for {@code poolId}, or -1 if there is none.
   */
  private static int findDataNodeWithOpenReplicas(MiniDFSCluster cluster, String poolId,
                                                  int dnCount) throws Exception {
    for (int i = 0; i < dnCount; i++) {
      int activeReplicas = ((CloudFsDatasetImpl) cluster.getDataNodes().get(i).getFSDataset())
        .getOpenReplicasCount(poolId);
      if (activeReplicas != 0) {
        LOG.info("Datanode that will be killed " + cluster.getDataNodes().get(i)
          .getFSDataset().getVolumes().get(0).getBasePath());
        return i;
      }
    }
    return -1;
  }

  /**
   * Returns the index of the first of the {@code dnCount} datanodes whose dataset
   * returns a non-null replica for {@code eb}, or -1 if there is none.
   */
  private static int findDataNodeWithReplica(MiniDFSCluster cluster, ExtendedBlock eb,
                                             int dnCount) throws Exception {
    for (int i = 0; i < dnCount; i++) {
      if (cluster.getDataNodes().get(i).getFSDataset().getReplica(eb) != null) {
        LOG.info("Choosing DN: " + cluster.getDataNodes().get(i).getFSDataset().getVolumes()
          .get(0).getBasePath() + " to kill");
        return i;
      }
    }
    return -1;
  }

  /**
   * Stops the datanode at index {@code dnToKill} and waits until the cluster reports
   * {@code expectedDNCount} datanodes. Fails the test when {@code dnToKill} is -1.
   */
  private static void killDataNode(MiniDFSCluster cluster, int dnToKill,
                                   int expectedDNCount) throws Exception {
    if (dnToKill == -1) {
      fail("No datanode selected to kill");
    }
    cluster.stopDataNode(dnToKill);
    waitDNCount(cluster, expectedDNCount);
    LOG.info("Killed datanode");
  }
}
