/*
 * Copyright (C) 2022 HopsWorks
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode.cloud.failures;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CloudProvider;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.AppendTestUtil;
import org.apache.hadoop.hdfs.BlockMissingException;
import org.apache.hadoop.hdfs.CloudTestHelper;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.blockmanagement.ProvidedBlocksChecker;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.ProvidedBlocksCacheCleaner;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.CloudPersistenceProviderAzureImpl;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.CloudPersistenceProviderS3Impl;
import org.apache.hadoop.hdfs.server.namenode.cloud.TestClouds;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
import org.junit.rules.Timeout;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

import java.io.IOException;
import java.util.Collection;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import static org.apache.hadoop.hdfs.server.namenode.cloud.failures.TestCloudDNFailures.sleepSeconds;
import static org.apache.hadoop.hdfs.server.namenode.cloud.failures.TestCloudDNFailures.startNewDataNode;
import static org.junit.Assert.fail;

/**
 * Checks how in-flight reads of cloud-backed files behave when the datanode that wrote the
 * blocks is stopped while the reads are still running.
 *
 * <p>The test is parameterized over the cloud providers listed in
 * {@link TestClouds#CloudProviders}. Each scenario writes one block per file, starts an
 * additional datanode, starts slow readers, stops the first datanode and then waits for the
 * readers to finish. The value of
 * {@link DFSConfigKeys#DFS_CLOUD_MAX_PHANTOM_BLOCKS_FOR_READ_KEY} decides whether the readers are
 * expected to fail (value 1) or to succeed (value 2).
 */
@RunWith(Parameterized.class)
public class TestCloudFileReadFailureWithReplicas {
  static final Log LOG = AppendTestUtil.LOG;
  static String testBucketPrefix = "hops-test-TCFRFWR";

  /** Adjusts log levels of the cloud related components before every test. */
  @Before
  public void setup() {
    Logger.getLogger(ProvidedBlocksChecker.class).setLevel(Level.DEBUG);
    Logger.getLogger(CloudPersistenceProviderAzureImpl.class).setLevel(Level.DEBUG);
    Logger.getLogger(CloudPersistenceProviderS3Impl.class).setLevel(Level.DEBUG);
    Logger.getLogger(ProvidedBlocksCacheCleaner.class).setLevel(Level.WARN);
  }

  /**
   * Supplies the constructor arguments for the parameterized runner.
   *
   * @return the cloud providers to run every test against
   */
  @Parameterized.Parameters
  public static Collection<Object> configs() {
    return TestClouds.CloudProviders;
  }

  CloudProvider defaultCloudProvider = null;

  /**
   * @param cloudProvider the cloud provider used by this parameterized instance
   */
  public TestCloudFileReadFailureWithReplicas(CloudProvider cloudProvider) {
    this.defaultCloudProvider = cloudProvider;
  }

  @Rule
  public TestName testname = new TestName();

  @ClassRule
  public static Timeout classTimeout = Timeout.seconds(60*15);

  @Rule
  public Timeout timeout = Timeout.seconds(60*15);

  static final String DIR =
          "/" + TestCloudFileReadFailureWithReplicas.class.getSimpleName() + "/";

  {
    ((Log4JLogger) CloudPersistenceProviderS3Impl.LOG).getLogger().setLevel(Level.ALL);
  }

  /**
   * With a single phantom replica the readers must fail once the datanode is stopped. The
   * failure is expected to surface as an {@link ExecutionException} whose cause's cause is a
   * {@link BlockMissingException}; anything else is rethrown unchanged.
   */
  @Test
  public void TestFileReadFailureWithOneReplica() throws Exception {
    try {
      testFileReadOnDataNodeFailure(1, 3);
    } catch (ExecutionException e) {
      if (!isCausedByMissingBlock(e)) {
        throw e;
      }
      // expected failure
      return;
    }
    fail("Expected the read to fail with a BlockMissingException");
  }

  /**
   * With two phantom replicas the readers are expected to finish without any error even though
   * the first datanode is stopped while they are reading.
   */
  @Test
  public void TestFileReadWithMultipleReplica() throws Exception {
    try {
      testFileReadOnDataNodeFailure(2, 3);
    } catch (Throwable e) {
      LOG.info(e, e);
      // keep the original failure attached so that it shows up in the test report
      throw new AssertionError("No Exception was expected", e);
    }
  }

  /**
   * Tells whether the given failure wraps a {@link BlockMissingException} exactly two levels
   * down the cause chain. A missing intermediate cause yields {@code false} instead of a
   * {@link NullPointerException}.
   */
  private static boolean isCausedByMissingBlock(ExecutionException e) {
    final Throwable cause = e.getCause();
    return cause != null && cause.getCause() instanceof BlockMissingException;
  }

  /**
   * Blocks until every task has completed.
   *
   * @throws ExecutionException if one of the tasks failed
   * @throws InterruptedException if the calling thread is interrupted while waiting
   */
  private static void awaitAll(Future<?>[] futures)
          throws InterruptedException, ExecutionException {
    for (Future<?> f : futures) {
      f.get();
    }
  }

  /**
   * Runs the datanode failure scenario: write {@code numWorker} files, start an additional
   * datanode, read all files concurrently, stop the first datanode while the reads are running,
   * wait for the readers and finally verify the files again.
   *
   * @param phantomReplication value for
   *        {@link DFSConfigKeys#DFS_CLOUD_MAX_PHANTOM_BLOCKS_FOR_READ_KEY}
   * @param numWorker number of files, and of concurrent writers / readers
   * @throws ExecutionException if one of the writers or readers failed
   * @throws Exception on any other setup or verification failure
   */
  public void testFileReadOnDataNodeFailure(int phantomReplication, int numWorker) throws Exception {
    final Configuration conf = new HdfsConfiguration();
    boolean enableCloud = true;
    final long BLK_SIZE = 32 * 1024 * 1024;
    final ExecutorService es = Executors.newFixedThreadPool(numWorker);

    if (enableCloud) {
      CloudTestHelper.purgeCloudData(defaultCloudProvider, testBucketPrefix);
      conf.setBoolean(DFSConfigKeys.DFS_ENABLE_CLOUD_PERSISTENCE, true);
      conf.set(DFSConfigKeys.DFS_CLOUD_PROVIDER, defaultCloudProvider.name());
      conf.setInt(DFSConfigKeys.DFS_CLOUD_MAX_PHANTOM_BLOCKS_FOR_READ_KEY, phantomReplication);
      CloudTestHelper.createRandomBucket(conf, testBucketPrefix, testname);
    }
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLK_SIZE);

    // We want to test that, as long as the failed datanode is not marked stale and
    // DFS_CLOUD_MAX_PHANTOM_BLOCKS_FOR_READ_KEY is 1, the read operation fails.
    // When a read fails the client retries it and asks for a new set of block
    // locations. Here we want the NN to keep returning the same failed DN, so the
    // stale interval is increased to make sure the DN is not marked stale during the test.
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY, 5 * 60 * 1000);

    // If a datanode fails then its unfinished block report entry lingers for some time
    // before it is reclaimed. Until the entry is reclaimed other datanodes are not
    // able to block report. Reduce the max block report processing time to quickly
    // reclaim unfinished block reports.
    conf.setLong(DFSConfigKeys.DFS_BR_LB_MAX_BR_PROCESSING_TIME, 5*1000);

    final int INITIAL_NUM_DN = 1;
    final int ADDITIONAL_NUM_DN = 1;

    final MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf).format(true).
            numDataNodes(INITIAL_NUM_DN);
    if (enableCloud) {
      builder.storageTypes(CloudTestHelper.genStorageTypes(INITIAL_NUM_DN));
    }

    final MiniDFSCluster cluster = builder.build();

    try {
      cluster.waitActive();
      final DistributedFileSystem fs = cluster.getFileSystem();
      final Path dir = new Path(DIR);

      final TestCloudDNFailures.SlowWriter[] slowWriters =
        new TestCloudDNFailures.SlowWriter[numWorker];
      final TestCloudDNFailures.SlowReader[] slowReaders =
        new TestCloudDNFailures.SlowReader[numWorker];
      final Future<?>[] futures = new Future<?>[numWorker];

      for (int i = 0; i < numWorker; i++) {
        //create writers to create one block per file.
        //delay is 0ms for fast writing.
        slowWriters[i] = new TestCloudDNFailures.SlowWriter(fs, new Path(dir, "file" + i), 0L,
                1 * 1024 * 1024);
        slowWriters[i].setMaxDataToWrite(BLK_SIZE);
        futures[i] = es.submit(slowWriters[i]);
      }

      //wait for all the files to be written
      awaitAll(futures);

      //start new datanodes
      startNewDataNode(enableCloud, ADDITIONAL_NUM_DN, cluster, conf);

      // Start slow readers
      for (int i = 0; i < numWorker; i++) {
        //create slow readers with different speed
        slowReaders[i] = new TestCloudDNFailures.SlowReader(fs, slowWriters[i].getFilepath(),
          (i + 1) * 50L,
                2 * 1024 * 1024, slowWriters[i].getFileSize());
        futures[i] = es.submit(slowReaders[i]);
      }

      //give the readers some time to start reading
      sleepSeconds(3);

      for (int i = 0; i < cluster.getDataNodes().size(); i++) {
        LOG.info("HopsFS-Cloud. Datanode : " + i + " ID: " + cluster.getDataNodes().get(i).getDatanodeUuid());
      }
      LOG.info("HopsFS-Cloud. Storring First Datanode");
      //kill DN that stores the block
      cluster.stopDataNode(0);

      //wait for the read to finish
      awaitAll(futures);

      //read again and check the contents for the file
      for (TestCloudDNFailures.SlowWriter s : slowWriters) {
        s.verify();
      }

    } finally {
      try {
        // Stop the worker threads. When one reader has failed the remaining readers may still
        // be running; interrupt them instead of leaking them into the following tests.
        es.shutdownNow();
      } finally {
        cluster.shutdown();
      }
    }
  }


  /** Deletes all the buckets created with this test's bucket prefix. */
  @AfterClass
  public static void CleanUp() throws IOException {
    TestClouds.DeleteAllBuckets(testBucketPrefix);
  }
}
