/*
 * Copyright (C) 2022 HopsWorks
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode.cloud.failures;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CloudProvider;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.CloudTestHelper;
import org.apache.hadoop.hdfs.DFSClientFaultInjector;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.HopsFilesTestHelper;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.blockmanagement.ProvidedBlocksChecker;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.CloudFsDatasetImpl;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.CloudPersistenceProviderS3Impl;
import org.apache.hadoop.hdfs.server.namenode.CloudBlockReportTestHelper;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.cloud.TestClouds;
import org.apache.http.impl.conn.Wire;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.junit.AfterClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
import org.junit.rules.Timeout;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import java.io.IOException;
import java.util.Collection;

import static org.apache.hadoop.hdfs.HopsFilesTestHelper.verifyFile;
import static org.apache.hadoop.hdfs.HopsFilesTestHelper.writeFile;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

/**
 * Checks how a cloud-backed cluster recovers when a packet of a write is corrupted.
 *
 * <p>Every test starts a {@link MiniDFSCluster} with cloud persistence enabled, replaces
 * {@link DFSClientFaultInjector#instance} with a Mockito mock whose {@code corruptPacket()}
 * is stubbed to return {@code true} exactly once, performs a write that is expected to fail,
 * waits until the lease count drops to zero, triggers a provided-blocks report and finally
 * verifies the length of {@code /dir/file1}, the metadata and the number of blocks found by
 * {@link CloudTestHelper#findAllBlocks()}.
 *
 * <p>The class is parameterized over {@link TestClouds#CloudProviders}.
 */
@RunWith(Parameterized.class)
public class TestCloudPacketCorruption {

  static final Log LOG = LogFactory.getLog(TestCloudPacketCorruption.class);

  static String testBucketPrefix = "hops-test-TCPC";

  /** Number of datanodes started by every test. */
  private static final int NUM_DN = 5;

  /** Directory that gets the "CLOUD" storage policy. */
  private static final String TEST_DIR = "/dir";

  /** File that every test writes to and verifies afterwards. */
  private static final String TEST_FILE = "/dir/file1";

  /** Upper bound for the lease count to reach zero after the failed write. */
  private static final long LEASE_RECOVERY_TIMEOUT_MS = 60 * 1000;

  /** Failure message used when a write that should have failed succeeds. */
  private static final String WRITE_SHOULD_FAIL =
    "the write was expected to fail after a packet was corrupted";

  @Parameterized.Parameters
  public static Collection<Object> configs() {
    return TestClouds.CloudProviders;
  }

  CloudProvider defaultCloudProvider = null;

  public TestCloudPacketCorruption(CloudProvider cloudProvider) {
    this.defaultCloudProvider = cloudProvider;
  }

  @Rule
  public TestName testname = new TestName();

  @ClassRule
  public static Timeout classTimeout = Timeout.seconds(60*15);

  @Rule
  public Timeout timeout = Timeout.seconds(60*15);

  /**
   * Corruption in the first block: the very first {@code corruptPacket()} call returns
   * {@code true}. The write must fail and the file must end up empty with no blocks.
   */
  @Test
  public void TestSimplePacketCorruption1() throws IOException {
    resetCloudState();
    MiniDFSCluster cluster = null;
    DFSClientFaultInjector oldInjector = null;
    try {
      Configuration conf = newCloudConf(10 * 1024 * 1024);
      cluster = buildCluster(conf);
      awaitClusterReady(cluster);

      DFSClientFaultInjector faultInjector
        = Mockito.mock(DFSClientFaultInjector.class);
      oldInjector = DFSClientFaultInjector.instance;
      DFSClientFaultInjector.instance = faultInjector;

      DistributedFileSystem dfs = openCloudDir(cluster);
      corruptFirstPacket(faultInjector);

      int initialSize = FSNamesystem.getMaxSmallFileSize() + 1;
      try {
        FSDataOutputStream out = dfs.create(new Path(TEST_FILE));
        HopsFilesTestHelper.writeData(out, 0, initialSize);
        out.close();
        fail(WRITE_SHOULD_FAIL);
      } catch (Exception e) {
        // Expected. The stream is not closed on this path; the lease wait below covers it.
        LOG.info(e, e);
      }

      waitForLeaseRecovery(cluster);
      runBlockReport(cluster);
      verifyCloudState(dfs, conf, 0, 0);
    } catch (Exception e) {
      throw unexpectedFailure(e);
    } finally {
      restore(oldInjector, cluster);
    }
  }

  /**
   * Writes part of a block correctly and corrupts a packet of the data written afterwards.
   * The file must end up empty with no blocks.
   */
  @Test
  public void TestSimplePacketCorruption2() throws IOException {
    resetCloudState();
    MiniDFSCluster cluster = null;
    DFSClientFaultInjector oldInjector = null;
    try {
      Configuration conf = newCloudConf(10 * 1024 * 1024);
      cluster = buildCluster(conf);
      awaitClusterReady(cluster);

      DFSClientFaultInjector faultInjector
        = Mockito.mock(DFSClientFaultInjector.class);
      oldInjector = DFSClientFaultInjector.instance;
      DFSClientFaultInjector.instance = faultInjector;

      DistributedFileSystem dfs = openCloudDir(cluster);

      final boolean[] startCorruption = {false};
      corruptOnePacketOnceArmed(faultInjector, startCorruption);

      int initialSize = 100 * 1024;
      try {
        FSDataOutputStream out = dfs.create(new Path(TEST_FILE));
        HopsFilesTestHelper.writeData(out, 0, initialSize);
        startCorruption[0] = true;
        HopsFilesTestHelper.writeData(out, 0, initialSize);
        out.close();
        fail(WRITE_SHOULD_FAIL);
      } catch (Exception e) {
        // Expected. The stream is not closed on this path; the lease wait below covers it.
        LOG.info(e, e);
      }

      waitForLeaseRecovery(cluster);
      runBlockReport(cluster);
      verifyCloudState(dfs, conf, 0, 0);
    } catch (Exception e) {
      throw unexpectedFailure(e);
    } finally {
      restore(oldInjector, cluster);
    }
  }

  /**
   * Corruption happens after one block has been completed. The file must keep exactly one
   * block worth of data and one block must remain in the cloud.
   */
  @Test
  public void TestSimplePacketCorruption3() throws IOException {
    resetCloudState();
    MiniDFSCluster cluster = null;
    DFSClientFaultInjector oldInjector = null;
    try {
      final int BLK_SIZE = 5 * 1024 * 1024;
      Configuration conf = newCloudConf(BLK_SIZE);
      cluster = buildCluster(conf);
      awaitClusterReady(cluster);

      DFSClientFaultInjector faultInjector
        = Mockito.mock(DFSClientFaultInjector.class);
      oldInjector = DFSClientFaultInjector.instance;
      DFSClientFaultInjector.instance = faultInjector;

      DistributedFileSystem dfs = openCloudDir(cluster);

      final boolean[] startCorruption = {false};
      corruptOnePacketOnceArmed(faultInjector, startCorruption);

      int initialSize = BLK_SIZE + 100 * 1024;
      try {
        FSDataOutputStream out = dfs.create(new Path(TEST_FILE));
        HopsFilesTestHelper.writeData(out, 0, initialSize);
        // make sure that first block has been written.
        // otherwise the fault injector will corrupt the packets
        // for the first block
        Thread.sleep(5000);
        startCorruption[0] = true;
        HopsFilesTestHelper.writeData(out, initialSize, BLK_SIZE);
        out.close();
        fail(WRITE_SHOULD_FAIL);
      } catch (Exception e) {
        // Expected. The stream is not closed on this path; the lease wait below covers it.
        LOG.info(e, e);
      }

      waitForLeaseRecovery(cluster);
      runBlockReport(cluster);
      verifyCloudState(dfs, conf, BLK_SIZE, 1);
    } catch (Exception e) {
      throw unexpectedFailure(e);
    } finally {
      restore(oldInjector, cluster);
    }
  }

  /**
   * Writes a file successfully and then corrupts the first packet of an append to it.
   * The file must keep its pre-append length and one block must remain in the cloud.
   */
  @Test
  public void TestAppendPacketCorruption() throws IOException {
    resetCloudState();
    MiniDFSCluster cluster = null;
    DFSClientFaultInjector oldInjector = null;
    try {
      Configuration conf = newCloudConf(10 * 1024 * 1024);
      cluster = buildCluster(conf);
      awaitClusterReady(cluster);

      DFSClientFaultInjector faultInjector
        = Mockito.mock(DFSClientFaultInjector.class);
      oldInjector = DFSClientFaultInjector.instance;
      DFSClientFaultInjector.instance = faultInjector;

      DistributedFileSystem dfs = openCloudDir(cluster);

      int initialSize = FSNamesystem.getMaxSmallFileSize() + 1;
      writeFile(dfs, TEST_FILE, initialSize);  // write to cloud

      final int APPEND_SIZE = 64 * 1024;
      // Stub only now, so that the initial write above is not affected.
      corruptFirstPacket(faultInjector);
      try {
        FSDataOutputStream out = dfs.append(new Path(TEST_FILE));
        HopsFilesTestHelper.writeData(out, initialSize, APPEND_SIZE);
        out.close();
        fail(WRITE_SHOULD_FAIL);
      } catch (Exception e) {
        // Expected. The stream is not closed on this path; the lease wait below covers it.
        LOG.info(e, e);
      }

      waitForLeaseRecovery(cluster);
      runBlockReport(cluster);
      verifyCloudState(dfs, conf, initialSize, 1);
    } catch (Exception e) {
      throw unexpectedFailure(e);
    } finally {
      restore(oldInjector, cluster);
    }
  }

  @AfterClass
  public static void CleanUp() throws IOException {
    TestClouds.DeleteAllBuckets(testBucketPrefix);
  }

  /** Sets the log levels used by all tests and purges cloud data left under the test prefix. */
  private void resetCloudState() throws IOException {
    Logger.getLogger(CloudFsDatasetImpl.class).setLevel(Level.ALL);
    Logger.getLogger(CloudPersistenceProviderS3Impl.class).setLevel(Level.DEBUG);
    Logger.getLogger(ProvidedBlocksChecker.class).setLevel(Level.DEBUG);
    Logger.getLogger(Wire.class).setLevel(Level.ERROR);
    Logger.getRootLogger().setLevel(Level.INFO);
    CloudTestHelper.purgeCloudData(defaultCloudProvider, testBucketPrefix);
  }

  /**
   * Builds the configuration shared by all tests and creates a random bucket for it.
   *
   * @param blockSize value stored under {@code DFS_BLOCK_SIZE_KEY}
   * @return the populated configuration
   */
  private Configuration newCloudConf(int blockSize) throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setBoolean(DFSConfigKeys.DFS_ENABLE_CLOUD_PERSISTENCE, true);
    conf.set(DFSConfigKeys.DFS_CLOUD_PROVIDER, defaultCloudProvider.name());
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
    conf.setInt(DFSConfigKeys.DFS_BR_LB_MAX_CONCURRENT_BR_PER_NN, NUM_DN);
    CloudTestHelper.createRandomBucket(conf, testBucketPrefix, testname);

    conf.setLong(DFSConfigKeys.DFS_CLOUD_BLOCK_REPORT_THREAD_SLEEP_INTERVAL_KEY, 1000);
    conf.setInt(DFSConfigKeys.DFS_CLOUD_MARK_BLOCKS_CORRUPT_OR_MISSING_AFTER_KEY, 0);
    return conf;
  }

  /**
   * Builds a freshly formatted cluster with {@link #NUM_DN} datanodes. Only builds: the
   * caller assigns the result first so that its {@code finally} block can shut the cluster
   * down even if a later start-up step throws.
   */
  private static MiniDFSCluster buildCluster(Configuration conf) throws Exception {
    return new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).
      storageTypes(CloudTestHelper.genStorageTypes(NUM_DN)).format(true).build();
  }

  /** Waits for the cluster to become active and sets the lease period to (5s, 15s). */
  private static void awaitClusterReady(MiniDFSCluster cluster) throws Exception {
    cluster.waitActive();
    cluster.setLeasePeriod(5 * 1000, 15 * 1000);
  }

  /**
   * Creates {@link #TEST_DIR} and assigns it the "CLOUD" storage policy.
   *
   * @return the file system of the cluster
   */
  private static DistributedFileSystem openCloudDir(MiniDFSCluster cluster) throws Exception {
    DistributedFileSystem dfs = cluster.getFileSystem();
    dfs.mkdirs(new Path(TEST_DIR));
    dfs.setStoragePolicy(new Path(TEST_DIR), "CLOUD");
    return dfs;
  }

  /**
   * Stubs {@code corruptPacket()} so that only the first invocation returns {@code true};
   * every later invocation returns {@code false}.
   */
  private static void corruptFirstPacket(DFSClientFaultInjector faultInjector) {
    final int[] invocations = {0};
    Answer<Object> answer = new Answer<Object>() {
      @Override
      public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
        invocations[0]++;
        if (invocations[0] < 2) {
          LOG.info("HopsFS-Cloud. Corrupting the packet");
          return true;
        }
        return false;
      }
    };
    Mockito.doAnswer(answer).when(faultInjector).corruptPacket();
  }

  /**
   * Stubs {@code corruptPacket()} so that it returns {@code true} exactly once, and only
   * after the caller has set {@code startCorruption[0]} to {@code true}.
   *
   * @param startCorruption single-element flag the test flips to arm the corruption
   */
  private static void corruptOnePacketOnceArmed(DFSClientFaultInjector faultInjector,
      final boolean[] startCorruption) {
    final int[] corruptionCount = {0};
    Answer<Object> answer = new Answer<Object>() {
      @Override
      public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
        if (startCorruption[0] && corruptionCount[0] < 1) {
          corruptionCount[0]++;
          LOG.info("HopsFS-Cloud. Corrupting the packet");
          return true;
        }
        return false;
      }
    };
    Mockito.doAnswer(answer).when(faultInjector).corruptPacket();
  }

  /**
   * Polls once per second until the lease manager reports zero leases; fails the test when
   * that does not happen within {@link #LEASE_RECOVERY_TIMEOUT_MS}.
   */
  private static void waitForLeaseRecovery(MiniDFSCluster cluster) throws Exception {
    final long startTime = System.currentTimeMillis();
    while (cluster.getNamesystem().getLeaseManager().countLease() != 0) {
      if ((System.currentTimeMillis() - startTime) >= LEASE_RECOVERY_TIMEOUT_MS) {
        fail("lease recovery failed");
      }
      LOG.info("Waiting for lease recovery ");
      Thread.sleep(1000);
    }
  }

  /**
   * Schedules a provided-blocks report, waits until the report counter has advanced by one
   * and then sleeps for ten seconds.
   */
  private static void runBlockReport(MiniDFSCluster cluster) throws Exception {
    ProvidedBlocksChecker pbc =
      cluster.getNamesystem().getBlockManager().getProvidedBlocksChecker();
    long expectedBRCount = pbc.getProvidedBlockReportsCount() + 1;
    pbc.scheduleBlockReportNow();
    long ret = CloudBlockReportTestHelper.waitForBRCompletion(pbc, expectedBRCount);
    assertTrue("Exptected " + expectedBRCount + ". Got: " + ret, ret == expectedBRCount);

    Thread.sleep(10000); // wait for the deletion work to be completed by the DNs
  }

  /**
   * Verifies {@link #TEST_FILE} against the expected length, matches the metadata and checks
   * the number of blocks returned by {@link CloudTestHelper#findAllBlocks()}.
   *
   * <p>Uses a JUnit assertion rather than the {@code assert} keyword so the block-count check
   * also runs when the JVM is started without {@code -ea}.
   *
   * @param expectedFileSize length passed to {@code verifyFile}
   * @param expectedBlocks number of blocks expected from {@code findAllBlocks()}
   */
  private static void verifyCloudState(DistributedFileSystem dfs, Configuration conf,
      int expectedFileSize, int expectedBlocks) throws Exception {
    verifyFile(dfs, TEST_FILE, expectedFileSize);
    CloudTestHelper.matchMetadata(conf);
    int foundBlocks = CloudTestHelper.findAllBlocks().size();
    assertTrue("Expected " + expectedBlocks + " block(s). Found: " + foundBlocks,
      foundBlocks == expectedBlocks);
  }

  /**
   * Logs an unexpected exception and wraps it in an {@link AssertionError} that keeps the
   * original exception as its cause, so the stack trace shows up in the test report.
   */
  private static AssertionError unexpectedFailure(Exception e) {
    if (e instanceof InterruptedException) {
      Thread.currentThread().interrupt();
    }
    LOG.error("Test failed with an unexpected exception", e);
    return new AssertionError("Unexpected exception: " + e, e);
  }

  /**
   * Puts back the fault injector that was active before the test (if one was saved) and
   * shuts the cluster down (if one was started).
   */
  private static void restore(DFSClientFaultInjector oldInjector, MiniDFSCluster cluster) {
    if (oldInjector != null) {
      DFSClientFaultInjector.instance = oldInjector;
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
