/*
 * Copyright (C) 2022 HopsWorks AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode.cloud.append;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CloudProvider;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.CloudTestHelper;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.HopsFilesTestHelper;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.blockmanagement.ProvidedBlocksChecker;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.CloudFsDatasetImpl;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.ProvidedBlocksCacheCleaner;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.CloudPersistenceProvider;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.CloudPersistenceProviderFactory;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.CloudPersistenceProviderS3Impl;
import org.apache.hadoop.hdfs.server.namenode.CloudBlockReportTestHelper;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer;
import org.apache.http.impl.conn.Wire;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.any;

@RunWith(Parameterized.class)
public class AppendClientFailureHelper {

  static final Log LOG = LogFactory.getLog(AppendClientFailureHelper.class);

  public void testCloudAppendClientFailure(String testname, CloudProvider defaultCloudProvider,
                                           String testBucketPrefix) throws IOException {
    Logger.getRootLogger().setLevel(Level.INFO);
    Logger.getLogger(CloudFsDatasetImpl.class).setLevel(Level.ALL);
    Logger.getLogger(CloudPersistenceProviderS3Impl.class).setLevel(Level.ALL);
    Logger.getLogger(ProvidedBlocksChecker.class).setLevel(Level.ALL);
    Logger.getLogger(Wire.class).setLevel(Level.ERROR);
    Logger.getLogger(ProvidedBlocksCacheCleaner.class).setLevel(Level.ERROR);
    CloudTestHelper.purgeCloudData(defaultCloudProvider, testBucketPrefix);
    MiniDFSCluster cluster = null;
    try {
      final int BLK_SIZE = 10 * 1024 * 1024;
      final int BLK_PER_FILE = 1;
      final int NUM_DN = 2;

      Configuration conf = new HdfsConfiguration();
      conf.setBoolean(DFSConfigKeys.DFS_ENABLE_CLOUD_PERSISTENCE, true);
      conf.set(DFSConfigKeys.DFS_CLOUD_PROVIDER, defaultCloudProvider.name());
      conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLK_SIZE);

      // dead datanodes
      conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 2000);
      conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 2L);
      conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, 2);
      conf.setInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, 5000);
      conf.setLong(DFSConfigKeys.DFS_CLOUD_MARK_BLOCKS_CORRUPT_OR_MISSING_AFTER_KEY,
        1 * 1000);

      CloudTestHelper.createRandomBucket(conf, testBucketPrefix, testname);

      CloudPersistenceProvider cloudConnector = CloudPersistenceProviderFactory.getCloudClient(conf);

      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN)
        .storageTypes(CloudTestHelper.genStorageTypes(NUM_DN)).format(true).build();
      cluster.waitActive();
      //set low lease recovery times for testing
      cluster.setLeasePeriod(3 * 1000, 10 * 1000);

      DistributedFileSystem dfs = cluster.getFileSystem();

      dfs.mkdirs(new Path("/dir"));
      dfs.setStoragePolicy(new Path("/dir"), "CLOUD");

      HopsFilesTestHelper.writeFile(dfs, "/dir/file", 100 * 1024);


      // append more data to the file and then kill the dn and the client
      FSDataOutputStream out = dfs.append(new Path("/dir/file"));
      //kill the client first
      dfs.getClient().getLeaseRenewer().interruptAndJoin();
      dfs.getClient().abort();
      LOG.info("Aborted the client");

      assertTrue("The NN should not have recoverd the lease for the file ",
        cluster.getNamesystem().getLeaseManager().countLease() == 1);

      // this should not fail.
      HopsFilesTestHelper.verifyFile(cluster.getNewFileSystemInstance(0), "/dir/file", 100 * 1024);

      // wait for lease recovery
      long startTime = System.currentTimeMillis();
      while (true) {
        if ((System.currentTimeMillis() - startTime) < 60 * 1000) {
          if (cluster.getNamesystem().getLeaseManager().countLease() == 0) {
            break;
          }
        }
        Thread.sleep(1000);
      }

      assertTrue("The NN should have recoverd the lease for the file ",
        cluster.getNamesystem().getLeaseManager().countLease() == 0);

      //Lease recovery deletes the cached block. wait for the DN to clean the cache
      Thread.sleep(5000);
      CloudTestHelper.matchMetadata(conf, CloudTestHelper.ExpectedErrors.EXPECTING_MISSING_CACHE_LOCS);
      HopsFilesTestHelper.verifyFile(cluster.getNewFileSystemInstance(0), "/dir/file", 100 * 1024);
      CloudTestHelper.matchMetadata(conf);
    } catch (Exception e) {
      LOG.info(e, e);
      fail(e.getMessage());
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  public void testomitResponses(String testname, CloudProvider defaultCloudProvider,
                                String testBucketPrefix, final boolean omitUpdatePipelineResponse,
                                final boolean omitUpdateBlockForPipelineResponse) throws IOException {
    Logger.getRootLogger().setLevel(Level.INFO);
    Logger.getLogger(CloudFsDatasetImpl.class).setLevel(Level.ALL);
    Logger.getLogger(CloudPersistenceProviderS3Impl.class).setLevel(Level.ALL);
    Logger.getLogger(ProvidedBlocksChecker.class).setLevel(Level.ALL);
    Logger.getLogger(Wire.class).setLevel(Level.ERROR);
    Logger.getLogger(ProvidedBlocksCacheCleaner.class).setLevel(Level.ERROR);
    CloudTestHelper.purgeCloudData(defaultCloudProvider, testBucketPrefix);
    MiniDFSCluster cluster = null;
    try {
      final int BLK_SIZE = 10 * 1024 * 1024;
      final int NUM_DN = 2;

      Configuration conf = new HdfsConfiguration();
      conf.setBoolean(DFSConfigKeys.DFS_ENABLE_CLOUD_PERSISTENCE, true);
      conf.set(DFSConfigKeys.DFS_CLOUD_PROVIDER, defaultCloudProvider.name());
      conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLK_SIZE);

      // dead datanodes
      conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 2000);
      conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 2L);
      conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, 2);
      conf.setInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, 5000);
      conf.setLong(DFSConfigKeys.DFS_CLOUD_MARK_BLOCKS_CORRUPT_OR_MISSING_AFTER_KEY,
        1 * 1000);

      CloudTestHelper.createRandomBucket(conf, testBucketPrefix, testname);

      CloudPersistenceProvider cloudConnector = CloudPersistenceProviderFactory.getCloudClient(conf);

      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN)
        .storageTypes(CloudTestHelper.genStorageTypes(NUM_DN)).format(true).build();
      cluster.waitActive();
      //set low lease recovery times for testing
      cluster.setLeasePeriod(30 * 1000, 60 * 1000);

      DistributedFileSystem dfs = cluster.getFileSystem();

      //SPY
      final FSNamesystem fsNamesystem = cluster.getNameNode().getNamesystem();
      final FSNamesystem fsNamesystemSpy = Mockito.spy(fsNamesystem);
      NameNodeRpcServer rpcServer = (NameNodeRpcServer) cluster.getNameNode().getRpcServer();
      rpcServer.setFSNamesystem(fsNamesystemSpy);

      boolean omitMessages[] = new boolean[1];
      omitMessages[0] = true;
      boolean msgReceived[] = new boolean[1];
      msgReceived[0] = false;
      Answer updateBlockForPipeline = new Answer() {
        @Override
        public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
          Object ret = invocationOnMock.callRealMethod();
          msgReceived[0] = true;

          if (omitMessages[0] && omitUpdateBlockForPipelineResponse) {
            LOG.info("Processed updateBlockForPipeline. Omitting response");
            // do not return;
            Thread.sleep(Long.MAX_VALUE);
          }
          LOG.info("Processed updateBlockForPipeline.");
          return ret;
        }
      };
      Mockito.doAnswer(updateBlockForPipeline).when(fsNamesystemSpy).updateBlockForPipeline(any(), any());

      Answer updatePipeline = new Answer() {
        @Override
        public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
          Object ret = invocationOnMock.callRealMethod();
          msgReceived[0] = true;

          if (omitMessages[0] && omitUpdatePipelineResponse) {
            LOG.info("Processed updatePipeline. Omitting response");
            // do not return;
            Thread.sleep(Long.MAX_VALUE);
          }
          LOG.info("Processed updatePipeline.");
          return ret;
        }
      };
      Mockito.doAnswer(updatePipeline).when(fsNamesystemSpy).updatePipeline(any(), any(),
        any(), any(), any());

      dfs.mkdirs(new Path("/dir"));
      dfs.setStoragePolicy(new Path("/dir"), "CLOUD");

      HopsFilesTestHelper.writeFile(dfs, "/dir/file", 100 * 1024);

      // append more data to the file and then kill the dn and the client
      byte buffer[] = new byte[2 * 1024 * 1024];
      FSDataOutputStream out = dfs.append(new Path("/dir/file"));
      out.write(buffer);

      long startTime = System.currentTimeMillis();
      while (true) {
        if ((System.currentTimeMillis() - startTime) < 60 * 1000) {
          if (msgReceived[0]) {
            break;
          }
          Thread.sleep(1000);
        }
      }
      assert msgReceived[0];

      // this should not fail.
      HopsFilesTestHelper.verifyFile(cluster.getNewFileSystemInstance(0), "/dir/file", 100 * 1024);

      //kill the client
      dfs.getClient().getLeaseRenewer().interruptAndJoin();
      dfs.getClient().abort();
      LOG.info("Aborted the client");

      assertTrue("The NN should not have recoverd the lease for the file ",
        cluster.getNamesystem().getLeaseManager().countLease() == 1);

      // this should not fail.
      HopsFilesTestHelper.verifyFile(cluster.getNewFileSystemInstance(0), "/dir/file", 100 * 1024);

      // wait for lease recovery
      startTime = System.currentTimeMillis();
      while (true) {
        if ((System.currentTimeMillis() - startTime) < 120 * 1000) {
          if (cluster.getNamesystem().getLeaseManager().countLease() == 0) {
            break;
          }
          Thread.sleep(1000);
        }
      }

      assertTrue("The NN should have recoverd the lease for the file ",
        cluster.getNamesystem().getLeaseManager().countLease() == 0);

      // this should not fail.
      HopsFilesTestHelper.verifyFile(cluster.getNewFileSystemInstance(0), "/dir/file", 100 * 1024);

      ProvidedBlocksChecker pbc = cluster.getNamesystem().getBlockManager().getProvidedBlocksChecker();
      long count = pbc.getProvidedBlockReportsCount();
      pbc.scheduleBlockReportNow();
      CloudBlockReportTestHelper.waitForBRCompletion(pbc, count + 1);
      Thread.sleep(5000);

      CloudTestHelper.matchMetadata(conf, CloudTestHelper.ExpectedErrors.EXPECTING_MISSING_CACHE_LOCS);
      HopsFilesTestHelper.verifyFile(cluster.getNewFileSystemInstance(0), "/dir/file", 100 * 1024);
      CloudTestHelper.matchMetadata(conf);

      //remove the spy
      rpcServer.setFSNamesystem(fsNamesystem);

      // open the file again and write some more data
      omitMessages[0] = false;
      out = cluster.getNewFileSystemInstance(0).append(new Path("/dir/file"));
      HopsFilesTestHelper.writeData(out, 100 * 1024, BLK_SIZE);
      out.close();
      HopsFilesTestHelper.verifyFile(cluster.getNewFileSystemInstance(0), "/dir/file", 100 * 1024
        + BLK_SIZE);
      CloudTestHelper.matchMetadata(conf);
    } catch (Exception e) {
      LOG.info(e, e);
      fail(e.getMessage());
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
}
