package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;

import com.google.common.annotations.VisibleForTesting;
import io.hops.metadata.hdfs.entity.CloudBucket;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CloudProvider;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.ExtendedBlockId;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.RemovedBlock;
import org.apache.hadoop.hdfs.server.common.CloudHelper;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.datanode.*;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.CloudPersistenceProvider;

import java.io.*;
import java.util.*;
import java.util.concurrent.*;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.LengthInputStream;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.CloudPersistenceProviderFactory;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.PartRef;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.cloud.UploadID;
import org.apache.hadoop.hdfs.server.protocol.BlockReport;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.DiskChecker;

public class CloudFsDatasetImpl extends FsDatasetImpl {
  /**
   * Keys used in the metadata maps that accompany objects stored in the
   * cloud. {@link #GEN_STAMP} and {@link #OBJECT_SIZE} are read back from a
   * meta object's user metadata when a replica is rebuilt from the cloud.
   */
  public static final String GEN_STAMP = "GEN_STAMP";
  public static final String OBJECT_SIZE = "OBJECT_SIZE";
  public static final String META_FILE_SIZE = "META_FILE_SIZE";
  public static final String BLOCK_FILE_SIZE = "BLOCK_FILE_SIZE";

  static final Log LOG = LogFactory.getLog(CloudFsDatasetImpl.class);
  // Client through which every cloud storage operation of this dataset is issued.
  private CloudPersistenceProvider cloud;
  // When true, finalized block files are deleted locally instead of being
  // moved to the cache, and reads always download the object first.
  private final boolean bypassCache;
  // Passed to CloudHelper whenever a block or meta object key is built.
  private final int prefixSize;
  // Fixed-size pool created with cloud.getXferThreads() threads.
  private ExecutorService threadPoolExecutor;
  // Versioning capability reported by the cloud client for the first configured bucket.
  private final boolean isVersioningSupported;
  // Maximum number of attempts checkCID() makes to read the cluster ID.
  private final int readCIDRetries;

  CloudFsDatasetImpl(DataNode datanode, DataStorage storage,
                     Configuration conf) throws IOException {
    super(datanode, storage, conf);
    bypassCache = conf.getBoolean(DFSConfigKeys.DFS_DN_CLOUD_BYPASS_CACHE_KEY,
            DFSConfigKeys.DFS_DN_CLOUD_BYPASS_CACHE_DEFAULT);
    prefixSize = conf.getInt(DFSConfigKeys.DFS_CLOUD_PREFIX_SIZE_KEY,
            DFSConfigKeys.DFS_CLOUD_PREFIX_SIZE_DEFAULT);
    readCIDRetries = conf.getInt(DFSConfigKeys.DFS_CLOUD_READ_CID_RETIRES_KEY,
            DFSConfigKeys.DFS_CLOUD_READ_CID_RETIRES_DEFAULT);

    cloud = CloudPersistenceProviderFactory.getCloudClient(conf);
    cloud.checkAllBuckets(CloudHelper.getBucketsFromConf(conf));
    isVersioningSupported =
            cloud.isVersioningSupported(CloudHelper.getBucketsFromConf(conf).get(0));
    threadPoolExecutor = Executors.newFixedThreadPool(cloud.getXferThreads());
    checkCID();
  }

  /**
   * Verifies that the configured bucket belongs to this cluster by comparing
   * the cluster ID stored in the bucket with the datanode's cluster ID.
   *
   * Reading the ID is attempted up to {@code readCIDRetries} times with a one
   * second pause between attempts.
   *
   * @throws IOException if the ID could not be read (the last failure is
   *                     attached as the cause) or if it does not match the
   *                     datanode's cluster ID
   */
  void checkCID() throws IOException {
    assert CloudHelper.getBucketsFromConf(conf).size() == 1;
    String bucket = CloudHelper.getBucketsFromConf(conf).get(0);
    String cid = null;
    Exception lastFailure = null;
    for (int attempt = 1; attempt <= readCIDRetries; attempt++) {
      try {
        cid = cloud.getCID(bucket);
        break;
      } catch (Exception e) {
        lastFailure = e;
        LOG.warn("Error reading CID. Exception: " + e, e);
        if (attempt == readCIDRetries) {
          // nothing left to retry, so do not waste a second sleeping
          break;
        }
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ex) {
          // keep the interrupt visible to the callers further up the stack
          Thread.currentThread().interrupt();
          throw new RuntimeException(ex);
        }
      }
    }

    if (cid == null) {
      throw new IOException("Unable to read CID from the bucket", lastFailure);
    }

    if (cid.compareTo(dataStorage.clusterID) != 0) {
      String msg = "ClusterID does not match. Expecting: " + dataStorage.clusterID + " Got: " + cid;
      LOG.error(msg);
      throw new IOException(msg);
    }
  }

  /**
   * Handles a sync request for a block. Provided blocks are uploaded to the
   * cloud; every other block is handed to the parent implementation.
   *
   * @param b the block being synced
   * @throws IOException if the upload or the parent call fails
   */
  public void syncToCloud(ExtendedBlock b) throws IOException{
    if (b.isProvidedBlock()) {
      // upload to cloud
      syncToCloudInternal(b);
      return;
    }
    super.syncToCloud(b); // does nothing
  }

  /**
   * Uploads the current on-disk state of an open provided block (meta file
   * first, then block file) to the cloud.
   *
   * If the replica was being uploaded with the multipart API, the multipart
   * upload is cancelled and aborted, and the block is uploaded as a whole
   * object instead. When the bucket supports versioning, older versions of
   * both objects are removed afterwards.
   *
   * @param b the block to sync
   * @throws IOException if the replica cannot be found or a cloud operation fails
   */
  private void syncToCloudInternal(ExtendedBlock b) throws IOException {

    ReplicaInfo replicaInfo = getReplicaInfo(b);

    File blockFile = replicaInfo.getBlockFile();
    File metaFile = replicaInfo.getMetaFile();

    String blockFileKey = CloudHelper.getBlockKey(prefixSize, b.getLocalBlock());
    String metaFileKey = CloudHelper.getMetaFileKey(prefixSize, b.getLocalBlock());

    if (replicaInfo instanceof ProvidedReplicaBeingWritten) {
      ProvidedReplicaBeingWritten pReplica = (ProvidedReplicaBeingWritten) replicaInfo;
      pReplica.setSynced(true);
      pReplica.setCancellMultipart(true);

      if (pReplica.isMultipart()) {
        pReplica.setMultipart(false);
        //abort the multipart for this block
        cloud.abortMultipartUpload(b.getCloudBucket(), blockFileKey,
                pReplica.getUploadID());
      }
    }

    HashMap<String, String> metadata = getMetaMetadataRBW(replicaInfo, metaFile, blockFile);
    cloud.uploadObject(b.getCloudBucket(), metaFileKey, metaFile, metadata);

    cloud.uploadObject(b.getCloudBucket(), blockFileKey, blockFile,
            getBlockFileMetadata(b.getLocalBlock()));

    // each label is paired with the metadata key of the same name
    LOG.info("HopsFS-Cloud. Sync an open block to the cloud. Block: " + b.getLocalBlock() +
            " Block size: " + metadata.get(OBJECT_SIZE) +
            " Block file size " + metadata.get(BLOCK_FILE_SIZE) +
            " Meta file size " + metadata.get(META_FILE_SIZE));

    if (isVersioningSupported) {
      if (cloud.objectExists(b.getCloudBucket(), blockFileKey)) {
        cloud.deleteOldVersions(b.getCloudBucket(), blockFileKey);
      }

      if (cloud.objectExists(b.getCloudBucket(), metaFileKey)) {
        cloud.deleteOldVersions(b.getCloudBucket(), metaFileKey);
      }
    }
  }

  /**
   * Runs the pre-finalization step. Provided blocks are uploaded to the
   * cloud; all other blocks use the parent implementation.
   *
   * @param b the block about to be finalized
   * @throws IOException if the upload or the parent call fails
   */
  @Override
  public void preFinalize(ExtendedBlock b) throws IOException {
    if (b.isProvidedBlock()) {
      // upload to cloud
      preFinalizeInternal(b);
      return;
    }
    super.preFinalize(b);
  }

  /**
   * Uploads a provided block to the cloud ahead of finalization.
   *
   * The meta file is always uploaded here. The block file is uploaded here
   * unless a multipart upload is in progress, in which case this method waits
   * until the replica reports the multipart upload as complete. For blocks
   * that are not expected to be in the cloud yet (not synced, appended or
   * recovered) an already existing object is treated as an error. After an
   * append or recovery the objects of the older generation stamps are
   * deleted, and old object versions are purged if the bucket is versioned.
   *
   * An interrupt received while waiting for the multipart upload does not
   * abort the wait, but the thread's interrupt status is restored once the
   * wait is over so that callers can still observe it.
   *
   * @param b the block to upload
   * @throws IOException if an object unexpectedly exists or a cloud operation fails
   */
  public void preFinalizeInternal(ExtendedBlock b) throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("HopsFS-Cloud. Prefinalize Stage. Uploading... Block: " + b.getLocalBlock());
    }

    ReplicaInfo replicaInfo = getReplicaInfo(b);
    ProvidedReplicaBeingWritten pReplica = null;
    boolean isMultiPart = false;
    boolean isSynced = false;
    boolean isAppend = false;
    boolean isRecover = false;
    boolean expectedToExistInCloud = false;

    if (replicaInfo instanceof ProvidedReplicaBeingWritten) {
      pReplica = (ProvidedReplicaBeingWritten) replicaInfo;
      isMultiPart = pReplica.isMultipart();
      isSynced = pReplica.isSynced();
      isAppend = pReplica.isAppend();
      isRecover = pReplica.isRecovered();
      expectedToExistInCloud = isSynced || isAppend || isRecover;
    }


    if (isMultiPart) {
      assert !expectedToExistInCloud;
    }

    File blockFile = replicaInfo.getBlockFile();
    File metaFile = replicaInfo.getMetaFile();
    String blockFileKey = CloudHelper.getBlockKey(prefixSize, b.getLocalBlock());
    String metaFileKey = CloudHelper.getMetaFileKey(prefixSize, b.getLocalBlock());

    if (expectedToExistInCloud || !cloud.objectExists(b.getCloudBucket(), metaFileKey)) {
      cloud.uploadObject(b.getCloudBucket(), metaFileKey, metaFile,
              getMetaMetadata(b.getLocalBlock(), metaFile, blockFile));
    } else {
      LOG.error("HopsFS-Cloud. Block: " + b + " meta file already exists.");
      throw new IOException("Block: " + b + " meta file already exists.");
    }

    if (isMultiPart) {
      // Wait for the multipart upload to finish. The interrupt is remembered
      // rather than re-raised inside the loop, because re-raising it would
      // make every following sleep() fail immediately and turn the wait into
      // a busy spin.
      boolean interrupted = false;
      try {
        while (!pReplica.isMultipartComplete()) {
          try {
            Thread.sleep(30);
          } catch (InterruptedException e) {
            interrupted = true;
          }
        }
      } finally {
        if (interrupted) {
          Thread.currentThread().interrupt();
        }
      }
    } else {
      if (expectedToExistInCloud || !cloud.objectExists(b.getCloudBucket(), blockFileKey )) {
        cloud.uploadObject(b.getCloudBucket(), blockFileKey, blockFile,
                getBlockFileMetadata(b.getLocalBlock()));
      } else {
        LOG.error("HopsFS-Cloud. Block: " + b + " already exists.");
        throw new IOException("Block: " + b + " already exists.");
      }
    }

    if (isAppend || isRecover) {
      // delete obsolete versions of the block
      for(long gs : pReplica.getOldGS()){
        Block oldBlk = new Block(b.getLocalBlock());
        oldBlk.setGenerationStampNoPersistance(gs);
        String oldBlockObjKey = CloudHelper.getBlockKey(prefixSize, oldBlk);
        String oldMetaObjKey = CloudHelper.getMetaFileKey(prefixSize, oldBlk);
        cloud.deleteObject(oldBlk.getCloudBucket(), oldBlockObjKey);
        cloud.deleteObject(oldBlk.getCloudBucket(), oldMetaObjKey);
      }

      if(isVersioningSupported) {
        cloud.deleteOldVersions(b.getCloudBucket(), blockFileKey);
        cloud.deleteOldVersions(b.getCloudBucket(), metaFileKey);
      }
    }
  }

  /**
   * Finalizes a block. Provided blocks go through the cloud specific
   * finalization; all other blocks use the parent implementation.
   *
   * @param b the block to finalize
   * @throws IOException if finalization fails
   */
  @Override
  public synchronized void finalizeBlock(ExtendedBlock b) throws IOException {
    if (b.isProvidedBlock()) {
      finalizeBlockInternal(b);
      return;
    }
    super.finalizeBlock(b);
  }

  /**
   * Finalizes a provided block locally: gives the reserved and used space
   * back to the volume, drops the replica from the volume map, and then
   * either deletes the local files (cache bypassed) or moves them into the
   * cloud volume's cache directory.
   *
   * @param b the block to finalize
   * @throws IOException if the calling thread was interrupted or the replica
   *                     cannot be found
   */
  private synchronized void finalizeBlockInternal(ExtendedBlock b) throws IOException {
    LOG.debug("HopsFS-Cloud. Finalizing bloclk. Block: " + b.getLocalBlock());
    if (Thread.interrupted()) {
      // Don't allow data modifications from interrupted threads
      throw new IOException("Cannot finalize block from Interrupted Thread");
    }

    final String bpid = b.getBlockPoolId();
    final ReplicaInfo replica = getReplicaInfo(b);
    final File localBlock = replica.getBlockFile();
    final File localMeta = replica.getMetaFile();
    final long usedBytes = localBlock.length() + localMeta.length();

    // release rbw space
    final FsVolumeImpl rbwVolume = (FsVolumeImpl) replica.getVolume();
    rbwVolume.releaseReservedSpace(replica.getBytesReserved());
    rbwVolume.decDfsUsed(bpid, usedBytes);

    // remove from volumeMap, so we can get it from s3 instead
    volumeMap.remove(bpid, replica.getBlockId());

    if (!bypassCache) {
      //move the blocks to the cache
      final File cacheDir = getCloudVolume().getCacheDir(bpid);
      final File cachedBlock =
              new File(cacheDir, CloudHelper.getBlockKey(prefixSize, b.getLocalBlock()));
      final File cachedMeta =
              new File(cacheDir, CloudHelper.getMetaFileKey(prefixSize, b.getLocalBlock()));

      moveToCache(localBlock, cachedBlock, bpid);
      moveToCache(localMeta, cachedMeta, bpid);
      return;
    }

    // cache is bypassed: the local copies are no longer needed
    localBlock.delete();
    localMeta.delete();
  }

  /**
   * Opens the block data for reading from the given offset. Non-provided
   * blocks, and provided blocks that still have an entry in the volume map,
   * are read through the parent implementation; otherwise the block is served
   * from the cache directory of the cloud volume, downloading it if needed.
   *
   * @param b the block to read
   * @param seekOffset offset to start reading from
   * @return a stream over the block data
   * @throws IOException if the block cannot be opened
   */
  @Override // FsDatasetSpi
  public InputStream getBlockInputStream(ExtendedBlock b, long seekOffset)
          throws IOException {

    final boolean provided = b.isProvidedBlock();
    final boolean stillInVolumeMap =
            provided && volumeMap.get(b.getBlockPoolId(), b.getBlockId()) != null;

    if (!provided || stillInVolumeMap) {
      if (provided && LOG.isDebugEnabled()) {
        LOG.debug("HopsFS-Cloud. The block is being written. Get block inputstream " + b.getLocalBlock());
      }
      return super.getBlockInputStream(b, seekOffset);
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("HopsFS-Cloud. Get block inputstream " + b.getLocalBlock());
    }

    final FsVolumeImpl cloudVolume = getCloudVolume();
    final String blockFileKey = CloudHelper.getBlockKey(prefixSize, b.getLocalBlock());
    final File cachedCopy = new File(cloudVolume.getCacheDir(b.getBlockPoolId()), blockFileKey);

    return getInputStreamInternal(b.getCloudBucket(), blockFileKey,
            cachedCopy, b.getBlockPoolId(), seekOffset);
  }

  /**
   * Opens the meta file of a block for reading. Non-provided blocks, and
   * provided blocks that still have an entry in the volume map, are read
   * through the parent implementation; otherwise the meta file is served from
   * the cache directory of the cloud volume, downloading it if needed.
   *
   * @param b the block whose meta data is requested
   * @return a stream over the meta file together with the local file's length
   * @throws IOException if the meta file cannot be opened
   */
  @Override // FsDatasetSpi
  public LengthInputStream getMetaDataInputStream(ExtendedBlock b)
          throws IOException {
    final boolean provided = b.isProvidedBlock();
    final boolean stillInVolumeMap =
            provided && volumeMap.get(b.getBlockPoolId(), b.getBlockId()) != null;

    if (!provided || stillInVolumeMap) {
      if (provided && LOG.isDebugEnabled()) {
        LOG.debug("HopsFS-Cloud. The block is being written. Get block's metadata inputstream " + b.getLocalBlock());
      }
      return super.getMetaDataInputStream(b);
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("HopsFS-Cloud. Get block's metadata inputstream " + b.getLocalBlock());
    }

    final FsVolumeImpl cloudVolume = getCloudVolume();
    final String metaFileKey = CloudHelper.getMetaFileKey(prefixSize, b.getLocalBlock());
    final File cachedMeta = new File(cloudVolume.getCacheDir(b.getBlockPoolId()), metaFileKey);

    final InputStream metaStream = getInputStreamInternal(b.getCloudBucket(), metaFileKey,
            cachedMeta, b.getBlockPoolId(), 0);
    // the length is read after the stream is opened, i.e. after any download
    return new LengthInputStream(metaStream, cachedMeta.length());
  }

  /**
   * Returns a stream over the local copy of a cloud object, positioned at
   * {@code seekOffset}.
   *
   * When the cache is bypassed the object is always downloaded. Otherwise the
   * cached file is reused only if it exists and its length equals the object
   * size reported by the cloud; a stale copy is deleted and downloaded again.
   * After the file is opened its cache timestamp is refreshed.
   *
   * If anything fails after the local file has been opened, the stream is
   * closed before the exception is propagated so that the file descriptor is
   * not leaked.
   *
   * @param cloudBucket bucket holding the object
   * @param objectKey key of the object
   * @param localCopy location of the local (cached) copy
   * @param bpid block pool id used for the cache bookkeeping
   * @param seekOffset number of bytes to skip before returning the stream
   * @return an open stream over {@code localCopy}
   * @throws IOException if the object cannot be downloaded or opened
   */
  private InputStream getInputStreamInternal(String cloudBucket, String objectKey,
                                             File localCopy, String bpid,
                                             long seekOffset) throws IOException {
    try {
      //check if object exists
      long startTime = System.currentTimeMillis();

      boolean download = bypassCache;
      if (!bypassCache) {
        // make sure that local copy in the cache has same size as in the cloud
        if (localCopy.exists()) {
          long cloudBlockLen = cloud.getObjectSize(cloudBucket, objectKey);
          boolean sizeMatches = cloudBlockLen  == localCopy.length();
          if(!sizeMatches){
            LOG.warn("HopsFS-Cloud. Ignoring cached block."+
                    " The size of the block in cache does not match with the block " +
                    "size in cloud. Block key: " + objectKey + " Block size in cache: "
                    + localCopy.length() + " Block size in cloud: " + cloudBlockLen);
            localCopy.delete();
            download = true;
          }
        } else {
          download =true;
        }
      }

      if (download) {
        cloud.downloadObject(cloudBucket, objectKey, localCopy);
      } else {
        if(LOG.isDebugEnabled()) {
          LOG.debug("HopsFS-Cloud. Reading provided block from cache. Block: " + objectKey);
        }
      }

      InputStream ioStream = new FileInputStream(localCopy);
      boolean ready = false;
      try {
        ioStream.skip(seekOffset);

        providedBlocksCacheUpdateTS(bpid, localCopy);  //after opening the file put it in the cache
        ready = true;
      } finally {
        if (!ready) {
          // the caller never receives the stream, so it must be closed here
          try {
            ioStream.close();
          } catch (IOException ignored) {
            // the original failure is the one worth reporting
          }
        }
      }

      if(LOG.isDebugEnabled()) {
        LOG.debug("HopsFS-Cloud. " + objectKey + " GetInputStream Fn Time(ms) :" + (System.currentTimeMillis() - startTime));
      }
      return ioStream;
    } catch (IOException e) {
      LOG.warn("Could not read " + objectKey + ". ", e);
      throw e;
    }
  }

  /**
   * Looks up the replica of a block. Non-provided blocks, and provided blocks
   * that still have an entry in the volume map, are resolved by the parent
   * implementation; the remaining provided blocks are resolved through
   * {@link #getReplicaInternal(ExtendedBlock)}.
   *
   * @param b the block to look up
   * @return the replica, or whatever the delegate returns when none is found
   */
  @Deprecated
  @Override // FsDatasetSpi
  public ReplicaInfo getReplica(ExtendedBlock b) {
    final boolean resolveLocally = !b.isProvidedBlock()
            || volumeMap.get(b.getBlockPoolId(), b.getBlockId()) != null;
    return resolveLocally ? super.getReplica(b) : getReplicaInternal(b);
  }

  /**
   * Resolves a replica, preferring the parent's view and falling back to the
   * cloud. For the fallback the generation stamp and size are parsed from the
   * user metadata of the block's meta object and wrapped in a
   * {@link FinalizedReplica} that lives on the cloud volume.
   *
   * @param b the block to look up
   * @return the replica, or {@code null} if reading the cloud metadata failed
   *         with an {@link IOException}
   */
  public ReplicaInfo getReplicaInternal(ExtendedBlock b) {
    final ReplicaInfo known = super.getReplica(b);
    if (known != null) {
      return known;
    }

    try {
      final String metaFileKey = CloudHelper.getMetaFileKey(prefixSize, b.getLocalBlock());
      final Map<String, String> userMetadata =
              cloud.getUserMetaData(b.getCloudBucket(), metaFileKey);

      final long genStamp = Long.parseLong(userMetadata.get(GEN_STAMP));
      final long size = Long.parseLong(userMetadata.get(OBJECT_SIZE));

      return new FinalizedReplica(b.getBlockId(), size, genStamp,
              b.getCloudBucket(),
              getCloudVolume(), getCloudVolume().getCacheDir(b.getBlockPoolId()));
    } catch (IOException up) {
      LOG.info(up, up);
      return null;
    }
  }

  /**
   * Tells whether a provided block is finalized. Finalized provided blocks
   * are removed from the replica map, so a block counts as finalized when the
   * parent lookup finds no replica for it.
   *
   * @param b a provided block
   * @return {@code true} if the parent lookup returns no replica
   */
  public boolean isProvideBlockFinalized(ExtendedBlock b) {
    assert b.isProvidedBlock();
    return super.getReplica(b) == null;
  }


  /**
   * @return the cloud provider name from the configuration, or the configured
   *         default when the key is not set
   */
  private String getCloudProviderName() {
    final String fallback = DFSConfigKeys.DFS_CLOUD_PROVIDER_DEFAULT;
    return conf.get(DFSConfigKeys.DFS_CLOUD_PROVIDER, fallback);
  }

  /**
   * Creates the volume implementation for a storage directory. Cloud storage
   * gets a {@link CloudFsVolumeImpl}, provided the configured cloud provider
   * is AWS, AZURE or GCS (case-insensitive); every other storage type gets a
   * plain {@link FsVolumeImpl}.
   *
   * @throws UnsupportedOperationException if the storage type is CLOUD and
   *                                       the configured provider is not supported
   */
  @Override
  FsVolumeImpl getNewFsVolumeImpl(FsDatasetImpl dataset, String storageID,
                                  File currentDir, Configuration conf,
                                  StorageType storageType) throws IOException {
    if (storageType != StorageType.CLOUD) {
      return new FsVolumeImpl(this, storageID, currentDir, conf, storageType);
    }

    final String provider = getCloudProviderName();
    final boolean supported = provider.equalsIgnoreCase(CloudProvider.AWS.name())
            || provider.equalsIgnoreCase(CloudProvider.AZURE.name())
            || provider.equalsIgnoreCase(CloudProvider.GCS.name());
    if (!supported) {
      throw new UnsupportedOperationException("Cloud provider '" +
              provider + "' is not supported");
    }
    return new CloudFsVolumeImpl(this, storageID, currentDir, conf, storageType);
  }

  /**
   * Deletes blocks that are no longer valid. Non-provided blocks are handed
   * to the parent. A provided block that is still present in the volume map
   * has its local replica deleted through the parent, and its cloud copy is
   * deleted as well when the replica is known to be synced, appended or
   * recovered. A provided block that is absent from the volume map is treated
   * as closed and only its cloud copy is deleted.
   *
   * @param bpid block pool id
   * @param invalidBlks blocks to delete
   * @throws IOException if any of the deletions reported an error
   */
  @Override // FsDatasetSpi
  public void invalidate(String bpid, RemovedBlock invalidBlks[]) throws IOException {
    //NOTE: there is one non-critical issue with this function.
    //Assume a block is in rbw state and the client calls hsync on the block.
    //The block will be uploaded to the cloud.
    //Now assume that this datanode dies. The client will abandon the block
    //and all the data will be written to a new block.
    //Assume that this datanode comes back online and receives the delete
    //request for the abandoned block. The replicaInfo will not contain the
    //information that the block was synced to the cloud. In that case the
    //datanode will delete the local copy of the block, but the copy in the
    //cloud will remain until the next block report clears it.

    final List<String> errors = new ArrayList<String>();

    for (RemovedBlock b : invalidBlks) {

      if (!b.isProvidedBlock()) {
        super.invalidateBlock(bpid, b, errors);
        LOG.debug("HopsFS-Cloud. Not a provided block. Calling super to delete the block. Block: "+b);
        continue;
      }

      final ReplicaInfo openReplica = volumeMap.get(bpid, b);
      // a replica in the volume map means the block is not yet closed
      final boolean rbw = openReplica != null;
      final boolean inCloud;
      if (!rbw) {
        // the block is closed. It will be in the cloud
        inCloud = true;
      } else if (openReplica instanceof ProvidedReplicaBeingWritten) {
        final ProvidedReplicaBeingWritten pReplica = (ProvidedReplicaBeingWritten) openReplica;
        inCloud = pReplica.isSynced() || pReplica.isAppend() || pReplica.isRecovered();
      } else {
        inCloud = false;
      }

      if (rbw) {
        LOG.info("HopsFS-Cloud. Scheduling deletion of RBW Block: "+b);
        super.invalidateBlock(bpid, b, errors);
      }

      if (inCloud) {
        LOG.info("HopsFS-Cloud. Scheduling deletion of Cloud Block: "+b);
        invalidateProvidedBlock(bpid, b, errors);
      }
    }

    printInvalidationErrors(errors, invalidBlks.length);
  }

  /**
   * Schedules the asynchronous deletion of a provided block: its cached block
   * and meta files and, depending on {@code invalidBlk.isDeleteCloudCopy()},
   * its copy in the cloud.
   *
   * If no cloud volume is available the failure is recorded in {@code errors}
   * and nothing is scheduled.
   *
   * @param bpid block pool id
   * @param invalidBlk the block to delete
   * @param errors collector for deletion failures
   * @throws IOException declared for the callers; not thrown by the visible code
   */
  private void invalidateProvidedBlock(String bpid, RemovedBlock invalidBlk, List<String> errors)
          throws IOException {
    // block is in the cloud.
    // Edge cases such as deletion of blocks in flight
    // should be taken care of by the block reporting system

    FsVolumeImpl cloudVolume = getCloudVolume();

    if (cloudVolume == null) {
      errors.add("HopsFS-Cloud. Failed to delete replica " + invalidBlk);
      // without a cloud volume there is no cache directory to resolve
      return;
    }

    File localBlkCopy = new File(cloudVolume.getCacheDir(bpid),
            CloudHelper.getBlockKey(prefixSize, invalidBlk));
    File localMetaFileCopy = new File(cloudVolume.getCacheDir(bpid),
            CloudHelper.getMetaFileKey(prefixSize, invalidBlk));

    LOG.info("HopsFS-Cloud. Scheduling async deletion of block: " + invalidBlk);
    File volumeDir = cloudVolume.getCurrentDir();
    asyncDiskService.deleteAsyncProvidedBlock(new ExtendedBlock(bpid, invalidBlk),
            invalidBlk.isDeleteCloudCopy(), cloud, localBlkCopy, localMetaFileCopy, volumeDir);
  }

  /**
   * Completes the recovery of a replica. Non-provided replicas are handled by
   * the parent. For provided replicas the method first determines whether the
   * block is already in the cloud: a block without an entry in the volume map
   * is assumed to be, an open block is checked against the cloud. A block
   * that is already in the cloud is recovered there; otherwise any partial
   * multipart upload is aborted, the parent recovers the local replica, and
   * the finalized result is uploaded.
   *
   * @return the finalized replica
   * @throws IOException if the replica files cannot be verified or a cloud
   *                     operation fails
   */
  @Override
  FinalizedReplica updateReplicaUnderRecovery(
          String bpid,
          ReplicaUnderRecovery rur,
          long recoveryId,
          long newBlockId,
          long newlength,
          String cloudBucket) throws IOException {
    LOG.info("HopsFS-Cloud. update replica under recovery rur: "+rur);

    if (!rur.isProvidedBlock()) {
      return super.updateReplicaUnderRecovery(bpid, rur, recoveryId, newBlockId, newlength,
              cloudBucket);
    }

    final ReplicaInfo openReplica = volumeMap.get(bpid, rur.getBlockId());
    boolean alreadyInCloud = true;
    if (openReplica != null) {   //the block is open
      try {
        checkReplicaFilesInternal(openReplica);
      } catch (IOException notInCloud) {
        // not in the cloud, so the files have to be present on disk
        super.checkReplicaFiles(openReplica);
        alreadyInCloud = false;
      }
    }

    if (alreadyInCloud) {
      return updateReplicaUnderRecoveryInternal(bpid, rur, recoveryId,
              newBlockId, newlength, cloudBucket);
    }

    if (openReplica instanceof ProvidedReplicaUnderRecovery) {
      final ProvidedReplicaUnderRecovery pReplica = (ProvidedReplicaUnderRecovery) openReplica;
      if (pReplica.isPartiallyUploaded()) { // upload is in progress using multipart api
        final String blockFileKey = CloudHelper.getBlockKey(prefixSize, pReplica.getBlock());
        cloud.abortMultipartUpload(openReplica.getCloudBucket(), blockFileKey,
                pReplica.getUploadID());
      }
    }

    final FinalizedReplica recovered = super.updateReplicaUnderRecovery(bpid, rur, recoveryId,
            newBlockId, newlength, cloudBucket);
    uploadFinalizedBlockToCloud(bpid, recovered);
    return recovered;
  }

  /**
   * Uploads a finalized replica to the cloud and then runs the cloud specific
   * finalization for it.
   *
   * @param bpid block pool id
   * @param fr the finalized replica
   * @throws IOException if the upload or the finalization fails
   */
  private void uploadFinalizedBlockToCloud(String bpid, FinalizedReplica fr) throws IOException {
    final Block localBlock = new Block(fr.getBlockId(), fr.getVisibleLength(),
            fr.getGenerationStamp(), fr.getCloudBucket());
    final ExtendedBlock finalized = new ExtendedBlock(bpid, localBlock);
    preFinalizeInternal(finalized);
    finalizeBlockInternal(finalized);
  }

  /**
   * Recovers a provided replica whose block is already in the cloud. After
   * validating the recovery id and the requested length, a new block object
   * with the new length and generation stamp is created in the cloud and the
   * replica is updated to match.
   *
   * @return a finalized replica built from the updated {@code rur}
   * @throws IOException if the recovery id does not match or the new length
   *                     exceeds the replica's length
   * @throws UnsupportedOperationException if a copy-on-truncate is requested
   */
  FinalizedReplica updateReplicaUnderRecoveryInternal(
          String bpid,
          ReplicaUnderRecovery rur,
          long recoveryId,
          long newBlockId,
          long newlength,
          String cloudBlock) throws IOException {
    //check recovery id
    if (recoveryId != rur.getRecoveryID()) {
      throw new IOException("rur.getRecoveryID() != recoveryId = " +
              recoveryId + ", rur=" + rur);
    }

    final boolean copyOnTruncate = newBlockId > 0L && rur.getBlockId() != newBlockId;
    if (copyOnTruncate) {
      throw new UnsupportedOperationException("Truncate using copy is not supported");
    }

    // the replica can only shrink (or keep its length)
    if (newlength > rur.getNumBytes()) {
      throw new IOException(
              "rur.getNumBytes() < newlength = " + newlength + ", rur=" + rur);
    }

    LOG.info("HopsFS-Cloud. update replica under recovery rur: "+rur+". Creating a new replica " +
            "in the cloud");

    // Create a new block even if zero bytes are truncated,
    // because GS needs to be increased.
    if (newlength <= rur.getNumBytes()) {
      truncateProvidedBlock(bpid, rur, rur.getNumBytes(), newlength, recoveryId);
      // update RUR with the new length
      rur.setNumBytesNoPersistance(newlength);
      rur.setGenerationStampNoPersistance(recoveryId);
    }

    return new FinalizedReplica(rur, null, null);
  }

  /**
   * Creates a truncated copy of a provided block under a new generation stamp.
   *
   * The block and its meta file are taken from the cache directory of the
   * cloud volume (downloaded when missing, or when the block file's length
   * does not match), truncated on disk, and the checksum of the last chunk is
   * recomputed. The result is uploaded under the keys of the new generation
   * stamp, after which the old objects and the temporary local files are
   * deleted and the block is removed from the volume map.
   *
   * @param bpid block pool id
   * @param rur replica describing the block that currently exists in the cloud
   * @param oldlen current length of the block
   * @param newlen length to truncate to; must not exceed {@code oldlen}
   * @param newGS generation stamp of the truncated block
   * @throws IOException if {@code newlen > oldlen}, if an object for the new
   *                     generation stamp already exists, or on I/O failure
   */
  private void truncateProvidedBlock(String bpid, ReplicaInfo rur, long oldlen,
                                     long newlen, long newGS) throws IOException {
    LOG.info("HopsFS-Cloud. Truncating a block: " + rur.getBlockId() + "_" + rur.getGenerationStamp());

    Block bOld = new Block(rur.getBlockId(), rur.getNumBytes(), rur.getGenerationStamp(),
            rur.getCloudBucket());
    String oldBlkKey = CloudHelper.getBlockKey(prefixSize, bOld);
    String oldBlkMetaKey = CloudHelper.getMetaFileKey(prefixSize, bOld);

    if (newlen > oldlen) {
      throw new IOException("Cannot truncate block to from oldlen (=" + oldlen +
              ") to newlen (=" + newlen + ")");
    }

    //download the block
    FsVolumeImpl vol = getCloudVolume();
    File blockFile = new File(vol.getCacheDir(bpid), oldBlkKey);
    File metaFile = new File(vol.getCacheDir(bpid), oldBlkMetaKey);

    if (!(blockFile.exists() && blockFile.length() == bOld.getNumBytes())) {
      blockFile.delete(); //delete old files if any
      cloud.downloadObject(rur.getCloudBucket(), oldBlkKey, blockFile);
      providedBlocksCacheUpdateTS(bpid, blockFile);
    }

    if (!(metaFile.exists() && metaFile.length() > 0)) {
      metaFile.delete(); //delete old files if any
      cloud.downloadObject(rur.getCloudBucket(), oldBlkMetaKey, metaFile);
      providedBlocksCacheUpdateTS(bpid, metaFile);
    }

    //truncate the disk block and update the metafile
    // NOTE(review): for newlen == 0 the integer division below still yields
    // n == 1, so the meta file keeps one checksum entry for an empty chunk.
    DataChecksum dcs = BlockMetadataHeader.readHeader(metaFile).getChecksum();
    int checksumsize = dcs.getChecksumSize();
    int bpc = dcs.getBytesPerChecksum();
    long n = (newlen - 1) / bpc + 1;
    long newmetalen = BlockMetadataHeader.getHeaderSize() + n * checksumsize;
    long lastchunkoffset = (n - 1) * bpc;
    int lastchunksize = (int) (newlen - lastchunkoffset);
    // holds the last chunk first and is then reused for its checksum
    byte[] lastChunk = new byte[Math.max(lastchunksize, checksumsize)];

    RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
    try {
      //truncate blockFile
      blockRAF.setLength(newlen);

      //read last chunk
      blockRAF.seek(lastchunkoffset);
      blockRAF.readFully(lastChunk, 0, lastchunksize);
    } finally {
      blockRAF.close();
    }

    //compute checksum
    dcs.update(lastChunk, 0, lastchunksize);
    dcs.writeValue(lastChunk, 0, false);

    //update metaFile
    RandomAccessFile metaRAF = new RandomAccessFile(metaFile, "rw");
    try {
      metaRAF.setLength(newmetalen);
      metaRAF.seek(newmetalen - checksumsize);
      metaRAF.write(lastChunk, 0, checksumsize);
    } finally {
      metaRAF.close();
    }

    //update the blocks
    LOG.info("HopsFS-Cloud. Truncated on disk copy of the block: " + bOld);

    Block bNew = new Block(rur.getBlockId(), newlen, newGS, rur.getCloudBucket());
    String newBlkKey = CloudHelper.getBlockKey(prefixSize, bNew);
    String newBlkMetaKey = CloudHelper.getMetaFileKey(prefixSize, bNew);

    if (!cloud.objectExists(rur.getCloudBucket(), newBlkKey)
            && !cloud.objectExists(rur.getCloudBucket(), newBlkMetaKey)) {
      LOG.info("HopsFS-Cloud. Uploading Truncated Block: " + bNew);
      cloud.uploadObject(rur.getCloudBucket(), newBlkMetaKey, metaFile,
              getMetaMetadata(bNew, metaFile, blockFile));
      cloud.uploadObject(rur.getCloudBucket(), newBlkKey, blockFile,
              getBlockFileMetadata(bNew));
    } else {
      // report the block that collides, not the checksum buffer
      LOG.error("HopsFS-Cloud. Block: " + bNew + " already exists.");
      throw new IOException("Block: " + bNew + " already exists.");
    }

    LOG.info("HopsFS-Cloud. Deleting old block from cloud. Block: " + bOld);
    cloud.deleteObject(rur.getCloudBucket(), oldBlkKey);
    cloud.deleteObject(rur.getCloudBucket(), oldBlkMetaKey);

    LOG.info("HopsFS-Cloud. Deleting disk tmp copy: " + bOld);
    blockFile.delete();
    metaFile.delete();

    //remove the entry from replica map
    volumeMap.remove(bpid, bNew.getBlockId());
  }

  /**
   * Verifies that the files of a replica exist. The files have to be
   * somewhere: either in the cloud or, for non-finalized blocks, on disk. The
   * cloud is checked first; only if that check fails is the parent's check
   * performed.
   *
   * @param r the replica to verify
   * @throws IOException if the parent's check fails after the cloud check failed
   */
  @Override
  public void checkReplicaFiles(final ReplicaInfo r) throws IOException {
    boolean foundInCloud;
    try {
      checkReplicaFilesInternal(r);
      foundInCloud = true;
    } catch (IOException missingInCloud) {
      foundInCloud = false;
    }

    if (!foundInCloud) {
      super.checkReplicaFiles(r);
    }
  }

  /**
   * Verifies that both the block object and the meta object of a replica
   * exist in the cloud, that the block object has the replica's length, and
   * that the meta object is not empty.
   *
   * @param r the replica to verify
   * @throws IOException if an object is missing, the block size differs, or
   *                     the meta object is empty
   */
  public void checkReplicaFilesInternal(final ReplicaInfo r) throws IOException {
    final String bucket = r.getCloudBucket();
    final long expectedLen = r.getNumBytes();
    final Block b = new Block(r.getBlockId(), expectedLen, r.getGenerationStamp(), bucket);
    final String blockKey = CloudHelper.getBlockKey(prefixSize, b);
    final String metaKey = CloudHelper.getMetaFileKey(prefixSize, b);

    // block object: must exist and have the expected size
    if (!cloud.objectExists(bucket, blockKey)) {
      throw new IOException("Block: " + b + " not found in the cloud storage");
    }
    final long actualLen = cloud.getObjectSize(bucket, blockKey);
    if (actualLen != expectedLen) {
      throw new IOException(
              "File length mismatched. Expected: " + expectedLen + " Got: " + actualLen);
    }

    // meta object: must exist and be non-empty
    if (!cloud.objectExists(bucket, metaKey)) {
      throw new IOException("Meta Object for Block: " + b + " not found in the cloud storage");
    }
    if (cloud.getObjectSize(bucket, metaKey) == 0) {
      throw new IOException("Metafile is empty. Block: " + b);
    }
  }

  /**
   * Returns the volume of a block: resolved through
   * {@link #getVolumeInternal(ExtendedBlock)} for provided blocks, and by the
   * parent implementation otherwise.
   *
   * @param b the block
   * @return the volume of the block
   */
  @Override
  public synchronized FsVolumeImpl getVolume(final ExtendedBlock b) {
    return b.isProvidedBlock() ? getVolumeInternal(b) : super.getVolume(b);
  }

  /**
   * Returns the block reports for a block pool. This override adds nothing of
   * its own; it returns the parent's result unchanged.
   *
   * @param bpid block pool id
   * @return the block reports produced by the parent implementation
   */
  @Override // FsDatasetSpi
  public Map<DatanodeStorage, BlockReport> getBlockReports(String bpid) {
    return super.getBlockReports(bpid);
  }

  /**
   * Returns the cloud volume for provided blocks and the parent's volume for
   * every other block.
   *
   * @param b the block
   * @return the volume of the block
   */
  public synchronized FsVolumeImpl getVolumeInternal(final ExtendedBlock b) {
    return b.isProvidedBlock() ? getCloudVolume() : super.getVolume(b);
  }

  /**
   * Shuts the dataset down: first the parent, then the transfer thread pool
   * created by the constructor, and finally the cloud client.
   *
   * The pool is shut down in an orderly fashion, so tasks that were already
   * submitted are not interrupted but no new tasks are accepted. Without this
   * the pool's idle worker threads would stay alive after the dataset is gone.
   */
  @Override
  public void shutdown() {
    super.shutdown();
    if (threadPoolExecutor != null) {
      threadPoolExecutor.shutdown();
    }
    cloud.shutdown();
  }

  /**
   * Creates a replica being written. The replica created by the parent is
   * wrapped in a {@link ProvidedReplicaBeingWritten} configured with the cloud
   * client's part size, and the wrapper is registered in the volume map.
   *
   * @param storageType storage type of the new replica
   * @param b the block to create the replica for
   * @return a handler holding the wrapping replica and the parent's volume reference
   * @throws IOException if the parent fails to create the replica
   */
  @Override // FsDatasetSpi
  public synchronized ReplicaHandler createRbw(
          StorageType storageType, ExtendedBlock b)
          throws IOException {
    final ReplicaHandler parentHandler = super.createRbw(storageType, b);
    final FsVolumeReference volumeRef = parentHandler.getVolumeReference();

    final ReplicaBeingWritten parentReplica = (ReplicaBeingWritten) parentHandler.getReplica();
    final ProvidedReplicaBeingWritten wrapped =
            new ProvidedReplicaBeingWritten(parentReplica, cloud.getPartSize());

    volumeMap.add(b.getBlockPoolId(), wrapped);
    return new ReplicaHandler(wrapped, volumeRef);
  }

  /**
   * Move block files from one storage to another storage.
   *
   * <p>Three cases are handled:
   * <ul>
   *   <li>the block is already a provided block: rejected with an
   *       {@link IOException};</li>
   *   <li>the target storage type is not {@code CLOUD}: delegated to the
   *       parent implementation;</li>
   *   <li>otherwise the finalized on-disk replica is uploaded to the cloud
   *       (meta object first, then the block object), the short-circuit
   *       registry and the namenode are notified, the local files are handed
   *       to {@code moveToCache}, and the replica is removed from the
   *       volume map.</li>
   * </ul>
   *
   * <p>If the block carries {@code CloudBucket.NON_EXISTENT_BUCKET_NAME}, the
   * first bucket returned by {@code CloudHelper.getBucketsFromConf} is used
   * and is also set on {@code block} as a side effect.
   *
   * @param block the block to move
   * @param targetStorageType the storage type to move the block to
   * @return Returns the Old replicaInfo, or {@code null} when both the block
   *     object and the meta object already exist in the cloud
   * @throws IOException if the block is a provided block, the replica is not
   *     finalized, its length differs from {@code block.getNumBytes()}, no
   *     bucket is configured, or a cloud operation fails
   */
  @Override
  public ReplicaInfo moveBlockAcrossStorage(ExtendedBlock block,
                                            StorageType targetStorageType) throws IOException {
    if (block.isProvidedBlock()) {
      throw new IOException("Moving files stored in the cloud is not supported");
    } else if (targetStorageType != StorageType.CLOUD) { //moving between non-cloud storages
      return super.moveBlockAcrossStorage(block, targetStorageType);
    } else {
      //moving data from disk to cloud
      LOG.info("HopsFS-Cloud. Moving block: " + block + " to " + targetStorageType);

      // Only a finalized replica whose length matches the request may be moved.
      ReplicaInfo replicaInfo = getReplicaInfo(block);
      if (replicaInfo.getState() != HdfsServerConstants.ReplicaState.FINALIZED) {
        throw new ReplicaNotFoundException(
                ReplicaNotFoundException.UNFINALIZED_REPLICA + block);
      }

      if (replicaInfo.getNumBytes() != block.getNumBytes()) {
        throw new IOException("Corrupted replica " + replicaInfo
                + " with a length of " + replicaInfo.getNumBytes()
                + " expected length is " + block.getNumBytes());
      }

      String bucket = block.getCloudBucket();
      // No bucket assigned yet: fall back to the first configured bucket.
      if (block.getCloudBucket().equals(CloudBucket.NON_EXISTENT_BUCKET_NAME)) {
        //FIXME get the bucket / container name from the namenode
        List<String> buckets = CloudHelper.getBucketsFromConf(conf);
        if(buckets.size()>0){
          bucket = buckets.get(0);
          block.setCloudBucket(bucket);
        } else{
          String error = "HopsFS-Cloud. Moving block: " + block + ". Bucket not set";
          LOG.error(error);
          throw new IOException(error);
        }
      }

      // Replica descriptor for the cloud volume; it is used below for the
      // short-circuit invalidation and the namenode notification.
      ReplicaInfo newReplicaInfo = new FinalizedReplica(replicaInfo, getCloudVolume(),
              getCloudVolume().getCacheDir(block.getBlockPoolId()));
      newReplicaInfo.setCloudBucketNoPersistance(bucket);
      ExtendedBlock extendedBlock = new ExtendedBlock(block.getBlockPoolId(), newReplicaInfo);

      String blockFileKey = CloudHelper.getBlockKey(prefixSize, block.getLocalBlock());
      String metaFileKey = CloudHelper.getMetaFileKey(prefixSize, block.getLocalBlock());
      //Check if already uploaded to the cloud
      if (cloud.objectExists(block.getCloudBucket(), blockFileKey) &&
              cloud.objectExists(block.getCloudBucket(), metaFileKey)) {
        LOG.info("HopsFS-Cloud. Block " + block + " has already been moved to the cloud");
        return null;
      } else {
        LOG.info("HopsFS-Cloud. Moving Block: " + block + " to the cloud.");
        FsVolumeImpl vol = getCloudVolume();
        File cachedBlockFile = new File(vol.getCacheDir(block.getBlockPoolId()), blockFileKey);
        File cachedMetaFile = new File(vol.getCacheDir(block.getBlockPoolId()), metaFileKey);

        //copy to cloud volume
        cloud.uploadObject(block.getCloudBucket(), metaFileKey, replicaInfo.getMetaFile(),
                getMetaMetadata(block.getLocalBlock(),
                        replicaInfo.getMetaFile(),
                        replicaInfo.getBlockFile()));
        cloud.uploadObject(block.getCloudBucket(), blockFileKey, replicaInfo.getBlockFile(),
                getBlockFileMetadata(block.getLocalBlock()));

        datanode.getShortCircuitRegistry().processBlockInvalidation(
                ExtendedBlockId.fromExtendedBlock(extendedBlock));
        datanode.notifyNamenodeBlockMovedToCloud(
                extendedBlock, replicaInfo.getStorageUuid(), newReplicaInfo.getStorageUuid());

        // NOTE(review): the boolean result of moveToCache is ignored here, so a
        // failed local rename does not fail the move.
        moveToCache(replicaInfo.getBlockFile(), cachedBlockFile, block.getBlockPoolId());
        moveToCache(replicaInfo.getMetaFile(), cachedMetaFile, block.getBlockPoolId());

        // remove from volumeMap, so we can get it from s3 instead
        volumeMap.remove(block.getBlockPoolId(), block.getBlockId());

        return replicaInfo;
      }
    }
  }

  /**
   * Returns the visible length of a replica.
   *
   * <p>Non-provided blocks, and provided blocks that still have an entry in
   * the volume map, are answered by the parent class. For any other provided
   * block the length is read from the user metadata ({@code OBJECT_SIZE})
   * stored on the block's meta object in the cloud.
   *
   * @param b the block whose visible length is requested
   * @return the visible length in bytes
   * @throws ReplicaNotFoundException if the cloud metadata cannot be read, is
   *     missing or malformed, or records a generation stamp older than the
   *     one requested
   * @throws IOException if the parent implementation fails
   */
  @Override // FsDatasetSpi
  public synchronized long getReplicaVisibleLength(final ExtendedBlock b)
          throws IOException {
    if (!b.isProvidedBlock() ||
            volumeMap.get(b.getBlockPoolId(), b.getBlockId()) != null) {
      return super.getReplicaVisibleLength(b);
    }

    try {
      // try to get the length of the block from the cloud
      String metaFileKey = CloudHelper.getMetaFileKey(prefixSize, b.getLocalBlock());
      Map<String, String> metadata = cloud.getUserMetaData(b.getCloudBucket(), metaFileKey);
      long size = Long.parseLong(metadata.get(OBJECT_SIZE));
      long genStamp = Long.parseLong(metadata.get(GEN_STAMP));

      if (genStamp < b.getGenerationStamp()) {
        throw new IOException(
                "cloud.getGenerationStamp() < block.getGenerationStamp(), block=" +
                        b + ", cloud GS =" + genStamp);
      }
      return size;
    } catch (IOException | NumberFormatException e) {
      // NumberFormatException covers a missing (null) or malformed metadata
      // entry; without it an unchecked exception would escape to the caller
      // instead of the documented ReplicaNotFoundException.
      LOG.info("HopsFS-Cloud. Unable to get the length of the replica from the cloud. "+e);
      throw new ReplicaNotFoundException(
              ReplicaNotFoundException.NON_EXISTENT_REPLICA + b.getBlockPoolId()
                      + ":" + b.getBlockId());
    }
  }

  /**
   * Reopens a replica for append.
   *
   * <p>Non-provided blocks are delegated to the parent class. For provided
   * blocks, any leftover local replica is first handled by
   * {@code checkRBWForAppend}, then the replica is looked up, validated
   * against {@code expectedBlockLen} and converted to an RBW replica by
   * {@code appendInternal}.
   *
   * @param b the block to append to
   * @param newGS the new generation stamp; must not be lower than the
   *     block's current generation stamp
   * @param expectedBlockLen the length the replica is expected to have
   * @return a handler over the RBW replica and its volume reference
   * @throws IOException if {@code newGS} is too old, the replica length does
   *     not match, or reopening the replica fails
   */
  @Override
  public synchronized ReplicaHandler append(ExtendedBlock b,
                                            long newGS, long expectedBlockLen) throws IOException {

    if (!b.isProvidedBlock()) {
      return super.append(b, newGS, expectedBlockLen);
    }

    checkRBWForAppend(b, newGS, expectedBlockLen);

    // check the validity of the parameter
    if (newGS < b.getGenerationStamp()) {
      throw new IOException("The new generation stamp " + newGS +
        " should be greater than the replica " + b + "'s generation stamp");
    }

    ReplicaInfo replicaInfo = getReplica(b);  // get the replica information from the cloud.
    LOG.info("HopsFS-Cloud. Appending to " + replicaInfo);

    if (replicaInfo.getNumBytes() != expectedBlockLen) {
      throw new IOException("Corrupted replica " + replicaInfo +
              " with a length of " + replicaInfo.getNumBytes() +
              " expected length is " + expectedBlockLen);
    }

    FsVolumeReference ref = replicaInfo.getVolume().obtainReference();
    ReplicaBeingWritten replica;
    try {
      replica = appendInternal(b.getBlockPoolId(), b, (FinalizedReplica)replicaInfo, newGS,
              expectedBlockLen);
    } catch (IOException | RuntimeException e) {
      // Release the volume reference on failure; otherwise it is never
      // closed because no ReplicaHandler takes ownership of it.
      IOUtils.cleanup(null, ref);
      throw e;
    }
    return new ReplicaHandler(replica, ref);
  }

  /**
   * Pre-append check for replicas left behind by a failed write.
   *
   * <p>Nothing happens when the block has no entry in the volume map. If it
   * has one and the cloud already holds both the block and meta objects with
   * the same generation stamp and the expected size, the local replica is
   * deleted. Otherwise the local replica must be finalized.
   *
   * @param b the block about to be appended to
   * @param newGS the new generation stamp (not used by this check)
   * @param expectedBlockLen the length the block is expected to have
   * @throws IOException if a cloud call fails or the local replica is not
   *     finalized
   */
  private void checkRBWForAppend(ExtendedBlock b,
                                 long newGS, long expectedBlockLen) throws IOException {
    final ReplicaInfo localReplica = volumeMap.get(b.getBlockPoolId(), b.getBlockId());
    if (localReplica == null) {
      return;
    }

    final String metaKey = CloudHelper.getMetaFileKey(prefixSize, b.getLocalBlock());
    final String blockKey = CloudHelper.getBlockKey(prefixSize, b.getLocalBlock());

    final boolean storedInCloud = cloud.objectExists(b.getCloudBucket(), blockKey)
            && cloud.objectExists(b.getCloudBucket(), metaKey);
    if (storedInCloud) {
      final Map<String, String> userMeta = cloud.getUserMetaData(b.getCloudBucket(), metaKey);
      final long cloudGS = Long.parseLong(userMeta.get(GEN_STAMP));
      final long cloudSize = Long.parseLong(userMeta.get(OBJECT_SIZE));

      // The cloud copy matches the request, so the local copy is redundant.
      if (cloudGS == b.getGenerationStamp() && cloudSize == expectedBlockLen) {
        deleteRBWImmediately(b, localReplica);
        return;
      }
    }

    if (localReplica.getState() != HdfsServerConstants.ReplicaState.FINALIZED) {
      throw new ReplicaNotFoundException(ReplicaNotFoundException.UNFINALIZED_REPLICA + b);
    }
  }

  /**
   * Invalidates the local replica of a block and blocks until it has
   * disappeared from the volume map.
   *
   * <p>The invalidation is asynchronous, so the volume map is polled every
   * 100 ms. If the waiting thread is interrupted, its interrupt status is
   * restored and the wait is abandoned with an
   * {@link InterruptedIOException}, rather than swallowing the interrupt and
   * looping on.
   *
   * @param b the block whose local replica is removed
   * @param rinfo the local replica; its generation stamp is used for the
   *     removal request
   * @throws IOException if the invalidation fails or the wait is interrupted
   */
  private void deleteRBWImmediately(ExtendedBlock b, ReplicaInfo rinfo) throws IOException {
    RemovedBlock[] rbs = new RemovedBlock[1];
    rbs[0] = new RemovedBlock(b.getBlockId(),
      Long.MAX_VALUE, // no need to send back ack to NN
      rinfo.getGenerationStamp(), // using the GS of the replica on disk
      b.getCloudBucket(), false);
    this.invalidate(b.getBlockPoolId(), rbs); // this is async
    LOG.info("HopsFS-Cloud: Deleting existing RBW for append operation. blk: " + b);

    while (volumeMap.get(b.getBlockPoolId(), b.getBlockId()) != null) {
      try {
        Thread.sleep(100);
        LOG.info("HopsFS-Cloud: waiting for the RBW block to be removed. blk: " + b);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        InterruptedIOException interrupted = new InterruptedIOException(
          "HopsFS-Cloud: interrupted while waiting for the RBW block to be removed. blk: " + b);
        interrupted.initCause(e);
        throw interrupted;
      }
    }
  }


  /**
   * Append to a finalized replica
   * Change a finalized replica to be a RBW replica and
   * bump its generation stamp to be the newGS
   *
   * <p>Steps, in order:
   * <ol>
   *   <li>make sure the block and meta files are in the cloud volume's cache
   *       directory, downloading both when the cached block file is missing
   *       or its length differs from {@code expectedBlkLen};</li>
   *   <li>build a {@link ProvidedReplicaBeingWritten} with the new generation
   *       stamp located in the RBW directory;</li>
   *   <li>create the meta object under the new-GS key in the cloud and rename
   *       the cached meta file into the RBW directory;</li>
   *   <li>do the same for the block object and file; if that fails, the meta
   *       file rename is reverted and the new meta object is deleted;</li>
   *   <li>register the new replica in the volume map and reserve RBW
   *       space.</li>
   * </ol>
   *
   * @param bpid
   *     block pool ID
   * @param block
   *     the block being appended to; supplies the old keys, bucket and
   *     generation stamp
   * @param replicaInfo
   *     a finalized replica
   * @param newGS
   *     new generation stamp
   * @param expectedBlkLen
   *     expected block length
   * @return a RBW replica
   * @throws IOException
   *     if moving the replica from finalized directory
   *     to rbw directory fails
   */
  private synchronized ReplicaBeingWritten appendInternal(String bpid,
                                                          ExtendedBlock block,
                                                          FinalizedReplica replicaInfo,
                                                          long newGS, long expectedBlkLen)
          throws IOException {

    //download the block
    String oldBlkKey = CloudHelper.getBlockKey(prefixSize, block.getLocalBlock());
    String oldBlkMetaKey = CloudHelper.getMetaFileKey(prefixSize, block.getLocalBlock());

    FsVolumeImpl vol = getCloudVolume();
    File oldBlockFile = new File(vol.getCacheDir(bpid), oldBlkKey);
    File oldMetaFile = new File(vol.getCacheDir(bpid), oldBlkMetaKey);

    // The cached copy is reused only when the block file exists with exactly
    // the expected length; otherwise both files are fetched again.
    if (!(oldBlockFile.exists() && oldBlockFile.length() == expectedBlkLen)) {
      LOG.info("HopsFS-Cloud. Downloading the block again from cloud for append op. Block: "+block+
        " Disk size: "+oldBlockFile.length()+ " Expected bytes: "+expectedBlkLen);
      oldBlockFile.delete(); //delete old blk file
      cloud.downloadObject(block.getCloudBucket(), oldBlkKey, oldBlockFile);
      providedBlocksCacheUpdateTS(bpid, oldBlockFile);

      oldMetaFile.delete(); //delete old meta file
      cloud.downloadObject(block.getCloudBucket(), oldBlkMetaKey, oldMetaFile);
      providedBlocksCacheUpdateTS(bpid, oldMetaFile);
    }

    // construct a RBW replica with the new GS
    FsVolumeImpl v = (FsVolumeImpl) replicaInfo.getVolume();
    if (v.getAvailable() < expectedBlkLen - replicaInfo.getNumBytes()) {
      throw new DiskChecker.DiskOutOfSpaceException(
              "Insufficient space for appending to " + replicaInfo);
    }

    File newBlkFile = new File(v.getRbwDir(bpid), replicaInfo.getBlockName());
    ReplicaBeingWritten replicaBeingWritten = new ReplicaBeingWritten(
            replicaInfo.getBlockId(), replicaInfo.getNumBytes(), newGS,
            replicaInfo.getCloudBucket(), v, newBlkFile.getParentFile(),
            Thread.currentThread(), expectedBlkLen);
    ProvidedReplicaBeingWritten newReplicaInfo =
            new ProvidedReplicaBeingWritten(replicaBeingWritten, cloud.getPartSize());
    File newmeta = newReplicaInfo.getMetaFile();

    // rename meta file to rbw directory
    if (LOG.isDebugEnabled()) {
      LOG.debug("HopsFS-Cloud. Renaming " + oldMetaFile + " to " + newmeta);
    }

    // Same block id and bucket, but with the new generation stamp and the
    // length of the local block file.
    Block newBlock = new Block(block.getBlockId(), oldBlockFile.length(), newGS, block.getCloudBucket());
    String newBlkMetaKey = CloudHelper.getMetaFileKey(prefixSize, newBlock);
    Map<String, String> newBlkMetaFileMetaData = getMetaMetadata(newBlock, oldMetaFile, oldBlockFile);
    try {
      // upload the meta file with new GS to the cloud
      copyCloudObject(block.getCloudBucket(), oldBlkMetaKey, newBlkMetaKey, oldMetaFile, newBlkMetaFileMetaData);
      NativeIO.renameTo(oldMetaFile, newmeta);
    } catch (IOException e) {
      throw new IOException("Block " + replicaInfo + " reopen failed. " +
              " Unable to move meta file  " + oldMetaFile +
              " to rbw dir " + newmeta, e);
    }

    // rename block file to rbw directory
    if (LOG.isDebugEnabled()) {
      LOG.debug("HopsFS-Cloud. Renaming " + oldBlockFile + " to " +
              newBlkFile + ", file length=" + oldBlockFile.length());
    }
    try {
      //upload the block with new GS to the cloud
      String newBlkKey = CloudHelper.getBlockKey(prefixSize, newBlock);
      Map<String, String> newBlkMetaData = getBlockFileMetadata(newBlock);
      copyCloudObject(block.getCloudBucket(), oldBlkKey, newBlkKey, oldBlockFile, newBlkMetaData);

      NativeIO.renameTo(oldBlockFile, newBlkFile);
    } catch (IOException e) {
      // Roll back the meta file step: move the local meta file back and
      // delete the new-GS meta object. A rollback failure is only logged.
      try {
        NativeIO.renameTo(newmeta, oldMetaFile);
        cloud.deleteObject(block.getCloudBucket(), newBlkMetaKey);
      } catch (IOException ex) {
        LOG.warn("Cannot move meta file " + newmeta +
                "back to the finalized directory " + oldMetaFile, ex);
      }
      throw new IOException("Block " + replicaInfo + " reopen failed. " +
              " Unable to move block file " + oldBlockFile +
              " to rbw dir " + newBlkFile, e);
    }

    // Replace finalized replica by a RBW replica in replicas map
    newReplicaInfo.setAppend(true);
    newReplicaInfo.addOldGS(block.getGenerationStamp());
    volumeMap.add(bpid, newReplicaInfo);
    v.reserveSpaceForRbw(expectedBlkLen - replicaInfo.getNumBytes());
    return newReplicaInfo;
  }

  /**
   * Recovers a failed append.
   *
   * <p>Non-provided blocks are delegated to the parent class. For provided
   * blocks only a finalized replica can be reopened; recovering an append on
   * an RBW replica is not supported and is rejected.
   *
   * @param b the block to recover
   * @param newGS the new generation stamp
   * @param expectedBlockLen the length the replica is expected to have
   * @return a handler over the reopened RBW replica and its volume reference
   * @throws IOException if the recovery check or the reopen fails
   * @throws UnsupportedOperationException if the replica is not finalized
   */
  @Override  // FsDatasetSpi
  public synchronized ReplicaHandler recoverAppend(
          ExtendedBlock b, long newGS, long expectedBlockLen) throws IOException {
    if (!b.isProvidedBlock()) {
      return super.recoverAppend(b, newGS, expectedBlockLen);
    }

    LOG.info("HopsFS-Cloud. Recover failed append operation. block " + b);
    ReplicaInfo replicaInfo = getReplica(b);  // get the replica information from the cloud.
    recoverCheck(replicaInfo, b, newGS, expectedBlockLen);

    FsVolumeReference ref = replicaInfo.getVolume().obtainReference();
    ReplicaBeingWritten replica;
    try {
      // change the replica's state/gs etc.
      if (replicaInfo.getState() == HdfsServerConstants.ReplicaState.FINALIZED) {
        replica = appendInternal(b.getBlockPoolId(), b, (FinalizedReplica) replicaInfo,
                newGS, expectedBlockLen);
      } else { //RBW
        //In HDFS we bump the GS and reopen the block for writing (See recoverAppend in the
        // parent class). But in HopsFS this is not yet supported.
        throw new UnsupportedOperationException("Appending to RBW replica is not supported for " +
                "cloud");
      }
    } catch (IOException | RuntimeException e) {
      // Runtime exceptions are caught as well: the UnsupportedOperationException
      // above would otherwise skip this cleanup and leak the volume reference.
      IOUtils.cleanup(null, ref);
      throw e;
    }
    return new ReplicaHandler(replica, ref);
  }

  /**
   * Recovers a replica for close: checks it, bumps its generation stamp and
   * finalizes the block under the new generation stamp.
   *
   * <p>Non-provided blocks are delegated to the parent class. For a provided
   * block that is not found locally, an RBW replica is first rebuilt via
   * {@code createRbw(ExtendedBlock)}.
   *
   * @param b the block to recover
   * @param newGS the new generation stamp
   * @param expectedBlockLen the expected block length
   * @return the storage UUID of the recovered replica
   * @throws IOException if any recovery step fails
   */
  @Override
  public synchronized String recoverClose(ExtendedBlock b, long newGS,
                                          long expectedBlockLen) throws IOException {
    if (!b.isProvidedBlock()) {
      return super.recoverClose(b, newGS, expectedBlockLen);
    }

    LOG.info("HopsFS-Cloud. Recover Close RBW replica " + b);
    ReplicaInfo located;
    try {
      located = getReplicaInfo(b.getBlockPoolId(), b.getBlockId());
    } catch (ReplicaNotFoundException notFoundLocally) {
      located = createRbw(b);
    }

    // Validate the replica state, then move it to the new generation stamp.
    final ReplicaInfo checked = recoverCheck(located, b, newGS, expectedBlockLen);
    bumpReplicaGS(checked, newGS);

    // Finalizing uploads the block to the cloud under the new generation stamp.
    final Block blockWithNewGS =
            new Block(b.getBlockId(), expectedBlockLen, newGS, b.getCloudBucket());
    finalizeBlock(new ExtendedBlock(b.getBlockPoolId(), blockWithNewGS));

    return checked.getStorageUuid();
  }

  /**
   * Recovers an RBW replica so that a write pipeline can continue on it.
   *
   * <p>Non-provided blocks are delegated to the parent class. For provided
   * blocks the replica is looked up locally, or rebuilt via
   * {@code createRbw(ExtendedBlock)} when it is not found. The method then
   * stops the previous writer, validates the generation stamp and byte
   * counts, truncates any bytes beyond the acked length, bumps the
   * generation stamp, and uploads both the meta file and the block file to
   * the cloud under the new generation stamp.
   *
   * @param b the block to recover
   * @param newGS the new generation stamp
   * @param minBytesRcvd lower bound checked against the replica's acked bytes
   * @param maxBytesRcvd upper bound checked against the replica's byte count
   * @return a handler over the recovered replica and its volume reference
   * @throws IOException if the replica is not RBW, its generation stamp or
   *     length is out of range, or the upload to the cloud fails
   */
  @Override // FsDatasetSpi
  public synchronized ReplicaHandler recoverRbw(
          ExtendedBlock b, long newGS, long minBytesRcvd, long maxBytesRcvd)
          throws IOException {
    ReplicaInfo replicaInfo = null;
    // Remembered so it can be recorded on the replica after recovery.
    long oldGS = b.getGenerationStamp();
    if(!b.isProvidedBlock()) {
      return super.recoverRbw(b, newGS,minBytesRcvd, maxBytesRcvd);
    } else {
      LOG.info("HopsFS-Cloud. Recover RBW replica " + b);
      try {
        replicaInfo = getReplicaInfo(b.getBlockPoolId(), b.getBlockId());
      } catch (ReplicaNotFoundException e){
        // Not known locally: rebuild the RBW replica.
        replicaInfo = createRbw(b);
      }
    }

    // check the replica's state
    if (replicaInfo.getState() != HdfsServerConstants.ReplicaState.RBW) {
      throw new ReplicaNotFoundException(
              ReplicaNotFoundException.NON_RBW_REPLICA + replicaInfo);
    }
    ReplicaBeingWritten rbw = (ReplicaBeingWritten)replicaInfo;

    LOG.info("HopsFS-Cloud. Recovering " + rbw);

    // Stop the previous writer
    rbw.stopWriter(datanode.getDnConf().getXceiverStopTimeout());
    rbw.setWriter(Thread.currentThread());

    // check generation stamp
    long replicaGenerationStamp = rbw.getGenerationStamp();
    if (replicaGenerationStamp < b.getGenerationStamp() ||
            replicaGenerationStamp > newGS) {
      throw new ReplicaNotFoundException(
              ReplicaNotFoundException.UNEXPECTED_GS_REPLICA + b +
                      ". Expected GS range is [" + b.getGenerationStamp() + ", " +
                      newGS + "].");
    }

    // check replica length
    long bytesAcked = rbw.getBytesAcked();
    long numBytes = rbw.getNumBytes();
    if (bytesAcked < minBytesRcvd || numBytes > maxBytesRcvd){
      throw new ReplicaNotFoundException("Unmatched length replica " +
              replicaInfo + ": BytesAcked = " + bytesAcked +
              " BytesRcvd = " + numBytes + " are not in the range of [" +
              minBytesRcvd + ", " + maxBytesRcvd + "].");
    }

    // From here on, an IOException releases the reference before propagating.
    FsVolumeReference ref = rbw.getVolume().obtainReference();
    try {
      // Truncate the potentially corrupt portion.
      // If the source was client and the last node in the pipeline was lost,
      // any corrupt data written after the acked length can go unnoticed.
      if (numBytes > bytesAcked) {
        final File replicafile = rbw.getBlockFile();
        truncateBlock(replicafile, rbw.getMetaFile(), numBytes, bytesAcked);
        rbw.setNumBytesNoPersistance(bytesAcked);
        rbw.setLastChecksumAndDataLen(bytesAcked, null);
      }

      // bump the replica's generation stamp to newGS
      bumpReplicaGS(rbw, newGS);

      //we have to upload the block to the cloud.
      //after completion of this call the client will
      //call updatePipeline and bump-up the GS
      //of the block. if we skip uploading the block
      //to the cloud and then this datanode fails then
      //there is a risk of data loss as there will be no
      //block in the cloud with the updated GS
      try {
        Block newBlock = new Block(b.getBlockId(), rbw.getNumBytes(), newGS, b.getCloudBucket());

        // upload the meta file with new GS to the cloud
        String newBlkMetaKey = CloudHelper.getMetaFileKey(prefixSize, newBlock);
        Map<String, String> newBlkMetaFileMetaData = getMetaMetadata(newBlock,
                rbw.getMetaFile(), rbw.getBlockFile());
        cloud.uploadObject(b.getCloudBucket(),newBlkMetaKey, rbw.getMetaFile(),
        newBlkMetaFileMetaData);

        // upload the block file with new GS to the cloud
        String newBlkKey = CloudHelper.getBlockKey(prefixSize, newBlock);
        Map<String, String> newBlkMetaData = getBlockFileMetadata(newBlock);
        cloud.uploadObject(b.getCloudBucket(),newBlkKey, rbw.getBlockFile(),
          newBlkMetaData);

        // Mark the replica as recovered and record the previous generation
        // stamp. NOTE(review): the assert only guards the casts when
        // assertions are enabled; otherwise a wrong type fails with a
        // ClassCastException that bypasses the reference cleanup below.
        assert rbw instanceof  ProvidedReplicaBeingWritten;
        ((ProvidedReplicaBeingWritten)rbw).setRecovered(true);
        ((ProvidedReplicaBeingWritten)rbw).addOldGS(oldGS);
      } catch (IOException e) {
        throw new IOException("Failed to upload  to cloud. block "+b+" newGS: "+newGS, e);
      }
    } catch (IOException e) {
      IOUtils.cleanup(null, ref);
      throw e;
    }
    return new ReplicaHandler(rbw, ref);
  }

  /**
   * Rebuilds a local RBW replica for a provided block that has no local
   * replica, using the objects stored in the cloud.
   *
   * <p>The block and meta files are downloaded into the cloud volume's cache
   * directory when the cached copies are missing or unusable, then renamed
   * into the RBW directory. The resulting
   * {@link ProvidedReplicaBeingWritten} is registered in the volume map.
   *
   * @param b the block to rebuild
   * @return the replica that was added to the volume map
   * @throws IOException if the replica lookup, a download or a rename fails
   */
  private ReplicaInfo createRbw(ExtendedBlock b) throws IOException {
    // RBW is not on disk. Download from cloud
    ReplicaInfo replicaInfoCloud = getReplica(b);

    String blkKey = CloudHelper.getBlockKey(prefixSize, b.getLocalBlock());
    String blkMetaKey = CloudHelper.getMetaFileKey(prefixSize, b.getLocalBlock());

    FsVolumeImpl vol = getCloudVolume();
    File blockFile = new File(vol.getCacheDir(b.getBlockPoolId()), blkKey);
    File metaFile = new File(vol.getCacheDir(b.getBlockPoolId()), blkMetaKey);

    // Reuse the cached block file only if its length matches the block.
    if (!(blockFile.exists() && blockFile.length() == b.getNumBytes())) {
      blockFile.delete(); //delete old files if any
      cloud.downloadObject(b.getCloudBucket(), blkKey, blockFile);
      providedBlocksCacheUpdateTS(b.getBlockPoolId(), blockFile);
    }

    // Reuse the cached meta file only if it exists and is non-empty.
    if (!(metaFile.exists() && metaFile.length() > 0)) {
      metaFile.delete(); //delete old files if any
      cloud.downloadObject(b.getCloudBucket(), blkMetaKey, metaFile);
      providedBlocksCacheUpdateTS(b.getBlockPoolId(), metaFile);
    }

    // construct a RBW replica
    FsVolumeImpl v = (FsVolumeImpl) replicaInfoCloud.getVolume();
    File newBlkFile = new File(v.getRbwDir(b.getBlockPoolId()),
            replicaInfoCloud.getBlockName());
    ReplicaBeingWritten replicaBeingWritten = new ReplicaBeingWritten(
            replicaInfoCloud.getBlockId(), replicaInfoCloud.getNumBytes(),
            replicaInfoCloud.getGenerationStamp(),
            replicaInfoCloud.getCloudBucket(), v, newBlkFile.getParentFile(),
            Thread.currentThread(), b.getNumBytes() );
    ReplicaInfo replicaInfo = new ProvidedReplicaBeingWritten(replicaBeingWritten,
            cloud.getPartSize());
    // Move the cached files into the RBW directory, meta file first.
    File newmeta = replicaInfo.getMetaFile();
    NativeIO.renameTo(metaFile, newmeta);
    NativeIO.renameTo(blockFile, newBlkFile);
    volumeMap.add(b.getBlockPoolId(), replicaInfo);
    return replicaInfo;
  }


  /**
   * Creates {@code dstKey} in {@code bucket} from an existing object, using
   * the mechanism appropriate for the configured cloud provider.
   *
   * <p>AWS and GCS use a copy from {@code srcKey}; Azure uploads
   * {@code srcFile} under the new key instead.
   *
   * @param bucket bucket holding both the source and destination object
   * @param srcKey key of the existing object
   * @param dstKey key of the object to create
   * @param srcFile local copy of the source object, used for Azure
   * @param newObjMetadata user metadata for the new object
   * @throws IOException if the cloud operation fails
   * @throws UnsupportedOperationException for any other provider name
   */
  private void copyCloudObject(String bucket, String srcKey, String dstKey, File srcFile,
                               Map<String, String> newObjMetadata) throws IOException {
    final String provider = getCloudProviderName();

    final boolean supportsCopy =
            provider.compareToIgnoreCase(CloudProvider.AWS.name()) == 0
                    || provider.compareToIgnoreCase(CloudProvider.GCS.name()) == 0;
    if (supportsCopy) {
      cloud.copyObject(bucket, bucket, srcKey, dstKey, newObjMetadata);
      return;
    }

    if (provider.compareToIgnoreCase(CloudProvider.AZURE.name()) == 0) {
      // azure does not support direct copy so upload a new block
      cloud.uploadObject(bucket, dstKey, srcFile, newObjMetadata);
      return;
    }

    throw new UnsupportedOperationException("Cloud provider '" +
            provider + "' is not supported");
  }

  /**
   * Moves a local file into the cloud cache location, or simply deletes it
   * when the cache is bypassed.
   *
   * @param from the file to move
   * @param to the destination inside the cache
   * @param bpid block pool ID used to record the cache access
   * @return {@code true} only if the file was renamed to {@code to};
   *     {@code false} if the cache is bypassed or the rename failed
   * @throws IOException if recording the cache access fails
   */
  private boolean moveToCache(File from, File to, String bpid) throws IOException {
    if (bypassCache) {
      from.delete();
      return false;
    }

    final File cacheParent = new File(to.getParent());
    if (!cacheParent.exists()) {
      cacheParent.mkdir();
    }

    final boolean moved = from.renameTo(to);
    if (!moved) {
      String error = "HopsFS-Cloud. Moving file: " + from + " to "+to+" failed";
      LOG.error(error);
      return false;
    }

    LOG.debug("HopsFS-Cloud. Block file " + from + " moved to cloud cache location "+to);
    providedBlocksCacheUpdateTS(bpid, to);
    return true;
  }

  /**
   * Checks whether the block object of {@code b} exists in its cloud bucket.
   * Only the block object is checked, not the meta object.
   *
   * @param b the block to look for
   * @return {@code true} if the block object exists
   * @throws IOException if the cloud lookup fails
   */
  public boolean existsInCloud(ExtendedBlock b) throws IOException {
    return cloud.objectExists(b.getCloudBucket(),
            CloudHelper.getBlockKey(prefixSize, b.getLocalBlock()));
  }

  /**
   * Tells whether multipart upload must be skipped for a block.
   *
   * @param b the block being written
   * @return {@code true} if the replica is a
   *     {@link ProvidedReplicaBeingWritten} that is synced, opened for
   *     append, or recovered; {@code false} otherwise
   * @throws IOException if the replica cannot be looked up
   */
  public boolean skipMultipartUpload(ExtendedBlock b) throws IOException {
    final ReplicaInfo replica = getReplicaInfo(b);
    if (!(replica instanceof ProvidedReplicaBeingWritten)) {
      return false;
    }

    final ProvidedReplicaBeingWritten provided = (ProvidedReplicaBeingWritten) replica;
    if (provided.isSynced()) {
      return true;
    }
    if (provided.isAppend()) {
      return true;
    }
    return provided.isRecovered();
  }

  /**
   * Schedules the upload of the next full part of a block that is being
   * written.
   *
   * <p>For the first part a multipart upload is started, provided the block
   * object does not already exist. The part itself is uploaded
   * asynchronously by a {@code PartUploadWorker} submitted to the thread
   * pool.
   *
   * @param b the block being written
   * @throws IOException if no full part is available yet, the block object
   *     already exists, or a cloud call fails
   */
  public void uploadPart(ExtendedBlock b) throws IOException {
    final ProvidedReplicaBeingWritten prbw = (ProvidedReplicaBeingWritten) getReplicaInfo(b);
    if (!prbw.isPartAvailable()) {
      throw new IOException("Not enough data available for multipart upload");
    }

    final String objectKey = CloudHelper.getBlockKey(prefixSize, b.getLocalBlock());
    final int partNumber = prbw.incrementAndGetNextPart();
    if (partNumber == 1) {
      // The first part opens the multipart upload.
      if (cloud.objectExists(b.getCloudBucket(), objectKey)) {
        LOG.error("HopsFS-Cloud. Block: " + b + " alreay exists.");
        throw new IOException("Block: " + b + " alreay exists.");
      }
      final UploadID newUploadID = cloud.startMultipartUpload(b.getCloudBucket(), objectKey,
              getBlockFileMetadata(b.getLocalBlock()));
      prbw.setUploadID(newUploadID);
      prbw.setMultipart(true);
    }

    final File localBlockFile = prbw.getBlockFile();
    // Part numbers start at 1, so part k covers [(k-1)*size, k*size).
    final long from = (partNumber - 1) * prbw.getPartSize();
    final long to = (partNumber) * prbw.getPartSize();
    final PartUploadWorker task = new PartUploadWorker(prbw, b.getCloudBucket(), objectKey,
            prbw.getUploadID(), partNumber, localBlockFile, from, to);
    prbw.addUploadTask(threadPoolExecutor.submit(task));
  }

  /**
   * Completes the multipart upload of a block.
   *
   * <p>Does nothing if the multipart upload was cancelled. Otherwise any
   * bytes beyond the last scheduled part are submitted as one final part,
   * all part uploads are awaited, and the multipart upload is finalized.
   *
   * @param b the block whose upload is completed
   * @throws IOException if a part upload or the finalization fails
   */
  public void finalizeMultipartUpload(ExtendedBlock b) throws IOException {
    final ProvidedReplicaBeingWritten prbw = (ProvidedReplicaBeingWritten) getReplicaInfo(b);
    if (prbw.isCancellMultipart()) {
      return;
    }

    assert prbw.isMultipart();

    final long partsSoFar = prbw.getCurrentPart();
    final String objectKey = CloudHelper.getBlockKey(prefixSize, b.getLocalBlock());

    // upload remaining data as single part
    if (prbw.getBytesOnDisk() > (partsSoFar * prbw.getPartSize())) {
      final File localBlockFile = prbw.getBlockFile();
      final int tailPartNumber = prbw.incrementAndGetNextPart();
      final long from = (partsSoFar) * prbw.getPartSize();
      final long to = prbw.getBytesOnDisk();

      final PartUploadWorker tailTask = new PartUploadWorker(prbw, b.getCloudBucket(),
              objectKey, prbw.getUploadID(), tailPartNumber, localBlockFile, from, to);
      prbw.addUploadTask(threadPoolExecutor.submit(tailTask));
    }

    waitForPartsUpload(prbw);

    cloud.finalizeMultipartUpload(b.getCloudBucket(), objectKey, prbw.getUploadID(),
            prbw.getPartETags());
    prbw.setMultipartComplete(true);

    LOG.info("HopsFS-Cloud. Finalized the multipart upload ");
  }

  /**
   * Waits for every scheduled part upload of a replica and records the
   * returned part reference on the replica.
   *
   * <p>Returns immediately if the multipart upload was cancelled. If the
   * waiting thread is interrupted, the interrupt status is restored and the
   * wait fails with an {@link InterruptedIOException}, so the caller never
   * goes on to finalize the upload with parts that were not awaited.
   *
   * @param prbw the replica whose part uploads are awaited
   * @throws IOException if a part upload failed or the wait was interrupted
   */
  private void waitForPartsUpload(ProvidedReplicaBeingWritten prbw) throws IOException {
    if(prbw.isCancellMultipart()){
      return;
    }

    for (Future<?> future : prbw.getAllUploadTasks()) {
      try {
        PartRef tag = (PartRef) future.get();
        prbw.addEtag(tag);
      } catch (ExecutionException e) {
        LOG.error("Exception was thrown during uploading a block to cloud", e);
        Throwable throwable = e.getCause();
        if (throwable instanceof IOException) {
          throw (IOException) throwable;
        } else {
          throw new IOException(e);
        }
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        InterruptedIOException interrupted = new InterruptedIOException(
                "HopsFS-Cloud. Interrupted while waiting for multipart upload parts to finish");
        interrupted.initCause(e);
        throw interrupted;
      }
    }
  }

  /**
   * Task that uploads one byte range of a block file as a single part of a
   * multipart upload. The task yields the part reference returned by the
   * cloud connector, or {@code null} when the multipart upload has been
   * cancelled.
   */
  class PartUploadWorker implements Callable{
    private final ProvidedReplicaBeingWritten replicaInfo;
    private final String bucket;
    private final String key;
    private final UploadID uploadID;
    private final int partID;
    private final File file;
    private final long startPos;
    private final long endPos;

    /**
     * @param replicaInfo replica being uploaded; consulted for cancellation
     * @param bucket destination bucket
     * @param key destination object key
     * @param uploadID identifier of the multipart upload
     * @param partID number of this part
     * @param file local block file to read from
     * @param startPos start offset of the part in {@code file}
     * @param endPos end offset of the part in {@code file}
     */
    PartUploadWorker(ProvidedReplicaBeingWritten replicaInfo, String bucket, String key, UploadID uploadID,
                     int partID, File file,
                     long startPos, long endPos){
      this.file = file;
      this.startPos = startPos;
      this.endPos = endPos;
      this.partID = partID;
      this.uploadID = uploadID;
      this.key = key;
      this.bucket = bucket;
      this.replicaInfo = replicaInfo;
    }

    @Override
    public Object call() throws Exception {
      // Multipart upload can not be used once the client has called
      // hsync/hflush, so a cancelled upload is skipped entirely.
      if (replicaInfo.isCancellMultipart()) {
        return null;
      }

      final PartRef uploadedPart = cloud.uploadPart(bucket, key, uploadID,
              partID, file, startPos, endPos);
      LOG.info("HopsFS-Cloud. Part id to upload "+partID+
              " start " + startPos + " end " + endPos + " payload size " + (endPos - startPos) +
              " Src File "+file.getName());

      if (!replicaInfo.isCancellMultipart()) {
        return uploadedPart;
      }

      // The client aborted the multipart upload while this part was in flight.
      cloud.abortMultipartUpload(bucket, key, uploadID);
      return null;
    }
  }

  /**
   * Finds the first volume whose storage type is {@code CLOUD}.
   *
   * @return the cloud volume, or {@code null} if none of the volumes is a
   *     cloud volume
   */
  @VisibleForTesting
  public FsVolumeImpl getCloudVolume() {
    FsVolumeImpl cloudVolume = null;
    for (FsVolumeImpl candidate : getVolumes()) {
      if (StorageType.CLOUD == candidate.getStorageType()) {
        cloudVolume = candidate;
        break;
      }
    }
    return cloudVolume;
  }

  /**
   * Builds the user metadata attached to a block object in the cloud.
   * No entries are stored for block objects at present.
   *
   * @param b the block (not used)
   * @return a new, empty, mutable map
   */
  @VisibleForTesting
  public HashMap<String, String> getBlockFileMetadata(Block b) {
    return new HashMap<String, String>();
  }

  /**
   * Builds the user metadata attached to a meta object in the cloud.
   *
   * @param b block supplying the generation stamp and the object size
   * @param metaFile local meta file; only its length is recorded
   * @param blockFile local block file; only its length is recorded
   * @return a new map with the generation stamp, object size and the two
   *     file lengths, all as decimal strings
   */
  @VisibleForTesting
  public HashMap<String, String> getMetaMetadata(Block b, File metaFile, File blockFile) {
    final HashMap<String, String> userMetadata = new HashMap<String, String>();
    userMetadata.put(GEN_STAMP, String.valueOf(b.getGenerationStamp()));
    userMetadata.put(OBJECT_SIZE, String.valueOf(b.getNumBytes()));
    // The file sizes are recorded for debugging only and are not read back.
    userMetadata.put(META_FILE_SIZE, String.valueOf(metaFile.length()));
    userMetadata.put(BLOCK_FILE_SIZE, String.valueOf(blockFile.length()));
    return userMetadata;
  }

  /**
   * Builds the user metadata attached to the meta object of a replica that
   * is still being written. The object size is taken from the current
   * length of the replica's block file.
   *
   * @param b replica supplying the generation stamp and the block file
   * @param metaFile local meta file; only its length is recorded
   * @param blockFile local block file; only its length is recorded
   * @return a new map with the generation stamp, object size and the two
   *     file lengths, all as decimal strings
   */
  @VisibleForTesting
  public HashMap<String, String> getMetaMetadataRBW(ReplicaInfo b, File metaFile, File blockFile) {
    final HashMap<String, String> userMetadata = new HashMap<String, String>();
    userMetadata.put(GEN_STAMP, String.valueOf(b.getGenerationStamp()));
    userMetadata.put(OBJECT_SIZE, String.valueOf(b.getBlockFile().length()));
    // The file sizes are recorded for debugging only and are not read back.
    userMetadata.put(META_FILE_SIZE, String.valueOf(metaFile.length()));
    userMetadata.put(BLOCK_FILE_SIZE, String.valueOf(blockFile.length()));
    return userMetadata;
  }

  /**
   * Reports to the cloud volume's block pool slice that a cached file was
   * accessed.
   *
   * @param bpid block pool ID
   * @param f the cached file that was accessed
   * @throws IOException if the block pool slice cannot be obtained
   */
  public void providedBlocksCacheUpdateTS(String bpid, File f) throws IOException {
    getCloudVolume().getBlockPoolSlice(bpid).fileAccessed(f);
  }

  /**
   * Reports to the cloud volume's block pool slice that a cached file was
   * deleted.
   *
   * @param bpid block pool ID
   * @param f the cached file that was deleted
   * @throws IOException if the block pool slice cannot be obtained
   */
  public void providedBlocksCacheDelete(String bpid, File f) throws IOException {
    getCloudVolume().getBlockPoolSlice(bpid).fileDeleted(f);
  }

  /**
   * Exposes the cloud connector currently used by this dataset.
   *
   * @return the current {@code cloud} connector instance
   */
  @VisibleForTesting
  public CloudPersistenceProvider getCloudConnector() {
    return cloud;
  }

  /**
   * Replaces the cloud connector used by this dataset. The previous
   * connector is not shut down by this method.
   *
   * @param mock the connector to use from now on
   */
  @VisibleForTesting
  public void installMockCloudConnector(CloudPersistenceProvider mock) {
    cloud = mock;
  }

  /**
   * Checks whether the volume map holds a replica for the given block.
   *
   * @param bpid block pool ID
   * @param blockID block ID
   * @return {@code true} if the volume map has an entry for the block
   */
  @VisibleForTesting
  public boolean replicaExistsInVolumeMap(String bpid, long blockID){
    return volumeMap.get(bpid, blockID) != null;
  }

  /**
   * Returns the number of replicas the volume map holds for a block pool.
   *
   * @param bpid block pool ID
   * @return the result of {@code volumeMap.size(bpid)}
   */
  @VisibleForTesting
  public int getOpenReplicasCount(String bpid) {
    return volumeMap.size(bpid);
  }
}


