# Why?

1. You want to run R code on the cloud.
2. For whatever reason, you don’t want to use Google or Azure.

# Credit

I took most of the code from this gist

# The code

This function takes a list of your instances and the path to your private key, and returns a cluster object that can be used with the future package. I was told that this function will be part of a new package soon.

#' Build a PSOCK cluster from a set of EC2 instances
#'
#' Polls every instance until it reports state "running" and status "ok",
#' looks up the public IP of each instance, and then connects to all of them
#' over SSH with `makeClusterPSOCK()`.
#'
#' @param instances List of EC2 instances (e.g. the result of
#'   `run_instances()`).
#' @param key Path to the private SSH key registered with AWS.
#' @param poll_interval Seconds to wait between two polling rounds while the
#'   instances are still starting up. Prevents hammering the EC2 API.
#' @return The cluster object returned by `makeClusterPSOCK()`; it can be
#'   passed as `workers` to `plan(cluster, ...)`.
aws_cluster <- function(instances, key, poll_interval = 5) {
  # Fail fast: waiting for instances is pointless if we cannot connect later
  if (length(instances) == 0L) {
    stop("`instances` must contain at least one EC2 instance.", call. = FALSE)
  }
  if (!is.character(key) || length(key) != 1L || !file.exists(key)) {
    stop("`key` must be the path to an existing private key file.",
         call. = FALSE)
  }

  # TRUE only when the (possibly missing or nested) field equals `expected`;
  # a missing field yields FALSE instead of an "argument is of length zero"
  # error inside `if`.
  field_is <- function(field, expected) {
    isTRUE(unlist(field, use.names = FALSE)[1L] == expected)
  }

  # Ensure we are running and initialized
  is_running <- vector("logical", length(instances))
  is_initialized <- vector("logical", length(instances))

  while (!all(is_initialized)) {
    for (ii in seq_along(instances)) {
      # Nothing left to learn about an instance that is already initialized
      if (is_initialized[ii]) {
        next
      }

      # Current instance
      i_ii <- instance_status(instances[[ii]])

      # Initially, we don't get any information
      if (length(i_ii) == 0) {
        next
      }

      # First check if we are at least running
      if (!is_running[ii] &&
          field_is(i_ii$item$instanceState$name, "running")) {
        is_running[ii] <- TRUE
        message("Instance ", ii, " is running. Now initializing.")
      }

      # Then check if we are initialized
      if (field_is(i_ii$item$instanceStatus$status, "ok")) {
        is_initialized[ii] <- TRUE
        message("Instance ", ii, " is initialized.")
      }
    }

    # Pause between polling rounds instead of busy-waiting on the API
    if (!all(is_initialized)) {
      Sys.sleep(poll_interval)
    }
  }

  # Get the public IPs
  public_ip <- vapply(
    instances,
    function(i_ii) {
      i_di <- describe_instances(i_ii)
      nic <- i_di[[1]]$instancesSet[[1]]$networkInterfaceSet
      ip <- nic$privateIpAddressesSet$association$publicIp
      if (!is.character(ip) || length(ip) != 1L) {
        stop("Could not determine the public IP of an instance; ",
             "check that it has a public address assigned.",
             call. = FALSE)
      }
      ip
    },
    FUN.VALUE = character(1)
  )

  # Connect!
  cl <- makeClusterPSOCK(
    ## Public IP number of EC2 instance
    public_ip,
    ## User name (always 'ubuntu')
    user = "ubuntu",
    ## Use private SSH key registered with AWS
    rshopts = c(
      "-o", "StrictHostKeyChecking=no",
      "-o", "IdentitiesOnly=yes",
      "-i", key
    ),
    ## Set up .libPaths() for the 'ubuntu' user and
    ## install future/purrr/furrr packages
    rscript_args = c(
      "-e", shQuote("local({p <- Sys.getenv('R_LIBS_USER'); dir.create(p, recursive = TRUE, showWarnings = FALSE); .libPaths(p)})"),
      "-e", shQuote("install.packages(c('future', 'purrr', 'furrr'))")
    ),
    dryrun = FALSE
  )

  cl
}

# Load the relevant packages

# devtools::install_github("cloudyr/aws.ec2", ref = devtools::github_pull(38))
library(aws.ec2)
library(future)
library(furrr)
library(tictoc)
aws.signature::use_credentials()

# Baseline test

plan(sequential)
tic("baseline")
future_map(1:2, ~Sys.sleep(60))
toc()

# baseline: 121.231 sec elapsed

image <- "ami-fd2ffe87"
# Check your VPC and Security Group settings
s <- describe_subnets()
g <- describe_sgroups("sg-16fa225d")
kp <- describe_keypairs("synology") # <- Your keypair here
# Launch the instance using appropriate settings
i <- run_instances(image = image,
                   type = "t2.medium",
                   sgroup = g,
                   subnet = s[[1]],
                   min = 2L, # <- Launching 2 medium instances
                   keypair = kp$synology)

# Build the cluster from the launched instances in `i`, connecting with the
# private key at the given path (replace it with the path to your own key).
cl <- aws_cluster(instances = i, key = "/home/ignacio/AWS/synology.pem")

############## Now we have a cluster object we can use with future
# Route futures to the cluster workers, then time the same two 60-second
# sleeps that were used for the sequential baseline.
plan(cluster, workers = cl)
tic("test")
future_map(1:2, ~Sys.sleep(60))
toc()


test: 61.698 sec elapsed

Finally, you can programmatically shut down the cluster we just created:

# Stop the cluster `cl` first, then terminate the instances held in `i`.
parallel::stopCluster(cl)
terminate_instances(i)