I was able to get a ManagedNodeGroup working with a custom LaunchTemplate that sets up swap, using Python. Below is what's working for me.
I was able to set up a swap file on the EC2 instance and start the kubelet in a way that allows swap usage. However, I wasn't able to set the swapBehavior config option: the key doesn't seem to be recognized by the kubelet on EKS (nor is the NodeSwap feature gate), despite the documentation saying it should be.
$ pulumi about
CLI
Version 3.46.1
Go Version go1.19.2
Go Compiler gc
Plugins
NAME VERSION
aws 5.7.2
eks 0.42.7
honeycomb 0.0.11
kubernetes 3.23.1
python 3.10.8
import base64
import json
from typing import Tuple

import pulumi
import pulumi_aws as aws
import pulumi_eks as eks
import pulumi_kubernetes as k8s

# EKS_CLUSTER_NAME, TEAM_MEMBERS, _CLUSTER_VPC, _CLUSTER_SUBNETS, and
# _define_node_role() are defined elsewhere in my program.
_aws_account_id = aws.get_caller_identity().account_id
_K8S_VERSION = "1.23" # latest visible in above version of pulumi-eks
_NODE_ROOT_VOLUME_SIZE_GIB = 60
# Script to run on EKS nodes as root before EKS bootstrapping (which starts the kubelet)
#
# Make a 40GB swap file. This is a guess at allowing a few pods to overrun their
# requested RAM significantly.
# https://stackoverflow.com/questions/17173972/how-do-you-add-swap-to-an-ec2-instance
#
# Enable swap usage in the kubelet config, following the editing approach used in
# the bootstrap script.
# https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh
# https://aws.amazon.com/premiumsupport/knowledge-center/eks-worker-nodes-image-cache/
# https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/
# This user data must be in MIME multi-part format when passed to a launch template.
# https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html
#
# From MNG launch template docs:
# "your user data is merged with Amazon EKS user data required for nodes to join the
# cluster. Don't specify any commands in your user data that starts or modifies kubelet."
# Inspecting the instance user data shows this script and the EKS-provided user data
# in separate MIME parts, with this one first.
#
# The swapBehavior key isn't recognized by the kubelet on EKS. Docs say it requires
# featureGates.NodeSwap=true, but the kubelet doesn't recognize that feature gate either.
# jq adds quotes around the "swapBehavior" key.
# It seems like the behavior defaults to limited swap: pods are killed at their
# resource limit, regardless of swap availability/usage.
# TODO set UnlimitedSwap if/when possible on AWS, using:
# echo "$(jq ".memorySwap={swapBehavior:\"UnlimitedSwap\"}" $KUBELET_CONFIG)" > $KUBELET_CONFIG
_NODE_USER_DATA_ADD_SWAP_AND_ENABLE_IN_KUBELET_CONFIG = r"""#!/bin/bash
set -e
# Use fallocate which is much faster than dd (essentially instant) since we do not
# care about the initial contents of the file.
fallocate -l 40G /swapfile
chmod 600 /swapfile
mkswap /swapfile
swapon /swapfile
echo "/swapfile swap swap defaults 0 0" >> /etc/fstab
KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json
cp $KUBELET_CONFIG $KUBELET_CONFIG.orig
echo "$(jq ".failSwapOn=false" $KUBELET_CONFIG)" > $KUBELET_CONFIG
"""
_USER_DATA_MIME_HEADER = """MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="//"
--//
Content-Type: text/x-shellscript; charset="us-ascii"
"""
_USER_DATA_MIME_FOOTER = """
--//--
"""
def _wrap_and_encode_user_data(script_text: str) -> str:
mime_encapsulated = _USER_DATA_MIME_HEADER + script_text + _USER_DATA_MIME_FOOTER
encoded_bytes = base64.b64encode(mime_encapsulated.encode())
return encoded_bytes.decode("latin1")
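# Quick sanity check of the wrapping (illustrative only, not part of the stack):
# decoding the encoded user data should show the shell script inside the
# multipart MIME envelope, e.g.
#   decoded = base64.b64decode(
#       _wrap_and_encode_user_data(_NODE_USER_DATA_ADD_SWAP_AND_ENABLE_IN_KUBELET_CONFIG)
#   ).decode()
#   assert "Content-Type: multipart/mixed" in decoded
#   assert "fallocate -l 40G /swapfile" in decoded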
def _define_cluster_and_get_provider() -> Tuple[eks.Cluster, k8s.Provider]:
# https://www.pulumi.com/docs/guides/crosswalk/aws/eks/
# https://www.pulumi.com/registry/packages/eks/api-docs/cluster/#cluster
# Map AWS IAM users to Kubernetes internal RBAC admin group. Mapping individual
# users avoids having to go from a group to a role with assume-role policies.
# Kubernetes has its own permissions (RBAC) system, with predefined groups for
# common permissions levels. AWS EKS provides translation from IAM to that, but we
# must explicitly map particular users or roles that should be granted permissions
# within the cluster.
#
# AWS docs: https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html
# Detailed example: https://apperati.io/articles/managing_eks_access-bs/
# IAM groups are not supported, only users or roles:
# https://github.com/kubernetes-sigs/aws-iam-authenticator/issues/176
user_mappings = []
for username in TEAM_MEMBERS:
user_mappings.append(
eks.UserMappingArgs(
# AWS IAM user to set permissions for
user_arn=f"arn:aws:iam::{_aws_account_id}:user/{username}",
# k8s RBAC group from which this IAM user will get permissions
groups=["system:masters"],
# k8s RBAC username to create for the user
username=username,
)
)
node_role = _define_node_role(EKS_CLUSTER_NAME)
cluster = eks.Cluster(
EKS_CLUSTER_NAME,
name=EKS_CLUSTER_NAME,
version=_K8S_VERSION,
# Details of VPC usage for EKS:
# https://docs.aws.amazon.com/eks/latest/userguide/network_reqs.html
vpc_id=_CLUSTER_VPC,
subnet_ids=_CLUSTER_SUBNETS,
# OpenID Connect Provider maps from k8s to AWS IDs.
# Get the OIDC's ID with:
# aws eks describe-cluster --name <CLUSTER_NAME> --query "cluster.identity.oidc.issuer" --output text
create_oidc_provider=True,
user_mappings=user_mappings,
skip_default_node_group=True,
instance_role=node_role,
)
# Export the kubeconfig to allow kubectl to access the cluster. For example:
# pulumi stack output my-kubeconfig > kubeconfig.yml
# KUBECONFIG=./kubeconfig.yml kubectl get pods -A
pulumi.export(f"my-kubeconfig", cluster.kubeconfig)
# Work around cluster.provider being the wrong type for Namespace to use.
# https://github.com/pulumi/pulumi-eks/issues/662
provider = k8s.Provider(
f"my-cluster-provider",
kubeconfig=cluster.kubeconfig.apply(lambda k: json.dumps(k)),
)
# Configure startup script and root volume size to allow for swap.
#
# Changing the launch template (or included user data script) will cause the
# ManagedNodeGroup to replace nodes, which takes 10-15 minutes.
launch_template = aws.ec2.LaunchTemplate(
f"{EKS_CLUSTER_NAME}-launch-template",
# Set the default device's size to allow for swap.
block_device_mappings=[
aws.ec2.LaunchTemplateBlockDeviceMappingArgs(
device_name="/dev/xvda",
ebs=aws.ec2.LaunchTemplateBlockDeviceMappingEbsArgs(
volume_size=_NODE_ROOT_VOLUME_SIZE_GIB,
),
),
],
user_data=_wrap_and_encode_user_data(
_NODE_USER_DATA_ADD_SWAP_AND_ENABLE_IN_KUBELET_CONFIG
),
# The default version shows up first in the UI, so update it even though
# we don't really need to since we use latest_version below.
update_default_version=True,
# Other settings, such as tags required for the node to join the group/cluster,
# are filled in by default.
)
# The EC2 instances that the cluster will use to execute pods.
# https://www.pulumi.com/registry/packages/eks/api-docs/managednodegroup/
eks.ManagedNodeGroup(
f"{EKS_CLUSTER_NAME}-managed-node-group",
node_group_name=f"{EKS_CLUSTER_NAME}-managed-node-group",
cluster=cluster.core,
version=_K8S_VERSION,
subnet_ids=_CLUSTER_SUBNETS,
node_role=node_role,
instance_types=["r6i.2xlarge"],
scaling_config=aws.eks.NodeGroupScalingConfigArgs(
min_size=1,
desired_size=2,
max_size=4,
),
launch_template={
"id": launch_template.id,
"version": launch_template.latest_version,
},
)
return cluster, provider
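# The returned provider is what later Kubernetes resources (such as the Namespace
# mentioned in the workaround comment above) are created against. Illustrative
# sketch only; the resource names here are my own:
#   cluster, provider = _define_cluster_and_get_provider()
#   k8s.core.v1.Namespace(
#       "my-namespace",
#       opts=pulumi.ResourceOptions(provider=provider),
#   )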