Skip to content

Commit 811462e

Browse files
authored
feat(aws-ecs): instance autoscaling and drain hook (#1192)
Make it easy to configure EC2 instance autoscaling for your cluster, and automatically add a Lifecycle Hook Lambda that will delay instance termination until all ECS tasks have drained from the instance. Fixes #1162.
1 parent 52b7554 commit 811462e

11 files changed

+777
-25
lines changed

packages/@aws-cdk/aws-autoscaling/lib/auto-scaling-group.ts

+53-12
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@ export interface AutoScalingGroupProps {
2828

2929
/**
3030
* Minimum number of instances in the fleet
31+
*
3132
* @default 1
3233
*/
3334
minSize?: number;
3435

3536
/**
3637
* Maximum number of instances in the fleet
37-
* @default 1
38+
*
39+
* @default desiredCapacity
3840
*/
3941
maxSize?: number;
4042

@@ -234,9 +236,12 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup
234236

235237
launchConfig.addDependency(this.role);
236238

239+
const desiredCapacity =
240+
(props.desiredCapacity !== undefined ? props.desiredCapacity :
241+
(props.minSize !== undefined ? props.minSize :
242+
(props.maxSize !== undefined ? props.maxSize : 1)));
237243
const minSize = props.minSize !== undefined ? props.minSize : 1;
238-
const maxSize = props.maxSize !== undefined ? props.maxSize : 1;
239-
const desiredCapacity = props.desiredCapacity !== undefined ? props.desiredCapacity : 1;
244+
const maxSize = props.maxSize !== undefined ? props.maxSize : desiredCapacity;
240245

241246
if (desiredCapacity < minSize || desiredCapacity > maxSize) {
242247
throw new Error(`Should have minSize (${minSize}) <= desiredCapacity (${desiredCapacity}) <= maxSize (${maxSize})`);
@@ -322,8 +327,8 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup
322327
/**
323328
* Scale out or in based on time
324329
*/
325-
public scaleOnSchedule(id: string, props: BasicScheduledActionProps) {
326-
new ScheduledAction(this, `ScheduledAction${id}`, {
330+
public scaleOnSchedule(id: string, props: BasicScheduledActionProps): ScheduledAction {
331+
return new ScheduledAction(this, `ScheduledAction${id}`, {
327332
autoScalingGroup: this,
328333
...props,
329334
});
@@ -332,7 +337,7 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup
332337
/**
333338
* Scale out or in to achieve a target CPU utilization
334339
*/
335-
public scaleOnCpuUtilization(id: string, props: CpuUtilizationScalingProps) {
340+
public scaleOnCpuUtilization(id: string, props: CpuUtilizationScalingProps): TargetTrackingScalingPolicy {
336341
return new TargetTrackingScalingPolicy(this, `ScalingPolicy${id}`, {
337342
autoScalingGroup: this,
338343
predefinedMetric: PredefinedMetric.ASGAverageCPUUtilization,
@@ -344,7 +349,7 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup
344349
/**
345350
* Scale out or in to achieve a target network ingress rate
346351
*/
347-
public scaleOnIncomingBytes(id: string, props: NetworkUtilizationScalingProps) {
352+
public scaleOnIncomingBytes(id: string, props: NetworkUtilizationScalingProps): TargetTrackingScalingPolicy {
348353
return new TargetTrackingScalingPolicy(this, `ScalingPolicy${id}`, {
349354
autoScalingGroup: this,
350355
predefinedMetric: PredefinedMetric.ASGAverageNetworkIn,
@@ -356,7 +361,7 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup
356361
/**
357362
* Scale out or in to achieve a target network egress rate
358363
*/
359-
public scaleOnOutgoingBytes(id: string, props: NetworkUtilizationScalingProps) {
364+
public scaleOnOutgoingBytes(id: string, props: NetworkUtilizationScalingProps): TargetTrackingScalingPolicy {
360365
return new TargetTrackingScalingPolicy(this, `ScalingPolicy${id}`, {
361366
autoScalingGroup: this,
362367
predefinedMetric: PredefinedMetric.ASGAverageNetworkOut,
@@ -371,7 +376,7 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup
371376
* The AutoScalingGroup must have been attached to an Application Load Balancer
372377
* in order to be able to call this.
373378
*/
374-
public scaleOnRequestCount(id: string, props: RequestCountScalingProps) {
379+
public scaleOnRequestCount(id: string, props: RequestCountScalingProps): TargetTrackingScalingPolicy {
375380
if (this.albTargetGroup === undefined) {
376381
throw new Error('Attach the AutoScalingGroup to an Application Load Balancer before calling scaleOnRequestCount()');
377382
}
@@ -389,13 +394,14 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup
389394
// Target tracking policy can only be created after the load balancer has been
390395
// attached to the targetgroup (because we need its ARN).
391396
policy.addDependency(this.albTargetGroup.loadBalancerDependency());
397+
return policy;
392398
}
393399

394400
/**
395401
* Scale out or in in order to keep a metric around a target value
396402
*/
397-
public scaleToTrackMetric(id: string, props: MetricTargetTrackingProps) {
398-
new TargetTrackingScalingPolicy(this, `ScalingPolicy${id}`, {
403+
public scaleToTrackMetric(id: string, props: MetricTargetTrackingProps): TargetTrackingScalingPolicy {
404+
return new TargetTrackingScalingPolicy(this, `ScalingPolicy${id}`, {
399405
autoScalingGroup: this,
400406
customMetric: props.metric,
401407
...props
@@ -405,7 +411,7 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup
405411
/**
406412
* Scale out or in, in response to a metric
407413
*/
408-
public scaleOnMetric(id: string, props: BasicStepScalingPolicyProps) {
414+
public scaleOnMetric(id: string, props: BasicStepScalingPolicyProps): StepScalingPolicy {
409415
return new StepScalingPolicy(this, id, { ...props, autoScalingGroup: this });
410416
}
411417

@@ -658,6 +664,41 @@ export interface IAutoScalingGroup {
658664
* The name of the AutoScalingGroup
659665
*/
660666
readonly autoScalingGroupName: string;
667+
668+
/**
669+
* Send a message to either an SQS queue or SNS topic when instances launch or terminate
670+
*/
671+
onLifecycleTransition(id: string, props: BasicLifecycleHookProps): LifecycleHook;
672+
673+
/**
674+
* Scale out or in based on time
675+
*/
676+
scaleOnSchedule(id: string, props: BasicScheduledActionProps): ScheduledAction;
677+
678+
/**
679+
* Scale out or in to achieve a target CPU utilization
680+
*/
681+
scaleOnCpuUtilization(id: string, props: CpuUtilizationScalingProps): TargetTrackingScalingPolicy;
682+
683+
/**
684+
* Scale out or in to achieve a target network ingress rate
685+
*/
686+
scaleOnIncomingBytes(id: string, props: NetworkUtilizationScalingProps): TargetTrackingScalingPolicy;
687+
688+
/**
689+
* Scale out or in to achieve a target network egress rate
690+
*/
691+
scaleOnOutgoingBytes(id: string, props: NetworkUtilizationScalingProps): TargetTrackingScalingPolicy;
692+
693+
/**
694+
* Scale out or in in order to keep a metric around a target value
695+
*/
696+
scaleToTrackMetric(id: string, props: MetricTargetTrackingProps): TargetTrackingScalingPolicy;
697+
698+
/**
699+
* Scale out or in, in response to a metric
700+
*/
701+
scaleOnMetric(id: string, props: BasicStepScalingPolicyProps): StepScalingPolicy;
661702
}
662703

663704
/**

packages/@aws-cdk/aws-autoscaling/lib/lifecycle-hook.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ export interface BasicLifecycleHookProps {
2727
*
2828
* If the lifecycle hook times out, perform the action in DefaultResult.
2929
*/
30-
heartbeatTimeout?: number;
30+
heartbeatTimeoutSec?: number;
3131

3232
/**
3333
* The state of the Amazon EC2 instance to which you want to attach the lifecycle hook.
@@ -87,7 +87,7 @@ export class LifecycleHook extends cdk.Construct implements api.ILifecycleHook {
8787
const resource = new cloudformation.LifecycleHookResource(this, 'Resource', {
8888
autoScalingGroupName: props.autoScalingGroup.autoScalingGroupName,
8989
defaultResult: props.defaultResult,
90-
heartbeatTimeout: props.heartbeatTimeout,
90+
heartbeatTimeout: props.heartbeatTimeoutSec,
9191
lifecycleHookName: props.lifecycleHookName,
9292
lifecycleTransition: props.lifecycleTransition,
9393
notificationMetadata: props.notificationMetadata,

packages/@aws-cdk/aws-autoscaling/test/test.auto-scaling-group.ts

+72
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,78 @@ export = {
149149
test.done();
150150
},
151151

152+
'can specify only min capacity'(test: Test) {
153+
// GIVEN
154+
const stack = new cdk.Stack();
155+
const vpc = mockVpc(stack);
156+
157+
// WHEN
158+
new autoscaling.AutoScalingGroup(stack, 'MyFleet', {
159+
instanceType: new ec2.InstanceTypePair(ec2.InstanceClass.M4, ec2.InstanceSize.Micro),
160+
machineImage: new ec2.AmazonLinuxImage(),
161+
vpc,
162+
minSize: 10
163+
});
164+
165+
// THEN
166+
expect(stack).to(haveResource("AWS::AutoScaling::AutoScalingGroup", {
167+
MinSize: "10",
168+
MaxSize: "10",
169+
DesiredCapacity: "10",
170+
}
171+
));
172+
173+
test.done();
174+
},
175+
176+
'can specify only max capacity'(test: Test) {
177+
// GIVEN
178+
const stack = new cdk.Stack();
179+
const vpc = mockVpc(stack);
180+
181+
// WHEN
182+
new autoscaling.AutoScalingGroup(stack, 'MyFleet', {
183+
instanceType: new ec2.InstanceTypePair(ec2.InstanceClass.M4, ec2.InstanceSize.Micro),
184+
machineImage: new ec2.AmazonLinuxImage(),
185+
vpc,
186+
maxSize: 10
187+
});
188+
189+
// THEN
190+
expect(stack).to(haveResource("AWS::AutoScaling::AutoScalingGroup", {
191+
MinSize: "1",
192+
MaxSize: "10",
193+
DesiredCapacity: "10",
194+
}
195+
));
196+
197+
test.done();
198+
},
199+
200+
'can specify only desiredCount'(test: Test) {
201+
// GIVEN
202+
const stack = new cdk.Stack();
203+
const vpc = mockVpc(stack);
204+
205+
// WHEN
206+
new autoscaling.AutoScalingGroup(stack, 'MyFleet', {
207+
instanceType: new ec2.InstanceTypePair(ec2.InstanceClass.M4, ec2.InstanceSize.Micro),
208+
machineImage: new ec2.AmazonLinuxImage(),
209+
vpc,
210+
desiredCapacity: 10
211+
});
212+
213+
// THEN
214+
expect(stack).to(haveResource("AWS::AutoScaling::AutoScalingGroup", {
215+
MinSize: "1",
216+
MaxSize: "10",
217+
DesiredCapacity: "10",
218+
}
219+
));
220+
221+
test.done();
222+
},
223+
152224
'addToRolePolicy can be used to add statements to the role policy'(test: Test) {
153225
const stack = new cdk.Stack(undefined, 'MyStack', { env: { region: 'us-east-1', account: '1234' }});
154226
const vpc = mockVpc(stack);

packages/@aws-cdk/aws-ecs/README.md

+24-2
Original file line numberDiff line numberDiff line change
@@ -236,10 +236,32 @@ containers are running on for you. If you're running an ECS cluster however,
236236
your EC2 instances might fill up as your number of Tasks goes up.
237237

238238
To avoid placement errors, you will want to configure AutoScaling for your
239-
EC2 instance group so that your instance count scales with demand.
239+
EC2 instance group so that your instance count scales with demand. To keep
240+
your EC2 instances halfway loaded, scaling up to a maximum of 30 instances
241+
if required:
242+
243+
```ts
244+
const autoScalingGroup = cluster.addDefaultAutoScalingGroupCapacity({
245+
instanceType: new ec2.InstanceType("t2.xlarge"),
246+
minCapacity: 3,
247+
maxCapacity: 30,
248+
instanceCount: 3,
249+
250+
// Give instances 5 minutes to drain running tasks when an instance is
251+
// terminated. This is the default, turn this off by specifying 0 or
252+
// change the timeout up to 900 seconds.
253+
taskDrainTimeSeconds: 300,
254+
});
255+
256+
autoScalingGroup.scaleOnCpuUtilization('KeepCpuHalfwayLoaded', {
257+
targetUtilizationPercent: 50
258+
});
259+
```
260+
261+
See the `@aws-cdk/aws-autoscaling` library for more autoscaling options
262+
you can configure on your instances.
240263

241264
### Roadmap
242265

243-
- [ ] Instance AutoScaling
244266
- [ ] Service Discovery Integration
245267
- [ ] Private registry authentication

packages/@aws-cdk/aws-ecs/lib/cluster.ts

+47-6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import cloudwatch = require ('@aws-cdk/aws-cloudwatch');
33
import ec2 = require('@aws-cdk/aws-ec2');
44
import iam = require('@aws-cdk/aws-iam');
55
import cdk = require('@aws-cdk/cdk');
6+
import { InstanceDrainHook } from './drain-hook/instance-drain-hook';
67
import { cloudformation } from './ecs.generated';
78

89
/**
@@ -70,19 +71,23 @@ export class Cluster extends cdk.Construct implements ICluster {
7071

7172
/**
7273
* Add a default-configured AutoScalingGroup running the ECS-optimized AMI to this Cluster
74+
*
75+
* Returns the AutoScalingGroup so you can add autoscaling settings to it.
7376
*/
74-
public addDefaultAutoScalingGroupCapacity(options: AddDefaultAutoScalingGroupOptions) {
77+
public addDefaultAutoScalingGroupCapacity(options: AddDefaultAutoScalingGroupOptions): autoscaling.AutoScalingGroup {
7578
const autoScalingGroup = new autoscaling.AutoScalingGroup(this, 'DefaultAutoScalingGroup', {
7679
vpc: this.vpc,
7780
instanceType: options.instanceType,
7881
machineImage: new EcsOptimizedAmi(),
7982
updateType: autoscaling.UpdateType.ReplacingUpdate,
80-
minSize: 0,
81-
maxSize: options.instanceCount || 1,
82-
desiredCapacity: options.instanceCount || 1
83+
minSize: options.minCapacity,
84+
maxSize: options.maxCapacity,
85+
desiredCapacity: options.instanceCount,
8386
});
8487

85-
this.addAutoScalingGroupCapacity(autoScalingGroup);
88+
this.addAutoScalingGroupCapacity(autoScalingGroup, options);
89+
90+
return autoScalingGroup;
8691
}
8792

8893
/**
@@ -118,6 +123,15 @@ export class Cluster extends cdk.Construct implements ICluster {
118123
"logs:CreateLogStream",
119124
"logs:PutLogEvents"
120125
).addAllResources());
126+
127+
// 0 disables, otherwise forward to underlying implementation which picks the sane default
128+
if (options.taskDrainTimeSeconds !== 0) {
129+
new InstanceDrainHook(autoScalingGroup, 'DrainECSHook', {
130+
autoScalingGroup,
131+
cluster: this,
132+
drainTimeSec: options.taskDrainTimeSeconds
133+
});
134+
}
121135
}
122136

123137
/**
@@ -291,12 +305,25 @@ export interface AddAutoScalingGroupCapacityOptions {
291305
* @default false
292306
*/
293307
containersAccessInstanceRole?: boolean;
308+
309+
/**
310+
* Give tasks this many seconds to complete when instances are being scaled in.
311+
*
312+
* Task draining adds a Lambda and a Lifecycle hook to your AutoScalingGroup
313+
* that will delay instance termination until all ECS tasks have drained from
314+
* the instance.
315+
*
316+
* Set to 0 to disable task draining.
317+
*
318+
* @default 300
319+
*/
320+
taskDrainTimeSeconds?: number;
294321
}
295322

296323
/**
297324
* Properties for adding autoScalingGroup
298325
*/
299-
export interface AddDefaultAutoScalingGroupOptions {
326+
export interface AddDefaultAutoScalingGroupOptions extends AddAutoScalingGroupCapacityOptions {
300327

301328
/**
302329
* The type of EC2 instance to launch into your Autoscaling Group
@@ -309,4 +336,18 @@ export interface AddDefaultAutoScalingGroupOptions {
309336
* @default 1
310337
*/
311338
instanceCount?: number;
339+
340+
/**
341+
* Maximum number of instances
342+
*
343+
* @default Same as instanceCount
344+
*/
345+
maxCapacity?: number;
346+
347+
/**
348+
* Minimum number of instances
349+
*
350+
* @default 1
351+
*/
352+
minCapacity?: number;
312353
}

0 commit comments

Comments
 (0)