Skip to content

Commit f23d52f

Browse files
authored Dec 7, 2024
feat: Node pools can enable fast_socket (#2200)
1 parent 1f85f66 commit f23d52f

File tree

16 files changed

+154
-0
lines changed

16 files changed

+154
-0
lines changed
 

‎README.md

+1
Original file line number | Diff line number | Diff line change
@@ -317,6 +317,7 @@ The node_pools variable takes the following parameters:
317317
| disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional |
318318
| disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional |
319319
| effect | Effect for the taint | | Required |
320+
| enable_fast_socket | Enable the NCCL Fast Socket feature. `enable_gvnic` must also be enabled. | null | Optional |
320321
| enable_gcfs | Google Container File System (gcfs) has to be enabled for image streaming to be active. Needs image_type to be set to COS_CONTAINERD. | false | Optional |
321322
| enable_gvnic | gVNIC (GVE) is an alternative to the virtIO-based ethernet driver. Needs a Container-Optimized OS node image. | false | Optional |
322323
| enable_integrity_monitoring | Enables monitoring and attestation of the boot integrity of the instance. The attestation is performed against the integrity policy baseline. This baseline is initially derived from the implicitly trusted boot image when the instance is created. | true | Optional |

‎autogen/main/README.md

+1
Original file line number | Diff line number | Diff line change
@@ -205,6 +205,7 @@ The node_pools variable takes the following parameters:
205205
| disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional |
206206
| disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional |
207207
| effect | Effect for the taint | | Required |
208+
| enable_fast_socket | Enable the NCCL Fast Socket feature. `enable_gvnic` must also be enabled. | null | Optional |
208209
| enable_gcfs | Google Container File System (gcfs) has to be enabled for image streaming to be active. Needs image_type to be set to COS_CONTAINERD. | false | Optional |
209210
| enable_gvnic | gVNIC (GVE) is an alternative to the virtIO-based ethernet driver. Needs a Container-Optimized OS node image. | false | Optional |
210211
| enable_integrity_monitoring | Enables monitoring and attestation of the boot integrity of the instance. The attestation is performed against the integrity policy baseline. This baseline is initially derived from the implicitly trusted boot image when the instance is created. | true | Optional |

‎autogen/main/cluster.tf.tmpl

+13
Original file line number | Diff line number | Diff line change
@@ -537,6 +537,13 @@ resource "google_container_cluster" "primary" {
537537
}
538538
}
539539

540+
dynamic "fast_socket" {
541+
for_each = lookup(var.node_pools[0], "enable_fast_socket", null) != null ? [var.node_pools[0].enable_fast_socket] : []
542+
content {
543+
enabled = fast_socket.value
544+
}
545+
}
546+
540547
dynamic "kubelet_config" {
541548
for_each = length(setintersection(
542549
keys(var.node_pools[0]),
@@ -930,6 +937,12 @@ resource "google_container_node_pool" "windows_pools" {
930937
enabled = gvnic.value
931938
}
932939
}
940+
dynamic "fast_socket" {
941+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
942+
content {
943+
enabled = fast_socket.value
944+
}
945+
}
933946
dynamic "reservation_affinity" {
934947
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
935948
content {

‎cluster.tf

+19
Original file line number | Diff line number | Diff line change
@@ -418,6 +418,13 @@ resource "google_container_cluster" "primary" {
418418
}
419419
}
420420

421+
dynamic "fast_socket" {
422+
for_each = lookup(var.node_pools[0], "enable_fast_socket", null) != null ? [var.node_pools[0].enable_fast_socket] : []
423+
content {
424+
enabled = fast_socket.value
425+
}
426+
}
427+
421428
dynamic "kubelet_config" {
422429
for_each = length(setintersection(
423430
keys(var.node_pools[0]),
@@ -641,6 +648,12 @@ resource "google_container_node_pool" "pools" {
641648
enabled = gvnic.value
642649
}
643650
}
651+
dynamic "fast_socket" {
652+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
653+
content {
654+
enabled = fast_socket.value
655+
}
656+
}
644657
dynamic "reservation_affinity" {
645658
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
646659
content {
@@ -932,6 +945,12 @@ resource "google_container_node_pool" "windows_pools" {
932945
enabled = gvnic.value
933946
}
934947
}
948+
dynamic "fast_socket" {
949+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
950+
content {
951+
enabled = fast_socket.value
952+
}
953+
}
935954
dynamic "reservation_affinity" {
936955
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
937956
content {

‎modules/beta-private-cluster-update-variant/README.md

+1
Original file line number | Diff line number | Diff line change
@@ -371,6 +371,7 @@ The node_pools variable takes the following parameters:
371371
| disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional |
372372
| disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional |
373373
| effect | Effect for the taint | | Required |
374+
| enable_fast_socket | Enable the NCCL Fast Socket feature. `enable_gvnic` must also be enabled. | null | Optional |
374375
| enable_gcfs | Google Container File System (gcfs) has to be enabled for image streaming to be active. Needs image_type to be set to COS_CONTAINERD. | false | Optional |
375376
| enable_gvnic | gVNIC (GVE) is an alternative to the virtIO-based ethernet driver. Needs a Container-Optimized OS node image. | false | Optional |
376377
| enable_integrity_monitoring | Enables monitoring and attestation of the boot integrity of the instance. The attestation is performed against the integrity policy baseline. This baseline is initially derived from the implicitly trusted boot image when the instance is created. | true | Optional |

‎modules/beta-private-cluster-update-variant/cluster.tf

+19
Original file line number | Diff line number | Diff line change
@@ -456,6 +456,13 @@ resource "google_container_cluster" "primary" {
456456
}
457457
}
458458

459+
dynamic "fast_socket" {
460+
for_each = lookup(var.node_pools[0], "enable_fast_socket", null) != null ? [var.node_pools[0].enable_fast_socket] : []
461+
content {
462+
enabled = fast_socket.value
463+
}
464+
}
465+
459466
dynamic "kubelet_config" {
460467
for_each = length(setintersection(
461468
keys(var.node_pools[0]),
@@ -799,6 +806,12 @@ resource "google_container_node_pool" "pools" {
799806
enabled = gvnic.value
800807
}
801808
}
809+
dynamic "fast_socket" {
810+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
811+
content {
812+
enabled = fast_socket.value
813+
}
814+
}
802815
dynamic "reservation_affinity" {
803816
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
804817
content {
@@ -1104,6 +1117,12 @@ resource "google_container_node_pool" "windows_pools" {
11041117
enabled = gvnic.value
11051118
}
11061119
}
1120+
dynamic "fast_socket" {
1121+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
1122+
content {
1123+
enabled = fast_socket.value
1124+
}
1125+
}
11071126
dynamic "reservation_affinity" {
11081127
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
11091128
content {

‎modules/beta-private-cluster/README.md

+1
Original file line number | Diff line number | Diff line change
@@ -349,6 +349,7 @@ The node_pools variable takes the following parameters:
349349
| disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional |
350350
| disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional |
351351
| effect | Effect for the taint | | Required |
352+
| enable_fast_socket | Enable the NCCL Fast Socket feature. `enable_gvnic` must also be enabled. | null | Optional |
352353
| enable_gcfs | Google Container File System (gcfs) has to be enabled for image streaming to be active. Needs image_type to be set to COS_CONTAINERD. | false | Optional |
353354
| enable_gvnic | gVNIC (GVE) is an alternative to the virtIO-based ethernet driver. Needs a Container-Optimized OS node image. | false | Optional |
354355
| enable_integrity_monitoring | Enables monitoring and attestation of the boot integrity of the instance. The attestation is performed against the integrity policy baseline. This baseline is initially derived from the implicitly trusted boot image when the instance is created. | true | Optional |

‎modules/beta-private-cluster/cluster.tf

+19
Original file line number | Diff line number | Diff line change
@@ -456,6 +456,13 @@ resource "google_container_cluster" "primary" {
456456
}
457457
}
458458

459+
dynamic "fast_socket" {
460+
for_each = lookup(var.node_pools[0], "enable_fast_socket", null) != null ? [var.node_pools[0].enable_fast_socket] : []
461+
content {
462+
enabled = fast_socket.value
463+
}
464+
}
465+
459466
dynamic "kubelet_config" {
460467
for_each = length(setintersection(
461468
keys(var.node_pools[0]),
@@ -718,6 +725,12 @@ resource "google_container_node_pool" "pools" {
718725
enabled = gvnic.value
719726
}
720727
}
728+
dynamic "fast_socket" {
729+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
730+
content {
731+
enabled = fast_socket.value
732+
}
733+
}
721734
dynamic "reservation_affinity" {
722735
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
723736
content {
@@ -1022,6 +1035,12 @@ resource "google_container_node_pool" "windows_pools" {
10221035
enabled = gvnic.value
10231036
}
10241037
}
1038+
dynamic "fast_socket" {
1039+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
1040+
content {
1041+
enabled = fast_socket.value
1042+
}
1043+
}
10251044
dynamic "reservation_affinity" {
10261045
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
10271046
content {

‎modules/beta-public-cluster-update-variant/README.md

+1
Original file line number | Diff line number | Diff line change
@@ -357,6 +357,7 @@ The node_pools variable takes the following parameters:
357357
| disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional |
358358
| disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional |
359359
| effect | Effect for the taint | | Required |
360+
| enable_fast_socket | Enable the NCCL Fast Socket feature. `enable_gvnic` must also be enabled. | null | Optional |
360361
| enable_gcfs | Google Container File System (gcfs) has to be enabled for image streaming to be active. Needs image_type to be set to COS_CONTAINERD. | false | Optional |
361362
| enable_gvnic | gVNIC (GVE) is an alternative to the virtIO-based ethernet driver. Needs a Container-Optimized OS node image. | false | Optional |
362363
| enable_integrity_monitoring | Enables monitoring and attestation of the boot integrity of the instance. The attestation is performed against the integrity policy baseline. This baseline is initially derived from the implicitly trusted boot image when the instance is created. | true | Optional |

‎modules/beta-public-cluster-update-variant/cluster.tf

+19
Original file line number | Diff line number | Diff line change
@@ -456,6 +456,13 @@ resource "google_container_cluster" "primary" {
456456
}
457457
}
458458

459+
dynamic "fast_socket" {
460+
for_each = lookup(var.node_pools[0], "enable_fast_socket", null) != null ? [var.node_pools[0].enable_fast_socket] : []
461+
content {
462+
enabled = fast_socket.value
463+
}
464+
}
465+
459466
dynamic "kubelet_config" {
460467
for_each = length(setintersection(
461468
keys(var.node_pools[0]),
@@ -769,6 +776,12 @@ resource "google_container_node_pool" "pools" {
769776
enabled = gvnic.value
770777
}
771778
}
779+
dynamic "fast_socket" {
780+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
781+
content {
782+
enabled = fast_socket.value
783+
}
784+
}
772785
dynamic "reservation_affinity" {
773786
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
774787
content {
@@ -1074,6 +1087,12 @@ resource "google_container_node_pool" "windows_pools" {
10741087
enabled = gvnic.value
10751088
}
10761089
}
1090+
dynamic "fast_socket" {
1091+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
1092+
content {
1093+
enabled = fast_socket.value
1094+
}
1095+
}
10771096
dynamic "reservation_affinity" {
10781097
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
10791098
content {

‎modules/beta-public-cluster/README.md

+1
Original file line number | Diff line number | Diff line change
@@ -335,6 +335,7 @@ The node_pools variable takes the following parameters:
335335
| disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional |
336336
| disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional |
337337
| effect | Effect for the taint | | Required |
338+
| enable_fast_socket | Enable the NCCL Fast Socket feature. `enable_gvnic` must also be enabled. | null | Optional |
338339
| enable_gcfs | Google Container File System (gcfs) has to be enabled for image streaming to be active. Needs image_type to be set to COS_CONTAINERD. | false | Optional |
339340
| enable_gvnic | gVNIC (GVE) is an alternative to the virtIO-based ethernet driver. Needs a Container-Optimized OS node image. | false | Optional |
340341
| enable_integrity_monitoring | Enables monitoring and attestation of the boot integrity of the instance. The attestation is performed against the integrity policy baseline. This baseline is initially derived from the implicitly trusted boot image when the instance is created. | true | Optional |

‎modules/beta-public-cluster/cluster.tf

+19
Original file line number | Diff line number | Diff line change
@@ -456,6 +456,13 @@ resource "google_container_cluster" "primary" {
456456
}
457457
}
458458

459+
dynamic "fast_socket" {
460+
for_each = lookup(var.node_pools[0], "enable_fast_socket", null) != null ? [var.node_pools[0].enable_fast_socket] : []
461+
content {
462+
enabled = fast_socket.value
463+
}
464+
}
465+
459466
dynamic "kubelet_config" {
460467
for_each = length(setintersection(
461468
keys(var.node_pools[0]),
@@ -688,6 +695,12 @@ resource "google_container_node_pool" "pools" {
688695
enabled = gvnic.value
689696
}
690697
}
698+
dynamic "fast_socket" {
699+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
700+
content {
701+
enabled = fast_socket.value
702+
}
703+
}
691704
dynamic "reservation_affinity" {
692705
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
693706
content {
@@ -992,6 +1005,12 @@ resource "google_container_node_pool" "windows_pools" {
9921005
enabled = gvnic.value
9931006
}
9941007
}
1008+
dynamic "fast_socket" {
1009+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
1010+
content {
1011+
enabled = fast_socket.value
1012+
}
1013+
}
9951014
dynamic "reservation_affinity" {
9961015
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
9971016
content {

‎modules/private-cluster-update-variant/README.md

+1
Original file line number | Diff line number | Diff line change
@@ -353,6 +353,7 @@ The node_pools variable takes the following parameters:
353353
| disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional |
354354
| disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional |
355355
| effect | Effect for the taint | | Required |
356+
| enable_fast_socket | Enable the NCCL Fast Socket feature. `enable_gvnic` must also be enabled. | null | Optional |
356357
| enable_gcfs | Google Container File System (gcfs) has to be enabled for image streaming to be active. Needs image_type to be set to COS_CONTAINERD. | false | Optional |
357358
| enable_gvnic | gVNIC (GVE) is an alternative to the virtIO-based ethernet driver. Needs a Container-Optimized OS node image. | false | Optional |
358359
| enable_integrity_monitoring | Enables monitoring and attestation of the boot integrity of the instance. The attestation is performed against the integrity policy baseline. This baseline is initially derived from the implicitly trusted boot image when the instance is created. | true | Optional |

‎modules/private-cluster-update-variant/cluster.tf

+19
Original file line number | Diff line number | Diff line change
@@ -418,6 +418,13 @@ resource "google_container_cluster" "primary" {
418418
}
419419
}
420420

421+
dynamic "fast_socket" {
422+
for_each = lookup(var.node_pools[0], "enable_fast_socket", null) != null ? [var.node_pools[0].enable_fast_socket] : []
423+
content {
424+
enabled = fast_socket.value
425+
}
426+
}
427+
421428
dynamic "kubelet_config" {
422429
for_each = length(setintersection(
423430
keys(var.node_pools[0]),
@@ -751,6 +758,12 @@ resource "google_container_node_pool" "pools" {
751758
enabled = gvnic.value
752759
}
753760
}
761+
dynamic "fast_socket" {
762+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
763+
content {
764+
enabled = fast_socket.value
765+
}
766+
}
754767
dynamic "reservation_affinity" {
755768
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
756769
content {
@@ -1043,6 +1056,12 @@ resource "google_container_node_pool" "windows_pools" {
10431056
enabled = gvnic.value
10441057
}
10451058
}
1059+
dynamic "fast_socket" {
1060+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
1061+
content {
1062+
enabled = fast_socket.value
1063+
}
1064+
}
10461065
dynamic "reservation_affinity" {
10471066
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
10481067
content {

‎modules/private-cluster/README.md

+1
Original file line number | Diff line number | Diff line change
@@ -331,6 +331,7 @@ The node_pools variable takes the following parameters:
331331
| disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional |
332332
| disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional |
333333
| effect | Effect for the taint | | Required |
334+
| enable_fast_socket | Enable the NCCL Fast Socket feature. `enable_gvnic` must also be enabled. | null | Optional |
334335
| enable_gcfs | Google Container File System (gcfs) has to be enabled for image streaming to be active. Needs image_type to be set to COS_CONTAINERD. | false | Optional |
335336
| enable_gvnic | gVNIC (GVE) is an alternative to the virtIO-based ethernet driver. Needs a Container-Optimized OS node image. | false | Optional |
336337
| enable_integrity_monitoring | Enables monitoring and attestation of the boot integrity of the instance. The attestation is performed against the integrity policy baseline. This baseline is initially derived from the implicitly trusted boot image when the instance is created. | true | Optional |

‎modules/private-cluster/cluster.tf

+19
Original file line number | Diff line number | Diff line change
@@ -418,6 +418,13 @@ resource "google_container_cluster" "primary" {
418418
}
419419
}
420420

421+
dynamic "fast_socket" {
422+
for_each = lookup(var.node_pools[0], "enable_fast_socket", null) != null ? [var.node_pools[0].enable_fast_socket] : []
423+
content {
424+
enabled = fast_socket.value
425+
}
426+
}
427+
421428
dynamic "kubelet_config" {
422429
for_each = length(setintersection(
423430
keys(var.node_pools[0]),
@@ -671,6 +678,12 @@ resource "google_container_node_pool" "pools" {
671678
enabled = gvnic.value
672679
}
673680
}
681+
dynamic "fast_socket" {
682+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
683+
content {
684+
enabled = fast_socket.value
685+
}
686+
}
674687
dynamic "reservation_affinity" {
675688
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
676689
content {
@@ -962,6 +975,12 @@ resource "google_container_node_pool" "windows_pools" {
962975
enabled = gvnic.value
963976
}
964977
}
978+
dynamic "fast_socket" {
979+
for_each = lookup(each.value, "enable_fast_socket", null) != null ? [each.value.enable_fast_socket] : []
980+
content {
981+
enabled = fast_socket.value
982+
}
983+
}
965984
dynamic "reservation_affinity" {
966985
for_each = lookup(each.value, "queued_provisioning", false) || lookup(each.value, "consume_reservation_type", "") != "" ? [each.value] : []
967986
content {

0 commit comments

Comments
 (0)