Skip to content

Commit 84cf1ce

Browse files
committed
Updated Kubernetes to v1.4.3
* Updated kube-dns to v20 + use deployment instead of RC
* Fixed issues with coreos cluster checker
* Added Kubernetes cluster readiness checker
1 parent 4363a84 commit 84cf1ce

4 files changed

+119
-86
lines changed

deploy_coreos_cluster.sh

+3-1
Original file line number | Diff line number | Diff line change
@@ -162,6 +162,7 @@ trap - EXIT
162162
trap
163163

164164
OS_NAME="coreos"
165+
SSH_USER="core"
165166

166167
virsh list --all --name | grep -q "^${OS_NAME}1$" && { print_red "'${OS_NAME}1' VM already exists"; exit 1; }
167168

@@ -365,10 +366,11 @@ if [ "x${SKIP_SSH_CHECK}" = "x" ]; then
365366
TRY=$((TRY+1))
366367
if [ $TRY -gt $MAX_SSH_TRIES ]; then
367368
print_red "Can not connect to ssh, exiting..."
369+
exit 1
368370
fi
369371
echo "Trying to connect to ${VM_HOSTNAME} VM, #${TRY} of #${MAX_SSH_TRIES}..."
370372
set +e
371-
RES=$(LANG=en_US ssh -l $SSH_USER -o ConnectTimeout=1 -o PasswordAuthentication=no -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ${PRIV_KEY_PATH} $VM_HOSTNAME "uptime" 2>&1)
373+
RES=$(LANG=en_US ssh -l $SSH_USER -o BatchMode=yes -o ConnectTimeout=1 -o PasswordAuthentication=no -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ${PRIV_KEY_PATH} $VM_HOSTNAME "uptime" 2>&1)
372374
RES_CODE=$?
373375
set -e
374376
if [ $RES_CODE -eq 0 ]; then

deploy_k8s_cluster.sh

+33-3
Original file line number | Diff line number | Diff line change
@@ -175,6 +175,7 @@ OS_NAME="coreos"
175175
PREFIX="k8s"
176176
MASTER_PREFIX="${PREFIX}-master"
177177
NODE_PREFIX="${PREFIX}-node"
178+
SSH_USER="core"
178179

179180
virsh list --all --name | grep -q "^${PREFIX}-[mn]" && { print_red "'$PREFIX-*' VMs already exist"; exit 1; }
180181

@@ -300,7 +301,7 @@ if [ -n "$OPTVAL_CPU" ]; then
300301
CPUs=$OPTVAL_CPU
301302
fi
302303

303-
K8S_RELEASE="v1.3.5"
304+
K8S_RELEASE="v1.4.3"
304305
K8S_IMAGE="gcr.io/google_containers/hyperkube:${K8S_RELEASE}"
305306
FLANNEL_TYPE=vxlan
306307

@@ -445,6 +446,7 @@ done
445446

446447
if [ "x${SKIP_SSH_CHECK}" = "x" ]; then
447448
MAX_SSH_TRIES=50
449+
MAX_KUBECTL_TRIES=200
448450
for SEQ in $(seq 1 $CLUSTER_SIZE); do
449451
if [ "$SEQ" = "1" ]; then
450452
VM_HOSTNAME=$MASTER_PREFIX
@@ -457,10 +459,11 @@ if [ "x${SKIP_SSH_CHECK}" = "x" ]; then
457459
TRY=$((TRY+1))
458460
if [ $TRY -gt $MAX_SSH_TRIES ]; then
459461
print_red "Can not connect to ssh, exiting..."
462+
exit 1
460463
fi
461464
echo "Trying to connect to ${VM_HOSTNAME} VM, #${TRY} of #${MAX_SSH_TRIES}..."
462465
set +e
463-
RES=$(LANG=en_US ssh -l $SSH_USER -o ConnectTimeout=1 -o PasswordAuthentication=no -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ${PRIV_KEY_PATH} $VM_HOSTNAME "uptime" 2>&1)
466+
RES=$(LANG=en_US ssh -l $SSH_USER -o BatchMode=yes -o ConnectTimeout=1 -o PasswordAuthentication=no -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ${PRIV_KEY_PATH} $VM_HOSTNAME "uptime" 2>&1)
464467
RES_CODE=$?
465468
set -e
466469
if [ $RES_CODE -eq 0 ]; then
@@ -470,7 +473,34 @@ if [ "x${SKIP_SSH_CHECK}" = "x" ]; then
470473
fi
471474
done
472475
done
473-
print_green "Cluster of $CLUSTER_SIZE $OS_NAME nodes is up and running."
476+
print_green "Cluster of $CLUSTER_SIZE $OS_NAME nodes is up and running, waiting for Kubernetes to be ready..."
477+
for SEQ in $(seq 1 $CLUSTER_SIZE); do
478+
if [ "$SEQ" = "1" ]; then
479+
VM_HOSTNAME=$MASTER_PREFIX
480+
else
481+
NODE_SEQ=$[SEQ-1]
482+
VM_HOSTNAME="${NODE_PREFIX}-$NODE_SEQ"
483+
fi
484+
TRY=0
485+
while true; do
486+
TRY=$((TRY+1))
487+
if [ $TRY -gt $MAX_KUBECTL_TRIES ]; then
488+
print_red "Can not verify Kubernetes status, exiting..."
489+
exit 1
490+
fi
491+
echo "Trying to check whether ${VM_HOSTNAME} Kubernetes node is up and running, #${TRY} of #${MAX_KUBECTL_TRIES}..."
492+
set +e
493+
RES=$(LANG=en_US ssh -l $SSH_USER -o BatchMode=yes -o ConnectTimeout=1 -o PasswordAuthentication=no -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ${PRIV_KEY_PATH} $MASTER_PREFIX "/opt/bin/kubectl get nodes $VM_HOSTNAME | grep -q Ready" 2>&1)
494+
RES_CODE=$?
495+
set -e
496+
if [ $RES_CODE -eq 0 ]; then
497+
break
498+
else
499+
sleep 1
500+
fi
501+
done
502+
done
503+
print_green "Kubernetes cluster is up and running..."
474504
fi
475505

476506
print_green "Use following command to connect to your cluster: 'ssh -i \"$PRIV_KEY_PATH\" core@$COREOS_MASTER_HOSTNAME'"

deploy_vms_cluster.sh

+3-2
Original file line number | Diff line number | Diff line change
@@ -227,7 +227,7 @@ runcmd:
227227
ubuntu)
228228
BOOT_HOOK="runcmd:
229229
- service networking restart"
230-
handle_channel_release yakkety current
230+
handle_channel_release xenial current
231231
# extra size for images
232232
IMG_SIZE="10G"
233233
IMG_NAME="${CHANNEL}-server-cloudimg-amd64.qcow2"
@@ -520,10 +520,11 @@ if [ "x${SKIP_SSH_CHECK}" = "x" ]; then
520520
TRY=$((TRY+1))
521521
if [ $TRY -gt $MAX_SSH_TRIES ]; then
522522
print_red "Can not connect to ssh, exiting..."
523+
exit 1
523524
fi
524525
echo "Trying to connect to ${VM_HOSTNAME} VM, #${TRY} of #${MAX_SSH_TRIES}..."
525526
set +e
526-
RES=$(LANG=en_US ssh -l $SSH_USER -o ConnectTimeout=1 -o PasswordAuthentication=no -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ${PRIV_KEY_PATH} $VM_HOSTNAME "uptime" 2>&1)
527+
RES=$(LANG=en_US ssh -l $SSH_USER -o BatchMode=yes -o ConnectTimeout=1 -o PasswordAuthentication=no -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ${PRIV_KEY_PATH} $VM_HOSTNAME "uptime" 2>&1)
527528
RES_CODE=$?
528529
set -e
529530
if [ $RES_CODE -eq 0 ]; then

k8s_master.yaml

+80-80
Original file line number | Diff line number | Diff line change
@@ -96,124 +96,124 @@ write-files:
9696
- name: dns-tcp
9797
port: 53
9898
protocol: TCP
99-
10099
---
101-
102-
apiVersion: v1
103-
kind: ReplicationController
100+
apiVersion: extensions/v1beta1
101+
kind: Deployment
104102
metadata:
105-
name: kube-dns-v11
103+
name: kube-dns-v20
106104
namespace: kube-system
107105
labels:
108106
k8s-app: kube-dns
109-
version: v11
107+
version: v20
110108
kubernetes.io/cluster-service: "true"
111109
spec:
110+
strategy:
111+
type: RollingUpdate
112+
rollingUpdate:
113+
# Ensure we have at least 1 alive pod during update (don't kill old pod until new pod is up and running)
114+
maxSurge: 1
115+
maxUnavailable: 0
112116
replicas: 1
113117
selector:
114-
k8s-app: kube-dns
115-
version: v11
118+
matchLabels:
119+
k8s-app: kube-dns
120+
version: v20
116121
template:
117122
metadata:
118123
labels:
119124
k8s-app: kube-dns
120-
version: v11
121-
kubernetes.io/cluster-service: "true"
125+
version: v20
126+
annotations:
127+
scheduler.alpha.kubernetes.io/critical-pod: ''
128+
scheduler.alpha.kubernetes.io/tolerations: '[{"key":"CriticalAddonsOnly", "operator":"Exists"}]'
122129
spec:
123130
containers:
124-
- name: etcd
125-
image: gcr.io/google_containers/etcd:2.2.1
126-
resources:
127-
# keep request = limit to keep this container in guaranteed class
128-
limits:
129-
cpu: 100m
130-
memory: 50Mi
131-
requests:
132-
cpu: 100m
133-
memory: 50Mi
134-
command:
135-
- /usr/local/bin/etcd
136-
- -data-dir
137-
- /var/etcd/data
138-
- -listen-client-urls
139-
- http://127.0.0.1:2379,http://127.0.0.1:4001
140-
- -advertise-client-urls
141-
- http://127.0.0.1:2379,http://127.0.0.1:4001
142-
- -initial-cluster-token
143-
- skydns-etcd
144-
volumeMounts:
145-
- name: etcd-storage
146-
mountPath: /var/etcd/data
147-
- name: kube2sky
148-
image: gcr.io/google_containers/kube2sky:1.14
131+
- name: kubedns
132+
image: gcr.io/google_containers/kubedns-amd64:1.8
149133
resources:
150-
# keep request = limit to keep this container in guaranteed class
134+
# TODO: Set memory limits when we've profiled the container for large
135+
# clusters, then set request = limit to keep this container in
136+
# guaranteed class. Currently, this container falls into the
137+
# "burstable" category so the kubelet doesn't backoff from restarting it.
151138
limits:
152-
cpu: 100m
153-
memory: 50Mi
139+
memory: 170Mi
154140
requests:
155141
cpu: 100m
156-
memory: 50Mi
157-
args:
158-
# command = "/kube2sky"
159-
- --domain=%K8S_DOMAIN%
160-
- name: skydns
161-
image: gcr.io/google_containers/skydns:2015-10-13-8c72f8c
162-
resources:
163-
# keep request = limit to keep this container in guaranteed class
164-
limits:
165-
cpu: 100m
166-
memory: 50Mi
167-
requests:
168-
cpu: 100m
169-
memory: 50Mi
170-
command: ["sh", "-c", "while true; do echo -e \"PUT /v2/keys/skydns/config HTTP/1.1\r\nAccept: */*\r\nContent-Length: 26\r\nContent-Type: application/x-www-form-urlencoded\r\n\r\nvalue=%7B%22ndot%22%3A1%7D\" | nc localhost 2379 2>&1 | grep 'HTTP/1.1 200 OK' && /skydns --machines=http://127.0.0.1:2379 --addr=0.0.0.0:53 --ns-rotate=false --domain=%K8S_DOMAIN%. ; sleep 1; done"]
171-
# args:
172-
# # command = "/skydns"
173-
# - --machines=http://127.0.0.1:2379
174-
# - --addr=0.0.0.0:53
175-
# - --ns-rotate=false
176-
# - --domain=%K8S_DOMAIN%.
177-
ports:
178-
- containerPort: 53
179-
name: dns
180-
protocol: UDP
181-
- containerPort: 53
182-
name: dns-tcp
183-
protocol: TCP
142+
memory: 70Mi
184143
livenessProbe:
185144
httpGet:
186-
path: /healthz
145+
path: /healthz-kubedns
187146
port: 8080
188147
scheme: HTTP
189-
initialDelaySeconds: 30
148+
initialDelaySeconds: 60
190149
timeoutSeconds: 5
150+
successThreshold: 1
151+
failureThreshold: 5
191152
readinessProbe:
192153
httpGet:
193-
path: /healthz
154+
path: /readiness
155+
port: 8081
156+
scheme: HTTP
157+
# we poll on pod startup for the Kubernetes master service and
158+
# only setup the /readiness HTTP server once that's available.
159+
initialDelaySeconds: 3
160+
timeoutSeconds: 5
161+
args:
162+
# command = "/kube-dns"
163+
- --domain=%K8S_DOMAIN%.
164+
- --dns-port=10053
165+
ports:
166+
- containerPort: 10053
167+
name: dns-local
168+
protocol: UDP
169+
- containerPort: 10053
170+
name: dns-tcp-local
171+
protocol: TCP
172+
- name: dnsmasq
173+
image: gcr.io/google_containers/kube-dnsmasq-amd64:1.4
174+
livenessProbe:
175+
httpGet:
176+
path: /healthz-dnsmasq
194177
port: 8080
195178
scheme: HTTP
196-
initialDelaySeconds: 1
179+
initialDelaySeconds: 60
197180
timeoutSeconds: 5
181+
successThreshold: 1
182+
failureThreshold: 5
183+
args:
184+
- --cache-size=1000
185+
- --no-resolv
186+
- --server=127.0.0.1#10053
187+
- --log-facility=-
188+
ports:
189+
- containerPort: 53
190+
name: dns
191+
protocol: UDP
192+
- containerPort: 53
193+
name: dns-tcp
194+
protocol: TCP
198195
- name: healthz
199-
image: gcr.io/google_containers/exechealthz:1.0
196+
image: gcr.io/google_containers/exechealthz-amd64:1.2
200197
resources:
201-
# keep request = limit to keep this container in guaranteed class
202198
limits:
203-
cpu: 10m
204-
memory: 20Mi
199+
memory: 50Mi
205200
requests:
206201
cpu: 10m
207-
memory: 20Mi
202+
# Note that this container shouldn't really need 50Mi of memory. The
203+
# limits are set higher than expected pending investigation on #29688.
204+
# The extra memory was stolen from the kubedns container to keep the
205+
# net memory requested by the pod constant.
206+
memory: 50Mi
208207
args:
209-
- -cmd=nslookup kubernetes.default.svc.%K8S_DOMAIN% 127.0.0.1 >/dev/null
210-
- -port=8080
208+
- --cmd=nslookup kubernetes.default.svc.%K8S_DOMAIN% 127.0.0.1 >/dev/null
209+
- --url=/healthz-dnsmasq
210+
- --cmd=nslookup kubernetes.default.svc.%K8S_DOMAIN% 127.0.0.1:10053 >/dev/null
211+
- --url=/healthz-kubedns
212+
- --port=8080
213+
- --quiet
211214
ports:
212215
- containerPort: 8080
213216
protocol: TCP
214-
volumes:
215-
- name: etcd-storage
216-
emptyDir: {}
217217
dnsPolicy: Default # Don't use cluster DNS.
218218
- path: /etc/kubernetes/manifests/kube-apiserver.yaml
219219
permissions: '0644'

0 commit comments

Comments
 (0)