Skip to content

Commit cd946d7

Browse files
authored
fix: reduce the probability of RESOURCE_EXHAUSTED errors during tests (#734)
* fix: reduce the probability of RESOURCE_EXHAUSTED errors during tests

  Reduces the probability of RESOURCE_EXHAUSTED errors during tests by making the GetOperation method retry errors with that status code using an exponential backoff. The GetOperation method is called repeatedly by a polling future for long-running operations, and these calls also count towards the maximum of 5 admin requests per second.

  Fixes #733
* fix: use default retry settings
* chore: run formatter
* fix: only retry RESOURCE_EXHAUSTED when throttling requests
1 parent bb51e28 commit cd946d7

File tree

2 files changed

+95
-4
lines changed

2 files changed

+95
-4
lines changed

google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/GapicSpannerRpc.java

+55-4
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
import com.google.api.gax.core.GaxProperties;
2727
import com.google.api.gax.grpc.GaxGrpcProperties;
2828
import com.google.api.gax.grpc.GrpcCallContext;
29+
import com.google.api.gax.grpc.GrpcCallSettings;
30+
import com.google.api.gax.grpc.GrpcStubCallableFactory;
2931
import com.google.api.gax.grpc.InstantiatingGrpcChannelProvider;
3032
import com.google.api.gax.longrunning.OperationFuture;
3133
import com.google.api.gax.retrying.ResultRetryAlgorithm;
@@ -35,6 +37,7 @@
3537
import com.google.api.gax.rpc.ApiCallContext;
3638
import com.google.api.gax.rpc.ApiClientHeaderProvider;
3739
import com.google.api.gax.rpc.ApiException;
40+
import com.google.api.gax.rpc.ClientContext;
3841
import com.google.api.gax.rpc.FixedHeaderProvider;
3942
import com.google.api.gax.rpc.HeaderProvider;
4043
import com.google.api.gax.rpc.InstantiatingWatchdogProvider;
@@ -44,6 +47,8 @@
4447
import com.google.api.gax.rpc.StatusCode;
4548
import com.google.api.gax.rpc.StreamController;
4649
import com.google.api.gax.rpc.TransportChannelProvider;
50+
import com.google.api.gax.rpc.UnaryCallSettings;
51+
import com.google.api.gax.rpc.UnaryCallable;
4752
import com.google.api.gax.rpc.UnavailableException;
4853
import com.google.api.gax.rpc.WatchdogProvider;
4954
import com.google.api.pathtemplate.PathTemplate;
@@ -59,6 +64,7 @@
5964
import com.google.cloud.spanner.SpannerOptions.CallCredentialsProvider;
6065
import com.google.cloud.spanner.admin.database.v1.stub.DatabaseAdminStub;
6166
import com.google.cloud.spanner.admin.database.v1.stub.DatabaseAdminStubSettings;
67+
import com.google.cloud.spanner.admin.database.v1.stub.GrpcDatabaseAdminCallableFactory;
6268
import com.google.cloud.spanner.admin.database.v1.stub.GrpcDatabaseAdminStub;
6369
import com.google.cloud.spanner.admin.instance.v1.stub.GrpcInstanceAdminStub;
6470
import com.google.cloud.spanner.admin.instance.v1.stub.InstanceAdminStub;
@@ -72,6 +78,7 @@
7278
import com.google.common.base.Preconditions;
7379
import com.google.common.collect.ImmutableList;
7480
import com.google.common.collect.ImmutableMap;
81+
import com.google.common.collect.ImmutableSet;
7582
import com.google.common.util.concurrent.RateLimiter;
7683
import com.google.common.util.concurrent.ThreadFactoryBuilder;
7784
import com.google.iam.v1.GetIamPolicyRequest;
@@ -157,6 +164,7 @@
157164
import java.util.LinkedList;
158165
import java.util.List;
159166
import java.util.Map;
167+
import java.util.Set;
160168
import java.util.concurrent.Callable;
161169
import java.util.concurrent.CancellationException;
162170
import java.util.concurrent.ConcurrentHashMap;
@@ -443,7 +451,45 @@ public GapicSpannerRpc(final SpannerOptions options) {
443451
.setCredentialsProvider(credentialsProvider)
444452
.setStreamWatchdogProvider(watchdogProvider)
445453
.build();
446-
this.databaseAdminStub = GrpcDatabaseAdminStub.create(this.databaseAdminStubSettings);
454+
455+
// Automatically retry RESOURCE_EXHAUSTED for GetOperation if auto-throttling of
456+
// administrative requests has been set. The GetOperation RPC is called repeatedly by gax
457+
// while polling long-running operations for their progress and can also cause these errors.
458+
// The default behavior is not to retry these errors, and this option should normally only be
459+
// enabled for (integration) testing.
460+
if (options.isAutoThrottleAdministrativeRequests()) {
461+
GrpcStubCallableFactory factory =
462+
new GrpcDatabaseAdminCallableFactory() {
463+
@Override
464+
public <RequestT, ResponseT> UnaryCallable<RequestT, ResponseT> createUnaryCallable(
465+
GrpcCallSettings<RequestT, ResponseT> grpcCallSettings,
466+
UnaryCallSettings<RequestT, ResponseT> callSettings,
467+
ClientContext clientContext) {
468+
// Make GetOperation retry on RESOURCE_EXHAUSTED to prevent polling operations from
469+
// failing with an Administrative requests limit exceeded error.
470+
if (grpcCallSettings
471+
.getMethodDescriptor()
472+
.getFullMethodName()
473+
.equals("google.longrunning.Operations/GetOperation")) {
474+
Set<StatusCode.Code> codes =
475+
ImmutableSet.<StatusCode.Code>builderWithExpectedSize(
476+
callSettings.getRetryableCodes().size() + 1)
477+
.addAll(callSettings.getRetryableCodes())
478+
.add(StatusCode.Code.RESOURCE_EXHAUSTED)
479+
.build();
480+
callSettings = callSettings.toBuilder().setRetryableCodes(codes).build();
481+
}
482+
return super.createUnaryCallable(grpcCallSettings, callSettings, clientContext);
483+
}
484+
};
485+
this.databaseAdminStub =
486+
new GrpcDatabaseAdminStubWithCustomCallableFactory(
487+
databaseAdminStubSettings,
488+
ClientContext.create(databaseAdminStubSettings),
489+
factory);
490+
} else {
491+
this.databaseAdminStub = GrpcDatabaseAdminStub.create(databaseAdminStubSettings);
492+
}
447493

448494
// Check whether the SPANNER_EMULATOR_HOST env var has been set, and if so, if the emulator is
449495
// actually running.
@@ -504,9 +550,9 @@ private static void checkEmulatorConnection(
504550

505551
private static final RetrySettings ADMIN_REQUESTS_LIMIT_EXCEEDED_RETRY_SETTINGS =
506552
RetrySettings.newBuilder()
507-
.setInitialRetryDelay(Duration.ofSeconds(2L))
508-
.setRetryDelayMultiplier(1.5)
509-
.setMaxRetryDelay(Duration.ofSeconds(15L))
553+
.setInitialRetryDelay(Duration.ofSeconds(5L))
554+
.setRetryDelayMultiplier(2.0)
555+
.setMaxRetryDelay(Duration.ofSeconds(60L))
510556
.setMaxAttempts(10)
511557
.build();
512558

@@ -1021,6 +1067,11 @@ public OperationFuture<Empty, UpdateDatabaseDdlMetadata> call() throws Exception
10211067
throw newSpannerException(e);
10221068
} catch (ExecutionException e) {
10231069
Throwable t = e.getCause();
1070+
SpannerException se = SpannerExceptionFactory.asSpannerException(t);
1071+
if (se instanceof AdminRequestsPerMinuteExceededException) {
1072+
// Propagate this to trigger a retry.
1073+
throw se;
1074+
}
10241075
if (t instanceof AlreadyExistsException) {
10251076
String operationName =
10261077
OPERATION_NAME_TEMPLATE.instantiate(
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.cloud.spanner.spi.v1;
18+
19+
import com.google.api.gax.grpc.GrpcStubCallableFactory;
20+
import com.google.api.gax.rpc.ClientContext;
21+
import com.google.api.gax.rpc.StatusCode;
22+
import com.google.cloud.spanner.admin.database.v1.stub.DatabaseAdminStubSettings;
23+
import com.google.cloud.spanner.admin.database.v1.stub.GrpcDatabaseAdminStub;
24+
import java.io.IOException;
25+
26+
/**
27+
* Subclass of {@link GrpcDatabaseAdminStub} that makes the constructor available inside this
28+
* package. This makes it possible to create a {@link GrpcDatabaseAdminStub} with a custom {@link
29+
* GrpcStubCallableFactory} and custom settings. This is used by integration tests to automatically
30+
* retry {@link StatusCode.Code#RESOURCE_EXHAUSTED} errors for certain administrative requests.
31+
*/
32+
class GrpcDatabaseAdminStubWithCustomCallableFactory extends GrpcDatabaseAdminStub {
33+
GrpcDatabaseAdminStubWithCustomCallableFactory(
34+
DatabaseAdminStubSettings settings,
35+
ClientContext clientContext,
36+
GrpcStubCallableFactory callableFactory)
37+
throws IOException {
38+
super(settings, clientContext, callableFactory);
39+
}
40+
}

0 commit comments

Comments
 (0)