Skip to content

Commit a95f6f8

Browse files
authored
fix: retry cancelled error on first statement in transaction (#999)
If the first statement of a read/write transaction fails with a `CANCELLED` error whose message is `Read/query was cancelled due to the enclosing transaction being invalidated by a later transaction in the same session.`, the transaction should be retried, because the error may have been caused by a previous statement that was abandoned by the client but still executed by the backend. This can happen if that statement timed out on the client or was cancelled. Fixes #938.
1 parent 39b0ec4 commit a95f6f8

File tree

6 files changed

+133
-22
lines changed

6 files changed

+133
-22
lines changed

google-cloud-spanner/src/main/java/com/google/cloud/spanner/AbstractReadContext.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -701,7 +701,9 @@ public void close() {
701701
public void onTransactionMetadata(Transaction transaction, boolean shouldIncludeId) {}
702702

703703
@Override
704-
public void onError(SpannerException e, boolean withBeginTransaction) {}
704+
public SpannerException onError(SpannerException e, boolean withBeginTransaction) {
705+
return e;
706+
}
705707

706708
@Override
707709
public void onDone(boolean withBeginTransaction) {}

google-cloud-spanner/src/main/java/com/google/cloud/spanner/AbstractResultSet.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,8 @@ interface Listener {
8181
void onTransactionMetadata(Transaction transaction, boolean shouldIncludeId)
8282
throws SpannerException;
8383

84-
/** Called when the read finishes with an error. */
85-
void onError(SpannerException e, boolean withBeginTransaction);
84+
/** Called when the read finishes with an error. Returns the error that should be thrown. */
85+
SpannerException onError(SpannerException e, boolean withBeginTransaction);
8686

8787
/** Called when the read finishes normally. */
8888
void onDone(boolean withBeginTransaction);
@@ -159,9 +159,9 @@ public Type getType() {
159159
}
160160

161161
private SpannerException yieldError(SpannerException e, boolean beginTransaction) {
162-
listener.onError(e, beginTransaction);
162+
SpannerException toThrow = listener.onError(e, beginTransaction);
163163
close();
164-
throw e;
164+
throw toThrow;
165165
}
166166
}
167167
/**

google-cloud-spanner/src/main/java/com/google/cloud/spanner/TransactionRunnerImpl.java

+37-15
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,11 @@
7070
class TransactionRunnerImpl implements SessionTransaction, TransactionRunner {
7171
private static final Tracer tracer = Tracing.getTracer();
7272
private static final Logger txnLogger = Logger.getLogger(TransactionRunner.class.getName());
73+
/**
74+
* (Part of) the error message that is returned by Cloud Spanner if a transaction is cancelled
75+
* because it was invalidated by a later transaction in the same session.
76+
*/
77+
private static final String TRANSACTION_CANCELLED_MESSAGE = "invalidated by a later transaction";
7378

7479
@VisibleForTesting
7580
static class TransactionContextImpl extends AbstractReadContext implements TransactionContext {
@@ -372,8 +377,7 @@ public void run() {
372377
}
373378
span.addAnnotation("Commit Failed", TraceUtil.getExceptionAnnotations(e));
374379
TraceUtil.endSpanWithFailure(opSpan, e);
375-
onError((SpannerException) e, false);
376-
res.setException(e);
380+
res.setException(onError((SpannerException) e, false));
377381
}
378382
}
379383
}),
@@ -519,7 +523,7 @@ public void onTransactionMetadata(Transaction transaction, boolean shouldInclude
519523
}
520524

521525
@Override
522-
public void onError(SpannerException e, boolean withBeginTransaction) {
526+
public SpannerException onError(SpannerException e, boolean withBeginTransaction) {
523527
// If the statement that caused an error was the statement that included a BeginTransaction
524528
// option, we simulate an aborted transaction to force a retry of the entire transaction. This
525529
// will cause the retry to execute an explicit BeginTransaction RPC and then the actual
@@ -536,21 +540,41 @@ public void onError(SpannerException e, boolean withBeginTransaction) {
536540
SpannerExceptionFactory.createAbortedExceptionWithRetryDelay(
537541
"Aborted due to failed initial statement", e, 0, 1)));
538542
}
543+
SpannerException exceptionToThrow;
544+
if (withBeginTransaction
545+
&& e.getErrorCode() == ErrorCode.CANCELLED
546+
&& e.getMessage().contains(TRANSACTION_CANCELLED_MESSAGE)) {
547+
// If the first statement of a transaction fails because it was invalidated by a later
548+
// transaction, then the transaction should be retried with an explicit BeginTransaction
549+
// RPC. This can happen when a previous transaction timed out or was cancelled by the client,
550+
// but had already been sent to Cloud Spanner and was therefore still active on the
551+
// backend.
552+
exceptionToThrow =
553+
SpannerExceptionFactory.newSpannerException(
554+
ErrorCode.ABORTED,
555+
e.getMessage(),
556+
SpannerExceptionFactory.createAbortedExceptionWithRetryDelay(
557+
"Aborted due to failed initial statement", e, 0, 1));
558+
} else {
559+
exceptionToThrow = e;
560+
}
539561

540-
if (e.getErrorCode() == ErrorCode.ABORTED) {
562+
if (exceptionToThrow.getErrorCode() == ErrorCode.ABORTED) {
541563
long delay = -1L;
542-
if (e instanceof AbortedException) {
543-
delay = ((AbortedException) e).getRetryDelayInMillis();
564+
if (exceptionToThrow instanceof AbortedException) {
565+
delay = ((AbortedException) exceptionToThrow).getRetryDelayInMillis();
544566
}
545567
if (delay == -1L) {
546-
txnLogger.log(Level.FINE, "Retry duration is missing from the exception.", e);
568+
txnLogger.log(
569+
Level.FINE, "Retry duration is missing from the exception.", exceptionToThrow);
547570
}
548571

549572
synchronized (lock) {
550573
retryDelayInMillis = delay;
551574
aborted = true;
552575
}
553576
}
577+
return exceptionToThrow;
554578
}
555579

556580
@Override
@@ -607,8 +631,8 @@ public long executeUpdate(Statement statement, UpdateOption... options) {
607631
// For standard DML, using the exact row count.
608632
return resultSet.getStats().getRowCountExact();
609633
} catch (Throwable t) {
610-
onError(SpannerExceptionFactory.asSpannerException(t), builder.getTransaction().hasBegin());
611-
throw t;
634+
throw onError(
635+
SpannerExceptionFactory.asSpannerException(t), builder.getTransaction().hasBegin());
612636
}
613637
}
614638

@@ -661,8 +685,7 @@ public Long apply(ResultSet input) {
661685
@Override
662686
public Long apply(Throwable input) {
663687
SpannerException e = SpannerExceptionFactory.asSpannerException(input);
664-
onError(e, builder.getTransaction().hasBegin());
665-
throw e;
688+
throw onError(e, builder.getTransaction().hasBegin());
666689
}
667690
},
668691
MoreExecutors.directExecutor());
@@ -730,8 +753,8 @@ public long[] batchUpdate(Iterable<Statement> statements, UpdateOption... option
730753
}
731754
return results;
732755
} catch (Throwable e) {
733-
onError(SpannerExceptionFactory.asSpannerException(e), builder.getTransaction().hasBegin());
734-
throw e;
756+
throw onError(
757+
SpannerExceptionFactory.asSpannerException(e), builder.getTransaction().hasBegin());
735758
}
736759
}
737760

@@ -788,8 +811,7 @@ public long[] apply(ExecuteBatchDmlResponse batchDmlResponse) {
788811
@Override
789812
public long[] apply(Throwable input) {
790813
SpannerException e = SpannerExceptionFactory.asSpannerException(input);
791-
onError(e, builder.getTransaction().hasBegin());
792-
throw e;
814+
throw onError(e, builder.getTransaction().hasBegin());
793815
}
794816
},
795817
MoreExecutors.directExecutor());

google-cloud-spanner/src/test/java/com/google/cloud/spanner/GrpcResultSetTest.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@ public void onTransactionMetadata(Transaction transaction, boolean shouldInclude
6060
throws SpannerException {}
6161

6262
@Override
63-
public void onError(SpannerException e, boolean withBeginTransaction) {}
63+
public SpannerException onError(SpannerException e, boolean withBeginTransaction) {
64+
return e;
65+
}
6466

6567
@Override
6668
public void onDone(boolean withBeginTransaction) {}

google-cloud-spanner/src/test/java/com/google/cloud/spanner/InlineBeginTransactionTest.java

+83
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package com.google.cloud.spanner;
1818

1919
import static com.google.common.truth.Truth.assertThat;
20+
import static org.junit.Assert.assertEquals;
2021
import static org.junit.Assert.fail;
2122

2223
import com.google.api.core.ApiAsyncFunction;
@@ -1711,6 +1712,88 @@ public long[] run(TransactionContext transaction) throws Exception {
17111712
assertThat(countRequests(ExecuteBatchDmlRequest.class)).isEqualTo(1);
17121713
assertThat(countRequests(CommitRequest.class)).isEqualTo(0);
17131714
}
1715+
1716+
@Test
1717+
public void testInlinedBeginTx_withCancelledOnFirstStatement() {
1718+
final Statement statement = Statement.of("INSERT INTO FOO (Id) VALUES (1)");
1719+
mockSpanner.putStatementResult(
1720+
StatementResult.exception(
1721+
statement,
1722+
Status.CANCELLED
1723+
.withDescription(
1724+
"Read/query was cancelled due to the enclosing transaction being invalidated by a later transaction in the same session.")
1725+
.asRuntimeException()));
1726+
1727+
DatabaseClient client =
1728+
spanner.getDatabaseClient(DatabaseId.of("[PROJECT]", "[INSTANCE]", "[DATABASE]"));
1729+
long updateCount =
1730+
client
1731+
.readWriteTransaction()
1732+
.run(
1733+
new TransactionCallable<Long>() {
1734+
int attempt = 0;
1735+
1736+
@Override
1737+
public Long run(TransactionContext transaction) throws Exception {
1738+
if (attempt > 0) {
1739+
mockSpanner.putStatementResult(StatementResult.update(statement, 1L));
1740+
}
1741+
attempt++;
1742+
return transaction.executeUpdate(statement);
1743+
}
1744+
});
1745+
assertEquals(1L, updateCount);
1746+
// The transaction will be retried because the first statement that also tried to include the
1747+
// BeginTransaction option failed with the specific CANCELLED error and did not return a
1748+
// transaction. That forces a retry of the entire transaction with an explicit
1749+
// BeginTransaction RPC.
1750+
assertEquals(1, countRequests(BeginTransactionRequest.class));
1751+
// The update statement will be executed 2 times:
1752+
assertEquals(2, countRequests(ExecuteSqlRequest.class));
1753+
// The transaction will attempt to commit once.
1754+
assertEquals(1, countRequests(CommitRequest.class));
1755+
// The first update will start a transaction, but then fail the update statement. This will
1756+
// start a transaction on the mock server, but that transaction will never be returned to the
1757+
// client.
1758+
assertEquals(2, countTransactionsStarted());
1759+
}
1760+
1761+
@Test
1762+
public void testInlinedBeginTx_withStickyCancelledOnFirstStatement() {
1763+
final Statement statement = Statement.of("INSERT INTO FOO (Id) VALUES (1)");
1764+
mockSpanner.putStatementResult(
1765+
StatementResult.exception(
1766+
statement,
1767+
Status.CANCELLED
1768+
.withDescription(
1769+
"Read/query was cancelled due to the enclosing transaction being invalidated by a later transaction in the same session.")
1770+
.asRuntimeException()));
1771+
1772+
DatabaseClient client =
1773+
spanner.getDatabaseClient(DatabaseId.of("[PROJECT]", "[INSTANCE]", "[DATABASE]"));
1774+
// The CANCELLED error is thrown both on the first and second attempt. The second attempt will
1775+
// not be retried, as it did not include a BeginTransaction option.
1776+
try {
1777+
client
1778+
.readWriteTransaction()
1779+
.run(
1780+
new TransactionCallable<Long>() {
1781+
@Override
1782+
public Long run(TransactionContext transaction) throws Exception {
1783+
return transaction.executeUpdate(statement);
1784+
}
1785+
});
1786+
fail("missing expected exception");
1787+
} catch (SpannerException e) {
1788+
assertEquals(ErrorCode.CANCELLED, e.getErrorCode());
1789+
}
1790+
assertEquals(1, countRequests(BeginTransactionRequest.class));
1791+
// The update statement will be executed 2 times:
1792+
assertEquals(2, countRequests(ExecuteSqlRequest.class));
1793+
// The transaction will never attempt to commit.
1794+
assertEquals(0, countRequests(CommitRequest.class));
1795+
assertEquals(2, countTransactionsStarted());
1796+
}
17141797
}
17151798

17161799
private static int countRequests(Class<? extends AbstractMessage> requestType) {

google-cloud-spanner/src/test/java/com/google/cloud/spanner/ReadFormatTestRunner.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@ public void onTransactionMetadata(Transaction transaction, boolean shouldInclude
4848
throws SpannerException {}
4949

5050
@Override
51-
public void onError(SpannerException e, boolean withBeginTransaction) {}
51+
public SpannerException onError(SpannerException e, boolean withBeginTransaction) {
52+
return e;
53+
}
5254

5355
@Override
5456
public void onDone(boolean withBeginTransaction) {}

0 commit comments

Comments
 (0)