@@ -7,13 +7,13 @@ use janus_aggregator_core::datastore::{
7
7
} ;
8
8
use janus_core:: time:: { Clock , DurationExt , TimeExt } ;
9
9
use janus_messages:: {
10
- query_type:: FixedSize , AggregationJobStep , BatchId , Duration , Interval , TaskId , Time ,
10
+ query_type:: FixedSize , AggregationJobStep , BatchId , Duration , Interval , ReportId , TaskId , Time ,
11
11
} ;
12
12
use prio:: { codec:: Encode , vdaf:: Aggregator } ;
13
13
use rand:: random;
14
14
use std:: {
15
15
cmp:: { max, min, Ordering } ,
16
- collections:: { binary_heap:: PeekMut , hash_map, BinaryHeap , HashMap , VecDeque } ,
16
+ collections:: { binary_heap:: PeekMut , hash_map, BinaryHeap , HashMap , HashSet , VecDeque } ,
17
17
ops:: RangeInclusive ,
18
18
sync:: Arc ,
19
19
} ;
32
32
{
33
33
properties : Properties ,
34
34
aggregation_job_writer : & ' a mut AggregationJobWriter < SEED_SIZE , FixedSize , A > ,
35
- map : HashMap < Option < Time > , Bucket < SEED_SIZE , A > > ,
35
+ buckets : HashMap < Option < Time > , Bucket < SEED_SIZE , A > > ,
36
36
new_batches : Vec < ( BatchId , Option < Time > ) > ,
37
+ report_ids_to_scrub : HashSet < ReportId > ,
37
38
}
38
39
39
40
/// Common properties used by [`BatchCreator`]. This is broken out into a separate structure to make
72
73
task_batch_time_window_size,
73
74
} ,
74
75
aggregation_job_writer,
75
- map : HashMap :: new ( ) ,
76
+ buckets : HashMap :: new ( ) ,
76
77
new_batches : Vec :: new ( ) ,
78
+ report_ids_to_scrub : HashSet :: new ( ) ,
77
79
}
78
80
}
79
81
@@ -95,15 +97,15 @@ where
95
97
. to_batch_interval_start ( & batch_time_window_size)
96
98
} )
97
99
. transpose ( ) ?;
98
- let mut map_entry = self . map . entry ( time_bucket_start_opt) ;
100
+ let mut map_entry = self . buckets . entry ( time_bucket_start_opt) ;
99
101
let bucket = match & mut map_entry {
100
102
hash_map:: Entry :: Occupied ( occupied) => occupied. get_mut ( ) ,
101
103
hash_map:: Entry :: Vacant ( _) => {
102
104
// Lazily find existing unfilled batches.
103
105
let outstanding_batches = tx
104
106
. get_outstanding_batches ( & self . properties . task_id , & time_bucket_start_opt)
105
107
. await ?;
106
- self . map
108
+ self . buckets
107
109
. entry ( time_bucket_start_opt)
108
110
. or_insert_with ( || Bucket :: new ( outstanding_batches) )
109
111
}
@@ -115,6 +117,7 @@ where
115
117
Self :: process_batches (
116
118
& self . properties ,
117
119
self . aggregation_job_writer ,
120
+ & mut self . report_ids_to_scrub ,
118
121
& mut self . new_batches ,
119
122
& time_bucket_start_opt,
120
123
bucket,
@@ -136,6 +139,7 @@ where
136
139
fn process_batches (
137
140
properties : & Properties ,
138
141
aggregation_job_writer : & mut AggregationJobWriter < SEED_SIZE , FixedSize , A > ,
142
+ report_ids_to_scrub : & mut HashSet < ReportId > ,
139
143
new_batches : & mut Vec < ( BatchId , Option < Time > ) > ,
140
144
time_bucket_start : & Option < Time > ,
141
145
bucket : & mut Bucket < SEED_SIZE , A > ,
@@ -182,6 +186,7 @@ where
182
186
desired_aggregation_job_size,
183
187
& mut bucket. unaggregated_reports ,
184
188
aggregation_job_writer,
189
+ report_ids_to_scrub,
185
190
) ?;
186
191
largest_outstanding_batch. add_reports ( desired_aggregation_job_size) ;
187
192
} else {
@@ -209,6 +214,7 @@ where
209
214
desired_aggregation_job_size,
210
215
& mut bucket. unaggregated_reports ,
211
216
aggregation_job_writer,
217
+ report_ids_to_scrub,
212
218
) ?;
213
219
largest_outstanding_batch. add_reports ( desired_aggregation_job_size) ;
214
220
} else {
@@ -249,6 +255,7 @@ where
249
255
desired_aggregation_job_size,
250
256
& mut bucket. unaggregated_reports ,
251
257
aggregation_job_writer,
258
+ report_ids_to_scrub,
252
259
) ?;
253
260
254
261
// Loop to the top of this method to create more aggregation jobs in this newly
@@ -268,6 +275,7 @@ where
268
275
aggregation_job_size : usize ,
269
276
unaggregated_reports : & mut VecDeque < LeaderStoredReport < SEED_SIZE , A > > ,
270
277
aggregation_job_writer : & mut AggregationJobWriter < SEED_SIZE , FixedSize , A > ,
278
+ report_ids_to_scrub : & mut HashSet < ReportId > ,
271
279
) -> Result < ( ) , Error > {
272
280
let aggregation_job_id = random ( ) ;
273
281
debug ! (
@@ -280,7 +288,7 @@ where
280
288
let mut min_client_timestamp = None ;
281
289
let mut max_client_timestamp = None ;
282
290
283
- let report_aggregations = ( 0u64 ..)
291
+ let report_aggregations: Vec < _ > = ( 0u64 ..)
284
292
. zip ( unaggregated_reports. drain ( ..aggregation_job_size) )
285
293
. map ( |( ord, report) | {
286
294
let client_timestamp = * report. metadata ( ) . time ( ) ;
@@ -294,6 +302,7 @@ where
294
302
report. as_start_leader_report_aggregation ( aggregation_job_id, ord)
295
303
} )
296
304
. collect ( ) ;
305
+ report_ids_to_scrub. extend ( report_aggregations. iter ( ) . map ( |ra| * ra. report_id ( ) ) ) ;
297
306
298
307
let min_client_timestamp = min_client_timestamp. unwrap ( ) ; // unwrap safety: aggregation_job_size > 0
299
308
let max_client_timestamp = max_client_timestamp. unwrap ( ) ; // unwrap safety: aggregation_job_size > 0
@@ -329,10 +338,11 @@ where
329
338
// be smaller than max_aggregation_job_size. We will only create jobs smaller than
330
339
// min_aggregation_job_size if the remaining headroom in a batch requires it, otherwise
331
340
// remaining reports will be added to unaggregated_report_ids, to be marked as unaggregated.
332
- for ( time_bucket_start, mut bucket) in self . map . into_iter ( ) {
341
+ for ( time_bucket_start, mut bucket) in self . buckets . into_iter ( ) {
333
342
Self :: process_batches (
334
343
& self . properties ,
335
344
self . aggregation_job_writer ,
345
+ & mut self . report_ids_to_scrub ,
336
346
& mut self . new_batches ,
337
347
& time_bucket_start,
338
348
& mut bucket,
@@ -348,6 +358,11 @@ where
348
358
349
359
try_join ! (
350
360
self . aggregation_job_writer. write( tx, vdaf) ,
361
+ try_join_all(
362
+ self . report_ids_to_scrub
363
+ . iter( )
364
+ . map( |report_id| tx. scrub_client_report( & self . properties. task_id, report_id) )
365
+ ) ,
351
366
try_join_all(
352
367
self . new_batches
353
368
. iter( )
0 commit comments