Skip to content

Commit e09417e

Browse files
authored
Merge pull request #156 from szabadka/master
Reduce guetzli memory usage by 30%
2 parents 62a4cd3 + e25b96c commit e09417e

File tree

6 files changed

+123
-73
lines changed

6 files changed

+123
-73
lines changed

guetzli/butteraugli_comparator.cc

+46-28
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,13 @@
2525
namespace guetzli {
2626

2727
ButteraugliComparator::ButteraugliComparator(const int width, const int height,
28-
const std::vector<uint8_t>& rgb,
28+
const std::vector<uint8_t>* rgb,
2929
const float target_distance,
3030
ProcessStats* stats)
3131
: width_(width),
3232
height_(height),
3333
target_distance_(target_distance),
34+
rgb_orig_(*rgb),
3435
rgb_linear_pregamma_(3, std::vector<float>(width_ * height_)),
3536
comparator_(width_, height_, kButteraugliStep),
3637
distance_(0.0),
@@ -40,51 +41,68 @@ ButteraugliComparator::ButteraugliComparator(const int width, const int height,
4041
for (int c = 0; c < 3; ++c) {
4142
for (int y = 0, ix = 0; y < height_; ++y) {
4243
for (int x = 0; x < width_; ++x, ++ix) {
43-
rgb_linear_pregamma_[c][ix] = lut[rgb[3 * ix + c]];
44+
rgb_linear_pregamma_[c][ix] = lut[rgb_orig_[3 * ix + c]];
4445
}
4546
}
4647
}
47-
const int block_w = (width_ + 7) / 8;
48-
const int block_h = (height_ + 7) / 8;
49-
const int nblocks = block_w * block_h;
50-
per_block_pregamma_.resize(nblocks);
51-
for (int block_y = 0, bx = 0; block_y < block_h; ++block_y) {
52-
for (int block_x = 0; block_x < block_w; ++block_x, ++bx) {
48+
::butteraugli::OpsinDynamicsImage(width_, height_, rgb_linear_pregamma_);
49+
}
50+
51+
void ButteraugliComparator::Compare(const OutputImage& img) {
52+
std::vector<std::vector<float> > rgb(3, std::vector<float>(width_ * height_));
53+
img.ToLinearRGB(&rgb);
54+
::butteraugli::OpsinDynamicsImage(width_, height_, rgb);
55+
comparator_.DiffmapOpsinDynamicsImage(rgb_linear_pregamma_, rgb, distmap_);
56+
distance_ = ::butteraugli::ButteraugliScoreFromDiffmap(distmap_);
57+
GUETZLI_LOG(stats_, " BA[100.00%%] D[%6.4f]", distance_);
58+
}
59+
60+
void ButteraugliComparator::StartBlockComparisons() {
61+
std::vector<std::vector<float> > dummy(3);
62+
::butteraugli::Mask(rgb_linear_pregamma_, rgb_linear_pregamma_,
63+
width_, height_,
64+
&mask_xyz_, &dummy);
65+
}
66+
67+
void ButteraugliComparator::FinishBlockComparisons() {
68+
mask_xyz_.clear();
69+
}
70+
71+
void ButteraugliComparator::SwitchBlock(int block_x, int block_y,
72+
int factor_x, int factor_y) {
73+
block_x_ = block_x;
74+
block_y_ = block_y;
75+
factor_x_ = factor_x;
76+
factor_y_ = factor_y;
77+
per_block_pregamma_.resize(factor_x_ * factor_y_);
78+
const double* lut = Srgb8ToLinearTable();
79+
for (int off_y = 0, bx = 0; off_y < factor_y_; ++off_y) {
80+
for (int off_x = 0; off_x < factor_x_; ++off_x, ++bx) {
5381
per_block_pregamma_[bx].resize(3, std::vector<float>(kDCTBlockSize));
82+
int block_xx = block_x_ * factor_x_ + off_x;
83+
int block_yy = block_y_ * factor_y_ + off_y;
5484
for (int iy = 0, i = 0; iy < 8; ++iy) {
5585
for (int ix = 0; ix < 8; ++ix, ++i) {
56-
int x = std::min(8 * block_x + ix, width_ - 1);
57-
int y = std::min(8 * block_y + iy, height_ - 1);
86+
int x = std::min(8 * block_xx + ix, width_ - 1);
87+
int y = std::min(8 * block_yy + iy, height_ - 1);
5888
int px = y * width_ + x;
5989
for (int c = 0; c < 3; ++c) {
60-
per_block_pregamma_[bx][c][i] = rgb_linear_pregamma_[c][px];
90+
per_block_pregamma_[bx][c][i] = lut[rgb_orig_[3 * px + c]];
6191
}
6292
}
6393
}
6494
::butteraugli::OpsinDynamicsImage(8, 8, per_block_pregamma_[bx]);
6595
}
6696
}
67-
::butteraugli::OpsinDynamicsImage(width_, height_, rgb_linear_pregamma_);
68-
std::vector<std::vector<float> > dummy(3);
69-
::butteraugli::Mask(rgb_linear_pregamma_, rgb_linear_pregamma_,
70-
width_, height_,
71-
&mask_xyz_, &dummy);
72-
}
73-
74-
void ButteraugliComparator::Compare(const OutputImage& img) {
75-
std::vector<std::vector<float> > rgb(3, std::vector<float>(width_ * height_));
76-
img.ToLinearRGB(&rgb);
77-
::butteraugli::OpsinDynamicsImage(width_, height_, rgb);
78-
comparator_.DiffmapOpsinDynamicsImage(rgb_linear_pregamma_, rgb, distmap_);
79-
distance_ = ::butteraugli::ButteraugliScoreFromDiffmap(distmap_);
80-
GUETZLI_LOG(stats_, " BA[100.00%%] D[%6.4f]", distance_);
8197
}
8298

83-
double ButteraugliComparator::CompareBlock(
84-
const OutputImage& img, int block_x, int block_y) const {
99+
double ButteraugliComparator::CompareBlock(const OutputImage& img,
100+
int off_x, int off_y) const {
101+
int block_x = block_x_ * factor_x_ + off_x;
102+
int block_y = block_y_ * factor_y_ + off_y;
85103
int xmin = 8 * block_x;
86104
int ymin = 8 * block_y;
87-
int block_ix = block_y * ((width_ + 7) / 8) + block_x;
105+
int block_ix = off_y * factor_x_ + off_x;
88106
const std::vector<std::vector<float> >& rgb0_c =
89107
per_block_pregamma_[block_ix];
90108

guetzli/butteraugli_comparator.h

+13-2
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,19 @@ constexpr int kButteraugliStep = 3;
3232
class ButteraugliComparator : public Comparator {
3333
public:
3434
ButteraugliComparator(const int width, const int height,
35-
const std::vector<uint8_t>& rgb,
35+
const std::vector<uint8_t>* rgb,
3636
const float target_distance, ProcessStats* stats);
3737

3838
void Compare(const OutputImage& img) override;
3939

40+
void StartBlockComparisons() override;
41+
void FinishBlockComparisons() override;
42+
43+
void SwitchBlock(int block_x, int block_y,
44+
int factor_x, int factor_y) override;
45+
4046
double CompareBlock(const OutputImage& img,
41-
int block_x, int block_y) const override;
47+
int off_x, int off_y) const override;
4248

4349
double ScoreOutputSize(int size) const override;
4450

@@ -60,6 +66,11 @@ class ButteraugliComparator : public Comparator {
6066
const int width_;
6167
const int height_;
6268
const float target_distance_;
69+
const std::vector<uint8_t>& rgb_orig_;
70+
int block_x_;
71+
int block_y_;
72+
int factor_x_;
73+
int factor_y_;
6374
std::vector<std::vector<float>> rgb_linear_pregamma_;
6475
std::vector<std::vector<float>> mask_xyz_;
6576
std::vector<std::vector<std::vector<float>>> per_block_pregamma_;

guetzli/comparator.h

+15-4
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,22 @@ class Comparator {
3636
// baseline image.
3737
virtual void Compare(const OutputImage& img) = 0;
3838

39-
// Compares an 8x8 block of the baseline image with the same block of img and
40-
// returns the resulting per-block distance. The interpretation of the
41-
// returned distance depends on the comparator used.
39+
// Must be called before any CompareBlock() calls are made.
40+
virtual void StartBlockComparisons() = 0;
41+
// No CompareBlock() calls may be made after this.
42+
virtual void FinishBlockComparisons() = 0;
43+
44+
// Sets the coordinates of the current macro-block for the purpose of
45+
// CompareBlock() calls.
46+
virtual void SwitchBlock(int block_x, int block_y,
47+
int factor_x, int factor_y) = 0;
48+
49+
// Compares the 8x8 block with offsets (off_x, off_y) within the current
50+
// macro-block of the baseline image with the same block of img and returns
51+
// the resulting per-block distance. The interpretation of the returned
52+
// distance depends on the comparator used.
4253
virtual double CompareBlock(const OutputImage& img,
43-
int block_x, int block_y) const = 0;
54+
int off_x, int off_y) const = 0;
4455

4556
// Returns the combined score of the output image in the last Compare() call
4657
// (or the baseline image, if Compare() was not called yet), based on output

guetzli/guetzli.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ constexpr int kDefaultJPEGQuality = 95;
3434

3535
// An upper estimate of memory usage of Guetzli. The bound is
3636
// max(kLowestMemusageMB * 1<<20, pixel_count * kBytesPerPixel)
37-
constexpr int kBytesPerPixel = 200;
37+
constexpr int kBytesPerPixel = 125;
3838
constexpr int kLowestMemusageMB = 100; // in MB
3939

4040
constexpr int kDefaultMemlimitMB = 6000; // in MB

guetzli/processor.cc

+42-34
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,11 @@ class Processor {
6464
std::vector<CoeffData>* output_order);
6565
bool SelectQuantMatrix(const JPEGData& jpg_in, const bool downsample,
6666
int best_q[3][kDCTBlockSize],
67-
OutputImage* img,
68-
GuetzliOutput* quantized_out);
67+
OutputImage* img);
6968
QuantData TryQuantMatrix(const JPEGData& jpg_in,
7069
const float target_mul,
7170
int q[3][kDCTBlockSize],
72-
OutputImage* img,
73-
GuetzliOutput* out);
71+
OutputImage* img);
7472
void MaybeOutput(const std::string& encoded_jpg);
7573
void DownsampleImage(OutputImage* img);
7674
void OutputJpeg(const JPEGData& in, std::string* out);
@@ -287,8 +285,7 @@ class QuantMatrixGenerator {
287285
QuantData Processor::TryQuantMatrix(const JPEGData& jpg_in,
288286
const float target_mul,
289287
int q[3][kDCTBlockSize],
290-
OutputImage* img,
291-
GuetzliOutput* out) {
288+
OutputImage* img) {
292289
QuantData data;
293290
memcpy(data.q, q, sizeof(data.q));
294291
img->CopyFromJpegData(jpg_in);
@@ -311,39 +308,31 @@ QuantData Processor::TryQuantMatrix(const JPEGData& jpg_in,
311308
comparator_->Compare(*img);
312309
data.dist_ok = comparator_->DistanceOK(target_mul);
313310
data.jpg_size = encoded_jpg.size();
314-
out->jpeg_data = encoded_jpg;
315-
out->distmap = comparator_->distmap();
316-
out->distmap_aggregate = comparator_->distmap_aggregate();
317-
out->score = comparator_->ScoreOutputSize(encoded_jpg.size());
318311
MaybeOutput(encoded_jpg);
319312
return data;
320313
}
321314

322315
bool Processor::SelectQuantMatrix(const JPEGData& jpg_in, const bool downsample,
323316
int best_q[3][kDCTBlockSize],
324-
OutputImage* img,
325-
GuetzliOutput* quantized_out) {
317+
OutputImage* img) {
326318
QuantMatrixGenerator qgen(downsample, stats_);
327319
// Don't try to go up to exactly the target distance when selecting a
328320
// quantization matrix, since we will need some slack to do the frequency
329321
// masking later.
330322
const float target_mul_high = 0.97f;
331323
const float target_mul_low = 0.95f;
332324

333-
QuantData best = TryQuantMatrix(jpg_in, target_mul_high, best_q, img,
334-
quantized_out);
325+
QuantData best = TryQuantMatrix(jpg_in, target_mul_high, best_q, img);
335326
for (;;) {
336327
int q_next[3][kDCTBlockSize];
337328
if (!qgen.GetNext(q_next)) {
338329
break;
339330
}
340331

341-
GuetzliOutput out;
342-
QuantData data = TryQuantMatrix(jpg_in, target_mul_high, q_next, img, &out);
332+
QuantData data = TryQuantMatrix(jpg_in, target_mul_high, q_next, img);
343333
qgen.Add(data);
344334
if (CompareQuantData(data, best)) {
345335
best = data;
346-
*quantized_out = out;
347336
if (data.dist_ok && !comparator_->DistanceOK(target_mul_low)) {
348337
break;
349338
}
@@ -398,6 +387,7 @@ void Processor::ComputeBlockZeroingOrder(
398387
return a.second < b.second; });
399388
coeff_t processed_block[kBlockSize];
400389
memcpy(processed_block, block, sizeof(processed_block));
390+
comparator_->SwitchBlock(block_x, block_y, factor_x, factor_y);
401391
while (!input_order.empty()) {
402392
float best_err = 1e17f;
403393
int best_i = 0;
@@ -420,7 +410,7 @@ void Processor::ComputeBlockZeroingOrder(
420410
int block_xx = block_x * factor_x + ix;
421411
int block_yy = block_y * factor_y + iy;
422412
if (8 * block_xx < img->width() && 8 * block_yy < img->height()) {
423-
float err = comparator_->CompareBlock(*img, block_xx, block_yy);
413+
float err = comparator_->CompareBlock(*img, ix, iy);
424414
max_err = std::max(max_err, err);
425415
}
426416
}
@@ -548,7 +538,14 @@ void Processor::SelectFrequencyMasking(const JPEGData& jpg, OutputImage* img,
548538
const int block_height = (height + 8 * factor_y - 1) / (8 * factor_y);
549539
const int num_blocks = block_width * block_height;
550540

551-
std::vector<std::vector<CoeffData> > orders(num_blocks);
541+
std::vector<int> candidate_coeff_offsets(num_blocks + 1);
542+
std::vector<uint8_t> candidate_coeffs;
543+
std::vector<float> candidate_coeff_errors;
544+
candidate_coeffs.reserve(60 * num_blocks);
545+
candidate_coeff_errors.reserve(60 * num_blocks);
546+
std::vector<CoeffData> block_order;
547+
block_order.reserve(3 * kDCTBlockSize);
548+
comparator_->StartBlockComparisons();
552549
for (int block_y = 0, block_ix = 0; block_y < block_height; ++block_y) {
553550
for (int block_x = 0; block_x < block_width; ++block_x, ++block_ix) {
554551
coeff_t block[kBlockSize] = { 0 };
@@ -566,11 +563,18 @@ void Processor::SelectFrequencyMasking(const JPEGData& jpg, OutputImage* img,
566563
kDCTBlockSize * sizeof(orig_block[0]));
567564
}
568565
}
566+
block_order.clear();
569567
ComputeBlockZeroingOrder(block, orig_block, block_x, block_y, factor_x,
570-
factor_y, comp_mask, img,
571-
&orders[block_ix]);
568+
factor_y, comp_mask, img, &block_order);
569+
candidate_coeff_offsets[block_ix] = candidate_coeffs.size();
570+
for (size_t i = 0; i < block_order.size(); ++i) {
571+
candidate_coeffs.push_back(block_order[i].idx);
572+
candidate_coeff_errors.push_back(block_order[i].block_err);
573+
}
572574
}
573575
}
576+
comparator_->FinishBlockComparisons();
577+
candidate_coeff_offsets[num_blocks] = candidate_coeffs.size();
574578

575579
std::vector<JpegHistogram> ac_histograms(ncomp);
576580
int jpg_header_size, dc_size;
@@ -619,21 +623,24 @@ void Processor::SelectFrequencyMasking(const JPEGData& jpg, OutputImage* img,
619623
for (int block_y = 0, block_ix = 0; block_y < block_height; ++block_y) {
620624
for (int block_x = 0; block_x < block_width; ++block_x, ++block_ix) {
621625
const int last_index = last_indexes[block_ix];
622-
const std::vector<CoeffData>& order = orders[block_ix];
626+
const int offset = candidate_coeff_offsets[block_ix];
627+
const int num_candidates =
628+
candidate_coeff_offsets[block_ix + 1] - offset;
629+
const float* candidate_errors = &candidate_coeff_errors[offset];
623630
const float max_err = max_block_error[block_ix];
624631
if (block_weight[block_ix] == 0) {
625632
continue;
626633
}
627634
if (direction > 0) {
628-
for (size_t i = last_index; i < order.size(); ++i) {
629-
float val = ((order[i].block_err - max_err) /
635+
for (size_t i = last_index; i < num_candidates; ++i) {
636+
float val = ((candidate_errors[i] - max_err) /
630637
block_weight[block_ix]);
631638
global_order.push_back(std::make_pair(block_ix, val));
632639
}
633-
blocks_to_change += (static_cast<size_t>(last_index) < order.size() ? 1 : 0);
640+
blocks_to_change += (last_index < num_candidates ? 1 : 0);
634641
} else {
635642
for (int i = last_index - 1; i >= 0; --i) {
636-
float val = ((max_err - order[i].block_err) /
643+
float val = ((max_err - candidate_errors[i]) /
637644
block_weight[block_ix]);
638645
global_order.push_back(std::make_pair(block_ix, val));
639646
}
@@ -686,8 +693,9 @@ void Processor::SelectFrequencyMasking(const JPEGData& jpg, OutputImage* img,
686693
const int block_x = block_ix % block_width;
687694
const int block_y = block_ix / block_width;
688695
const int last_idx = last_indexes[block_ix];
689-
const std::vector<CoeffData>& order = orders[block_ix];
690-
const int idx = order[last_idx + std::min(direction, 0)].idx;
696+
const int offset = candidate_coeff_offsets[block_ix];
697+
const uint8_t* candidates = &candidate_coeffs[offset];
698+
const int idx = candidates[last_idx + std::min(direction, 0)];
691699
const int c = idx / kDCTBlockSize;
692700
const int k = idx % kDCTBlockSize;
693701
const int* quant = img->component(c).quant();
@@ -716,6 +724,8 @@ void Processor::SelectFrequencyMasking(const JPEGData& jpg, OutputImage* img,
716724
break;
717725
}
718726
}
727+
size_t global_order_size = global_order.size();
728+
std::vector<std::pair<int, float>>().swap(global_order);
719729

720730
for (int i = 0; i < num_blocks; ++i) {
721731
max_block_error[i] += block_weight[i] * val_threshold * direction;
@@ -734,7 +744,7 @@ void Processor::SelectFrequencyMasking(const JPEGData& jpg, OutputImage* img,
734744
"Blocks[%zd/%d/%d] ValThres[%.4f] Out[%7zd] EstErr[%.2f%%]",
735745
stats_->counters[kNumItersCnt], img->FrameTypeStr().c_str(),
736746
comp_mask, direction > 0 ? "up" : "down", changed_coeffs,
737-
global_order.size(), changed_blocks.size(),
747+
global_order_size, changed_blocks.size(),
738748
blocks_to_change, num_blocks, val_threshold,
739749
encoded_jpg.size(),
740750
100.0 - (100.0 * est_jpg_size) / encoded_jpg.size());
@@ -827,9 +837,7 @@ bool Processor::ProcessJpegData(const Params& params, const JPEGData& jpg_in,
827837
}
828838
int best_q[3][kDCTBlockSize];
829839
memcpy(best_q, q_in, sizeof(best_q));
830-
GuetzliOutput quantized_out;
831-
if (!SelectQuantMatrix(jpg, downsample != 0, best_q,
832-
&img, &quantized_out)) {
840+
if (!SelectQuantMatrix(jpg, downsample != 0, best_q, &img)) {
833841
for (int c = 0; c < 3; ++c) {
834842
for (int i = 0; i < kDCTBlockSize; ++i) {
835843
best_q[c][i] = 1;
@@ -881,7 +889,7 @@ bool Process(const Params& params, ProcessStats* stats,
881889
std::unique_ptr<ButteraugliComparator> comparator;
882890
if (jpg.width >= 32 && jpg.height >= 32) {
883891
comparator.reset(
884-
new ButteraugliComparator(jpg.width, jpg.height, rgb,
892+
new ButteraugliComparator(jpg.width, jpg.height, &rgb,
885893
params.butteraugli_target, stats));
886894
}
887895
bool ok = ProcessJpegData(params, jpg, comparator.get(), &out, stats);
@@ -905,7 +913,7 @@ bool Process(const Params& params, ProcessStats* stats,
905913
std::unique_ptr<ButteraugliComparator> comparator;
906914
if (jpg.width >= 32 && jpg.height >= 32) {
907915
comparator.reset(
908-
new ButteraugliComparator(jpg.width, jpg.height, rgb,
916+
new ButteraugliComparator(jpg.width, jpg.height, &rgb,
909917
params.butteraugli_target, stats));
910918
}
911919
bool ok = ProcessJpegData(params, jpg, comparator.get(), &out, stats);

0 commit comments

Comments
 (0)