[Impeller] remove image upload from IO thread, limit concurrent worke…

…r threads. (#52423) Fixes flutter/flutter#123058 Fixes flutter/flutter#135443 We're currently using the IO thread to bottleneck image uploads. Instead, just use fewer concurrent worker threads - and cap the limit at something small. For a Pixel device, this should use about 2 threads maximum, instead of 5 (4 worker and 1 IO).
flutter · Apr 28, 2024 · f4c20e9 · f4c20e9
1 parent dbeec53
commit f4c20e9
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 27 deletions.
diff --git a/lib/ui/painting/image_decoder_impeller.cc b/lib/ui/painting/image_decoder_impeller.cc
@@ -10,11 +10,9 @@
 #include "flutter/fml/make_copyable.h"
 #include "flutter/fml/trace_event.h"
 #include "flutter/impeller/core/allocator.h"
-#include "flutter/impeller/core/texture.h"
 #include "flutter/impeller/display_list/dl_image_impeller.h"
 #include "flutter/impeller/renderer/command_buffer.h"
 #include "flutter/impeller/renderer/context.h"
-#include "flutter/lib/ui/painting/image_decoder_skia.h"
 #include "impeller/base/strings.h"
 #include "impeller/core/device_buffer.h"
 #include "impeller/display_list/skia_conversions.h"
@@ -489,29 +487,21 @@ void ImageDecoderImpeller::Decode(fml::RefPtr<ImageDescriptor> descriptor,
           result(nullptr, bitmap_result.decode_error);
           return;
         }
-        auto upload_texture_and_invoke_result = [result, context, bitmap_result,
-                                                 gpu_disabled_switch]() {
-          sk_sp<DlImage> image;
-          std::string decode_error;
-          if (context->GetCapabilities()->SupportsBufferToTextureBlits()) {
-            std::tie(image, decode_error) = UploadTextureToPrivate(
-                context, bitmap_result.device_buffer, bitmap_result.image_info,
-                bitmap_result.sk_bitmap, gpu_disabled_switch);
-            result(image, decode_error);
-          } else {
-            std::tie(image, decode_error) = UploadTextureToStorage(
-                context, bitmap_result.sk_bitmap, gpu_disabled_switch,
-                impeller::StorageMode::kDevicePrivate,
-                /*create_mips=*/true);
-            result(image, decode_error);
-          }
-        };
-        // TODO(jonahwilliams):
-        // https://github.com/flutter/flutter/issues/123058 Technically we
-        // don't need to post tasks to the io runner, but without this
-        // forced serialization we can end up overloading the GPU and/or
-        // competing with raster workloads.
-        io_runner->PostTask(upload_texture_and_invoke_result);
+
+        sk_sp<DlImage> image;
+        std::string decode_error;
+        if (context->GetCapabilities()->SupportsBufferToTextureBlits()) {
+          std::tie(image, decode_error) = UploadTextureToPrivate(
+              context, bitmap_result.device_buffer, bitmap_result.image_info,
+              bitmap_result.sk_bitmap, gpu_disabled_switch);
+          result(image, decode_error);
+        } else {
+          std::tie(image, decode_error) = UploadTextureToStorage(
+              context, bitmap_result.sk_bitmap, gpu_disabled_switch,
+              impeller::StorageMode::kDevicePrivate,
+              /*create_mips=*/true);
+          result(image, decode_error);
+        }
       });
 }
 

diff --git a/runtime/dart_vm.cc b/runtime/dart_vm.cc
@@ -282,12 +282,19 @@ size_t DartVM::GetVMLaunchCount() {
   return gVMLaunchCount;
 }
 
+// Minimum and maximum number of worker threads.
+static constexpr size_t kMinCount = 2;
+static constexpr size_t kMaxCount = 4;
+
 DartVM::DartVM(const std::shared_ptr<const DartVMData>& vm_data,
                std::shared_ptr<IsolateNameServer> isolate_name_server)
     : settings_(vm_data->GetSettings()),
       concurrent_message_loop_(fml::ConcurrentMessageLoop::Create(
-          fml::EfficiencyCoreCount().value_or(
-              std::thread::hardware_concurrency()))),
+          std::clamp(fml::EfficiencyCoreCount().value_or(
+                         std::thread::hardware_concurrency()) /
+                         2,
+                     kMinCount,
+                     kMaxCount))),
       skia_concurrent_executor_(
           [runner = concurrent_message_loop_->GetTaskRunner()](
               const fml::closure& work) { runner->PostTask(work); }),

diff --git a/shell/platform/embedder/tests/embedder_unittests.cc b/shell/platform/embedder/tests/embedder_unittests.cc
@@ -2310,6 +2310,8 @@ TEST_F(EmbedderTest, CanPostTaskToAllNativeThreads) {
   ASSERT_EQ(captures.render_threads_count, 1u);
   ASSERT_EQ(captures.ui_threads_count, 1u);
   ASSERT_EQ(captures.worker_threads_count, worker_count + 1u /* for IO */);
+  EXPECT_GE(captures.worker_threads_count - 1, 2u);
+  EXPECT_LE(captures.worker_threads_count - 1, 4u);
 
   platform_task_runner->PostTask([&]() {
     engine.reset();