Skip to content

Commit

Permalink
OVEP options to disable CPU fallback at compile time (#21166)
Browse files Browse the repository at this point in the history
### Description
Provide user-level options to control fallback to the CPU for models that are
not supported on Intel's NPU hardware.


### Motivation and Context
- The current OVEP workflow allows a safe fallback from OV NPU to OV CPU on
compilation failures. It also supports MLAS CPU fallback in the presence of
unsupported custom ops.
- This PR provides a build-time option (`onnxruntime_NPU_NO_FALLBACK`, which
defines `OPENVINO_DISABLE_NPU_FALLBACK`) to disable fallback from OV NPU to OV CPU.
- The session option "kOrtSessionOptionsDisableCPUEPFallback" disables both
the OV CPU and the MLAS CPU fallback.
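- Also includes a bug fix for graph proto creation: value_info entries are now sorted by name so that their order in the output graph is consistent.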

---------

Co-authored-by: jatinwadhwa921 <[email protected]>
Co-authored-by: ankitm3k <[email protected]>
  • Loading branch information
3 people committed Jun 28, 2024
1 parent 21ad004 commit 6baaaf5
Show file tree
Hide file tree
Showing 18 changed files with 247 additions and 165 deletions.
4 changes: 4 additions & 0 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1341,6 +1341,10 @@ if (onnxruntime_USE_OPENVINO)

add_definitions(-DUSE_OPENVINO=1)

if(onnxruntime_NPU_NO_FALLBACK)
add_definitions(-DOPENVINO_DISABLE_NPU_FALLBACK=1)
endif()

if (onnxruntime_USE_OPENVINO_GPU)
add_definitions(-DOPENVINO_CONFIG_GPU=1)
endif()
Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/framework/config_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ std::string ConfigOptions::GetConfigOrDefault(const std::string& config_key,
}

Status ConfigOptions::AddConfigEntry(const char* config_key, const char* config_value) noexcept {
std::string key(config_key);
std::string key = config_key;
if (key.empty() || key.length() > 128)
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Config key is empty or longer than maximum length 128");

std::string val(config_value);
std::string val = config_value;
if (val.length() > onnxruntime::kMaxStrLen)
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Config value is longer than maximum length: ",
Expand Down
16 changes: 15 additions & 1 deletion onnxruntime/core/graph/graph_proto_serializer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,21 @@ void GraphViewerToProto(const GraphViewer& graph_view,
*(graph_proto.mutable_output()->Add()) = output_arg->ToProto();
}

for (const auto* value_info : graph_view.GetValueInfo()) {
std::unordered_set<const onnxruntime::NodeArg*> value_info_ = graph_view.GetValueInfo();

// Reserve memory for the vector to avoid reallocations
std::vector<const NodeArg*> value_info_sorted;
value_info_sorted.reserve(value_info_.size());

value_info_sorted.assign(value_info_.begin(), value_info_.end());
auto sort_predicate = [](const NodeArg* v1, const NodeArg* v2) {
return v1->Name() < v2->Name();
};

// This ensures consistent ordering of value_info entries in the output graph
std::sort(value_info_sorted.begin(), value_info_sorted.end(), sort_predicate);

for (const auto* value_info : value_info_sorted) {
*(graph_proto.mutable_value_info()->Add()) = value_info->ToProto();
}

Expand Down
18 changes: 16 additions & 2 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,11 @@ BackendManager::BackendManager(const GlobalContext& global_context,
subgraph_context_,
ep_ctx_handle_);
} catch (const OnnxRuntimeException& ex) {
if (device_type.find("NPU") != std::string::npos) {
#if defined(OPENVINO_DISABLE_NPU_FALLBACK)
ORT_THROW(ex.what());
#else
if (device_type.find("NPU") != std::string::npos &&
!GetGlobalContext().disable_cpu_fallback) {
LOGS_DEFAULT(WARNING) << ex.what();
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
<< "Falling back to OV CPU for execution";
Expand All @@ -122,6 +126,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
} else {
ORT_THROW(ex.what());
}
#endif
}
}
}
Expand Down Expand Up @@ -419,7 +424,13 @@ void BackendManager::Compute(OrtKernelContext* context) {
subgraph_context_,
ep_ctx_handle_);
} catch (const OnnxRuntimeException& ex) {
if (GetGlobalContext().device_type.find("NPU") != std::string::npos) {
// Build option disables fallback to CPU on compilation failures with NPU.
#if defined(OPENVINO_DISABLE_NPU_FALLBACK)
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU.";
ORT_THROW(ex.what());
#else
if (GetGlobalContext().device_type.find("NPU") != std::string::npos &&
!GetGlobalContext().disable_cpu_fallback) {
LOGS_DEFAULT(WARNING) << ex.what();
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
<< "Falling back to OV CPU for execution";
Expand All @@ -434,7 +445,10 @@ void BackendManager::Compute(OrtKernelContext* context) {
} catch (std::string const& msg) {
ORT_THROW(msg);
}
} else {
ORT_THROW(ex.what());
}
#endif
}
backend_map_.insert({key, dynamic_backend});
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -545,14 +545,19 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
std::cout << "Inference successful" << std::endl;
}

// Keep a second shared_ptr (infer_request_) to the infer request in the current local scope.
// The original infer_request is moved back into the idle pool in the next step, so using this copy
// avoids a dangling pointer, which led to seg faults on subsequent execution calls in debug mode.
OVInferRequestPtr infer_request_ = infer_request;

// Once the inference is completed, the infer_request becomes free and is placed back into pool of infer_requests_
inferRequestsQueue_->putIdleRequest(std::move(infer_request));
#ifndef NDEBUG
#ifndef IO_BUFFER_ENABLED // Printing performance counts is disabled when IO_BUFFER_ENABLED
if (openvino_ep::backend_utils::IsDebugEnabled()) {
inferRequestsQueue_->printstatus(); // Printing the elements of infer_requests_ vector pool only in debug mode
std::string& hw_target = global_context_.device_type;
printPerformanceCounts(infer_request, std::cout, hw_target);
printPerformanceCounts(std::move(infer_request_), std::cout, hw_target);
}
#endif
#endif
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ struct GlobalContext {
bool ep_context_embed_mode = true;
bool export_ep_ctx_blob = false;
bool enable_qdq_optimizer = false;
bool disable_cpu_fallback = false;
size_t num_of_threads;
std::string device_type;
std::string precision_str;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
global_context_->OpenVINO_Version = {OPENVINO_VERSION_MAJOR, OPENVINO_VERSION_MINOR};
global_context_->export_ep_ctx_blob = info.export_ep_ctx_blob_;
global_context_->enable_qdq_optimizer = info.enable_qdq_optimizer_;
global_context_->disable_cpu_fallback = info.disable_cpu_fallback_;

// to check if target device is available
// using ie_core capability GetAvailableDevices to fetch list of devices plugged in
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,15 @@ struct OpenVINOExecutionProviderInfo {
bool disable_dynamic_shapes_{false};
bool export_ep_ctx_blob_{false};
bool enable_qdq_optimizer_{false};
bool disable_cpu_fallback_{false};

OpenVINOExecutionProviderInfo() = delete;

explicit OpenVINOExecutionProviderInfo(std::string dev_type, std::string precision, bool enable_npu_fast_compile,
size_t num_of_threads, std::string cache_dir, std::string model_priority,
int num_streams, void* context, bool enable_opencl_throttling,
bool disable_dynamic_shapes, bool export_ep_ctx_blob,
bool enable_qdq_optimizer)
bool enable_qdq_optimizer, bool disable_cpu_fallback)
: precision_(precision),
enable_npu_fast_compile_(enable_npu_fast_compile),
num_of_threads_(num_of_threads),
Expand All @@ -92,7 +93,8 @@ struct OpenVINOExecutionProviderInfo {
enable_opencl_throttling_(enable_opencl_throttling),
disable_dynamic_shapes_(disable_dynamic_shapes),
export_ep_ctx_blob_(export_ep_ctx_blob),
enable_qdq_optimizer_(enable_qdq_optimizer) {
enable_qdq_optimizer_(enable_qdq_optimizer),
disable_cpu_fallback_(disable_cpu_fallback) {
std::set<std::string> ov_supported_device_types = {"CPU", "GPU",
"GPU.0", "GPU.1", "NPU"};
if (dev_type == "") {
Expand Down
24 changes: 20 additions & 4 deletions onnxruntime/core/providers/openvino/openvino_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
const char* cache_dir, const char* model_priority,
int num_streams, void* context,
bool enable_opencl_throttling, bool disable_dynamic_shapes,
bool export_ep_ctx_blob, bool enable_qdq_optimizer)
bool export_ep_ctx_blob, bool enable_qdq_optimizer,
bool disable_cpu_fallback)
: precision_(precision),
enable_npu_fast_compile_(enable_npu_fast_compile),
num_of_threads_(num_of_threads),
Expand All @@ -23,7 +24,8 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
enable_opencl_throttling_(enable_opencl_throttling),
disable_dynamic_shapes_(disable_dynamic_shapes),
export_ep_ctx_blob_(export_ep_ctx_blob),
enable_qdq_optimizer_(enable_qdq_optimizer) {
enable_qdq_optimizer_(enable_qdq_optimizer),
disable_cpu_fallback_(disable_cpu_fallback) {
device_type_ = (device_type == nullptr) ? "" : device_type;
cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
}
Expand All @@ -45,12 +47,14 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
bool disable_dynamic_shapes_;
bool export_ep_ctx_blob_;
bool enable_qdq_optimizer_;
bool disable_cpu_fallback_;
};

std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
OpenVINOExecutionProviderInfo info(device_type_, precision_, enable_npu_fast_compile_, num_of_threads_,
cache_dir_, model_priority_, num_streams_, context_, enable_opencl_throttling_,
disable_dynamic_shapes_, export_ep_ctx_blob_, enable_qdq_optimizer_);
disable_dynamic_shapes_, export_ep_ctx_blob_, enable_qdq_optimizer_,
disable_cpu_fallback_);
return std::make_unique<OpenVINOExecutionProvider>(info);
}

Expand Down Expand Up @@ -99,6 +103,8 @@ struct OpenVINO_Provider : Provider {

bool enable_qdq_optimizer = false;

bool disable_cpu_fallback = false;

if (provider_options_map.find("device_type") != provider_options_map.end()) {
device_type = provider_options_map.at("device_type").c_str();

Expand Down Expand Up @@ -256,6 +262,15 @@ struct OpenVINO_Provider : Provider {
export_ep_ctx_blob = false;
bool_flag = "";
}

if (provider_options_map.find("disable_cpu_fallback") != provider_options_map.end()) {
bool_flag = provider_options_map.at("disable_cpu_fallback");
if (bool_flag == "true" || bool_flag == "True")
disable_cpu_fallback = true;
else if (bool_flag == "false" || bool_flag == "False")
disable_cpu_fallback = false;
bool_flag = "";
}
return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
const_cast<char*>(precision.c_str()),
enable_npu_fast_compile,
Expand All @@ -267,7 +282,8 @@ struct OpenVINO_Provider : Provider {
enable_opencl_throttling,
disable_dynamic_shapes,
export_ep_ctx_blob,
enable_qdq_optimizer);
enable_qdq_optimizer,
disable_cpu_fallback);
}

void Initialize() override {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
struct OrtOpenVINOProviderOptions;

namespace onnxruntime {
struct SessionOptions;
// defined in provider_bridge_ort.cc
struct OpenVINOProviderFactoryCreator {
static std::shared_ptr<IExecutionProviderFactory> Create(const ProviderOptions* provider_options_map);
static std::shared_ptr<IExecutionProviderFactory> Create(ProviderOptions* provider_options_map,
const SessionOptions* session_options);
static std::shared_ptr<IExecutionProviderFactory> Create(const OrtOpenVINOProviderOptions* provider_options);
};
} // namespace onnxruntime
16 changes: 14 additions & 2 deletions onnxruntime/core/session/provider_bridge_ort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "core/framework/model_metadef_id_generator.h"
#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h"
#include "core/optimizer/qdq_transformer/selectors_actions/shared/utils.h"
#include "core/session/onnxruntime_session_options_config_keys.h"

#include "core/session/onnxruntime_c_api.h"
#include "core/common/string_helper.h"
Expand Down Expand Up @@ -1800,7 +1801,18 @@ std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Creat
return s_library_openvino.Get().CreateExecutionProviderFactory(&ov_options_converted_map);
}

std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create(const ProviderOptions* provider_options_map) {
// Translates session-level configuration into OpenVINO provider options.
//
// Reads the session config entry kOrtSessionOptionsDisableCPUEPFallback (default "0").
// When its value is "1", the provider option "disable_cpu_fallback" is set to "true";
// otherwise ov_options is left unchanged.
//
// session_options is dereferenced unconditionally, so it must not be null.
void ORTSessionOptionsToOrtOpenVINOProviderOptions(ProviderOptions& ov_options,
                                                   const SessionOptions* session_options) {
  const auto fallback_setting =
      session_options->config_options.GetConfigOrDefault(kOrtSessionOptionsDisableCPUEPFallback, "0");
  if (fallback_setting == "1") {
    ov_options["disable_cpu_fallback"] = "true";
  }
}

// Builds the OpenVINO execution provider factory from a provider-options map.
//
// provider_options_map: options handed to the OpenVINO provider library. It is taken as a
//                       non-const pointer because session-level settings may be written into it.
// session_options:      optional; when non-null, its settings are merged into
//                       *provider_options_map before the factory is created.
std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create(
    ProviderOptions* provider_options_map,
    const SessionOptions* session_options) {
  if (session_options != nullptr) {
    onnxruntime::ORTSessionOptionsToOrtOpenVINOProviderOptions(*provider_options_map, session_options);
  }

  return s_library_openvino.Get().CreateExecutionProviderFactory(provider_options_map);
}

Expand Down Expand Up @@ -2075,7 +2087,7 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO_V2,

provider_options[provider_options_keys[i]] = provider_options_values[i];
}
auto factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options);
auto factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options, &(options->value));
if (!factory) {
return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_OpenVINO_V2: Failed to load shared library");
}
Expand Down
3 changes: 1 addition & 2 deletions onnxruntime/core/session/provider_registration.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,10 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider,
#endif
} else if (strcmp(provider_name, "OpenVINO") == 0) {
#if defined(USE_OPENVINO)
options->provider_factories.push_back(OpenVINOProviderFactoryCreator::Create(&provider_options));
options->provider_factories.push_back(OpenVINOProviderFactoryCreator::Create(&provider_options, &(options->value)));
#else
status = create_not_supported_status();
#endif

} else if (strcmp(provider_name, "SNPE") == 0) {
#if defined(USE_SNPE)
options->provider_factories.push_back(SNPEProviderFactoryCreator::Create(provider_options));
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/python/onnxruntime_pybind_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ void addGlobalSchemaFunctions(pybind11::module& m) {
#ifdef USE_OPENVINO
[]() {
ProviderOptions provider_options_map;
return onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options_map);
SessionOptions session_options;
return onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options_map, &session_options);
}(),
#endif
#ifdef USE_TENSORRT
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/python/onnxruntime_pybind_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1084,7 +1084,7 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
}
}
if (std::shared_ptr<IExecutionProviderFactory> openvino_provider_factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(
&OV_provider_options_map)) {
&OV_provider_options_map, &session_options)) {
auto p = openvino_provider_factory->CreateProvider();
// Reset global variables config to avoid it being accidentally passed on to the next session
openvino_device_type.clear();
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_state_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,11 @@ struct OrtStatus {

#elif OPENVINO_CONFIG_HETERO
#define BACKEND_OPENVINO "-OPENVINO_HETERO"

#elif OPENVINO_DISABLE_NPU_FALLBACK
#define BACKEND_OPENVINO "-OPENVINO_DISABLE_NPU_FALLBACK"
#endif

#else
#define BACKEND_OPENVINO ""
#endif
Expand Down
Loading

0 comments on commit 6baaaf5

Please sign in to comment.