diff --git a/CMakeLists.txt b/CMakeLists.txt index 27089f98..1170d3d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -432,8 +432,8 @@ if (NOT DEBUG) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNDEBUG") endif () -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 ${DEBUG_SYMBOL} -O2 -pipe -m64 -fopenmp -Wall -W -fPIC -Wno-unused-parameter -Wno-strict-aliasing -Wno-parentheses -fno-omit-frame-pointer ") -SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 ${DEBUG_SYMBOL} -O2 -pipe -m64 -fopenmp -Wall -W -fPIC -Wno-unused-parameter -Wno-strict-aliasing -Wno-parentheses -fno-omit-frame-pointer") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 ${DEBUG_SYMBOL} -O2 -pipe -m64 -fopenmp -Wall -g -W -fPIC -Wno-unused-parameter -Wno-strict-aliasing -Wno-parentheses -fno-omit-frame-pointer ") +SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 ${DEBUG_SYMBOL} -O2 -pipe -m64 -fopenmp -Wall -g -W -fPIC -Wno-unused-parameter -Wno-strict-aliasing -Wno-parentheses -fno-omit-frame-pointer") add_definitions( -D_GNU_SOURCE diff --git a/include/common/common.h b/include/common/common.h index eded25c9..e31e7070 100644 --- a/include/common/common.h +++ b/include/common/common.h @@ -1387,6 +1387,16 @@ inline bool float_equal(double value, double compare, double epsilon = 1e-9) { return std::fabs(value - compare) < epsilon; } +inline std::string redis_encode(std::string str) { + //\n\n + std::string tmp; + tmp += "\n"; + tmp += std::to_string(str.length()); + tmp += "\n"; + tmp += str; + return tmp; +} + //set double buffer template using DoubleBufferSet = butil::DoublyBufferedData>; diff --git a/include/common/expr_value.h b/include/common/expr_value.h index 707a1780..c97cf817 100644 --- a/include/common/expr_value.h +++ b/include/common/expr_value.h @@ -167,6 +167,7 @@ struct ExprValue { case pb::HLL: case pb::HEX: case pb::TDIGEST: + case pb::JSON: str_val = value.string_val(); break; case pb::BITMAP: { @@ -190,6 +191,7 @@ struct ExprValue { float_precision_len = -1; str_val = value_str; if 
(primitive_type == pb::STRING + || primitive_type == pb::JSON || primitive_type == pb::HEX || primitive_type == pb::BITMAP || primitive_type == pb::HLL @@ -323,6 +325,7 @@ struct ExprValue { case pb::STRING: case pb::HEX: case pb::BITMAP: + case pb::JSON: case pb::TDIGEST: value->set_string_val(str_val); break; @@ -422,6 +425,7 @@ struct ExprValue { case pb::STRING: case pb::HEX: case pb::HLL: + case pb::JSON: case pb::TDIGEST: return str_val.length(); case pb::DATETIME: @@ -522,6 +526,9 @@ struct ExprValue { case pb::STRING: str_val = get_string(); break; + case pb::JSON: + str_val = get_string(); + break; case pb::BITMAP: { _u.bitmap = new(std::nothrow) Roaring(); if (str_val.size() > 0) { @@ -576,6 +583,7 @@ struct ExprValue { butil::MurmurHash3_x64_128(&_u, 8, seed, out); return out[0]; case pb::STRING: + case pb::JSON: case pb::HEX: { butil::MurmurHash3_x64_128(str_val.c_str(), str_val.size(), seed, out); return out[0]; @@ -620,6 +628,7 @@ struct ExprValue { } case pb::STRING: case pb::HEX: + case pb::JSON: case pb::HLL: case pb::TDIGEST: return str_val; @@ -734,6 +743,7 @@ struct ExprValue { (_u.double_val < other._u.double_val ? 
-1 : 0); case pb::STRING: case pb::HEX: + case pb::JSON: return str_val.compare(other.str_val); case pb::NULL_TYPE: return -1; @@ -788,7 +798,7 @@ struct ExprValue { } bool is_string() const { - return type == pb::STRING || type == pb::HEX || type == pb::BITMAP || type == pb::HLL || type == pb::TDIGEST; + return type == pb::STRING || type == pb::HEX || type == pb::BITMAP || type == pb::HLL || type == pb::TDIGEST || type == pb::JSON; } bool is_double() const { @@ -862,15 +872,16 @@ struct ExprValue { // 默认不带us static ExprValue Now(int32_t precision = 0) { ExprValue tmp(pb::TIMESTAMP); - tmp._u.uint32_val = time(NULL); - tmp.cast_to(pb::DATETIME); - if (precision > 0 and precision <= 6) { timeval tv; gettimeofday(&tv, NULL); + tmp._u.uint32_val = tv.tv_sec; + tmp.cast_to(pb::DATETIME); tmp._u.uint64_val |= tv.tv_usec; tmp.set_precision_len(precision); } else { + tmp._u.uint32_val = time(NULL); + tmp.cast_to(pb::DATETIME); tmp.set_precision_len(0); } return tmp; @@ -895,14 +906,16 @@ struct ExprValue { long offset = timeinfo.tm_gmtoff; ExprValue tmp(pb::TIMESTAMP); - tmp._u.uint32_val = time(NULL) - offset; - tmp.cast_to(pb::DATETIME); - timeval tv; - gettimeofday(&tv, NULL); - tmp._u.uint64_val |= tv.tv_usec; if (precision >=0 && precision <= 6) { + timeval tv; + gettimeofday(&tv, NULL); + tmp._u.uint32_val = tv.tv_sec - offset; + tmp.cast_to(pb::DATETIME); + tmp._u.uint64_val |= tv.tv_usec; tmp.set_precision_len(precision); } else { + tmp._u.uint32_val = time(NULL) - offset; + tmp.cast_to(pb::DATETIME); tmp.set_precision_len(0); } return tmp; @@ -927,7 +940,7 @@ struct ExprValue { struct HashFunction { size_t operator()(const ExprValue& ev) const { - if (ev.type == pb::STRING || ev.type == pb::HEX) { + if (ev.type == pb::STRING || ev.type == pb::HEX || ev.type == pb::JSON) { return ev.hash(); } return ev._u.uint64_val; diff --git a/include/common/histogram.h b/include/common/histogram.h index aa3c9400..1d8fb5ee 100644 --- a/include/common/histogram.h +++ 
b/include/common/histogram.h @@ -302,7 +302,9 @@ class PacketSample { sample_sorter.insert_row(batch->get_row().get()); } batch->reset(); - _batch_vector.push_back(batch); + if (batch->size() > 0) { + _batch_vector.push_back(batch); + } } while (!eos); sample_sorter.insert_done(); diff --git a/include/common/schema_factory.h b/include/common/schema_factory.h index 9a898799..bce60a8d 100644 --- a/include/common/schema_factory.h +++ b/include/common/schema_factory.h @@ -60,6 +60,7 @@ static const std::string TABLE_BINLOG_BACKUP_DAYS = "binlog_backup_days"; // struct UserInfo; class TableRecord; +class MutTableKey; typedef std::shared_ptr SmartRecord; typedef std::map StrInt64Map; @@ -975,6 +976,13 @@ typedef ::google::protobuf::RepeatedPtrField StatisticsVec; const std::vector& records, std::vector& region_ids); + int get_region_by_primary_key(int64_t main_table_id, + IndexInfo& index, + MutTableKey &primary_key, + int partition_id, + pb::RegionInfo ®ion_info + ); + bool exist_tableid(int64_t table_id); void get_all_table_by_db(const std::string& namespace_, const std::string& db_name, std::vector& table_ptrs); void get_all_table_version(std::unordered_map& table_id_version); diff --git a/include/common/type_utils.h b/include/common/type_utils.h index 8e7c17d6..6c11e75d 100644 --- a/include/common/type_utils.h +++ b/include/common/type_utils.h @@ -348,6 +348,8 @@ inline uint8_t to_mysql_type(pb::PrimitiveType type) { case pb::BITMAP: case pb::TDIGEST: return MYSQL_TYPE_STRING; + case pb::JSON: + return MYSQL_TYPE_JSON; default: return MYSQL_TYPE_STRING; } @@ -444,6 +446,8 @@ inline std::string to_mysql_type_full_string(pb::PrimitiveType type, return "BITMAP"; case pb::TDIGEST: return "TDIGEST"; + case pb::JSON: + return "json"; default: return ""; } diff --git a/include/engine/table_iterator.h b/include/engine/table_iterator.h index a4d32148..b34e9fff 100644 --- a/include/engine/table_iterator.h +++ b/include/engine/table_iterator.h @@ -57,27 +57,6 @@ struct 
IndexRange { IndexRange() {} - IndexRange(TableRecord* _left, - TableRecord* _right, - IndexInfo* _index_info, - IndexInfo* _pri_info, - pb::RegionInfo* _region_info, - int left_cnt, - int right_cnt, - bool _l_open, - bool _r_open, - bool _like_prefix) : - left(_left), - right(_right), - index_info(_index_info), - pri_info(_pri_info), - region_info(_region_info), - left_field_cnt(left_cnt), - right_field_cnt(right_cnt), - left_open(_l_open), - right_open(_r_open), - like_prefix(_like_prefix) {} - IndexRange(const MutTableKey& _left, const MutTableKey& _right, IndexInfo* _index_info, diff --git a/include/exec/access_path.h b/include/exec/access_path.h index 8f7592f7..c2b2be31 100755 --- a/include/exec/access_path.h +++ b/include/exec/access_path.h @@ -194,6 +194,7 @@ enum IndexHint { bool _in_pred = false; bool _is_eq_or_in = true; bool _need_filter = false; + uint32_t prefix_ratio_index_score = UINT32_MAX; }; typedef std::shared_ptr SmartPath; } diff --git a/include/exec/dml_node.h b/include/exec/dml_node.h index 839b9476..1e191e88 100644 --- a/include/exec/dml_node.h +++ b/include/exec/dml_node.h @@ -28,7 +28,7 @@ class DMLNode : public ExecNode { virtual void find_place_holder(std::unordered_multimap& placeholders); int insert_row(RuntimeState* state, SmartRecord record, bool is_update = false); int delete_row(RuntimeState* state, SmartRecord record, MemRow* row); - int get_lock_row(RuntimeState* state, SmartRecord record, std::string* pk_str, MemRow* row); + int get_lock_row(RuntimeState* state, SmartRecord record, std::string* pk_str, MemRow* row, int64_t& ttl_ts); int remove_row(RuntimeState* state, SmartRecord record, const std::string& pk_str, bool delete_primary = true); int update_row(RuntimeState* state, SmartRecord record, MemRow* row); @@ -81,6 +81,7 @@ class DMLNode : public ExecNode { return _table_info; } + protected: int init_schema_info(RuntimeState* state); void add_delete_conditon_fields(); @@ -128,6 +129,7 @@ class DMLNode : public ExecNode { 
int64_t _ttl_timestamp_us = 0; //ttl写入时间,0表示无ttl bool _ddl_need_write = false; int64_t _ddl_index_id = -1; + ExprNode* _last_value_expr = nullptr; // not own it }; } diff --git a/include/exec/fetcher_store.h b/include/exec/fetcher_store.h index c5e4cda3..14c165b5 100755 --- a/include/exec/fetcher_store.h +++ b/include/exec/fetcher_store.h @@ -61,7 +61,11 @@ class OnRPCDone: public google::protobuf::Closure { void retry_times_inc() { _retry_times++; } - + + void set_primary_indexes(std::string *primary_indexes) { + _primary_indexes = primary_indexes; + } + void set_rpc_ctrl(RPCCtrl* ctrl) { _rpc_ctrl = ctrl; } @@ -139,6 +143,7 @@ class OnRPCDone: public google::protobuf::Closure { static bvar::LatencyRecorder total_send_request; static bvar::LatencyRecorder add_backup_send_request; static bvar::LatencyRecorder has_backup_send_request; + std::string *_primary_indexes = nullptr; }; // RPCCtrl只控制rpc的异步发送和并发控制,具体rpc的成功与否结果收集由fetcher_store处理 @@ -153,12 +158,11 @@ class RPCCtrl { std::unique_lock lck(_mutex); while(true) { // 完成 - if (_todo_cnt == 0 && _doing_cnt == 0) { - return 1; - } + // if (_todo_cnt == 0 && _doing_cnt == 0) { + // return 1; + // } // 获取任务 - tasks.clear(); if (_todo_cnt > 0) { for (auto& iter : _ip_task_group_map) { auto task_group = iter.second; @@ -178,7 +182,11 @@ class RPCCtrl { if (tasks.empty()) { // 没有获取到任务,等待唤醒 - _cv.wait(lck); + if ((!_is_pipeline || _add_task_finish) && _todo_cnt == 0 && _doing_cnt == 0) { + return 1; + } else { + _cv.wait(lck); + } } else { // 获取成功 return 0; @@ -200,6 +208,7 @@ class RPCCtrl { } task->set_rpc_ctrl(this); _todo_cnt++; + _cv.notify_one(); } void task_finish(OnRPCDone* task) { @@ -227,7 +236,6 @@ class RPCCtrl { _cv.notify_one(); } - void execute() { while (true) { std::vector tasks; @@ -235,13 +243,34 @@ class RPCCtrl { if (ret == 1) { return; } - for (OnRPCDone* task : tasks) { task->send_request(); } } } + void set_pipeline() { + _is_pipeline = true; + if (!_bthread_started) { + _bthread_started = true; + 
_bth.run([this](){execute();}); + } + } + + void wait_finish() { + { + std::unique_lock lck(_mutex); + _add_task_finish = true; + _cv.notify_one(); + } + _bth.join(); + _bthread_started = false; + } + + void set_task_concurrency_per_group(int concurrency) { + _task_concurrency_per_group = concurrency; + } + private: struct TaskGroup { TaskGroup() { } @@ -274,9 +303,13 @@ class RPCCtrl { int _todo_cnt = 0; int _done_cnt = 0; int _doing_cnt = 0; + bool _add_task_finish = false; + bool _is_pipeline = false; std::map> _ip_task_group_map; bthread::ConditionVariable _cv; bthread::Mutex _mutex; + Bthread _bth; + bool _bthread_started = false; }; // 全局二级索引降级使用,将主备请求分别发往不同集群 @@ -327,10 +360,20 @@ class FetcherStore { FetcherStore() { } virtual ~FetcherStore() { + SAFE_DELETE(_rpc_control); } - + + void init_rpc_control(RuntimeState * state, pb::OpType opType) { + if (_rpc_control == nullptr) { + _rpc_control = new RPCCtrl(state->calc_single_store_concurrency(opType)); + } else { + _rpc_control->set_task_concurrency_per_group(state->calc_single_store_concurrency(opType)); + } + } + void clear() { region_batch.clear(); + region_batch_list.clear(); index_records.clear(); start_key_sort.clear(); error = E_OK; @@ -399,13 +442,74 @@ class FetcherStore { } int run_not_set_state(RuntimeState* state, - std::map& region_infos, + std::map& region_info, ExecNode* store_request, int start_seq_id, int current_seq_id, pb::OpType op_type, GlobalBackupType backup_type); + int fetcher_select_with_region_primary(RuntimeState* state, + pb::RegionInfo *region_info, + std::string *region_primary, + ExecNode* store_request, + int start_seq_id, + int current_seq_id){ + + uint64_t log_id = state->log_id(); + pb::OpType op_type = pb::OP_SELECT; + if (!_is_pipeline) { + set_pipeline_mode(state, op_type); + } + auto task = new OnRPCDone(this, state, store_request, region_info, + region_info->region_id(), region_info->region_id(), start_seq_id, current_seq_id, op_type); + 
task->set_primary_indexes(region_primary); + _rpc_control->add_new_task(task); + _traces.insert(task->get_trace()); + return 0; + } + + void clear_result(RuntimeState *state) { + region_batch.clear(); + region_batch_list.clear(); + index_records.clear(); + start_key_sort.clear(); + no_copy_cache_plan_set.clear(); + error = E_OK; + skip_region_set.clear(); + callids.clear(); + primary_timestamp_updated = false; + affected_rows = 0; + scan_rows = 0; + filter_rows = 0; + row_cnt = 0; + client_conn = state->client_conn(); + _traces.clear(); + } + + void set_pipeline_mode(RuntimeState *state, pb::OpType op_type ) { + _is_pipeline = true; + if(_rpc_control == nullptr) { + init_rpc_control(state, op_type); + _rpc_control->set_pipeline(); + } + } + + bool is_pipeline() { + return _is_pipeline; + } + + void wait_finish() { + if (_rpc_control != nullptr) { + _rpc_control->wait_finish(); + } + } + + + std::set> & get_traces() { + return _traces; + } + int run(RuntimeState* state, std::map& region_infos, ExecNode* store_request, @@ -563,6 +667,7 @@ class FetcherStore { std::map> return_str_records; std::map> return_str_old_records; std::map> region_batch; + std::map>> region_batch_list; //std::map> split_region_batch; std::map> region_id_ttl_timestamp_batch; @@ -592,6 +697,9 @@ class FetcherStore { bool need_send_rollback = true; WriteBinlogParam write_binlog_param; GlobalBackupType global_backup_type = GBT_INIT; + RPCCtrl *_rpc_control = nullptr; + bool _is_pipeline = false; + std::set> _traces; }; template diff --git a/include/exec/filter_node.h b/include/exec/filter_node.h index 2e604c05..137fd988 100644 --- a/include/exec/filter_node.h +++ b/include/exec/filter_node.h @@ -35,11 +35,14 @@ class FilterNode : public ExecNode { virtual std::vector* mutable_conjuncts() { return &_conjuncts; } + std::vector& conjuncts() { + return _conjuncts; + } void add_conjunct(ExprNode* conjunct) { _conjuncts.push_back(conjunct); } - const std::vector& pruned_conjuncts() { + std::vector& 
pruned_conjuncts() { return _pruned_conjuncts; } @@ -74,6 +77,9 @@ class FilterNode : public ExecNode { expr->replace_slot_ref_to_literal(sign_set, literal_maps); } } + void modify_conjuncts(std::vector& conjuncts) { + _conjuncts.swap(conjuncts); + } void modifiy_pruned_conjuncts_by_index(std::vector& filter_condition) { _pruned_conjuncts.clear(); _raw_filter_node.Clear(); @@ -106,6 +112,15 @@ class FilterNode : public ExecNode { e->reset(state); } } + ExprNode* get_last_value() { + for (auto conjunct : _conjuncts) { + auto last_value_expr = conjunct->get_last_value(); + if (last_value_expr != nullptr) { + return last_value_expr; + } + } + return nullptr; + } private: bool need_copy(MemRow* row); diff --git a/include/exec/insert_manager_node.h b/include/exec/insert_manager_node.h index 081de28e..25904c68 100755 --- a/include/exec/insert_manager_node.h +++ b/include/exec/insert_manager_node.h @@ -143,7 +143,7 @@ class InsertManagerNode : public DmlManagerNode { } private: - void update_record(const SmartRecord& record, const SmartRecord& origin_record); + void update_record(const SmartRecord& record, const SmartRecord& origin_record, RuntimeState* state = nullptr); int64_t _table_id = -1; int32_t _tuple_id = -1; int32_t _values_tuple_id = -1; diff --git a/include/exec/lock_primary_node.h b/include/exec/lock_primary_node.h index 7e13205b..461da1d1 100755 --- a/include/exec/lock_primary_node.h +++ b/include/exec/lock_primary_node.h @@ -64,6 +64,7 @@ class LockPrimaryNode : public DMLNode { std::vector _affected_index_ids; std::vector _conjuncts; bool _conjuncts_need_destory = false; + ExprNode* _last_value_expr = nullptr; // not own it }; } diff --git a/include/exec/rocksdb_scan_node.h b/include/exec/rocksdb_scan_node.h index e119d47b..14474431 100644 --- a/include/exec/rocksdb_scan_node.h +++ b/include/exec/rocksdb_scan_node.h @@ -209,14 +209,14 @@ class RocksdbScanNode : public ScanNode { BatchTableKey _scan_range_keys; BatchRecord _multiget_records; RowBatch 
_multiget_row_batch; - std::vector _left_field_cnts; - std::vector _right_field_cnts; - std::vector _left_opens; - std::vector _right_opens; - std::vector _like_prefixs; + int _left_field_cnt = 0; + int _right_field_cnt = 0; + bool _left_open = false; + bool _right_open = false; + bool _like_prefix = false; + bool _is_eq = false; std::vector _update_slots; std::vector _update_exprs; - bool _use_encoded_key = false; bool _range_key_sorted = false; // trace使用 int _scan_rows = 0; diff --git a/include/exec/scan_node.h b/include/exec/scan_node.h index 1b591a55..e2b20a2e 100644 --- a/include/exec/scan_node.h +++ b/include/exec/scan_node.h @@ -15,6 +15,7 @@ #pragma once #include "exec_node.h" +#include "filter_node.h" #include "access_path.h" #include "table_record.h" #include @@ -150,6 +151,16 @@ class AccessPathMgr { void show_cost(std::vector>& path_infos); int64_t select_index(); + bool use_fulltext_or_vector() { + return _use_fulltext_or_vector; + } + bool use_cost() { + if (SchemaFactory::get_instance()->get_statistics_ptr(_table_id) != nullptr + && SchemaFactory::get_instance()->is_switch_open(_table_id, TABLE_SWITCH_COST) && !_use_fulltext_or_vector) { + return true; + } + return false; + } private: int compare_two_path(SmartPath& outer_path, SmartPath& inner_path); void inner_loop_and_compare(std::map::iterator outer_loop_iter); @@ -190,6 +201,15 @@ struct ScanIndexInfo { std::map region_primary; }; +struct MergeIndexInfo { + std::vector _conjuncts; + std::vector _pruned_conjuncts; + AccessPathMgr _main_path; + int64_t _select_idx = -1; + std::vector _scan_indexs; + bool _has_index = false; // for join recorder +}; + class ScanNode : public ExecNode { public: ScanNode() { @@ -269,6 +289,17 @@ class ScanNode : public ExecNode { index.use_for = use_for; _scan_indexs.emplace_back(std::move(index)); bool has_index = false; + if (pos_index.has_sort_index()) { + _has_index = true; + return; + } + if (pos_index.left_field_cnt() > 0) { + _has_index = true; + return; + } 
+ if (pos_index.right_field_cnt() > 0) { + return; + } for (auto& range : pos_index.ranges()) { if (range.left_field_cnt() > 0) { has_index = true; @@ -278,10 +309,6 @@ class ScanNode : public ExecNode { has_index = true; break; } - if (pos_index.has_sort_index()) { - has_index = true; - break; - } } _has_index = _has_index || has_index; } @@ -292,6 +319,7 @@ class ScanNode : public ExecNode { _scan_indexs.clear(); _pb_node.mutable_derive_node()->mutable_scan_node()->clear_indexes(); _pb_node.mutable_derive_node()->mutable_scan_node()->clear_learner_index(); + clear_merge_index_info(); } bool need_copy(MemRow* row, std::vector& conjuncts) { for (auto conjunct : conjuncts) { @@ -343,10 +371,10 @@ class ScanNode : public ExecNode { } void set_index_useage_and_lock(bool use_global_backup) { + _current_index_mutex.lock(); // 只有在存在global backup的时候才加锁 for (auto& scan_index_info : _scan_indexs) { if (scan_index_info.use_for == ScanIndexInfo::U_GLOBAL_LEARNER) { - _current_index_mutex.lock(); _current_global_backup = use_global_backup; break; } @@ -354,12 +382,12 @@ class ScanNode : public ExecNode { } void current_index_unlock() { - for (auto& scan_index_info : _scan_indexs) { - if (scan_index_info.use_for == ScanIndexInfo::U_GLOBAL_LEARNER) { - _current_index_mutex.unlock(); - break; - } - } + _current_index_mutex.unlock(); + // for (auto& scan_index_info : _scan_indexs) { + // if (scan_index_info.use_for == ScanIndexInfo::U_GLOBAL_LEARNER) { + // break; + // } + // } } bool current_use_global_backup() const { @@ -379,6 +407,47 @@ class ScanNode : public ExecNode { void add_global_condition_again(); + // for index merge + bool need_index_merge(); + SmartPath select_path() { + return _main_path.path(_select_idx); + } + MergeIndexInfo& origin_index_info() { + return _origin_index_info; + } + + void swap_index_info(MergeIndexInfo& info) { + info._main_path.init(_table_id); + std::swap(info._conjuncts, _filter_node->conjuncts()); + std::swap(info._pruned_conjuncts, 
_filter_node->pruned_conjuncts()); + std::swap(info._main_path, _main_path); + std::swap(info._select_idx, _select_idx); + std::swap(info._scan_indexs, _scan_indexs); + std::swap(info._has_index, _has_index); + } + std::vector& conjuncts_without_or() { + return _conjuncts_without_or; + } + std::vector& or_sub_conjuncts() { + return _or_sub_conjuncts; + } + void add_merge_index_info() { + MergeIndexInfo info; + swap_index_info(info); + _merge_index_infos.emplace_back(info); + } + void clear_merge_index_info() { + _conjuncts_without_or.clear(); + _or_sub_conjuncts.clear(); + _merge_index_infos.clear(); + } + std::vector& merge_index_infos() { + return _merge_index_infos; + } + bool has_merge_index() const { + return _merge_index_infos.size() > 0; + } + protected: pb::Engine _engine = pb::ROCKSDB; int32_t _tuple_id = 0; @@ -403,6 +472,22 @@ class ScanNode : public ExecNode { bthread::Mutex _current_index_mutex; bool _current_global_backup = false; GetMode _get_mode = GET_ONLY; // set to GET_LOCK, when "select ... 
for update" + + // for index merge +// std::vector _conjuncts; +// std::vector _pruned_conjuncts; + FilterNode* _filter_node = nullptr; // not own it + + std::vector _conjuncts_without_or; + ExprNode* _or_conjunct = nullptr; + std::vector _or_sub_conjuncts; + + MergeIndexInfo _origin_index_info; + std::vector _merge_index_infos; + + +// int64_t _origin_select_idx = -1; +// AccessPathMgr _origin_main_path; //主集群索引选择 }; } diff --git a/include/exec/select_manager_node.h b/include/exec/select_manager_node.h index 22b1a097..03e06de7 100755 --- a/include/exec/select_manager_node.h +++ b/include/exec/select_manager_node.h @@ -19,10 +19,12 @@ #include "scan_node.h" #include "limit_node.h" #include "table_record.h" +#include "table_key.h" #include "proto/store.interface.pb.h" #include "sorter.h" #include "mem_row_compare.h" #include "fetcher_store.h" +#include "rocksdb_scan_node.h" namespace baikaldb { struct FetcherInfo { @@ -38,6 +40,18 @@ struct FetcherInfo { FetcherStore fetcher_store; }; +struct FetcherPrimaryInfo { + pb::PossibleIndex pos_index; + pb::RegionInfo region_info; + int cur_idx = 0; + std::vector region_primary_list; + ~FetcherPrimaryInfo() { + for(auto &rp : region_primary_list) { + SAFE_DELETE(rp); + } + } +}; + class SelectManagerNode : public ExecNode { public: SelectManagerNode() { @@ -75,6 +89,8 @@ class SelectManagerNode : public ExecNode { void multi_fetcher_store_open(FetcherInfo* self_fetcher, FetcherInfo* other_fetcher, RuntimeState* state, ExecNode* exec_node); int fetcher_store_run(RuntimeState* state, ExecNode* exec_node); + int merge_fetcher_store_run(RuntimeState* state, ExecNode* exec_node); + int open_global_index(FetcherInfo* fetcher, RuntimeState* state, ExecNode* exec_node, int64_t global_index_id, @@ -86,8 +102,23 @@ class SelectManagerNode : public ExecNode { RuntimeState* state, ExecNode* exec_node, int64_t main_table_id, + std::functionadd_one_record, + const SmartIndex& pri_info, LimitNode* limit = nullptr); + int 
fetcher_primary(FetcherInfo* fetcher, + RuntimeState* state, + ExecNode* exec_node, + SmartIndex pri_info, + LimitNode* limit, + int64_t main_table_id); + int fetcher_primary_pipeline(FetcherInfo* fetcher, + RuntimeState* state, + ExecNode* exec_node, + SmartIndex pri_info, + LimitNode* limit, + int64_t main_table_id); + void set_sub_query_runtime_state(RuntimeState* state) { _sub_query_runtime_state = state; } diff --git a/include/exec/sort_node.h b/include/exec/sort_node.h index 57128088..4d375f4c 100644 --- a/include/exec/sort_node.h +++ b/include/exec/sort_node.h @@ -51,6 +51,9 @@ class SortNode : public ExecNode { for (auto expr : _slot_order_exprs) { ExprNode::create_pb_expr(sort_node->add_slot_order_exprs(), expr); } + if (_limit != -1) { + pb_node->set_limit(_limit); + } } void transfer_fetcher_pb(pb::FetcherNode* pb_fetcher) { diff --git a/include/expr/expr_node.h b/include/expr/expr_node.h index cd3260d0..d9c1c195 100644 --- a/include/expr/expr_node.h +++ b/include/expr/expr_node.h @@ -100,12 +100,72 @@ class ExprNode { } virtual ExprNode* get_last_insert_id() { for (auto c : _children) { - if (c->get_last_insert_id() != nullptr) { + auto expr = c->get_last_insert_id(); + if (expr != nullptr) { return c; } } return nullptr; } + virtual ExprNode* get_last_value() { + for (auto c : _children) { + auto expr = c->get_last_value(); + if (expr != nullptr) { + return expr; + } + } + return nullptr; + } + virtual bool is_valid_int_cast(MemRow* row) { + if (_node_type == pb::SLOT_REF || + _node_type == pb::STRING_LITERAL) { + auto v = get_value(row); + if (v.type == pb::STRING) { + char* end = nullptr; + strtoll(v.str_val.c_str(), &end, 10); + if (strlen(end) > 0) { + return false; + } + if (errno == ERANGE) { + errno = 0; + return false; + } + } + return true; + } + for (auto c : _children) { + if (!c->is_valid_int_cast(row)) { + return false; + } + } + return true; + } + + virtual bool is_valid_double_cast(MemRow* row) { + if (_node_type == pb::SLOT_REF || + 
_node_type == pb::STRING_LITERAL) { + auto v = get_value(row); + if (v.type == pb::STRING) { + char* end = nullptr; + strtod(v.str_val.c_str(), &end); + if (strlen(end) > 0) { + return false; + } + if (errno == ERANGE) { + errno = 0; + return false; + } + } + return true; + } + for (auto c : _children) { + if (!c->is_valid_double_cast(row)) { + return false; + } + } + return true; + } + bool is_row_expr() { return _node_type == pb::ROW_EXPR; } @@ -134,6 +194,7 @@ class ExprNode { // optimize or node to in node static void or_node_optimize(ExprNode** expr_node); + static void like_node_optimize(ExprNode** root, std::vector& new_exprs); bool has_same_children(); bool is_vaild_or_optimize_tree(int32_t level, std::unordered_set* tuple_set); static int change_or_node_to_in(ExprNode** expr_node); @@ -257,6 +318,15 @@ class ExprNode { c->flatten_or_expr(or_exprs); } } + void flatten_and_expr(std::vector* and_exprs) { + if (node_type() != pb::AND_PREDICATE) { + and_exprs->push_back(this); + return; + } + for (auto c : _children) { + c->flatten_and_expr(and_exprs); + } + } virtual void transfer_pb(pb::ExprNode* pb_node); static void create_pb_expr(pb::Expr* expr, ExprNode* root); static int create_tree(const pb::Expr& expr, ExprNode** root); diff --git a/include/expr/internal_functions.h b/include/expr/internal_functions.h index 95d869c4..aa86a4c5 100644 --- a/include/expr/internal_functions.h +++ b/include/expr/internal_functions.h @@ -40,6 +40,7 @@ ExprValue pi(const std::vector& input); ExprValue greatest(const std::vector& input); ExprValue least(const std::vector& input); ExprValue pow(const std::vector& input); +ExprValue bit_count(const std::vector& input); //string functions ExprValue length(const std::vector& input); ExprValue bit_length(const std::vector& input); @@ -66,6 +67,11 @@ ExprValue lpad(const std::vector& input); ExprValue rpad(const std::vector& input); ExprValue instr(const std::vector& input); ExprValue json_extract(const std::vector& input); 
+ExprValue json_extract1(const std::vector& input); +ExprValue json_type(const std::vector& input); +ExprValue json_array(const std::vector& input); +ExprValue json_object(const std::vector& input); +ExprValue json_valid(const std::vector& input); ExprValue export_set(const std::vector& input); ExprValue to_base64(const std::vector& input); ExprValue from_base64(const std::vector& input); @@ -82,7 +88,7 @@ ExprValue field(const std::vector& input); ExprValue quote(const std::vector& input); ExprValue func_char(const std::vector& input); ExprValue soundex(const std::vector& input); - +ExprValue setrange(const std::vector& input); // datetime functions ExprValue unix_timestamp(const std::vector& input); @@ -137,7 +143,8 @@ ExprValue to_days(const std::vector& input); ExprValue to_seconds(const std::vector& input); ExprValue addtime(const std::vector& input); ExprValue subtime(const std::vector& input); - +ExprValue timeseq(const std::vector& input); +ExprValue timeseq_to_str(const std::vector& input); // hll functions ExprValue hll_add(const std::vector& input); ExprValue hll_merge(const std::vector& input); @@ -193,6 +200,7 @@ ExprValue tdigest_location(const std::vector& input); // other ExprValue version(const std::vector& input); ExprValue last_insert_id(const std::vector& input); +ExprValue last_value(const std::vector& input); ExprValue find_in_set(const std::vector& input); //transfer (latitude A, longitude A), (latitude B, longitude B) to distance of A to B (m) ExprValue point_distance(const std::vector& input); @@ -203,6 +211,16 @@ ExprValue cast_to_signed(const std::vector& inpt); ExprValue cast_to_unsigned(const std::vector& inpt); ExprValue cast_to_string(const std::vector& inpt); ExprValue cast_to_double(const std::vector& inpt); +// bit functions +ExprValue bset(const std::vector& input); +ExprValue band(const std::vector& input); +ExprValue bor(const std::vector& input); +ExprValue bxor(const std::vector& input); +ExprValue bnot(const std::vector& 
input); +ExprValue bget(const std::vector& input); +ExprValue bpos(const std::vector& input); +ExprValue bcount(const std::vector& input); + } /* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/include/expr/scalar_fn_call.h b/include/expr/scalar_fn_call.h index 14cae8ff..723c3037 100644 --- a/include/expr/scalar_fn_call.h +++ b/include/expr/scalar_fn_call.h @@ -40,6 +40,54 @@ class ScalarFnCall : public ExprNode { } return ExprNode::get_last_insert_id(); } + ExprNode* get_last_value() { + if (_fn.name() == "last_value") { + return this; + } + return ExprNode::get_last_value(); + } + virtual bool is_valid_int_cast(MemRow* row) { // for +/-: both operands must pass, and the INT64 result must keep the sign of same-signed operands + if (_fn.fn_op() == parser::FT_ADD || _fn.fn_op() == parser::FT_MINUS) { + if (_children.size() != 2 || !_children[0]->is_valid_int_cast(row) || !_children[1]->is_valid_int_cast(row)) { + return false; + } + auto left = children(0)->get_value(row).cast_to(pb::INT64)._u.int64_val; + auto right = children(1)->get_value(row).cast_to(pb::INT64)._u.int64_val; + auto s = get_value(row).cast_to(pb::INT64)._u.int64_val; + if (_fn.fn_op() == parser::FT_MINUS) { + right = -right; + } + if (left >= 0 && right >= 0 && s < 0) { + return false; // overflow + } + if (left < 0 && right < 0 && s >= 0) { + return false; // underflow + } + return true; + } + return ExprNode::is_valid_int_cast(row); + } + virtual bool is_valid_double_cast(MemRow* row) { // same check as is_valid_int_cast, applied to the DOUBLE-cast values + if (_fn.fn_op() == parser::FT_ADD || _fn.fn_op() == parser::FT_MINUS) { + if (_children.size() != 2 || !_children[0]->is_valid_double_cast(row) || !_children[1]->is_valid_double_cast(row)) { + return false; + } + auto left = children(0)->get_value(row).cast_to(pb::DOUBLE)._u.double_val; + auto right = children(1)->get_value(row).cast_to(pb::DOUBLE)._u.double_val; + auto s = get_value(row).cast_to(pb::DOUBLE)._u.double_val; + if (_fn.fn_op() == parser::FT_MINUS) { + right = -right; + } + if (left >= 0 && right >= 0 && s < 0) { + return false; // overflow + } + if (left < 0 && right < 0 && s >= 0) { + return false; // underflow + } + 
return true; + } + return ExprNode::is_valid_double_cast(row); // nodes other than +/- defer to the base-class double check + } private: ExprValue multi_eq_value(MemRow* row) { for (size_t i = 0; i < children(0)->children_size(); i++) { diff --git a/include/logical_plan/select_planner.h b/include/logical_plan/select_planner.h index 05183267..bdd4a375 100644 --- a/include/logical_plan/select_planner.h +++ b/include/logical_plan/select_planner.h @@ -64,6 +64,8 @@ class SelectPlanner : public LogicalPlanner { int parse_limit(); int subquery_rewrite(); + + int minmax_remove(); bool is_full_export(); diff --git a/include/physical_plan/index_selector.h b/include/physical_plan/index_selector.h index 68d405da..6ad99bc2 100644 --- a/include/physical_plan/index_selector.h +++ b/include/physical_plan/index_selector.h @@ -68,6 +68,15 @@ class IndexSelector { int select_partition(SmartTable& table_info, ScanNode* scan_node, std::map& field_range_map); + int64_t index_merge_selector(const std::vector& tuple_descs, + ScanNode* scan_node, + FilterNode* filter_node, + SortNode* sort_node, + JoinNode* join_node, + bool* index_has_null, + std::map& field_range_type, + const std::string& sample_sql); + SchemaFactory* _factory = SchemaFactory::get_instance(); QueryContext* _ctx = nullptr; diff --git a/include/physical_plan/join_reorder.h b/include/physical_plan/join_reorder.h index 4e835526..db6cb561 100644 --- a/include/physical_plan/join_reorder.h +++ b/include/physical_plan/join_reorder.h @@ -20,6 +20,7 @@ namespace baikaldb { class JoinReorder { public: int analyze(QueryContext* ctx); + int reorder(QueryContext* ctx, ExecNode* node); }; } diff --git a/include/runtime/runtime_state.h b/include/runtime/runtime_state.h index b4dd2487..723960a2 100644 --- a/include/runtime/runtime_state.h +++ b/include/runtime/runtime_state.h @@ -435,6 +435,7 @@ class RuntimeState { ExplainType explain_type = EXPLAIN_NULL; std::shared_ptr cmsketch = nullptr; int64_t last_insert_id = INT64_MIN; //存储baikalStore last_insert_id(expr)更新的字段 + std::string last_value = ""; 
pb::StoreRes* response = nullptr; bool need_statistics = true; // 用于动态超时的时间统计,如果请求的实例非NORMAL或着返回backup的结果,则不记入统计 @@ -458,6 +459,7 @@ class RuntimeState { int range_count_limit = 0; int64_t _sql_exec_timeout = -1; bool _is_ddl_work = false; + bool must_have_one = false; private: bool _is_inited = false; bool _is_cancelled = false; diff --git a/include/runtime/sorter.h b/include/runtime/sorter.h index 7435343a..25d19354 100644 --- a/include/runtime/sorter.h +++ b/include/runtime/sorter.h @@ -26,13 +26,13 @@ class Sorter { public: Sorter(MemRowCompare* comp) : _comp(comp), _idx(0) { } - void add_batch(std::shared_ptr& batch) { + virtual void add_batch(std::shared_ptr& batch) { batch->reset(); _min_heap.push_back(batch); } - void sort(); - void merge_sort(); - int get_next(RowBatch* batch, bool* eos); + virtual void sort(); + virtual void merge_sort(); + virtual int get_next(RowBatch* batch, bool* eos); size_t batch_size() { return _min_heap.size(); @@ -42,8 +42,9 @@ class Sorter { void make_heap(); void shiftdown(size_t index); -private: +protected: MemRowCompare* _comp; +private: std::vector> _min_heap; size_t _idx; }; diff --git a/include/runtime/topn_sorter.h b/include/runtime/topn_sorter.h new file mode 100644 index 00000000..dc920ff2 --- /dev/null +++ b/include/runtime/topn_sorter.h @@ -0,0 +1,54 @@ +// Copyright (c) 2018-present Baidu, Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include "common.h" +#include "row_batch.h" +#include "mem_row_compare.h" +#include "sorter.h" + +namespace baikaldb { +// After sorting each batch in parallel, merge them with a heap (translated from the original comment; TODO confirm it matches the top-N heap below) +class TopNSorter : public Sorter { +public: + TopNSorter(MemRowCompare* comp) : Sorter(comp), _limit(1) { + } + void set_limit(int limit) { + if (limit > 1) { + _limit = limit; + } + } + virtual void add_batch(std::shared_ptr& batch); + virtual int get_next(RowBatch* batch, bool* eos); + virtual void sort(){ + for (int i = 1; i < _current_count; ++ i) { // int index matches the int _current_count, avoiding a signed/unsigned comparison + shiftup(i); + } + } + virtual void merge_sort(){} +private: + void shiftdown(size_t index, bool flag = false); + void shiftup(size_t index, bool flag = false); + +private: + std::vector> _mem_min_heap; + int _limit = 1; + int _current_count = 0; +}; +} + +/* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/include/session/network_socket.h b/include/session/network_socket.h index e930d48b..fd7514af 100644 --- a/include/session/network_socket.h +++ b/include/session/network_socket.h @@ -193,7 +193,8 @@ struct NetworkSocket { int is_auth_result_send_partly; // Auth result is sended partly, // need to go on sending. int64_t last_insert_id; - // Socket status. + std::string last_value = ""; + // Socket status. std::string current_db; // Current use database. int charset_num; // Client charset number. std::string charset_name; // Client charset name. 
diff --git a/include/sqlparser/sql_lex.l b/include/sqlparser/sql_lex.l index 7ca3a386..97ce4758 100644 --- a/include/sqlparser/sql_lex.l +++ b/include/sqlparser/sql_lex.l @@ -445,6 +445,8 @@ VAR_SAMP { un_reserved_keyword(yylval, yyscanner, parser); return VAR_SAMP; } \|\| { return OR; } \<\< { return LS_OP; } \>\> { return RS_OP; } +\-\> { return JS_OP; } +\-\>\> { return JS_OP1; } [0-9]+ { //integer diff --git a/include/sqlparser/sql_parse.y b/include/sqlparser/sql_parse.y index 83a2e00c..142c0a9d 100644 --- a/include/sqlparser/sql_parse.y +++ b/include/sqlparser/sql_parse.y @@ -481,7 +481,7 @@ extern int sql_error(YYLTYPE* yylloc, yyscan_t yyscanner, SqlParser* parser, con VAR_SAMP USER_AGG -%token EQ_OP ASSIGN_OP MOD_OP GE_OP GT_OP LE_OP LT_OP NE_OP AND_OP OR_OP NOT_OP LS_OP RS_OP CHINESE_DOT +%token EQ_OP ASSIGN_OP MOD_OP GE_OP GT_OP LE_OP LT_OP NE_OP AND_OP OR_OP NOT_OP LS_OP RS_OP CHINESE_DOT JS_OP JS_OP1 %token IDENT %token STRING_LIT INTEGER_LIT DECIMAL_LIT PLACE_HOLDER_LIT @@ -760,7 +760,8 @@ extern int sql_error(YYLTYPE* yylloc, yyscan_t yyscanner, SqlParser* parser, con %left EQ_OP NE_OP GE_OP GT_OP LE_OP LT_OP IS LIKE IN %left '|' %left '&' -%left LS_OP RS_OP +%left JS_OP1 +%left LS_OP RS_OP JS_OP %left '+' '-' %left '*' '/' MOD_OP MOD %left '^' @@ -1866,6 +1867,38 @@ SelectField: select_field->as_name = $5; $$ = select_field; } + | ColumnName JS_OP STRING_LIT { + SelectField* select_field = new_node(SelectField); + FuncExpr* fun = new_node(FuncExpr); + fun->fn_name = "json_extract1"; + fun->children.push_back($1, parser->arena); + fun->children.push_back($3, parser->arena); + select_field->expr = fun; + parser::String t1, t2; + t1 = "->\""; + t2 = "\""; + select_field->org_name = ((ColumnName*) $1)->name; + select_field->org_name.append("->\"", parser->arena); + select_field->org_name.append(((LiteralExpr*)$3)->_u.str_val.c_str(), parser->arena); + select_field->org_name.append("\"", parser->arena); + $$ = select_field; + } + | ColumnName JS_OP1 
STRING_LIT { + SelectField* select_field = new_node(SelectField); + FuncExpr* fun = new_node(FuncExpr); + fun->fn_name = "json_extract"; + fun->children.push_back($1, parser->arena); + fun->children.push_back($3, parser->arena); + select_field->expr = fun; + parser::String t1, t2; + t1 = "->\""; + t2 = "\""; + select_field->org_name = ((ColumnName*) $1)->name; + select_field->org_name.append("->>\"", parser->arena); // label carries the ->> operator matched by this rule + select_field->org_name.append(((LiteralExpr*)$3)->_u.str_val.c_str(), parser->arena); + select_field->org_name.append("\"", parser->arena); + $$ = select_field; + } ; FieldAsNameOpt: /* EMPTY */ @@ -4028,7 +4061,7 @@ FloatingPointType: BitValueType: BIT { - $$ = MYSQL_TYPE_BIT; + $$ = MYSQL_TYPE_LONG; } ; diff --git a/include/sqlparser/utils.h b/include/sqlparser/utils.h index 7920b961..2e2fb938 100644 --- a/include/sqlparser/utils.h +++ b/include/sqlparser/utils.h @@ -88,6 +88,7 @@ struct String { size_t fast = 0; bool has_slash = false; static std::unordered_map trans_map = { + {'0', '\x00'}, {'\\', '\\'}, {'\"', '\"'}, {'\'', '\''}, @@ -96,7 +97,6 @@ struct String { {'n', '\n'}, {'b', '\b'}, {'Z', '\x1A'}, - {'0', '\0'}, }; while (fast < length) { if (has_slash) { diff --git a/proto/common.proto b/proto/common.proto index c269d97b..a592ed46 100755 --- a/proto/common.proto +++ b/proto/common.proto @@ -65,6 +65,7 @@ enum PrimitiveType { BITMAP = 21; TDIGEST = 22; MAXVALUE_TYPE = 23; + JSON = 24; }; enum SchemaType { @@ -146,4 +147,4 @@ message ExprValue { optional float float_val = 7; optional double double_val = 8; optional bytes string_val = 9; -}; \ No newline at end of file +}; diff --git a/proto/plan.proto b/proto/plan.proto index 4d07b98f..5559a397 100755 --- a/proto/plan.proto +++ b/proto/plan.proto @@ -109,6 +109,11 @@ message PossibleIndex { optional bool use_for_learner = 7; optional bool range_key_sorted = 8; optional bool is_eq = 9; + optional int32 left_field_cnt = 10; + optional int32 right_field_cnt = 11; + optional bool left_open = 12; 
+ optional bool right_open = 13; + optional bool like_prefix = 14; }; enum FulltextNodeType { @@ -280,6 +285,7 @@ message UpdateNode { repeated SlotDescriptor update_slots = 3; repeated Expr update_exprs = 4; //repeated int64 affect_index_ids = 5; + optional int64 row_ttl_duration = 6; //row ttl support, compatible whit prepared stmt }; message PacketNode { diff --git a/proto/store.interface.proto b/proto/store.interface.proto index 64241fba..f8df7f0f 100755 --- a/proto/store.interface.proto +++ b/proto/store.interface.proto @@ -127,6 +127,7 @@ message ExtraReq { message ExtraRes { repeated RegionIndexs infos = 1; optional RegionOfflineBinlogInfo offline_binlog_info = 2; + optional string last_value = 3; }; message StoreReq { diff --git a/src/common/common.cpp b/src/common/common.cpp index 694fc6ff..c769d730 100644 --- a/src/common/common.cpp +++ b/src/common/common.cpp @@ -321,6 +321,7 @@ void stripslashes(std::string& str, bool is_gbk) { size_t fast = 0; bool has_slash = false; static std::unordered_map trans_map = { + {'0', '\x00'}, {'\\', '\\'}, {'\"', '\"'}, {'\'', '\''}, @@ -461,6 +462,7 @@ int primitive_to_proto_type(pb::PrimitiveType type) { { pb::BOOL, FieldDescriptorProto::TYPE_BOOL}, { pb::BITMAP, FieldDescriptorProto::TYPE_BYTES}, { pb::TDIGEST, FieldDescriptorProto::TYPE_BYTES}, + { pb::JSON, FieldDescriptorProto::TYPE_BYTES}, { pb::NULL_TYPE, FieldDescriptorProto::TYPE_BOOL} }; if (_mysql_pb_type_mapping.count(type) == 0) { diff --git a/src/common/information_schema.cpp b/src/common/information_schema.cpp index a5d39fab..d9f57b07 100644 --- a/src/common/information_schema.cpp +++ b/src/common/information_schema.cpp @@ -1216,7 +1216,7 @@ void InformationSchema::init_tables() { record->set_value(record->get_field_by_name("CREATE_TIME"), ct.cast_to(pb::DATETIME)); record->set_string(record->get_field_by_name("TABLE_COLLATION"), coll); record->set_string(record->get_field_by_name("CREATE_OPTIONS"), ""); - 
record->set_string(record->get_field_by_name("TABLE_COMMENT"), ""); + record->set_string(record->get_field_by_name("TABLE_COMMENT"), table_info->comment); record->set_int64(record->get_field_by_name("TABLE_ID"), table_info->id); records.emplace_back(record); } diff --git a/src/common/schema_factory.cpp b/src/common/schema_factory.cpp index 7b7cf5a8..11ce4312 100644 --- a/src/common/schema_factory.cpp +++ b/src/common/schema_factory.cpp @@ -507,7 +507,7 @@ int SchemaFactory::update_table_internal(SchemaMapping& background, const pb::Sc field_info.default_expr_value.cast_to(field_info.type); } if (field_info.type == pb::STRING || field_info.type == pb::HLL - || field_info.type == pb::BITMAP || field_info.type == pb::TDIGEST) { + || field_info.type == pb::BITMAP || field_info.type == pb::TDIGEST || field_info.type == pb::JSON) { field_info.size = -1; } else { field_info.size = get_num_size(field_info.type); @@ -2410,6 +2410,46 @@ int SchemaFactory::check_region_ranges_consecutive(int64_t table_id) { return 0; } +int SchemaFactory::get_region_by_primary_key(int64_t main_table_id, + IndexInfo& index, + MutTableKey &primary_key, + int partition_id, + pb::RegionInfo &region_info) { // fills region_info (including start/end key) for the region located from primary_key; returns 0 on success, -1 when a lookup fails + + DoubleBufferedTableRegionInfo::ScopedPtr table_region_mapping_ptr; + if (_table_region_mapping.Read(&table_region_mapping_ptr) != 0) { + DB_WARNING("DoubleBufferedTableRegion read scoped ptr error."); + return -1; + } + + auto it = table_region_mapping_ptr->find(index.id); + if (it == table_region_mapping_ptr->end()) { + DB_WARNING("index id[%ld] not in table_region_mapping", index.id); + return -1; + } + auto frontground = it->second; + auto &key_region_mapping = frontground->key_region_mapping; + auto record_template = TableRecord::new_record(main_table_id); + auto key_region_iter = key_region_mapping.find(partition_id); + if (key_region_iter == key_region_mapping.end()) { + DB_WARNING("partition %d schema not update.", partition_id); + return -1; + } + StrInt64Map &map = 
key_region_iter->second; + auto region_iter = map.upper_bound(primary_key.data()); + if (region_iter != map.begin()) { // step back to the last start key that is not greater than primary_key + --region_iter; + } + int64_t region_id = region_iter->second; // NOTE(review): dereferences end() if this partition's map is empty; confirm callers guarantee at least one region + frontground->get_region_info(region_id, region_info); + region_info.set_start_key(region_iter->first); + region_iter ++; + if (region_iter != map.end()) { + region_info.set_end_key(region_iter->first); + } + return 0; +} + int SchemaFactory::get_region_by_key(IndexInfo& index, const pb::PossibleIndex* primary, std::map& region_infos, @@ -2469,43 +2509,31 @@ int SchemaFactory::get_region_by_key(int64_t main_table_id, template_primary.mutable_sort_index()->CopyFrom(primary->sort_index()); } template_primary.mutable_index_conjuncts()->CopyFrom(primary->index_conjuncts()); + if (primary->has_is_eq()) { + template_primary.set_is_eq(primary->is_eq()); + template_primary.set_left_field_cnt(primary->left_field_cnt()); + template_primary.set_right_field_cnt(primary->right_field_cnt()); + template_primary.set_left_open(primary->left_open()); + template_primary.set_right_open(primary->right_open()); + template_primary.set_like_prefix(primary->like_prefix()); + } std::map> region_idx_map; auto record_template = TableRecord::new_record(main_table_id); int range_size = primary->ranges_size(); for (int i = 0; i < range_size; ++i) { const auto& range = primary->ranges(i); - bool like_prefix = range.like_prefix(); - bool left_open = range.left_open(); - bool right_open = range.right_open(); + bool like_prefix = template_primary.has_like_prefix() ? template_primary.like_prefix() : range.like_prefix(); + bool left_open = template_primary.has_left_open() ? template_primary.left_open() : range.left_open(); + bool right_open = template_primary.has_right_open() ? 
template_primary.right_open() : range.right_open(); MutTableKey start; MutTableKey end; - if (!range.left_pb_record().empty()) { - auto left = record_template->clone(false); - if (left->decode(range.left_pb_record()) != 0) { - DB_FATAL("Fail to encode pb left, table:%ld", index.id); - return -1; - } - if (left->encode_key(index, start, range.left_field_cnt(), false, like_prefix) != 0) { - DB_FATAL("Fail to encode_key left, table:%ld", index.id); - return -1; - } - } else if (!range.left_key().empty()) { + if (!range.left_key().empty()) { start = MutTableKey(range.left_key(), range.left_full()); } else { left_open = false; } - if (!range.right_pb_record().empty()) { - auto right = record_template->clone(false); - if (right->decode(range.right_pb_record()) != 0) { - DB_FATAL("Fail to encode pb right, table:%ld", index.id); - return -1; - } - if (right->encode_key(index, end, range.right_field_cnt(), false, like_prefix) != 0) { - DB_FATAL("Fail to encode_key right, table:%ld", index.id); - return -1; - } - } else if (!range.right_key().empty()) { + if (!range.right_key().empty()) { end = MutTableKey(range.right_key(), range.right_full()); } else { right_open = false; diff --git a/src/engine/table_iterator.cpp b/src/engine/table_iterator.cpp index 1be86e36..7235377e 100644 --- a/src/engine/table_iterator.cpp +++ b/src/engine/table_iterator.cpp @@ -136,46 +136,13 @@ int Iterator::open(const IndexRange& range, std::map& field left_primary_field_cnt = std::max(0, (range.left_field_cnt - col_cnt)); right_primary_field_cnt = std::max(0, (range.right_field_cnt - col_cnt)); } - if (range.left) { - int ret = range.left->encode_key(*_index_info, _start, left_secondary_field_cnt, false, like_prefix); - if (-2 == ret) { - DB_WARNING("left key has null fields: %ld", index_id); - _valid = false; - return 0; - } else if (0 != ret) { - DB_FATAL("Fail to encode_key, table: %ld", index_id); - return -1; - } - if (_idx_type == pb::I_KEY && left_primary_field_cnt > 0) { - if (0 != 
range.left->encode_primary_key(*_index_info, _start, left_primary_field_cnt)) { - DB_FATAL("Fail to append_index, reg:%ld, tab:%ld", _region, _index_info->pk); - return -1; - } - } - } else if (range.left_key.size() > 0) { + if (range.left_key.size() > 0) { _start.append_index(range.left_key); } else { //没有指定left bound时,forward遍历从seek region+table开始,backward遍历到左边界停止 _left_open = false; } - - if (range.right) { - int ret = range.right->encode_key(*_index_info, _end, right_secondary_field_cnt, false, like_prefix); - if (-2 == ret) { - DB_WARNING("right key has null fields: %ld", index_id); - _valid = false; - return 0; - } else if (0 != ret) { - DB_FATAL("Fail to encode_key, table: %ld", index_id); - return -1; - } - if (_idx_type == pb::I_KEY && right_primary_field_cnt > 0) { - if (0 != range.right->encode_primary_key(*_index_info, _end, right_primary_field_cnt)) { - DB_FATAL("Fail to append_index, reg:%ld, tab:%ld", _region, _index_info->pk); - return -1; - } - } - } else if (range.right_key.size() > 0) { + if (range.right_key.size() > 0) { _end.append_index(range.right_key); } else { _right_open = false; diff --git a/src/exec/access_path.cpp b/src/exec/access_path.cpp index 4757fa0e..dce94875 100755 --- a/src/exec/access_path.cpp +++ b/src/exec/access_path.cpp @@ -92,7 +92,8 @@ bool AccessPath::check_sort_use_index(Property& sort_property) { std::vector& order_exprs = sort_property.slot_order_exprs; SlotRef* slot_ref = static_cast(order_exprs[0]); size_t idx = 0; - auto& fields = index_info_ptr->fields; + std::vectorfields(index_info_ptr->fields.begin(), index_info_ptr->fields.end()); + fields.insert(fields.end(),index_info_ptr->pk_fields.begin(), index_info_ptr->pk_fields.end()); for (; idx < fields.size(); ++idx) { if (tuple_id == slot_ref->tuple_id() && fields[idx].id == slot_ref->field_id()) { break; @@ -498,6 +499,11 @@ void AccessPath::calc_index_range(int64_t partition_field_id, const std::mapset_right_key(rg.right_key.data()); 
range->set_right_full(rg.right_key.get_full()); - } else { - // eq通过标记判断,后续可以删掉 - range->set_right_key(rg.left_key.data()); - range->set_right_full(rg.left_key.get_full()); } - range->set_left_field_cnt(_left_field_cnt); - range->set_right_field_cnt(_right_field_cnt); - range->set_left_open(_left_open); - range->set_right_open(_right_open); - range->set_like_prefix(_like_prefix); } } else { is_possible = true; @@ -548,23 +545,20 @@ void AccessPath::calc_index_range(int64_t partition_field_id, const std::mapfields.size() - && (index_type == pb::I_PRIMARY || index_type == pb::I_UNIQ) - && !_like_prefix) { - right_key.set_full(true); - } if (partition_id != -1) { range->set_partition_id(partition_id); } range->set_left_key(left_key.data()); range->set_left_full(left_key.get_full()); - range->set_right_key(right_key.data()); - range->set_right_full(right_key.get_full()); - range->set_left_field_cnt(_left_field_cnt); - range->set_right_field_cnt(_right_field_cnt); - range->set_left_open(_left_open); - range->set_right_open(_right_open); - range->set_like_prefix(_like_prefix); + if (!_is_eq_or_in) { + if (_right_field_cnt == index_info_ptr->fields.size() + && (index_type == pb::I_PRIMARY || index_type == pb::I_UNIQ) + && !_like_prefix) { + right_key.set_full(true); + } + range->set_right_key(right_key.data()); + range->set_right_full(right_key.get_full()); + } } } @@ -610,10 +604,12 @@ void AccessPath::calc_fulltext(Property& sort_property) { default: break; } + pos_index.set_index_id(index_id); + pos_index.set_left_field_cnt(1); + pos_index.set_left_open(false); if (hit_index && values != nullptr) { hit_index_field_ids.emplace(field_id); is_possible = true; - pos_index.set_index_id(index_id); butil::FlatSet filter; filter.init(ajust_flat_size(values->size())); for (auto value : *values) { @@ -624,8 +620,6 @@ void AccessPath::calc_fulltext(Property& sort_property) { filter.insert(str); auto range = pos_index.add_ranges(); range->set_left_key(str); - 
range->set_left_field_cnt(1); - range->set_left_open(false); if (range_type == MATCH_LANGUAGE) { range->set_match_mode(pb::M_NARUTAL_LANGUAGE); } else if (range_type == MATCH_BOOLEAN) { diff --git a/src/exec/delete_manager_node.cpp b/src/exec/delete_manager_node.cpp index 67d72ea8..e5192a26 100755 --- a/src/exec/delete_manager_node.cpp +++ b/src/exec/delete_manager_node.cpp @@ -16,6 +16,7 @@ #include "network_socket.h" #include "query_context.h" #include "binlog_context.h" +#include "filter_node.h" namespace baikaldb { int DeleteManagerNode::open(RuntimeState* state) { @@ -69,6 +70,27 @@ int DeleteManagerNode::open_global_delete(RuntimeState* state) { DB_WARNING("select manager node fail"); return ret; } + ExprNode* last_value_expr = nullptr; // not own it + FilterNode* where_filter_node = static_cast( + select_manager_or_limit_node->get_node(pb::WHERE_FILTER_NODE)); + if (where_filter_node != nullptr) { + last_value_expr = where_filter_node->get_last_value(); + } + FilterNode* table_filter_node = static_cast( + select_manager_or_limit_node->get_node(pb::TABLE_FILTER_NODE)); + if (table_filter_node != nullptr) { // NOTE(review): overwrites the WHERE-filter result even when this lookup yields nullptr; confirm intended + last_value_expr = table_filter_node->get_last_value(); + } + ON_SCOPE_EXIT(([last_value_expr]() { // guard lives at function scope so the expr is still open while rows are read below (assumes ON_SCOPE_EXIT fires when its enclosing scope ends) + if (last_value_expr != nullptr) { last_value_expr->close(); } + })); + if (last_value_expr != nullptr) { + ret = last_value_expr->open(); + if (ret < 0) { + DB_WARNING("expr open fail, log_id:%lu ret:%d", state->log_id(), ret); + return ret; + } + } _tuple_id = state->tuple_descs()[0].tuple_id(); SmartRecord record_template = SchemaFactory::get_instance()->new_record(*_table_info); bool eos = false; @@ -87,6 +109,9 @@ int DeleteManagerNode::open_global_delete(RuntimeState* state) { for (auto slot : _primary_slots) { record->set_value(record->get_field_by_tag(slot.field_id()), row->get_value(_tuple_id, slot.slot_id())); } + if (last_value_expr != nullptr) { + state->client_conn()->last_value += redis_encode(last_value_expr->get_value(row).get_string()); + } scan_records.push_back(record); } } 
while (!eos); diff --git a/src/exec/dml_node.cpp b/src/exec/dml_node.cpp index f93c0d78..f7cd904d 100644 --- a/src/exec/dml_node.cpp +++ b/src/exec/dml_node.cpp @@ -14,6 +14,7 @@ #include "runtime_state.h" #include "dml_node.h" +#include "filter_node.h" namespace baikaldb { @@ -207,6 +208,14 @@ int DMLNode::init_schema_info(RuntimeState* state) { } else { _affected_indexes = _all_indexes; } + FilterNode* where_filter_node = static_cast(get_node(pb::WHERE_FILTER_NODE)); + if (where_filter_node != nullptr) { + _last_value_expr = where_filter_node->get_last_value(); + } + FilterNode* table_filter_node = static_cast(get_node(pb::TABLE_FILTER_NODE)); + if (table_filter_node != nullptr) { // NOTE(review): overwrites the WHERE-filter result even when this lookup yields nullptr; confirm intended + _last_value_expr = table_filter_node->get_last_value(); + } return 0; } @@ -378,7 +387,8 @@ int DMLNode::insert_row(RuntimeState* state, SmartRecord record, bool is_update) } return ret; } else if (_is_replace) { - ret = delete_row(state, old_record, nullptr); + std::unique_ptr row = state->mem_row_desc()->fetch_mem_row(); + ret = delete_row(state, old_record, row.get()); if (ret < 0) { DB_WARNING_STATE(state, "remove fail, index:%ld ,ret:%d", info.id, ret); return -1; @@ -506,7 +516,7 @@ int DMLNode::insert_row(RuntimeState* state, SmartRecord record, bool is_update) return ++affected_rows; } -int DMLNode::get_lock_row(RuntimeState* state, SmartRecord record, std::string* pk_str, MemRow* row) { +int DMLNode::get_lock_row(RuntimeState* state, SmartRecord record, std::string* pk_str, MemRow* row, int64_t& ttl_ts) { int ret = 0; MutTableKey pk_key; ret = record->encode_key(*_pri_info, pk_key, -1, false); @@ -522,12 +532,12 @@ int DMLNode::get_lock_row(RuntimeState* state, SmartRecord record, std::string* record->decode_key(*_pri_info, *pk_str); } //delete requires all fields (index and non-index fields) - ret = _txn->get_update_primary(_region_id, *_pri_info, record, _field_ids, GET_LOCK, true); + ret = _txn->get_update_primary(_region_id, *_pri_info, record, _field_ids, GET_LOCK, true, 
ttl_ts); if (ret < 0) { return ret; } if (row != nullptr && _tuple_desc != nullptr - && (_node_type == pb::DELETE_NODE || _node_type == pb::UPDATE_NODE)) { + && (_node_type == pb::DELETE_NODE || _node_type == pb::UPDATE_NODE || _node_type == pb::LOCK_PRIMARY_NODE)) { for (auto slot : _tuple_desc->slots()) { auto field = record->get_field_by_tag(slot.field_id()); row->set_value(slot.tuple_id(), slot.slot_id(), @@ -637,7 +647,8 @@ int DMLNode::remove_row(RuntimeState* state, SmartRecord record, int DMLNode::delete_row(RuntimeState* state, SmartRecord record, MemRow* row) { int ret = 0; std::string pk_str; - ret = get_lock_row(state, record, &pk_str, row); + int64_t ttl_ts = 0; + ret = get_lock_row(state, record, &pk_str, row, ttl_ts); if (ret == -3) { //DB_WARNING_STATE(state, "key not in this region:%ld", _region_id); return 0; @@ -648,6 +659,10 @@ int DMLNode::delete_row(RuntimeState* state, SmartRecord record, MemRow* row) { DB_WARNING_STATE(state, "lock table:%ld failed", _table_id); return -1; } + if (_last_value_expr != nullptr) { + state->last_value += redis_encode(_last_value_expr->get_value(row).get_string()); + } + if (!satisfy_condition_again(state, row)) { DB_WARNING_STATE(state, "condition changed when delete record:%s", record->debug_string().c_str()); // UndoGetForUpdate(pk_str)? @@ -673,7 +688,8 @@ bool DMLNode::satisfy_condition_again(RuntimeState* state, MemRow* row) { int DMLNode::update_row(RuntimeState* state, SmartRecord record, MemRow* row) { int ret = 0; std::string pk_str; - ret = get_lock_row(state, record, &pk_str, row); + int64_t ttl_ts = 0; + ret = get_lock_row(state, record, &pk_str, row, ttl_ts); if (ret == -3) { //DB_WARNING_STATE(state, "key not in this region:%ld", _region_id); return 0; @@ -689,6 +705,13 @@ int DMLNode::update_row(RuntimeState* state, SmartRecord record, MemRow* row) { // UndoGetForUpdate(pk_str)? 
同一个txn GetForUpdate与UndoGetForUpdate之间不要写pk_str return 0; } + // _row_ttl_duration == -1 代表保持原ttl意思 + // TODO: 全局索引 keep ttl功能 + if (_row_ttl_duration == -1 && _ttl_timestamp_us > 0 && ttl_ts > 0) { + _ttl_timestamp_us = ttl_ts; + _txn->set_write_ttl_timestamp_us(_ttl_timestamp_us); + DB_DEBUG("keep ttl_timestamp_us: %ld", _ttl_timestamp_us); + } _indexes_ptr = &_affected_indexes; // 影响了主键需要删除旧的行 ret = remove_row(state, record, pk_str, _update_affect_primary); @@ -730,6 +753,26 @@ int DMLNode::update_row(RuntimeState* state, SmartRecord record, MemRow* row) { if (last_insert_id_expr != nullptr) { state->last_insert_id = last_insert_id_expr->get_value(row).get_numberic(); } + auto last_value_expr = expr->get_last_value(); + if (last_value_expr != nullptr) { + // 类型检查 + if (last_value_expr->children_size() == 2 && last_value_expr->children(1)->is_literal()) { + std::string frt = last_value_expr->children(1)->get_value(nullptr).get_string(); + bool is_valid = true; + if (frt == "%d") { + is_valid = last_value_expr->children(0)->is_valid_int_cast(row); + } else if (frt == "%f") { + is_valid = last_value_expr->children(0)->is_valid_double_cast(row); + } + if (!is_valid) { + state->error_code = ER_ILLEGAL_VALUE_FOR_TYPE; + state->error_msg << "ERR value is not an integer or out of range"; + DB_WARNING_STATE(state, "ERR value is not an integer or out of range"); + return -1; + } + } + state->last_value += redis_encode(last_value_expr->get_value(row).get_string()); + } } ret = insert_row(state, record, true); if (ret < 0) { diff --git a/src/exec/fetcher_store.cpp b/src/exec/fetcher_store.cpp index d675bc85..376ced54 100755 --- a/src/exec/fetcher_store.cpp +++ b/src/exec/fetcher_store.cpp @@ -243,8 +243,12 @@ ErrorType OnRPCDone::fill_request() { if (scan_node != nullptr) { bool use_global_backup = _fetcher_store->global_backup_type == GBT_LEARNER; scan_node->set_index_useage_and_lock(use_global_backup); + if (_primary_indexes != nullptr) { + 
scan_node->scan_indexs()[0].region_primary[_region_id] = *_primary_indexes; + } } + ExecNode::create_pb_plan(_old_region_id, _request.mutable_plan(), _store_request); if (scan_node != nullptr) { @@ -863,6 +867,11 @@ ErrorType OnRPCDone::handle_response(const std::string& remote_side) { if (_response.has_last_insert_id()) { _client_conn->last_insert_id = _response.last_insert_id(); } + if (_response.has_extra_res()) { + if (_response.extra_res().has_last_value()) { + _client_conn->last_value = _response.extra_res().last_value(); + } + } if (_op_type != pb::OP_SELECT && _op_type != pb::OP_SELECT_FOR_UPDATE && _op_type != pb::OP_ROLLBACK && _op_type != pb::OP_COMMIT) { _fetcher_store->affected_rows += _response.affected_rows(); _client_conn->txn_affected_rows += _response.affected_rows(); @@ -940,13 +949,21 @@ ErrorType OnRPCDone::handle_response(const std::string& remote_side) { } { BAIDU_SCOPED_LOCK(_fetcher_store->region_lock); - // merge可能会重复请求相同的region_id - if (_fetcher_store->region_batch.count(_region_id) == 1) { - _fetcher_store->region_batch[_region_id] = batch; + if (!_fetcher_store->is_pipeline()) { + // merge可能会重复请求相同的region_id + if (_fetcher_store->region_batch.count(_region_id) == 1) { + _fetcher_store->region_batch[_region_id] = batch; + } else { + //分裂单独处理start_key_sort + _fetcher_store->start_key_sort.emplace(_info.start_key(), _region_id); + _fetcher_store->region_batch[_region_id] = batch; + } } else { - //分裂单独处理start_key_sort - _fetcher_store->start_key_sort.emplace(_info.start_key(), _region_id); - _fetcher_store->region_batch[_region_id] = batch; + auto &batch_list = _fetcher_store->region_batch_list[_region_id]; + if (batch_list.size() == 0) { + _fetcher_store->start_key_sort.emplace(_info.start_key(), _region_id); + } + batch_list.push_back(batch); } } @@ -1151,6 +1168,7 @@ int FetcherStore::run_not_set_state(RuntimeState* state, pb::OpType op_type, GlobalBackupType backup_type) { region_batch.clear(); + region_batch_list.clear(); 
index_records.clear(); start_key_sort.clear(); no_copy_cache_plan_set.clear(); diff --git a/src/exec/filter_node.cpp b/src/exec/filter_node.cpp index 4ae61fa3..04041c12 100644 --- a/src/exec/filter_node.cpp +++ b/src/exec/filter_node.cpp @@ -342,8 +342,8 @@ int FilterNode::expr_optimize(QueryContext* ctx) { DB_WARNING("ExecNode::optimize fail, ret:%d", ret); return ret; } - // sign => pred - std::map pred_map; + + std::vector like2range; for (auto& expr : _conjuncts) { //类型推导 ret = expr->expr_optimize(); @@ -352,7 +352,15 @@ int FilterNode::expr_optimize(QueryContext* ctx) { return ret; } ExprNode::or_node_optimize(&expr); - + ExprNode::like_node_optimize(&expr, like2range); + } + for (auto expr : like2range) { + _conjuncts.push_back(expr); + } + + // sign => pred + std::map pred_map; + for (auto& expr : _conjuncts) { //非bool型表达式判断 if (expr->col_type() != pb::BOOL) { ExprNode::_s_non_boolean_sql_cnts << 1; @@ -674,6 +682,7 @@ int FilterNode::get_next(RuntimeState* state, RowBatch* batch, bool* eos) { } } std::unique_ptr& row = _child_row_batch.get_row(); + if (_is_explain || need_copy(row.get())) { batch->move_row(std::move(row)); ++_num_rows_returned; diff --git a/src/exec/insert_manager_node.cpp b/src/exec/insert_manager_node.cpp index 6b43add4..ee1e9844 100644 --- a/src/exec/insert_manager_node.cpp +++ b/src/exec/insert_manager_node.cpp @@ -533,9 +533,11 @@ int InsertManagerNode::insert_on_dup_key_update(RuntimeState* state) { std::set dup_record_ids; for (auto pair : _index_info_map) { int64_t index_id = pair.first; + RuntimeState* tmp_state = index_id == _pri_info->id ? 
state : nullptr; auto key_ids_map = _index_keys_record_map[index_id]; auto info = pair.second; auto return_records = _store_records[index_id]; + for (auto& record : return_records) { MutTableKey mt_key; ret = record->encode_key(*info, mt_key, -1, false); @@ -548,7 +550,7 @@ int InsertManagerNode::insert_on_dup_key_update(RuntimeState* state) { // 移除冲突行 for (auto id : ids_set) { if (_record_ids.erase(id)) { - update_record(_on_dup_key_update_records[index_id][key], _origin_records[id]); + update_record(_on_dup_key_update_records[index_id][key], _origin_records[id], tmp_state); } } } @@ -632,7 +634,7 @@ int InsertManagerNode::insert_on_dup_key_update(RuntimeState* state) { return _affected_rows; } -void InsertManagerNode::update_record(const SmartRecord& record, const SmartRecord& origin_record) { +void InsertManagerNode::update_record(const SmartRecord& record, const SmartRecord& origin_record, RuntimeState* state) { // 处理values函数 _dup_update_row->clear(); if (_values_tuple_desc != nullptr) { @@ -663,6 +665,12 @@ void InsertManagerNode::update_record(const SmartRecord& record, const SmartReco record->set_value(record->get_field_by_tag(slot.field_id()), expr->get_value(row).cast_to(slot.slot_type())); } + if (state != nullptr) { + auto last_value_expr = expr->get_last_value(); + if (last_value_expr != nullptr) { + state->client_conn()->last_value += redis_encode(last_value_expr->get_value(row).get_string()); + }; + } } } diff --git a/src/exec/lock_primary_node.cpp b/src/exec/lock_primary_node.cpp index 6c96aa9e..0dbf2f96 100755 --- a/src/exec/lock_primary_node.cpp +++ b/src/exec/lock_primary_node.cpp @@ -223,7 +223,8 @@ int LockPrimaryNode::open(RuntimeState* state) { case pb::LOCK_GET_DML: { for (auto& record : delete_records) { //DB_WARNING_STATE(state,"record:%s", record->debug_string().c_str()); - ret = delete_row(state, record, nullptr); + std::unique_ptr row = state->mem_row_desc()->fetch_mem_row(); + ret = delete_row(state, record, row.get()); if (ret < 0) { 
DB_WARNING_STATE(state, "delete_row fail"); return -1; diff --git a/src/exec/packet_node.cpp b/src/exec/packet_node.cpp index 67155a41..e99dad2b 100644 --- a/src/exec/packet_node.cpp +++ b/src/exec/packet_node.cpp @@ -668,7 +668,9 @@ int PacketNode::open_analyze(RuntimeState* state) { return ret; } state->inc_num_returned_rows(batch->size()); - batch_vector.push_back(batch); + if (batch->size() > 0) { + batch_vector.push_back(batch); + } } while (!eos); std::vector slot_order_exprs; @@ -816,6 +818,11 @@ int PacketNode::pack_ok(int num_affected_rows, NetworkSocket* client) { bytes[1] = (0 >> 8) & 0xff; tmp_buf.byte_array_append_len(bytes, 2); + if (!client->last_value.empty()) { + tmp_buf.pack_length_coded_string(client->last_value, false); + client->last_value.clear(); + } + return _send_buf->network_queue_send_append(tmp_buf._data, tmp_buf._size, ++client->packet_id, 0); } diff --git a/src/exec/rocksdb_scan_node.cpp b/src/exec/rocksdb_scan_node.cpp index 9bdeb709..c2a7763a 100644 --- a/src/exec/rocksdb_scan_node.cpp +++ b/src/exec/rocksdb_scan_node.cpp @@ -169,17 +169,7 @@ int RocksdbScanNode::choose_index(RuntimeState* state) { } for (auto& range : pos_index.ranges()) { std::string word; - if (range.has_left_key()) { - word = range.left_key(); - } else { - SmartRecord record = _factory->new_record(_table_id); - record->decode(range.left_pb_record()); - ret = record->get_reverse_word(*index_info, word); - if (ret < 0) { - DB_WARNING_STATE(state, "index_info to word fail for index_id: %ld", index_id); - return ret; - } - } + word = range.left_key(); _reverse_infos.emplace_back(*index_info); _query_words.emplace_back(word); _match_modes.emplace_back(range.match_mode()); @@ -195,68 +185,51 @@ int RocksdbScanNode::choose_index(RuntimeState* state) { //DB_WARNING_STATE(state, "use_index: %ld table_id: %ld left:%d, right:%d", // _index_id, _table_id, pos_index.ranges(0).left_field_cnt(), pos_index.ranges(0).right_field_cnt()); - bool is_eq = true; - bool like_prefix = 
true; int64_t ranges_used_size = 0; bool check_memory = false; if (pos_index.ranges_size() > FLAGS_in_predicate_check_threshold) { check_memory = true; } + bool has_global_param = false; + if (pos_index.has_is_eq()) { + has_global_param = true; + _is_eq = pos_index.is_eq(); + _like_prefix = pos_index.like_prefix(); + _left_field_cnt = pos_index.left_field_cnt(); + _left_open = pos_index.left_open(); + _right_field_cnt = pos_index.right_field_cnt(); + _right_open = pos_index.right_open(); + } for (auto& range : pos_index.ranges()) { - if (range.has_left_key()) { - _use_encoded_key = true; - if (range.left_key() != range.right_key()) { - is_eq = false; - } + if (!_is_eq) { _scan_range_keys.add_key(range.left_key(), range.left_full(), range.right_key(), range.right_full()); - if (check_memory) { - ranges_used_size += range.left_key().size() * 2; - ranges_used_size += range.right_key().size() * 2; - ranges_used_size += 100; // 估计值 - } } else { - SmartRecord left_record = _factory->new_record(_table_id); - SmartRecord right_record = _factory->new_record(_table_id); - left_record->decode(range.left_pb_record()); - right_record->decode(range.right_pb_record()); - if (range.left_pb_record() != range.right_pb_record()) { - is_eq = false; - } - _left_records.emplace_back(left_record); - _right_records.emplace_back(right_record); - if (check_memory) { - ranges_used_size += left_record->used_size(); - ranges_used_size += right_record->used_size(); - ranges_used_size += 100; // 估计值 - } - } - int left_field_cnt = range.left_field_cnt(); - int right_field_cnt = range.right_field_cnt(); - bool left_open = range.left_open(); - bool right_open = range.right_open(); - like_prefix = range.like_prefix(); - if (left_field_cnt != right_field_cnt) { - is_eq = false; - } - //DB_WARNING_STATE(state, "left_open:%d right_open:%d", left_open, right_open); - if (left_open || right_open) { - is_eq = false; - } - _left_field_cnts.emplace_back(left_field_cnt); - 
_right_field_cnts.emplace_back(right_field_cnt); - _left_opens.push_back(left_open); - _right_opens.push_back(right_open); - _like_prefixs.push_back(like_prefix); - } - if (pos_index.has_is_eq()) { - is_eq = pos_index.is_eq(); + _scan_range_keys.add_key(range.left_key(), range.left_full(), range.left_key(), range.left_full()); + } + if (check_memory) { + ranges_used_size += range.left_key().size() * 2; + ranges_used_size += range.right_key().size() * 2; + ranges_used_size += 100; // 估计值 + } + if (!has_global_param) { + has_global_param = true; + _left_field_cnt = range.left_field_cnt(); + _right_field_cnt = range.right_field_cnt(); + _left_open = range.left_open(); + _right_open = range.right_open(); + _like_prefix = range.like_prefix(); + if (_is_eq) { + _right_field_cnt = _left_field_cnt; + _right_open = _left_open; + } + } } _scan_range_keys.set_start_capacity(state->row_batch_capacity()); if (check_memory && 0 != state->memory_limit_exceeded(std::numeric_limits::max(), ranges_used_size)) { return -1; } if (_index_info->type == pb::I_PRIMARY || _index_info->type == pb::I_UNIQ) { - if (_left_field_cnts[_idx] == (int)_index_info->fields.size() && is_eq && !like_prefix) { + if (_left_field_cnt == (int)_index_info->fields.size() && _is_eq && !_like_prefix) { //DB_WARNING_STATE(state, "index use get ,index:%ld", _index_info.id); _use_get = true; } @@ -683,15 +656,10 @@ void RocksdbScanNode::close(RuntimeState* state) { for (auto expr : _scan_conjuncts) { expr->close(); } - _idx = 0; _left_records.clear(); _right_records.clear(); - _left_field_cnts.clear(); - _right_field_cnts.clear(); - _left_opens.clear(); - _right_opens.clear(); - _like_prefixs.clear(); _topk = 10; + _idx = 0; _reverse_infos.clear(); _query_words.clear(); _match_modes.clear(); @@ -752,30 +720,19 @@ int RocksdbScanNode::get_next_by_table_get(RuntimeState* state, RowBatch* batch, if (batch->is_full()) { return 0; } - if (_idx >= _left_records.size() && _scan_range_keys.is_traverse_over()) { + if 
(_scan_range_keys.is_traverse_over()) { *eos = true; return 0; } if (!FLAGS_scan_use_multi_get || _get_mode != GET_ONLY) { ++_scan_rows; - if (_use_encoded_key) { - _idx++; - auto key_pair = _scan_range_keys.get_next(); - int ret = txn->get_update_primary(_region_id, *_pri_info, key_pair->left_key(), record, - _field_ids, _get_mode, state->need_check_region()); - if (ret < 0) { - continue; - } - _read_disk_size += txn->read_disk_size; - } else { - record = _left_records[_idx++]; - int ret = txn->get_update_primary(_region_id, *_pri_info, record, _field_ids, _get_mode, - state->need_check_region()); - if (ret < 0) { - continue; - } - _read_disk_size += txn->read_disk_size; + auto key_pair = _scan_range_keys.get_next(); + int ret = txn->get_update_primary(_region_id, *_pri_info, key_pair->left_key(), record, + _field_ids, _get_mode, state->need_check_region()); + if (ret < 0) { + continue; } + _read_disk_size += txn->read_disk_size; std::unique_ptr row = _mem_row_desc->fetch_mem_row(); for (auto slot : _tuple_desc->slots()) { auto field = record->get_field_by_tag(slot.field_id()); @@ -791,7 +748,6 @@ int RocksdbScanNode::get_next_by_table_get(RuntimeState* state, RowBatch* batch, ++_num_rows_returned; } else { auto key_pairs = _scan_range_keys.get_next_batch(); - _idx += key_pairs.size(); _scan_rows += key_pairs.size(); int ret = txn->multiget_primary(_region_id, *_pri_info, key_pairs, _tuple_id, _mem_row_desc, &_multiget_row_batch, _field_ids, _field_slot, state->need_check_region(), _range_key_sorted); @@ -832,30 +788,21 @@ int RocksdbScanNode::get_next_by_index_get(RuntimeState* state, RowBatch* batch, if (batch->is_full()) { return 0; } - if (_idx >= _left_records.size() && _scan_range_keys.is_traverse_over()) { + if (_scan_range_keys.is_traverse_over()) { *eos = true; return 0; } if (!FLAGS_scan_use_multi_get || _get_mode != GET_ONLY) { ++_scan_rows; - if (_use_encoded_key) { - auto key_pair = _scan_range_keys.get_next(); - int ret = 
txn->get_update_secondary(_region_id, *_pri_info, *_index_info, key_pair->left_key(), record, - GET_ONLY, true); - if (ret < 0) { - continue; - } - _read_disk_size += txn->read_disk_size; - if (_index_info->type == pb::I_UNIQ) { - record->decode_key(*_index_info, key_pair->left_key().data()); - } - } else { - record = _left_records[_idx++]; - int ret = txn->get_update_secondary(_region_id, *_pri_info, *_index_info, record, GET_ONLY, true); - if (ret < 0) { - continue; - } - _read_disk_size += txn->read_disk_size; + auto key_pair = _scan_range_keys.get_next(); + int ret = txn->get_update_secondary(_region_id, *_pri_info, *_index_info, key_pair->left_key(), record, + GET_ONLY, true); + if (ret < 0) { + continue; + } + _read_disk_size += txn->read_disk_size; + if (_index_info->type == pb::I_UNIQ) { + record->decode_key(*_index_info, key_pair->left_key().data()); } if (!_is_covering_index && !_is_global_index) { ++get_primary_cnt; @@ -882,7 +829,6 @@ int RocksdbScanNode::get_next_by_index_get(RuntimeState* state, RowBatch* batch, ++_num_rows_returned; } else { auto key_pairs = _scan_range_keys.get_next_batch(); - _idx += key_pairs.size(); _scan_rows += key_pairs.size(); int ret = txn->multiget_secondary(_region_id, *_pri_info, *_index_info, key_pairs, record, _multiget_records, _tuple_id, _mem_row_desc, &_multiget_row_batch, _field_slot, @@ -1137,35 +1083,22 @@ int RocksdbScanNode::get_next_by_table_seek(RuntimeState* state, RowBatch* batch } if (_table_iter == nullptr || !_table_iter->valid() || range_reach_limit()) { - if (_idx >= _left_records.size() && _scan_range_keys.is_traverse_over()) { + if (_scan_range_keys.is_traverse_over()) { *eos = true; return 0; } else { IndexRange range; - if (_use_encoded_key) { - auto key_pair = _scan_range_keys.get_next(); - range = IndexRange(key_pair->left_key(), - key_pair->right_key(), - _index_info.get(), - _pri_info.get(), - _region_info, - _left_field_cnts[_idx], - _right_field_cnts[_idx], - _left_opens[_idx], - 
_right_opens[_idx], - _like_prefixs[_idx]); - } else { - range = IndexRange(_left_records[_idx].get(), - _right_records[_idx].get(), - _index_info.get(), - _pri_info.get(), - _region_info, - _left_field_cnts[_idx], - _right_field_cnts[_idx], - _left_opens[_idx], - _right_opens[_idx], - _like_prefixs[_idx]); - } + auto key_pair = _scan_range_keys.get_next(); + range = IndexRange(key_pair->left_key(), + key_pair->right_key(), + _index_info.get(), + _pri_info.get(), + _region_info, + _left_field_cnt, + _right_field_cnt, + _left_open, + _right_open, + _like_prefix); delete _table_iter; _table_iter = Iterator::scan_primary( state->txn(), range, _field_ids, _field_slot, state->need_check_region(), _scan_forward); @@ -1177,7 +1110,6 @@ int RocksdbScanNode::get_next_by_table_seek(RuntimeState* state, RowBatch* batch _table_iter->set_mode(KEY_ONLY); } _num_rows_returned_by_range = 0; - _idx++; continue; } } @@ -1386,35 +1318,22 @@ int RocksdbScanNode::get_next_by_index_seek(RuntimeState* state, RowBatch* batch } } else { if (_index_iter == nullptr || !_index_iter->valid() || range_reach_limit()) { - if (_idx >= _left_records.size() && _scan_range_keys.is_traverse_over()) { + if (_scan_range_keys.is_traverse_over()) { multiget_last_records = true; continue; } else { IndexRange range; - if (_use_encoded_key) { - auto key_pair = _scan_range_keys.get_next(); - range = IndexRange(key_pair->left_key(), - key_pair->right_key(), - _index_info.get(), - _pri_info.get(), - _region_info, - _left_field_cnts[_idx], - _right_field_cnts[_idx], - _left_opens[_idx], - _right_opens[_idx], - _like_prefixs[_idx]); - } else { - range = IndexRange(_left_records[_idx].get(), - _right_records[_idx].get(), - _index_info.get(), - _pri_info.get(), - _region_info, - _left_field_cnts[_idx], - _right_field_cnts[_idx], - _left_opens[_idx], - _right_opens[_idx], - _like_prefixs[_idx]); - } + auto key_pair = _scan_range_keys.get_next(); + range = IndexRange(key_pair->left_key(), + key_pair->right_key(), + 
_index_info.get(), + _pri_info.get(), + _region_info, + _left_field_cnt, + _right_field_cnt, + _left_open, + _right_open, + _like_prefix); delete _index_iter; _index_iter = Iterator::scan_secondary(state->txn(), range, _field_slot, true, _scan_forward); if (_index_iter == nullptr) { @@ -1422,7 +1341,6 @@ int RocksdbScanNode::get_next_by_index_seek(RuntimeState* state, RowBatch* batch return -1; } _num_rows_returned_by_range = 0; - _idx++; continue; } } diff --git a/src/exec/scan_node.cpp b/src/exec/scan_node.cpp index f7d4fbea..3deb6ecd 100644 --- a/src/exec/scan_node.cpp +++ b/src/exec/scan_node.cpp @@ -15,7 +15,6 @@ #include #include #include "scan_node.h" -#include "filter_node.h" #include "join_node.h" #include "schema_factory.h" #include "scalar_fn_call.h" @@ -87,6 +86,7 @@ int64_t AccessPathMgr::select_index_common() { prefix_ratio_index_score = 0; } prefix_ratio_id_mapping.insert(std::make_pair(prefix_ratio_index_score, index_id)); + path->prefix_ratio_index_score = prefix_ratio_index_score; //DB_NOTICE("index_id:%ld prefix_ratio_index_score:%u", index_id,prefix_ratio_index_score); // 优先选倒排或向量,没有就取第一个 switch (info.type) { @@ -180,7 +180,18 @@ void ScanNode::show_explain(std::vector>& out explain_info["partitions"] = pt; } } - if (!has_index() && _scan_indexs.empty()) { + if (_merge_index_infos.size() > 0) { + explain_info["type"] = "index_merge"; + explain_info["key"] = ""; + for (auto& merge_index_info : _merge_index_infos) { + int64_t index_id = merge_index_info._select_idx; + explain_info["key"] += factory->get_index_info(index_id).short_name; + explain_info["key"] += ","; + } + if (!explain_info["key"].empty()) { + explain_info["key"].pop_back(); + } + } else if (!has_index() && _scan_indexs.empty()) { explain_info["type"] = "ALL"; } else { explain_info["possible_keys"] = ""; @@ -204,9 +215,9 @@ void ScanNode::show_explain(std::vector>& out explain_info["type"] = "range"; auto& pos_index = _main_path.path(index_id)->pos_index; if (pos_index.ranges_size() 
== 1) { - int field_cnt = pos_index.ranges(0).left_field_cnt(); + int field_cnt = pos_index.has_left_field_cnt() ? pos_index.left_field_cnt() : pos_index.ranges(0).left_field_cnt(); if (field_cnt == (int)index_info.fields.size() && - pos_index.ranges(0).left_pb_record() == pos_index.ranges(0).right_pb_record()) { + pos_index.ranges(0).left_key() == pos_index.ranges(0).right_key()) { explain_info["type"] = "eq_ref"; if (index_info.type == pb::I_UNIQ || index_info.type == pb::I_PRIMARY) { explain_info["type"] = "const"; @@ -624,15 +635,15 @@ int64_t ScanNode::select_index_in_baikaldb(const std::string& sample_sql) { std::vector learner_condition; learner_condition.insert(learner_condition.end(), learner_path->other_condition.begin(), learner_path->other_condition.end()); - if (get_parent()->node_type() == pb::TABLE_FILTER_NODE || - get_parent()->node_type() == pb::WHERE_FILTER_NODE) { + if (get_parent() != nullptr && (get_parent()->node_type() == pb::TABLE_FILTER_NODE || + get_parent()->node_type() == pb::WHERE_FILTER_NODE)) { static_cast(get_parent())->modifiy_pruned_conjuncts_by_index_learner(learner_condition); } } } // modify filter conjuncts - if (get_parent()->node_type() == pb::TABLE_FILTER_NODE || - get_parent()->node_type() == pb::WHERE_FILTER_NODE) { + if (get_parent() != nullptr && (get_parent()->node_type() == pb::TABLE_FILTER_NODE || + get_parent()->node_type() == pb::WHERE_FILTER_NODE)) { static_cast(get_parent())->modifiy_pruned_conjuncts_by_index(filter_condition); } _select_idx = select_idx; @@ -795,6 +806,48 @@ int ScanNode::create_fulltext_index_tree() { } return 0; } +bool ScanNode::need_index_merge() { + if (backup_scan_index() != nullptr) { + return false; + } + if (_main_path.use_fulltext_or_vector() || _main_path.use_cost()) { + return false; + } + + if (get_parent()->node_type() == pb::TABLE_FILTER_NODE || + get_parent()->node_type() == pb::WHERE_FILTER_NODE) { + _filter_node = static_cast(get_parent()); + } else { + return false; + } + + 
auto path = select_path(); + std::vector conditions; + conditions.insert(conditions.end(), path->other_condition.begin(), path->other_condition.end()); + conditions.insert(conditions.end(), path->index_other_condition.begin(), path->index_other_condition.end()); + if (conditions.empty()) { + return false; + } + _or_conjunct = nullptr; + for (auto conjunct : conditions) { + if (conjunct->node_type() == pb::OR_PREDICATE && _or_conjunct == nullptr) { + _or_conjunct = conjunct; + } + } + if (_or_conjunct == nullptr) { + return false; + } + + _conjuncts_without_or.clear(); + for (auto conjunct : _filter_node->conjuncts()) { + if (_or_conjunct != conjunct) { + _conjuncts_without_or.push_back(conjunct); + } + } + _or_sub_conjuncts.clear(); + _or_conjunct->flatten_or_expr(&_or_sub_conjuncts); + return true; +} } /* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/src/exec/select_manager_node.cpp b/src/exec/select_manager_node.cpp index 9dd39f7f..4243a83f 100755 --- a/src/exec/select_manager_node.cpp +++ b/src/exec/select_manager_node.cpp @@ -15,14 +15,14 @@ #include "select_manager_node.h" #include "filter_node.h" #include "network_socket.h" -#include "rocksdb_scan_node.h" -#include "limit_node.h" #include "agg_node.h" #include "query_context.h" namespace baikaldb { DEFINE_bool(global_index_read_consistent, true, "double check for global and primary region consistency"); DEFINE_int32(max_select_region_count, -1, "max select sql region count limit, default:-1 means no limit"); +DEFINE_int32(fetcher_primary_once_count, 16384, "fetcher primary once count"); +DEFINE_bool(fetcher_primary_pipeline, false, "fetcher primary pipeline"); int SelectManagerNode::open(RuntimeState* state) { START_LOCAL_TRACE(get_trace(), state->get_trace_cost(), OPEN_TRACE, ([state](TraceLocalNode& local_node) { local_node.set_scan_rows(state->num_scan_rows()); @@ -140,6 +140,14 @@ int SelectManagerNode::get_next(RuntimeState* state, RowBatch* batch, bool* eos) *eos = true; _num_rows_returned = _limit; 
return 0; + } else if (*eos == true) { + if (state->must_have_one && _num_rows_returned == 0) { + // 生成null返回 + std::unique_ptr row = state->mem_row_desc()->fetch_mem_row(); + batch->move_row(std::move(row)); + _num_rows_returned = 1; + return 0; + } } return 0; } @@ -210,6 +218,9 @@ void SelectManagerNode::multi_fetcher_store_open(FetcherInfo* self_fetcher, Fetc int SelectManagerNode::fetcher_store_run(RuntimeState* state, ExecNode* exec_node) { RocksdbScanNode* scan_node = static_cast(exec_node); + if (scan_node->has_merge_index()) { + return merge_fetcher_store_run(state, exec_node); + } FetcherStore* fetcher_store = nullptr; FetcherInfo main_fetcher; FetcherInfo backup_fetcher; @@ -286,13 +297,25 @@ int SelectManagerNode::fetcher_store_run(RuntimeState* state, ExecNode* exec_nod } for (auto& pair : fetcher_store->start_key_sort) { - auto iter = fetcher_store->region_batch.find(pair.second); - if (iter != fetcher_store->region_batch.end()) { - auto& batch = iter->second; - if (batch != nullptr && batch->size() != 0) { - _sorter->add_batch(batch); + if (!fetcher_store->is_pipeline()) { + auto iter = fetcher_store->region_batch.find(pair.second); + if (iter != fetcher_store->region_batch.end()) { + auto& batch = iter->second; + if (batch != nullptr && batch->size() != 0) { + _sorter->add_batch(batch); + } + fetcher_store->region_batch.erase(iter); + } + } else { + auto iter = fetcher_store->region_batch_list.find(pair.second); + if (iter != fetcher_store->region_batch_list.end()) { + for (auto &batch : iter->second) { + if (batch != nullptr && batch->size() != 0) { + _sorter->add_batch(batch); + } + } + fetcher_store->region_batch_list.erase(iter); } - fetcher_store->region_batch.erase(iter); } } // 无sort节点时不会排序,按顺序输出 @@ -332,6 +355,10 @@ int SelectManagerNode::open_global_index(FetcherInfo* fetcher, RuntimeState* sta for (auto& field : index_info->fields) { index_field_ids.insert(field.id); } + SmartIndex pri_info = _factory->get_index_info_ptr(main_table_id); + 
for (auto& field : pri_info->fields) { + index_field_ids.insert(field.id); + } std::unordered_set expr_field_ids; for (auto expr : sort_node->slot_order_exprs()) { expr->get_all_field_ids(expr_field_ids); @@ -370,26 +397,170 @@ int SelectManagerNode::open_global_index(FetcherInfo* fetcher, RuntimeState* sta } parent = parent->get_parent(); } + LimitNode* limit = static_cast(parent); - if (need_pushdown && limit != nullptr) { - ret = construct_primary_possible_index(fetcher->fetcher_store, fetcher->scan_index, state, scan_node, main_table_id, limit); + if (!need_pushdown) { + limit = nullptr; + } + + auto pri_info = _factory->get_index_info_ptr(main_table_id); + if (pri_info == nullptr) { + DB_WARNING("pri index info not found table_id:%ld", main_table_id); + return -1; + } + + if (!FLAGS_fetcher_primary_pipeline) { + return fetcher_primary(fetcher, state, scan_node, pri_info, limit, main_table_id); } else { - ret = construct_primary_possible_index(fetcher->fetcher_store, fetcher->scan_index, state, scan_node, main_table_id); + return fetcher_primary_pipeline(fetcher, state, scan_node, pri_info, limit, main_table_id); } + return 0; +} + + +int SelectManagerNode::fetcher_primary_pipeline( + FetcherInfo* fetcher, + RuntimeState* state, + ExecNode* exec_node, + SmartIndex pri_info, + LimitNode* limit, + int64_t main_table_id) { + + RocksdbScanNode* scan_node = static_cast(exec_node); + auto client_conn = state->client_conn(); + std::map region_primary_map; + std::map region_infos; + + FetcherStore global_index_fetcher; + global_index_fetcher.start_key_sort = fetcher->fetcher_store.start_key_sort; + global_index_fetcher.region_batch = fetcher->fetcher_store.region_batch; + fetcher->fetcher_store.clear_result(state); + + auto fetcher_primary_func = [&](FetcherPrimaryInfo& fetcher_primary_info) { + std::string *region_primary = new std::string(""); + fetcher_primary_info.pos_index.set_index_id(main_table_id); + 
fetcher_primary_info.pos_index.set_left_field_cnt(pri_info->fields.size()); + fetcher_primary_info.pos_index.set_left_open(false); + fetcher_primary_info.pos_index.set_is_eq(true); + fetcher_primary_info.pos_index.SerializeToString(region_primary); + fetcher_primary_info.pos_index.clear_ranges(); + fetcher_primary_info.region_primary_list.push_back(region_primary); + fetcher->fetcher_store.fetcher_select_with_region_primary(state, &fetcher_primary_info.region_info, fetcher_primary_info.region_primary_list[fetcher_primary_info.cur_idx], _children[0], client_conn->seq_id, client_conn->seq_id); + fetcher_primary_info.cur_idx++; + }; + + + std::map finded_region_map; + + int fetcher_primary_count = std::max(FLAGS_fetcher_primary_once_count, 1024); + + auto add_one_record = [&](MutTableKey &key) -> int { + pb::RegionInfo region_info; + auto find_iter = finded_region_map.upper_bound(key.data()); + if (find_iter != finded_region_map.begin()) { + --find_iter; + } + if (find_iter != finded_region_map.end()) { + if (find_iter->second.end_key() == "" || key.data() < find_iter->second.end_key()) { + region_info = find_iter->second; + } + } + if (!region_info.has_region_id()){ + int ret = _factory->get_region_by_primary_key(main_table_id, *pri_info, key, 0, region_info); + if (ret) { + DB_WARNING("get region error!"); + return ret; + } + finded_region_map[region_info.start_key()] = region_info; + } + auto iter = region_primary_map.find(region_info.region_id()); + if (iter == region_primary_map.end()) { + region_primary_map[region_info.region_id()].region_info = region_info; + } + auto &fetcher_primary_info = region_primary_map[region_info.region_id()]; + auto range = fetcher_primary_info.pos_index.add_ranges(); + range->set_left_key(key.data()); + range->set_left_full(true); + if (fetcher_primary_info.pos_index.ranges_size() >= fetcher_primary_count) { + fetcher_primary_func(fetcher_primary_info); + } + return 0; + }; + + int ret = 
construct_primary_possible_index(global_index_fetcher, fetcher->scan_index, state, scan_node, main_table_id, add_one_record, pri_info, limit); + if (ret < 0) { DB_WARNING("construct primary possible index failed"); return ret; } - ret = fetcher->fetcher_store.run_not_set_state(state, fetcher->scan_index->region_infos, _children[0], + for (auto &kv : region_primary_map) { + if (kv.second.pos_index.ranges_size() != 0) { + fetcher_primary_func(kv.second); + } + } + fetcher->fetcher_store.wait_finish(); + if (fetcher->fetcher_store.error != E_OK) { + return -1; + } + if (_children[0]->get_trace() != nullptr) { + for (auto trace : fetcher->fetcher_store.get_traces()) { + (*_children[0]->get_trace()->add_child_nodes()) = *trace; + } + } + return ret; +} + +int SelectManagerNode::fetcher_primary( + FetcherInfo* fetcher, + RuntimeState* state, + ExecNode* exec_node, + SmartIndex pri_info, + LimitNode* limit, + int64_t main_table_id) { + + + RocksdbScanNode* scan_node = static_cast(exec_node); + auto client_conn = state->client_conn(); + pb::PossibleIndex pos_index; + pos_index.set_index_id(main_table_id); + pos_index.set_left_field_cnt(pri_info->fields.size()); + pos_index.set_left_open(false); + pos_index.set_is_eq(true); + + auto add_one_record = [&](MutTableKey &key) -> int { + auto range = pos_index.add_ranges(); + range->set_left_key(key.data()); + range->set_left_full(key.get_full()); + return 0; + }; + + auto ret = construct_primary_possible_index(fetcher->fetcher_store, fetcher->scan_index, state, exec_node, main_table_id, add_one_record, pri_info, limit); + + if (ret < 0) { + DB_WARNING("construct primary possible index failed"); + return ret; + } + + pos_index.SerializeToString(&fetcher->scan_index->raw_index); + + ret = _factory->get_region_by_key(main_table_id, *pri_info, &pos_index, fetcher->scan_index->region_infos, + &fetcher->scan_index->region_primary, scan_node->get_partition()); + if (ret < 0) { + DB_WARNING("get region info failed!"); + return ret; + } 
+ + ret = fetcher->fetcher_store.run_not_set_state(state, fetcher->scan_index->region_infos, _children[0], client_conn->seq_id, client_conn->seq_id, pb::OP_SELECT, fetcher->global_backup_type); if (ret < 0) { - DB_WARNING("select manager fetcher mnager node open fail, txn_id: %lu, log_id:%lu", + DB_WARNING("select manager fetcher mnager node open fail, txn_id: %lu, log_id:%lu", state->txn_id, state->log_id()); - return ret; + return ret; } return ret; } + int SelectManagerNode::construct_primary_possible_index( FetcherStore& fetcher_store, @@ -397,36 +568,27 @@ int SelectManagerNode::construct_primary_possible_index( RuntimeState* state, ExecNode* exec_node, int64_t main_table_id, + std::function add_one_record, + const SmartIndex& pri_info, LimitNode* limit) { + RocksdbScanNode* scan_node = static_cast(exec_node); int32_t tuple_id = scan_node->tuple_id(); - auto pri_info = _factory->get_index_info_ptr(main_table_id); - if (pri_info == nullptr) { - DB_WARNING("pri index info not found table_id:%ld", main_table_id); - return -1; - } - auto table_info = _factory->get_table_info_ptr(main_table_id); - if (table_info == nullptr) { - DB_WARNING("pri index info not found table_id:%ld", main_table_id); - return -1; - } - // 不能直接清理所有索引,可能有backup请求使用scan_node - // scan_node->clear_possible_indexes(); - // pb::ScanNode* pb_scan_node = scan_node->mutable_pb_node()->mutable_derive_node()->mutable_scan_node(); - // auto pos_index = pb_scan_node->add_indexes(); - // pos_index->set_index_id(main_table_id); - // scan_node->set_router_index_id(main_table_id); - scan_index_info->router_index = nullptr; scan_index_info->raw_index.clear(); scan_index_info->region_infos.clear(); scan_index_info->region_primary.clear(); scan_index_info->router_index_id = main_table_id; scan_index_info->index_id = main_table_id; - pb::PossibleIndex pos_index; - pos_index.set_index_id(main_table_id); SmartRecord record_template = _factory->new_record(main_table_id); - auto tsorter = 
std::make_shared(_mem_row_compare.get()); + std::shared_ptr tmp_mem_row_compare; + std::vector null_slot_order_exprs; + if (limit != nullptr) { + tmp_mem_row_compare = std::make_shared(_slot_order_exprs, _is_asc, _is_null_first); + } else { + tmp_mem_row_compare = std::make_shared(null_slot_order_exprs, _is_asc, _is_null_first); + } + auto tsorter = std::make_shared(tmp_mem_row_compare.get()); for (auto& pair : fetcher_store.start_key_sort) { auto& batch = fetcher_store.region_batch[pair.second]; if (batch == nullptr) { @@ -441,10 +603,7 @@ int SelectManagerNode::construct_primary_possible_index( } tsorter->merge_sort(); bool eos = false; - int64_t limit_cnt = 0x7fffffff; - if (limit != nullptr) { - limit_cnt = limit->get_limit(); - } + int cnt = 0; while (!eos) { std::shared_ptr batch = std::make_shared(); auto ret = tsorter->get_next(batch.get(), &eos); @@ -470,31 +629,151 @@ int SelectManagerNode::construct_primary_possible_index( } record->set_value(record->get_field_by_tag(field_id), mem_row->get_value(tuple_id, slot_id)); } - auto range = pos_index.add_ranges(); MutTableKey key; if (record->encode_key(*pri_info.get(), key, pri_info->fields.size(), false, false) != 0) { DB_FATAL("Fail to encode_key left, table:%ld", pri_info->id); return -1; } - range->set_left_key(key.data()); - range->set_left_full(key.get_full()); - range->set_right_key(key.data()); - range->set_right_full(key.get_full()); - range->set_left_field_cnt(pri_info->fields.size()); - range->set_right_field_cnt(pri_info->fields.size()); - range->set_left_open(false); - range->set_right_open(false); - limit_cnt --; - if(!limit_cnt) { + int ret = add_one_record(key); + if (ret) { + return ret; + } + cnt ++; + if (limit != nullptr && cnt >= limit->get_limit()) { eos = true; break; } } } - //重新做路由选择 - pos_index.SerializeToString(&scan_index_info->raw_index); - return _factory->get_region_by_key(main_table_id, *pri_info, &pos_index, scan_index_info->region_infos, - &scan_index_info->region_primary, 
scan_node->get_partition()); + return 0; +} + +int SelectManagerNode::merge_fetcher_store_run(RuntimeState* state, ExecNode* exec_node) { + RocksdbScanNode* scan_node = static_cast(exec_node); + int64_t main_table_id = scan_node->table_id(); + int32_t tuple_id = scan_node->tuple_id(); + auto pri_info = _factory->get_index_info_ptr(main_table_id); + if (pri_info == nullptr) { + DB_WARNING("pri index info not found table_id:%ld", main_table_id); + return -1; + } + butil::FlatSet filter; + filter.init(12301); + + SmartRecord record_template = _factory->new_record(main_table_id); + FilterNode* filter_node = static_cast(scan_node->get_parent()); + int64_t affected_rows = 0; + + + auto remove_batch_same_row = [&](std::shared_ptr& batch, std::shared_ptr uniq_batch) -> int { + for (batch->reset(); !batch->is_traverse_over(); batch->next()) { + std::unique_ptr& mem_row = batch->get_row(); + SmartRecord record = record_template->clone(false); + for (auto& pri_field : pri_info->fields) { + int32_t field_id = pri_field.id; + int32_t slot_id = state->get_slot_id(tuple_id, field_id); + if (slot_id == -1) { + DB_WARNING("field_id:%d tuple_id:%d, slot_id:%d", field_id, tuple_id, slot_id); + return -1; + } + record->set_value(record->get_field_by_tag(field_id), mem_row->get_value(tuple_id, slot_id)); + } + MutTableKey key; + if (record->encode_key(*pri_info.get(), key, pri_info->fields.size(), false, false) != 0) { + DB_FATAL("Fail to encode_key left, table:%ld", pri_info->id); + return -1; + } + if (filter.seek(key.data()) != nullptr) { + continue; + } + filter.insert(key.data()); + uniq_batch->move_row(std::move(mem_row)); + affected_rows++; + } + return 0; + }; + + for (auto& index_info : scan_node->merge_index_infos()) { + filter_node->modifiy_pruned_conjuncts_by_index(index_info._pruned_conjuncts); + scan_node->swap_index_info(index_info); + if (filter_node->get_limit() != -1 && filter_node->pruned_conjuncts().empty()) { + scan_node->set_limit(filter_node->get_limit()); + } + 
FetcherStore* fetcher_store = nullptr; + FetcherInfo main_fetcher; + ScanIndexInfo* main_scan_index = scan_node->main_scan_index(); + int ret = 0; + if (main_scan_index == nullptr) { + return -1; + } + auto& scan_index_info = *main_scan_index; + auto index_ptr = _factory->get_index_info_ptr(scan_index_info.router_index_id); + if (index_ptr == nullptr) { + DB_WARNING("invalid index info: %ld", scan_index_info.router_index_id); + return -1; + } + _factory->get_region_by_key(main_table_id, + *index_ptr, scan_index_info.router_index, + scan_index_info.region_infos, + &scan_index_info.region_primary, + scan_node->get_partition()); + scan_node->set_region_infos(scan_index_info.region_infos); + + main_fetcher.scan_index = main_scan_index; + fetcher_store = &main_fetcher.fetcher_store; + ret = single_fetcher_store_open(&main_fetcher, state, exec_node); + if (ret < 0) { + state->error_code = fetcher_store->error_code; + state->error_msg.str(""); + state->error_msg << fetcher_store->error_msg.str(); + DB_WARNING("single_fetcher_store_open fail, txn_id: %lu, log_id:%lu, router index_id: %ld", + state->txn_id, state->log_id(), main_fetcher.scan_index->router_index_id); + return -1; + } + + for (auto& pair : fetcher_store->start_key_sort) { + if (fetcher_store->is_pipeline()) { + auto iter = fetcher_store->region_batch_list.find(pair.second); + if (iter == fetcher_store->region_batch_list.end()) { + continue; + } + auto& batch_list = iter->second; + for (auto& batch: batch_list) { + if (batch != nullptr && batch->size() != 0) { + std::shared_ptr uniq_batch = std::make_shared(); + auto ret = remove_batch_same_row(batch, uniq_batch); + if (ret) { + return ret; + } + if (uniq_batch != nullptr && uniq_batch->size() != 0) { + _sorter->add_batch(uniq_batch); + } + } + } + fetcher_store->region_batch_list.erase(iter); + } else { + auto iter = fetcher_store->region_batch.find(pair.second); + if (iter == fetcher_store->region_batch.end()) { + continue; + } + auto& batch = iter->second; 
+ if (batch != nullptr && batch->size() != 0) { + std::shared_ptr uniq_batch = std::make_shared(); + auto ret = remove_batch_same_row(batch, uniq_batch); + if (ret) { + return ret; + } + if (uniq_batch != nullptr && uniq_batch->size() != 0) { + _sorter->add_batch(uniq_batch); + } + } + fetcher_store->region_batch.erase(iter); + } + } + } + // 无sort节点时不会排序,按顺序输出 + _sorter->merge_sort(); + return affected_rows; } } diff --git a/src/exec/sort_node.cpp b/src/exec/sort_node.cpp index 322ff3a0..e4dc5e72 100644 --- a/src/exec/sort_node.cpp +++ b/src/exec/sort_node.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include "sort_node.h" +#include "topn_sorter.h" #include "runtime_state.h" #include "query_context.h" @@ -157,7 +158,12 @@ int SortNode::open(RuntimeState* state) { _mem_row_desc = state->mem_row_desc(); _mem_row_compare = std::make_shared( _slot_order_exprs, _is_asc, _is_null_first); - _sorter = std::make_shared(_mem_row_compare.get()); + if (_limit == -1) { + _sorter = std::make_shared(_mem_row_compare.get()); + } else { + _sorter = std::make_shared(_mem_row_compare.get()); + ((TopNSorter*)_sorter.get())->set_limit(_limit); + } bool eos = false; int count = 0; diff --git a/src/exec/update_manager_node.cpp b/src/exec/update_manager_node.cpp index 50bb2b42..88715683 100644 --- a/src/exec/update_manager_node.cpp +++ b/src/exec/update_manager_node.cpp @@ -181,6 +181,9 @@ int UpdateManagerNode::open(RuntimeState* state) { update_record(state, new_record); _insert_scan_records.emplace_back(new_record); } + if (state->error_code == ER_ILLEGAL_VALUE_FOR_TYPE) { + return -1; + } InsertManagerNode* insert_manager = static_cast(_children[1]); insert_manager->init_insert_info(this); @@ -259,6 +262,26 @@ void UpdateManagerNode::update_record(RuntimeState* state, SmartRecord record) { state->last_insert_id = last_insert_id_expr->get_value(row).get_numberic(); state->client_conn()->last_insert_id = state->last_insert_id; } + auto last_value_expr = 
expr->get_last_value(); + if (last_value_expr != nullptr) { + // 类型检查 + if (last_value_expr->children_size() == 2 && last_value_expr->children(1)->is_literal()) { + std::string frt = last_value_expr->children(1)->get_value(nullptr).get_string(); + bool is_valid = true; + if (frt == "%d") { + is_valid = last_value_expr->is_valid_int_cast(row); + } else if (frt == "%f") { + is_valid = last_value_expr->is_valid_double_cast(row); + } + if (!is_valid) { + state->error_code = ER_ILLEGAL_VALUE_FOR_TYPE; + state->error_msg << "ERR value is not an integer or out of range"; + DB_WARNING_STATE(state, "ERR value is not an integer or out of range"); + return; + } + } + state->client_conn()->last_value += redis_encode(last_value_expr->get_value(row).get_string()); + } } } } diff --git a/src/exec/update_node.cpp b/src/exec/update_node.cpp index fa0dcc79..f8409c57 100644 --- a/src/exec/update_node.cpp +++ b/src/exec/update_node.cpp @@ -29,6 +29,8 @@ int UpdateNode::init(const pb::PlanNode& node) { } _table_id = node.derive_node().update_node().table_id(); _global_index_id = _table_id; + _row_ttl_duration = node.derive_node().update_node().row_ttl_duration(); + DB_DEBUG("_row_ttl_duration:%ld", _row_ttl_duration); _primary_slots.clear(); _primary_slots.reserve(node.derive_node().update_node().primary_slots_size()); for (auto& slot : node.derive_node().update_node().primary_slots()) { diff --git a/src/expr/expr_node.cpp b/src/expr/expr_node.cpp index 9ecdbc91..42044f13 100644 --- a/src/expr/expr_node.cpp +++ b/src/expr/expr_node.cpp @@ -341,6 +341,93 @@ void ExprNode::or_node_optimize(ExprNode** root) { return; } +void ExprNode::like_node_optimize(ExprNode** root, std::vector& new_exprs) { + if (*root == nullptr) { + return; + } + if ((*root)->node_type() != pb::LIKE_PREDICATE) { + return; + } + auto expr = *root; + SlotRef* slot = (SlotRef*)expr->children(0); + if (slot->col_type() != pb::STRING) { + return; + } + if (expr->children(1)->is_constant()) { + expr->children(1)->open(); 
+ } else { + return; + } + bool is_eq = false; + bool is_prefix = false; + ExprValue prefix_value(pb::STRING); + static_cast(expr)->hit_index(&is_eq, &is_prefix, &(prefix_value.str_val)); + std::string old_val = expr->children(1)->get_value(nullptr).get_string(); + if (!is_prefix || old_val.length() > prefix_value.str_val.length() + 1) { + return; + } + if (is_eq) { + ScalarFnCall * eqexpr = new ScalarFnCall(); + SlotRef *sloteq = slot->clone(); + Literal *eqval = new Literal(prefix_value); + pb::ExprNode node; + node.set_node_type(pb::FUNCTION_CALL); + node.set_col_type(pb::BOOL); + pb::Function* func = node.mutable_fn(); + func->set_name("eq_string_string"); + func->set_fn_op(parser::FT_EQ); + eqexpr->init(node); + eqexpr->set_is_constant(false); + eqexpr->add_child(sloteq); + eqexpr->add_child(eqval); + *root = eqexpr; + ExprNode::destroy_tree(expr); + return ; + } else if (is_prefix) { + ScalarFnCall *geexpr = new ScalarFnCall(); + SlotRef *slotge = slot->clone(); + Literal *geval = new Literal(prefix_value); + pb::ExprNode node; + node.set_node_type(pb::FUNCTION_CALL); + node.set_col_type(pb::BOOL); + pb::Function* func = node.mutable_fn(); + func->set_name("ge_string_string"); + func->set_fn_op(parser::FT_GE); + geexpr->init(node); + geexpr->set_is_constant(false); + geexpr->add_child(slotge); + geexpr->add_child(geval); + *root = geexpr; + + ScalarFnCall *ltexpr = new ScalarFnCall(); + SlotRef* ltslot = slot->clone(); + ExprValue end_val = prefix_value; + int i = end_val.str_val.length() - 1; + for (; i >= 0; i --) { + uint8_t c = end_val.str_val[i]; + if (c == 255) { + continue; + } + end_val.str_val[i] = char(c + 1); + break; + } + end_val.str_val = end_val.str_val.substr(0, i + 1); + Literal *ltval = new Literal(end_val); + pb::ExprNode ltnode; + ltnode.set_node_type(pb::FUNCTION_CALL); + ltnode.set_col_type(pb::BOOL); + func = ltnode.mutable_fn(); + func->set_name("lt_string_string"); + func->set_fn_op(parser::FT_LT); + ltexpr->init(ltnode); + 
ltexpr->set_is_constant(false); + ltexpr->add_child(ltslot); + ltexpr->add_child(ltval); + new_exprs.push_back(ltexpr); + ExprNode::destroy_tree(expr); + } +} + int ExprNode::create_expr_node(const pb::ExprNode& node, ExprNode** expr_node) { switch (node.node_type()) { case pb::SLOT_REF: diff --git a/src/expr/fn_manager.cpp b/src/expr/fn_manager.cpp index b9bd0e61..d7d62b9b 100644 --- a/src/expr/fn_manager.cpp +++ b/src/expr/fn_manager.cpp @@ -124,6 +124,7 @@ void FunctionManager::register_operators() { register_object_ret("least", least, pb::DOUBLE); register_object_ret("ceil", ceil, pb::INT64); register_object_ret("ceiling", ceil, pb::INT64); + register_object_ret("bit_count", bit_count, pb::INT64); // str funcs register_object_ret("length", length, pb::INT64); @@ -154,6 +155,11 @@ void FunctionManager::register_operators() { register_object_ret("rpad", rpad, pb::STRING); register_object_ret("instr", instr, pb::INT32); register_object_ret("json_extract", json_extract, pb::STRING); + register_object_ret("json_extract1", json_extract1, pb::STRING); + register_object_ret("json_type", json_type, pb::STRING); + register_object_ret("json_array", json_array, pb::STRING); + register_object_ret("json_object", json_object, pb::STRING); + register_object_ret("json_valid", json_valid, pb::BOOL); register_object_ret("export_set", export_set, pb::STRING); register_object_ret("to_base64", to_base64, pb::STRING); register_object_ret("from_base64", from_base64, pb::STRING); @@ -170,7 +176,7 @@ void FunctionManager::register_operators() { register_object_ret("quote", quote, pb::STRING); register_object_ret("char", func_char, pb::STRING); register_object_ret("soundex", soundex, pb::STRING); - + register_object_ret("setrange", setrange, pb::STRING); // date funcs register_object_ret("unix_timestamp", unix_timestamp, pb::INT64); @@ -231,6 +237,8 @@ void FunctionManager::register_operators() { register_object_ret("timestamp_to_tso", timestamp_to_tso, pb::INT64); 
register_object_ret("to_days", to_days, pb::INT64); register_object_ret("to_seconds", to_seconds, pb::INT64); + register_object_ret("timeseq", timeseq, pb::INT64); + register_object_ret("timeseq_to_str", timeseq_to_str, pb::STRING); // hll funcs register_object_ret("hll_add", hll_add, pb::HLL); register_object_ret("hll_merge", hll_merge, pb::HLL); @@ -287,6 +295,7 @@ void FunctionManager::register_operators() { register_object_ret("version", version, pb::STRING); register_object_ret("last_insert_id", last_insert_id, pb::INT64); + register_object_ret("last_value", last_value, pb::STRING); // register_object_ret("point_distance", point_distance, pb::INT64); register_object_ret("cast_to_date", cast_to_date, pb::DATE); @@ -296,6 +305,16 @@ void FunctionManager::register_operators() { register_object_ret("cast_to_signed", cast_to_signed, pb::INT64); register_object_ret("cast_to_unsigned", cast_to_unsigned, pb::INT64); register_object_ret("cast_to_double", cast_to_double, pb::DOUBLE); + + // redis bitmap funcs + register_object_ret("bset", bset, pb::STRING); + register_object_ret("band", band, pb::STRING); + register_object_ret("bor", bor, pb::STRING); + register_object_ret("bxor", bxor, pb::STRING); + register_object_ret("bnot", bnot, pb::STRING); + register_object_ret("bget", bget, pb::INT64); + register_object_ret("bpos", bpos, pb::INT64); + register_object_ret("bcount", bcount, pb::INT64); } int FunctionManager::init() { diff --git a/src/expr/internal_functions.cpp b/src/expr/internal_functions.cpp index fe6776ab..479483f6 100644 --- a/src/expr/internal_functions.cpp +++ b/src/expr/internal_functions.cpp @@ -15,6 +15,7 @@ #include "internal_functions.h" #include #include +#include #include #include "hll_common.h" #include "datetime.h" @@ -328,6 +329,21 @@ ExprValue bit_length(const std::vector& input) { tmp._u.uint32_val = input[0].get_string().size() * 8; return tmp; } +ExprValue bit_count(const std::vector& input) { + if (input.size() != 1 || input[0].is_null()) { 
+ return ExprValue::Null(); + } + ExprValue tmp = input[0]; + tmp.cast_to(pb::UINT64); + ExprValue res(pb::INT64); + while (tmp._u.uint64_val) { + if (tmp._u.uint64_val & 1) { + res._u.int64_val += 1; + } + tmp._u.uint64_val >>= 1; + } + return res; +} ExprValue lower(const std::vector& input) { if (input.size() == 0 || input[0].is_null()) { @@ -833,6 +849,158 @@ ExprValue json_extract(const std::vector& input) { return tmp; } +ExprValue json_extract1(const std::vector& input) { + if (input.size() != 2) { + return ExprValue::Null(); + } + + for (auto s : input) { + if (s.is_null()) { + return ExprValue::Null(); + } + } + std::string json_str = input[0].get_string(); + std::string path = input[1].get_string(); + if (path.length() > 0 && path[0] == '$') { + path.erase(path.begin()); + } else { + return ExprValue::Null(); + } + std::replace(path.begin(), path.end(), '.', '/'); + std::replace(path.begin(), path.end(), '[', '/'); + path.erase(std::remove(path.begin(), path.end(), ']'), path.end()); + + rapidjson::Document doc; + try { + doc.Parse<0>(json_str.c_str()); + if (doc.HasParseError()) { + rapidjson::ParseErrorCode code = doc.GetParseError(); + DB_WARNING("parse json_str error [code:%d][%s]", code, json_str.c_str()); + return ExprValue::Null(); + } + + } catch (...) 
{ + DB_WARNING("parse json_str error [%s]", json_str.c_str()); + return ExprValue::Null(); + } + rapidjson::Pointer pointer(path.c_str()); + if (!pointer.IsValid()) { + DB_WARNING("invalid path: [%s]", path.c_str()); + return ExprValue::Null(); + } + + const rapidjson::Value *pValue = rapidjson::GetValueByPointer(doc, pointer); + if (pValue == nullptr) { + DB_WARNING("the path: [%s] does not exist in doc [%s]", path.c_str(), json_str.c_str()); + return ExprValue::Null(); + } + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + // TODO type on fly + ExprValue tmp(pb::STRING); + /* + if (pValue->IsString()) { + tmp.str_val = pValue->GetString(); + } else if (pValue->IsInt()) { + tmp.str_val = std::to_string(pValue->GetInt()); + } else if (pValue->IsInt64()) { + tmp.str_val = std::to_string(pValue->GetInt64()); + } else if (pValue->IsUint()) { + tmp.str_val = std::to_string(pValue->GetUint()); + } else if (pValue->IsUint64()) { + tmp.str_val = std::to_string(pValue->GetUint64()); + } else if (pValue->IsDouble()) { + tmp.str_val = std::to_string(pValue->GetDouble()); + } else if (pValue->IsFloat()) { + tmp.str_val = std::to_string(pValue->GetFloat()); + } else if (pValue->IsBool()) { + tmp.str_val = std::to_string(pValue->GetBool()); + } + */ + pValue->Accept(writer); + tmp.str_val = buffer.GetString(); + return tmp; +} + +ExprValue json_type(const std::vector& input) { + if (input.size() != 1) { + return ExprValue::Null(); + } + ExprValue res(pb::STRING); + if (input[0].is_int()) { + res.str_val = "INTEGER"; + } else if (input[0].is_double()) { + res.str_val = "DOUBLE"; + } else if (input[0].is_bool()) { + res.str_val = "BOOLEAN"; + } else if (input[0].is_null()) { + res.str_val = "NULL"; + } else if (input[0].is_string()) { + rapidjson::Document root; + root.Parse<0>(input[0].str_val.c_str()); + if (root.IsObject()) { + res.str_val = "OBJECT"; + } else if (root.IsArray()) { + res.str_val = "ARRAY"; + } else { + res.str_val = "STRING"; + } + } else 
{
+        return ExprValue::Null();
+    }
+    return res;
+}
+
+// json_array(v1, v2, ...): serializes the string form of every argument into a JSON array
+// of strings. Returns NULL when called without arguments.
+ExprValue json_array(const std::vector<ExprValue>& input) {
+    if (input.size() < 1) {
+        return ExprValue::Null();
+    }
+    rapidjson::Document list;
+    list.SetArray();
+    auto& allocator = list.GetAllocator();
+    for (size_t i = 0; i < input.size(); i ++) {
+        // Copy the bytes into the document. rapidjson::StringRef only stores the pointer,
+        // so it must not be handed the buffer of the string returned by get_string():
+        // that string no longer exists when Accept() walks the array.
+        const std::string element = input[i].get_string();
+        rapidjson::Value item;
+        item.SetString(element.c_str(), static_cast<rapidjson::SizeType>(element.size()), allocator);
+        list.PushBack(item, allocator);
+    }
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    list.Accept(writer);
+    ExprValue res(pb::STRING);
+    res.str_val = buffer.GetString();
+    return res;
+}
+
+// json_object(k1, v1, k2, v2, ...): serializes key/value pairs (both taken in string form)
+// into a JSON object. Returns NULL for an empty or odd-sized argument list.
+ExprValue json_object(const std::vector<ExprValue>& input) {
+    if (input.size() < 1 || input.size() & 1) {
+        return ExprValue::Null();
+    }
+    rapidjson::Document obj;
+    obj.SetObject();
+    auto& allocator = obj.GetAllocator();
+    // TODO: identical keys are emitted more than once (AddMember does not de-duplicate)
+    for (size_t i = 0; i < input.size() ; i += 2) {
+        // Keys and values are copied into the document for the same lifetime reason as in
+        // json_array: StringRef would keep pointers into strings that are already destroyed.
+        const std::string key_str = input[i].get_string();
+        const std::string val_str = input[i + 1].get_string();
+        rapidjson::Value key;
+        rapidjson::Value val;
+        key.SetString(key_str.c_str(), static_cast<rapidjson::SizeType>(key_str.size()), allocator);
+        val.SetString(val_str.c_str(), static_cast<rapidjson::SizeType>(val_str.size()), allocator);
+        obj.AddMember(key, val, allocator);
+    }
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    obj.Accept(writer);
+    ExprValue res(pb::STRING);
+    res.str_val = buffer.GetString();
+    return res;
+}
+
+// json_valid(doc): TRUE when the JSON/STRING argument parses as JSON, FALSE when it does not,
+// NULL for a wrong argument count or any other argument type.
+ExprValue json_valid(const std::vector<ExprValue>& input) {
+    if (input.size() != 1) {
+        return ExprValue::Null();
+    }
+    if (input[0].type != pb::JSON && input[0].type != pb::STRING) {
+        return ExprValue::Null();
+    }
+    rapidjson::Document obj;
+    obj.Parse<0>(input[0].str_val.c_str());
+    if (obj.HasParseError()) {
+        return ExprValue::False();
+    }
+    return ExprValue::True();
+}
+
 ExprValue substring_index(const std::vector<ExprValue>& input) {
     if (input.size() != 3) {
         return ExprValue::Null();
@@ -885,6 +1053,32 @@ ExprValue substring_index(const std::vector<ExprValue>& input) {
     return tmp;
 }
 
+ExprValue setrange(const std::vector<ExprValue>& input) {
+    if (input.size() != 3) {
+        return ExprValue::Null();
+    }
+    ExprValue tmp(pb::STRING);
+    std::string str = input[0].get_string();
+    int64_t offset = input[1].get_numberic();
+    std::string value = input[2].get_string();
+    if (offset < 0 || offset > UINT16_MAX) {
+        return
ExprValue::Null(); + } + if (offset > str.length()) { + tmp.str_val = str; + tmp.str_val += std::string(offset - str.length(), '\x00'); + tmp.str_val += value; + return tmp; + } + tmp.str_val.append(str.begin(), str.begin() + offset); + tmp.str_val += value; + if (offset + value.length() >= str.length()) { + return tmp; + } + tmp.str_val.append(str.begin() + offset + value.length(), str.end()); + return tmp; +} + ExprValue unix_timestamp(const std::vector& input) { ExprValue tmp(pb::UINT32); if (input.size() == 0) { @@ -1835,6 +2029,42 @@ ExprValue timestamp_to_tso(const std::vector& input) { tmp._u.int64_val = timestamp_to_ts(arg_timestamp._u.uint32_val); return tmp; } +static const uint64_t MAX_TIME_SERIES_US = 4573968371548160000; // 等于 str_to_date("5000","") + 0 +ExprValue timeseq(const std::vector& input) { + if (input.size() > 1) { + return ExprValue::Null(); + } + bool is_reverse = false; + if (input.size() == 1 && !(input[0].is_null())) { + ExprValue arg1 = input[0]; + is_reverse = arg1.cast_to(pb::INT32)._u.int32_val < 0; + } + ExprValue tmp(pb::UINT64); + if (is_reverse) { + tmp._u.uint64_val = MAX_TIME_SERIES_US - ExprValue::Now(6)._u.uint64_val; + } else { + tmp._u.uint64_val = MAX_TIME_SERIES_US + ExprValue::Now(6)._u.uint64_val; + } + return tmp; +} + +ExprValue timeseq_to_str(const std::vector& input) { + if (input.size() != 1 || input[0].is_null()) { + return ExprValue::Null(); + } + ExprValue tmp(pb::STRING); + ExprValue arg1 = input[0]; + arg1.cast_to(pb::UINT64); + int64_t val = arg1._u.uint64_val - MAX_TIME_SERIES_US; + if (val < 0) { + tmp.str_val += "-"; + val = -val; + } + arg1._u.uint64_val = val; + arg1.cast_to(pb::DATETIME); + tmp.str_val += arg1.get_string(); + return tmp; +} ExprValue hll_add(const std::vector& input) { if (input.size() == 0) { @@ -2431,6 +2661,13 @@ ExprValue last_insert_id(const std::vector& input) { ExprValue tmp = input[0]; return tmp.cast_to(pb::INT64); } +ExprValue last_value(const std::vector& input) { + if 
(input.size() == 0) {
+        return ExprValue::Null();
+    }
+    ExprValue tmp = input[0];
+    return tmp.cast_to(pb::STRING);
+}
 
 ExprValue point_distance(const std::vector<ExprValue>& input) {
     if (input.size() < 4) {
@@ -2951,7 +3188,214 @@ ExprValue soundex(const std::vector<ExprValue>& input) {
     res.str_val = code;
     return res;
 }
+// Bit helpers for the redis-style bitmap functions. Bit i lives in byte i/NBBY, and bit 0 of
+// a byte is its most significant bit. None of the macros checks that i is inside the buffer.
+#define NBBY 8
+#define SETBIT(a,i) ((a)[(i)/NBBY] |= 1<<(NBBY-1-(i)%NBBY))
+#define CLRBIT(a,i) ((a)[(i)/NBBY] &= ~(1<<(NBBY-1-(i)%NBBY)))
+#define ISSET(a,i) ((a)[(i)/NBBY] & (1<<(NBBY-1-(i)%NBBY)))
+#define ISCLR(a,i) (((a)[(i)/NBBY] & (1<<(NBBY-1-(i)%NBBY))) == 0)
+
+// bset(value, offset, bit): returns `value` (as a string, NULL treated as empty) with the bit
+// at `offset` set or cleared; the string is zero-extended when the offset lies past its end.
+// Returns NULL for a wrong argument count or an offset of 2^32 or more.
+ExprValue bset(const std::vector<ExprValue>& input) {
+    if (input.size() != 3) {
+        return ExprValue::Null();
+    }
+    ExprValue value = input[0];
+    if (value.is_null()) {
+        value.str_val.clear();
+        value.type = pb::STRING;
+    }
+    value.cast_to(pb::STRING);
+    ExprValue offset = input[1];
+    offset.cast_to(pb::UINT64);
+    ExprValue bit = input[2];
+    bit.cast_to(pb::BOOL);
+    // Same upper bound as Redis SETBIT; keeps a bogus offset from requesting a giant buffer.
+    if (offset._u.uint64_val > UINT32_MAX) {
+        return ExprValue::Null();
+    }
+    // Do the size arithmetic in uint64_t: narrowing the difference to int can wrap for large
+    // offsets, skip the extension and let SETBIT/CLRBIT write outside the string.
+    const uint64_t byte_index = offset._u.uint64_val / NBBY;
+    if (byte_index >= value.str_val.length()) {
+        value.str_val.resize(byte_index + 1, '\0');
+    }
+    if (bit._u.bool_val) {
+        SETBIT(value.str_val, offset._u.uint64_val);
+    } else {
+        CLRBIT(value.str_val, offset._u.uint64_val);
+    }
+    return value;
+}
+// bget(value, offset): returns 1 when the bit at `offset` is set, otherwise 0. Offsets past
+// the end of the string (and a NULL value) read as 0.
+ExprValue bget(const std::vector<ExprValue>& input) {
+    if (input.size() != 2) {
+        return ExprValue::Null();
+    }
+    ExprValue ret(pb::UINT64);
+    ExprValue value = input[0];
+    if (value.is_null()) {
+        value.str_val.clear();
+        value.type = pb::STRING;
+    }
+    value.cast_to(pb::STRING);
+    ExprValue offset = input[1];
+    offset.cast_to(pb::UINT64);
+    // Return before touching the buffer: ISSET does not bounds-check its index.
+    if (offset._u.uint64_val / NBBY >= value.str_val.length()) {
+        ret._u.uint64_val = 0;
+        return ret;
+    }
+    ret._u.uint64_val = ISSET(value.str_val, offset._u.uint64_val) ?
1 : 0;
+    return ret;
+}
+// band(a, b): byte-wise AND of the two arguments taken as strings; the shorter one is
+// zero-padded to the length of the longer one. Returns NULL unless exactly two arguments.
+ExprValue band(const std::vector<ExprValue>& input) {
+    if (input.size() != 2) {
+        return ExprValue::Null();
+    }
+    ExprValue ret(pb::STRING);
+    ExprValue v1 = input[0];
+    v1.cast_to(pb::STRING);
+    ExprValue v2 = input[1];
+    v2.cast_to(pb::STRING);
+
+    std::string& s1 = v1.str_val;
+    std::string& s2 = v2.str_val;
+    // Pad by resizing to the common width. Subtracting the sizes the wrong way round wraps
+    // the unsigned difference to a huge count and makes append() throw.
+    const size_t width = std::max(s1.size(), s2.size());
+    s1.resize(width, '\0');
+    s2.resize(width, '\0');
+    for (size_t i = 0; i < width; i++) {
+        ret.str_val.push_back(s1[i] & s2[i]);
+    }
+    return ret;
+}
+// bor(a, b): byte-wise OR of the two arguments taken as strings; the shorter one is
+// zero-padded to the length of the longer one. Returns NULL unless exactly two arguments.
+ExprValue bor(const std::vector<ExprValue>& input) {
+    if (input.size() != 2) {
+        return ExprValue::Null();
+    }
+    ExprValue ret(pb::STRING);
+    ExprValue v1 = input[0];
+    v1.cast_to(pb::STRING);
+    ExprValue v2 = input[1];
+    v2.cast_to(pb::STRING);
+
+    std::string& s1 = v1.str_val;
+    std::string& s2 = v2.str_val;
+    // Zero-pad the shorter operand (see band for why this is a resize, not an append).
+    const size_t width = std::max(s1.size(), s2.size());
+    s1.resize(width, '\0');
+    s2.resize(width, '\0');
+    for (size_t i = 0; i < width; i++) {
+        ret.str_val.push_back(s1[i] | s2[i]);
+    }
+    return ret;
+}
+// bxor(a, b): byte-wise XOR of the two arguments taken as strings; the shorter one is
+// zero-padded to the length of the longer one. Returns NULL unless exactly two arguments.
+ExprValue bxor(const std::vector<ExprValue>& input) {
+    if (input.size() != 2) {
+        return ExprValue::Null();
+    }
+    ExprValue ret(pb::STRING);
+    ExprValue v1 = input[0];
+    v1.cast_to(pb::STRING);
+    ExprValue v2 = input[1];
+    v2.cast_to(pb::STRING);
+    std::string& s1 = v1.str_val;
+    std::string& s2 = v2.str_val;
+    // Zero-pad the shorter operand (see band for why this is a resize, not an append).
+    const size_t width = std::max(s1.size(), s2.size());
+    s1.resize(width, '\0');
+    s2.resize(width, '\0');
+    for (size_t i = 0; i < width; i++) {
+        ret.str_val.push_back(s1[i] ^ s2[i]);
+    }
+    return ret;
+}
+// bnot(a): byte-wise complement of the argument taken as a string.
+ExprValue bnot(const std::vector<ExprValue>& input) {
+    if (input.size() != 1) {
+        return ExprValue::Null();
+    }
+    ExprValue ret(pb::STRING);
+    ExprValue v = input[0];
+    v.cast_to(pb::STRING);
+    std::string& s = v.str_val;
+    for (size_t i = 0; i < s.size(); i++) {
+        ret.str_val.push_back(~s[i]);
+    }
+    return ret;
+}
+ExprValue bpos(const std::vector<ExprValue>& input) {
+    if (input.size()
!= 2 && input.size() != 4 && input.size() != 5) {
+        return ExprValue::Null();
+    }
+    ExprValue ret(pb::INT64);
+    ExprValue value = input[0];
+    if (value.is_null()) {
+        value.str_val.clear();
+        value.type = pb::STRING;
+    }
+    value.cast_to(pb::STRING);
+    ExprValue bit = input[1];
+    bit.cast_to(pb::BOOL);
+    std::string& d = value.str_val;
+
+    // [off, end) is the half-open range of bit positions to scan; by default the whole string.
+    int64_t off = 0;
+    int64_t end = d.size() * NBBY;
+    // start/end arguments are byte indexes unless the 5th argument is "bit".
+    int STEP = NBBY;
+    if (input.size() == 5 && to_lower(input[4].get_string()) == "bit") {
+        STEP = 1;
+    }
+    if (input.size() >= 4) {
+        // start/end are inclusive indexes in STEP-sized units. As in Redis BITPOS, negative
+        // values count back from the end of the string; everything is clamped to the string
+        // so the scan below can never index outside the buffer.
+        const int64_t units = end / STEP;
+        ExprValue s = input[2];
+        int64_t first = s.cast_to(pb::INT64)._u.int64_val;
+        ExprValue e = input[3];
+        int64_t last = e.cast_to(pb::INT64)._u.int64_val;
+        if (first < 0) {
+            first = std::max<int64_t>(first + units, 0);
+        }
+        if (last < 0) {
+            last = std::max<int64_t>(last + units, 0);
+        }
+        first = std::min(first, units);
+        last = std::min(last, units - 1);
+        off = first * STEP;
+        end = (last + 1) * STEP;
+    }
+    if (bit._u.bool_val) {
+        for (; off < end; off++) {
+            if (ISSET(d, off)) {
+                ret._u.int64_val = off;
+                return ret;
+            }
+        }
+        ret._u.int64_val = -1;
+    } else {
+        for (; off < end; off++) {
+            if (ISCLR(d, off)) {
+                ret._u.int64_val = off;
+                return ret;
+            }
+        }
+        // No clear bit found: -1 whenever the caller gave an explicit range (4 or 5 args);
+        // without a range the first position past the string counts as clear.
+        ret._u.int64_val = (input.size() >= 4) ?
-1 : end;
+    }
+    return ret;
+}
+// bcount(value [, start, end [, "bit"]]): number of set bits in `value` taken as a string
+// (NULL is treated as empty), optionally restricted to the inclusive range start..end given
+// in bytes, or in bits when the 4th argument is "bit".
+ExprValue bcount(const std::vector<ExprValue>& input) {
+    if (input.size() != 1 && input.size() != 3 && input.size() != 4) {
+        return ExprValue::Null();
+    }
+    ExprValue ret(pb::INT64);
+
+    ExprValue value = input[0];
+    if (value.is_null()) {
+        value.str_val.clear();
+        value.type = pb::STRING;
+    }
+    value.cast_to(pb::STRING);
+    std::string& d = value.str_val;
+
+    // [off, end) is the half-open range of bit positions to count; by default the whole string.
+    int64_t off = 0;
+    int64_t end = d.size() * NBBY;
+    int STEP = NBBY;
+    if (input.size() == 4 && to_lower(input[3].get_string()) == "bit") {
+        STEP = 1;
+    }
+    if (input.size() >= 3) {
+        // As in Redis BITCOUNT, negative indexes count back from the end of the string;
+        // both bounds are clamped to the string so ISSET never indexes outside the buffer.
+        const int64_t units = end / STEP;
+        ExprValue s = input[1];
+        int64_t first = s.cast_to(pb::INT64)._u.int64_val;
+        ExprValue e = input[2];
+        int64_t last = e.cast_to(pb::INT64)._u.int64_val;
+        if (first < 0) {
+            first = std::max<int64_t>(first + units, 0);
+        }
+        if (last < 0) {
+            last = std::max<int64_t>(last + units, 0);
+        }
+        first = std::min(first, units);
+        last = std::min(last, units - 1);
+        off = first * STEP;
+        end = (last + 1) * STEP;
+    }
+    for (; off < end; off++) {
+        if (ISSET(d, off)) {
+            ret._u.int64_val++;
+        }
+    }
+    return ret;
+}
 }
 
 /* vim: set ts=4 sw=4 sts=4 tw=100 */
diff --git a/src/logical_plan/ddl_planner.cpp b/src/logical_plan/ddl_planner.cpp
index bfb5b1c4..110202e4 100644
--- a/src/logical_plan/ddl_planner.cpp
+++ b/src/logical_plan/ddl_planner.cpp
@@ -1899,6 +1899,9 @@ pb::PrimitiveType DDLPlanner::to_baikal_type(parser::FieldType* field_type) {
     case parser::MYSQL_TYPE_TDIGEST: {
         return pb::TDIGEST;
     } break;
+    case parser::MYSQL_TYPE_JSON: {
+        return pb::JSON;
+    } break;
     default : {
         DB_WARNING("unsupported item type: %d", field_type->type);
         return pb::INVALID_TYPE;
diff --git a/src/logical_plan/ddl_work_planner.cpp b/src/logical_plan/ddl_work_planner.cpp
index 2d14d326..1e8e295f 100755
--- a/src/logical_plan/ddl_work_planner.cpp
+++ b/src/logical_plan/ddl_work_planner.cpp
@@ -198,18 +198,12 @@ std::unique_ptr DDLWorkPlanner::create_scan_node() {
     _pos_index.Clear();
     _pos_index.set_index_id(_table_id);
     auto range_index = _pos_index.add_ranges();
+    _pos_index.set_left_field_cnt(_field_num);
+    _pos_index.set_left_open(true);
     if (_start_key != "") {
         range_index->set_left_key(_start_key);
         range_index->set_left_full(_ddl_pk_key_is_full);
-
range_index->set_left_field_cnt(_field_num); - range_index->set_left_open(true); } - // 暂时用不上 - // if (_end_key != "") { - // range_index->set_right_pb_record(_end_key); - // range_index->set_right_field_cnt(_field_num); - // range_index->set_right_open(true); - // } if (!_is_global_index) { if (_is_column_ddl) { pb_scan_node->set_ddl_work_type(pb::DDL_COLUMN); diff --git a/src/logical_plan/insert_planner.cpp b/src/logical_plan/insert_planner.cpp index 8bd0bccf..a69fcb55 100644 --- a/src/logical_plan/insert_planner.cpp +++ b/src/logical_plan/insert_planner.cpp @@ -36,7 +36,7 @@ int InsertPlanner::plan() { insert->set_need_ignore(_insert_stmt->is_ignore); insert->set_is_replace(_insert_stmt->is_replace); insert->set_is_merge(_insert_stmt->is_merge); - if (_ctx->row_ttl_duration > 0) { + if (_ctx->row_ttl_duration > 0 || _ctx->row_ttl_duration == -1) { insert->set_row_ttl_duration(_ctx->row_ttl_duration); DB_DEBUG("row_ttl_duration: %ld", _ctx->row_ttl_duration); } diff --git a/src/logical_plan/logical_planner.cpp b/src/logical_plan/logical_planner.cpp index 7a0b9655..a1ae3881 100644 --- a/src/logical_plan/logical_planner.cpp +++ b/src/logical_plan/logical_planner.cpp @@ -542,50 +542,29 @@ int LogicalPlanner::generate_sql_sign(QueryContext* ctx, parser::StmtNode* stmt) pb::DerivePlanNode* derive = node->mutable_derive_node(); pb::ScanNode* scan = derive->mutable_scan_node(); int64_t table_id = scan->table_id(); - if (table_index_map.count(table_id) > 0) { - for (auto& index_name: table_index_map[table_id]) { - int64_t index_id = 0; - auto ret = _factory->get_index_id(table_id, index_name, index_id); - if (ret != 0) { - DB_WARNING("index_name: %s in table:%s not exist", index_name.c_str(), - _factory->get_table_info_ptr(table_id)->name.c_str()); - continue; - } - scan->add_force_indexes(index_id); - } - } - } - } - } - } - if (!ctx->sign_forceindex.empty()) { - if (ctx->sign_forceindex.count(stat_info->sign) > 0) { - auto& table_index_map = 
ctx->sign_forceindex[stat_info->sign]; - for (int i = 0; i < ctx->plan.nodes_size(); i++) { - auto node = ctx->plan.mutable_nodes(i); - if (node->node_type() != pb::SCAN_NODE) { - continue; - } - pb::DerivePlanNode* derive = node->mutable_derive_node(); - pb::ScanNode* scan = derive->mutable_scan_node(); - int64_t table_id = scan->table_id(); - if (table_index_map.count(table_id) > 0) { - for (auto& index_name: table_index_map[table_id]) { - int64_t index_id = 0; - auto ret = _factory->get_index_id(table_id, index_name, index_id); - if (ret != 0) { - DB_WARNING("index_name: %s in table:%s not exist", index_name.c_str(), - _factory->get_table_info_ptr(table_id)->name.c_str()); - continue; + + if (table_index_map.count(table_id) > 0) { + for (auto& index_name: table_index_map[table_id]) { + int64_t index_id = 0; + auto ret = _factory->get_index_id(table_id, index_name, index_id); + if (ret != 0) { + DB_WARNING("index_name: %s in table:%s not exist", index_name.c_str(), + _factory->get_table_info_ptr(table_id)->name.c_str()); + continue; + } + auto indexInfoPtr = _factory->get_index_info_ptr(index_id); + if (indexInfoPtr == nullptr || indexInfoPtr->state != pb::IS_PUBLIC || + indexInfoPtr->index_hint_status != pb::IHS_NORMAL) { + continue; + } + scan->add_force_indexes(index_id); } - scan->add_force_indexes(index_id); } } } } } - return 0; } @@ -634,19 +613,19 @@ int LogicalPlanner::gen_subquery_plan(parser::DmlNode* subquery, const SmartPlan } } _cur_sub_ctx->expr_params.row_filed_number = planner->select_names().size(); - int ret = _cur_sub_ctx->create_plan_tree(); - if (ret < 0) { - DB_WARNING("Failed to pb_plan to execnode"); - return -1; - } _ctx->set_kill_ctx(_cur_sub_ctx); auto stat_info = &(_cur_sub_ctx->stat_info); - ret = generate_sql_sign(_cur_sub_ctx.get(), subquery); + int ret = generate_sql_sign(_cur_sub_ctx.get(), subquery); if (ret < 0) { return -1; } auto& client_conn = _ctx->client_conn; client_conn->insert_subquery_sign(stat_info->sign); + ret = 
_cur_sub_ctx->create_plan_tree(); + if (ret < 0) { + DB_WARNING("Failed to pb_plan to execnode"); + return -1; + } return 0; } @@ -782,20 +761,6 @@ int LogicalPlanner::add_table(const std::string& database, const std::string& ta } } - _ctx->sign_blacklist.insert(tbl_ptr->sign_blacklist.begin(), tbl_ptr->sign_blacklist.end()); - _ctx->sign_forcelearner.insert(tbl_ptr->sign_forcelearner.begin(), tbl_ptr->sign_forcelearner.end()); - for (auto& sign_index : tbl_ptr->sign_forceindex) { - std::vector vec; - boost::split(vec, sign_index, boost::is_any_of(":")); - if (vec.size() != 2) { - continue; - } - uint64_t sign_num = strtoull(vec[0].c_str(), nullptr, 10); - auto& table_index_map = _ctx->sign_forceindex[sign_num]; - auto& force_index_set = table_index_map[tableid]; - force_index_set.insert(vec[1]); - } - // 通用降级路由 // 复杂sql(join和子查询)不降级 if (MetaServerInteract::get_backup_instance()->is_inited() && tbl_ptr->have_backup && !_ctx->is_complex && @@ -882,6 +847,22 @@ int LogicalPlanner::add_table(const std::string& database, const std::string& ta _partition_names.clear(); } _ctx->stat_info.table_id = tableid; + auto tbl_ptr = _factory->get_table_info_ptr(tableid); + _ctx->sign_blacklist.insert(tbl_ptr->sign_blacklist.begin(), tbl_ptr->sign_blacklist.end()); + _ctx->sign_forcelearner.insert(tbl_ptr->sign_forcelearner.begin(), tbl_ptr->sign_forcelearner.end()); + for (auto& sign_index : tbl_ptr->sign_forceindex) { + std::vector vec; + boost::split(vec, sign_index, boost::is_any_of(":")); + if (vec.size() != 2) { + continue; + } + uint64_t sign_num = strtoull(vec[0].c_str(), nullptr, 10); + auto& table_index_map = _ctx->sign_forceindex[sign_num]; + auto& force_index_set = table_index_map[tableid]; + force_index_set.insert(vec[1]); + } + + ScanTupleInfo* tuple_info = get_scan_tuple(alias_full_name, tableid); _ctx->current_tuple_ids.emplace(tuple_info->tuple_id); _ctx->current_table_tuple_ids.emplace(tuple_info->tuple_id); @@ -1683,6 +1664,14 @@ int 
LogicalPlanner::create_scala_func_expr(const parser::FuncExpr* item, node->mutable_derive_node()->set_int_val(_ctx->client_conn->last_insert_id); return 0; } + if (lower_fn_name == "last_value" && item->children.size() == 0) { + pb::ExprNode* node = expr.add_nodes(); + node->set_node_type(pb::STRING_LITERAL); + node->set_col_type(pb::STRING); + node->set_num_children(0); + node->mutable_derive_node()->set_string_val(_ctx->client_conn->last_value); + return 0; + } if (lower_fn_name == "database" || lower_fn_name == "schema") { pb::ExprNode* node = expr.add_nodes(); @@ -2832,6 +2821,7 @@ int LogicalPlanner::create_sort_node() { sort->add_is_null_first(_order_ascs[idx]); } sort->set_tuple_id(_order_tuple_id); + return 0; } int LogicalPlanner::create_join_and_scan_nodes(JoinMemTmp* join_root, ApplyMemTmp* apply_root) { diff --git a/src/logical_plan/prepare_planner.cpp b/src/logical_plan/prepare_planner.cpp index f124fd9b..90288e75 100644 --- a/src/logical_plan/prepare_planner.cpp +++ b/src/logical_plan/prepare_planner.cpp @@ -18,6 +18,7 @@ #include "insert_planner.h" #include "delete_planner.h" #include "update_planner.h" +#include "union_planner.h" #include "transaction_planner.h" #include "exec_node.h" #include "packet_node.h" @@ -188,6 +189,9 @@ int PreparePlanner::stmt_prepare(const std::string& stmt_name, const std::string case parser::NT_DELETE: planner.reset(new DeletePlanner(prepare_ctx.get())); break; + case parser::NT_UNION: + planner.reset(new UnionPlanner(prepare_ctx.get())); + break; default: DB_WARNING("un-supported prepare command type: %d", prepare_ctx->stmt_type); return -1; @@ -210,6 +214,9 @@ int PreparePlanner::stmt_prepare(const std::string& stmt_name, const std::string return -1; } prepare_ctx->root->find_place_holder(prepare_ctx->placeholders); + for (auto sub_query_ctx : prepare_ctx->sub_query_plans) { + sub_query_ctx->root->find_place_holder(prepare_ctx->placeholders); + } /* // 包括类型推导与常量表达式计算 ret = ExprOptimize().analyze(prepare_ctx.get()); @@ 
-242,8 +249,8 @@ int PreparePlanner::stmt_execute(const std::string& stmt_name, std::vectorcopy_query_context(prepare_ctx.get()); auto* p_placeholders = &prepare_ctx->placeholders; - if (!prepare_ctx->is_select) { - // TODO dml的plan复用 + // TODO dml的plan复用 + if (!prepare_ctx->is_select || prepare_ctx->sub_query_plans.size() > 0) { // enable_2pc=true or table has global index need generate txn_id set_dml_txn_state(prepare_ctx->prepared_table_id); _ctx->plan.CopyFrom(prepare_ctx->plan); @@ -258,6 +265,15 @@ int PreparePlanner::stmt_execute(const std::string& stmt_name, std::vectorroot->find_place_holder(_ctx->placeholders); + for (auto sub_query_ctx : prepare_ctx->sub_query_plans) { + int ret = sub_query_ctx->create_plan_tree(); + if (ret < 0) { + DB_WARNING("Failed to pb_plan to execnode"); + return -1; + } + _ctx->add_sub_ctx(sub_query_ctx); + sub_query_ctx->root->find_place_holder(_ctx->placeholders); + } p_placeholders = &_ctx->placeholders; } if (p_placeholders == nullptr) { diff --git a/src/logical_plan/query_context.cpp b/src/logical_plan/query_context.cpp index 13ce6c51..18304f1b 100644 --- a/src/logical_plan/query_context.cpp +++ b/src/logical_plan/query_context.cpp @@ -80,6 +80,11 @@ int QueryContext::copy_query_context(QueryContext* p_query_ctx) { stat_info.sample_sql << p_query_ctx->stat_info.sample_sql.str(); need_learner_backup = p_query_ctx->need_learner_backup; use_backup = p_query_ctx->use_backup; + has_derived_table = p_query_ctx->has_derived_table; + derived_table_ctx_mapping.insert(p_query_ctx->derived_table_ctx_mapping.begin(), + p_query_ctx->derived_table_ctx_mapping.end()); + slot_column_mapping.insert(p_query_ctx->slot_column_mapping.begin(), + p_query_ctx->slot_column_mapping.end()); // runtime state if (p_query_ctx->is_select) { diff --git a/src/logical_plan/select_planner.cpp b/src/logical_plan/select_planner.cpp index 4270d150..bfc3285f 100644 --- a/src/logical_plan/select_planner.cpp +++ b/src/logical_plan/select_planner.cpp @@ -96,6 
+96,10 @@ int SelectPlanner::plan() { return -1; } + if (0 != minmax_remove()) { + return -1; + } + if (_ctx->is_base_subscribe) { if (0 != get_base_subscribe_scan_ref_slot()) { return -1; @@ -342,6 +346,65 @@ void SelectPlanner::get_slot_column_mapping() { } } +int SelectPlanner::minmax_remove() { + if (!_distinct_agg_funcs.empty() || ! _group_exprs.empty()) { + return 0; + } + if (_select_exprs.size() != 1 || _select_exprs[0].nodes(0).node_type() != pb::AGG_EXPR) { + return 0; + } + if (_group_slots.size() != 0 || _order_exprs.size() != 0 || _group_exprs.size() != 0) { + return 0; + } + pb::Expr select_expr = _select_exprs[0]; + if (select_expr.nodes_size() != 2) { + return 0; + } + if (select_expr.nodes(0).node_type() != pb::AGG_EXPR) { + return 0; + } + std::string fn_name = select_expr.nodes(0).fn().name(); + if (fn_name != "max" && fn_name != "min") { + return 0; + } + pb::ExprNode slot = select_expr.nodes(1); + if (slot.node_type() != pb::SLOT_REF) { + return 0; + } + _select_exprs.clear(); + _group_exprs.clear(); + _agg_funcs.clear(); + pb::Expr new_select; + new_select.set_database(select_expr.database()); + new_select.set_table(select_expr.table()); + auto add_node = new_select.add_nodes(); + *add_node = slot; + _select_exprs.push_back(new_select); + pb::Expr order_expr; + order_expr.set_database(select_expr.database()); + order_expr.set_table(select_expr.table()); + add_node = order_expr.add_nodes(); + *add_node = slot; + _order_exprs.push_back(order_expr); + if (fn_name == "max") { + _order_ascs.push_back(false); + } else { + _order_ascs.push_back(true); + } + _ctx->get_runtime_state()->must_have_one = true; + _limit_offset.clear_nodes(); + auto offset = _limit_offset.add_nodes(); + offset->mutable_derive_node()->set_int_val(0); + offset->set_node_type(pb::INT_LITERAL); + offset->set_col_type(pb::INT64); + _limit_count.clear_nodes(); + auto limit = _limit_count.add_nodes(); + limit->mutable_derive_node()->set_int_val(1); + 
limit->set_node_type(pb::INT_LITERAL); + limit->set_col_type(pb::INT64); + return 0; +} + int SelectPlanner::subquery_rewrite() { if (!_ctx->expr_params.is_expr_subquery) { return 0; @@ -427,7 +490,8 @@ void SelectPlanner::create_dual_scan_node() { } int SelectPlanner::create_limit_node() { - if (_select->limit == nullptr) { +// if (_select->limit == nullptr && + if (_limit_offset.nodes_size() == 0) { return 0; } pb::PlanNode* limit_node = _ctx->add_plan_node(); @@ -454,29 +518,15 @@ int SelectPlanner::create_limit_node() { int SelectPlanner::create_agg_node() { if (_select->select_opt != nullptr && _select->select_opt->distinct == true) { // select distinct ()xxx, xxx from xx.xx (no group by) - if (!_agg_funcs.empty() || !_distinct_agg_funcs.empty() || !_group_exprs.empty()) { - DB_WARNING("distinct query doesnot support group by"); - return -1; - } - pb::PlanNode* agg_node = _ctx->add_plan_node(); - agg_node->set_node_type(pb::AGG_NODE); - agg_node->set_limit(-1); - agg_node->set_is_explain(_ctx->is_explain); - agg_node->set_num_children(1); //TODO - pb::DerivePlanNode* derive = agg_node->mutable_derive_node(); - pb::AggNode* agg = derive->mutable_agg_node(); - - for (uint32_t idx = 0; idx < _select_exprs.size(); ++idx) { - pb::Expr* expr = agg->add_group_exprs(); - expr->CopyFrom(_select_exprs[idx]); -// if (_select_exprs[idx].nodes_size() != 1) { -// DB_WARNING("invalid distinct expr"); -// return -1; -// } -// expr->add_nodes()->CopyFrom(_select_exprs[idx].nodes(0)); + // distinct转group by + if (_group_exprs.empty()) { + for (uint32_t idx = 0; idx < _select_exprs.size(); ++idx) { + //非agg func的列加到group exprs中 + if (_select_exprs[idx].nodes(0).node_type() != pb::AGG_EXPR) { + _group_exprs.push_back(_select_exprs[idx]); + } + } } - agg->set_agg_tuple_id(-1); - return 0; } if (_agg_funcs.empty() && _distinct_agg_funcs.empty() && _group_exprs.empty()) { return 0; diff --git a/src/logical_plan/update_planner.cpp b/src/logical_plan/update_planner.cpp index 
607bdc05..0028d2f0 100644 --- a/src/logical_plan/update_planner.cpp +++ b/src/logical_plan/update_planner.cpp @@ -136,6 +136,10 @@ int UpdatePlanner::create_update_node(pb::PlanNode* update_node) { auto& slot = get_scan_ref_slot(try_to_lower(_current_tables[0]), table_id, field.id, field.type); update->add_primary_slots()->CopyFrom(slot); } + if (_ctx->row_ttl_duration > 0 || _ctx->row_ttl_duration == -1) { + update->set_row_ttl_duration(_ctx->row_ttl_duration); + DB_DEBUG("row_ttl_duration: %ld", _ctx->row_ttl_duration); + } return 0; } diff --git a/src/meta_server/table_manager.cpp b/src/meta_server/table_manager.cpp index 16c6e0bb..e4ec47b5 100644 --- a/src/meta_server/table_manager.cpp +++ b/src/meta_server/table_manager.cpp @@ -1996,7 +1996,7 @@ void TableManager::rename_field(const pb::MetaManagerRequest& request, } int32_t field_id = 0; for (auto& mem_field : *mem_schema_pb.mutable_fields()) { - if (mem_field.field_name() == field.field_name()) { + if (!mem_field.deleted() && mem_field.field_name() == field.field_name()) { mem_field.set_field_name(field.new_field_name()); field_id = mem_field.field_id(); } @@ -2071,7 +2071,7 @@ void TableManager::modify_field(const pb::MetaManagerRequest& request, return; } for (auto& mem_field : *mem_schema_pb.mutable_fields()) { - if (mem_field.field_name() == field_name) { + if (!mem_field.deleted() && mem_field.field_name() == field_name) { if (field.has_mysql_type()) { if (!check_field_is_compatible_type(mem_field, field)) { // TODO 数据类型变更仅支持meta-only, 有损变更待支持 @@ -4364,7 +4364,7 @@ void TableManager::link_binlog(const pb::MetaManagerRequest& request, const int6 } if (request.table_info().has_link_field()) { for (const auto& field_info : mem_schema_pb.fields()) { - if (field_info.field_name() == request.table_info().link_field().field_name()) { + if (!field_info.deleted() && field_info.field_name() == request.table_info().link_field().field_name()) { link_field = field_info; get_field_info = true; break; diff --git 
a/src/physical_plan/index_selector.cpp b/src/physical_plan/index_selector.cpp index e2eac8ff..ba58d07c 100644 --- a/src/physical_plan/index_selector.cpp +++ b/src/physical_plan/index_selector.cpp @@ -102,6 +102,15 @@ int IndexSelector::analyze(QueryContext* ctx) { ctx->index_ids.insert(ret); ctx->field_range_type = field_range_type; } + + index_merge_selector(ctx->tuple_descs(), + static_cast(scan_node_ptr), + filter_node, + (join_node != NULL || agg_node != NULL) ? NULL: sort_node, + join_node, + &index_has_null, + field_range_type, + ctx->stat_info.sample_sql.str()); } return 0; } @@ -853,6 +862,70 @@ int IndexSelector::select_partition(SmartTable& table_info, ScanNode* scan_node, return 0; } +int64_t IndexSelector::index_merge_selector(const std::vector& tuple_descs, + ScanNode* scan_node, + FilterNode* filter_node, + SortNode* sort_node, + JoinNode* join_node, + bool* index_has_null, + std::map& field_range_type, + const std::string& sample_sql) { + if (join_node != nullptr) { + // join_node 暂不处理 + return 0; + } + if (filter_node == nullptr) { + return 0; + } + if (!scan_node->need_index_merge()) { + return 0; + } + uint32_t select_index_score = scan_node->select_path()->prefix_ratio_index_score; + scan_node->swap_index_info(scan_node->origin_index_info()); + std::vector& conjuncts_without_or = scan_node->conjuncts_without_or(); + std::vector& or_sub_conjuncts = scan_node->or_sub_conjuncts(); + bool use_index_merge = true; + for (auto or_sub_conjunct: or_sub_conjuncts) { + std::vector conjuncts = conjuncts_without_or; + std::vector sub_conjuncts; + or_sub_conjunct->flatten_and_expr(&sub_conjuncts); + + conjuncts.insert(conjuncts.end(), sub_conjuncts.begin(), sub_conjuncts.end()); + filter_node->modify_conjuncts(conjuncts); // filter_node's conjuncts are modified + index_selector(tuple_descs, scan_node, filter_node, sort_node, join_node, + index_has_null, field_range_type, sample_sql); // filter_node's pruned_conjuncts are modified + auto path = 
scan_node->select_path(); + if (scan_node->select_path()->prefix_ratio_index_score <= select_index_score && + scan_node->select_path()->prefix_ratio_index_score != UINT32_MAX) { + use_index_merge = false; + break; + } + if (!scan_node->has_index()) { + use_index_merge = false; + break; + } + bool select_index_contain_any_sub_conjunct = false; + for (auto expr : sub_conjuncts) { + if (scan_node->select_path()->need_cut_index_range_condition.count(expr) != 0) { + select_index_contain_any_sub_conjunct = true; + break; + } + } + if (!select_index_contain_any_sub_conjunct) { + use_index_merge = false; + break; + } + scan_node->add_merge_index_info(); + } + filter_node->modifiy_pruned_conjuncts_by_index(scan_node->origin_index_info()._pruned_conjuncts); // 还原filter_node + scan_node->swap_index_info(scan_node->origin_index_info()); // 还原scan_node + if (!use_index_merge) { + scan_node->clear_merge_index_info(); + } else { + scan_node->scan_indexs().clear(); // 避免scan_plan_router + } + return 0; +} } /* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/src/physical_plan/join_reorder.cpp b/src/physical_plan/join_reorder.cpp index 066c7119..f1236b76 100644 --- a/src/physical_plan/join_reorder.cpp +++ b/src/physical_plan/join_reorder.cpp @@ -20,17 +20,28 @@ namespace baikaldb { int JoinReorder::analyze(QueryContext* ctx) { - JoinNode* join = static_cast(ctx->root->get_node(pb::JOIN_NODE)); + return reorder(ctx, ctx->root); +} +int JoinReorder::reorder(QueryContext* ctx, ExecNode* exec_node) { + JoinNode* join = static_cast(exec_node->get_node(pb::JOIN_NODE)); if (join == nullptr) { return 0; } + int ret = 0; std::map tuple_join_child_map; // join的所有非join孩子 std::map> tuple_equals_map; // 等值条件信息 std::vector tuple_order; // 目前join顺序 std::vector conditions; // join的全部条件,reorder需要重新下推 // 获取所有信息 if (!join->need_reorder(tuple_join_child_map, tuple_equals_map, tuple_order, conditions)) { - return 0; + if (join->children_size() == 2) { + ret = reorder(ctx, join->children(0)); + if (ret 
!= 0) { + return ret; + } + ret = reorder(ctx, join->children(1)); + } + return ret; } ScanNode* first_node = static_cast( tuple_join_child_map[tuple_order[0]]->get_node(pb::SCAN_NODE)); diff --git a/src/physical_plan/plan_router.cpp b/src/physical_plan/plan_router.cpp index 10c7c6b6..4332c894 100644 --- a/src/physical_plan/plan_router.cpp +++ b/src/physical_plan/plan_router.cpp @@ -269,7 +269,8 @@ int PlanRouter::scan_plan_router(RocksdbScanNode* scan_node, } //如果该表没有全局二级索引 //full_export+join也需要把主键放入slot - if (!schema_factory->has_global_index(main_table_id) && !is_full_export) { + // merge_index需要返回主键排重 + if (!schema_factory->has_global_index(main_table_id) && !is_full_export && !scan_node->has_merge_index()) { return 0; } bool need_put_pk = false; @@ -282,6 +283,8 @@ int PlanRouter::scan_plan_router(RocksdbScanNode* scan_node, } else if (hit_global && !covering_index) { // 如果只是索引覆盖,则不需要进行后续的操作 need_put_pk = true; + } else if (scan_node->has_merge_index()) { + need_put_pk = true; } if (!need_put_pk) { return 0; diff --git a/src/physical_plan/separate.cpp b/src/physical_plan/separate.cpp index 8870bcbb..5b32c1b7 100644 --- a/src/physical_plan/separate.cpp +++ b/src/physical_plan/separate.cpp @@ -218,11 +218,13 @@ int Separate::separate_simple_select(QueryContext* ctx, ExecNode* plan) { DB_WARNING("create manager_node failed"); return -1; } + bool has_merge_index = false; if (scan_nodes.size() > 0) { std::map region_infos = static_cast(scan_nodes[0])->region_infos(); manager_node->set_region_infos(region_infos); static_cast(scan_nodes[0])->set_related_manager_node(manager_node.get()); + has_merge_index = static_cast(scan_nodes[0])->has_merge_index(); } if (ctx->sub_query_plans.size() == 1) { @@ -258,6 +260,12 @@ int Separate::separate_simple_select(QueryContext* ctx, ExecNode* plan) { if (agg_node != nullptr) { ExecNode* parent = agg_node->get_parent(); + if (has_merge_index) { + manager_node->add_child(agg_node->children(0)); + agg_node->clear_children(); + 
agg_node->add_child(manager_node.release()); + return 0; + } pb::PlanNode pb_node; agg_node->transfer_pb(0, &pb_node); pb_node.set_node_type(pb::MERGE_AGG_NODE); diff --git a/src/protocol/network_server.cpp b/src/protocol/network_server.cpp index cc131dcd..d40f0688 100644 --- a/src/protocol/network_server.cpp +++ b/src/protocol/network_server.cpp @@ -45,6 +45,7 @@ DEFINE_double(backup_error_percent, 0.5, "use backup table if backup_error_perce DEFINE_int64(health_check_interval_us, 10 * 1000 * 1000, "health_check_interval_us"); DECLARE_bool(need_health_check); DEFINE_int64(health_check_store_timeout_ms, 2000, "health_check_store_timeout_ms"); +DEFINE_int32(health_check_store_concurrency, 20, "health_check_store concurrency"); DEFINE_bool(fetch_instance_id, false, "fetch baikaldb instace id, used for generate transaction id"); DEFINE_string(hostname, "HOSTNAME", "matrix instance name"); DEFINE_bool(insert_agg_sql, false, "whether insert agg_sql"); @@ -834,7 +835,7 @@ static void on_health_check_done(pb::StoreRes* response, brpc::Controller* cntl, pb::Status new_status = pb::NORMAL; if (cntl->Failed()) { if (cntl->ErrorCode() == brpc::ERPCTIMEDOUT || - cntl->ErrorCode() == ETIMEDOUT) { + cntl->ErrorCode() == ETIMEDOUT || cntl->ErrorCode() == EHOSTDOWN) { new_status = pb::DEAD; DB_WARNING("addr:%s is dead(hang), need rpc cancel, errcode:%d, error:%s", addr.c_str(), cntl->ErrorCode(), cntl->ErrorText().c_str()); @@ -941,17 +942,34 @@ void NetworkServer::process_other_heart_beat_response(const pb::BaikalOtherHeart void NetworkServer::connection_timeout_check() { auto check_func = [this]() { std::set need_cancel_addrs; + std::set logical_rooms; + std::set dead_logical_rooms; + std::map logical_room_instance_cnt; SchemaFactory* factory = SchemaFactory::get_instance(); std::unordered_map info_map; factory->get_all_instance_status(&info_map); for (auto& pair : info_map) { + logical_room_instance_cnt[pair.second.logical_room] += 1; if (pair.second.status == pb::DEAD && 
pair.second.need_cancel) { need_cancel_addrs.emplace(pair.first); + dead_logical_rooms.emplace(pair.second.logical_room); } + logical_rooms.emplace(pair.second.logical_room); + } + // 机房级故障判断 + bool is_logical_room_faulty = false; + if (dead_logical_rooms.size() == 1 && logical_rooms.size() > 2) { + auto logical_room = *(dead_logical_rooms.begin()); + if (need_cancel_addrs.size() <= logical_room_instance_cnt[logical_room]) { + is_logical_room_faulty = true; + } + } + if (is_logical_room_faulty) { + DB_WARNING("may be logical_room faulty, size: %lu/%lu", need_cancel_addrs.size(), info_map.size()); } //dead实例不会太多,设置个阈值,太多了则不做处理 size_t max_dead_cnt = std::min(info_map.size() / 10 + 1, (size_t)5); - if (need_cancel_addrs.size() > max_dead_cnt) { + if (need_cancel_addrs.size() > max_dead_cnt && !is_logical_room_faulty) { DB_WARNING("too many dead instance, size: %lu/%lu", need_cancel_addrs.size(), info_map.size()); need_cancel_addrs.clear(); } diff --git a/src/runtime/topn_sorter.cpp b/src/runtime/topn_sorter.cpp new file mode 100644 index 00000000..232739a8 --- /dev/null +++ b/src/runtime/topn_sorter.cpp @@ -0,0 +1,113 @@ +// Copyright (c) 2018-present Baidu, Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "topn_sorter.h" + +namespace baikaldb { + +void TopNSorter::add_batch(std::shared_ptr& batch){ + while (!batch->is_traverse_over()) { + if (_current_count < _limit) { + _mem_min_heap.push_back(std::move(batch->get_row())); + _current_count ++; + if (!_comp->need_not_compare()) { + shiftup(_current_count - 1, true); + } + } else { + auto& row = batch->get_row(); + if (!_comp->need_not_compare()) { + if (_comp->less(row.get(), _mem_min_heap[0].get())) { + _mem_min_heap[0] = std::move(row); + shiftdown(0, true); + } + } + } + batch->next(); + } +} + +int TopNSorter::get_next(RowBatch* batch, bool* eos) { + while (1) { + if (batch->is_full()) { + return 0; + } + if (!_current_count) { + *eos = true; + return 0; + } + if (_comp->need_not_compare()) { + auto row = _mem_min_heap[_current_count - 1].get(); + batch->move_row(std::unique_ptr(row)); + _current_count --; + _mem_min_heap[_current_count].reset(); + continue; + } + batch->move_row(std::move(_mem_min_heap[0])); + _current_count --; + _mem_min_heap[0] = std::move(_mem_min_heap[_current_count]); + shiftdown(0, false); + } + return 0; +} +void TopNSorter::shiftdown(size_t index, bool flag) { + size_t left_index = index * 2 + 1; + size_t right_index = left_index + 1; + if (left_index >= _current_count) { + return; + } + size_t min_index = index; + if (left_index < _current_count) { + int64_t com = _comp->compare(_mem_min_heap[left_index].get(), + _mem_min_heap[min_index].get()); + if (flag) { + com = -com; + } + if (com < 0) { + min_index = left_index; + } + } + if (right_index < _current_count) { + int64_t com = _comp->compare(_mem_min_heap[right_index].get(), + _mem_min_heap[min_index].get()); + if (flag) { + com = -com; + } + if (com < 0) { + min_index = right_index; + } + } + if (min_index != index) { + std::iter_swap(_mem_min_heap.begin() + min_index, _mem_min_heap.begin() + index); + shiftdown(min_index, flag); + } +} + +void TopNSorter::shiftup(size_t index, bool flag) { + if (index == 0) { + 
return; + } + size_t parent = (index - 1) / 2; + auto com = _comp->compare(_mem_min_heap[index].get(), _mem_min_heap[parent].get()); + if (flag) { + com = -com; + } + if (com < 0) { + std::iter_swap(_mem_min_heap.begin() + index, _mem_min_heap.begin() + parent); + shiftup(parent, flag); + } +} + +} + +/* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/src/session/network_socket.cpp b/src/session/network_socket.cpp index ea9483ee..f43d53c6 100644 --- a/src/session/network_socket.cpp +++ b/src/session/network_socket.cpp @@ -50,6 +50,7 @@ NetworkSocket::NetworkSocket() { packet_read_len = 0; is_handshake_send_partly = 0; last_insert_id = 0; + last_value = ""; has_error_packet = false; is_auth_result_send_partly = 0; query_ctx.reset(new QueryContext); diff --git a/src/store/region.cpp b/src/store/region.cpp index 7be92ee9..df20910b 100644 --- a/src/store/region.cpp +++ b/src/store/region.cpp @@ -542,7 +542,9 @@ int Region::execute_cached_cmd(const pb::StoreReq& request, pb::StoreRes& respon if (res.has_last_insert_id()) { response.set_last_insert_id(res.last_insert_id()); } - + if (res.has_extra_res() && res.extra_res().has_last_value()) { + response.mutable_extra_res()->set_last_value(res.extra_res().last_value()); + } // if this is the BEGIN cmd, we need to refresh the txn handler if (op_type == pb::OP_BEGIN && (nullptr == (txn = _txn_pool.get_txn(txn_id)))) { char errmsg[100]; @@ -2061,6 +2063,9 @@ void Region::dml_2pc(const pb::StoreReq& request, if (state.last_insert_id != INT64_MIN) { response.set_last_insert_id(state.last_insert_id); } + if (state.last_value != "") { + response.mutable_extra_res()->set_last_value((state.last_value)); + } response.set_scan_rows(state.num_scan_rows()); response.set_read_disk_size(state.read_disk_size()); response.set_errcode(pb::SUCCESS); @@ -2416,6 +2421,9 @@ void Region::dml_1pc(const pb::StoreReq& request, pb::OpType op_type, if (state.last_insert_id != INT64_MIN) { response.set_last_insert_id(state.last_insert_id); } + if 
(state.last_value != "") { + response.mutable_extra_res()->set_last_value((state.last_value)); + } } else { response.set_errcode(pb::EXEC_FAIL); response.set_errmsg("txn commit failed."); @@ -3267,6 +3275,9 @@ void Region::do_apply(int64_t term, int64_t index, const pb::StoreReq& request, if (res.has_last_insert_id()) { ((DMLClosure*)done)->response->set_last_insert_id(res.last_insert_id()); } + if (res.has_extra_res() && res.extra_res().has_last_value()) { + ((DMLClosure*)done)->response->mutable_extra_res()->set_last_value(res.extra_res().last_value()); + } } //DB_WARNING("dml_1pc %s", res.trace_nodes().DebugString().c_str()); break; @@ -3914,6 +3925,9 @@ void Region::apply_txn_request(const pb::StoreReq& request, braft::Closure* done if (res.has_last_insert_id()) { ((DMLClosure*)done)->response->set_last_insert_id(res.last_insert_id()); } + if (res.has_extra_res() && res.extra_res().has_last_value()) { + ((DMLClosure*)done)->response->mutable_extra_res()->set_last_value(res.extra_res().last_value()); + } } } void Region::start_split(braft::Closure* done, int64_t applied_index, int64_t term) { diff --git a/src/store/region_binlog.cpp b/src/store/region_binlog.cpp index 7cecdf17..43469fec 100755 --- a/src/store/region_binlog.cpp +++ b/src/store/region_binlog.cpp @@ -504,8 +504,13 @@ int Region::binlog_scan_when_restart() { left_record->set_value(left_record->get_field_by_tag(1), value); right_record->decode(""); - IndexRange range(left_record.get(), right_record.get(), binlog_pri.get(), binlog_pri.get(), - &_region_info, 1, 0, false, false, false); + MutTableKey left_key, right_key; + if (left_record->encode_key(*binlog_pri.get(), left_key, binlog_pri.get()->fields.size(), false, false) != 0) { + DB_FATAL("Fail to encode_key left, table:%ld", binlog_table.get()->id); + return -1; + } + left_key.set_full(true); + IndexRange range(left_key, right_key, binlog_pri.get(), binlog_pri.get(), &_region_info, 1, 0, false, false, false); std::map field_ids; std::vector 
field_slot; @@ -1311,8 +1316,13 @@ int64_t Region::read_data_cf_oldest_ts() { left_record->set_value(left_record->get_field_by_tag(1), value); right_record->decode(""); - IndexRange range(left_record.get(), right_record.get(), binlog_pri.get(), binlog_pri.get(), - &_region_info, 1, 0, false, false, false); + MutTableKey left_key, right_key; + if (left_record->encode_key(*binlog_pri.get(), left_key, binlog_pri.get()->fields.size(), false, false) != 0) { + DB_FATAL("Fail to encode_key left, table:%ld", binlog_table.get()->id); + return -1; + } + left_key.set_full(true); + IndexRange range(left_key, right_key, binlog_pri.get(), binlog_pri.get(), &_region_info, 1, 0, false, false, false); std::map field_ids; std::vector field_slot; @@ -1517,8 +1527,14 @@ void Region::read_binlog(const pb::StoreReq* request, left_record->set_value(left_record->get_field_by_tag(1), value); right_record->decode(""); - IndexRange range(left_record.get(), right_record.get(), binlog_pri.get(), binlog_pri.get(), - &_region_info, 1, 0, false, false, false); + MutTableKey left_key, right_key; + if (left_record->encode_key(*binlog_pri.get(), left_key, binlog_pri.get()->fields.size(), false, false) != 0) { + DB_FATAL("Fail to encode_key left, table:%ld", binlog_table.get()->id); + return ; + } + + left_key.set_full(true); + IndexRange range(left_key, right_key, binlog_pri.get(), binlog_pri.get(), &_region_info, 1, 0, false, false, false); std::map field_ids; std::vector field_slot;