From cca7d63318dee393d4e8874ada0f7790b63b28c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ferm=C3=ADn=20Gal=C3=A1n=20M=C3=A1rquez?=
Date: Tue, 9 Apr 2024 15:57:04 +0200
Subject: [PATCH 1/7] FIX rename entities_consistency.py script to more general oriondb_consistency.py
---
 .../README.md | 61 ++++---
 .../oriondb_consistency.py} | 168 +++++++++---------
 .../requirements.txt | 0
 .../test_oriondb_consistency.py} | 6 +-
 .../validation_data.js | 0
 5 files changed, 119 insertions(+), 116 deletions(-)
 rename scripts/{entities_consistency => oriondb_consistency}/README.md (76%)
 rename scripts/{entities_consistency/entities_consistency.py => oriondb_consistency/oriondb_consistency.py} (91%)
 rename scripts/{entities_consistency => oriondb_consistency}/requirements.txt (100%)
 rename scripts/{entities_consistency/test_entities_consistency.py => oriondb_consistency/test_oriondb_consistency.py} (93%)
 rename scripts/{entities_consistency => oriondb_consistency}/validation_data.js (100%)

diff --git a/scripts/entities_consistency/README.md b/scripts/oriondb_consistency/README.md
similarity index 76%
rename from scripts/entities_consistency/README.md
rename to scripts/oriondb_consistency/README.md
index 30474120a6..9816f06b29 100644
--- a/scripts/entities_consistency/README.md
+++ b/scripts/oriondb_consistency/README.md
@@ -1,7 +1,7 @@
-The entities consistency script analyze the contents of the entities database in orion DBs and
+The database consistency script analyzes the contents of the Orion database (entities, subscriptions, etc.) and
 checks several consistency rules, reporting violations found.
 
-Ref: [entity document database model]([../../doc/manuals/admin/database_model.md#entities-collection])
+Ref: [Orion database model](../../doc/manuals/admin/database_model.md)
 
 ## Requirements
 
@@ -10,15 +10,18 @@ Usage of virtual env is recommended.
 
 ## Usage
 
-Run `entities_consistency.py -h` for arguments details.
+Run `oriondb_consistency.py -h` for argument details.
 
 ## Rules
 
-* Rules 1x: DB inconsistencies (use to be severe problems)
-* Rules 2x: Syntax restrictions
-* Rules 9x: Usage of legacy features
+* Rules for entities
+  * Rules E1x: DB inconsistencies (usually severe problems)
+  * Rules E2x: Syntax restrictions
+  * Rules E9x: Usage of legacy features
+* Rules for subscriptions
+  * Rules S9x: Usage of legacy features
 
-### Rule 10: `_id` field consistency
+### Rule E10: `_id` field consistency
 
 Each entity in DB has an `_id` field with three subfields:
 
@@ -26,7 +29,7 @@ Each entity in DB has an `_id` field with three subfields:
 * `type`
 * `servicePath`
 
-### Rule 11: mandatory fields in entity
+### Rule E11: mandatory fields in entity
 
 The following fields are mandatory:
 
@@ -36,7 +39,7 @@ The following fields are mandatory:
 
 It is not an exhaustive check of every field in the database model, but some entities created/updated with
 old Orion versions may be missing them.
 
-### Rule 12: mandatory fields in attribute
+### Rule E12: mandatory fields in attribute
 
 The following subfields are mandatory for every attribute:
 
@@ -46,15 +49,15 @@ The following subfields are mandatory for every attribute:
 
 It is not an exhaustive check of every field in the database model, but some entities created/updated with
 old Orion versions may be missing them.
 
-### Rule 13: `attrNames` field consistency
+### Rule E13: `attrNames` field consistency
 
 For each item in `attrNames` array there is a corresponding key in `attrs` object and the other way around.
 
-### Rule 14: `mdNames` field consistency
+### Rule E14: `mdNames` field consistency
 
 For every attribute, for each item in `mdNames` array there is a corresponding key in `md` object and the other way around.
 
-### Rule 15: not swapped subkeys in `_id`
+### Rule E15: not swapped subkeys in `_id`
 
 In MongoDB JSON objects are stored taking order into account, so DB allows to have a document with `_id`
 equal to `{"id": "E", "type": "T", "servicePath": "/"}` and at the same time have another document with `_id`
@@ -62,7 +65,7 @@ equal to `{"type": "T", "id": "E", "servicePath": "/"}` without violating `_id` uniqueness constraint.
 
 This rule checks that this is not happening in the entities collection.
 
-### Rule 16: `location` field consistency
+### Rule E16: `location` field consistency
 
 Check that location is consistent. In particular:
@@ -73,41 +76,41 @@ Check that location is consistent. In particular:
 
 This rule is for location inconsistencies. For usage of deprecated geo types, there are additional rules in the 9x group.
 
-### Rule 17: `lastCorrelator` existence
+### Rule E17: `lastCorrelator` existence
 
 Check if `lastCorrelator` is included. This field was introduced in [Orion 1.8.0](https://github.com/telefonicaid/fiware-orion/releases/tag/1.8.0) (released on September 11th, 2017).
 
-### Rule 20: entity id syntax
+### Rule E20: entity id syntax
 
 Check [identifiers syntax restrictions](../../doc/manuals/orion-api.md#identifiers-syntax-restrictions) for this case.
 
-### Rule 21: entity type syntax
+### Rule E21: entity type syntax
 
 Check [identifiers syntax restrictions](../../doc/manuals/orion-api.md#identifiers-syntax-restrictions) for this case.
 
-### Rule 22: entity servicePath syntax
+### Rule E22: entity servicePath syntax
 
 Check [servicePath documentation](https://github.com/telefonicaid/fiware-orion/blob/master/doc/manuals/orion-api.md#entity-service-path)
 
-### Rule 23: attribute name syntax
+### Rule E23: attribute name syntax
 
 Check [identifiers syntax restrictions](../../doc/manuals/orion-api.md#identifiers-syntax-restrictions) for this case.
 
-### Rule 24: attribute type syntax
+### Rule E24: attribute type syntax
 
 Check [identifiers syntax restrictions](../../doc/manuals/orion-api.md#identifiers-syntax-restrictions) for this case.
 
-### Rule 25: metadata name syntax
+### Rule E25: metadata name syntax
 
 Check [identifiers syntax restrictions](../../doc/manuals/orion-api.md#identifiers-syntax-restrictions) for this case.
 
-### Rule 26: metadata type syntax
+### Rule E26: metadata type syntax
 
 Check [identifiers syntax restrictions](../../doc/manuals/orion-api.md#identifiers-syntax-restrictions) for this case.
 
-### Rule 90: detect usage of `geo:x` attribute type where `x` different from `json`
+### Rule E90: detect usage of `geo:x` attribute type where `x` different from `json`
 
 Check usage of deprecated geo-location types, i.e:
@@ -123,13 +126,13 @@ Suggested action is to:
 
 Note this rule doesn't check location consistency for this case (e.g. more than one geo-location attribute in the
 same entity). That's done by another rule in the 1x group.
 
-### Rule 91: detect usage of more than one legacy `location` metadata
+### Rule E91: detect usage of more than one legacy `location` metadata
 
 Check usage of `location` in more than one attribute of the same entity.
 
 Note this rule doesn't check location consistency for this case (that's done by another rule in the 1x group).
 
-### Rule 92: detect legacy `location` metadata should be `WGS84` or `WSG84`
+### Rule E92: detect legacy `location` metadata should be `WGS84` or `WSG84`
 
 The value of the `location` metadata should be `WGS84` or `WSG84`.
@@ -138,7 +141,7 @@ Additional consideration:
 
 * Entity attributes may have `location` metadata with values different from `WGS84` or `WSG84` if created using NGSIv2. That would be a false positive in this rule validation
 * This rule doesn't check location consistency for this case (that's done by another rule in the 1x group).
 
-### Rule 93: detect usage of redundant legacy `location`
+### Rule E93: detect usage of redundant legacy `location`
 
 Checks usage of redundant `location` metadata, i.e. when at the same time a `geo:` type is used in the same attribute.
@@ -150,7 +153,7 @@ Additional, considerations:
 
 * This rule assumes only one `location` is in the entity (i.e. Rule 91 is not violated). If that doesn't occur, only the first occurrence is taken into account.
 * This rule doesn't check location consistency for this case (that's done by another rule in the 1x group).
 
-### Rule 94: detect usage of not redundant legacy `location`
+### Rule E94: detect usage of not redundant legacy `location`
 
 Checks usage of not redundant `location` metadata, i.e. when at the same time the type of the attribute is not `geo:`.
@@ -168,9 +171,9 @@ Additional, considerations:
 
 ## Testing
 
-You can test the `entities_consistency.py` script this qy:
+You can test the `oriondb_consistency.py` script this way:
 
 1. Populate `orion-validation` DB with testing documents. To do so, copy-paste the content of the `validation_data.js` in `mongosh`
-2. Run `test_entities_consistency.py` test and check the log output
+2. Run `test_oriondb_consistency.py` test and check the log output
 
-You can also run `test_entities_consistenct.py` under coverage to check every rule is covering all the possible validation cases.
+You can also run `test_oriondb_consistency.py` under coverage to check that every rule covers all the possible validation cases.
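Editor's note: for readers who want to see how the per-entity rules documented above are wired together at this point in the series, the sketch below illustrates the contract used by the renamed script (a rule function receives an entity document and returns a violation message or `None`, and is registered in an inventory with a label and a `global` flag). This is a minimal, self-contained illustration, not code taken from the patch; the sample entity and the harness function names are made up.

```python
def rule_missing_last_correlator(entity):
    """Rule E17 style check: return a violation message, or None if the entity is fine."""
    if 'lastCorrelator' not in entity:
        return 'missing lastCorrelator'
    return None

# Each inventory entry carries a label (usable with --rules-exp filtering), a flag telling
# whether the rule inspects a whole collection ('global') or a single document, and the
# function implementing the check.
rules = [
    {'label': 'RuleE17', 'global': False, 'func': rule_missing_last_correlator},
]

def check_entities(entities):
    """Apply every non-global rule to every entity and collect the violations found."""
    fails = []
    for entity in entities:
        for rule in rules:
            if not rule['global']:
                msg = rule['func'](entity)
                if msg is not None:
                    fails.append((rule['label'], entity['_id'], msg))
    return fails

if __name__ == '__main__':
    sample = [{'_id': {'id': 'E1', 'type': 'T', 'servicePath': '/'}, 'attrs': {}}]
    print(check_entities(sample))  # -> one RuleE17 violation for the sample entity
```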
diff --git a/scripts/entities_consistency/entities_consistency.py b/scripts/oriondb_consistency/oriondb_consistency.py similarity index 91% rename from scripts/entities_consistency/entities_consistency.py rename to scripts/oriondb_consistency/oriondb_consistency.py index 4da9541710..65f0c41393 100644 --- a/scripts/entities_consistency/entities_consistency.py +++ b/scripts/oriondb_consistency/oriondb_consistency.py @@ -206,9 +206,9 @@ def check_id(id): # Rules functions -def rule10(entity): +def ruleE10(entity): """ - Rule 10: `_id` field consistency + Rule E10: `_id` field consistency See README.md for an explanation of the rule """ @@ -224,9 +224,9 @@ def rule10(entity): return None -def rule11(entity): +def ruleE11(entity): """ - Rule 11: mandatory fields in entity + Rule E11: mandatory fields in entity See README.md for an explanation of the rule """ @@ -242,9 +242,9 @@ def rule11(entity): return None -def rule12(entity): +def ruleE12(entity): """ - Rule 12: mandatory fields in attribute + Rule E12: mandatory fields in attribute See README.md for an explanation of the rule """ @@ -265,9 +265,9 @@ def rule12(entity): return None -def rule13(entity): +def ruleE13(entity): """ - Rule 13: `attrNames` field consistency + Rule E13: `attrNames` field consistency See README.md for an explanation of the rule """ @@ -300,9 +300,9 @@ def rule13(entity): return None -def rule14(entity): +def ruleE14(entity): """ - Rule 14: `mdNames` field consistency + Rule E14: `mdNames` field consistency See README.md for an explanation of the rule """ @@ -339,9 +339,9 @@ def rule14(entity): return None -def rule15(entities_collection): +def ruleE15(entities_collection): """ - Rule 15: not swapped subkeys in `_id` + Rule E15: not swapped subkeys in `_id` See README.md for an explanation of the rule @@ -374,9 +374,9 @@ def rule15(entities_collection): return None -def rule16(entity): +def ruleE16(entity): """ - Rule 16: `location` field consistency + Rule E16: `location` field consistency See README.md for an explanation of the rule """ @@ -422,9 +422,9 @@ def rule16(entity): return f"location field detected but no geo attribute is present (maybe metadata location is used?)" -def rule17(entity): +def ruleE17(entity): """ - Rule 17: `lastCorrelator` existence + Rule E17: `lastCorrelator` existence See README.md for an explanation of the rule """ @@ -434,9 +434,9 @@ def rule17(entity): return None -def rule20(entity): +def ruleE20(entity): """ - Rule 20: entity id syntax + Rule E20: entity id syntax See README.md for an explanation of the rule """ @@ -450,9 +450,9 @@ def rule20(entity): return None -def rule21(entity): +def ruleE21(entity): """ - Rule 21: entity type syntax + Rule E21: entity type syntax See README.md for an explanation of the rule """ @@ -466,9 +466,9 @@ def rule21(entity): return None -def rule22(entity): +def ruleE22(entity): """ - Rule 22: entity servicePath syntax + Rule E22: entity servicePath syntax See README.md for an explanation of the rule @@ -504,9 +504,9 @@ def rule22(entity): return f"unallowed characters in '{sp_levels[i]}' in servicePath level #{i}" -def rule23(entity): +def ruleE23(entity): """ - Rule 23: attribute name syntax + Rule E23: attribute name syntax See README.md for an explanation of the rule """ @@ -532,9 +532,9 @@ def rule23(entity): return None -def rule24(entity): +def ruleE24(entity): """ - Rule 24: attribute type syntax + Rule E24: attribute type syntax See README.md for an explanation of the rule """ @@ -554,9 +554,9 @@ def rule24(entity): return None -def 
rule25(entity): +def ruleE25(entity): """ - Rule 25: metadata name syntax + Rule E25: metadata name syntax See README.md for an explanation of the rule """ @@ -583,9 +583,9 @@ def rule25(entity): return None -def rule26(entity): +def ruleE26(entity): """ - Rule 26: metadata type syntax + Rule E26: metadata type syntax See README.md for an explanation of the rule """ @@ -607,9 +607,9 @@ def rule26(entity): return None -def rule90(entity): +def ruleE90(entity): """ - Rule 90: detect usage of `geo:x` attribute type where `x` different from `json` + Rule E90: detect usage of `geo:x` attribute type where `x` different from `json` See README.md for an explanation of the rule """ @@ -628,9 +628,9 @@ def rule90(entity): return f"usage of deprecated geo type in attributes: {', '.join(s)}" -def rule91(entity): +def ruleE91(entity): """ - Rule 91: detect usage of more than one legacy `location` metadata + Rule E91: detect usage of more than one legacy `location` metadata See README.md for an explanation of the rule """ @@ -646,9 +646,9 @@ def rule91(entity): return None -def rule92(entity): +def ruleE92(entity): """ - Rule 92: detect legacy `location` metadata should be `WGS84` or `WSG84` + Rule E92: detect legacy `location` metadata should be `WGS84` or `WSG84` See README.md for an explanation of the rule """ @@ -667,9 +667,9 @@ def rule92(entity): return None -def rule93(entity): +def ruleE93(entity): """ - Rule 93: detect usage of redundant legacy `location` + Rule E93: detect usage of redundant legacy `location` See README.md for an explanation of the rule """ @@ -682,9 +682,9 @@ def rule93(entity): return None -def rule94(entity): +def ruleE94(entity): """ - Rule 94: detect usage of not redundant legacy `location` + Rule E94: detect usage of not redundant legacy `location` See README.md for an explanation of the rule """ @@ -700,106 +700,106 @@ def rule94(entity): rules = [ # Rules 1x { - 'label': 'Rule10', + 'label': 'RuleE10', 'global': False, - 'func': rule10 + 'func': ruleE10 }, { - 'label': 'Rule11', + 'label': 'RuleE11', 'global': False, - 'func': rule11 + 'func': ruleE11 }, { - 'label': 'Rule12', + 'label': 'RuleE12', 'global': False, - 'func': rule12 + 'func': ruleE12 }, { - 'label': 'Rule13', + 'label': 'RuleE13', 'global': False, - 'func': rule13 + 'func': ruleE13 }, { - 'label': 'Rule14', + 'label': 'RuleE14', 'global': False, - 'func': rule14 + 'func': ruleE14 }, { - 'label': 'Rule15', + 'label': 'RuleE15', 'global': True, - 'func': rule15 + 'func': ruleE15 }, { - 'label': 'Rule16', + 'label': 'RuleE16', 'global': False, - 'func': rule16 + 'func': ruleE16 }, { - 'label': 'Rule17', + 'label': 'RuleE17', 'global': False, - 'func': rule17 + 'func': ruleE17 }, # Rules 2x { - 'label': 'Rule20', + 'label': 'RuleE20', 'global': False, - 'func': rule20 + 'func': ruleE20 }, { - 'label': 'Rule21', + 'label': 'RuleE21', 'global': False, - 'func': rule21 + 'func': ruleE21 }, { - 'label': 'Rule22', + 'label': 'RuleE22', 'global': False, - 'func': rule22 + 'func': ruleE22 }, { - 'label': 'Rule23', + 'label': 'RuleE23', 'global': False, - 'func': rule23 + 'func': ruleE23 }, { - 'label': 'Rule24', + 'label': 'RuleE24', 'global': False, - 'func': rule24 + 'func': ruleE24 }, { - 'label': 'Rule25', + 'label': 'RuleE25', 'global': False, - 'func': rule25 + 'func': ruleE25 }, { - 'label': 'Rule26', + 'label': 'RuleE26', 'global': False, - 'func': rule26 + 'func': ruleE26 }, # Rules 9x { - 'label': 'Rule90', + 'label': 'RuleE90', 'global': False, - 'func': rule90 + 'func': ruleE90 }, { - 'label': 
'Rule91', + 'label': 'RuleE91', 'global': False, - 'func': rule91 + 'func': ruleE91 }, { - 'label': 'Rule92', + 'label': 'RuleE92', 'global': False, - 'func': rule92 + 'func': ruleE92 }, { - 'label': 'Rule93', + 'label': 'RuleE93', 'global': False, - 'func': rule93 + 'func': ruleE93 }, { - 'label': 'Rule94', + 'label': 'RuleE94', 'global': False, - 'func': rule94 + 'func': ruleE94 } ] @@ -876,8 +876,8 @@ def process_db(logger, db_name, db_conn, include_entity_date, query, rules_exp): if __name__ == '__main__': parser = argparse.ArgumentParser( - prog='entities_consistency', - description='Check consistency in Orion entities collection in DB') + prog='oriondb_consistency', + description='Check consistency in Orion DB') parser.add_argument('--mongoUri', dest='mongo_uri', default='mongodb://localhost:27017', help='MongoDB URI. Default is mongodb://localhost:27017') @@ -885,9 +885,9 @@ def process_db(logger, db_name, db_conn, include_entity_date, query, rules_exp): help='DB name to check. If omitted all DBs starting with "orion" will be checked.') parser.add_argument('--include-entities-date', dest='include_entities_date', default=False, action='store_true', help='include entity modification time in log traces') - parser.add_argument('--query', dest='query', default='{}', + parser.add_argument('--entities-query', dest='query', default='{}', help='query to filter entities to check, in JSON MongoDB query language. By default, ' - 'all entities in the collection will be checked.') + 'all entities in the collection will be checked. Applies to Rule Exx rules.') parser.add_argument('--rules-exp', dest='rules_exp', help='Specifies the rules to apply, as a regular expression. By default all rules are applied.') parser.add_argument('--logLevel', dest='log_level', choices=['DEBUG', 'INFO', 'WARN', 'ERROR'], default='INFO', diff --git a/scripts/entities_consistency/requirements.txt b/scripts/oriondb_consistency/requirements.txt similarity index 100% rename from scripts/entities_consistency/requirements.txt rename to scripts/oriondb_consistency/requirements.txt diff --git a/scripts/entities_consistency/test_entities_consistency.py b/scripts/oriondb_consistency/test_oriondb_consistency.py similarity index 93% rename from scripts/entities_consistency/test_entities_consistency.py rename to scripts/oriondb_consistency/test_oriondb_consistency.py index f0ab04e399..07ff631bc0 100644 --- a/scripts/entities_consistency/test_entities_consistency.py +++ b/scripts/oriondb_consistency/test_oriondb_consistency.py @@ -30,13 +30,13 @@ # the result as log output. 
 #
 # You can run this test under coverage, so you can check the coverage of the different rules in
-# entities_consistency.py
+# oriondb_consistency.py
 
 import unittest
 from pymongo import MongoClient
 import logging
 
-from entities_consistency import process_db
+from oriondb_consistency import process_db
 
 
 class TestEntitiesConsistency(unittest.TestCase):
     def test_process_db(self):
@@ -51,5 +51,5 @@ def test_process_db(self):
         logger = logging.getLogger()
 
         # connect to MongoDB and process validation DB
-        mongo_client = MongoClient('mongodb://localhost:27017')
+        mongo_client = MongoClient('mongodb://localhost:37017')
         process_db(logger, 'orion-validation', mongo_client, False, {}, None)
diff --git a/scripts/entities_consistency/validation_data.js b/scripts/oriondb_consistency/validation_data.js
similarity index 100%
rename from scripts/entities_consistency/validation_data.js
rename to scripts/oriondb_consistency/validation_data.js

From 7039520372391fbd62500f90bc5aa5282111cfeb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ferm=C3=ADn=20Gal=C3=A1n=20M=C3=A1rquez?=
Date: Tue, 9 Apr 2024 17:23:47 +0200
Subject: [PATCH 2/7] ADD Rule S90
---
 scripts/oriondb_consistency/README.md | 8 +
 .../oriondb_consistency.py | 165 ++++++++++++------
 .../test_oriondb_consistency.py | 6 +-
 .../oriondb_consistency/validation_data.js | 53 ++++++
 4 files changed, 177 insertions(+), 55 deletions(-)

diff --git a/scripts/oriondb_consistency/README.md b/scripts/oriondb_consistency/README.md
index 9816f06b29..52499ecf4f 100644
--- a/scripts/oriondb_consistency/README.md
+++ b/scripts/oriondb_consistency/README.md
@@ -169,6 +169,14 @@ Additional, considerations:
 * This rule assumes only one `location` is in the entity (i.e. Rule 91 is not violated). If that doesn't occur, only the first occurrence is taken into account.
 * This rule doesn't check location consistency for this case (that's done by another rule in the 1x group).
 
+### Rule S90: detect usage of legacy notification format
+
+Check usage of legacy notification format in subscriptions (i.e. [`notification.attrsFormat`](../../doc/manuals/orion-api.md#subscriptionnotification) set to `legacy`).
+
+Suggested action is to:
+
+* Change `notification.attrsFormat` to one of the supported formats (e.g. `normalized`).
+ ## Testing You can test the `oriondb_consistency.py` script this qy: diff --git a/scripts/oriondb_consistency/oriondb_consistency.py b/scripts/oriondb_consistency/oriondb_consistency.py index 65f0c41393..6756e65c07 100644 --- a/scripts/oriondb_consistency/oriondb_consistency.py +++ b/scripts/oriondb_consistency/oriondb_consistency.py @@ -696,115 +696,165 @@ def ruleE94(entity): return None +def ruleS90(csub): + """ + Rule S90: Check usage of legacy notification format in subscriptions + + See README.md for an explanation of the rule + """ + if csub['format'] == 'JSON': + return f"notification legacy format in use (endpoint: {csub['reference']})" + + return None -rules = [ - # Rules 1x +rules_inventory = [ + # Rules E1x { 'label': 'RuleE10', + 'collection': 'entities', 'global': False, 'func': ruleE10 }, { 'label': 'RuleE11', + 'collection': 'entities', 'global': False, 'func': ruleE11 }, { 'label': 'RuleE12', + 'collection': 'entities', 'global': False, 'func': ruleE12 }, { 'label': 'RuleE13', + 'collection': 'entities', 'global': False, 'func': ruleE13 }, { 'label': 'RuleE14', + 'collection': 'entities', 'global': False, 'func': ruleE14 }, { 'label': 'RuleE15', + 'collection': 'entities', 'global': True, 'func': ruleE15 }, { 'label': 'RuleE16', + 'collection': 'entities', 'global': False, 'func': ruleE16 }, { 'label': 'RuleE17', + 'collection': 'entities', 'global': False, 'func': ruleE17 }, - # Rules 2x + # Rules E2x { 'label': 'RuleE20', + 'collection': 'entities', 'global': False, 'func': ruleE20 }, { 'label': 'RuleE21', + 'collection': 'entities', 'global': False, 'func': ruleE21 }, { 'label': 'RuleE22', + 'collection': 'entities', 'global': False, 'func': ruleE22 }, { 'label': 'RuleE23', + 'collection': 'entities', 'global': False, 'func': ruleE23 }, { 'label': 'RuleE24', + 'collection': 'entities', 'global': False, 'func': ruleE24 }, { 'label': 'RuleE25', + 'collection': 'entities', 'global': False, 'func': ruleE25 }, { 'label': 'RuleE26', + 'collection': 'entities', 'global': False, 'func': ruleE26 }, - # Rules 9x + # Rules E9x { 'label': 'RuleE90', + 'collection': 'entities', 'global': False, 'func': ruleE90 }, { 'label': 'RuleE91', + 'collection': 'entities', 'global': False, 'func': ruleE91 }, { 'label': 'RuleE92', + 'collection': 'entities', 'global': False, 'func': ruleE92 }, { 'label': 'RuleE93', + 'collection': 'entities', 'global': False, 'func': ruleE93 }, { 'label': 'RuleE94', + 'collection': 'entities', 'global': False, 'func': ruleE94 + }, + # Rules S9x + { + 'label': 'RuleS90', + 'collection': 'csubs', + 'global': False, + 'func': ruleS90 } ] +def get_id(doc, col, include_entity_date): + """ + Depending the collection and some arguments, the id is got in a way or another + """ + if col == 'entities': + id_string = json.dumps(doc['_id']) + if 'modDate' in doc: + id_string = f"({datetime.fromtimestamp(doc['modDate']).strftime('%Y-%m-%dT%H:%M:%SZ')}) {id_string}" + else: + id_string = f"()) {id_string}" + return f"entity {id_string}" + else: # col == 'csubs' + return f"subscription {doc['_id']}" -def process_db(logger, db_name, db_conn, include_entity_date, query, rules_exp): +def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp): """ Process an individual DB @@ -812,64 +862,65 @@ def process_db(logger, db_name, db_conn, include_entity_date, query, rules_exp): :param db_name: the name of the DB to process :param db_conn: connection to MongoDB :param include_entity_date: if True, include entity modification date in log traces - :param query: 
query to filter entities to be processed + :param queries: dict with per-colletion queries to filter entities to be processed (the key in the dictionary is + the collection to apply the query) :param rules_exp: regular expression to filter rules to apply :return: fails """ logger.info(f'processing {db_name}') - n = 0 - failed_entities = 0 + n = { + 'entities': 0, + 'csubs': 0 + } + failed_docs = { + 'entities': 0, + 'csubs': 0 + } fails = 0 - # check collection existence - if 'entities' not in db_conn[db_name].list_collection_names(): - logger.warning(f'collections entities not found in {db_name} database, nothing to do') - return + for col in ['entities', 'csubs']: + if col not in db_conn[db_name].list_collection_names(): + logger.warning(f'collection {col} not found in {db_name} database') - # apply global rules - for rule in rules: - if rules_exp is not None and not re.search(rules_exp, rule['label']): - continue + # filter out rules + rules = [] + for rule in rules_inventory: + if rules_exp is None or re.search(rules_exp, rule['label']): + rules.append(rule) + # first process global rules + for rule in rules: if rule['global']: - s = rule['func'](db_conn[db_name]['entities']) + col = rule['collection'] + s = rule['func'](db_conn[db_name][col]) if s is not None: - logger.warning(f'DB {db_name} {rule["label"]} violation in entities collection: {s}') + logger.warning(f'DB {db_name} {rule["label"]} violation in {col} collection: {s}') fails += 1 - # apply per-entity rules - for entity in db_conn[db_name]['entities'].find(query): - n += 1 - entity_fail = False - - id_string = json.dumps(entity['_id']) - if include_entity_date: - if 'modDate' in entity: - id_string = f"({datetime.fromtimestamp(entity['modDate']).strftime('%Y-%m-%dT%H:%M:%SZ')}) {id_string}" - else: - id_string = f"()) {id_string}" - - logger.debug(f'* processing entity {id_string}') - for rule in rules: - if rules_exp is not None and not re.search(rules_exp, rule['label']): - continue - - if not rule['global']: - s = rule['func'](entity) - if s is not None: - logger.warning(f'DB {db_name} {rule["label"]} violation for entity {id_string}: {s}') - entity_fail = True - fails += 1 - - if entity_fail: - failed_entities += 1 - - if n > 0: - logger.info( - f'processed {db_name}: {failed_entities}/{n} ({round(failed_entities / n * 100, 2)}%) failed entities with {fails} rule violations') - else: - logger.warning(f'{db_name} has 0 entities (maybe it should be cleaned up?)') + # second process not global rules, per collection + for col in ['entities', 'csubs']: + for doc in db_conn[db_name][col].find(queries[col]): + n[col] += 1 + doc_fail = False + id_string = get_id(doc, col, include_entity_date) + logger.debug(f'* processing {id_string}') + for rule in rules: + if not rule['global'] and rule['collection'] == col: + s = rule['func'](doc) + if s is not None: + logger.warning(f'DB {db_name} {rule["label"]} violation for {id_string}: {s}') + doc_fail = True + fails += 1 + + if doc_fail: + failed_docs[col] += 1 + + + for col in ['entities', 'csubs']: + if n[col] > 0: + logger.info( + f'processed {db_name} in collection {col}: {failed_docs[col]}/{n[col]} ({round(failed_docs[col] / n[col] * 100, 2)}%) failed docs') return fails @@ -885,9 +936,12 @@ def process_db(logger, db_name, db_conn, include_entity_date, query, rules_exp): help='DB name to check. 
If omitted all DBs starting with "orion" will be checked.') parser.add_argument('--include-entities-date', dest='include_entities_date', default=False, action='store_true', help='include entity modification time in log traces') - parser.add_argument('--entities-query', dest='query', default='{}', + parser.add_argument('--query-entities', dest='query_entities', default='{}', help='query to filter entities to check, in JSON MongoDB query language. By default, ' 'all entities in the collection will be checked. Applies to Rule Exx rules.') + parser.add_argument('--query-csubs', dest='query_csubs', default='{}', + help='query to filter csubs to check, in JSON MongoDB query language. By default, ' + 'all subscriptions in the collection will be checked. Applies to Rule Sxx rules.') parser.add_argument('--rules-exp', dest='rules_exp', help='Specifies the rules to apply, as a regular expression. By default all rules are applied.') parser.add_argument('--logLevel', dest='log_level', choices=['DEBUG', 'INFO', 'WARN', 'ERROR'], default='INFO', @@ -909,12 +963,15 @@ def process_db(logger, db_name, db_conn, include_entity_date, query, rules_exp): db_names = mongo_client.list_database_names() # to remove starting and trailing ' char, in case it is used - query = json.loads(args.query.replace("'", "")) + queries = { + 'entities': json.loads(args.query_entities.replace("'", "")), + 'csubs': json.loads(args.query_csubs.replace("'", "")), + } fails = 0 if args.db is not None: if args.db in db_names: - fails += process_db(logger, args.db, mongo_client, args.include_entities_date, query, args.rules_exp) + fails += process_db(logger, args.db, mongo_client, args.include_entities_date, queries, args.rules_exp) else: logger.fatal(f'database {args.db} does not exist') sys.exit(1) @@ -922,6 +979,6 @@ def process_db(logger, db_name, db_conn, include_entity_date, query, rules_exp): # Process all Orion databases for db_name in db_names: if db_name.startswith('orion-'): - fails += process_db(logger, db_name, mongo_client, args.include_entities_date, query, args.rules_exp) + fails += process_db(logger, db_name, mongo_client, args.include_entities_date, queries, args.rules_exp) logger.info(f'total rule violations: {fails}') diff --git a/scripts/oriondb_consistency/test_oriondb_consistency.py b/scripts/oriondb_consistency/test_oriondb_consistency.py index 07ff631bc0..3ccf08237e 100644 --- a/scripts/oriondb_consistency/test_oriondb_consistency.py +++ b/scripts/oriondb_consistency/test_oriondb_consistency.py @@ -52,4 +52,8 @@ def test_process_db(self): # connect to MongoDB and process validation DB mongo_client = MongoClient('mongodb://localhost:37017') - process_db(logger, 'orion-validation', mongo_client, False, {}, None) + queries = { + 'entities': {}, + 'csubs': {} + } + process_db(logger, 'orion-validation', mongo_client, False, queries, None) diff --git a/scripts/oriondb_consistency/validation_data.js b/scripts/oriondb_consistency/validation_data.js index a602e41ff0..0d1e929d44 100644 --- a/scripts/oriondb_consistency/validation_data.js +++ b/scripts/oriondb_consistency/validation_data.js @@ -4451,3 +4451,56 @@ db.getSiblingDB("orion-validation").entities.insertMany([ "lastCorrelator": "88e1bc10-b9dd-11ee-8755-080027cd35f1" } ]) + +db.getSiblingDB("orion-validation").csubs.insertMany([ + { + "expiration": { + "$numberLong": "9223372036854775807" + }, + "reference": "http://notify-receptor:5055/notify", + "custom": true, + "timeout": { + "$numberLong": "0" + }, + "headers": { + "fiware-servicepath": "/SS1" + }, + 
"throttling": { + "$numberLong": "0" + }, + "maxFailsLimit": { + "$numberLong": "-1" + }, + "servicePath": "/SS2", + "status": "active", + "statusLastChange": 1682640509.6958137, + "entities": [ + { + "id": ".*", + "isPattern": "true", + "type": "T", + "isTypePattern": false + } + ], + "attrs": [ + "temperature", + ], + "metadata": [], + "blacklist": false, + "onlyChanged": false, + "covered": false, + "description": "Foobar", + "conditions": [ + "TimeInstant" + ], + "expression": { + "q": "", + "mq": "", + "geometry": "", + "coords": "", + "georel": "" + }, + "altTypes": [], + "format": "JSON" + } +]) From a88563fb0c72b7355b0a440799fdb92d57a3a47f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferm=C3=ADn=20Gal=C3=A1n=20M=C3=A1rquez?= Date: Tue, 9 Apr 2024 18:12:25 +0200 Subject: [PATCH 3/7] FIX autofix framework added --- scripts/oriondb_consistency/README.md | 14 ++ .../oriondb_consistency.py | 178 ++++++++++++------ .../test_oriondb_consistency.py | 2 +- 3 files changed, 134 insertions(+), 60 deletions(-) diff --git a/scripts/oriondb_consistency/README.md b/scripts/oriondb_consistency/README.md index 52499ecf4f..bbc141edfd 100644 --- a/scripts/oriondb_consistency/README.md +++ b/scripts/oriondb_consistency/README.md @@ -177,6 +177,20 @@ Suggestion action is to: * Change `notification.attrsFormat` to one of the supported formats (e.g. `normalized`). +Autofix action: + +* `notification.attrsFormat` is set to `normalized`. + +## Autofix mode + +If `--autofix` is spefified in the command line arguments, the script implements automatic fixing of documents +violating the rules. + +**WARNING: the usage of `--autofix` will do DB modification**. Use it with care. It is strongly recommended to +do a backup of your data before using it. + +It only works for some rules. Check specific rules documntation for details (look for "Autofix action"). 
+ ## Testing You can test the `oriondb_consistency.py` script this qy: diff --git a/scripts/oriondb_consistency/oriondb_consistency.py b/scripts/oriondb_consistency/oriondb_consistency.py index 6756e65c07..7dd289d2a8 100644 --- a/scripts/oriondb_consistency/oriondb_consistency.py +++ b/scripts/oriondb_consistency/oriondb_consistency.py @@ -31,6 +31,7 @@ import json import sys import re +import copy # Helper functions @@ -219,9 +220,11 @@ def ruleE10(entity): missing_fields.append(field) if len(missing_fields) > 0: - return f"missing subfields in _id: {', '.join(missing_fields)}" + r = f"missing subfields in _id: {', '.join(missing_fields)}" else: - return None + r = None + + return r, None def ruleE11(entity): @@ -237,9 +240,11 @@ def ruleE11(entity): missing_fields.append(field) if len(missing_fields) > 0: - return f"missing fields: {', '.join(missing_fields)}" + r = f"missing fields: {', '.join(missing_fields)}" else: - return None + r = None + + return r, None def ruleE12(entity): @@ -260,9 +265,11 @@ def ruleE12(entity): s.append(f"in attribute '{attr}' missing fields: {', '.join(missing_fields)}") if len(s) > 0: - return ', '.join(s) + r = ', '.join(s) else: - return None + r = None + + return r, None def ruleE13(entity): @@ -295,10 +302,11 @@ def ruleE13(entity): s.append(f"attributes in attrs object not found in attrNames: {','.join(attrs_not_in_attrnames)}") if len(s) > 0: - return ', '.join(s) + r = ', '.join(s) else: - return None + r = None + return r, None def ruleE14(entity): """ @@ -334,9 +342,11 @@ def ruleE14(entity): f"in attribute '{item}' metadata in md object not found in mdNames: {', '.join(md_not_in_mdnames)}") if len(s) > 0: - return ', '.join(s) + r = ', '.join(s) else: - return None + r = None + + return r, None def ruleE15(entities_collection): @@ -369,10 +379,11 @@ def ruleE15(entities_collection): f"_id uniqueness violation for entity id='{id}' type='{type}' servicePath='{service_path}' found {count} times") if len(s) > 0: - return ', '.join(s) + r = ', '.join(s) else: - return None + r = None + return r, None def ruleE16(entity): """ @@ -380,6 +391,8 @@ def ruleE16(entity): See README.md for an explanation of the rule """ + # FIXME: this function should be re-factored. It has many return points. It's ugly... 
+ # check that as much as one attribute is using geo type geo_attrs = [] for attr in entity['attrs']: @@ -388,7 +401,7 @@ def ruleE16(entity): geo_attrs.append(attr) if len(geo_attrs) > 1: - return f"more than one attribute with geo type: {', '.join(geo_attrs)}" + return f"more than one attribute with geo type: {', '.join(geo_attrs)}", None if len(geo_attrs) == 1: # If geo attr found, then check that there is consistent location field @@ -396,13 +409,13 @@ def ruleE16(entity): geo_type = entity['attrs'][geo_attr]['type'] if 'location' not in entity: - return f"geo location '{geo_attr}' ({geo_type}) not null but location field not found in entity" + return f"geo location '{geo_attr}' ({geo_type}) not null but location field not found in entity", None if entity['location']['attrName'] != geo_attr: - return f"location.attrName ({entity['location']['attrName']}) differs from '{geo_attr}'" + return f"location.attrName ({entity['location']['attrName']}) differs from '{geo_attr}'", None geo_json = to_geo_json(entity['attrs'][geo_attr]) if type(geo_json) == str: - return geo_json + return geo_json, None # https://www.testcult.com/deep-comparison-of-json-in-python/ diff = DeepDiff(geo_json, entity['location']['coords'], ignore_order=True) @@ -412,14 +425,16 @@ def ruleE16(entity): geo_json = convert_strings_to_numbers(geo_json) if not DeepDiff(geo_json, entity['location']['coords'], ignore_order=True): return f"location.coords and GeoJSON derived from '{geo_attr}' ({geo_type}) is consistent, but value " \ - f"should use numbers for coordinates instead of strings" + f"should use numbers for coordinates instead of strings", None else: # Other causes - return f"location.coords and GeoJSON derived from '{geo_attr}' ({geo_type}) value: {diff}" + return f"location.coords and GeoJSON derived from '{geo_attr}' ({geo_type}) value: {diff}", None else: # len(geo_attrs) == 0 # If no geo attr found, check there isn't a location field if 'location' in entity: - return f"location field detected but no geo attribute is present (maybe metadata location is used?)" + return f"location field detected but no geo attribute is present (maybe metadata location is used?)", None + + return None, None def ruleE17(entity): @@ -429,9 +444,11 @@ def ruleE17(entity): See README.md for an explanation of the rule """ if 'lastCorrelator' not in entity: - return f"missing lastCorrelator" + r = f"missing lastCorrelator" else: - return None + r = None + + return r, None def ruleE20(entity): @@ -445,9 +462,9 @@ def ruleE20(entity): if 'id' in entity['_id']: r = check_id(entity['_id']['id']) if r is not None: - return f"entity id ({entity['_id']['id']}) syntax violation: {r}" + return f"entity id ({entity['_id']['id']}) syntax violation: {r}", None - return None + return None, None def ruleE21(entity): @@ -461,9 +478,9 @@ def ruleE21(entity): if 'type' in entity['_id']: r = check_id(entity['_id']['type']) if r is not None: - return f"entity type ({entity['_id']['type']}) syntax violation: {r}" + return f"entity type ({entity['_id']['type']}) syntax violation: {r}", None - return None + return None, None def ruleE22(entity): @@ -477,32 +494,33 @@ def ruleE22(entity): # servicePath existence is checked by another rule if 'servicePath' not in entity['_id']: - return None + return None, None sp = entity['_id']['servicePath'] # Scope must start with / (only "absolute" scopes are allowed) if not sp.startswith('/'): - return f"servicePath '{sp}' does not starts with '/'" + return f"servicePath '{sp}' does not starts with '/'", None # This 
special case can be problematic (as split will detect always a level and the minimum length rule # will break. So early return if sp == '/': - return None + return None, None # 10 maximum scope levels in a path sp_levels = sp[1:].split('/') if len(sp_levels) > 10: - return f"servicePath has {len(sp_levels)} tokens but the limit is 10" + return f"servicePath has {len(sp_levels)} tokens but the limit is 10", None # 50 maximum characters in each level (1 char is minimum), only alphanumeric and underscore allowed for i in range(len(sp_levels)): if len(sp_levels[i]) == 0: - return f'servicePath level #{i} length is 0 but minimum is 1' + return f'servicePath level #{i} length is 0 but minimum is 1', None if len(sp_levels[i]) > 50: - return f'servicePath level #{i} length is {len(sp_levels[i])} but maximum is 50' + return f'servicePath level #{i} length is {len(sp_levels[i])} but maximum is 50', None if re.search('[^a-zA-Z0-9_]', sp_levels[i]): - return f"unallowed characters in '{sp_levels[i]}' in servicePath level #{i}" + return f"unallowed characters in '{sp_levels[i]}' in servicePath level #{i}", None + return None, None def ruleE23(entity): """ @@ -527,9 +545,11 @@ def ruleE23(entity): s.append(f"attribute name ({attr}) syntax violation: {r}") if len(s) > 0: - return ', '.join(s) + r = ', '.join(s) else: - return None + r = None + + return r, None def ruleE24(entity): @@ -549,9 +569,11 @@ def ruleE24(entity): s.append(f"in attribute '{attr}' type ({type}) syntax violation: {r}") if len(s) > 0: - return ', '.join(s) + r = ', '.join(s) else: - return None + r = None + + return r, None def ruleE25(entity): @@ -578,9 +600,11 @@ def ruleE25(entity): s.append(f"in attribute '{attr}' metadata name ({md}) syntax violation: {r}") if len(s) > 0: - return ', '.join(s) + r = ', '.join(s) else: - return None + r = None + + return r, None def ruleE26(entity): @@ -602,9 +626,11 @@ def ruleE26(entity): s.append(f"in attribute '{attr}' metadata '{md}' type ({type}) syntax violation: {r}") if len(s) > 0: - return ', '.join(s) + r = ', '.join(s) else: - return None + r = None + + return r, None def ruleE90(entity): @@ -625,7 +651,11 @@ def ruleE90(entity): # if len(s) > 0: - return f"usage of deprecated geo type in attributes: {', '.join(s)}" + r = f"usage of deprecated geo type in attributes: {', '.join(s)}" + else: + r = None + + return r, None def ruleE91(entity): @@ -641,9 +671,11 @@ def ruleE91(entity): attrs.append(attr) if len(attrs) > 1: - return f"location metadata found {len(attrs)} times in attributes: {', '.join(attrs)} (maximum should be just 1)" + r = f"location metadata found {len(attrs)} times in attributes: {', '.join(attrs)} (maximum should be just 1)" else: - return None + r = None + + return r, None def ruleE92(entity): @@ -662,9 +694,11 @@ def ruleE92(entity): f"in attribute '{attr}' location metadata value is {location_value} (should be WGS84 or WSG84)") if len(s) > 0: - return ', '.join(s) + r = ', '.join(s) else: - return None + r = None + + return r, None def ruleE93(entity): @@ -677,9 +711,9 @@ def ruleE93(entity): if 'md' in entity['attrs'][attr]: for md in entity['attrs'][attr]['md']: if md == 'location' and is_geo_type(entity['attrs'][attr]['type']): - return f"in attribute '{attr}' redundant location metadata found (attribute is already using {entity['attrs'][attr]['type']} type)" + return f"in attribute '{attr}' redundant location metadata found (attribute is already using {entity['attrs'][attr]['type']} type)", None - return None + return None, None def ruleE94(entity): @@ 
-692,9 +726,9 @@ def ruleE94(entity): if 'md' in entity['attrs'][attr]: for md in entity['attrs'][attr]['md']: if md == 'location' and not is_geo_type(entity['attrs'][attr]['type']): - return f"in attribute '{attr}' location metadata found (attribute type is {entity['attrs'][attr]['type']})" + return f"in attribute '{attr}' location metadata found (attribute type is {entity['attrs'][attr]['type']})", None - return None + return None, None def ruleS90(csub): """ @@ -703,9 +737,14 @@ def ruleS90(csub): See README.md for an explanation of the rule """ if csub['format'] == 'JSON': - return f"notification legacy format in use (endpoint: {csub['reference']})" + r = f"notification legacy format in use (endpoint: {csub['reference']})" + fixed_csub = copy.deepcopy(csub) + fixed_csub['format'] = 'normalized' + else: + r = None + fixed_csub = None - return None + return r, fixed_csub rules_inventory = [ # Rules E1x @@ -846,15 +885,16 @@ def get_id(doc, col, include_entity_date): """ if col == 'entities': id_string = json.dumps(doc['_id']) - if 'modDate' in doc: - id_string = f"({datetime.fromtimestamp(doc['modDate']).strftime('%Y-%m-%dT%H:%M:%SZ')}) {id_string}" - else: - id_string = f"()) {id_string}" + if include_entity_date: + if 'modDate' in doc: + id_string = f"({datetime.fromtimestamp(doc['modDate']).strftime('%Y-%m-%dT%H:%M:%SZ')}) {id_string}" + else: + id_string = f"()) {id_string}" return f"entity {id_string}" else: # col == 'csubs' return f"subscription {doc['_id']}" -def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp): +def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp, autofix): """ Process an individual DB @@ -865,6 +905,7 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp :param queries: dict with per-colletion queries to filter entities to be processed (the key in the dictionary is the collection to apply the query) :param rules_exp: regular expression to filter rules to apply + :param autofix: True if autofix is activated :return: fails """ @@ -890,6 +931,7 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp rules.append(rule) # first process global rules + # NOTE: global rules doesn't accept autofix parameter for rule in rules: if rule['global']: col = rule['collection'] @@ -899,6 +941,10 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp fails += 1 # second process not global rules, per collection + autofix = { + 'entities': [], + 'csubs': [] + } for col in ['entities', 'csubs']: for doc in db_conn[db_name][col].find(queries[col]): n[col] += 1 @@ -907,11 +953,13 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp logger.debug(f'* processing {id_string}') for rule in rules: if not rule['global'] and rule['collection'] == col: - s = rule['func'](doc) + (s, fixed_doc) = rule['func'](doc) if s is not None: logger.warning(f'DB {db_name} {rule["label"]} violation for {id_string}: {s}') doc_fail = True fails += 1 + if fixed_doc is not None: + autofix[col].append(fixed_doc) if doc_fail: failed_docs[col] += 1 @@ -922,6 +970,8 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp logger.info( f'processed {db_name} in collection {col}: {failed_docs[col]}/{n[col]} ({round(failed_docs[col] / n[col] * 100, 2)}%) failed docs') + # TODO: implement save logic for autofix dict + return fails @@ -944,10 +994,20 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, 
rules_exp 'all subscriptions in the collection will be checked. Applies to Rule Sxx rules.') parser.add_argument('--rules-exp', dest='rules_exp', help='Specifies the rules to apply, as a regular expression. By default all rules are applied.') + parser.add_argument('--autofix', dest='atutofix', action='store_true', + help='Applies some automatic fixes. Not for all rules. Check documentation. WARNING: this ' + 'operation may modify OrionDBs, use with care') parser.add_argument('--logLevel', dest='log_level', choices=['DEBUG', 'INFO', 'WARN', 'ERROR'], default='INFO', help='log level. Default is INFO') args = parser.parse_args() + if args.autofix: + print("WARNING!!!! Parameter --autofix has been activated, so this script may modify documents in Orion DBs (check documentation") + print("for details). These modifications cannot be undone. If you are sure you want to continue type 'yes' and press Enter") + confirm = input() + if (confirm != 'yes'): + sys.exit() + # sets the logging configuration logging.basicConfig( level=logging.getLevelName(args.log_level), @@ -971,7 +1031,7 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp fails = 0 if args.db is not None: if args.db in db_names: - fails += process_db(logger, args.db, mongo_client, args.include_entities_date, queries, args.rules_exp) + fails += process_db(logger, args.db, mongo_client, args.include_entities_date, queries, args.rules_exp, args.autofix) else: logger.fatal(f'database {args.db} does not exist') sys.exit(1) @@ -979,6 +1039,6 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp # Process all Orion databases for db_name in db_names: if db_name.startswith('orion-'): - fails += process_db(logger, db_name, mongo_client, args.include_entities_date, queries, args.rules_exp) + fails += process_db(logger, db_name, mongo_client, args.include_entities_date, queries, args.rules_exp, args.autofix) logger.info(f'total rule violations: {fails}') diff --git a/scripts/oriondb_consistency/test_oriondb_consistency.py b/scripts/oriondb_consistency/test_oriondb_consistency.py index 3ccf08237e..6181a9042e 100644 --- a/scripts/oriondb_consistency/test_oriondb_consistency.py +++ b/scripts/oriondb_consistency/test_oriondb_consistency.py @@ -56,4 +56,4 @@ def test_process_db(self): 'entities': {}, 'csubs': {} } - process_db(logger, 'orion-validation', mongo_client, False, queries, None) + process_db(logger, 'orion-validation', mongo_client, False, queries, None, False) From 21ec49f9ddb82d35eabee8c5861135c65de90a04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferm=C3=ADn=20Gal=C3=A1n=20M=C3=A1rquez?= Date: Wed, 10 Apr 2024 13:23:41 +0200 Subject: [PATCH 4/7] FIX autofix updates in db --- .../oriondb_consistency.py | 70 +++++++++++-------- 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/scripts/oriondb_consistency/oriondb_consistency.py b/scripts/oriondb_consistency/oriondb_consistency.py index 7dd289d2a8..b9ab0efb85 100644 --- a/scripts/oriondb_consistency/oriondb_consistency.py +++ b/scripts/oriondb_consistency/oriondb_consistency.py @@ -23,7 +23,7 @@ __author__ = 'fermin' -from pymongo import MongoClient +from pymongo import MongoClient, ReplaceOne from deepdiff import DeepDiff from datetime import datetime import argparse @@ -746,6 +746,11 @@ def ruleS90(csub): return r, fixed_csub +collections_inventory = [ + 'entities', + 'csubs' +] + rules_inventory = [ # Rules E1x { @@ -910,17 +915,16 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp """ 
logger.info(f'processing {db_name}') - n = { - 'entities': 0, - 'csubs': 0 - } - failed_docs = { - 'entities': 0, - 'csubs': 0 - } - fails = 0 - - for col in ['entities', 'csubs']: + n = {} + n_failed = {} + modified_docs = {} + for col in collections_inventory: + n[col] = 0 + n_failed[col] = 0 + modified_docs[col] = [] + n_fails = 0 + + for col in collections_inventory: if col not in db_conn[db_name].list_collection_names(): logger.warning(f'collection {col} not found in {db_name} database') @@ -930,22 +934,19 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp if rules_exp is None or re.search(rules_exp, rule['label']): rules.append(rule) - # first process global rules - # NOTE: global rules doesn't accept autofix parameter + # first: process global rules for rule in rules: if rule['global']: col = rule['collection'] - s = rule['func'](db_conn[db_name][col]) + # FIXME: fixed_doc doesn't make sense for global rules (althoug it could be implemented in a more + # general way, returning an array, thinking in a future possible extension) + (s, fixed_doc) = rule['func'](db_conn[db_name][col]) if s is not None: logger.warning(f'DB {db_name} {rule["label"]} violation in {col} collection: {s}') - fails += 1 + n_fails += 1 - # second process not global rules, per collection - autofix = { - 'entities': [], - 'csubs': [] - } - for col in ['entities', 'csubs']: + # second: process not global rules, per collection + for col in collections_inventory: for doc in db_conn[db_name][col].find(queries[col]): n[col] += 1 doc_fail = False @@ -957,22 +958,29 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp if s is not None: logger.warning(f'DB {db_name} {rule["label"]} violation for {id_string}: {s}') doc_fail = True - fails += 1 + n_fails += 1 if fixed_doc is not None: - autofix[col].append(fixed_doc) + modified_docs[col].append(fixed_doc) if doc_fail: - failed_docs[col] += 1 - + n_failed[col] += 1 - for col in ['entities', 'csubs']: + for col in collections_inventory: if n[col] > 0: logger.info( - f'processed {db_name} in collection {col}: {failed_docs[col]}/{n[col]} ({round(failed_docs[col] / n[col] * 100, 2)}%) failed docs') + f'processed {db_name} in collection {col}: {n_failed[col]}/{n[col]} ({round(n_failed[col] / n[col] * 100, 2)}%) failed docs') - # TODO: implement save logic for autofix dict + for col in collections_inventory: + bulk = [] + logger.debug(f'about to update in {col} collection: {modified_docs[col]}') + for doc in modified_docs[col]: + bulk.append(ReplaceOne({'_id': doc['_id']}, doc)) + logger.info(f'{len(bulk)} documents in {col} collection could be fixed') + if autofix and len(bulk) > 0: + logger.info(f'updating {len(bulk)} documents in {col} collection...') + db_conn[db_name][col].bulk_write(bulk) - return fails + return n_fails if __name__ == '__main__': @@ -994,7 +1002,7 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp 'all subscriptions in the collection will be checked. Applies to Rule Sxx rules.') parser.add_argument('--rules-exp', dest='rules_exp', help='Specifies the rules to apply, as a regular expression. By default all rules are applied.') - parser.add_argument('--autofix', dest='atutofix', action='store_true', + parser.add_argument('--autofix', dest='autofix', action='store_true', help='Applies some automatic fixes. Not for all rules. Check documentation. 
WARNING: this ' 'operation may modify OrionDBs, use with care') parser.add_argument('--logLevel', dest='log_level', choices=['DEBUG', 'INFO', 'WARN', 'ERROR'], default='INFO', From b7ad3ac333cbbdf5ddb657fb247c41d065ea0766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferm=C3=ADn=20Gal=C3=A1n=20M=C3=A1rquez?= Date: Wed, 10 Apr 2024 13:37:52 +0200 Subject: [PATCH 5/7] FIX minor fix --- scripts/oriondb_consistency/oriondb_consistency.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/oriondb_consistency/oriondb_consistency.py b/scripts/oriondb_consistency/oriondb_consistency.py index b9ab0efb85..eefb2c306d 100644 --- a/scripts/oriondb_consistency/oriondb_consistency.py +++ b/scripts/oriondb_consistency/oriondb_consistency.py @@ -975,10 +975,11 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp logger.debug(f'about to update in {col} collection: {modified_docs[col]}') for doc in modified_docs[col]: bulk.append(ReplaceOne({'_id': doc['_id']}, doc)) - logger.info(f'{len(bulk)} documents in {col} collection could be fixed') - if autofix and len(bulk) > 0: - logger.info(f'updating {len(bulk)} documents in {col} collection...') - db_conn[db_name][col].bulk_write(bulk) + if len(bulk) > 0: + logger.info(f'{len(bulk)} documents in {col} collection could be fixed') + if autofix: + logger.info(f'updating {len(bulk)} documents in {col} collection...') + db_conn[db_name][col].bulk_write(bulk) return n_fails From 4dbc1bf22d6c835fd2eb4de33fd552eb3ed19502 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferm=C3=ADn=20Gal=C3=A1n=20M=C3=A1rquez?= Date: Wed, 10 Apr 2024 13:47:02 +0200 Subject: [PATCH 6/7] FIX minor fix --- scripts/oriondb_consistency/oriondb_consistency.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/oriondb_consistency/oriondb_consistency.py b/scripts/oriondb_consistency/oriondb_consistency.py index eefb2c306d..167fafa2be 100644 --- a/scripts/oriondb_consistency/oriondb_consistency.py +++ b/scripts/oriondb_consistency/oriondb_consistency.py @@ -976,9 +976,9 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp for doc in modified_docs[col]: bulk.append(ReplaceOne({'_id': doc['_id']}, doc)) if len(bulk) > 0: - logger.info(f'{len(bulk)} documents in {col} collection could be fixed') + logger.warn(f'{len(bulk)} documents in {col} collection could be fixed') if autofix: - logger.info(f'updating {len(bulk)} documents in {col} collection...') + logger.warn(f'updating {len(bulk)} documents in {col} collection...') db_conn[db_name][col].bulk_write(bulk) return n_fails From 95a95a1b162bef13ac6412d290f590655776dff0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferm=C3=ADn=20Gal=C3=A1n=20M=C3=A1rquez?= Date: Wed, 10 Apr 2024 13:47:54 +0200 Subject: [PATCH 7/7] FIX minor fix --- scripts/oriondb_consistency/oriondb_consistency.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/oriondb_consistency/oriondb_consistency.py b/scripts/oriondb_consistency/oriondb_consistency.py index 167fafa2be..db1cc84637 100644 --- a/scripts/oriondb_consistency/oriondb_consistency.py +++ b/scripts/oriondb_consistency/oriondb_consistency.py @@ -976,9 +976,9 @@ def process_db(logger, db_name, db_conn, include_entity_date, queries, rules_exp for doc in modified_docs[col]: bulk.append(ReplaceOne({'_id': doc['_id']}, doc)) if len(bulk) > 0: - logger.warn(f'{len(bulk)} documents in {col} collection could be fixed') + logger.warning(f'{len(bulk)} documents in {col} collection could be fixed') 
if autofix: - logger.warn(f'updating {len(bulk)} documents in {col} collection...') + logger.warning(f'updating {len(bulk)} documents in {col} collection...') db_conn[db_name][col].bulk_write(bulk) return n_fails
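Editor's note: taken together, patches 3-7 converge on an autofix flow in which rule functions return a `(violation_message, fixed_doc)` tuple and the fixed documents are written back per collection with a bulk `ReplaceOne` only when `--autofix` has been confirmed. The sketch below is an illustrative, hedged reconstruction of that flow, not code lifted from the patches; the connection string, collection name, and function names are assumptions for the example.

```python
# Minimal sketch of the (message, fixed_doc) rule contract plus the bulk write-back step.
import copy
from pymongo import MongoClient, ReplaceOne

def rule_s90_like(csub):
    """Rule S90 style check: legacy notification format, returning an autofixed copy."""
    if csub.get('format') == 'JSON':
        fixed = copy.deepcopy(csub)
        fixed['format'] = 'normalized'
        return 'notification legacy format in use', fixed
    return None, None

def apply_and_fix(collection, rules, autofix):
    """Report violations; optionally replace each fixable document in one bulk operation."""
    modified = []
    for doc in collection.find({}):
        for rule in rules:
            msg, fixed_doc = rule(doc)
            if msg is not None:
                print(f'violation for {doc["_id"]}: {msg}')
                if fixed_doc is not None:
                    modified.append(fixed_doc)
    if autofix and modified:
        collection.bulk_write([ReplaceOne({'_id': d['_id']}, d) for d in modified])
    return len(modified)

if __name__ == '__main__':
    client = MongoClient('mongodb://localhost:27017')  # assumed local MongoDB instance
    csubs = client['orion-validation']['csubs']        # assumed validation database
    print(apply_and_fix(csubs, [rule_s90_like], autofix=False))  # dry run: count fixable docs
```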