1 Star 0 Fork 3

algorithmofdish / gala-spider_rpm

forked from src-openEuler / gala-spider 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0001-cause-inference-optimization.patch 25.58 KB
一键复制 编辑 原始数据 按行查看 历史
algorithmofdish 提交于 2022-11-22 16:20 . cause inference optimization
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679
From ac48cfdcac8c93ba59754899237a37dac40659ac Mon Sep 17 00:00:00 2001
From: algorithmofdish <hexiujun1@huawei.com>
Date: Fri, 18 Nov 2022 11:07:47 +0800
Subject: [PATCH] feat(infer): cause inference optimization
1. categorize metrics and select one as cause with max abnormal score;
2. add virtual metric to complete the fault propagation path;
3. use abnormal score without nomarlization;
4. fix a bug where subgraph are incompletely obtained.
---
cause_inference/__main__.py | 4 +-
cause_inference/arangodb.py | 24 +++--
cause_inference/cause_infer.py | 11 +--
cause_inference/infer_policy.py | 84 ++++++++++++++----
cause_inference/model.py | 64 +++++++++++---
cause_inference/rule_parser.py | 149 ++++++++++++++++++++++++--------
config/infer-rule.yaml | 98 +++++++++++++++++----
7 files changed, 337 insertions(+), 97 deletions(-)
diff --git a/cause_inference/__main__.py b/cause_inference/__main__.py
index baddbe4..093f7ac 100644
--- a/cause_inference/__main__.py
+++ b/cause_inference/__main__.py
@@ -15,7 +15,7 @@ from cause_inference.config import init_infer_config
from cause_inference.model import AbnormalEvent
from cause_inference.cause_infer import cause_locating
from cause_inference.cause_infer import parse_abn_evt
-from cause_inference.cause_infer import normalize_abn_score
+from cause_inference.cause_infer import preprocess_abn_score
from cause_inference.rule_parser import rule_engine
from cause_inference.exceptions import InferenceException
from cause_inference.exceptions import DataParseException
@@ -172,7 +172,7 @@ def get_recommend_metric_evts(abn_kpi_data: dict) -> List[AbnormalEvent]:
metric_evt = AbnormalEvent(
timestamp=abn_kpi_data.get('Timestamp'),
abnormal_metric_id=metric_data.get('metric', ''),
- abnormal_score=normalize_abn_score(metric_data.get('score')),
+ abnormal_score=preprocess_abn_score(metric_data.get('score')),
metric_labels=metric_data.get('label', {}),
desc=metric_data.get('description', '')
)
diff --git a/cause_inference/arangodb.py b/cause_inference/arangodb.py
index 424e500..d7982b7 100644
--- a/cause_inference/arangodb.py
+++ b/cause_inference/arangodb.py
@@ -22,6 +22,14 @@ def connect_to_arangodb(arango_url, db_name):
return conn.databases[db_name]
+def query_all(db, aql_query, bind_vars=None, raw_results=True):
+ res = []
+ query_hdl = db.AQLQuery(aql_query, bindVars=bind_vars, rawResults=raw_results)
+ for item in query_hdl:
+ res.append(item)
+ return res
+
+
def query_recent_topo_ts(db: Database, ts):
bind_vars = {'@collection': _TIMESTAMP_COLL_NAME, 'ts': ts}
aql_query = '''
@@ -32,12 +40,12 @@ def query_recent_topo_ts(db: Database, ts):
RETURN t._key
'''
try:
- query_res = db.AQLQuery(aql_query, bindVars=bind_vars, rawResults=True).response
+ query_res = query_all(db, aql_query, bind_vars)
except AQLQueryError as ex:
raise DBException(ex) from ex
- if query_res.get('error') or not query_res.get('result'):
+ if len(query_res) == 0:
raise DBException('Can not find topological graph at the abnormal timestamp {}'.format(ts))
- last_ts = query_res.get('result')[0]
+ last_ts = query_res[0]
return int(last_ts)
@@ -56,12 +64,12 @@ def query_topo_entities(db: Database, ts, query_options=None):
return v
'''.format(filter_str)
try:
- query_res = db.AQLQuery(aql_query, bindVars=bind_vars, rawResults=True).response
+ query_res = query_all(db, aql_query, bind_vars)
except AQLQueryError as ex:
raise DBException(ex) from ex
- if query_res.get('error') or not query_res.get('result'):
+ if len(query_res) == 0:
raise DBException('Can not find observe entities satisfied.')
- return query_res.get('result')
+ return query_res
def query_subgraph(db, ts, start_node_id, edge_collection, depth=1):
@@ -83,12 +91,12 @@ def query_subgraph(db, ts, start_node_id, edge_collection, depth=1):
return {{"vertex": v, "edge": e}}
'''.format(edge_coll_str)
try:
- query_res = db.AQLQuery(aql_query, bindVars=bind_vars, rawResults=True).response
+ query_res = query_all(db, aql_query, bind_vars)
except AQLQueryError as ex:
raise DBException(ex) from ex
vertices = {}
edges = {}
- for item in query_res.get('result'):
+ for item in query_res:
vertex = item.get('vertex')
edge = item.get('edge')
vertices.setdefault(vertex.get('_id'), vertex)
diff --git a/cause_inference/cause_infer.py b/cause_inference/cause_infer.py
index 1954abf..dff26d0 100644
--- a/cause_inference/cause_infer.py
+++ b/cause_inference/cause_infer.py
@@ -23,8 +23,8 @@ from cause_inference.infer_policy import InferPolicy
from cause_inference.infer_policy import get_infer_policy
-def normalize_abn_score(score):
- return expit(score)
+def preprocess_abn_score(score):
+ return max(0, score)
# 因果推理
@@ -172,9 +172,10 @@ def cause_locating(abnormal_kpi: AbnormalEvent, abnormal_metrics: List[AbnormalE
# 5. 生成异常指标之间的因果图
causal_graph.init_metric_cause_graph()
- logger.logger.debug("Causal graph nodes are: {}".format(causal_graph.entity_cause_graph.nodes))
- logger.logger.debug("Causal graph predecessors: {}".format(causal_graph.entity_cause_graph.pred))
- logger.logger.debug("Causal graph successors: {}".format(causal_graph.entity_cause_graph.succ))
+ logger.logger.debug("Entity cause graph nodes are: {}".format(causal_graph.entity_cause_graph.nodes))
+ logger.logger.debug("Entity cause graph edges are: {}".format(causal_graph.entity_cause_graph.edges))
+ logger.logger.debug("Metric cause graph nodes are: {}".format(causal_graph.metric_cause_graph.nodes))
+ logger.logger.debug("Metric cause graph edges are: {}".format(causal_graph.metric_cause_graph.edges))
# 6. 以故障传播图 + 异常KPI为输入,执行根因推导算法,输出 topK 根因指标
infer_policy = get_infer_policy(infer_config.infer_conf.get('infer_policy'))
diff --git a/cause_inference/infer_policy.py b/cause_inference/infer_policy.py
index c0952e7..0663e74 100644
--- a/cause_inference/infer_policy.py
+++ b/cause_inference/infer_policy.py
@@ -5,9 +5,10 @@ from typing import List
import networkx as nx
-from cause_inference.cause_infer import CausalGraph
-from cause_inference.cause_infer import Cause
+from cause_inference.model import CausalGraph
+from cause_inference.model import Cause
from cause_inference.exceptions import InferenceException
+from cause_inference import rule_parser
class InferPolicy(ABC):
@@ -114,22 +115,28 @@ class DfsPolicy(InferPolicy):
if length < 1:
return 0.0
total_score = 0.0
+ num_of_valid_node = 0
for node in path[:length]:
+ if is_virtual_node(node):
+ continue
total_score += cause_graph.nodes[node].get('abnormal_score', 0)
- if length != 0:
- total_score /= length
+ num_of_valid_node += 1
+ if num_of_valid_node != 0:
+ total_score /= num_of_valid_node
return total_score
- def infer(self, causal_graph: CausalGraph, top_k: int) -> List[Cause]:
- cause_graph = causal_graph.metric_cause_graph
- abn_node_id = (causal_graph.entity_id_of_abn_kpi, causal_graph.abnormal_kpi.abnormal_metric_id)
-
- reverse_graph = nx.DiGraph()
- reverse_graph.add_nodes_from(cause_graph.nodes)
+ @staticmethod
+ def reverse_graph(cause_graph):
+ reversed_graph = nx.DiGraph()
+ reversed_graph.add_nodes_from(cause_graph.nodes)
for from_, to in cause_graph.edges:
- reverse_graph.add_edge(to, from_)
+ reversed_graph.add_edge(to, from_)
+ return reversed_graph
- successors = nx.dfs_successors(reverse_graph, abn_node_id)
+ @staticmethod
+ def get_all_paths_to_abn_node(abn_node_id, cause_graph):
+ reversed_graph = DfsPolicy.reverse_graph(cause_graph)
+ successors = nx.dfs_successors(reversed_graph, abn_node_id)
paths = []
path = []
@@ -145,24 +152,63 @@ class DfsPolicy(InferPolicy):
path.append(abn_node_id)
dfs_path(abn_node_id)
+ return paths
+ @staticmethod
+ def get_scored_paths(cause_graph, paths) -> list:
scored_paths = []
- for p in paths:
+ for path in paths:
scored_paths.append({
- 'score': self.calc_path_score(p, cause_graph),
- 'path': p
+ 'score': DfsPolicy.calc_path_score(path, cause_graph),
+ 'path': path
})
- scored_paths = sorted(scored_paths, key=lambda k: k['score'], reverse=True)
- scored_paths = scored_paths[:top_k]
+ return scored_paths
+
+ @staticmethod
+ def get_top_paths(scored_paths, top_k) -> list:
+ top_paths = []
+ node_selected = set()
+ metric_selected = set()
+ for path in scored_paths:
+ if len(top_paths) == top_k:
+ break
+ cause_node_id = path.get('path')[0]
+ if cause_node_id in node_selected:
+ continue
+ if cause_node_id[1] in metric_selected:
+ continue
+ if is_virtual_node(cause_node_id):
+ continue
+ node_selected.add(cause_node_id)
+ metric_selected.add(cause_node_id[1])
+ top_paths.append(path)
+
+ return top_paths
+ @staticmethod
+ def get_top_causes(top_paths) -> List[Cause]:
res = []
- for item in scored_paths:
+ for item in top_paths:
cause_node_id = item.get('path')[0]
cause = Cause(cause_node_id[1], cause_node_id[0], item.get('score'), item.get('path'))
res.append(cause)
-
return res
+ def infer(self, causal_graph: CausalGraph, top_k: int) -> List[Cause]:
+ cause_graph = causal_graph.metric_cause_graph
+ abn_node_id = (causal_graph.entity_id_of_abn_kpi, causal_graph.abnormal_kpi.abnormal_metric_id)
+
+ paths = self.get_all_paths_to_abn_node(abn_node_id, cause_graph)
+ scored_paths = self.get_scored_paths(cause_graph, paths)
+ scored_paths = sorted(scored_paths, key=lambda k: k['score'], reverse=True)
+ top_paths = self.get_top_paths(scored_paths, top_k)
+
+ return self.get_top_causes(top_paths)
+
+
+def is_virtual_node(node_id) -> bool:
+ return rule_parser.is_virtual_metric(node_id[1])
+
def get_infer_policy(policy: str, **options) -> InferPolicy:
if policy == 'dfs':
diff --git a/cause_inference/model.py b/cause_inference/model.py
index eb473da..136e4c3 100644
--- a/cause_inference/model.py
+++ b/cause_inference/model.py
@@ -7,6 +7,7 @@ from spider.conf.observe_meta import ObserveMetaMgt
from spider.util import logger
from spider.util.entity import concate_entity_id
from spider.util.entity import escape_entity_id
+from cause_inference import rule_parser
class AbnormalEvent:
@@ -77,6 +78,10 @@ class CausalGraph:
self.metric_cause_graph = nx.DiGraph()
self.init_casual_graph()
+ @staticmethod
+ def is_virtual_metric_group(metric_group) -> bool:
+ return len(metric_group) == 1 and rule_parser.is_virtual_metric(metric_group[0])
+
def init_casual_graph(self):
for node_id, node_attrs in self.topo_nodes.items():
self.entity_cause_graph.add_node(node_id, **node_attrs)
@@ -134,19 +139,54 @@ class CausalGraph:
self.init_metric_edge(edge)
def init_metric_edge(self, entity_edge):
- from_entity_id = entity_edge[0]
- to_entity_id = entity_edge[1]
+ f_entity_id = entity_edge[0]
+ t_entity_id = entity_edge[1]
+ avail_relations = self.get_avail_metric_causal_relations(entity_edge)
+
+ unique = set()
+ for f_metric_group, t_metric_group in avail_relations:
+ if self.is_virtual_metric_group(f_metric_group):
+ self.add_virtual_metric_node(f_entity_id, f_metric_group[0])
+ if self.is_virtual_metric_group(t_metric_group):
+ self.add_virtual_metric_node(t_entity_id, t_metric_group[0])
+
+ f_metric_id = self.metric_with_largest_abn_score(f_metric_group, f_entity_id)
+ t_metric_id = self.metric_with_largest_abn_score(t_metric_group, t_entity_id)
+ if (f_metric_id, t_metric_id) not in unique:
+ self.metric_cause_graph.add_edge((f_entity_id, f_metric_id), (t_entity_id, t_metric_id))
+ unique.add((f_metric_id, t_metric_id))
+
+ def add_virtual_metric_node(self, entity_id, metric_id):
+ self.metric_cause_graph.add_node((entity_id, metric_id))
+
+ def get_avail_metric_causal_relations(self, entity_edge):
+ f_metric_ids = self.get_abn_metric_ids(entity_edge[0])
+ t_metric_ids = self.get_abn_metric_ids(entity_edge[1])
rule_meta = self.entity_cause_graph.edges[entity_edge].get('rule_meta')
- for from_metric_evt in self.get_abnormal_metrics_of_node(from_entity_id):
- for to_metric_evt in self.get_abnormal_metrics_of_node(to_entity_id):
- from_metric_id = from_metric_evt.abnormal_metric_id
- to_metric_id = to_metric_evt.abnormal_metric_id
- if rule_meta is not None and not rule_meta.check_metric_pair(from_metric_id, to_metric_id):
- continue
- self.metric_cause_graph.add_edge(
- (from_entity_id, from_metric_id),
- (to_entity_id, to_metric_id)
- )
+ return rule_meta.get_avail_causal_relations(f_metric_ids, t_metric_ids)
+
+ def get_abn_metric_ids(self, entity_id):
+ metric_evts = self.get_abnormal_metrics_of_node(entity_id)
+ return [evt.abnormal_metric_id for evt in metric_evts]
+
+ def metric_with_largest_abn_score(self, metric_group: list, entity_id) -> str:
+ if len(metric_group) == 1:
+ return metric_group[0]
+
+ metric_evt_map = {}
+ metric_evts = self.get_abnormal_metrics_of_node(entity_id)
+ for metric_evt in metric_evts:
+ metric_evt_map.setdefault(metric_evt.abnormal_metric_id, metric_evt)
+
+ metric_id_of_largest = metric_group[0]
+ largest_abn_score = metric_evt_map.get(metric_id_of_largest).abnormal_score
+ for metric_id in metric_group:
+ abn_score = metric_evt_map.get(metric_id).abnormal_score
+ if abn_score > largest_abn_score:
+ metric_id_of_largest = metric_id
+ largest_abn_score = abn_score
+
+ return metric_id_of_largest
class Cause:
diff --git a/cause_inference/rule_parser.py b/cause_inference/rule_parser.py
index f08ed3c..eb6227c 100644
--- a/cause_inference/rule_parser.py
+++ b/cause_inference/rule_parser.py
@@ -1,7 +1,7 @@
import os
from abc import ABCMeta
from abc import abstractmethod
-from typing import List
+from typing import List, Dict, Tuple
import yaml
@@ -9,33 +9,86 @@ from spider.conf.observe_meta import RelationType
from spider.conf.observe_meta import EntityType
from spider.util import logger
+METRIC_CATEGORY_ALL = 'ALL'
+METRIC_CATEGORY_OTHER = 'OTHER'
+METRIC_CATEGORY_VIRTUAL = 'VIRTUAL'
+METRIC_ID_OF_CATEGORY_VIRTUAL = 'virtual_metric'
-class MetricPairSet:
- def __init__(self, from_: set, to_: set):
+
+def is_virtual_metric(metric_id: str) -> bool:
+ return metric_id == METRIC_ID_OF_CATEGORY_VIRTUAL
+
+
+class MetricCategoryPair:
+ def __init__(self, from_: str, to_: str):
self.from_ = from_
self.to_ = to_
- def check_metric_pair(self, from_metric_id: str, to_metric_id: str) -> bool:
- if self.from_ and from_metric_id not in self.from_:
- return False
- if self.to_ and to_metric_id not in self.to_:
- return False
- return True
-
class RuleMeta:
- def __init__(self, from_type, to_type, metric_range=None):
+ def __init__(self, from_type, to_type, from_categories=None, to_categories=None, metric_range=None):
self.from_type = from_type
self.to_type = to_type
- self.metric_range: List[MetricPairSet] = metric_range or []
-
- def check_metric_pair(self, from_metric_id: str, to_metric_id: str) -> bool:
- if not self.metric_range:
- return True
- for item in self.metric_range:
- if item.check_metric_pair(from_metric_id, to_metric_id):
- return True
- return False
+ self.from_categories = from_categories or {}
+ self.to_categories = to_categories or {}
+ self.category_pairs: List[MetricCategoryPair] = metric_range or []
+
+ @staticmethod
+ def aggregate_metric_from_groups(category_type, metric_groups) -> List[list]:
+ res = []
+ if category_type == METRIC_CATEGORY_ALL:
+ for cate_type, metric_group in metric_groups.items():
+ if cate_type == METRIC_CATEGORY_VIRTUAL:
+ continue
+ elif cate_type == METRIC_CATEGORY_OTHER:
+ res.extend([metric] for metric in metric_group)
+ else:
+ res.append(metric_group)
+ else:
+ metric_group = metric_groups.get(category_type)
+ if metric_group:
+ res.append(metric_group)
+
+ return res
+
+ @staticmethod
+ def _group_metric_by_category(metrics, categories) -> Dict[str, list]:
+ parts = {}
+ parted_metrics = set()
+ for cate_type, cate_metrics in categories.items():
+ part = []
+ for metric in metrics:
+ if metric in cate_metrics:
+ part.append(metric)
+ parted_metrics.add(metric)
+ if len(part) > 0:
+ parts.setdefault(cate_type, part)
+
+ other_part = []
+ for metric in metrics:
+ if metric not in parted_metrics:
+ other_part.append(metric)
+ if len(other_part) > 0:
+ parts.setdefault(METRIC_CATEGORY_OTHER, other_part)
+
+ virtual_part = [METRIC_ID_OF_CATEGORY_VIRTUAL]
+ parts.setdefault(METRIC_CATEGORY_VIRTUAL, virtual_part)
+
+ return parts
+
+ def get_avail_causal_relations(self, real_from_metrics, real_to_metrics) -> List[Tuple[list, list]]:
+ causal_relations = []
+
+ from_groups = self._group_metric_by_category(real_from_metrics, self.from_categories)
+ to_groups = self._group_metric_by_category(real_to_metrics, self.to_categories)
+ for cate_pair in self.category_pairs:
+ all_from_metrics = self.aggregate_metric_from_groups(cate_pair.from_, from_groups)
+ all_to_metrics = self.aggregate_metric_from_groups(cate_pair.to_, to_groups)
+ for from_metrics in all_from_metrics:
+ for to_metrics in all_to_metrics:
+ causal_relations.append((from_metrics, to_metrics))
+
+ return causal_relations
class Rule(metaclass=ABCMeta):
@@ -180,7 +233,8 @@ class NicRule1(Rule):
class RuleEngine:
def __init__(self):
self.rules: List[Rule] = []
- self.rule_metas = {}
+ self.metric_categories = {}
+ self.rule_metas: Dict[tuple, RuleMeta] = {}
def add_rule(self, rule: Rule):
self.rules.append(rule)
@@ -190,33 +244,58 @@ class RuleEngine:
rule.rule_parsing(causal_graph)
def load_rule_meta_from_yaml(self, rule_path: str) -> bool:
- abs_rule_path = os.path.abspath(rule_path)
- if not os.path.exists(abs_rule_path):
- logger.logger.warning("Rule meta path '{}' not exist", abs_rule_path)
- return True
try:
- with open(abs_rule_path, 'r') as file:
+ with open(os.path.abspath(rule_path), 'r') as file:
data = yaml.safe_load(file)
except IOError as ex:
logger.logger.warning(ex)
return False
- infer_rules = data.get("infer_rules", [])
- for rule_meta in infer_rules:
- saved_metric_range = []
- for item in rule_meta.get("metric_range", []):
- saved_metric_range.append(MetricPairSet(set(item.get('from', [])), set(item.get('to', []))))
- saved_rule_meta = RuleMeta(rule_meta.get('from_type'), rule_meta.get('to_type'), saved_metric_range)
- self.rule_metas.setdefault((rule_meta.get("from_type"), rule_meta.get("to_type")), saved_rule_meta)
-
+ self.load_rule_meta_from_dict(data)
return True
+ def create_default_rule_meta(self, from_type, to_type):
+ return RuleMeta(
+ from_type,
+ to_type,
+ self.metric_categories.get(from_type),
+ self.metric_categories.get(to_type),
+ [MetricCategoryPair(METRIC_CATEGORY_ALL, METRIC_CATEGORY_ALL)]
+ )
+
def add_rule_meta(self, causal_graph):
entity_cause_graph = causal_graph.entity_cause_graph
for edge in entity_cause_graph.edges:
from_type = entity_cause_graph.nodes[edge[0]].get('type')
to_type = entity_cause_graph.nodes[edge[1]].get('type')
- entity_cause_graph.edges[edge]["rule_meta"] = self.rule_metas.get((from_type, to_type))
+ rule_meta = self.rule_metas.get((from_type, to_type))
+ if not rule_meta:
+ rule_meta = self.create_default_rule_meta(from_type, to_type)
+ entity_cause_graph.edges[edge]["rule_meta"] = rule_meta
+
+ def load_rule_meta_from_dict(self, data: dict):
+ self.load_metric_categories(data.get('metric_categories', {}))
+ self.load_infer_rules(data.get("infer_rules", []))
+
+ def load_metric_categories(self, metric_categories: dict):
+ for entity_type, categories in metric_categories.items():
+ category_dict = {}
+ for category in categories:
+ category_dict.setdefault(category.get('category'), category.get('metrics'))
+ self.metric_categories.setdefault(entity_type, category_dict)
+
+ def load_infer_rules(self, infer_rules: list):
+ for rule_meta in infer_rules:
+ from_entity_type = rule_meta.get('from_type')
+ to_entity_type = rule_meta.get('to_type')
+ saved_metric_range = []
+ for item in rule_meta.get("metric_range", []):
+ from_category = item.get('from')
+ to_category = item.get('to')
+ saved_metric_range.append(MetricCategoryPair(from_category, to_category))
+ saved_rule_meta = RuleMeta(from_entity_type, to_entity_type, self.metric_categories.get(from_entity_type),
+ self.metric_categories.get(to_entity_type), saved_metric_range)
+ self.rule_metas.setdefault((from_entity_type, to_entity_type), saved_rule_meta)
rule_engine = RuleEngine()
diff --git a/config/infer-rule.yaml b/config/infer-rule.yaml
index c3bc3f3..d287972 100644
--- a/config/infer-rule.yaml
+++ b/config/infer-rule.yaml
@@ -1,32 +1,98 @@
+metric_categories:
+ proc:
+ -
+ category: PROC_CPU
+ metrics:
+ - gala_gopher_proc_utime_jiffies
+ - gala_gopher_proc_stime_jiffies
+ -
+ category: PROC_IO_LOAD
+ metrics:
+ - gala_gopher_proc_read_bytes
+ - gala_gopher_proc_write_bytes
+ - gala_gopher_proc_less_4k_io_read
+ - gala_gopher_proc_less_4k_io_write
+ - gala_gopher_proc_greater_4k_io_read
+ - gala_gopher_proc_greater_4k_io_write
+ disk:
+ -
+ category: DISK_LOAD
+ metrics:
+ - gala_gopher_disk_rspeed_kB
+ - gala_gopher_disk_wspeed_kB
+ - gala_gopher_disk_rspeed
+ - gala_gopher_disk_wspeed
+ -
+ category: DISK_DELAY
+ metrics:
+ - gala_gopher_disk_r_await
+ - gala_gopher_disk_w_await
+ - gala_gopher_disk_rareq
+ - gala_gopher_disk_wareq
+ block:
+ -
+ category: BLOCK_DELAY
+ metrics:
+ - gala_gopher_block_latency_req_max
+ - gala_gopher_block_latency_req_last
+ - gala_gopher_block_latency_req_sum
+ - gala_gopher_block_latency_req_jitter
+ nic:
+ -
+ category: NIC_DROP
+ metrics:
+ - gala_gopher_nic_tc_sent_drop
+ - gala_gopher_nic_tx_dropped
+ - gala_gopher_nic_rx_dropped
+ cpu:
+ -
+ category: CPU_TOTAL
+ metrics:
+ - gala_gopher_cpu_total_used_per
+
infer_rules:
-
from_type: cpu
to_type: proc
metric_range:
-
- from: []
- to:
- - gala_gopher_proc_utime_jiffies
- - gala_gopher_proc_stime_jiffies
+ from: CPU_TOTAL
+ to: PROC_CPU
-
from_type: block
to_type: proc
metric_range:
-
- from: []
- to:
- - gala_gopher_proc_ns_ext4_read
- - gala_gopher_proc_ns_ext4_write
- - gala_gopher_proc_ns_ext4_flush
- - gala_gopher_proc_ns_overlay_read
- - gala_gopher_proc_ns_overlay_write
- - gala_gopher_proc_ns_overlay_flush
+ from: BLOCK_DELAY
+ to: VIRTUAL
-
from_type: proc
to_type: disk
metric_range:
-
- from:
- - gala_gopher_proc_read_bytes
- - gala_gopher_proc_write_bytes
- to: []
\ No newline at end of file
+ from: PROC_IO_LOAD
+ to: ALL
+ -
+ from_type: disk
+ to_type: block
+ metric_range:
+ -
+ from: DISK_DELAY
+ to: BLOCK_DELAY
+ -
+ from_type: proc
+ to_type: sli
+ metric_range:
+ -
+ from: VIRTUAL
+ to: ALL
+ -
+ from: PROC_CPU
+ to: ALL
+ -
+ from_type: tcp_link
+ to_type: proc
+ metric_range:
+ -
+ from: ALL
+ to: VIRTUAL
\ No newline at end of file
--
2.21.0.windows.1
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/algorithmofdish/gala-spider_rpm.git
git@gitee.com:algorithmofdish/gala-spider_rpm.git
algorithmofdish
gala-spider_rpm
gala-spider_rpm
master

搜索帮助

344bd9b3 5694891 D2dac590 5694891