
Commit 6d7a68d

alekszievr, dexters1, and borisarzentar authored Mar 3, 2025
Feat: Store descriptive metrics identified by pipeline run id [cog-1260] (#582)
## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

## Summary by CodeRabbit

- **New Features**
  - Introduced a new analytic capability that calculates descriptive graph metrics for pipeline runs when enabled.
  - Updated the execution flow to include an option for activating the graph metrics step.
- **Chores**
  - Removed the previous mechanism for storing descriptive metrics to streamline the system.

Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Co-authored-by: Boris <boris@topoteretes.com>
1 parent 10e4bfb commit 6d7a68d

File tree

5 files changed: +69 −54 lines changed
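
Taken together, the diffs below export a new get_pipeline_run_metrics function, remove the old store_descriptive_metrics helper, and add a graph-metrics step to the dynamic-steps example. A minimal sketch of the resulting flow, condensed from examples/python/dynamic_steps_example.py further down (not part of the commit; it assumes data has already been added in the usual way before cognify runs):

import asyncio

import cognee


async def run_metrics_step():
    # Build the knowledge graph; per the example below, cognify() returns
    # the pipeline run(s) that the metrics step consumes.
    pipeline_run = await cognee.cognify()

    # New in this commit: compute descriptive graph metrics for those runs
    # and store them keyed by pipeline run id.
    await cognee.get_pipeline_run_metrics(pipeline_run, include_optional=True)


asyncio.run(run_metrics_step())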
 

‎cognee/__init__.py

+1

@@ -8,6 +8,7 @@
 from cognee.modules.visualization.cognee_network_visualization import (
     cognee_network_visualization,
 )
+from .modules.data.operations.get_pipeline_run_metrics import get_pipeline_run_metrics
 
 # Pipelines
 from .modules import pipelines
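
Because the import above re-exports the function at package level, it can also be pulled in directly (a one-line sketch, not part of the diff):

from cognee import get_pipeline_run_metrics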

‎cognee/modules/data/methods/__init__.py

−2

@@ -11,5 +11,3 @@
 # Delete
 from .delete_dataset import delete_dataset
 from .delete_data import delete_data
-
-from .store_descriptive_metrics import store_descriptive_metrics

‎cognee/modules/data/methods/store_descriptive_metrics.py

−50

This file was deleted.

cognee/modules/data/operations/get_pipeline_run_metrics.py

+60 (new file)
from cognee.infrastructure.databases.relational import get_relational_engine
from sqlalchemy import select
from sqlalchemy.sql import func
from cognee.modules.data.models import Data
from cognee.modules.data.models import GraphMetrics
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.pipelines.models import PipelineRun


async def fetch_token_count(db_engine) -> int:
    """
    Fetches and sums token counts from the database.

    Returns:
        int: The total number of tokens across all documents.
    """

    async with db_engine.get_async_session() as session:
        token_count_sum = await session.execute(select(func.sum(Data.token_count)))
        token_count_sum = token_count_sum.scalar()

    return token_count_sum


async def get_pipeline_run_metrics(pipeline_runs: list[PipelineRun], include_optional: bool):
    db_engine = get_relational_engine()
    graph_engine = await get_graph_engine()

    metrics_for_pipeline_runs = []

    async with db_engine.get_async_session() as session:
        for pipeline_run in pipeline_runs:
            existing_metrics = await session.execute(
                select(GraphMetrics).where(GraphMetrics.id == pipeline_run.pipeline_run_id)
            )
            existing_metrics = existing_metrics.scalars().first()

            if existing_metrics:
                metrics_for_pipeline_runs.append(existing_metrics)
            else:
                graph_metrics = await graph_engine.get_graph_metrics(include_optional)
                metrics = GraphMetrics(
                    id=pipeline_run.pipeline_run_id,
                    num_tokens=await fetch_token_count(db_engine),
                    num_nodes=graph_metrics["num_nodes"],
                    num_edges=graph_metrics["num_edges"],
                    mean_degree=graph_metrics["mean_degree"],
                    edge_density=graph_metrics["edge_density"],
                    num_connected_components=graph_metrics["num_connected_components"],
                    sizes_of_connected_components=graph_metrics["sizes_of_connected_components"],
                    num_selfloops=graph_metrics["num_selfloops"],
                    diameter=graph_metrics["diameter"],
                    avg_shortest_path_length=graph_metrics["avg_shortest_path_length"],
                    avg_clustering=graph_metrics["avg_clustering"],
                )
                metrics_for_pipeline_runs.append(metrics)
                session.add(metrics)
                await session.commit()

    return metrics_for_pipeline_runs
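
Since each GraphMetrics row is keyed by pipeline_run_id, calling the function again for the same run returns the stored row rather than recomputing the graph metrics. A sketch of that behavior (not part of the diff; the attribute names read back at the end assume the GraphMetrics columns mirror the keyword arguments set above):

import asyncio

import cognee
from cognee.modules.data.operations.get_pipeline_run_metrics import get_pipeline_run_metrics


async def inspect_metrics():
    pipeline_runs = await cognee.cognify()

    # First call computes metrics and stores one GraphMetrics row per run.
    await get_pipeline_run_metrics(pipeline_runs, include_optional=True)

    # Second call finds the existing rows by pipeline_run_id and returns them as-is.
    cached = await get_pipeline_run_metrics(pipeline_runs, include_optional=True)

    for metrics in cached:
        # Assumed attribute names, mirroring the columns assigned above.
        print(metrics.id, metrics.num_nodes, metrics.num_edges, metrics.mean_degree)


asyncio.run(inspect_metrics())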

‎examples/python/dynamic_steps_example.py

+8 −2

@@ -180,10 +180,15 @@ async def main(enable_steps):
 
     # Step 3: Create knowledge graph
     if enable_steps.get("cognify"):
-        await cognee.cognify()
+        pipeline_run = await cognee.cognify()
         print("Knowledge graph created.")
 
-    # Step 4: Query insights
+    # Step 4: Calculate descriptive metrics
+    if enable_steps.get("graph_metrics"):
+        await cognee.get_pipeline_run_metrics(pipeline_run, include_optional=True)
+        print("Descriptive graph metrics saved to database.")
+
+    # Step 5: Query insights
     if enable_steps.get("retriever"):
         search_results = await cognee.search(
             query_type=SearchType.GRAPH_COMPLETION, query_text="Who has experience in design tools?"

@@ -201,6 +206,7 @@ async def main(enable_steps):
         "prune_system": rebuild_kg,
         "add_text": rebuild_kg,
         "cognify": rebuild_kg,
+        "graph_metrics": rebuild_kg,
         "retriever": retrieve,
     }
