|
| 1 | +from cognee.infrastructure.engine import DataPoint |
| 2 | +from cognee.infrastructure.databases.relational import get_relational_engine |
| 3 | +from sqlalchemy import select |
| 4 | +from sqlalchemy.sql import func |
| 5 | +from cognee.modules.data.models import Data |
| 6 | +from cognee.modules.data.models import GraphMetrics |
| 7 | +import uuid |
| 8 | +from cognee.infrastructure.databases.graph import get_graph_engine |
| 9 | +from cognee.modules.pipelines.models import PipelineRun |
| 10 | + |
| 11 | + |
| 12 | +async def fetch_token_count(db_engine) -> int: |
| 13 | + """ |
| 14 | + Fetches and sums token counts from the database. |
| 15 | +
|
| 16 | + Returns: |
| 17 | + int: The total number of tokens across all documents. |
| 18 | + """ |
| 19 | + |
| 20 | + async with db_engine.get_async_session() as session: |
| 21 | + token_count_sum = await session.execute(select(func.sum(Data.token_count))) |
| 22 | + token_count_sum = token_count_sum.scalar() |
| 23 | + |
| 24 | + return token_count_sum |
| 25 | + |
| 26 | + |
| 27 | +async def store_descriptive_metrics(pipeline_runs: list[PipelineRun], include_optional: bool): |
| 28 | + db_engine = get_relational_engine() |
| 29 | + graph_engine = await get_graph_engine() |
| 30 | + |
| 31 | + metrics_for_pipeline_runs = [] |
| 32 | + |
| 33 | + async with db_engine.get_async_session() as session: |
| 34 | + for pipeline_run in pipeline_runs: |
| 35 | + existing_metrics = await session.execute( |
| 36 | + select(GraphMetrics).where(GraphMetrics.id == pipeline_run.pipeline_run_id) |
| 37 | + ) |
| 38 | + existing_metrics = existing_metrics.scalars().first() |
| 39 | + |
| 40 | + if existing_metrics: |
| 41 | + metrics_for_pipeline_runs.append(existing_metrics) |
| 42 | + else: |
| 43 | + graph_metrics = await graph_engine.get_graph_metrics(include_optional) |
| 44 | + metrics = GraphMetrics( |
| 45 | + id=pipeline_run.pipeline_run_id, |
| 46 | + num_tokens=await fetch_token_count(db_engine), |
| 47 | + num_nodes=graph_metrics["num_nodes"], |
| 48 | + num_edges=graph_metrics["num_edges"], |
| 49 | + mean_degree=graph_metrics["mean_degree"], |
| 50 | + edge_density=graph_metrics["edge_density"], |
| 51 | + num_connected_components=graph_metrics["num_connected_components"], |
| 52 | + sizes_of_connected_components=graph_metrics["sizes_of_connected_components"], |
| 53 | + num_selfloops=graph_metrics["num_selfloops"], |
| 54 | + diameter=graph_metrics["diameter"], |
| 55 | + avg_shortest_path_length=graph_metrics["avg_shortest_path_length"], |
| 56 | + avg_clustering=graph_metrics["avg_clustering"], |
| 57 | + ) |
| 58 | + metrics_for_pipeline_runs.append(metrics) |
| 59 | + session.add(metrics) |
| 60 | + await session.commit() |
| 61 | + |
| 62 | + return metrics_for_pipeline_runs |
0 commit comments