Source code for class_factory.concept_web.build_concept_map

"""Build and analyze concept maps from relationship data.

This module provides functionality to create, analyze and visualize concept maps
based on relationships between concepts extracted from educational content.

Functions:
    build_graph: Create a weighted graph from concept relationships.
    detect_communities: Identify concept clusters using various community detection algorithms.

The module supports both directed and undirected graphs, with features including:
- Edge weight normalization
- Node centrality calculation
- Community detection using multiple algorithms (leiden, louvain, spectral)
- Visualization preparation with node sizes and community labels
"""
import logging
import os
from pathlib import Path
from typing import List, Tuple

import networkx as nx
import networkx.algorithms.community as nx_comm
from cdlib import algorithms
from dotenv import load_dotenv
from sklearn.cluster import SpectralClustering

# %%
from class_factory.concept_web.concept_extraction import process_relationships

os.environ["CDLIB_WARNINGS"] = "ignore"


load_dotenv()

# Path definitions
projectDir = Path(os.getenv('projectDir'))
dataDir = projectDir / "tests/data/"

# %%


def build_graph(
        processed_relationships: List[Tuple[str, str, str]],
        directed: bool = False
) -> nx.Graph | nx.DiGraph:
    """
    Build a weighted (directed or undirected) graph from processed concept relationships.

    Args:
        processed_relationships (List[Tuple[str, str, str]]): List of (concept1, relationship, concept2) tuples.
        directed (bool, optional): If True, creates a directed graph. Defaults to False.

    Returns:
        nx.Graph | nx.DiGraph: Graph with node and edge attributes for visualization and analysis.

    Raises:
        ValueError: If relationships are not correctly formatted.
    """
    # Initialize a directed or undirected graph
    G = nx.DiGraph() if directed else nx.Graph()

    # Add nodes and edges from relationships, skipping placeholder "None" relations
    for concept1, relationship, concept2 in processed_relationships:
        if relationship not in ["None", "none"]:
            if G.has_edge(concept1, concept2):
                # Repeated pair: record the additional relation and bump the weight
                G[concept1][concept2]['relation'].add(relationship)
                G[concept1][concept2]['weight'] += 1
            else:
                G.add_edge(concept1, concept2, weight=1, relation={relationship})

    # Calculate min and max edge weights (avoid division by zero on empty graphs)
    edge_weights = nx.get_edge_attributes(G, 'weight').values()
    max_weight = max(edge_weights) if edge_weights else 1
    min_weight = min(edge_weights) if edge_weights else 1

    # Target range for normalized edge weights (line thickness in the visualization)
    min_normalized_weight = 0.5
    max_normalized_weight = 4

    try:
        for u, v, d in G.edges(data=True):
            normalized_weight = min_normalized_weight + (max_normalized_weight - min_normalized_weight) * \
                (d['weight'] - min_weight) / (max_weight - min_weight)
            G[u][v]['normalized_weight'] = normalized_weight

        # Calculate degree centrality for each node
        if directed:
            centrality = nx.in_degree_centrality(G)
        else:
            centrality = nx.degree_centrality(G)

        # Normalize centrality to a range suitable for text size (e.g., 6 to 24)
        min_size = 6
        max_size = 24
        max_centrality = max(centrality.values())
        min_centrality = min(centrality.values())

        for node, centrality_value in centrality.items():
            normalized_size = min_size + (max_size - min_size) * (
                centrality_value - min_centrality) / (max_centrality - min_centrality)
            G.nodes[node]['text_size'] = normalized_size
            G.nodes[node]['centrality'] = centrality_value

    except ZeroDivisionError:
        # Log a warning that the graph could not be normalized
        logging.warning(
            "Normalization of weights and centrality skipped due to lack of variation in the graph.\n"
            "Returning unnormalized edge weight and text size")
        # Fall back to default sizes if normalization fails
        for node in G.nodes():
            G.nodes[node]['text_size'] = 12    # Default text size
            G.nodes[node]['centrality'] = 0.5  # Default centrality

    return G
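# Worked example of the normalization above (illustrative numbers): with raw
# edge weights {1, 2, 4}, min_weight = 1 and max_weight = 4, so an edge of
# weight 2 maps to 0.5 + (4 - 0.5) * (2 - 1) / (4 - 1) ≈ 1.67, keeping all
# line widths inside the 0.5 to 4 range used for plotting. If every edge had
# the same weight, the division by (max_weight - min_weight) would raise
# ZeroDivisionError and the default sizes in the fallback branch would be used.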
def detect_communities(
        G: nx.Graph | nx.DiGraph,
        method: str = "leiden",
        num_clusters: int | None = None
) -> nx.Graph | nx.DiGraph:
    """
    Detect communities in a concept graph using the specified algorithm.

    Args:
        G (nx.Graph | nx.DiGraph): The input graph.
        method (str, optional): Community detection algorithm ('leiden', 'louvain', 'spectral'). Defaults to 'leiden'.
        num_clusters (int | None, optional): Number of clusters for spectral clustering. Defaults to None.

    Returns:
        nx.Graph | nx.DiGraph: Graph with 'community' node attributes.

    Raises:
        ValueError: If the specified method is not recognized.
    """
    G_copy = G.copy()

    if method == "leiden":
        # Use the Leiden method for community detection
        communities_obj = algorithms.leiden(G)
        # Extract communities from the cdlib NodeClustering object
        communities = communities_obj.communities
    elif method == "louvain":
        # Use the Louvain method for community detection
        communities = nx_comm.louvain_communities(G)
    elif method == "spectral":
        # Create a list of node names to maintain the order
        nodes = list(G.nodes())
        # Create the adjacency matrix for the graph
        adj_matrix = nx.to_numpy_array(G, nodelist=nodes)
        # Apply spectral clustering on the precomputed adjacency matrix
        sc = SpectralClustering(n_clusters=num_clusters, affinity='precomputed', assign_labels='kmeans')
        labels = sc.fit_predict(adj_matrix)
        # Group nodes by their cluster labels using node names instead of indices
        communities = [set() for _ in range(num_clusters)]
        for node, label in zip(nodes, labels):
            communities[label].add(node)
    else:
        raise ValueError(
            f"Unknown method: {method}. Choose 'leiden', 'louvain', or 'spectral'.")

    # Assign each node a community ID for use in visualization
    for i, community in enumerate(communities):
        for node in community:
            G_copy.nodes[node]['community'] = i

    return G_copy
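# Usage sketch (illustrative; assumes G was produced by build_graph above):
#
#     G_leiden = detect_communities(G)                               # default method
#     G_spectral = detect_communities(G, method="spectral", num_clusters=4)
#
# Note that 'spectral' needs an explicit num_clusters, while 'leiden' and
# 'louvain' infer the number of communities from the graph's structure.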
if __name__ == "__main__":
    import json

    with open(Path.home() / dataDir / 'conceptlist_test.json', 'r') as f:
        conceptlist = json.load(f)

    with open(Path.home() / dataDir / 'relationshiplist_test.json', 'r') as f:
        relationship_list = json.load(f)

    # Build the graph
    G_base = build_graph(relationship_list, directed=True)

    # Detect communities using the Leiden method
    G = detect_communities(G_base, method="leiden")

# %%