Finding all directed paths in networkx and saving them as a dataframe

Question:

I need to find all directed paths in a network as shown in the sample, and save the directed paths in a new dataframe.

Sample:

import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Edge list for the example: row i describes one directed edge that points
# from sample_dict['source'][i] to sample_dict['target'][i].
sample_dict = {
    'target': ['A', 'A', 'B', 'B', 'F'],
    'source': ['B', 'E', 'C', 'D', 'G'],
}
sample_data = pd.DataFrame(sample_dict)

# Build a directed graph so that the orientation of every edge is kept.
G = nx.from_pandas_edgelist(
    sample_data,
    source='source',
    target='target',
    create_using=nx.DiGraph,
)

# Compute a layout, then draw the labelled graph and display it.
pos = nx.spring_layout(G)
nx.draw(G, pos=pos, with_labels=True)
plt.show()

enter image description here

I have tried nx.weakly_connected_components, but I don’t know how to account for the direction of the edges.

# Map every node to a label listing all members of its weakly connected
# component (sorted and comma-joined). Edge direction is ignored here, which
# is why this attempt groups too many nodes together.
d = {}
for component in nx.weakly_connected_components(G):
    label = ','.join(sorted(component))
    d.update(dict.fromkeys(component, label))

# One row per node: column 0 is the node, column 1 its component label.
attempt_data = pd.DataFrame(d.items())


    0   1
0   A   A,B,C,D,E
1   C   A,B,C,D,E
2   D   A,B,C,D,E
3   E   A,B,C,D,E
4   B   A,B,C,D,E
5   G   F,G
6   F   F,G

Desired output:

# Target table, written row by row: each pair is (unit, comma-joined group).
rows = [
    ('A', 'A,B,C'),
    ('A', 'A,B,D'),
    ('A', 'A,E'),
    ('B', 'A,B,C'),
    ('B', 'A,B,D'),
    ('C', 'A,B,C'),
    ('D', 'A,B,D'),
    ('E', 'A,E'),
    ('F', 'F,G'),
    ('G', 'F,G'),
]

# Split the rows back into the two columns expected by the DataFrame.
desired_dict = {
    'unit': [unit for unit, _ in rows],
    'group': [group for _, group in rows],
}

desired_data = pd.DataFrame(desired_dict)
print(desired_data)

  unit  group
0   A   A,B,C
1   A   A,B,D
2   A   A,E
3   B   A,B,C
4   B   A,B,D
5   C   A,B,C
6   D   A,B,D
7   E   A,E
8   F   F,G
9   G   F,G
Asked By: hagder

||

Answers:

Here is a working, if somewhat "ugly", solution. The steps are explained in the code comments. I have used a lot of for loops, so if someone can improve on this, I would appreciate it.

# Find the start and end nodes of the directed paths from their degrees.
# A start node has no incoming edge. It may have several outgoing edges, so
# test out_degree >= 1 (== 1 would silently skip such nodes).
sources = [x for x in G.nodes() if G.in_degree(x) == 0 and G.out_degree(x) >= 1]
# An end node has at least one incoming edge and no outgoing edge.
targets = [x for x in G.nodes() if G.out_degree(x) == 0 and G.in_degree(x) >= 1]


# Generate all the paths between the sources and the targets.
# Every simple path is kept (not only the first one per pair), so alternative
# routes between the same two nodes are not lost. Pairs that are not connected
# simply contribute nothing.
paths = []
for source_node in sources:
    for target_node in targets:
        paths.extend(nx.all_simple_paths(G, source=source_node, target=target_node))


# Pair every node with each path it lies on.
# A simple path visits a node at most once, so walking the path itself gives
# exactly one row per (node, path) without scanning every node of the graph.
unit_list = []
group_list = []

for path in paths:
    # Paths run source -> target; the group is written target first.
    # map(str, ...) lets non-string node labels be joined as well.
    group = ','.join(map(str, reversed(path)))
    for node in path:
        unit_list.append(node)
        group_list.append(group)

# Sort the rows by unit, then by group.
sorted_pairs = sorted(zip(unit_list, group_list))

# Building the columns from the pairs also works when no path was found
# (the result is then an empty frame with both columns).
desired_dict = {'unit': [unit for unit, _ in sorted_pairs],
                'group': [group for _, group in sorted_pairs]}

desired_data = pd.DataFrame(desired_dict)

print(desired_data)

The result is as you wished:

 unit  group
0    A  A,B,C
1    A  A,B,D
2    A    A,E
3    B  A,B,C
4    B  A,B,D
5    C  A,B,C
6    D  A,B,D
7    E    A,E
8    F    F,G
9    G    F,G
Answered By: HMH1013
# Find the start and end nodes of the directed paths from their degrees.
# A start node has no incoming edge. It may have several outgoing edges, so
# test out_degree >= 1 (== 1 would silently skip such nodes).
sources = [x for x in G.nodes() if G.in_degree(x) == 0 and G.out_degree(x) >= 1]
# An end node has at least one incoming edge and no outgoing edge.
targets = [x for x in G.nodes() if G.out_degree(x) == 0 and G.in_degree(x) >= 1]


# Generate all the paths between the sources and the targets.
# The paths are collected inside the inner loop, once per (source, target)
# pair; collecting them after the inner loop would only ever look at the last
# target. Every simple path is kept, not just the first one per pair.
paths = []
for source_node in sources:
    for target_node in targets:
        paths.extend(nx.all_simple_paths(G, source=source_node, target=target_node))


# Pair every node with each path it lies on.
# A simple path visits a node at most once, so walking the path itself gives
# exactly one row per (node, path) without scanning every node of the graph.
unit_list = []
group_list = []

for path in paths:
    # Paths run source -> target; the group is written target first.
    # map(str, ...) lets non-string node labels be joined as well.
    group = ','.join(map(str, reversed(path)))
    for node in path:
        unit_list.append(node)
        group_list.append(group)

# Sort the rows by unit, then by group.
sorted_pairs = sorted(zip(unit_list, group_list))

# Building the columns from the pairs also works when no path was found
# (the result is then an empty frame with both columns).
desired_dict = {'unit': [unit for unit, _ in sorted_pairs],
                'group': [group for _, group in sorted_pairs]}

desired_data = pd.DataFrame(desired_dict)

This works! But it was very slow on a large dataset, so I made some adjustments that made it somewhat faster:

# Find the start and end nodes of the directed paths from their degrees.
# A start node has no incoming edge. It may have several outgoing edges, so
# test out_degree >= 1 (== 1 would silently skip such nodes).
sources = [x for x in G.nodes() if G.in_degree(x) == 0 and G.out_degree(x) >= 1]
# An end node has at least one incoming edge and no outgoing edge.
targets = [x for x in G.nodes() if G.out_degree(x) == 0 and G.in_degree(x) >= 1]

# Initialize a dictionary to store the path strings of each node.
# Every node gets an entry up front, so the output follows the node order
# of G.nodes().
node_to_paths = {node: [] for node in G.nodes()}

# Generate all the paths between the sources and the targets and register
# each path with every node on it.
for source_node in sources:
    for target_node in targets:
        # Consume the generator directly instead of building a list first.
        for path in nx.all_simple_paths(G, source=source_node, target=target_node):
            # Paths run source -> target; the group is written target first.
            # map(str, ...) lets non-string node labels be joined as well.
            path_str = ','.join(map(str, reversed(path)))
            for node in path:
                node_to_paths[node].append(path_str)

# Flatten the mapping into two parallel columns: one row per (unit, group).
units = []
groups = []

for unit, group_list in node_to_paths.items():
    units.extend([unit] * len(group_list))
    groups.extend(group_list)

desired_data = pd.DataFrame({
    'unit': units,
    'group': groups
})
Answered By: hagder