How can I load a saved JSON tree with treelib?
Question:
I have made a Python script wherein I process a big html with BeautifulSoup while I build a tree from it using treelib: http://xiaming.me/treelib/.
I have found that this library comes with methods to save the tree file on my system and also parsing it to JSON. But after I do this, how can I load it?
It is not efficient to build the same entire tree for each run. I think I can make a function to parse the JSON tree previously written to a file but I just want to be sure if there exists another easy way or not.
Thanks in advance
Answers:
The simple Answer
With this treelib, you can’t.
As they say in their documentation (http://xiaming.me/treelib/pyapi.html#node-objects):
tree.save2file(filename[, nid[, level[, idhidden[, filter[, key[, reverse]]]]]]])
Save the tree into file for offline analysis.
It does not contain any JSON-Parser, so it can not read the files.
What can you do?
One option is to rebuild the tree from scratch on every run.
The other is to implement a JSON reader that parses the saved file and recreates the tree for you.
I have built a small parser for my case. Maybe it works in your case.
The node identifiers are named after the tag plus the depth of the node in the tree (tag+depth).
import json
from types import prepare_class
from treelib import Node, Tree, node
import os
# Resolve tree.json next to this script so the load works from any working
# directory.  os.path.join is used instead of appending '\tree.json' by hand:
# inside a normal string literal '\t' is a TAB character, so the hand-built
# path pointed at a file that does not exist.
file_path = os.path.abspath(os.path.dirname(__file__))
with open(os.path.join(file_path, 'tree.json'), encoding='utf-8') as f:
    tree_json = json.load(f)

# Module-level tree that load_tree() fills in.
tree = Tree()
def load_tree(json_tree, depth=0, parent=None):
    """Rebuild the module-level ``tree`` from a nested JSON mapping.

    The expected shape is ``{tag: {"children": [...]}}`` where every child is
    either a plain leaf value or another mapping of the same shape
    (presumably what treelib's ``Tree.to_json()`` wrote -- confirm against
    the file you saved).  Only tags are restored; any other keys stored next
    to ``"children"`` are ignored.

    Node identifiers are ``str(tag) + str(depth)``, where ``depth`` is the
    recursion level at which the node is created (the root and its direct
    children both use the initial ``depth``).

    NOTE: identifiers depend only on tag and depth, so two nodes with the
    same tag created at the same depth get the same identifier.  treelib
    rejects duplicate identifiers, so such input cannot be loaded with this
    naming scheme.

    Args:
        json_tree: Mapping whose first key is the node tag and whose value
            holds the optional ``"children"`` list.
        depth: Recursion level appended to each tag to form identifiers.
        parent: Node the children are attached to.  ``None`` on the initial
            call, in which case the root node is created first.
    """
    # Only the first key is considered, as in the saved one-key-per-node layout.
    tag = next(iter(json_tree))
    if parent is None:
        # create_node returns the new node, so no second lookup is needed.
        parent = tree.create_node(tag=str(tag), identifier=str(tag) + str(depth))

    # A node without a "children" entry is treated as having no children.
    for child in json_tree[tag].get('children', []):
        if isinstance(child, dict):
            # Inner node: create it, then descend one level with it as parent.
            child_tag = next(iter(child))
            child_node = tree.create_node(
                tag=child_tag,
                identifier=str(child_tag) + str(depth),
                parent=parent,
            )
            load_tree(child, depth + 1, child_node)
        else:
            # Leaf: any non-mapping value (string, number, ...) is its own tag.
            tree.create_node(tag=child, identifier=str(child) + str(depth), parent=parent)


load_tree(tree_json)
I have created a function to convert json to a tree:
from treelib import Node, Tree, node
def create_node(tree, s, counter_byref, verbose, parent_id=None):
    """Add a node tagged ``s`` to ``tree`` and return the id it was given.

    ``counter_byref`` is a one-element list used as a mutable counter: its
    current value becomes the identifier of the new node and is then
    advanced by one.  When ``verbose`` is true the equivalent
    ``tree.create_node`` call is printed before it is made.
    """
    assigned_id = counter_byref[0]
    if verbose:
        print(f"tree.create_node({s}, {assigned_id}, parent={parent_id})")
    tree.create_node(s, assigned_id, parent=parent_id)
    # Advance the shared counter so the next node gets a fresh id.
    counter_byref[0] = assigned_id + 1
    return assigned_id
def to_compact_string(o):
    """Render ``o`` on one line if it is a single chain of values.

    A dict with exactly one item becomes ``"key:value"`` and a list with
    exactly one element becomes ``"[element]"``; both rules are applied
    recursively.  Any other object is rendered with ``str()``.

    Raises:
        ValueError: if a dict or list anywhere in ``o`` does not hold exactly
            one item (empty containers included), i.e. ``o`` cannot be
            compacted.
    """
    if isinstance(o, dict):
        if len(o) != 1:
            raise ValueError(f"cannot compact a dict with {len(o)} items")
        (k, v), = o.items()
        return f'{k}:{to_compact_string(v)}'
    if isinstance(o, list):
        if len(o) != 1:
            raise ValueError(f"cannot compact a list with {len(o)} items")
        return f'[{to_compact_string(o[0])}]'
    return str(o)
def to_compact(tree, o, counter_byref, verbose, parent_id):
    """Try to insert ``o`` into ``tree`` as a single compact node.

    Returns:
        True if ``to_compact_string`` could render ``o`` and a node with that
        text was created under ``parent_id``; False, with nothing inserted,
        if ``o`` cannot be compacted.
    """
    # to_compact_string reports "not compactable" by raising.  Only that call
    # is guarded, so a failure while inserting the node is no longer silently
    # turned into False (and KeyboardInterrupt/SystemExit are not swallowed).
    try:
        s = to_compact_string(o)
    except Exception:
        return False
    if verbose:
        print(f"# to_compact({o}) ==> [{s}]")
    create_node(tree, s, counter_byref, verbose, parent_id=parent_id)
    return True
def json_2_tree(o, parent_id=None, tree=None, counter_byref=None, verbose=False, compact_single_dict=False, listsNodeSymbol='+'):
    """Convert a decoded JSON value into a treelib ``Tree``.

    Dict keys become nodes with their values below them, list items become
    siblings (optionally under an extra marker node), and every other value
    becomes a leaf tagged ``str(value)``.  Node ids are consecutive integers
    taken from ``counter_byref``.

    Args:
        o: The value to convert (dict, list, or scalar).
        parent_id: Id of the node to attach ``o`` under.  Ignored when
            ``tree`` is None, because a new root is created then.
        tree: Tree to extend.  When None, a new tree with a root node tagged
            ``'+'`` is created.
        counter_byref: One-element list holding the next free node id.  When
            omitted, numbering starts at 0 for a new tree, or just after the
            largest integer id already present when extending ``tree``.
        verbose: Print each step as it is performed.
        compact_single_dict: Collapse values that ``to_compact`` accepts
            (single-item chains such as ``{"7": {"8": 9}}``) into one node.
        listsNodeSymbol: Tag of the extra node inserted for every list, or
            None to attach list items directly to the current parent.

    Returns:
        The tree that was created or extended.
    """
    if counter_byref is None:
        # A fresh counter per top-level call: the former ``[0]`` default was
        # one list shared by every call, so ids depended on call history.
        if tree is None:
            counter_byref = [0]
        else:
            used_ids = [nid for nid in tree.nodes if isinstance(nid, int)]
            counter_byref = [max(used_ids, default=-1) + 1]

    if tree is None:
        tree = Tree()
        parent_id = create_node(tree, '+', counter_byref, verbose)

    if compact_single_dict and to_compact(tree, o, counter_byref, verbose, parent_id):
        # Inserted as a single node; nothing more to do.
        return tree

    if isinstance(o, dict):
        for k, v in o.items():
            if compact_single_dict and to_compact(tree, {k: v}, counter_byref, verbose, parent_id):
                # This key/value pair was inserted as a single node.
                continue
            key_nd_id = create_node(tree, str(k), counter_byref, verbose, parent_id=parent_id)
            if verbose:
                print(f"# json_2_tree({v})")
            json_2_tree(v, parent_id=key_nd_id, tree=tree, counter_byref=counter_byref,
                        verbose=verbose, listsNodeSymbol=listsNodeSymbol,
                        compact_single_dict=compact_single_dict)
    elif isinstance(o, list):
        if listsNodeSymbol is not None:
            # Group the items under a dedicated marker node.
            parent_id = create_node(tree, listsNodeSymbol, counter_byref, verbose, parent_id=parent_id)
        for i in o:
            if compact_single_dict and to_compact(tree, i, counter_byref, verbose, parent_id):
                # This item was inserted as a single node.
                continue
            if verbose:
                print(f"# json_2_tree({i})")
            json_2_tree(i, parent_id=parent_id, tree=tree, counter_byref=counter_byref,
                        verbose=verbose, listsNodeSymbol=listsNodeSymbol,
                        compact_single_dict=compact_single_dict)
    else:
        # Scalar value: a leaf node.
        create_node(tree, str(o), counter_byref, verbose, parent_id=parent_id)
    return tree
Then for example:
import json
# Sample document: a scalar value, a list, and a nested dict.
j = json.loads('{"2": 3, "4": [5, 6], "7": {"8": 9}}')
# Every list gets its own '+' marker node below its key.
json_2_tree(j ,verbose=False,listsNodeSymbol='+' ).show()
gives:
+
├── 2
│ └── 3
├── 4
│ └── +
│ ├── 5
│ └── 6
└── 7
└── 8
└── 9
While
# listsNodeSymbol=None: list items are attached directly below their key node.
json_2_tree(j ,listsNodeSymbol=None, verbose=False ).show()
+
├── 2
│ └── 3
├── 4
│ ├── 5
│ └── 6
└── 7
└── 8
└── 9
And
# compact_single_dict=True: single-item chains collapse into one node (e.g. 7:8:9).
json_2_tree(j ,compact_single_dict=True,listsNodeSymbol=None).show()
+
├── 2:3
├── 4
│ ├── 5
│ └── 6
└── 7:8:9
As you can see, different trees can be built depending on how explicit or how compact you want the result to be.
One of my favorites, and one of the most compact ones might be using yaml:
import yaml
# Same document with string values, printed as YAML.
j = json.loads('{"2": "3", "4": ["5", "6"], "7": {"8": "9"}}')
# sort_keys=False asks yaml.dump not to sort the mapping keys.
print(yaml.dump(j, sort_keys=False))
Gives the compact and unambiguous:
'2': '3'
'4':
- '5'
- '6'
'7':
'8': '9'
I have made a Python script wherein I process a big html with BeautifulSoup while I build a tree from it using treelib: http://xiaming.me/treelib/.
I have found that this library comes with methods to save the tree file on my system and also parsing it to JSON. But after I do this, how can I load it?
It is not efficient to build the same entire tree for each run. I think I can make a function to parse the JSON tree previously written to a file but I just want to be sure if there exists another easy way or not.
Thanks in advance
The simple Answer
With this treelib, you can’t.
As they say in their documentation (http://xiaming.me/treelib/pyapi.html#node-objects):
tree.save2file(filename[, nid[, level[, idhidden[, filter[, key[, reverse]]]]]]])
Save the tree into file for offline analysis.
It does not contain any JSON-Parser, so it can not read the files.
What can you do?
One option is to rebuild the tree from scratch on every run.
The other is to implement a JSON reader that parses the saved file and recreates the tree for you.
I have built a small parser for my case. Maybe it works in your case.
The node identifiers are named after the tag plus the depth of the node in the tree (tag+depth).
import json
from types import prepare_class
from treelib import Node, Tree, node
import os
# Resolve tree.json next to this script so the load works from any working
# directory.  os.path.join is used instead of appending '\tree.json' by hand:
# inside a normal string literal '\t' is a TAB character, so the hand-built
# path pointed at a file that does not exist.
file_path = os.path.abspath(os.path.dirname(__file__))
with open(os.path.join(file_path, 'tree.json'), encoding='utf-8') as f:
    tree_json = json.load(f)

# Module-level tree that load_tree() fills in.
tree = Tree()
def load_tree(json_tree, depth=0, parent=None):
    """Rebuild the module-level ``tree`` from a nested JSON mapping.

    The expected shape is ``{tag: {"children": [...]}}`` where every child is
    either a plain leaf value or another mapping of the same shape
    (presumably what treelib's ``Tree.to_json()`` wrote -- confirm against
    the file you saved).  Only tags are restored; any other keys stored next
    to ``"children"`` are ignored.

    Node identifiers are ``str(tag) + str(depth)``, where ``depth`` is the
    recursion level at which the node is created (the root and its direct
    children both use the initial ``depth``).

    NOTE: identifiers depend only on tag and depth, so two nodes with the
    same tag created at the same depth get the same identifier.  treelib
    rejects duplicate identifiers, so such input cannot be loaded with this
    naming scheme.

    Args:
        json_tree: Mapping whose first key is the node tag and whose value
            holds the optional ``"children"`` list.
        depth: Recursion level appended to each tag to form identifiers.
        parent: Node the children are attached to.  ``None`` on the initial
            call, in which case the root node is created first.
    """
    # Only the first key is considered, as in the saved one-key-per-node layout.
    tag = next(iter(json_tree))
    if parent is None:
        # create_node returns the new node, so no second lookup is needed.
        parent = tree.create_node(tag=str(tag), identifier=str(tag) + str(depth))

    # A node without a "children" entry is treated as having no children.
    for child in json_tree[tag].get('children', []):
        if isinstance(child, dict):
            # Inner node: create it, then descend one level with it as parent.
            child_tag = next(iter(child))
            child_node = tree.create_node(
                tag=child_tag,
                identifier=str(child_tag) + str(depth),
                parent=parent,
            )
            load_tree(child, depth + 1, child_node)
        else:
            # Leaf: any non-mapping value (string, number, ...) is its own tag.
            tree.create_node(tag=child, identifier=str(child) + str(depth), parent=parent)


load_tree(tree_json)
I have created a function to convert json to a tree:
from treelib import Node, Tree, node
def create_node(tree, s, counter_byref, verbose, parent_id=None):
    """Add a node tagged ``s`` to ``tree`` and return the id it was given.

    ``counter_byref`` is a one-element list used as a mutable counter: its
    current value becomes the identifier of the new node and is then
    advanced by one.  When ``verbose`` is true the equivalent
    ``tree.create_node`` call is printed before it is made.
    """
    assigned_id = counter_byref[0]
    if verbose:
        print(f"tree.create_node({s}, {assigned_id}, parent={parent_id})")
    tree.create_node(s, assigned_id, parent=parent_id)
    # Advance the shared counter so the next node gets a fresh id.
    counter_byref[0] = assigned_id + 1
    return assigned_id
def to_compact_string(o):
    """Render ``o`` on one line if it is a single chain of values.

    A dict with exactly one item becomes ``"key:value"`` and a list with
    exactly one element becomes ``"[element]"``; both rules are applied
    recursively.  Any other object is rendered with ``str()``.

    Raises:
        ValueError: if a dict or list anywhere in ``o`` does not hold exactly
            one item (empty containers included), i.e. ``o`` cannot be
            compacted.
    """
    if isinstance(o, dict):
        if len(o) != 1:
            raise ValueError(f"cannot compact a dict with {len(o)} items")
        (k, v), = o.items()
        return f'{k}:{to_compact_string(v)}'
    if isinstance(o, list):
        if len(o) != 1:
            raise ValueError(f"cannot compact a list with {len(o)} items")
        return f'[{to_compact_string(o[0])}]'
    return str(o)
def to_compact(tree, o, counter_byref, verbose, parent_id):
    """Try to insert ``o`` into ``tree`` as a single compact node.

    Returns:
        True if ``to_compact_string`` could render ``o`` and a node with that
        text was created under ``parent_id``; False, with nothing inserted,
        if ``o`` cannot be compacted.
    """
    # to_compact_string reports "not compactable" by raising.  Only that call
    # is guarded, so a failure while inserting the node is no longer silently
    # turned into False (and KeyboardInterrupt/SystemExit are not swallowed).
    try:
        s = to_compact_string(o)
    except Exception:
        return False
    if verbose:
        print(f"# to_compact({o}) ==> [{s}]")
    create_node(tree, s, counter_byref, verbose, parent_id=parent_id)
    return True
def json_2_tree(o, parent_id=None, tree=None, counter_byref=None, verbose=False, compact_single_dict=False, listsNodeSymbol='+'):
    """Convert a decoded JSON value into a treelib ``Tree``.

    Dict keys become nodes with their values below them, list items become
    siblings (optionally under an extra marker node), and every other value
    becomes a leaf tagged ``str(value)``.  Node ids are consecutive integers
    taken from ``counter_byref``.

    Args:
        o: The value to convert (dict, list, or scalar).
        parent_id: Id of the node to attach ``o`` under.  Ignored when
            ``tree`` is None, because a new root is created then.
        tree: Tree to extend.  When None, a new tree with a root node tagged
            ``'+'`` is created.
        counter_byref: One-element list holding the next free node id.  When
            omitted, numbering starts at 0 for a new tree, or just after the
            largest integer id already present when extending ``tree``.
        verbose: Print each step as it is performed.
        compact_single_dict: Collapse values that ``to_compact`` accepts
            (single-item chains such as ``{"7": {"8": 9}}``) into one node.
        listsNodeSymbol: Tag of the extra node inserted for every list, or
            None to attach list items directly to the current parent.

    Returns:
        The tree that was created or extended.
    """
    if counter_byref is None:
        # A fresh counter per top-level call: the former ``[0]`` default was
        # one list shared by every call, so ids depended on call history.
        if tree is None:
            counter_byref = [0]
        else:
            used_ids = [nid for nid in tree.nodes if isinstance(nid, int)]
            counter_byref = [max(used_ids, default=-1) + 1]

    if tree is None:
        tree = Tree()
        parent_id = create_node(tree, '+', counter_byref, verbose)

    if compact_single_dict and to_compact(tree, o, counter_byref, verbose, parent_id):
        # Inserted as a single node; nothing more to do.
        return tree

    if isinstance(o, dict):
        for k, v in o.items():
            if compact_single_dict and to_compact(tree, {k: v}, counter_byref, verbose, parent_id):
                # This key/value pair was inserted as a single node.
                continue
            key_nd_id = create_node(tree, str(k), counter_byref, verbose, parent_id=parent_id)
            if verbose:
                print(f"# json_2_tree({v})")
            json_2_tree(v, parent_id=key_nd_id, tree=tree, counter_byref=counter_byref,
                        verbose=verbose, listsNodeSymbol=listsNodeSymbol,
                        compact_single_dict=compact_single_dict)
    elif isinstance(o, list):
        if listsNodeSymbol is not None:
            # Group the items under a dedicated marker node.
            parent_id = create_node(tree, listsNodeSymbol, counter_byref, verbose, parent_id=parent_id)
        for i in o:
            if compact_single_dict and to_compact(tree, i, counter_byref, verbose, parent_id):
                # This item was inserted as a single node.
                continue
            if verbose:
                print(f"# json_2_tree({i})")
            json_2_tree(i, parent_id=parent_id, tree=tree, counter_byref=counter_byref,
                        verbose=verbose, listsNodeSymbol=listsNodeSymbol,
                        compact_single_dict=compact_single_dict)
    else:
        # Scalar value: a leaf node.
        create_node(tree, str(o), counter_byref, verbose, parent_id=parent_id)
    return tree
Then for example:
import json
# Sample document: a scalar value, a list, and a nested dict.
j = json.loads('{"2": 3, "4": [5, 6], "7": {"8": 9}}')
# Every list gets its own '+' marker node below its key.
json_2_tree(j ,verbose=False,listsNodeSymbol='+' ).show()
gives:
+
├── 2
│ └── 3
├── 4
│ └── +
│ ├── 5
│ └── 6
└── 7
└── 8
└── 9
While
# listsNodeSymbol=None: list items are attached directly below their key node.
json_2_tree(j ,listsNodeSymbol=None, verbose=False ).show()
+
├── 2
│ └── 3
├── 4
│ ├── 5
│ └── 6
└── 7
└── 8
└── 9
And
# compact_single_dict=True: single-item chains collapse into one node (e.g. 7:8:9).
json_2_tree(j ,compact_single_dict=True,listsNodeSymbol=None).show()
+
├── 2:3
├── 4
│ ├── 5
│ └── 6
└── 7:8:9
As you can see, different trees can be built depending on how explicit or how compact you want the result to be.
One of my favorites, and one of the most compact ones might be using yaml:
import yaml
# Same document with string values, printed as YAML.
j = json.loads('{"2": "3", "4": ["5", "6"], "7": {"8": "9"}}')
# sort_keys=False asks yaml.dump not to sort the mapping keys.
print(yaml.dump(j, sort_keys=False))
Gives the compact and unambiguous:
'2': '3'
'4':
- '5'
- '6'
'7':
'8': '9'