How to calculate how much network I/O a Python script is using?
Question:
Let’s consider the following file, script.py, which makes a dummy request:
import requests
# Issue one GET request and keep the result in `response`.
response = requests.get("https://example.com")
I can estimate how much network bandwidth I have used from the size of response.
What if the script has become complex and involves the use of multiple libraries? It gathers information from multiple websites, processes it, and outputs the results as a JSON file in the same directory.
In this scenario, how can the network bandwidth usage of the script be determined for each run? Would I have to manually go through the codebase and "add up" all the requests by hand? That does not seem feasible.
Answers:
Here is the solution code I ended up using:
import requests
# Running byte counters: overall, then split by HTTP method.
total_sent_length = total_recv_length = 0
get_sent_length = get_recv_length = 0
post_sent_length = post_recv_length = 0

# Keep references to the unpatched functions so the wrappers can delegate to them.
old_request_method_get, old_request_method_post = requests.get, requests.post
def format_size(bytes, suffix='B'):
    """Return a byte count as a human-readable string, e.g. ``'2.5KB'``.

    The value is divided by 1024 until its magnitude drops below 1024 and is
    then printed with one decimal place, a unit prefix and *suffix*.

    Args:
        bytes: The size to format (int or float, may be negative). The name
            shadows the builtin but is kept so keyword callers keep working.
        suffix: Text appended after the unit prefix.

    Returns:
        The formatted string; values of 1024**8 and above use the ``'Y'`` prefix.
    """
    # Work on a local copy instead of rebinding the parameter.
    size = bytes
    for unit in ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'):
        if abs(size) < 1024.0:
            return '%3.1f%s%s' % (size, unit, suffix)
        size /= 1024.0
    # Anything that survived all eight divisions is reported in 'Y' units.
    return '%.1f%s%s' % (size, 'Y', suffix)
def rlen(response):
    """Approximate the size of the request that produced *response*.

    Adds up the lengths of the request method, the URL, the headers (each
    name and value concatenated, entries joined by CRLF) and the body. This is
    an estimate only: request-line framing and header separators are not
    counted.

    Args:
        response: An object exposing ``request.method``, ``request.url``,
            ``request.headers`` (a mapping) and ``request.body``.

    Returns:
        The approximate request size as an int.
    """
    request = response.request
    header_block = '\r\n'.join(
        '{}{}'.format(name, value) for name, value in request.headers.items()
    )

    body = request.body
    if not body:
        body_length = 0
    else:
        try:
            body_length = len(body)
        except TypeError:
            # Streamed bodies (file objects, generators) have no len(); use the
            # declared Content-Length when there is one, otherwise count 0
            # rather than crashing after the request has already been sent.
            declared = str(request.headers.get('Content-Length', ''))
            body_length = int(declared) if declared.isdigit() else 0

    return len(request.method) + len(request.url) + len(header_block) + body_length
def patched_get(*args, **kwargs):
    """Call the original ``requests.get`` and add its traffic to the counters.

    All arguments are forwarded unchanged and the response is returned as-is.
    The module-level ``total_*`` and ``get_*`` counters are increased by the
    approximate request size and by the length of the response content.
    """
    global total_sent_length
    global total_recv_length
    global get_sent_length
    global get_recv_length
    response = old_request_method_get(*args, **kwargs)
    # Measure once so the overall and per-method counters always agree.
    # NOTE(review): only the final response is measured; confirm whether
    # redirect hops should be counted as well.
    sent = rlen(response)
    received = len(response.content)
    total_sent_length += sent
    total_recv_length += received
    get_sent_length += sent
    get_recv_length += received
    return response
def patched_post(*args, **kwargs):
    """Call the original ``requests.post`` and add its traffic to the counters.

    All arguments are forwarded unchanged and the response is returned as-is.
    The module-level ``total_*`` and ``post_*`` counters are increased by the
    approximate request size and by the length of the response content.
    """
    global total_sent_length
    global total_recv_length
    global post_sent_length
    global post_recv_length
    response = old_request_method_post(*args, **kwargs)
    # Measure once so the overall and per-method counters always agree.
    # NOTE(review): only the final response is measured; confirm whether
    # redirect hops should be counted as well.
    sent = rlen(response)
    received = len(response.content)
    total_sent_length += sent
    total_recv_length += received
    post_sent_length += sent
    post_recv_length += received
    return response
# Replace the module-level helpers with the counting wrappers.
requests.get = patched_get
requests.post = patched_post
# Counters before any request has been issued.
print('total_sent_length', format_size(total_sent_length))
print('total_recv_length', format_size(total_recv_length))
# Two GET requests, both routed through patched_get.
resp1 = requests.get('http://www.example.com/')
resp2 = requests.get('http://www.example.com/')
# Counters after the two requests.
print('total_sent_length', format_size(total_sent_length))
print('total_recv_length', format_size(total_recv_length))
Here are the sample results of a run:
total_sent_length 0.0B
total_recv_length 0.0B
total_sent_length 242.0B
total_recv_length 2.5KB
Let’s consider the following file, script.py, which makes a dummy request:
import requests
# Issue one GET request and keep the result in `response`.
response = requests.get("https://example.com")
I can estimate how much network bandwidth I have used from the size of response.
What if the script has become complex and involves the use of multiple libraries? It gathers information from multiple websites, processes it, and outputs the results as a JSON file in the same directory.
In this scenario, how can the network bandwidth usage of the script be determined for each run? Would I have to manually go through the codebase and "add up" all the requests by hand? That does not seem feasible.
Here is the solution code I ended up using:
import requests
# Running byte counters: overall, then split by HTTP method.
total_sent_length = total_recv_length = 0
get_sent_length = get_recv_length = 0
post_sent_length = post_recv_length = 0

# Keep references to the unpatched functions so the wrappers can delegate to them.
old_request_method_get, old_request_method_post = requests.get, requests.post
def format_size(bytes, suffix='B'):
    """Return a byte count as a human-readable string, e.g. ``'2.5KB'``.

    The value is divided by 1024 until its magnitude drops below 1024 and is
    then printed with one decimal place, a unit prefix and *suffix*.

    Args:
        bytes: The size to format (int or float, may be negative). The name
            shadows the builtin but is kept so keyword callers keep working.
        suffix: Text appended after the unit prefix.

    Returns:
        The formatted string; values of 1024**8 and above use the ``'Y'`` prefix.
    """
    # Work on a local copy instead of rebinding the parameter.
    size = bytes
    for unit in ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'):
        if abs(size) < 1024.0:
            return '%3.1f%s%s' % (size, unit, suffix)
        size /= 1024.0
    # Anything that survived all eight divisions is reported in 'Y' units.
    return '%.1f%s%s' % (size, 'Y', suffix)
def rlen(response):
    """Approximate the size of the request that produced *response*.

    Adds up the lengths of the request method, the URL, the headers (each
    name and value concatenated, entries joined by CRLF) and the body. This is
    an estimate only: request-line framing and header separators are not
    counted.

    Args:
        response: An object exposing ``request.method``, ``request.url``,
            ``request.headers`` (a mapping) and ``request.body``.

    Returns:
        The approximate request size as an int.
    """
    request = response.request
    header_block = '\r\n'.join(
        '{}{}'.format(name, value) for name, value in request.headers.items()
    )

    body = request.body
    if not body:
        body_length = 0
    else:
        try:
            body_length = len(body)
        except TypeError:
            # Streamed bodies (file objects, generators) have no len(); use the
            # declared Content-Length when there is one, otherwise count 0
            # rather than crashing after the request has already been sent.
            declared = str(request.headers.get('Content-Length', ''))
            body_length = int(declared) if declared.isdigit() else 0

    return len(request.method) + len(request.url) + len(header_block) + body_length
def patched_get(*args, **kwargs):
    """Call the original ``requests.get`` and add its traffic to the counters.

    All arguments are forwarded unchanged and the response is returned as-is.
    The module-level ``total_*`` and ``get_*`` counters are increased by the
    approximate request size and by the length of the response content.
    """
    global total_sent_length
    global total_recv_length
    global get_sent_length
    global get_recv_length
    response = old_request_method_get(*args, **kwargs)
    # Measure once so the overall and per-method counters always agree.
    # NOTE(review): only the final response is measured; confirm whether
    # redirect hops should be counted as well.
    sent = rlen(response)
    received = len(response.content)
    total_sent_length += sent
    total_recv_length += received
    get_sent_length += sent
    get_recv_length += received
    return response
def patched_post(*args, **kwargs):
    """Call the original ``requests.post`` and add its traffic to the counters.

    All arguments are forwarded unchanged and the response is returned as-is.
    The module-level ``total_*`` and ``post_*`` counters are increased by the
    approximate request size and by the length of the response content.
    """
    global total_sent_length
    global total_recv_length
    global post_sent_length
    global post_recv_length
    response = old_request_method_post(*args, **kwargs)
    # Measure once so the overall and per-method counters always agree.
    # NOTE(review): only the final response is measured; confirm whether
    # redirect hops should be counted as well.
    sent = rlen(response)
    received = len(response.content)
    total_sent_length += sent
    total_recv_length += received
    post_sent_length += sent
    post_recv_length += received
    return response
# Replace the module-level helpers with the counting wrappers.
requests.get = patched_get
requests.post = patched_post
# Counters before any request has been issued.
print('total_sent_length', format_size(total_sent_length))
print('total_recv_length', format_size(total_recv_length))
# Two GET requests, both routed through patched_get.
resp1 = requests.get('http://www.example.com/')
resp2 = requests.get('http://www.example.com/')
# Counters after the two requests.
print('total_sent_length', format_size(total_sent_length))
print('total_recv_length', format_size(total_recv_length))
Here are the sample results of a run:
total_sent_length 0.0B
total_recv_length 0.0B
total_sent_length 242.0B
total_recv_length 2.5KB