Python: guidance on using concurrent.futures to read a file asynchronously
Question:
I want to add asynchronous I/O reading to my script using the concurrent.futures module. I want the file to be read once, and then the result to be worked on.
Since the module's logic does not fit that approach, I created two separate functions, each of which reads the file on its own as a pandas DataFrame and then gives me its result.
import pandas as pd
import sys,os, time,re
import concurrent.futures
# Reference timestamp taken at script start; the elapsed time is computed from it.
start = time.perf_counter()
def getting_file_path(fileName):
    """Return the path that should be opened for ``fileName``.

    When the interpreter is running as a frozen bundle (``sys.frozen`` is
    truthy and ``sys._MEIPASS`` exists), the path is built from the current
    working directory with its last four characters removed, followed by
    ``fileName``; the normalised form of that path is printed. Otherwise
    ``fileName`` is returned unchanged.

    Args:
        fileName: Path of the file, as a string.

    Returns:
        The path string to open.
    """
    is_frozen = getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS')
    if not is_frozen:
        return fileName

    # NOTE(review): dropping the last four characters of the cwd presumably
    # strips a fixed-length trailing folder name -- confirm this is intended.
    path_main_folder = os.getcwd()[:-4]
    path_result = path_main_folder + fileName
    # Only the printed form is normalised; the returned path is not.
    print('frozen path', os.path.normpath(path_result))
    return path_result
def read_keys_dropdown():
    """Read the configuration file and return its column labels minus the ends.

    The file is parsed with ``pandas.read_json``. The first and the last
    column label are discarded and the remaining labels are returned. The
    result is also stored in the module-level ``lst_dropdown_keys``.

    Returns:
        A list of the column labels without the first and last one; empty
        when the file has fewer than three columns.
    """
    global lst_dropdown_keys
    file_to_read = pd.read_json(getting_file_path('./ConfigurationFile/configFile.csv'))
    # Slicing (unlike two pop() calls) does not raise when there are fewer
    # than two columns, and the labels are read without building a full dict.
    lst_dropdown_keys = list(file_to_read.columns)[1:-1]
    return lst_dropdown_keys
def read_url():
    """Read the configuration file and extract a segment of its first cell.

    The file is parsed with ``pandas.read_json`` and the value at row 0,
    column 0 is matched against a pattern that captures the text between
    the first and the second unescaped ``/``.

    Returns:
        The captured text, or ``None`` when the value does not match.
    """
    # NOTE(review): the pattern previously written here did not compile
    # (a backslash-escaped ``]`` left the character set unterminated). It is
    # reconstructed as the usual "escaped char or non-slash" idiom -- confirm
    # it captures the intended part of the stored value.
    pattern = re.compile(r"^(?:\\.|[^/\\])*/((?:\\.|[^/\\])*)/")
    file_to_read = pd.read_json(getting_file_path('./ConfigurationFile/configFile.csv'))
    found = pattern.match(file_to_read.values[0][0])
    # Return None instead of failing with AttributeError on a non-match.
    return found.group(1) if found else None
# Run both readers on worker threads and report their results together with
# the elapsed wall-clock time, separated by ';'.
with concurrent.futures.ThreadPoolExecutor() as executor:
    res_1 = executor.submit(read_keys_dropdown)
    res_2 = executor.submit(read_url)
    # result() blocks until the task has finished, so the end timestamp
    # below is taken only after both reads have actually completed.
    keys_result = res_1.result()
    url_result = res_2.result()
finish = time.perf_counter()
print(keys_result, url_result, finish - start, sep=';')
Before, I was doing it differently: I read file_to_read = pd.read_json(getting_file_path('./ConfigurationFile/configFile.csv'))
in global scope and then used that variable in both functions.
When I tried submitting the read as a task and then working on its result, I got an error saying the Future
object has no attribute
to_dict, nor values[0]. So, if I need to speed up my script, and the concurrent.futures or threading modules are the better choice for file I/O, how else can I use them in my script?
Answers:
Here’s an example of how you could use a class with "lazy" loading.
The logic in the path() and read_keys_dropdown() functions is taken from the original question and is very likely not going to achieve the desired objective.
This is obviously untested and there are outstanding questions such as why read_json() is being used for a CSV file.
import sys
import os
import pandas as pd
import re
class MyClass:
    """Lazily load a JSON configuration file and expose values derived from it.

    The resolved path, the parsed DataFrame and the compiled pattern are each
    computed on first access and cached on the instance, so the file is read
    at most once per instance.

    NOTE(review): the default file name ends in ``.csv`` but is parsed with
    ``pandas.read_json`` -- confirm the actual file format.
    """

    def __init__(self, filename='./ConfigurationFile/configFile.csv'):
        """Store ``filename``; nothing is read until a property is accessed."""
        self._filename = filename
        self._path = None     # resolved path, filled in by ``path``
        self._df = None       # parsed DataFrame, filled in by ``df``
        self._pattern = None  # compiled regex, filled in by ``pattern``

    @property
    def filename(self):
        """The file name given at construction."""
        return self._filename

    @property
    def path(self):
        """The path to open, resolved once and cached.

        In a frozen bundle (``sys.frozen`` truthy and ``sys._MEIPASS``
        present) the path is the current working directory minus its last
        four characters, followed by ``filename``; otherwise it is
        ``filename`` itself.
        """
        if self._path is None:
            if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
                # NOTE(review): dropping four characters from the cwd looks
                # highly dubious -- confirm the intended base directory.
                path_main_folder = os.getcwd()[:-4]
                self._path = path_main_folder + self.filename
                print('frozen path', os.path.normpath(self._path))
            else:
                self._path = self.filename
        return self._path

    @property
    def df(self):
        """The file parsed with ``pandas.read_json``, loaded on first access."""
        if self._df is None:
            self._df = pd.read_json(self.path)
        return self._df

    @property
    def pattern(self):
        """Compiled regex capturing the text between the first two unescaped ``/``."""
        if self._pattern is None:
            # NOTE(review): the pattern previously written here did not
            # compile (a backslash-escaped ``]`` left the character set
            # unterminated). Reconstructed as the usual "escaped char or
            # non-slash" idiom -- confirm against the real data.
            self._pattern = re.compile(r"^(?:\\.|[^/\\])*/((?:\\.|[^/\\])*)/")
        return self._pattern

    def read_keys_dropdown(self):
        """Return the column labels of ``df`` without the first and last one.

        Returns an empty list when there are fewer than three columns.
        """
        # NOTE(review): discarding the first and last label looks dubious;
        # slicing at least avoids the IndexError two pop() calls could raise.
        return list(self.df.columns)[1:-1]

    def read_url(self):
        """Return the pattern's captured group for the cell at row 0, column 0.

        Returns ``None`` when the value does not match the pattern.
        """
        if m := self.pattern.match(self.df.values[0][0]):
            return m.group(1)
        return None
# One instance is shared by both calls, so the second call reuses the
# DataFrame that the first call loaded.
clazz = MyClass()
for reader in (clazz.read_keys_dropdown, clazz.read_url):
    print(reader())
I want to add asynchronous I/O reading to my script using the concurrent.futures module. I want the file to be read once, and then the result to be worked on.
Since the module's logic does not fit that approach, I created two separate functions, each of which reads the file on its own as a pandas DataFrame and then gives me its result.
import pandas as pd
import sys,os, time,re
import concurrent.futures
# Reference timestamp taken at script start; the elapsed time is computed from it.
start = time.perf_counter()
def getting_file_path(fileName):
    """Return the path that should be opened for ``fileName``.

    When the interpreter is running as a frozen bundle (``sys.frozen`` is
    truthy and ``sys._MEIPASS`` exists), the path is built from the current
    working directory with its last four characters removed, followed by
    ``fileName``; the normalised form of that path is printed. Otherwise
    ``fileName`` is returned unchanged.

    Args:
        fileName: Path of the file, as a string.

    Returns:
        The path string to open.
    """
    is_frozen = getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS')
    if not is_frozen:
        return fileName

    # NOTE(review): dropping the last four characters of the cwd presumably
    # strips a fixed-length trailing folder name -- confirm this is intended.
    path_main_folder = os.getcwd()[:-4]
    path_result = path_main_folder + fileName
    # Only the printed form is normalised; the returned path is not.
    print('frozen path', os.path.normpath(path_result))
    return path_result
def read_keys_dropdown():
    """Read the configuration file and return its column labels minus the ends.

    The file is parsed with ``pandas.read_json``. The first and the last
    column label are discarded and the remaining labels are returned. The
    result is also stored in the module-level ``lst_dropdown_keys``.

    Returns:
        A list of the column labels without the first and last one; empty
        when the file has fewer than three columns.
    """
    global lst_dropdown_keys
    file_to_read = pd.read_json(getting_file_path('./ConfigurationFile/configFile.csv'))
    # Slicing (unlike two pop() calls) does not raise when there are fewer
    # than two columns, and the labels are read without building a full dict.
    lst_dropdown_keys = list(file_to_read.columns)[1:-1]
    return lst_dropdown_keys
def read_url():
    """Read the configuration file and extract a segment of its first cell.

    The file is parsed with ``pandas.read_json`` and the value at row 0,
    column 0 is matched against a pattern that captures the text between
    the first and the second unescaped ``/``.

    Returns:
        The captured text, or ``None`` when the value does not match.
    """
    # NOTE(review): the pattern previously written here did not compile
    # (a backslash-escaped ``]`` left the character set unterminated). It is
    # reconstructed as the usual "escaped char or non-slash" idiom -- confirm
    # it captures the intended part of the stored value.
    pattern = re.compile(r"^(?:\\.|[^/\\])*/((?:\\.|[^/\\])*)/")
    file_to_read = pd.read_json(getting_file_path('./ConfigurationFile/configFile.csv'))
    found = pattern.match(file_to_read.values[0][0])
    # Return None instead of failing with AttributeError on a non-match.
    return found.group(1) if found else None
# Run both readers on worker threads and report their results together with
# the elapsed wall-clock time, separated by ';'.
with concurrent.futures.ThreadPoolExecutor() as executor:
    res_1 = executor.submit(read_keys_dropdown)
    res_2 = executor.submit(read_url)
    # result() blocks until the task has finished, so the end timestamp
    # below is taken only after both reads have actually completed.
    keys_result = res_1.result()
    url_result = res_2.result()
finish = time.perf_counter()
print(keys_result, url_result, finish - start, sep=';')
Before, I was doing it differently: I read file_to_read = pd.read_json(getting_file_path('./ConfigurationFile/configFile.csv'))
in global scope and then used that variable in both functions.
When I tried submitting the read as a task and then working on its result, I got an error saying the Future
object has no attribute
to_dict, nor values[0]. So, if I need to speed up my script, and the concurrent.futures or threading modules are the better choice for file I/O, how else can I use them in my script?
Here’s an example of how you could use a class with "lazy" loading.
The logic in the path() and read_keys_dropdown() functions is taken from the original question and is very likely not going to achieve the desired objective.
This is obviously untested and there are outstanding questions such as why read_json() is being used for a CSV file.
import sys
import os
import pandas as pd
import re
class MyClass:
    """Lazily load a JSON configuration file and expose values derived from it.

    The resolved path, the parsed DataFrame and the compiled pattern are each
    computed on first access and cached on the instance, so the file is read
    at most once per instance.

    NOTE(review): the default file name ends in ``.csv`` but is parsed with
    ``pandas.read_json`` -- confirm the actual file format.
    """

    def __init__(self, filename='./ConfigurationFile/configFile.csv'):
        """Store ``filename``; nothing is read until a property is accessed."""
        self._filename = filename
        self._path = None     # resolved path, filled in by ``path``
        self._df = None       # parsed DataFrame, filled in by ``df``
        self._pattern = None  # compiled regex, filled in by ``pattern``

    @property
    def filename(self):
        """The file name given at construction."""
        return self._filename

    @property
    def path(self):
        """The path to open, resolved once and cached.

        In a frozen bundle (``sys.frozen`` truthy and ``sys._MEIPASS``
        present) the path is the current working directory minus its last
        four characters, followed by ``filename``; otherwise it is
        ``filename`` itself.
        """
        if self._path is None:
            if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
                # NOTE(review): dropping four characters from the cwd looks
                # highly dubious -- confirm the intended base directory.
                path_main_folder = os.getcwd()[:-4]
                self._path = path_main_folder + self.filename
                print('frozen path', os.path.normpath(self._path))
            else:
                self._path = self.filename
        return self._path

    @property
    def df(self):
        """The file parsed with ``pandas.read_json``, loaded on first access."""
        if self._df is None:
            self._df = pd.read_json(self.path)
        return self._df

    @property
    def pattern(self):
        """Compiled regex capturing the text between the first two unescaped ``/``."""
        if self._pattern is None:
            # NOTE(review): the pattern previously written here did not
            # compile (a backslash-escaped ``]`` left the character set
            # unterminated). Reconstructed as the usual "escaped char or
            # non-slash" idiom -- confirm against the real data.
            self._pattern = re.compile(r"^(?:\\.|[^/\\])*/((?:\\.|[^/\\])*)/")
        return self._pattern

    def read_keys_dropdown(self):
        """Return the column labels of ``df`` without the first and last one.

        Returns an empty list when there are fewer than three columns.
        """
        # NOTE(review): discarding the first and last label looks dubious;
        # slicing at least avoids the IndexError two pop() calls could raise.
        return list(self.df.columns)[1:-1]

    def read_url(self):
        """Return the pattern's captured group for the cell at row 0, column 0.

        Returns ``None`` when the value does not match the pattern.
        """
        if m := self.pattern.match(self.df.values[0][0]):
            return m.group(1)
        return None
# One instance is shared by both calls, so the second call reuses the
# DataFrame that the first call loaded.
clazz = MyClass()
for reader in (clazz.read_keys_dropdown, clazz.read_url):
    print(reader())