python using concurrent futures to read file asynchronously method guidance

Question:

I want to add asynchronous file reading to my script using the concurrent.futures module. I want the file to be read only once, and then have the result worked on.
Because the module's logic does not seem to fit that approach, I created two different functions; each one separately reads the same file as a pandas DataFrame (so the file is read twice) and then gives me its result.

import pandas as pd
import sys,os, time,re
import concurrent.futures

# Reference timestamp; the elapsed time printed later is measured from here.
start = time.perf_counter()
def getting_file_path(fileName):
    """Return the path that should be used to open ``fileName``.

    When ``sys.frozen`` is falsy or ``sys._MEIPASS`` is missing, the argument
    is returned unchanged. Otherwise the current working directory, with its
    last four characters removed, is prepended to ``fileName``.
    """
    running_frozen = getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS')
    if not running_frozen:
        return fileName

    # NOTE(review): the four-character trim presumably strips a known folder
    # suffix from the working directory -- confirm against the bundle layout.
    base_dir = os.getcwd()[:-4]
    frozen_target = base_dir + fileName
    # Only the printed form is normalised; the returned string is not.
    print('frozen path', os.path.normpath(frozen_target))
    return frozen_target


def read_keys_dropdown():
    """Load the configuration file and return its keys minus the first and last.

    The file is parsed with ``pandas.read_json``. The resulting list is also
    stored in the module-level global ``lst_dropdown_keys``.
    """
    global lst_dropdown_keys
    config_path = getting_file_path('./ConfigurationFile/configFile.csv')
    config_frame = pd.read_json(config_path)
    # Iterating a dict yields its keys, in the same order as ``.keys()``.
    lst_dropdown_keys = list(config_frame.to_dict())
    # Remove the two outer entries in place; only the inner keys are returned.
    del lst_dropdown_keys[0]
    del lst_dropdown_keys[-1]
    return lst_dropdown_keys


def read_url():
    """Extract a slash-delimited segment from the first cell of the config file.

    The configuration file is parsed with ``pandas.read_json`` and the value at
    row 0, column 0 is matched against the pattern below.

    Returns:
        The text captured by group 1 of the pattern, or ``None`` when the
        value does not match.
    """
    # The backslashes are doubled so that the character class ``[^/\\]`` is
    # properly terminated; with a single backslash the ``]`` is escaped and
    # ``re.compile`` fails with "unterminated character set".
    pattern = re.compile(r"^(?:/.|[^//])*/((?:\\.|[^/\\])*)/")
    file_to_read = pd.read_json(getting_file_path('./ConfigurationFile/configFile.csv'))
    # Run the regex once and guard against a non-matching value.
    match = pattern.match(file_to_read.values[0][0])
    return match.group(1) if match else None


# Run both readers on worker threads of a thread pool.
with concurrent.futures.ThreadPoolExecutor() as executor:
    res_1 = executor.submit(read_keys_dropdown)
    res_2 = executor.submit(read_url)
    # Collect both results before stopping the clock so the elapsed time
    # covers the reads themselves, not just the task submission.
    keys_result = res_1.result()
    url_result = res_2.result()
    finish = time.perf_counter()
    print(keys_result, url_result, finish - start, sep=';')

Before, I was doing it differently: I was reading file_to_read = pd.read_json(getting_file_path('./ConfigurationFile/configFile.csv')) in the global scope and then using that variable in both functions.
I tried submitting the read as a task and then working on its result, but it failed with an error saying that the Future object has no attribute to_dict (nor values[0]). So, if I need to speed up my script, and the concurrent.futures or threading modules are the better choice for file I/O, how else can I use them in my script?

Asked By: xlmaster

||

Answers:

Here’s an example of how you could use a class with "lazy" loading.

The logic in the path() and read_keys_dropdown() functions is taken from the original question and is very likely not going to achieve the desired objective.

This is obviously untested and there are outstanding questions such as why read_json() is being used for a CSV file.

import sys
import os
import pandas as pd
import re

class MyClass:
    """Lazily resolve, load and query the JSON configuration file.

    The resolved path, the parsed DataFrame and the compiled regex are each
    created on first access and cached on the instance, so a successful read
    of the file happens at most once per instance.
    """

    def __init__(self, filename='./ConfigurationFile/configFile.csv'):
        """Remember ``filename``; nothing is read until a property is used."""
        self._filename = filename
        self._path = None
        self._df = None
        self._pattern = None

    @property
    def filename(self):
        """The file name given at construction time."""
        return self._filename

    @property
    def path(self):
        """The path used to open the file, computed once and cached.

        When ``sys.frozen`` is truthy and ``sys._MEIPASS`` exists, the current
        working directory minus its last four characters is prepended to
        ``filename``; otherwise ``filename`` is used as-is.
        """
        if self._path is None:
            if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
                # NOTE(review): trimming four characters off the working
                # directory looks dubious -- confirm the intended base folder.
                path_actual = os.getcwd()
                path_main_folder = path_actual[:-4]
                self._path = path_main_folder + self.filename
                print('frozen path', os.path.normpath(self._path))
            else:
                self._path = self.filename
        return self._path

    @property
    def df(self):
        """The file parsed by ``pandas.read_json``, loaded on first access."""
        if self._df is None:
            self._df = pd.read_json(self.path)
        return self._df

    @property
    def pattern(self):
        """The compiled regex used by ``read_url``, built on first access."""
        if self._pattern is None:
            # The backslashes are doubled so that the character class
            # ``[^/\\]`` is terminated; with a single backslash the ``]`` is
            # escaped and ``re.compile`` fails with "unterminated character
            # set".
            self._pattern = re.compile(r"^(?:/.|[^//])*/((?:\\.|[^/\\])*)/")
        return self._pattern

    def read_keys_dropdown(self):
        """Return the DataFrame's keys without the first and the last one.

        A frame with fewer than two keys yields an empty list.
        """
        # NOTE(review): why the outer two keys are discarded is not visible
        # here -- confirm against the layout of the configuration file.
        return list(self.df.to_dict())[1:-1]

    def read_url(self):
        """Return group 1 of ``pattern`` matched against the cell at row 0,
        column 0, or ``None`` when that value does not match."""
        match = self.pattern.match(self.df.values[0][0])
        return match.group(1) if match else None


# One instance serves both queries, so they share its lazily loaded DataFrame.
clazz = MyClass()

# Call and print one reader at a time, keys first and then the URL segment.
for reader in (clazz.read_keys_dropdown, clazz.read_url):
    print(reader())
Answered By: DarkKnight
Categories: questions Tags: ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.