Averaging numeric values of all files for three different folders using Python
Question:
I am trying to average the numeric values in the files for each `var`. It works fine for `var_values = 1.0`, but it does not work for `var_values = 5.0, 10.0`. I present the current output below. The expected output should also have numeric values for `var_values = 5.0, 10.0`, since each file in the respective folders has non-zero values.
import os
import csv
# For each value in var_values, average every numeric cell found in the CSV
# files <base>\<Folder_name>\<i>\Time_....csv for run folders i = 1..NFILES-1,
# and report how many files contributed at least one numeric value.
NFILES = 501  # exclusive upper bound of the run-folder index (folders 1..500)
Folder_name = '4_1012_nodes_var_1_K_1e3_Cb_1e-3_D_1e-10'
K = 1e3
Cb = 1e-3
DiffCoeff = 1e-10
mean_radii = 50.0
var_values = [1.0, 5.0, 10.0]
Distribution = 'ND'

# NOTE(review): the reported output only finds files for var=1.0, and
# Folder_name itself contains "var_1" -- confirm that the files for the other
# var values really live under this same folder.
for var in var_values:
    numeric_values = []  # Reset the list for each var iteration
    file_count = 0  # Counter for the number of files processed
    for i in range(1, NFILES):
        # Raw f-string: backslashes stay literal path separators while the
        # {...} fields are still interpolated.  The floats are formatted with
        # their default str() form (1e3 -> "1000.0", 1e-3 -> "0.001",
        # 1e-10 -> "1e-10"), so the file names on disk must match that.
        folder_path = rf"D:\Users\debanikb\OneDrive - Technion\Research_Technion\Python_PNM\Surfactant A-D\{Folder_name}\{i}\Time_{mean_radii}_{var}{Distribution}_{K}_{Cb}_{DiffCoeff}.csv"
        if not os.path.isfile(folder_path):
            continue  # this run folder has no file for the current var

        # newline='' is what the csv module asks for when opening files.
        with open(folder_path, 'r', newline='') as file:
            row_values = []
            for row in csv.reader(file):
                for value in row:
                    try:
                        row_values.append(float(value))
                    except ValueError:
                        # Non-numeric cells are skipped on purpose.
                        continue

        # A file only counts when it yielded at least one numeric value.
        if row_values:
            numeric_values.extend(row_values)
            file_count += 1

    if file_count > 0:
        average = sum(numeric_values) / len(numeric_values)
        print(f"Average of numeric values for var={var}:", average)
        print(f"Number of files averaged for var={var}:", file_count)
    else:
        print(f"No numeric values found for var={var}.")
The data format of the files looks like
The current output is
Average of numeric values for var=1.0: 1.4373770078023274
Number of files averaged for var=1.0: 34
No numeric values found for var=5.0.
No numeric values found for var=10.0.
Answers:
Given the very simple nature of the file content, there’s no real need for the CSV module.
Just do this:
import os
# Collect, for every var value, the second line of each matching CSV file found
# in the run folders DIR\1 .. DIR\NFILES.  `results` and `file_count` are the
# outputs of this block.
Folder_name = '4_1012_nodes_var_1_K_1e3_Cb_1e-3_D_1e-10'
K = 1e3
Cb = 1e-3
DiffCoeff = 1e-10
mean_radii = 50.0
var_values = [1.0, 5.0, 10.0]
Distribution = 'ND'
NFILES = 500
# Must be a *raw* f-string: in a normal string literal "\U" starts a Unicode
# escape, so 'D:\Users...' is a SyntaxError.  With the r prefix the backslashes
# are literal and {Folder_name} is still interpolated.
DIR = rf'D:\Users\debanikb\OneDrive - Technion\Research_Technion\Python_PNM\Surfactant A-D\{Folder_name}'
results = {}  # var -> list of raw second-line strings, one per file read
file_count = {}  # var -> number of files that contributed a line to results

for i in range(1, NFILES + 1):
    if not os.path.isdir(_dir := os.path.join(DIR, str(i))):
        continue  # run folder i does not exist
    for var in var_values:
        path = os.path.join(_dir, f'Time_{mean_radii}_{var}{Distribution}_{K}_{Cb}_{DiffCoeff}.csv')
        try:
            with open(path) as file:
                file.readline()  # skip the first line (presumably a header)
                value_line = file.readline()
        except (OSError, UnicodeDecodeError):
            # Missing or unreadable file: skipping it is the intended
            # best-effort behaviour, but other errors are no longer hidden.
            continue
        if not value_line.strip():
            continue  # file has no (non-blank) second line to average
        # Count a file only once its value has actually been collected, so
        # file_count[var] always equals len(results[var]).
        results.setdefault(var, []).append(value_line)
        file_count[var] = file_count.get(var, 0) + 1
At this point, the dictionaries `results` and `file_count` hold all the information you need to proceed:
# Print the mean of the collected values and the file count for each var;
# vars with no collected values get a "not found" message instead.
for var in var_values:
    times = results.get(var)
    if not times:
        print(f'No numeric values found for var={var}')
        continue
    # The collected entries are raw text lines, so convert before summing.
    total = sum(float(entry) for entry in times)
    mean = total / len(times)
    print(f'Average of numeric values for var={var}: {mean}')
    print(f'Number of files averaged for var={var}: {file_count[var]}')
I am trying to average the numeric values in the files for each `var`. It works fine for `var_values = 1.0`, but it does not work for `var_values = 5.0, 10.0`. I present the current output below. The expected output should also have numeric values for `var_values = 5.0, 10.0`, since each file in the respective folders has non-zero values.
import os
import csv
# For each value in var_values, average every numeric cell found in the CSV
# files <base>\<Folder_name>\<i>\Time_....csv for run folders i = 1..NFILES-1,
# and report how many files contributed at least one numeric value.
NFILES = 501  # exclusive upper bound of the run-folder index (folders 1..500)
Folder_name = '4_1012_nodes_var_1_K_1e3_Cb_1e-3_D_1e-10'
K = 1e3
Cb = 1e-3
DiffCoeff = 1e-10
mean_radii = 50.0
var_values = [1.0, 5.0, 10.0]
Distribution = 'ND'

# NOTE(review): the reported output only finds files for var=1.0, and
# Folder_name itself contains "var_1" -- confirm that the files for the other
# var values really live under this same folder.
for var in var_values:
    numeric_values = []  # Reset the list for each var iteration
    file_count = 0  # Counter for the number of files processed
    for i in range(1, NFILES):
        # Raw f-string: backslashes stay literal path separators while the
        # {...} fields are still interpolated.  The floats are formatted with
        # their default str() form (1e3 -> "1000.0", 1e-3 -> "0.001",
        # 1e-10 -> "1e-10"), so the file names on disk must match that.
        folder_path = rf"D:\Users\debanikb\OneDrive - Technion\Research_Technion\Python_PNM\Surfactant A-D\{Folder_name}\{i}\Time_{mean_radii}_{var}{Distribution}_{K}_{Cb}_{DiffCoeff}.csv"
        if not os.path.isfile(folder_path):
            continue  # this run folder has no file for the current var

        # newline='' is what the csv module asks for when opening files.
        with open(folder_path, 'r', newline='') as file:
            row_values = []
            for row in csv.reader(file):
                for value in row:
                    try:
                        row_values.append(float(value))
                    except ValueError:
                        # Non-numeric cells are skipped on purpose.
                        continue

        # A file only counts when it yielded at least one numeric value.
        if row_values:
            numeric_values.extend(row_values)
            file_count += 1

    if file_count > 0:
        average = sum(numeric_values) / len(numeric_values)
        print(f"Average of numeric values for var={var}:", average)
        print(f"Number of files averaged for var={var}:", file_count)
    else:
        print(f"No numeric values found for var={var}.")
The data format of the files looks like
The current output is
Average of numeric values for var=1.0: 1.4373770078023274
Number of files averaged for var=1.0: 34
No numeric values found for var=5.0.
No numeric values found for var=10.0.
Given the very simple nature of the file content, there’s no real need for the CSV module.
Just do this:
import os
# Collect, for every var value, the second line of each matching CSV file found
# in the run folders DIR\1 .. DIR\NFILES.  `results` and `file_count` are the
# outputs of this block.
Folder_name = '4_1012_nodes_var_1_K_1e3_Cb_1e-3_D_1e-10'
K = 1e3
Cb = 1e-3
DiffCoeff = 1e-10
mean_radii = 50.0
var_values = [1.0, 5.0, 10.0]
Distribution = 'ND'
NFILES = 500
# Must be a *raw* f-string: in a normal string literal "\U" starts a Unicode
# escape, so 'D:\Users...' is a SyntaxError.  With the r prefix the backslashes
# are literal and {Folder_name} is still interpolated.
DIR = rf'D:\Users\debanikb\OneDrive - Technion\Research_Technion\Python_PNM\Surfactant A-D\{Folder_name}'
results = {}  # var -> list of raw second-line strings, one per file read
file_count = {}  # var -> number of files that contributed a line to results

for i in range(1, NFILES + 1):
    if not os.path.isdir(_dir := os.path.join(DIR, str(i))):
        continue  # run folder i does not exist
    for var in var_values:
        path = os.path.join(_dir, f'Time_{mean_radii}_{var}{Distribution}_{K}_{Cb}_{DiffCoeff}.csv')
        try:
            with open(path) as file:
                file.readline()  # skip the first line (presumably a header)
                value_line = file.readline()
        except (OSError, UnicodeDecodeError):
            # Missing or unreadable file: skipping it is the intended
            # best-effort behaviour, but other errors are no longer hidden.
            continue
        if not value_line.strip():
            continue  # file has no (non-blank) second line to average
        # Count a file only once its value has actually been collected, so
        # file_count[var] always equals len(results[var]).
        results.setdefault(var, []).append(value_line)
        file_count[var] = file_count.get(var, 0) + 1
At this point, the dictionaries `results` and `file_count` hold all the information you need to proceed:
# Print the mean of the collected values and the file count for each var;
# vars with no collected values get a "not found" message instead.
for var in var_values:
    times = results.get(var)
    if not times:
        print(f'No numeric values found for var={var}')
        continue
    # The collected entries are raw text lines, so convert before summing.
    total = sum(float(entry) for entry in times)
    mean = total / len(times)
    print(f'Average of numeric values for var={var}: {mean}')
    print(f'Number of files averaged for var={var}: {file_count[var]}')