Averaging numeric values of all files for three different folders using Python

Question:

I am trying to average the numeric values in the files for each value of var. It works fine for var=1.0, but it does not work for var=5.0 or var=10.0. The current output is shown below. The expected output should contain numeric averages for var=5.0 and var=10.0 as well, since each file in the respective folders has non-zero values.

import os
import csv

# Average every numeric cell found in the per-run CSV files, separately for
# each entry of var_values, and report how many files were read.
NFILES = 501
Folder_name = '4_1012_nodes_var_1_K_1e3_Cb_1e-3_D_1e-10'

K = 1e3
Cb = 1e-3
DiffCoeff = 1e-10
mean_radii = 50.0
var_values = [1.0, 5.0, 10.0]
Distribution = 'ND'

# Root directory that contains one numbered sub-folder per run. The raw
# string keeps the Windows backslashes literal; os.path.join supplies the
# separators between the root, the run number and the file name.
base_dir = os.path.join(
    r"D:\Users\debanikb\OneDrive - Technion\Research_Technion\Python_PNM\Surfactant A-D",
    Folder_name,
)

for var in var_values:
    numeric_values = []  # Reset the list for each var iteration
    file_count = 0  # Counter for the number of files processed
    file_name = f"Time_{mean_radii}_{var}{Distribution}_{K}_{Cb}_{DiffCoeff}.csv"

    # range(1, NFILES) visits sub-folders 1 .. NFILES - 1.
    for i in range(1, NFILES):
        folder_path = os.path.join(base_dir, str(i), file_name)

        if not os.path.isfile(folder_path):
            continue

        # newline='' is what the csv module expects for files it reads.
        with open(folder_path, 'r', newline='') as file:
            for row in csv.reader(file):
                for value in row:
                    try:
                        numeric_values.append(float(value))
                    except ValueError:
                        # Cells that are not numbers are skipped on purpose.
                        pass
        file_count += 1

    # Guard on the collected values rather than on file_count: files that
    # exist but contain no numeric cell would otherwise divide by zero.
    if numeric_values:
        average = sum(numeric_values) / len(numeric_values)
        print(f"Average of numeric values for var={var}:", average)
        print(f"Number of files averaged for var={var}:", file_count)
    else:
        print(f"No numeric values found for var={var}.")

The data format of the files looks like this:

enter image description here

The current output is

Average of numeric values for var=1.0: 1.4373770078023274
Number of files averaged for var=1.0: 34
No numeric values found for var=5.0.
No numeric values found for var=10.0.
Asked By: KeplerNick123

||

Answers:

Given the very simple nature of the file content, there’s no real need for the CSV module.

Just do this:

import os

# Collect, for every var, the second line of each matching file found in
# the numbered run folders 1 .. NFILES, and count the files that provided one.
Folder_name = '4_1012_nodes_var_1_K_1e3_Cb_1e-3_D_1e-10'
K = 1e3
Cb = 1e-3
DiffCoeff = 1e-10
mean_radii = 50.0
var_values = [1.0, 5.0, 10.0]
Distribution = 'ND'
NFILES = 500
# The root must be a raw string: in an ordinary literal "\U" begins a
# \UXXXXXXXX escape, so "D:\Users..." does not even compile.
DIR = os.path.join(
    r'D:\Users\debanikb\OneDrive - Technion\Research_Technion\Python_PNM\Surfactant A-D',
    Folder_name,
)

results = {}     # var -> list of raw text lines (one per file read)
file_count = {}  # var -> number of files that contributed a line

for i in range(1, NFILES+1):
    if not os.path.isdir(_dir := os.path.join(DIR, str(i))):
        continue
    for var in var_values:
        path = os.path.join(_dir, f'Time_{mean_radii}_{var}{Distribution}_{K}_{Cb}_{DiffCoeff}.csv')
        try:
            with open(path) as file:
                lines = file.readlines()
        except (OSError, UnicodeError):
            # Best effort: a missing or unreadable file for this var is skipped.
            continue
        if len(lines) < 2:
            # No second line to take; do not count a file that adds no value.
            continue
        results.setdefault(var, []).append(lines[1])
        file_count[var] = file_count.get(var, 0) + 1

At this point, the dictionaries results and file_count hold all the information you need to proceed:

# Print the mean of the collected lines and the file count for every var;
# a var with nothing collected gets the "not found" message instead.
for var in var_values:
    times = results.get(var)
    if not times:
        print(f'No numeric values found for var={var}')
        continue
    mean = sum(float(entry) for entry in times) / len(times)
    print(f'Average of numeric values for var={var}: {mean}')
    print(f'Number of files averaged for var={var}: {file_count[var]}')
Answered By: DarkKnight
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.