Compare two csv files in python and retain headers of changes

Question:

I’m trying to compare two csv files in python and output the differences along with the headers of each column. So far, with what I’m doing, it outputs all columns instead of just the ones with differences

import csv

with open('firstfile.csv', 'r') as f1:
    file1 = f1.readlines()

with open('secondfile.csv', 'r') as f2:
    file2 = f2.readlines()

with open('results.csv', 'w') as outFile:
    outFile.write(file1[0])
    for line in file2:
        if line not in file1:
            outFile.write(line)
Asked By: user6302747

||

Answers:

I think this code resolves your problem

import sys

with open('file1.csv', 'r') as f1:
    file1 = f1.readlines()

with open('file2.csv', 'r') as f2:
    file2 = f2.readlines()

delimiter = 't'  # Column delimiter in you file
headers_of_first_file = file1[0].strip().split(delimiter)
headers_of_second_file = file2[0].strip().split(delimiter)

# You can remove this assert if you want to work files with different columns then you have to add some more code in next blocks
different_headers = set(headers_of_first_file).symmetric_difference(headers_of_second_file)
if different_headers:
    print('Files have difference in headers: ', different_headers)
    sys.exit(-1)

# Build map {header: [all_values]}
first_file_map = {header: [] for header in headers_of_first_file}
for row in file1[1:]:
    for index, cell in enumerate(row.strip().split(delimiter)):
        first_file_map[headers_of_first_file[index]].append(cell)

# Check by built map. Dont forget that columns may change order
result = set()
for row in file2[1:]:
    for index, cell in enumerate(row.strip().split(delimiter)):
        if cell not in first_file_map[headers_of_second_file[index]]:
            result.add(headers_of_second_file[index])

with open('results.csv', 'w') as out_file:
    out_file.write('t'.join(result))

UPD files example:

Column1 Column2 Column3 Column5 Column4
1   2   3   5   4
10  20  30  50  40

Column1 Column2 Column3 Column4 Column5
11  2   3   4   5
10  10  30  40  50

‘t’ is delimiter

Answered By: Ruslan Galimov
import csv

def compareList(l1,l2):
   if(len(l1)==len(l2) and len(l1)==sum([1 for i,j in zip(l1,l2) if i==j])):
      return "Equal"
   else:
      return "Non equal"

file1 = "C:/Users/Sarvesh/Downloads/a.csv"
file2 = "C:/Users/Sarvesh/Downloads/b.csv"

with open(file1, 'r') as csv1, open(file2, 'r') as csv2:  # Import CSV files
    import1 = csv1.readlines()
    import2 = csv2.readlines()

    # creating an object of csv reader
    # with the delimiter as ,
    csv_reader = csv.reader(import1, delimiter='|')
    # list to store the names of columns
    list_of_column_name1 = []
    # loop to iterate through the rows of csv
    for row in csv_reader:
        # adding the first row
        list_of_column_name1.append(row)
        # breaking the loop after the
        # first iteration itself
        break

    csv_reader = csv.reader(import2, delimiter='|')
    # list to store the names of columns
    list_of_column_name2 = []
    # loop to iterate through the rows of csv
    for row in csv_reader:
        # adding the first row
        list_of_column_name2.append(row)
        # breaking the loop after the
        # first iteration itself
        break

# printing the result
print("1List of column names : ", list_of_column_name1[0])

print("2List of column names : ", list_of_column_name2[0])

print("First comparison",compareList(list_of_column_name1,list_of_column_name2))
Answered By: sarvesh shukla
Categories: questions Tags:
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.