python csv DictReader reset position to second line

Question:

I have the Python code below that reads two CSV files and finds matching rows. Both CSV files have a header row, so I need to reset file2/reader2 to the second line (the first data row) on each iteration of reader1.

How do I reset the position of file2/reader2 to the first data row to search for a match?

import csv

# Read in the data from the two CSV files
with open('file1.csv') as file1, open('file2.csv') as file2:
  # DictReader consumes the first line of each file as the header and
  # yields every following line as a dict keyed by those column names
  reader1 = csv.DictReader(file1)
  reader2 = csv.DictReader(file2)

  # Create an empty list to store the merged data
  merged_rows = []

  # Iterate over the rows in the first file
  for row1 in reader1:
    # Get the contact number from the current row
    contact_no = row1['contact_no']

    ### file2 seek to start of second line to find matches again
    # NOTE: reader2 only reads file2 forward. Nothing here rewinds it, so
    # once the inner loop below has run to the end of file2, every later
    # pass of the outer loop finds reader2 already exhausted.

    # Iterate over the rows in the second file
    for row2 in reader2:
      # Check if the contact number in the current row
      # of the second file matches the contact number
      # from the current row of the first file
      if row2['contact_no'] == contact_no:
        # If the contact numbers match, merge the rows
        # by adding the data from the second row to the
        # first row
        # (update() changes row1 in place; for columns present in both
        # rows, the value from row2 replaces the one from row1)
        row1.update(row2)

        # Add the merged row to the list of merged rows
        merged_rows.append(row1)

  # Write the merged rows to a new CSV file
  with open('merged_file.csv', 'w') as out_file:
    # Create a writer object and write the header row
    # NOTE: row1 is whatever the outer loop bound last, so the header is
    # taken from the final row of file1 (merged or not); row1 does not
    # exist at all if file1 had no data rows.
    writer = csv.DictWriter(out_file, fieldnames=row1.keys())
    writer.writeheader()

    # Write each of the merged rows to the output file
    for row in merged_rows:
      writer.writerow(row)
Asked By: user3720435

||

Answers:

Unless file2.csv is very large, I would load it in memory and keep it there.

  ...
  reader2 = csv.DictReader(file2)
  reader2_rows = list(reader2)
  ...
  for row1 in reader1:
    ...
    for row2 in reader2_rows:
      ...

Putting this into your code, it becomes:

import csv

# Merge the rows of file1.csv and file2.csv that share a contact number and
# write the merged rows to merged_file.csv.

# Column that links a row of file1.csv to its match(es) in file2.csv
KEY_FIELD = 'contact_no'


def merge_rows(rows1, rows2, key=KEY_FIELD):
  """Return one merged dict for every pair of rows sharing the same *key*.

  Args:
    rows1: iterable of dicts (e.g. a csv.DictReader); read exactly once.
    rows2: iterable of dicts; read exactly once to build a lookup table.
    key: name of the column whose value must be equal in both rows.

  Returns:
    A list of new dicts, ordered by rows1 and then by rows2 order within
    the same key.  Each dict holds the columns of the row from rows1 plus
    those of the matching row from rows2; for columns present in both,
    the value from rows2 wins.  The input rows are not modified.

  Raises:
    KeyError: if a row has no *key* column.
  """
  # Index the second file by key once, so each row of the first file is
  # matched with a dict lookup instead of a full rescan of the second file.
  rows2_by_key = {}
  for row2 in rows2:
    rows2_by_key.setdefault(row2[key], []).append(row2)

  merged_rows = []
  for row1 in rows1:
    for row2 in rows2_by_key.get(row1[key], ()):
      # Build a fresh dict per match: updating and appending row1 itself
      # would store the same object several times when a key matches more
      # than one row, and every copy would show only the last match.
      merged_rows.append({**row1, **row2})
  return merged_rows


def merged_fieldnames(fieldnames1, fieldnames2):
  """Return the columns of both files, first file first, without repeats.

  Either argument may be None, which is what csv.DictReader.fieldnames
  reports for a completely empty file.
  """
  # dict.fromkeys drops duplicates while keeping first-seen order
  return list(dict.fromkeys([*(fieldnames1 or ()), *(fieldnames2 or ())]))


def main():
  """Read file1.csv and file2.csv and write their matches to merged_file.csv."""
  # newline='' is what the csv module expects for files it reads or writes;
  # without it the output gets blank lines between rows on Windows.
  with open('file1.csv', newline='') as file1, \
       open('file2.csv', newline='') as file2:
    reader1 = csv.DictReader(file1)
    reader2 = csv.DictReader(file2)

    # load reader2 rows into memory since we reuse them multiple times
    reader2_rows = list(reader2)

    merged_rows = merge_rows(reader1, reader2_rows)

    # Take the header from the readers rather than from whichever row was
    # read last: that row may be unmatched (and so lack file2's columns),
    # or may not exist at all when file1 has no data rows.
    fieldnames = merged_fieldnames(reader1.fieldnames, reader2.fieldnames)

  # Write the merged rows to a new CSV file
  with open('merged_file.csv', 'w', newline='') as out_file:
    writer = csv.DictWriter(out_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(merged_rows)


if __name__ == '__main__':
  main()
Answered By: joanis
Categories: questions Tags:
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.