Import yaml to excel

Question:

I have following .YAML file:

Name: Tom
Surname: Smith
Status: Married
Childs:
- ChildName: Willy
  Age: 30
  Grandchild:
  - GrandchildName: John
    Age: 4
  - GrandchildName: Maria
    Age: 3
- ChildName: Arthur
  Age: 40
  Grandchild: N/A

What I’ve done:

import openpyxl
import yaml

with open('family.yaml') as file:
    family_list = yaml.load(file, Loader=yaml.FullLoader)

def create_workbook():
    wb = openpyxl.Workbook('family.xlsx')
    wb.save('family.xlsx')

def update_workbook():
    wb = openpyxl.load_workbook('family.xlsx')

    sh1 = wb.active

    for item in family_list.items():
        sh1.append(item)
    wb.save('family.xlsx')

create_workbook()
update_workbook()

This code allows me add data (without "pause" part, so only Name, Surname, Status) to excel but "Name", "Surname" etc. are added in one column A1 (not in rows as I want to)

Final excel workbook I want to get

I’m not sure how to append and format (each pause sign should be new row) mentioned data. I will be grateful for any tips!

Asked By: user19263504

||

Answers:

I have decided to split the yaml load into three dictionaries;
Parent
Child
Grandchild
so they can be adjusted to suit the excel layout. Then use Pandas to export these to Excel at the appropriate row/column locations.

The code will process YAML files in ‘yaml_files’. On the first run for
the set ‘save_file’ the Headers are written to Row 1 and each YAML file data is then
placed under the previous entry down the spreadsheet across columns A – G. Any subsequent
run on the same ‘save_file’ will place the new data below the last data written.
Each file entry is also highlighted by alternating background colour.
You can change the background colors used or disable if you wish.

import glob
import os
import pandas as pd
import yaml
from openpyxl import load_workbook
from yaml.loader import SafeLoader
from openpyxl.utils import get_column_letter
from openpyxl.styles import PatternFill


def write_next_segments(dataf, header, start_row, start_col):
    dataframe = ''

    if start_col == 0:
        dataframe = pd.DataFrame([parent_dictionary])
        dataframe.to_excel(writer, sheet_name=save_sheet,
                           index=False,
                           header=header,
                           startrow=start_row,
                           startcol=start_col)
    else:
        for ele in dataf:
            dataframe = pd.DataFrame([dataf[ele]])
            dataframe.to_excel(writer, sheet_name=save_sheet,
                               index=False,
                               header=header,
                               startrow=start_row,
                               startcol=start_col)
            if header:
                header = False
                start_row += 2
            else:
                start_row += 1

    adjust_colmns(dataframe, start_col)


def adjust_colmns(dataframe, offset):
    for column in dataframe:
        if column != '':
            column_width = max(dataframe[column].astype(str).map(len).max(), len(column)) + 2
            col_letter = get_column_letter(dataframe.columns.get_loc(column) + offset + 1)
            writer.sheets[save_sheet].column_dimensions[col_letter].width = column_width


def build_dictionary(yf):
    # Open the file and load the file
    print("Formatting YAML file: " + yf)
    yaml_sub_name = ''  # Name of 'child' sub element
    with open(yf) as file:
        yaml_data = yaml.load(file, Loader=SafeLoader)

    for k, v in yaml_data.items():
        if type(v) == list:
            yaml_sub_name = k  # Name of 'child' sub element

    # Make copy of the original dictionary for modification
    print("------ Copy the Parent dictionary                 -------------")
    parent_dict = yaml_data.copy()

    print("------ Create the Child & GrandChild dictionaries -------------")

    child_dict = {}
    grandchild_dict = {}
    sub_list = ''
    for x in parent_dict[yaml_sub_name][0]:
        if len(child_dict) == 0:
            child_dict['Childs0'] = {x: ''}
        else:
            if type(yaml_data[yaml_sub_name][0][x]) != list:
                child_dict['Childs0'].update({x: ''})
            else:
                sub_list = x

    for x in parent_dict[yaml_sub_name][0][sub_list][0]:
        if len(grandchild_dict) == 0:
            grandchild_dict['Grandchilds0'] = {x: ''}
        else:
            grandchild_dict['Grandchilds0'].update({x: ''})

    # Remove the Child elements from the Parent dictionary
    child_list = parent_dict.pop(yaml_sub_name)

    # Create the rest of the Child and Grandchild dictionaries
    row = 1
    for enum, ele in enumerate(child_list):
        if row != 1: row += 1
        child_dict[yaml_sub_name + str(row)] = ele
        pop_num = row
        if type(child_list[enum][sub_list]) == list:
            for i in range(len(child_list[enum][sub_list])):
                if i == 0:
                    grandchild_dict['Grandchilds' + str(row)] = {'': '', '': ''}
                grandchild_dict['Grandchilds' + str(row + 1)] = ele[sub_list][i]
                row += 1
                child_dict[yaml_sub_name + str(row)] = {'': '', '': ''}
        child_dict[yaml_sub_name + str(pop_num)].pop(sub_list)

    return parent_dict, child_dict, grandchild_dict


def set_background_color(bg_color, bg_start_row):
    coords = writer.sheets[save_sheet].dimensions
    start_coord, end_coord = coords.split(':')
    bg_color_dims = 'A' + str(bg_start_row) + ':' + end_coord[:1] + str(row_offset)
    sheet = writer.sheets[save_sheet]
    for cells in sheet[bg_color_dims]:
        for cell in cells:
            cell.fill = PatternFill("solid", start_color=bg_color)
    bg_start_row = row_offset + 1

    return bg_start_row


if __name__ == "__main__":
    # Change these variables as necessary
    yaml_dir = './yaml_files'   # Set name of the input yaml files directory
    save_file = 'family.xlsx'   # Set name of Excel workbook for output
    save_sheet = 'Sheet1'       # Set name of Excel sheet
    
    # These variables should not be changed 
    row_offset = 0              # Row number for next Excel write
    header = True               # Write Header to Excel
    bg_color_start_row = '2'    # First row to start fill background color
    # Set start bg color
    bg_color = "DDD9C4"         # First segment background color

    # If save file already exists then load it and the save sheet
    # Get the last row and last background colour used. Header is set to False
    if os.path.isfile(save_file):
        book = load_workbook(save_file)
        writer = pd.ExcelWriter(save_file, engine='openpyxl')
        writer.book = book
        writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
        row_offset = writer.sheets[save_sheet].max_row + 1
        bg_color_start_row = row_offset
        bg_color = writer.sheets[save_sheet][writer.sheets[save_sheet].dimensions[3:]].fill.fgColor.value[2:]
        header = False
    else:
        # If no save file then create a new Excel file
        writer = pd.ExcelWriter(save_file, engine='openpyxl')

    # Get a list of the YAML files in yaml directory
    yaml_files = glob.glob(yaml_dir + "/[!~]*.yaml")
    # Loop through the YAML files
    for enum, y_file in enumerate(yaml_files):
        if enum > 0:
            header = False

        print("------ Export dictionaries to Excel using Pandas --------------")
        parent_dictionary, child_dictionary, grandchild_dictionary = build_dictionary(y_file)

        parent_col_offset = len(parent_dictionary)
        child_col_offset = parent_col_offset + len(child_dictionary['Childs0'])

        write_next_segments(parent_dictionary, header, row_offset, 0)
        write_next_segments(child_dictionary, header, row_offset, parent_col_offset)
        write_next_segments(grandchild_dictionary, header, row_offset, child_col_offset)
        row_offset = writer.sheets[save_sheet].max_row + 1
        # Insert background fill using alternate fill colours
        print("------ Set Background Colors ---------------------------------")
        if bg_color == "DAEEF3":
            bg_color = "DDD9C4"
        else:
            bg_color = "DAEEF3"

        bg_color_start_row = set_background_color(bg_color, bg_color_start_row)
        print("n")

    print("------ Save Excel file to '" + save_file + "' ---------------------")

    writer.save()
Answered By: moken
Categories: questions Tags: , , , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.