How to wrap text in excel using Python?

Question:

I created a script which parse YAML files to Excel. I’ve made part of code which adjust width of column but now I want to wrap text for Desciprtion column.

After script execution:

Incorrect Description

What I want:

Correct Description

Here is my code:

import yaml
import os
import pandas as pd
from openpyxl.utils import get_column_letter
import sys


def yaml_to_excel(dictionary, row):
    for key, value in dictionary.items():
        if type(value) is dict:
            yield from yaml_to_excel(value, row + 1)
        elif type(value) is list:
            for item in value:
                yield from yaml_to_excel(item, row + 1)
                row += 1
        else:
            yield (key, value, row)


def get_all_subdirectiries_in_path(root_dir):
    all_subdirectories = []

    for file in os.listdir(root_dir):
        d = os.path.join(root_dir, file)
        if os.path.isdir(d):
            all_subdirectories.append(d)

    return all_subdirectories


def dfs_tabs(df_list, sheet_list, file_name, columns):
    writer = pd.ExcelWriter(file_name,engine='openpyxl')
    for dataframe, sheet, columns_list in zip(df_list, sheet_list, columns):
        dataframe.to_excel(writer, sheet_name=sheet, startrow=0 , startcol=0, index=False, columns=columns_list)

        for column in dataframe:
            column_width = max(dataframe[column].astype(str).map(len).max() + 5, len(column) + 5)
            col_letter = get_column_letter(dataframe.columns.get_loc(column) + 1)
            writer.sheets[sheet].column_dimensions[col_letter].width = column_width

    writer.save()


def get_nested_dir_path(main_dir):
    if any(File.endswith(".yml") for File in os.listdir(main_dir)):
        yield main_dir
    else:
        for file in os.listdir(main_dir):
            yield from get_nested_dir_path(main_dir + f"/{file}")


def main(dir_path):
    all_sheets = []
    sheet_names = []
    all_columns = []
    for subdirectory in get_nested_dir_path(dir_path):
        final_object = []
        last_row = 1
        current_row = 1

        for file in os.listdir(subdirectory):
            # Check if YAML file exist
            if (file.split(".")[1] == "yml") | (file.split(".")[1] == "yaml"):
                with open(subdirectory + f"/{file}", "r", encoding="utf-8") as f:
                    dataMap = yaml.safe_load(
                        f,
                    )

                last_row += 1
                hierarchy_obj = {}
                hierarchy = 1

                prev_value = ""
                prev_temp_depth = 2
                current_row += 1

                starting_row = current_row

                for key, value, temp_depth in yaml_to_excel(dataMap, last_row):
                    if key not in hierarchy_obj:
                        hierarchy_obj[key] = hierarchy
                        hierarchy += 1

                    if prev_value != "":
                        if hierarchy_obj[key] < hierarchy_obj[prev_value]:
                            current_row += 1
                        if (temp_depth > prev_temp_depth) & (
                            hierarchy_obj[key] > hierarchy_obj[prev_value]
                        ):
                            current_row += 1

                    if type(value) is bool:
                        if value:
                            value = 'yes'
                        else:
                            value = 'no'

                    final_object.append(
                        {
                            "column": hierarchy_obj[key],
                            "value": value,
                            "row": current_row,
                        }
                    )

                    prev_value = key
                    prev_temp_depth = temp_depth

        columns_arr = []
        for column_name in hierarchy_obj.keys():
            columns_arr.append(column_name)
        all_columns.append(columns_arr)
        last_row_in_excel = final_object[len(final_object)-1]['row']

        main_arr = []

        for i in range(last_row_in_excel):
            main_arr.append(["" for i in range(len(columns_arr))])

        main_arr = [main_arr]

        for item in final_object:
            if (type(item['value']) == int) | (type(item['value']) == float):
                main_arr[0][item['row'] - 1][item['column'] - 1] = item['value']
            else:
                main_arr[0][item['row'] - 1][item['column'] -1] = item['value']

        result = pd.DataFrame(main_arr[0], columns = columns_arr)
        result = result[1:]
        all_sheets.append(result)
        sheet_names.append(subdirectory[subdirectory.rfind('/')+1 :].replace("/", " ").replace("_", " ")[0:31])

    dfs_tabs(all_sheets, sheet_names, 'test.xlsx', all_columns)


if __name__ == "__main__":
    dir_path = 'Lands'
    main(dir_path)

And below you can find YAML file which I base of:

Country: France
Capital City: Paris
Language: French
Description: |-
  A country whose metropolitan part is in Western Europe, and also has overseas territories on other continents.

  The most important people:
  President: Emmanuel Macron
  Prime Minister: Élisabeth Borne
Year: 2020
Population:
- Total population: 67 522 000
  Year: 2021
  Nationality:
  - French population: 87%
    German population: 2%
    Year: 2022
    Paris neighborhoods:
    - 1st district of Paris: Louvre
      Paris II district: Bourse
      Year: 2023
Additional information:

To run my script you must create folder Lands/France/france.yml

I will be grateful for any tips how to wrap text in Description column!

Asked By: user19263504

||

Answers:

I suggest you in this particular situation to parse the .YAML file with pandas.json_normalize and use the format class of xlswriter to wrap the text in the column Description.

Try this :

import pandas as pd
from yaml import safe_load

with open(r"LandsFrancefrance.yaml", 'r', encoding='utf-8') as f:
    df = pd.json_normalize(safe_load(f))
    
# --- Selecting columns
description_index = df.columns.get_loc("Description")
df.iloc[:, 0:description_index+1]

with pd.ExcelWriter('final_result.xlsx') as writer :
    df.to_excel(writer, sheet_name='Sheet1', index=False)

    # --- Wrapping the text of column D
    workbook=writer.book
    worksheet = writer.sheets['Sheet1']
    format = workbook.add_format({'text_wrap': True})
    worksheet.set_column(description_index, description_index, None, format)

    # --- Auto-fit all the columns
    for column in df:
        column_width = max(df[column].astype(str).map(len).max(), len(column))
        col_idx = df.columns.get_loc(column)
        worksheet.set_column(col_idx, col_idx, column_width)

# Output (in Excel) :

enter image description here

Answered By: abokey
Categories: questions Tags: , , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.