Python sort a string in CSV
Question:
I want to sort every row of the CSV string, using the following code:
import csv
def sort_csv_columns(csv_string: str) -> str:
    """Sort the header of a CSV string and order the data rows (asker's attempt).

    This is the code the question is about. It sorts the header names
    case-insensitively and then sorts the data rows as whole units; it does
    NOT move the cells of each row to follow the sorted header.

    Args:
        csv_string: Comma-separated text whose lines are separated by "\n";
            the first line is the header.

    Returns:
        The sorted header followed by the data rows, joined with "\n".
    """
    # Split the CSV string into lines
    lines = csv_string.strip().split("\n")
    # Split the first line (column names) and sort it case-insensitively
    header = lines[0].split(",")
    header.sort(key=str.lower)
    # Split the remaining lines (data rows) and sort them by the sorted header
    data = [line.split(",") for line in lines[1:]]
    # NOTE: `header` is already sorted here, so for distinct column names
    # header.index(col) is just 0, 1, 2, ... and the key is the row itself.
    # This only reorders whole rows; the cells inside a row never move.
    data.sort(key=lambda row: [row[header.index(col)] for col in header])
    # Join the sorted data and header into a single CSV string
    sorted_csv = "\n".join([",".join(header)] + [",".join(row) for row in data])
    return sorted_csv


# Test the function
csv_string = "Beth,Charles,Danielle,Adam,Eric\n17945,10091,10088,3907,10132\n2,12,13,48,11"
sorted_csv = sort_csv_columns(csv_string)
print(sorted_csv)
Output
Adam,Beth,Charles,Danielle,Eric
17945,10091,10088,3907,10132
2,12,13,48,11
Expected Output
Adam,Beth,Charles,Danielle,Eric\n
3907,17945,10091,10088,10132\n
48,2,12,13,11
What am I doing wrong?
I am not able to sort the rows other than the top header.
Answers:
-
Because data
holds your lines (the rows), data.sort
can only reorder the lines relative to one another, not the content of each line (the cells). You need to reorder each element of data
-
Also, the following will always give 0,1,2,3,4
because you look up each index in the same list you are iterating over:
[header.index(col) for col in header]
Sort header then reorder
You need the values in sorted order, but not by calling the sort
method on them: you just need to reorder the values of each row to follow the new header order
def sort_csv_columns(csv_string: str) -> str:
    """Reorder the columns of a CSV string so the header is sorted.

    The header names are sorted case-insensitively, and every data row is
    rearranged so each cell stays under its original column name.

    Args:
        csv_string: Comma-separated text whose lines are separated by "\n";
            the first line is the header. Cells are split on "," only, so
            quoted fields containing commas are not supported.

    Returns:
        The CSV text with its columns reordered, lines joined with "\n".

    Raises:
        IndexError: If a data row has fewer cells than the header.
    """
    lines = csv_string.strip().split("\n")
    initial_header = lines[0].split(",")
    # Sort column *positions* rather than names: looking names up again with
    # list.index() costs a scan per column and would map every duplicate
    # column name to the first matching column.
    order = sorted(
        range(len(initial_header)),
        key=lambda i: initial_header[i].lower(),
    )
    # The header is just the first row, so it is reordered like the others.
    rows = [line.split(",") for line in lines]
    return "\n".join(",".join(row[i] for i in order) for row in rows)
Sort by header but maintain row together
You can avoid the reordering step if you sort the data while its content is stored by column instead of by row
def sort_csv_columns(csv_string: str) -> str:
    """Sort the columns of a CSV string by header name, case-insensitively.

    The text is transposed so each column (header cell plus its values)
    becomes one tuple, the tuples are sorted by their header cell, and the
    result is transposed back into rows.

    Args:
        csv_string: Comma-separated text whose lines are separated by "\n";
            the first line is the header. Cells are split on "," only.

    Returns:
        The CSV text with its columns reordered, lines joined with "\n".
    """
    rows = [line.split(",") for line in csv_string.strip().split("\n")]
    # [['Beth', 'Charles', 'Danielle', 'Adam', 'Eric'],
    #  ['17945', '10091', '10088', '3907', '10132'],
    #  ['2', '12', '13', '48', '11']]

    # Transpose rows into columns. zip() stops at the shortest row, so cells
    # beyond the shortest row's length are dropped.
    columns = list(zip(*rows))
    # [('Beth', '17945', '2'), ('Charles', '10091', '12'), ('Danielle', '10088', '13'),
    #  ('Adam', '3907', '48'), ('Eric', '10132', '11')]

    # Sort by the first value of each column: the name from the header.
    columns.sort(key=lambda column: column[0].lower())

    # Transpose back to rows and rebuild the CSV text.
    return "\n".join(",".join(row) for row in zip(*columns))
I want to sort every row of the CSV string, using the following code:
import csv
def sort_csv_columns(csv_string: str) -> str:
    """Sort the header of a CSV string and order the data rows (asker's attempt).

    This is the code the question is about. It sorts the header names
    case-insensitively and then sorts the data rows as whole units; it does
    NOT move the cells of each row to follow the sorted header.

    Args:
        csv_string: Comma-separated text whose lines are separated by "\n";
            the first line is the header.

    Returns:
        The sorted header followed by the data rows, joined with "\n".
    """
    # Split the CSV string into lines
    lines = csv_string.strip().split("\n")
    # Split the first line (column names) and sort it case-insensitively
    header = lines[0].split(",")
    header.sort(key=str.lower)
    # Split the remaining lines (data rows) and sort them by the sorted header
    data = [line.split(",") for line in lines[1:]]
    # NOTE: `header` is already sorted here, so for distinct column names
    # header.index(col) is just 0, 1, 2, ... and the key is the row itself.
    # This only reorders whole rows; the cells inside a row never move.
    data.sort(key=lambda row: [row[header.index(col)] for col in header])
    # Join the sorted data and header into a single CSV string
    sorted_csv = "\n".join([",".join(header)] + [",".join(row) for row in data])
    return sorted_csv


# Test the function
csv_string = "Beth,Charles,Danielle,Adam,Eric\n17945,10091,10088,3907,10132\n2,12,13,48,11"
sorted_csv = sort_csv_columns(csv_string)
print(sorted_csv)
Output
Adam,Beth,Charles,Danielle,Eric
17945,10091,10088,3907,10132
2,12,13,48,11
Expected Output
Adam,Beth,Charles,Danielle,Eric\n
3907,17945,10091,10088,10132\n
48,2,12,13,11
What am I doing wrong?
I am not able to sort the rows other than the top header.
-
Because
data
holds your lines (the rows), data.sort
can only reorder the lines relative to one another, not the content of each line (the cells). You need to reorder each element of data
-
Also, the following will always give
0,1,2,3,4
because you look up each index in the same list you are iterating over: [header.index(col) for col in header]
Sort header then reorder
You need the values in sorted order, but not by calling the sort
method on them: you just need to reorder the values of each row to follow the new header order
def sort_csv_columns(csv_string: str) -> str:
    """Reorder the columns of a CSV string so the header is sorted.

    The header names are sorted case-insensitively, and every data row is
    rearranged so each cell stays under its original column name.

    Args:
        csv_string: Comma-separated text whose lines are separated by "\n";
            the first line is the header. Cells are split on "," only, so
            quoted fields containing commas are not supported.

    Returns:
        The CSV text with its columns reordered, lines joined with "\n".

    Raises:
        IndexError: If a data row has fewer cells than the header.
    """
    lines = csv_string.strip().split("\n")
    initial_header = lines[0].split(",")
    # Sort column *positions* rather than names: looking names up again with
    # list.index() costs a scan per column and would map every duplicate
    # column name to the first matching column.
    order = sorted(
        range(len(initial_header)),
        key=lambda i: initial_header[i].lower(),
    )
    # The header is just the first row, so it is reordered like the others.
    rows = [line.split(",") for line in lines]
    return "\n".join(",".join(row[i] for i in order) for row in rows)
Sort by header but maintain row together
You can avoid the reordering step if you sort the data while its content is stored by column instead of by row
def sort_csv_columns(csv_string: str) -> str:
    """Sort the columns of a CSV string by header name, case-insensitively.

    The text is transposed so each column (header cell plus its values)
    becomes one tuple, the tuples are sorted by their header cell, and the
    result is transposed back into rows.

    Args:
        csv_string: Comma-separated text whose lines are separated by "\n";
            the first line is the header. Cells are split on "," only.

    Returns:
        The CSV text with its columns reordered, lines joined with "\n".
    """
    rows = [line.split(",") for line in csv_string.strip().split("\n")]
    # [['Beth', 'Charles', 'Danielle', 'Adam', 'Eric'],
    #  ['17945', '10091', '10088', '3907', '10132'],
    #  ['2', '12', '13', '48', '11']]

    # Transpose rows into columns. zip() stops at the shortest row, so cells
    # beyond the shortest row's length are dropped.
    columns = list(zip(*rows))
    # [('Beth', '17945', '2'), ('Charles', '10091', '12'), ('Danielle', '10088', '13'),
    #  ('Adam', '3907', '48'), ('Eric', '10132', '11')]

    # Sort by the first value of each column: the name from the header.
    columns.sort(key=lambda column: column[0].lower())

    # Transpose back to rows and rebuild the CSV text.
    return "\n".join(",".join(row) for row in zip(*columns))