How do I copy a row from an existing dataframe df_a into a new dataframe df_b?
Question:
How do I copy a row from an existing dataframe df_a into a new dataframe df_b? Also, a cell from dataframe df_a into the new dataframe df_b? See the following example:
for index, row in df__data.iterrows():
for i in range(df__attributes_to_compare.shape[0]):
if row[df__attributes_to_compare["info_a"].values[i]] != row[df__attributes_to_compare["info_b"].values[i]]:
# new_df.append(key columns values + unsimiliar compared values)
df__data:
key1
key2
key3
attrb1
attrb2
attrb3
sfdg
dagd
dgsg
12
43
24
afrtf
yxcbb
ertet
34
45
34
df__attributes_to_compare:
info_a
info_b
attrb1
attrb2
attrb1
attrb3
new_df, target output:
key1
key2
key3
value1
value2
sfdg
dagd
dgsg
12
43
sfdg
dagd
dgsg
12
24
afrtf
yxcbb
ertet
34
45
The values that do not match are stored in value1 and value2.
Answers:
Here is my solution
import pandas as pd
import numpy as np
data = {"key1": ["sfdg", "afrtf"],
"key2": ["dagd", "yxcbb"],
"key3": ["dgsg", "ertet"],
"attrb1": ["12", "34"],
"attrb2": ["43", "45"],
"attrb3": ["24", "34"]}
df__data = pd.DataFrame(data = data)
list_key = ["key1", "key2", "key3"]
# attributes_to_compare = {"info_a": ["attrb1", "attrb1"],
# "info_b": ["attrb2", "attrb3"]}
# df__attributes_to_compare = pd.DataFrame(data = attributes_to_compare)
list__attributes_to_compare = ["attrb1", "attrb2",
"attrb1", "attrb3"]
key_set = []
values = []
for index, row in df__data.iterrows():
for i in range(int(len(list__attributes_to_compare)/2)):
i = i*2
if row[list__attributes_to_compare[i]] != row[list__attributes_to_compare[i+1]]:
key_set.append(row.iloc[0:len(list_key)])
values.append([row[list__attributes_to_compare[i]], row[list__attributes_to_compare[i+1]]])
df__values = pd.DataFrame(data = values, columns = ["value1", "value2"])
df__key_set = pd.DataFrame(data = key_set, columns = list_key)
df__key_set["new_index"] = np.arange(df__key_set.shape[0])
df__key_set.set_index("new_index", inplace = True)
df__result = df__key_set.merge(df__values, left_index=True, right_index=True)
Does anyone have suggestions for improvement?
You could try the following, which is bit more focused:
def select(row):
col1, col2 = row
df = df__data[["key1", "key2", "key3"] + [col1, col2]]
return df[df[col1] != df[col2]].rename(columns={col1: "value1", col2: "value2"})
result = pd.concat(
map(select, df__attributes_to_compare.itertuples(index=False)),
axis=0, ignore_index=True
)
The result
is not exactly in the same order given in the question but essentially the same:
key1 key2 key3 value1 value2
0 sfdg dagd dgsg 12 43
1 afrtf yxcbb ertet 34 45
2 sfdg dagd dgsg 12 24
How do I copy a row from an existing dataframe df_a into a new dataframe df_b? Also, a cell from dataframe df_a into the new dataframe df_b? See the following example:
for index, row in df__data.iterrows():
for i in range(df__attributes_to_compare.shape[0]):
if row[df__attributes_to_compare["info_a"].values[i]] != row[df__attributes_to_compare["info_b"].values[i]]:
# new_df.append(key columns values + unsimiliar compared values)
df__data:
key1 | key2 | key3 | attrb1 | attrb2 | attrb3 |
---|---|---|---|---|---|
sfdg | dagd | dgsg | 12 | 43 | 24 |
afrtf | yxcbb | ertet | 34 | 45 | 34 |
df__attributes_to_compare:
info_a | info_b |
---|---|
attrb1 | attrb2 |
attrb1 | attrb3 |
new_df, target output:
key1 | key2 | key3 | value1 | value2 |
---|---|---|---|---|
sfdg | dagd | dgsg | 12 | 43 |
sfdg | dagd | dgsg | 12 | 24 |
afrtf | yxcbb | ertet | 34 | 45 |
The values that do not match are stored in value1 and value2.
Here is my solution
import pandas as pd
import numpy as np
data = {"key1": ["sfdg", "afrtf"],
"key2": ["dagd", "yxcbb"],
"key3": ["dgsg", "ertet"],
"attrb1": ["12", "34"],
"attrb2": ["43", "45"],
"attrb3": ["24", "34"]}
df__data = pd.DataFrame(data = data)
list_key = ["key1", "key2", "key3"]
# attributes_to_compare = {"info_a": ["attrb1", "attrb1"],
# "info_b": ["attrb2", "attrb3"]}
# df__attributes_to_compare = pd.DataFrame(data = attributes_to_compare)
list__attributes_to_compare = ["attrb1", "attrb2",
"attrb1", "attrb3"]
key_set = []
values = []
for index, row in df__data.iterrows():
for i in range(int(len(list__attributes_to_compare)/2)):
i = i*2
if row[list__attributes_to_compare[i]] != row[list__attributes_to_compare[i+1]]:
key_set.append(row.iloc[0:len(list_key)])
values.append([row[list__attributes_to_compare[i]], row[list__attributes_to_compare[i+1]]])
df__values = pd.DataFrame(data = values, columns = ["value1", "value2"])
df__key_set = pd.DataFrame(data = key_set, columns = list_key)
df__key_set["new_index"] = np.arange(df__key_set.shape[0])
df__key_set.set_index("new_index", inplace = True)
df__result = df__key_set.merge(df__values, left_index=True, right_index=True)
Does anyone have suggestions for improvement?
You could try the following, which is bit more focused:
def select(row):
col1, col2 = row
df = df__data[["key1", "key2", "key3"] + [col1, col2]]
return df[df[col1] != df[col2]].rename(columns={col1: "value1", col2: "value2"})
result = pd.concat(
map(select, df__attributes_to_compare.itertuples(index=False)),
axis=0, ignore_index=True
)
The result
is not exactly in the same order given in the question but essentially the same:
key1 key2 key3 value1 value2
0 sfdg dagd dgsg 12 43
1 afrtf yxcbb ertet 34 45
2 sfdg dagd dgsg 12 24