Renaming column names from a data set in pandas
Question:
I am trying to rename the columns of a DataFrame whose names contain spaces. The DataFrame (df) has 45 columns, and most of them have spaces in their names. For instance, df.columns.values[1] = 'Date Release', and the name should be changed to 'Date_Release'. I tried DataFrame.rename() and DataFrame.columns.values[], but neither worked. I would much appreciate it if you could help me find out what I did wrong.
# Asker's first attempt: rename each space-containing column one at a time
# with DataFrame.rename, printing the names that need no change.
for colmns in df:
    if ' ' in colmns:
        # split() with no argument splits on any run of whitespace, so the
        # pieces are re-joined with a single underscore between them.
        colmns_new = '_'.join(colmns.split())
        # rename() returns a new frame, hence the re-assignment to df.
        df = df.rename(columns={"%s" % colmns: "%s" % colmns_new})
    else:
        print(colmns)
print(df)
or this one:
# Asker's second attempt: walk the column positions and try to overwrite
# each label through df.columns.values.
for i in range(len(df.columns)):
    old = df.columns.values[i]
    if ' ' in old:
        new = '_'.join(old.split())
        # NOTE: this chained assignment is the source of the reported
        # AttributeError. Python binds the targets left to right, so df is
        # rebound to the one-element list first, and df.columns is then
        # looked up on that list. It is left as the asker wrote it.
        df = df.columns.values[i] = ['%s' % new]
        print("%s" % new)
print(df)
Error: AttributeError: 'list' object has no attribute 'columns'
Answers:
import pandas as pd
# Rebuild every column label with spaces swapped for underscores, then
# assign the resulting list back as the frame's columns.
df.columns = [label.replace(' ', '_') for label in df.columns]
You can use a regular expression to replace the spaces with underscores.
Here is an example DataFrame in which some column names contain spaces:
import re

# Sample frame: 'col 1' .. 'col 9' contain a space, 'col10' does not.
cols = ['col {}'.format(n) for n in range(1, 10)]
cols.append('col10')
df = pd.DataFrame(columns=cols)

# Substitute an underscore for each space in every column label.
df.columns = [re.sub(' ', '_', label) for label in df.columns]
You get
col_1 col_2 col_3 col_4 col_5 col_6 col_7 col_8 col_9 col10
You can simply assign df.columns = df.columns.str.replace(' ','_') to replace the spaces with underscores.
Here’s an example: column a1 does not contain a space, whereas columns b 2 and c 3 do.
>>> # Build a 4-row frame: 'a1' has no space, 'b 2' and 'c 3' each contain one.
>>> df = pd.DataFrame({'a1': range(1,5), 'b 2': list ('abcd'), 'c 3':list('pqrs')})
>>> df
a1 b 2 c 3
0 1 a p
1 2 b q
2 3 c r
3 4 d s
>>> # .str.replace on the columns index rewrites each label; assign it back.
>>> df.columns = df.columns.str.replace(' ','_')
>>> df
a1 b_2 c_3
0 1 a p
1 2 b q
2 3 c r
3 4 d s
-- Dimension Tables
-- DIM_CROP: crop dimension keyed by CROP_NUMBER; every column is char(18).
CREATE TABLE DIM_CROP (
    CROP_NUMBER char(18) NOT NULL,
    VARIETY char(18),
    VARIETY_CATEGORY char(18),
    TYPE char(18),
    DENOTER_FLAG char(18),
    SCADA_ID char(18),
    SAVORMETRICS_ID char(18),
    SEEDED_Y_N char(18),
    HARVESTED_Y_N char(18),
    YIELD_SOURCE_FLAG char(18),
    TRAY_TYPE char(18),
    SUBSTRATE_MATERIAL_TYPE char(18),
    PRIMARY KEY (CROP_NUMBER)
);
-- DIM_CUSTOMER_DISTRIBUTION_CENTRE: customer distribution-centre dimension
-- keyed by CUSTOMER_DC_ID; every column is varchar(20).
CREATE TABLE DIM_CUSTOMER_DISTRIBUTION_CENTRE (
    CUSTOMER_DC_ID varchar(20) NOT NULL,
    NAME varchar(20),
    STREET_ADDRESS_1 varchar(20),
    UNIT varchar(20),
    CITY varchar(20),
    PROVINCE varchar(20),
    COUNTRY varchar(20),
    POSTAL_CODE varchar(20),
    CUSTOMER_NAME varchar(20),
    CUSTOMER_DC_SITE_ID varchar(20),
    PRIMARY KEY (CUSTOMER_DC_ID)
);
-- DIM_CUSTOMER_ORDER: customer-order dimension keyed by CUSTOMER_ORDER_ID,
-- carrying carrier/environment text and three bit flags.
CREATE TABLE DIM_CUSTOMER_ORDER (
    CUSTOMER_ORDER_ID numeric NOT NULL,
    CARRIER_NAME varchar(20),
    FGI_ENVIRONMENT varchar(20),
    DELETED_FLAG bit,
    SHIPPED_FLAG bit,
    PALLETIZED_FLAG bit,
    PRIMARY KEY (CUSTOMER_ORDER_ID)
);
-- DIM_DATE: date dimension keyed by DATE_ID, with calendar and fiscal parts.
-- NOTE(review): MONTH and DAY are typed datetime while the other calendar
-- parts are varchar/integer; confirm this is intended.
-- NOTE(review): MONTH_OF_NAME may have been meant as a month-name column;
-- the name is kept unchanged here.
CREATE TABLE DIM_DATE (
    DATE_ID varchar(20) NOT NULL,
    QUARTER varchar(20),
    YEAR varchar(20),
    MONTH datetime,
    DAY datetime,
    WEEK integer,
    FISCAL_QUARTER integer,
    FISCAL_YEAR integer,
    FISCAL_MONTH integer,
    FISCAL_PERIOD integer,
    FISCAL_MONTH_NAME varchar(20),
    FISCAL_WEEK integer,
    DAYLIGHT_SAVING_FLAG bit,
    PERIOD integer,
    MONTH_OF_YEAR integer,
    MONTH_OF_NAME varchar(20),
    DAY_OF_MONTH integer,
    DAY_OF_WEEK integer,
    PRIMARY KEY (DATE_ID)
);
-- DIM_GOODLEAF_FACILITY_SITE: facility-site dimension keyed by
-- GOODLEAF_FACILITY_SITE_ID, with site name, region group and address fields.
CREATE TABLE DIM_GOODLEAF_FACILITY_SITE (
    GOODLEAF_FACILITY_SITE_ID char(18) NOT NULL,
    SITE_NAME varchar(20),
    REGION_GROUP varchar(20),
    STREET_ADDRESS varchar(20),
    UNIT varchar(20),
    CITY varchar(20),
    PROVINCE varchar(20),
    COUNTRY varchar(20),
    POSTAL_CODE varchar(20),
    PRIMARY KEY (GOODLEAF_FACILITY_SITE_ID)
);
-- DIM_SEED_LOT: seed-lot dimension whose only column is its key, LOT_NUMBER.
CREATE TABLE DIM_SEED_LOT (
    LOT_NUMBER char(18) NOT NULL,
    PRIMARY KEY (LOT_NUMBER)
);
-- DIM_SKU: SKU dimension keyed by SKU_ID, with product/format descriptors,
-- units per case and labelled unit/case weights.
CREATE TABLE DIM_SKU (
    SKU_ID varchar(20) NOT NULL,
    PRODUCT varchar(20),
    FORMAT varchar(20),
    SKU_NAME varchar(20),
    UNIT_QUANTITY_PER_CASE numeric,
    UNIT_LABELLED_WEIGHT decimal(10, 2),
    SAGE_PRODUCT_ID varchar(20),
    SAGE_SAL_FORMAT_CODE varchar(20),
    CASE_LABELLED_WEIGHT decimal(10, 2),
    PRODUCT_CATEGORY varchar(20),
    PRIMARY KEY (SKU_ID)
);
-- DIM_TRACEABLE_UNIT: traceable-unit dimension keyed by TRACEABLE_UNIT_ID.
-- Every column is char(18), including the *_DATE columns.
CREATE TABLE DIM_TRACEABLE_UNIT (
    TRACEABLE_UNIT_ID char(18) NOT NULL,
    FORMATTED_BEST_BEFORE_DATE char(18),
    LOT_CODE char(18),
    FARM_SOURCE_SITE char(18),
    BEST_BEFORE_DATE char(18),
    PACKAGED_DATE char(18),
    PRODUCT char(18),
    PRODUCT_LINE_CATEGORY char(18),
    SKU char(18),
    SKU___QTY_OF_UNITS_PER_CASE char(18),
    SKU___UNIT_WEIGHT char(18),
    SKU___CASE_WEIGHT char(18),
    QA_STATUS char(18),
    PRIMARY KEY (TRACEABLE_UNIT_ID)
);
-- Fact Tables
-- FACT_CROP: crop fact table keyed by CROP_ID, holding seeding/harvest dates,
-- site ids, bench/tray counts and per-reason discard-tray columns.
-- NOTE(review): the six HARVERT_DISCARD_TRAY___* columns look like a
-- misspelling of HARVEST_ (compare HARVEST_DISCARD_TRAY___HORT). The names
-- are kept as-is because renaming them would change the table's interface.
CREATE TABLE FACT_CROP (
    CROP_ID char(18) NOT NULL,
    SEED_DATE_START char(18),
    SEED_DATE_END char(18),
    HARVEST_DATE_START char(18),
    HARVEST_DATE_END char(18),
    SEEDING_DATE_ID char(18),
    HARVEST_DATE_ID char(18),
    SEEDING_SITE_ID char(18),
    HARVEST_SITE_ID char(18),
    SEEDING_TARGET_BENCHES char(18),
    SEEDING_TARGET_TRAYS char(18),
    SEEDING_COMPLETED_BENCHES char(18),
    SEEDING_COMPLETED_TRAYS char(18),
    SEEDING_COMPLETED_BENCHES_SCADA char(18),
    HARVEST_DISCARD_TRAY___HORT char(18),
    HARVERT_DISCARD_TRAY___QA char(18),
    HARVERT_DISCARD_TRAY___PROCESS char(18),
    HARVERT_DISCARD_TRAY___CASE_TARGET char(18),
    HARVERT_DISCARD_TRAY___SCHEDULE char(18),
    HARVERT_DISCARD_TRAY___MISSING char(18),
    HARVERT_DISCARD_TRAY___ADMIN char(18),
    HARVEST_NET_CUT_TRAYS char(18),
    DATE_ID varchar(20),
    CROP_NUMBER char(18),
    LOT_NUMBER char(18),
    GOODLEAF_FACILITY_SITE_ID char(18),
    PRIMARY KEY (CROP_ID)
);
-- FACT_CUSTOMER_ORDER: customer-order fact table with a five-column composite
-- primary key (order-create date, customer DC, facility site, SKU, order id).
-- NOTE(review): INVOICED_VALUE_$ contains '$', which not every SQL dialect
-- accepts in an unquoted identifier; verify against the target engine.
CREATE TABLE FACT_CUSTOMER_ORDER (
    ORDER_CREATE_DATE_ID varchar(20) NOT NULL,
    CUSTOMER_DC_ID varchar(20) NOT NULL,
    GOODLEAF_FACILITY_SITE_ID char(18) NOT NULL,
    SKU_ID varchar(20) NOT NULL,
    CUSTOMER_ORDER_ID numeric NOT NULL,
    SHIPPED_DATE_ID varchar(20),
    EXPECTED_SHIP_DATE_ID varchar(20),
    PALLETIZED_DATE_ID varchar(20),
    ORDER_CREATE_DATE datetime,
    SHIPPED_DATE datetime,
    EXPECTED_SHIPPED_DATE datetime,
    PALLETIZE_DATE datetime,
    QUANTITY_ORDERED numeric,
    QUANTITY_SHIPPED numeric,
    QUANTITY_SHORTED numeric,
    TRACEABLE_UNITS_COUNT numeric,
    EXPIRY_DAYS_REMAINING_MAX numeric,
    EXPIRY_DAYS_REMAINING_MIN numeric,
    EXPIRY_DAYS_REMAINING_AVG numeric,
    SHIP_WEIGHT_LABELLED decimal(10, 2),
    SKU_LABELLED_WEIGHT decimal(10, 2),
    INVOICED_VALUE_$ decimal(10, 2),
    SHIP_SOURCE_SITE_ID varchar(20),
    SHIPPED_FLAG_COUNT numeric,
    DATE_ID varchar(20),
    PRIMARY KEY (ORDER_CREATE_DATE_ID, CUSTOMER_DC_ID, GOODLEAF_FACILITY_SITE_ID, SKU_ID, CUSTOMER_ORDER_ID)
);
-- Note: In serverless SQL pools, foreign key constraints are not enforced, but can be defined for documentation purposes.
-- Example of adding Foreign Key Constraint
-- Declare FACT_CROP.DATE_ID as a foreign key to DIM_DATE.DATE_ID.
ALTER TABLE FACT_CROP
    ADD CONSTRAINT FK_FACT_CROP_DATE_ID
        FOREIGN KEY (DATE_ID)
        REFERENCES DIM_DATE (DATE_ID);
-- Additional ALTER TABLE statements for adding Foreign Key Constraints to other Fact tables as necessary...
I am trying to rename the columns of a DataFrame whose names contain spaces. The DataFrame (df) has 45 columns, and most of them have spaces in their names. For instance, df.columns.values[1] = 'Date Release', and the name should be changed to 'Date_Release'. I tried DataFrame.rename() and DataFrame.columns.values[], but neither worked. I would much appreciate it if you could help me find out what I did wrong.
# Asker's first attempt: rename each space-containing column one at a time
# with DataFrame.rename, printing the names that need no change.
for colmns in df:
    if ' ' in colmns:
        # split() with no argument splits on any run of whitespace, so the
        # pieces are re-joined with a single underscore between them.
        colmns_new = '_'.join(colmns.split())
        # rename() returns a new frame, hence the re-assignment to df.
        df = df.rename(columns={"%s" % colmns: "%s" % colmns_new})
    else:
        print(colmns)
print(df)
or this one:
# Asker's second attempt: walk the column positions and try to overwrite
# each label through df.columns.values.
for i in range(len(df.columns)):
    old = df.columns.values[i]
    if ' ' in old:
        new = '_'.join(old.split())
        # NOTE: this chained assignment is the source of the reported
        # AttributeError. Python binds the targets left to right, so df is
        # rebound to the one-element list first, and df.columns is then
        # looked up on that list. It is left as the asker wrote it.
        df = df.columns.values[i] = ['%s' % new]
        print("%s" % new)
print(df)
Error: AttributeError: 'list' object has no attribute 'columns'
import pandas as pd
# Rebuild every column label with spaces swapped for underscores, then
# assign the resulting list back as the frame's columns.
df.columns = [label.replace(' ', '_') for label in df.columns]
You can use a regular expression to replace the spaces with underscores.
Here is an example DataFrame in which some column names contain spaces:
import re

# Sample frame: 'col 1' .. 'col 9' contain a space, 'col10' does not.
cols = ['col {}'.format(n) for n in range(1, 10)]
cols.append('col10')
df = pd.DataFrame(columns=cols)

# Substitute an underscore for each space in every column label.
df.columns = [re.sub(' ', '_', label) for label in df.columns]
You get
col_1 col_2 col_3 col_4 col_5 col_6 col_7 col_8 col_9 col10
You can simply assign df.columns = df.columns.str.replace(' ','_') to replace the spaces with underscores.
Here’s an example: column a1 does not contain a space, whereas columns b 2 and c 3 do.
>>> # Build a 4-row frame: 'a1' has no space, 'b 2' and 'c 3' each contain one.
>>> df = pd.DataFrame({'a1': range(1,5), 'b 2': list ('abcd'), 'c 3':list('pqrs')})
>>> df
a1 b 2 c 3
0 1 a p
1 2 b q
2 3 c r
3 4 d s
>>> # .str.replace on the columns index rewrites each label; assign it back.
>>> df.columns = df.columns.str.replace(' ','_')
>>> df
a1 b_2 c_3
0 1 a p
1 2 b q
2 3 c r
3 4 d s
-- Dimension Tables
-- DIM_CROP: crop dimension keyed by CROP_NUMBER; every column is char(18).
CREATE TABLE DIM_CROP (
    CROP_NUMBER char(18) NOT NULL,
    VARIETY char(18),
    VARIETY_CATEGORY char(18),
    TYPE char(18),
    DENOTER_FLAG char(18),
    SCADA_ID char(18),
    SAVORMETRICS_ID char(18),
    SEEDED_Y_N char(18),
    HARVESTED_Y_N char(18),
    YIELD_SOURCE_FLAG char(18),
    TRAY_TYPE char(18),
    SUBSTRATE_MATERIAL_TYPE char(18),
    PRIMARY KEY (CROP_NUMBER)
);
-- DIM_CUSTOMER_DISTRIBUTION_CENTRE: customer distribution-centre dimension
-- keyed by CUSTOMER_DC_ID; every column is varchar(20).
CREATE TABLE DIM_CUSTOMER_DISTRIBUTION_CENTRE (
    CUSTOMER_DC_ID varchar(20) NOT NULL,
    NAME varchar(20),
    STREET_ADDRESS_1 varchar(20),
    UNIT varchar(20),
    CITY varchar(20),
    PROVINCE varchar(20),
    COUNTRY varchar(20),
    POSTAL_CODE varchar(20),
    CUSTOMER_NAME varchar(20),
    CUSTOMER_DC_SITE_ID varchar(20),
    PRIMARY KEY (CUSTOMER_DC_ID)
);
-- DIM_CUSTOMER_ORDER: customer-order dimension keyed by CUSTOMER_ORDER_ID,
-- carrying carrier/environment text and three bit flags.
CREATE TABLE DIM_CUSTOMER_ORDER (
    CUSTOMER_ORDER_ID numeric NOT NULL,
    CARRIER_NAME varchar(20),
    FGI_ENVIRONMENT varchar(20),
    DELETED_FLAG bit,
    SHIPPED_FLAG bit,
    PALLETIZED_FLAG bit,
    PRIMARY KEY (CUSTOMER_ORDER_ID)
);
-- DIM_DATE: date dimension keyed by DATE_ID, with calendar and fiscal parts.
-- NOTE(review): MONTH and DAY are typed datetime while the other calendar
-- parts are varchar/integer; confirm this is intended.
-- NOTE(review): MONTH_OF_NAME may have been meant as a month-name column;
-- the name is kept unchanged here.
CREATE TABLE DIM_DATE (
    DATE_ID varchar(20) NOT NULL,
    QUARTER varchar(20),
    YEAR varchar(20),
    MONTH datetime,
    DAY datetime,
    WEEK integer,
    FISCAL_QUARTER integer,
    FISCAL_YEAR integer,
    FISCAL_MONTH integer,
    FISCAL_PERIOD integer,
    FISCAL_MONTH_NAME varchar(20),
    FISCAL_WEEK integer,
    DAYLIGHT_SAVING_FLAG bit,
    PERIOD integer,
    MONTH_OF_YEAR integer,
    MONTH_OF_NAME varchar(20),
    DAY_OF_MONTH integer,
    DAY_OF_WEEK integer,
    PRIMARY KEY (DATE_ID)
);
-- DIM_GOODLEAF_FACILITY_SITE: facility-site dimension keyed by
-- GOODLEAF_FACILITY_SITE_ID, with site name, region group and address fields.
CREATE TABLE DIM_GOODLEAF_FACILITY_SITE (
    GOODLEAF_FACILITY_SITE_ID char(18) NOT NULL,
    SITE_NAME varchar(20),
    REGION_GROUP varchar(20),
    STREET_ADDRESS varchar(20),
    UNIT varchar(20),
    CITY varchar(20),
    PROVINCE varchar(20),
    COUNTRY varchar(20),
    POSTAL_CODE varchar(20),
    PRIMARY KEY (GOODLEAF_FACILITY_SITE_ID)
);
-- DIM_SEED_LOT: seed-lot dimension whose only column is its key, LOT_NUMBER.
CREATE TABLE DIM_SEED_LOT (
    LOT_NUMBER char(18) NOT NULL,
    PRIMARY KEY (LOT_NUMBER)
);
-- DIM_SKU: SKU dimension keyed by SKU_ID, with product/format descriptors,
-- units per case and labelled unit/case weights.
CREATE TABLE DIM_SKU (
    SKU_ID varchar(20) NOT NULL,
    PRODUCT varchar(20),
    FORMAT varchar(20),
    SKU_NAME varchar(20),
    UNIT_QUANTITY_PER_CASE numeric,
    UNIT_LABELLED_WEIGHT decimal(10, 2),
    SAGE_PRODUCT_ID varchar(20),
    SAGE_SAL_FORMAT_CODE varchar(20),
    CASE_LABELLED_WEIGHT decimal(10, 2),
    PRODUCT_CATEGORY varchar(20),
    PRIMARY KEY (SKU_ID)
);
-- DIM_TRACEABLE_UNIT: traceable-unit dimension keyed by TRACEABLE_UNIT_ID.
-- Every column is char(18), including the *_DATE columns.
CREATE TABLE DIM_TRACEABLE_UNIT (
    TRACEABLE_UNIT_ID char(18) NOT NULL,
    FORMATTED_BEST_BEFORE_DATE char(18),
    LOT_CODE char(18),
    FARM_SOURCE_SITE char(18),
    BEST_BEFORE_DATE char(18),
    PACKAGED_DATE char(18),
    PRODUCT char(18),
    PRODUCT_LINE_CATEGORY char(18),
    SKU char(18),
    SKU___QTY_OF_UNITS_PER_CASE char(18),
    SKU___UNIT_WEIGHT char(18),
    SKU___CASE_WEIGHT char(18),
    QA_STATUS char(18),
    PRIMARY KEY (TRACEABLE_UNIT_ID)
);
-- Fact Tables
-- FACT_CROP: crop fact table keyed by CROP_ID, holding seeding/harvest dates,
-- site ids, bench/tray counts and per-reason discard-tray columns.
-- NOTE(review): the six HARVERT_DISCARD_TRAY___* columns look like a
-- misspelling of HARVEST_ (compare HARVEST_DISCARD_TRAY___HORT). The names
-- are kept as-is because renaming them would change the table's interface.
CREATE TABLE FACT_CROP (
    CROP_ID char(18) NOT NULL,
    SEED_DATE_START char(18),
    SEED_DATE_END char(18),
    HARVEST_DATE_START char(18),
    HARVEST_DATE_END char(18),
    SEEDING_DATE_ID char(18),
    HARVEST_DATE_ID char(18),
    SEEDING_SITE_ID char(18),
    HARVEST_SITE_ID char(18),
    SEEDING_TARGET_BENCHES char(18),
    SEEDING_TARGET_TRAYS char(18),
    SEEDING_COMPLETED_BENCHES char(18),
    SEEDING_COMPLETED_TRAYS char(18),
    SEEDING_COMPLETED_BENCHES_SCADA char(18),
    HARVEST_DISCARD_TRAY___HORT char(18),
    HARVERT_DISCARD_TRAY___QA char(18),
    HARVERT_DISCARD_TRAY___PROCESS char(18),
    HARVERT_DISCARD_TRAY___CASE_TARGET char(18),
    HARVERT_DISCARD_TRAY___SCHEDULE char(18),
    HARVERT_DISCARD_TRAY___MISSING char(18),
    HARVERT_DISCARD_TRAY___ADMIN char(18),
    HARVEST_NET_CUT_TRAYS char(18),
    DATE_ID varchar(20),
    CROP_NUMBER char(18),
    LOT_NUMBER char(18),
    GOODLEAF_FACILITY_SITE_ID char(18),
    PRIMARY KEY (CROP_ID)
);
-- FACT_CUSTOMER_ORDER: customer-order fact table with a five-column composite
-- primary key (order-create date, customer DC, facility site, SKU, order id).
-- NOTE(review): INVOICED_VALUE_$ contains '$', which not every SQL dialect
-- accepts in an unquoted identifier; verify against the target engine.
CREATE TABLE FACT_CUSTOMER_ORDER (
    ORDER_CREATE_DATE_ID varchar(20) NOT NULL,
    CUSTOMER_DC_ID varchar(20) NOT NULL,
    GOODLEAF_FACILITY_SITE_ID char(18) NOT NULL,
    SKU_ID varchar(20) NOT NULL,
    CUSTOMER_ORDER_ID numeric NOT NULL,
    SHIPPED_DATE_ID varchar(20),
    EXPECTED_SHIP_DATE_ID varchar(20),
    PALLETIZED_DATE_ID varchar(20),
    ORDER_CREATE_DATE datetime,
    SHIPPED_DATE datetime,
    EXPECTED_SHIPPED_DATE datetime,
    PALLETIZE_DATE datetime,
    QUANTITY_ORDERED numeric,
    QUANTITY_SHIPPED numeric,
    QUANTITY_SHORTED numeric,
    TRACEABLE_UNITS_COUNT numeric,
    EXPIRY_DAYS_REMAINING_MAX numeric,
    EXPIRY_DAYS_REMAINING_MIN numeric,
    EXPIRY_DAYS_REMAINING_AVG numeric,
    SHIP_WEIGHT_LABELLED decimal(10, 2),
    SKU_LABELLED_WEIGHT decimal(10, 2),
    INVOICED_VALUE_$ decimal(10, 2),
    SHIP_SOURCE_SITE_ID varchar(20),
    SHIPPED_FLAG_COUNT numeric,
    DATE_ID varchar(20),
    PRIMARY KEY (ORDER_CREATE_DATE_ID, CUSTOMER_DC_ID, GOODLEAF_FACILITY_SITE_ID, SKU_ID, CUSTOMER_ORDER_ID)
);
-- Note: In serverless SQL pools, foreign key constraints are not enforced, but can be defined for documentation purposes.
-- Example of adding Foreign Key Constraint
-- Declare FACT_CROP.DATE_ID as a foreign key to DIM_DATE.DATE_ID.
ALTER TABLE FACT_CROP
    ADD CONSTRAINT FK_FACT_CROP_DATE_ID
        FOREIGN KEY (DATE_ID)
        REFERENCES DIM_DATE (DATE_ID);
-- Additional ALTER TABLE statements for adding Foreign Key Constraints to other Fact tables as necessary...