Renaming column names from a data set in pandas

Question:

I am trying to rename the columns of a DataFrame whose names contain spaces. The DataFrame (df) has 45 columns, and most of them have spaces in their names. For instance, df.columns.values[1] = 'Date Release', and the name should be changed to 'Date_Release'. I tried DataFrame.rename() and DataFrame.columns.values[], but neither worked. I would much appreciate it if you could help me find out what I did wrong.

# Attempt 1: rename one column per iteration with DataFrame.rename().
for colmns in df:
    if ' ' in colmns:
        # split() with no argument breaks on any run of whitespace; the
        # pieces are then re-joined with a single underscore between them.
        colmns_new = '_'.join(colmns.split())
        # rename() hands back a new DataFrame, hence the re-assignment to df.
        df = df.rename (columns = {"%s" %colmns : "%s" %colmns_new})
    else:
        # Names without a space are left untouched and simply echoed.
        print (colmns)

print (df)

or this one:

# Attempt 2: overwrite each label in place through df.columns.values.
for i in range (len(df.columns)):
    old= df.columns.values[i]
    if ' ' in old:
        new = '_'.join(old.split())
        # NOTE(review): this chained assignment is what raises the
        # AttributeError quoted below. Targets are bound left to right, so df
        # is first rebound to the one-element list, and the second target,
        # df.columns.values[i], is then looked up on that list.
        df = df.columns.values[i] = ['%s' % new]
        print ("%s" % new)
print (df)

Error: AttributeError: 'list' object has no attribute 'columns'

Asked By: Firuz

||

Answers:

import pandas as pd
# Rebuild the header in one pass: every space in a label becomes '_'.
df.columns = [label.replace(' ', '_') for label in df.columns]
Answered By: Mo Huss

You can use a regular expression to replace the spaces with underscores.

Here is an example df in which some of the column names contain spaces:

import re

# Nine sample labels that contain a space, plus one ('col10') that does not.
cols = ['col {}'.format(n) for n in range(1, 10)]
cols.append('col10')
df = pd.DataFrame(columns=cols)

# Substitute an underscore for every space in each column label.
df.columns = [re.sub(' ', '_', label) for label in df.columns]

You get

col_1   col_2   col_3   col_4   col_5   col_6   col_7   col_8   col_9   col10
Answered By: Vaishali

You can simply assign df.columns = df.columns.str.replace(' ','_') to replace every space with an underscore.

Here’s an example: column a1 has no space in its name, whereas columns b 2 and c 3 each contain one.

>>> # Sample frame: 'a1' has no space, while 'b 2' and 'c 3' each contain one.
>>> df = pd.DataFrame({'a1': range(1,5), 'b 2': list ('abcd'), 'c 3':list('pqrs')})
>>> df
   a1 b 2 c 3
0   1   a   p
1   2   b   q
2   3   c   r
3   4   d   s
>>> # Replace the space in every column label with '_' and assign the result
>>> # back as the new header; the row data is unchanged.
>>> df.columns = df.columns.str.replace(' ','_')
>>> df
   a1 b_2 c_3
0   1   a   p
1   2   b   q
2   3   c   r
3   4   d   s
Answered By: Joe Ferndz
-- Dimension Tables

-- Crop dimension: one row per CROP_NUMBER (the primary key).
-- Every attribute, including the flag columns, is a fixed-width char(18).
CREATE TABLE DIM_CROP (
    CROP_NUMBER              char(18) NOT NULL PRIMARY KEY,
    VARIETY                  char(18),
    VARIETY_CATEGORY         char(18),
    TYPE                     char(18),
    DENOTER_FLAG             char(18),
    SCADA_ID                 char(18),
    SAVORMETRICS_ID          char(18),
    SEEDED_Y_N               char(18),
    HARVESTED_Y_N            char(18),
    YIELD_SOURCE_FLAG        char(18),
    TRAY_TYPE                char(18),
    SUBSTRATE_MATERIAL_TYPE  char(18)
);

-- Customer distribution-centre dimension: one row per CUSTOMER_DC_ID.
-- All columns, address parts included, are varchar(20).
CREATE TABLE DIM_CUSTOMER_DISTRIBUTION_CENTRE (
    CUSTOMER_DC_ID       varchar(20) NOT NULL PRIMARY KEY,
    NAME                 varchar(20),
    STREET_ADDRESS_1     varchar(20),
    UNIT                 varchar(20),
    CITY                 varchar(20),
    PROVINCE             varchar(20),
    COUNTRY              varchar(20),
    POSTAL_CODE          varchar(20),
    CUSTOMER_NAME        varchar(20),
    CUSTOMER_DC_SITE_ID  varchar(20)
);

-- Customer-order dimension: one row per CUSTOMER_ORDER_ID, carrying the
-- carrier / environment descriptors and three bit status flags.
CREATE TABLE DIM_CUSTOMER_ORDER (
    CUSTOMER_ORDER_ID  numeric NOT NULL PRIMARY KEY,
    CARRIER_NAME       varchar(20),
    FGI_ENVIRONMENT    varchar(20),
    DELETED_FLAG       bit,
    SHIPPED_FLAG       bit,
    PALLETIZED_FLAG    bit
);

-- Date dimension: one row per DATE_ID, with calendar and fiscal breakdowns.
-- NOTE(review): MONTH and DAY are typed datetime while QUARTER and YEAR are
-- varchar(20) and the remaining parts are integer -- confirm this is intended.
CREATE TABLE DIM_DATE (
    DATE_ID               varchar(20) NOT NULL PRIMARY KEY,

    -- calendar attributes
    QUARTER               varchar(20),
    YEAR                  varchar(20),
    MONTH                 datetime,
    DAY                   datetime,
    WEEK                  integer,

    -- fiscal attributes
    FISCAL_QUARTER        integer,
    FISCAL_YEAR           integer,
    FISCAL_MONTH          integer,
    FISCAL_PERIOD         integer,
    FISCAL_MONTH_NAME     varchar(20),
    FISCAL_WEEK           integer,

    DAYLIGHT_SAVING_FLAG  bit,
    PERIOD                integer,
    MONTH_OF_YEAR         integer,
    MONTH_OF_NAME         varchar(20),
    DAY_OF_MONTH          integer,
    DAY_OF_WEEK           integer
);

-- Facility-site dimension: one row per GOODLEAF_FACILITY_SITE_ID.
-- The key is char(18); the descriptive and address columns are varchar(20).
CREATE TABLE DIM_GOODLEAF_FACILITY_SITE (
    GOODLEAF_FACILITY_SITE_ID  char(18) NOT NULL PRIMARY KEY,
    SITE_NAME                  varchar(20),
    REGION_GROUP               varchar(20),
    STREET_ADDRESS             varchar(20),
    UNIT                       varchar(20),
    CITY                       varchar(20),
    PROVINCE                   varchar(20),
    COUNTRY                    varchar(20),
    POSTAL_CODE                varchar(20)
);

-- Seed-lot dimension: holds only its key, LOT_NUMBER.
CREATE TABLE DIM_SEED_LOT (
    LOT_NUMBER  char(18) NOT NULL PRIMARY KEY
);

-- SKU dimension: one row per SKU_ID, with product descriptors, the per-case
-- unit quantity and the labelled unit / case weights (decimal(10, 2)).
CREATE TABLE DIM_SKU (
    SKU_ID                  varchar(20) NOT NULL PRIMARY KEY,
    PRODUCT                 varchar(20),
    FORMAT                  varchar(20),
    SKU_NAME                varchar(20),
    UNIT_QUANTITY_PER_CASE  numeric,
    UNIT_LABELLED_WEIGHT    decimal(10, 2),
    SAGE_PRODUCT_ID         varchar(20),
    SAGE_SAL_FORMAT_CODE    varchar(20),
    CASE_LABELLED_WEIGHT    decimal(10, 2),
    PRODUCT_CATEGORY        varchar(20)
);

-- Traceable-unit dimension: one row per TRACEABLE_UNIT_ID.
-- NOTE(review): every column is char(18), including the *_DATE, quantity and
-- weight columns -- confirm whether these are placeholder types.
CREATE TABLE DIM_TRACEABLE_UNIT (
    TRACEABLE_UNIT_ID            char(18) NOT NULL PRIMARY KEY,
    FORMATTED_BEST_BEFORE_DATE   char(18),
    LOT_CODE                     char(18),
    FARM_SOURCE_SITE             char(18),
    BEST_BEFORE_DATE             char(18),
    PACKAGED_DATE                char(18),
    PRODUCT                      char(18),
    PRODUCT_LINE_CATEGORY        char(18),
    SKU                          char(18),
    SKU___QTY_OF_UNITS_PER_CASE  char(18),
    SKU___UNIT_WEIGHT            char(18),
    SKU___CASE_WEIGHT            char(18),
    QA_STATUS                    char(18)
);

-- Fact Tables

-- Crop fact table: one row per CROP_ID (the primary key).
-- All measures and dates are declared char(18); only DATE_ID is varchar(20).
-- NOTE(review): the six "HARVERT_DISCARD_TRAY___*" columns look like a typo
-- for "HARVEST" (compare HARVEST_DISCARD_TRAY___HORT). The names are kept
-- as-is because renaming columns would break anything that already reads
-- them -- confirm against the source feed before correcting.
CREATE TABLE FACT_CROP (
    CROP_ID                             char(18) NOT NULL PRIMARY KEY,

    -- seeding / harvest windows and their date and site ids
    SEED_DATE_START                     char(18),
    SEED_DATE_END                       char(18),
    HARVEST_DATE_START                  char(18),
    HARVEST_DATE_END                    char(18),
    SEEDING_DATE_ID                     char(18),
    HARVEST_DATE_ID                     char(18),
    SEEDING_SITE_ID                     char(18),
    HARVEST_SITE_ID                     char(18),

    -- seeding targets and completions
    SEEDING_TARGET_BENCHES              char(18),
    SEEDING_TARGET_TRAYS                char(18),
    SEEDING_COMPLETED_BENCHES           char(18),
    SEEDING_COMPLETED_TRAYS             char(18),
    SEEDING_COMPLETED_BENCHES_SCADA     char(18),

    -- harvest discards and net cut
    HARVEST_DISCARD_TRAY___HORT         char(18),
    HARVERT_DISCARD_TRAY___QA           char(18),
    HARVERT_DISCARD_TRAY___PROCESS      char(18),
    HARVERT_DISCARD_TRAY___CASE_TARGET  char(18),
    HARVERT_DISCARD_TRAY___SCHEDULE     char(18),
    HARVERT_DISCARD_TRAY___MISSING      char(18),
    HARVERT_DISCARD_TRAY___ADMIN        char(18),
    HARVEST_NET_CUT_TRAYS               char(18),

    -- columns named after the dimension-table keys
    DATE_ID                             varchar(20),
    CROP_NUMBER                         char(18),
    LOT_NUMBER                          char(18),
    GOODLEAF_FACILITY_SITE_ID           char(18)
);

-- Customer-order fact table. A row is identified by the five-column
-- composite key declared at the bottom of the definition.
-- NOTE(review): INVOICED_VALUE_$ has a '$' in an unquoted identifier, which
-- is not portable across SQL dialects -- confirm the target engine accepts it.
CREATE TABLE FACT_CUSTOMER_ORDER (
    -- composite key columns
    ORDER_CREATE_DATE_ID       varchar(20) NOT NULL,
    CUSTOMER_DC_ID             varchar(20) NOT NULL,
    GOODLEAF_FACILITY_SITE_ID  char(18)    NOT NULL,
    SKU_ID                     varchar(20) NOT NULL,
    CUSTOMER_ORDER_ID          numeric     NOT NULL,

    -- date ids and timestamps
    SHIPPED_DATE_ID            varchar(20),
    EXPECTED_SHIP_DATE_ID      varchar(20),
    PALLETIZED_DATE_ID         varchar(20),
    ORDER_CREATE_DATE          datetime,
    SHIPPED_DATE               datetime,
    EXPECTED_SHIPPED_DATE      datetime,
    PALLETIZE_DATE             datetime,

    -- measures
    QUANTITY_ORDERED           numeric,
    QUANTITY_SHIPPED           numeric,
    QUANTITY_SHORTED           numeric,
    TRACEABLE_UNITS_COUNT      numeric,
    EXPIRY_DAYS_REMAINING_MAX  numeric,
    EXPIRY_DAYS_REMAINING_MIN  numeric,
    EXPIRY_DAYS_REMAINING_AVG  numeric,
    SHIP_WEIGHT_LABELLED       decimal(10, 2),
    SKU_LABELLED_WEIGHT        decimal(10, 2),
    INVOICED_VALUE_$           decimal(10, 2),
    SHIP_SOURCE_SITE_ID        varchar(20),
    SHIPPED_FLAG_COUNT         numeric,
    DATE_ID                    varchar(20),

    PRIMARY KEY (
        ORDER_CREATE_DATE_ID,
        CUSTOMER_DC_ID,
        GOODLEAF_FACILITY_SITE_ID,
        SKU_ID,
        CUSTOMER_ORDER_ID
    )
);

-- Note: In serverless SQL pools, foreign key constraints are not enforced, but can be defined for documentation purposes.
-- Example of adding Foreign Key Constraint
-- Declare FACT_CROP.DATE_ID as a reference to the DIM_DATE primary key.
-- NOTE(review): some analytic SQL engines reject or do not enforce FOREIGN KEY
-- constraints -- confirm support on the target platform before running this.
ALTER TABLE FACT_CROP
ADD CONSTRAINT FK_FACT_CROP_DATE_ID
    FOREIGN KEY (DATE_ID)
    REFERENCES DIM_DATE (DATE_ID);

-- Additional ALTER TABLE statements for adding Foreign Key Constraints to other Fact tables as necessary...
Answered By: Kojo Olivet
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.