mapping many keys to 1 values

Question:

I am mapping countries in my data, I do the following for 1 key:value pair

from itertools import chain
countryDict = {
    'Australia': 'ANZ',
    'New Zealand': 'ANZ',
    'China': 'CHINA PLUS',
    'Hong Kong': 'CHINA PLUS', 
    'India': 'INDIA',
    'Japan': 'JAPAN',
    'Indonesia': 'SEA',
    'Malaysia': 'SEA',
    'Philippines': 'SEA',
    'Singapore': 'SEA',
    'Thailand': 'SEA',
    'VietNam': 'SEA',
    'South Korea': 'SOUTH KOREA',
    'Taiwan': 'TAIWAN',
    }

mapping_expr = F.create_map([F.lit(x) for x in chain(*countryDict.items())])
df = df.withColumn('Country_L1', mapping_expr[df['Customer_Country']])

I am not sure how to do it for multiple keys: 1 value, I am currently using when and otherwise to achieve but I want something like a dictionary to make it look simple

bbo = bbo.withColumn("Country_L1", F.when(F.col('customer_country').isin(
                'AU', 'CC', 'CK', 'CX', 'FJ', 'FM', 'GU', 'KI', 'MH', 'MP', 'NF', 'NR', 'NU', 'NZ', 'PG', 'PN', 'PW', 'TO', 'TV', 'WF', 'WS', 'SB', 'VU'), F.lit('ANZ')).otherwise(
                    F.when(F.col('customer_country').isin(
                        'CN', 'HK', 'MO'), F.lit('CHINA PLUS')).otherwise(
                            F.when(F.col('customer_country').isin(
                                'BD', 'BN', 'BT', 'ID', 'KH', 'KP', 'LA', 'LK', 'MM', 'MN', 'MV', 'MY', 'NP', 'PH', 'SG', 'TH', 'TL', 'VN'), F.lit('SEA')).otherwise(
                                    F.when(F.col('customer_country') == 'JP', F.lit('JAPAN')).otherwise(
                                        F.when(F.col('customer_country') == 'KR', F.lit('SOUTH KOREA')).otherwise(
                                            F.when(F.col('customer_country') == 'TW', F.lit('TAIWAN')).otherwise(
                                                F.lit('Non APAC'))
                                            )
                                        )
                                    ) 
                                )
                            )
                    )

any help to make this simple would be appreciated, thanks in advance.

Asked By: Arun Mohan

||

Answers:

look_up={ # Store the relations here
    'SEA': ['Indonesia','Malaysia','Philippines'],
    'JAPAN' :['Japan']
}
rev_lookup={value: key for (key, values) in look_up.items() for value in values } #Reverse key-value 
rev_lookup

Result:

{'Indonesia': 'SEA', 'Malaysia': 'SEA', 'Philippines': 'SEA', 'Japan': 'JAPAN'}

Possible duplicate with Reverse / invert a dictionary mapping

Answered By: Jacob

You can have a dict like this:

mapping = {
    'ANZ': ['AU', 'CC', 'CK', 'CX', 'FJ', 'FM', 'GU', 'KI', 'MH', 'MP', 'NF', 'NR', 'NU', 'NZ', 'PG', 'PN', 'PW', 'TO',
            'TV', 'WF', 'WS', 'SB', 'VU'],
    'CHINA PLUS': ['CN', 'HK', 'MO'],
    'SEA': ['BD', 'BN', 'BT', 'ID', 'KH', 'KP', 'LA', 'LK', 'MM', 'MN', 'MV', 'MY', 'NP', 'PH', 'SG', 'TH', 'TL', 'VN'],
    'JAPAN': ['JP'],
    'SOUTH KOREA': ['KR'],
    'TAIWAN': ['TW']
}

Then use python functools.reduce function to dynamically generate when expressions from the dict elements:

from functools import reduce
from pyspark.sql import functions as F

bbo = bbo.withColumn(
    "Country_L1",
    reduce(
        lambda a, b: a.when(F.col("customer_country").isin(b[1]), b[0]),
        mapping.items(),
        F
    ).otherwise('Non APAC')
)
Answered By: blackbishop