How to return data from one function to another?

Question:

I am experimenting with Python and trying to write a simple data-cleaning program. I'm trying to pass the title values from the read_excel function to the output function, but Python keeps reporting that the name `title` is not defined. Here is my code:

import os
import pandas as pd
import math

class Item():
    """One record made of a name, a cost, a gender and a prime value.

    Instances compare equal and hash by those four fields, so duplicates
    collapse when items are placed in a set.
    """

    # Class-level defaults; __init__ always overrides them per instance.
    __name = ""
    __cost = 0
    __gender = ""
    __prime = ""

    def __init__(self, name, cost, gender, prime):
        """Store the four field values on the instance."""
        self.__name = name
        self.__cost = cost
        self.__gender = gender
        self.__prime = prime

    def has_all_properties(self):
        """Return True when every field carries a usable value.

        name, gender and prime must be truthy; cost must be a real number
        other than NaN. A cost that math.isnan() cannot handle (for example
        None or a string) is treated as missing instead of raising TypeError.
        """
        if not self.__name:
            return False
        try:
            cost_missing = math.isnan(self.__cost)
        except TypeError:
            # Non-numeric cost, e.g. None from an empty spreadsheet cell.
            cost_missing = True
        return bool(not cost_missing and self.__gender and self.__prime)

    def clean(self, wanted_cost, wanted_gender, wanted_prime):
        """Return True when the item matches the wanted gender and prime
        values and its cost does not exceed wanted_cost.

        The cost comparison requires a comparable cost; call
        has_all_properties() first to rule out missing values.
        """
        return bool(self.__name and self.__gender == wanted_gender and self.__cost <= wanted_cost and self.__prime == wanted_prime)

    def __eq__(self, other):
        """Field-by-field equality; defers to Python for non-Item operands."""
        if not isinstance(other, Item):
            # Let Python try the reflected comparison / identity fallback
            # rather than failing on a missing attribute.
            return NotImplemented
        return (self.__name == other.__name and self.__cost == other.__cost and self.__gender == other.__gender and self.__prime == other.__prime)

    def __hash__(self):
        """Hash over the same four fields that __eq__ compares."""
        return hash((self.__name, self.__cost, self.__gender, self.__prime))

    def __repr__(self):
        return f"Item({self.__name},{self.__cost},{self.__gender},{self.__prime})"

    def tuple(self):
        """Return the fields as a (name, cost, gender, prime) tuple."""
        return self.__name, self.__cost, self.__gender, self.__prime

def read_excel(filetype):
    """Load every file in the current directory whose name ends with
    `filetype` and return the combined rows as a list of lists.

    Only the 'name', 'cost', 'used_by' and 'prime' columns are kept, and
    pandas is asked to replace null cells with None. The column names and
    the rows are printed before returning.
    """
    cwd = os.path.abspath('')  # absolute path of the current working directory
    files = os.listdir(cwd)  
    df = pd.DataFrame()
    for file in files:
        if file.endswith(filetype):
            # NOTE(review): DataFrame.append was removed in pandas 2.0;
            # pd.concat is the documented replacement.
            df = df.append(pd.read_excel(file), ignore_index=True)
            df = df.where(df.notnull(), None)
            df = df[['name', 'cost', 'used_by', 'prime']]
    # `title` is a local variable; it is not visible outside this function.
    title = list(df.columns.values)
    print(title) 
    array = df.values.tolist()
    print(array)
    return array
    # Unreachable: the return above has already exited the function, so
    # output() is never called from here and `title` never leaves it.
    return output(title)

def process(array, wanted_cost=20, wanted_gender="male", wanted_prime="yes"):
    """Build de-duplicated Items from `array` and return those that pass the filter.

    Args:
        array: iterable of rows, each unpacked as Item(name, cost, gender, prime).
        wanted_cost: upper bound passed to Item.clean() (default 20).
        wanted_gender: gender value passed to Item.clean() (default "male").
        wanted_prime: prime value passed to Item.clean() (default "yes").

    Returns:
        A list of Item objects that have all properties and satisfy clean().
        The original version built this list but never returned it, so the
        caller received None.
    """
    # A set drops duplicate rows, relying on Item.__eq__/__hash__.
    mylist = {Item(*k) for k in array}
    print(mylist)
    result = [
        obj
        for obj in mylist
        if obj.has_all_properties()
        and obj.clean(wanted_cost, wanted_gender, wanted_prime)
    ]
    print(result)
    return result
    
def output(where, sort_data, title):
    """Write the items in `sort_data` to an Excel file at `where`.

    Each item is flattened through its tuple() method; `title` supplies the
    column names. The index column is omitted and the header row is written.
    """
    rows = []
    for item in sort_data:
        rows.append(item.tuple())
    frame = pd.DataFrame(rows, columns=title)
    frame.to_excel(where, index=False, header=True)

if __name__ == "__main__":
    # Script entry point: read the spreadsheets, filter the rows, write the result.
    inputfile = read_excel('.XLSX')
    processdata = process(inputfile)
    # NOTE: `title` only exists as a local variable inside read_excel(), so
    # evaluating it here is what triggers "name 'title' is not defined".
    result = output('clean_data.xlsx', processdata, title)

Can you show me what to do instead? Thank you for the help.

Asked By: zhangruibo101

||

Answers:

Right after a return statement is executed the function will exit. This means that return output(title) will never actually happen in your code. As well, output() doesn’t return anything and DataFrame.to_excel() only writes to an excel file. What you want to do in read_excel() is

def read_excel(filetype):
    """Load every file in the current directory whose name ends with
    `filetype`, hand the column names to output(), and return the rows.

    Only the 'name', 'cost', 'used_by' and 'prime' columns are kept, and
    pandas is asked to replace null cells with None.

    Returns:
        The combined rows as a list of lists (empty when no file matches).
    """
    wanted_columns = ['name', 'cost', 'used_by', 'prime']
    cwd = os.path.abspath('')
    frames = [
        pd.read_excel(os.path.join(cwd, file))[wanted_columns]
        for file in os.listdir(cwd)
        if file.endswith(filetype)
    ]
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df = df.where(df.notnull(), None)
    title = list(df.columns.values)
    print(title)
    array = df.values.tolist()
    print(array)
    # Call output() *before* returning: nothing after `return` ever runs.
    # NOTE(review): output() as defined in the question takes
    # (where, sort_data, title); this one-argument call would raise
    # TypeError against that signature -- confirm the intended call.
    output(title)
    return array
Answered By: BTables

After a `return` statement runs, the function exits, so any statement placed after it is never executed.
You can return both values, like this:

def read_excel(filetype):
    """Load every file in the current directory whose name ends with
    `filetype` and return the rows together with the result of output().

    Only the 'name', 'cost', 'used_by' and 'prime' columns are kept, and
    pandas is asked to replace null cells with None.

    Returns:
        A 2-tuple: (rows as a list of lists, whatever output(title) returns).
    """
    wanted_columns = ['name', 'cost', 'used_by', 'prime']
    cwd = os.path.abspath('')
    frames = [
        pd.read_excel(os.path.join(cwd, file))[wanted_columns]
        for file in os.listdir(cwd)
        if file.endswith(filetype)
    ]
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df = df.where(df.notnull(), None)
    title = list(df.columns.values)
    print(title)
    array = df.values.tolist()
    print(array)
    # A single return statement can carry several values as a tuple.
    # NOTE(review): output() as defined in the question takes
    # (where, sort_data, title) and returns nothing; this one-argument call
    # would raise TypeError against that signature -- returning `title`
    # itself may be what was intended; confirm.
    return array, output(title)

This will return a tuple of your values:

(array, output(title))
Answered By: Ammar

I found what was, for me, the easiest-to-understand way of solving my issue: I broke the read_excel function apart and added separate get_header and get_list functions. Here is my solution:

import os
import pandas as pd
import math

class Item():
    """One record made of a name, a cost, a gender and a prime value.

    Instances compare equal and hash by those four fields, so duplicates
    collapse when items are placed in a set.
    """

    # Class-level defaults; __init__ always overrides them per instance.
    __name = ""
    __cost = 0
    __gender = ""
    __prime = ""

    def __init__(self, name, cost, gender, prime):
        """Store the four field values on the instance."""
        self.__name = name
        self.__cost = cost
        self.__gender = gender
        self.__prime = prime

    def has_all_properties(self):
        """Return True when every field carries a usable value.

        name, gender and prime must be truthy; cost must be a real number
        other than NaN. A cost that math.isnan() cannot handle (for example
        None or a string) is treated as missing instead of raising TypeError.
        """
        if not self.__name:
            return False
        try:
            cost_missing = math.isnan(self.__cost)
        except TypeError:
            # Non-numeric cost, e.g. None from an empty spreadsheet cell.
            cost_missing = True
        return bool(not cost_missing and self.__gender and self.__prime)

    def clean(self, wanted_cost, wanted_gender, wanted_prime):
        """Return True when the item matches the wanted gender and prime
        values and its cost does not exceed wanted_cost.

        The cost comparison requires a comparable cost; call
        has_all_properties() first to rule out missing values.
        """
        return bool(self.__name and self.__gender == wanted_gender and self.__cost <= wanted_cost and self.__prime == wanted_prime)

    def __eq__(self, other):
        """Field-by-field equality; defers to Python for non-Item operands."""
        if not isinstance(other, Item):
            # Let Python try the reflected comparison / identity fallback
            # rather than failing on a missing attribute.
            return NotImplemented
        return (self.__name == other.__name and self.__cost == other.__cost and self.__gender == other.__gender and self.__prime == other.__prime)

    def __hash__(self):
        """Hash over the same four fields that __eq__ compares."""
        return hash((self.__name, self.__cost, self.__gender, self.__prime))

    def __repr__(self):
        return f"Item({self.__name},{self.__cost},{self.__gender},{self.__prime})"

    def tuple(self):
        """Return the fields as a (name, cost, gender, prime) tuple."""
        return self.__name, self.__cost, self.__gender, self.__prime

def read_excel(filetype):
    """Load every file in the current directory whose name ends with
    `filetype` into one DataFrame.

    Only the 'name', 'cost', 'used_by' and 'prime' columns are kept, and
    pandas is asked to replace null cells with None.

    Args:
        filetype: file-name suffix to match. The match is case-sensitive,
            so '.XLSX' does not pick up a lowercase '.xlsx' file.

    Returns:
        The combined DataFrame, or an empty DataFrame with no columns when
        no file matches.
    """
    wanted_columns = ['name', 'cost', 'used_by', 'prime']
    cwd = os.path.abspath('')
    frames = [
        pd.read_excel(os.path.join(cwd, file))[wanted_columns]
        for file in os.listdir(cwd)
        if file.endswith(filetype)
    ]
    if not frames:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df = pd.concat(frames, ignore_index=True)
    return df.where(df.notnull(), None)

def get_list(dataframe):
    """Convert the frame's cell values to a list of row lists, print it, and return it."""
    rows = dataframe.values.tolist()
    print(rows)
    return rows

def get_header(dataframe):
    """Collect the frame's column labels into a list, print it, and return it."""
    header = [*dataframe.columns.values]
    print(header)
    return header

def process(array, wanted_cost=20, wanted_gender="male", wanted_prime="yes"):
    """De-duplicate and filter rows, returning the survivors as plain tuples.

    Args:
        array: iterable of rows, each unpacked as Item(name, cost, gender, prime).
        wanted_cost: upper bound passed to Item.clean() (default 20).
        wanted_gender: gender value passed to Item.clean() (default "male").
        wanted_prime: prime value passed to Item.clean() (default "yes").

    Returns:
        A list of (name, cost, gender, prime) tuples for the items that have
        all properties and satisfy clean(). Order is not guaranteed because
        de-duplication goes through a set.
    """
    # A set drops duplicate rows, relying on Item.__eq__/__hash__.
    mylist = {Item(*k) for k in array}
    print(mylist)
    result = [
        obj
        for obj in mylist
        if obj.has_all_properties()
        and obj.clean(wanted_cost, wanted_gender, wanted_prime)
    ]
    print(result)
    # Flatten to tuples so the rows can be fed straight into a DataFrame.
    t_list = [obj.tuple() for obj in result]
    return t_list

    
def output(where, sort_data, title):
    """Write `sort_data` to an Excel file at `where`, using `title` as column names.

    The index column is omitted and the header row is written.
    """
    frame = pd.DataFrame(sort_data, columns=title)
    frame.to_excel(where, index=False, header=True)
 
if __name__ == "__main__":
    # Script entry point. read_excel() now returns the DataFrame itself, so
    # both the rows and the column names can be derived from it here.
    inputfile = read_excel('.XLSX')
    array = get_list(inputfile)
    header = get_header(inputfile)
    processdata = process(array)
    # output() has no return statement, so `result` is bound to None.
    result = output('clean_data.xlsx', processdata, header)
Answered By: zhangruibo101
Categories: questions Tags: , , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.