Display the output of datatypes from two dataframes

Question

I would like to display the output of datatypes from two dataframes.

For each column, the output should show the column name, its datatype in the DBF dataframe, and its datatype in the CSV dataframe, side by side.

Working code

import csv
import pandas as pd
from dbfread import DBF

# Input files read by main().
csv_file = "bridges.csv"
dbf_file = "bridges.dbf"

def dbf_to_csv(path_to_dbf):
    """Convert a .dbf file to a .csv file with the same base name.

    The CSV gets the DBF field names as its header row, followed by one row
    per DBF record. Progress messages are printed to stdout.

    Args:
        path_to_dbf: Path of the .dbf file to convert.
    """
    import os  # local import so the function does not rely on file-level imports

    # splitext() swaps whatever extension is present; slicing off the last
    # four characters would mangle a path that does not end in ".dbf".
    csv_fn = os.path.splitext(path_to_dbf)[0] + ".csv"
    print('\tCreating {}'.format(csv_fn))
    table = DBF(path_to_dbf)
    # newline='' lets the csv module control line endings itself.
    with open(csv_fn, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(table.field_names)
        print('\t\tWriting converted data to {}'.format(csv_fn))
        for record in table:
            writer.writerow(list(record.values()))
        print('\n\n\t\tClosing converted data to {}\n\n'.format(csv_fn))


def main():
    """Print the first rows and the per-column dtypes of the DBF and CSV data.

    Loads the module-level ``dbf_file`` and ``csv_file`` into pandas
    DataFrames, prints the first five rows of each, then lists every column
    name with its dtype, first for the DBF frame and then for the CSV frame.
    """
    print('\n\tPrinting the head of the .dbf file: {}'.format(dbf_file))
    dbf = pd.DataFrame(DBF(dbf_file))
    print(dbf.head(5))

    print('\n\tPrinting the head of the .csv file: {}'.format(csv_file))
    df = pd.read_csv(csv_file)
    print(df.head(5))

    ##  2.  read the column datatype and display
    print('Printing column name and column datatype:')
    # Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
    for name, dbf_type in dbf.dtypes.items():
        print('\t{}\t\t{}'.format(name, dbf_type))

    print('Printing .csv column name and column datatype:')
    for name, dtype in df.dtypes.items():
        print('\t{}\t\t{}'.format(name, dtype))




# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

After searching SO I found something along the lines of

    ##  2.  read the column datatype and display    
    print('Printing column name and column datatype:')
    # NOTE: these two names hold the iterators themselves, not a per-column dtype.
    dbf_type = dbf.dtypes.iteritems()
    df_type = df.dtypes.iteritems()
    # zip() yields one 2-tuple per step -- ((name, dbf dtype), (name, df dtype)) --
    # but the target list `name,` has a single slot, so unpacking raises
    # "ValueError: too many values to unpack (expected 1)".
    for name, in zip(dbf.dtypes.iteritems(), df.dtypes.iteritems()):
        print('t{}tt{}tt{}'.format(name, df_type, dbf_type))

This produces the error

ValueError: too many values to unpack (expected 1)

Can anyone offer a solution or an alternative method, please?

Asked By: Tommy Gibbons

||

Source

Answer 1

I found what I was looking for in the question "How to iterate over two dictionaries at once". A big thank you to user2699.

import csv
import pandas as pd
from dbfread import DBF

# Input files read by main().
csv_file = "bridges.csv"
dbf_file = "bridges.dbf"
# NOTE(review): not referenced in the code shown here; main() writes to 'gt.csv' instead.
datatypes = "datatypes.csv"

def dbf_to_csv(path_to_dbf):
    """Convert a .dbf file to a .csv file with the same base name.

    The CSV gets the DBF field names as its header row, followed by one row
    per DBF record. Progress messages are printed to stdout.

    Args:
        path_to_dbf: Path of the .dbf file to convert.
    """
    import os  # local import so the function does not rely on file-level imports

    # splitext() swaps whatever extension is present; slicing off the last
    # four characters would mangle a path that does not end in ".dbf".
    csv_fn = os.path.splitext(path_to_dbf)[0] + ".csv"
    print('\tCreating {}'.format(csv_fn))
    table = DBF(path_to_dbf)
    # newline='' lets the csv module control line endings itself.
    with open(csv_fn, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(table.field_names)
        print('\t\tWriting converted data to {}'.format(csv_fn))
        for record in table:
            writer.writerow(list(record.values()))
        print('\n\n\t\tClosing converted data to {}\n\n'.format(csv_fn))


def main():
    """Report each column's dtype in the CSV frame next to its DBF dtype.

    Loads the module-level ``dbf_file`` and ``csv_file`` into pandas
    DataFrames, then emits one ``name, csv dtype, dbf dtype`` line per CSV
    column, first to stdout and then to the file ``gt.csv``.

    Raises:
        KeyError: If the CSV has a column that the DBF frame lacks.
    """
    out_fn = 'gt.csv'

    ##  1.  read .dbf file formed by QField
    dbf = pd.DataFrame(DBF(dbf_file))
    df = pd.read_csv(csv_file)

    print('Printing .csv column name and column datatype:')
    # Map column name -> dtype rendered as a plain string.
    my_csv_dictionary = {name: str(dtype) for name, dtype in df.dtypes.items()}
    my_dbf_dictionary = {name: str(dtype) for name, dtype in dbf.dtypes.items()}

    # Pair the two dtypes once so the screen and file output cannot diverge.
    rows = [
        (name, csv_type, my_dbf_dictionary[name])
        for name, csv_type in my_csv_dictionary.items()
    ]

    print('Printing Dict to screen')
    for name, csv_type, dbf_type in rows:
        print('{},\t{},\t {}'.format(name, csv_type, dbf_type))

    ##  write the datatypes to a .csv file
    ##  ensure UTF-8 encoding for Postgres
    print('Printing Dict to .csv')
    with open(out_fn, 'w', encoding='utf-8') as f:
        # Each record ends with a real newline so the file has one row per column.
        f.writelines("%s, %s, %s\n" % row for row in rows)

    print('\n\n\t\tClosing converted data to {}\n\n'.format(out_fn))




# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

This code displays to the screen (maybe not very neatly) in the format key, dict1_value, dict2_value

I have also added code to write the result to a .csv file (in the same format as the screen display); this might save some time later.

Answered By: Tommy Gibbons

Display the output of datatypes from two dataframes

Question:

Answers: