How to run the program like ./main.py --fields column1, column2
Question:
I have
df = pd.read_csv(csv_raw)
data_json = df.to_json(orient='records')
In df
there are different columns which the user can select in the terminal by running ./main.py --fields column1, column2
. I understand that I should use the argparse
library, but the methods I have tried did not work.
I have tried the code below, but it doesn’t work for me:
parser = argparse.ArgumentParser(description='Process rows to display.')
parser.add_argument('--fields', type=str, help='Rows to display')
args = parser.parse_args()
select = args.select.split(',') if args.select else None
df contains:
date column1 column2 column3 column4
0 2022-01-01 1 2.550000 Unknown facebook
1 2022-01-01 5 2.470000 Unknown facebook
2 2022-01-01 2 2.620000 Unknown facebook
3 2022-01-01 3 2.480000 Unknown facebook
4 2022-01-01 2 2.440000 Unknown facebook
Full code:
import pandas as pd
import requests
from io import StringIO
import hashlib
import time
import argparse
import sys
def get_file():
    """Download the shared CSV and return it as a DataFrame plus its JSON form.

    Column names passed on the command line (``./main.py column1 column2``)
    choose which columns are printed; with no arguments every column is
    printed.

    Returns:
        tuple: ``(df, data_json)``. ``df`` is the parsed DataFrame with row
        label 5 removed (when present); ``data_json`` is the records-oriented
        JSON string built from the data before that row was dropped.

    Raises:
        requests.RequestException: if the download fails, times out or
            returns an HTTP error status.
        KeyError: if a requested column does not exist in the data.
    """
    url = 'https://drive.google.com/file/d/1zLdEcpzCp357s3Rse112Lch9EMUWzMLE/view?usp=sharing'
    # The file id is the second-to-last path segment of the share link.
    file_id = url.split('/')[-2]
    dwn_url = 'https://drive.google.com/uc?export=download&id=' + file_id
    # A timeout keeps the polling loop from hanging forever on a dead
    # connection; raise_for_status stops an error page being parsed as CSV.
    response = requests.get(dwn_url, timeout=30)
    response.raise_for_status()
    df = pd.read_csv(StringIO(response.text))
    data_json = df.to_json(orient='records')
    # errors='ignore' avoids a KeyError when the file has fewer than six rows.
    df.drop(5, inplace=True, errors='ignore')
    # An empty argument list means "show every column".
    select = sys.argv[1:] or list(df.columns)
    print(df[select])
    # monitor_file() unpacks both values, so they must be returned.
    return df, data_json
def make_table():
    """Configure pandas so printed DataFrames are not truncated.

    Sets ``display.width``, ``display.max_columns`` and
    ``display.max_colwidth`` to ``None``. For the two ``max_*`` options
    ``None`` removes the limit; for ``display.width`` pandas documents
    ``None`` as "auto-detect the terminal width".
    """
    # Each option used to be assigned a number and then immediately
    # overwritten with None; only the final value is kept here.
    for option in ('display.width', 'display.max_columns', 'display.max_colwidth'):
        pd.set_option(option, None)
def calculate_hash(df):
    """Return the SHA-256 hex digest of the DataFrame's text rendering.

    Args:
        df: DataFrame to fingerprint. It is rendered with ``to_string()``
            and encoded with the default ``str.encode()`` codec.

    Returns:
        str: hexadecimal SHA-256 digest of that rendering.
    """
    # Named "digest" rather than "hash" so the builtin hash() is not shadowed.
    digest = hashlib.sha256(df.to_string().encode()).hexdigest()
    return digest
def monitor_file(interval):
    """Poll the remote file forever and report whenever its content changes.

    Args:
        interval: number of seconds to sleep between consecutive checks.
    """
    last_hash = None
    while True:
        df, data_json = get_file()
        latest_hash = calculate_hash(df)
        changed = latest_hash != last_hash
        if changed:
            print("File has been updated")
            print(df.head(50))
            print(data_json)
            last_hash = latest_hash
        time.sleep(interval)
if __name__ == "__main__":
    # Check the remote file for changes every 120 seconds.
    monitor_file(120)
Answers:
import sys
if len(sys.argv) > 1:
select = sys.argv[1:]
else:
select = 'None'
print(select)
will output:
python myFile.py lorem ipsum
['lorem','ipsum']
python myFile.py
None
then to get them as fields in your dataframe:
df[select]
for example:
df1 = pd.DataFrame({'lorem':[1,2,3],'ipsum':['a','b','c']})
if len(sys.argv) > 1:
select = sys.argv[1:]
else:
select = 'None'
print(df1[select])
python myFile.py lorem ipsum
returns:
lorem ipsum
0 1 a
1 2 b
2 3 c
With argparse
you have three options:
- Ditch the space after comma (
--fields column1,_column2
) and split the string like you did
--fields column1,column2
- Use
nargs='+'
--fields column1 column2
- Use
action='append'
--field column1 --field column2
Either way, you get a list of strings in args.fields
(or select
in option 1 and in your code). I would personally go for option 2 (nargs='+'
)
I have
df = pd.read_csv(csv_raw)
data_json = df.to_json(orient='records')
In df
there are different columns which the user can select in the terminal by running ./main.py --fields column1, column2
. I understand that I should use the argparse
library, but the methods I have tried did not work.
I have tried the code below, but it doesn’t work for me:
parser = argparse.ArgumentParser(description='Process rows to display.')
parser.add_argument('--fields', type=str, help='Rows to display')
args = parser.parse_args()
select = args.select.split(',') if args.select else None
df contains:
date column1 column2 column3 column4
0 2022-01-01 1 2.550000 Unknown facebook
1 2022-01-01 5 2.470000 Unknown facebook
2 2022-01-01 2 2.620000 Unknown facebook
3 2022-01-01 3 2.480000 Unknown facebook
4 2022-01-01 2 2.440000 Unknown facebook
Full code:
import pandas as pd
import requests
from io import StringIO
import hashlib
import time
import argparse
import sys
def get_file():
    """Download the shared CSV and return it as a DataFrame plus its JSON form.

    Column names passed on the command line (``./main.py column1 column2``)
    choose which columns are printed; with no arguments every column is
    printed.

    Returns:
        tuple: ``(df, data_json)``. ``df`` is the parsed DataFrame with row
        label 5 removed (when present); ``data_json`` is the records-oriented
        JSON string built from the data before that row was dropped.

    Raises:
        requests.RequestException: if the download fails, times out or
            returns an HTTP error status.
        KeyError: if a requested column does not exist in the data.
    """
    url = 'https://drive.google.com/file/d/1zLdEcpzCp357s3Rse112Lch9EMUWzMLE/view?usp=sharing'
    # The file id is the second-to-last path segment of the share link.
    file_id = url.split('/')[-2]
    dwn_url = 'https://drive.google.com/uc?export=download&id=' + file_id
    # A timeout keeps the polling loop from hanging forever on a dead
    # connection; raise_for_status stops an error page being parsed as CSV.
    response = requests.get(dwn_url, timeout=30)
    response.raise_for_status()
    df = pd.read_csv(StringIO(response.text))
    data_json = df.to_json(orient='records')
    # errors='ignore' avoids a KeyError when the file has fewer than six rows.
    df.drop(5, inplace=True, errors='ignore')
    # An empty argument list means "show every column".
    select = sys.argv[1:] or list(df.columns)
    print(df[select])
    # monitor_file() unpacks both values, so they must be returned.
    return df, data_json
def make_table():
    """Configure pandas so printed DataFrames are not truncated.

    Sets ``display.width``, ``display.max_columns`` and
    ``display.max_colwidth`` to ``None``. For the two ``max_*`` options
    ``None`` removes the limit; for ``display.width`` pandas documents
    ``None`` as "auto-detect the terminal width".
    """
    # Each option used to be assigned a number and then immediately
    # overwritten with None; only the final value is kept here.
    for option in ('display.width', 'display.max_columns', 'display.max_colwidth'):
        pd.set_option(option, None)
def calculate_hash(df):
    """Return the SHA-256 hex digest of the DataFrame's text rendering.

    Args:
        df: DataFrame to fingerprint. It is rendered with ``to_string()``
            and encoded with the default ``str.encode()`` codec.

    Returns:
        str: hexadecimal SHA-256 digest of that rendering.
    """
    # Named "digest" rather than "hash" so the builtin hash() is not shadowed.
    digest = hashlib.sha256(df.to_string().encode()).hexdigest()
    return digest
def monitor_file(interval):
    """Poll the remote file forever and report whenever its content changes.

    Args:
        interval: number of seconds to sleep between consecutive checks.
    """
    last_hash = None
    while True:
        df, data_json = get_file()
        latest_hash = calculate_hash(df)
        changed = latest_hash != last_hash
        if changed:
            print("File has been updated")
            print(df.head(50))
            print(data_json)
            last_hash = latest_hash
        time.sleep(interval)
if __name__ == "__main__":
    # Check the remote file for changes every 120 seconds.
    monitor_file(120)
import sys
if len(sys.argv) > 1:
select = sys.argv[1:]
else:
select = 'None'
print(select)
will output:
python myFile.py lorem ipsum
['lorem','ipsum']
python myFile.py
None
then to get them as fields in your dataframe:
df[select]
for example:
df1 = pd.DataFrame({'lorem':[1,2,3],'ipsum':['a','b','c']})
if len(sys.argv) > 1:
select = sys.argv[1:]
else:
select = 'None'
print(df1[select])
python myFile.py lorem ipsum
returns:
lorem ipsum
0 1 a
1 2 b
2 3 c
With argparse
you have three options:
- Ditch the space after comma (
--fields column1,_column2
) and split the string like you did
--fields column1,column2
- Use
nargs='+'
--fields column1 column2
- Use
action='append'
--field column1 --field column2
Either way, you get a list of strings in args.fields
(or select
in option 1 and in your code). I would personally go for option 2 (nargs='+'
)