How to show the name in a list?
Question:
I am trying to show the name of each fruit together with its cost in a list.
So I have it like this:
import re
# Text that findallfruit() searches. The name must be ``verdi50`` because
# that is the global findallfruit() reads; a bare ``50 = ...`` is not a valid
# assignment target.
verdi50 = "[' all apply. The Rotterdam District Court shall have exclusive jurisdiction.nnrut ard wegetablesnx0c']"
# Product names to search for; fruit_list() joins them in this order.
fruit_words = [
    'Appels',
    'Ananas',
    'Peen Waspeen',
    'Tomaten Cherry',
    'Sinaasappels',
    'Watermeloenen',
    'Rettich',
    'Peren',
    'Peen',
    'Mandarijnen',
    'Meloenen',
    'Grapefruit',
]
def fruit_list(format_=re.escape, words=None):
    """Join the fruit names into one regex alternation (``a|b|c``).

    Args:
        format_: Callable applied to each name before joining. Defaults to
            ``re.escape`` so names are matched literally.
        words: Names to join. Defaults to the module-level ``fruit_words``.

    Returns:
        The formatted names separated by ``|``; an empty string when there
        are no names.
    """
    if words is None:
        words = fruit_words
    return "|".join(format_(word) for word in words)
def verdi_total_fruit_cost_regex():
    """Build the regex that finds the amount following a fruit name.

    Each name is escaped and wrapped in its own non-capturing group, and the
    whole alternation is non-capturing as well, so the only capturing group
    in the final pattern is the ``number`` group added by
    ``regex_fruit_cost``. ``re.findall`` therefore yields the amounts only,
    without the fruit names.

    Returns:
        The pattern string produced by ``regex_fruit_cost``.
    """
    # Escape each name so regex metacharacters in a name are matched literally.
    alternation = fruit_list(format_=lambda word: f"(?:{re.escape(word)})")
    return regex_fruit_cost(f"(?:{alternation})")
def findallfruit(regex, text=None):
    """Return every match of ``regex`` in the text.

    Args:
        regex: Pattern passed to ``re.findall``.
        text: String to search. Defaults to the module-level ``verdi50``.

    Returns:
        The list produced by ``re.findall``: strings when the pattern has at
        most one capturing group, tuples when it has several.
    """
    if text is None:
        text = verdi50
    return re.findall(regex, text)
def regex_fruit_cost(subst):
    """Wrap ``subst`` in the pattern that captures the amount after it.

    Args:
        subst: Regex fragment that matches the item name(s).

    Returns:
        A pattern string: ``subst`` in a non-capturing group, a lazy ``.*?``,
        the named group ``number`` (digits, commas and dots) and finally a
        literal ``n``.
    """
    # NOTE(review): the trailing "n" is presumably a newline escape whose
    # backslash was lost; it must agree with the separators used in the
    # searched text -- confirm before changing.
    return rf"(?:{subst}).*?(?P<number>[0-9,.]*)n"
def show_extracted_data_from_file():
    """Return the extracted values as text, one row per line.

    One result list is collected per regex in ``regexes``; ``zip`` then
    groups the i-th result of every regex into a row.

    Returns:
        The rows joined with newlines; the values inside a row are joined
        with ``" t "``.
    """
    regexes = [
        verdi_total_fruit_cost_regex(),
    ]
    matches = [findallfruit(regex) for regex in regexes]
    # Newline between rows so that each row is printed on its own line.
    # NOTE(review): the column separator " t " is probably a tab escape that
    # lost its backslash -- confirm.
    return "\n".join(" t ".join(items) for items in zip(*matches))
# Run the extraction and print the result.
print(show_extracted_data_from_file())
And this is the output:
123,20
2.772,00
46,20
577,50
69,30
3.488,16
137,50
500,00
1.000,00
2.000,00
1.000,00
381,25
But how can I show each cost together with the name of the fruit in a list? I mean something like this:
[[123,20, Watermeloen], [2772,00, Watermeloen]], etc..
Answers:
The issue is in the verdi_total_fruit_cost_regex() function: you have put the fruit names inside a non-capturing group, (?: ), so the name is not captured as part of the match.
Even though you are OR-ing (|) the fruit names together, you still have a single regex pattern, not multiple ones.
Updated the two marked lines –
import re
verdi50="[' nna)nn nnFactuurnVerdi Import SchoolfruitnFactuur nr. : 71201 Koopliedenweg 33nDeb. nr. : 108636 2991 LN BARENDRECHTnYour VAT nr. : NL851703884B01 NederlandnFactuur datum : 10-12-21nAantal Omschrijving Prijs BedragnOrder number : 77553 Loading date : 09-12-21 Incoterm: : FOTnYour ref. : SCHOOLFRUIT Delivery date :nWK50nD.C. Schoolfruitn16 Watermeloenen Quetzali 16kg 4 IMPERIAL BR I € 7,70 € 123,20n360 Watermeloenen Quetzali 16kg 4 IMPERIAL BR I € 7,70 € 2.772,00n6 Watermeloenen Quetzali 16kg 4 IMPERIAL BR I € 7,/0 € 46,20n75 Watermeloenen Quetzali 16kg 4 IMPERIAL BR I € 7,70 € 577,50n9 Watermeloenen Quetzali 16kg 4 IMPERIAL BR I € 7,70 € 69,30n688 Appels Royal Gala 13kg 60/65 Generica PL I € 5,07 € 3.488,16n22 Sinaasappels Valencias 15kg 105 Elara ZAI € 6,25 € 137,50n80 Sinaasappels Valencias 15kg 105 Elara ZAI € 6,25 € 500,00n160 Sinaasappels Valencias 15kg 105 FVC ZAI € 6,25 € 1.000,00n320 Sinaasappels Valencias 15kg 105 Generica ZAI € 6,25 € 2.000,00n160 Sinaasappels Valencias 15kg 105 Noordhoek ZA I € 6,25 € 1.000,00n61 Sinaasappels Valencias 15kg 105 Noordhoek ZA I € 6,25 € 381,25nTotaal Colli Totaal Netto Btw Btw Bedrag Totaal Bedragn€ 12.095,11 1.088,56nBetaling binnen 30 dagennAchterstand wordt gemeld bij de kredietverzekeringsmaatschappijnVerDi Import BV ING Bank NV. Rotterdam IBAN number: NL17INGB0006959173 ~~nn nnKoopliedenweg 38, 2991 LN Barendrecht, The Netherlands SWIFT/BIC: INGBNL2A, VAT number: NL851703884B01 inTel, +31 (0}1 80 61 88 11, Fax +31 (0)1 8061 88 25 Chamber of Commerce Rotterdam no. 55424309 VerDinnE-mail: [email protected], www.verdiimport.nl Dutch law shall apply. The Rotterdam District Court shall have exclusive jurisdiction.nnrut ard wegetablesnx0c']"
# Product names to search for; fruit_list() joins them in this order.
fruit_words = [
    'Appels',
    'Ananas',
    'Peen Waspeen',
    'Tomaten Cherry',
    'Sinaasappels',
    'Watermeloenen',
    'Rettich',
    'Peren',
    'Peen',
    'Mandarijnen',
    'Meloenen',
    'Grapefruit',
]
def fruit_list(format_=re.escape, words=None):
    """Join the fruit names into one regex alternation (``a|b|c``).

    Args:
        format_: Callable applied to each name before joining. Defaults to
            ``re.escape`` so names are matched literally.
        words: Names to join. Defaults to the module-level ``fruit_words``.

    Returns:
        The formatted names separated by ``|``; an empty string when there
        are no names.
    """
    if words is None:
        words = fruit_words
    return "|".join(format_(word) for word in words)
def verdi_total_fruit_cost_regex():
    """Build the regex that captures a fruit name and the amount after it.

    Each name is escaped and wrapped in its own non-capturing group; the
    alternation as a whole is a capturing group, so ``re.findall`` returns
    the matched name alongside the ``number`` group added by
    ``regex_fruit_cost``.

    Returns:
        The pattern string produced by ``regex_fruit_cost``.
    """
    # Escape each name so regex metacharacters in a name are matched literally.
    alternation = fruit_list(format_=lambda word: f"(?:{re.escape(word)})")
    return regex_fruit_cost(f"({alternation})")  # updated: "?:" removed so the name is captured
def findallfruit(regex, text=None):
    """Return every match of ``regex`` in the text.

    Args:
        regex: Pattern passed to ``re.findall``.
        text: String to search. Defaults to the module-level ``verdi50``.

    Returns:
        The list produced by ``re.findall``: strings when the pattern has at
        most one capturing group, tuples when it has several.
    """
    if text is None:
        text = verdi50
    return re.findall(regex, text)
def regex_fruit_cost(subst):
    """Wrap ``subst`` in the pattern that captures the amount after it.

    Args:
        subst: Regex fragment that matches the item name(s).

    Returns:
        A pattern string: ``subst`` in a non-capturing group, a lazy ``.*?``,
        the named group ``number`` (digits, commas and dots) and finally a
        literal ``n``.
    """
    # NOTE(review): the trailing "n" is presumably a newline escape whose
    # backslash was lost; it must agree with the separators used in the
    # searched text -- confirm before changing.
    return rf"(?:{subst}).*?(?P<number>[0-9,.]*)n"
def show_extracted_data_from_file():
    """Return the matches of the fruit-cost regex found in the text.

    Returns:
        The ``re.findall`` result for the first (and only) regex in
        ``regexes``, returned as a list rather than as joined text.
    """
    regexes = [
        verdi_total_fruit_cost_regex(),
    ]
    matches = [findallfruit(regex) for regex in regexes]
    return matches[0]  # updated: return the list of matches itself to get the required format
# Run the extraction and print the result.
print(show_extracted_data_from_file())
Output:
[('Watermeloenen', '123,20'), ('Watermeloenen', '2.772,00'), ('Watermeloenen', '46,20'), ('Watermeloenen', '577,50'), ('Watermeloenen', '69,30'), ('Appels', '3.488,16'), ('Sinaasappels', '137,50'), ('Sinaasappels', '500,00'), ('Sinaasappels', '1.000,00'), ('Sinaasappels', '2.000,00'), ('Sinaasappels', '1.000,00'), ('Sinaasappels', '381,25')]
I am trying to show the name of each fruit together with its cost in a list.
So I have it like this:
import re
# Text that findallfruit() searches. The name must be ``verdi50`` because
# that is the global findallfruit() reads; a bare ``50 = ...`` is not a valid
# assignment target.
verdi50 = "[' all apply. The Rotterdam District Court shall have exclusive jurisdiction.nnrut ard wegetablesnx0c']"
# Product names to search for; fruit_list() joins them in this order.
fruit_words = [
    'Appels',
    'Ananas',
    'Peen Waspeen',
    'Tomaten Cherry',
    'Sinaasappels',
    'Watermeloenen',
    'Rettich',
    'Peren',
    'Peen',
    'Mandarijnen',
    'Meloenen',
    'Grapefruit',
]
def fruit_list(format_=re.escape, words=None):
    """Join the fruit names into one regex alternation (``a|b|c``).

    Args:
        format_: Callable applied to each name before joining. Defaults to
            ``re.escape`` so names are matched literally.
        words: Names to join. Defaults to the module-level ``fruit_words``.

    Returns:
        The formatted names separated by ``|``; an empty string when there
        are no names.
    """
    if words is None:
        words = fruit_words
    return "|".join(format_(word) for word in words)
def verdi_total_fruit_cost_regex():
    """Build the regex that finds the amount following a fruit name.

    Each name is escaped and wrapped in its own non-capturing group, and the
    whole alternation is non-capturing as well, so the only capturing group
    in the final pattern is the ``number`` group added by
    ``regex_fruit_cost``. ``re.findall`` therefore yields the amounts only,
    without the fruit names.

    Returns:
        The pattern string produced by ``regex_fruit_cost``.
    """
    # Escape each name so regex metacharacters in a name are matched literally.
    alternation = fruit_list(format_=lambda word: f"(?:{re.escape(word)})")
    return regex_fruit_cost(f"(?:{alternation})")
def findallfruit(regex, text=None):
    """Return every match of ``regex`` in the text.

    Args:
        regex: Pattern passed to ``re.findall``.
        text: String to search. Defaults to the module-level ``verdi50``.

    Returns:
        The list produced by ``re.findall``: strings when the pattern has at
        most one capturing group, tuples when it has several.
    """
    if text is None:
        text = verdi50
    return re.findall(regex, text)
def regex_fruit_cost(subst):
    """Wrap ``subst`` in the pattern that captures the amount after it.

    Args:
        subst: Regex fragment that matches the item name(s).

    Returns:
        A pattern string: ``subst`` in a non-capturing group, a lazy ``.*?``,
        the named group ``number`` (digits, commas and dots) and finally a
        literal ``n``.
    """
    # NOTE(review): the trailing "n" is presumably a newline escape whose
    # backslash was lost; it must agree with the separators used in the
    # searched text -- confirm before changing.
    return rf"(?:{subst}).*?(?P<number>[0-9,.]*)n"
def show_extracted_data_from_file():
    """Return the extracted values as text, one row per line.

    One result list is collected per regex in ``regexes``; ``zip`` then
    groups the i-th result of every regex into a row.

    Returns:
        The rows joined with newlines; the values inside a row are joined
        with ``" t "``.
    """
    regexes = [
        verdi_total_fruit_cost_regex(),
    ]
    matches = [findallfruit(regex) for regex in regexes]
    # Newline between rows so that each row is printed on its own line.
    # NOTE(review): the column separator " t " is probably a tab escape that
    # lost its backslash -- confirm.
    return "\n".join(" t ".join(items) for items in zip(*matches))
# Run the extraction and print the result.
print(show_extracted_data_from_file())
And this is the output:
123,20
2.772,00
46,20
577,50
69,30
3.488,16
137,50
500,00
1.000,00
2.000,00
1.000,00
381,25
But how can I show each cost together with the name of the fruit in a list? I mean something like this:
[[123,20, Watermeloen], [2772,00, Watermeloen]], etc..
The issue is in the verdi_total_fruit_cost_regex() function: you have put the fruit names inside a non-capturing group, (?: ), so the name is not captured as part of the match.
Even though you are OR-ing (|) the fruit names together, you still have a single regex pattern, not multiple ones.
Updated the two marked lines –
import re
verdi50="[' nna)nn nnFactuurnVerdi Import SchoolfruitnFactuur nr. : 71201 Koopliedenweg 33nDeb. nr. : 108636 2991 LN BARENDRECHTnYour VAT nr. : NL851703884B01 NederlandnFactuur datum : 10-12-21nAantal Omschrijving Prijs BedragnOrder number : 77553 Loading date : 09-12-21 Incoterm: : FOTnYour ref. : SCHOOLFRUIT Delivery date :nWK50nD.C. Schoolfruitn16 Watermeloenen Quetzali 16kg 4 IMPERIAL BR I € 7,70 € 123,20n360 Watermeloenen Quetzali 16kg 4 IMPERIAL BR I € 7,70 € 2.772,00n6 Watermeloenen Quetzali 16kg 4 IMPERIAL BR I € 7,/0 € 46,20n75 Watermeloenen Quetzali 16kg 4 IMPERIAL BR I € 7,70 € 577,50n9 Watermeloenen Quetzali 16kg 4 IMPERIAL BR I € 7,70 € 69,30n688 Appels Royal Gala 13kg 60/65 Generica PL I € 5,07 € 3.488,16n22 Sinaasappels Valencias 15kg 105 Elara ZAI € 6,25 € 137,50n80 Sinaasappels Valencias 15kg 105 Elara ZAI € 6,25 € 500,00n160 Sinaasappels Valencias 15kg 105 FVC ZAI € 6,25 € 1.000,00n320 Sinaasappels Valencias 15kg 105 Generica ZAI € 6,25 € 2.000,00n160 Sinaasappels Valencias 15kg 105 Noordhoek ZA I € 6,25 € 1.000,00n61 Sinaasappels Valencias 15kg 105 Noordhoek ZA I € 6,25 € 381,25nTotaal Colli Totaal Netto Btw Btw Bedrag Totaal Bedragn€ 12.095,11 1.088,56nBetaling binnen 30 dagennAchterstand wordt gemeld bij de kredietverzekeringsmaatschappijnVerDi Import BV ING Bank NV. Rotterdam IBAN number: NL17INGB0006959173 ~~nn nnKoopliedenweg 38, 2991 LN Barendrecht, The Netherlands SWIFT/BIC: INGBNL2A, VAT number: NL851703884B01 inTel, +31 (0}1 80 61 88 11, Fax +31 (0)1 8061 88 25 Chamber of Commerce Rotterdam no. 55424309 VerDinnE-mail: [email protected], www.verdiimport.nl Dutch law shall apply. The Rotterdam District Court shall have exclusive jurisdiction.nnrut ard wegetablesnx0c']"
# Product names to search for; fruit_list() joins them in this order.
fruit_words = [
    'Appels',
    'Ananas',
    'Peen Waspeen',
    'Tomaten Cherry',
    'Sinaasappels',
    'Watermeloenen',
    'Rettich',
    'Peren',
    'Peen',
    'Mandarijnen',
    'Meloenen',
    'Grapefruit',
]
def fruit_list(format_=re.escape, words=None):
    """Join the fruit names into one regex alternation (``a|b|c``).

    Args:
        format_: Callable applied to each name before joining. Defaults to
            ``re.escape`` so names are matched literally.
        words: Names to join. Defaults to the module-level ``fruit_words``.

    Returns:
        The formatted names separated by ``|``; an empty string when there
        are no names.
    """
    if words is None:
        words = fruit_words
    return "|".join(format_(word) for word in words)
def verdi_total_fruit_cost_regex():
    """Build the regex that captures a fruit name and the amount after it.

    Each name is escaped and wrapped in its own non-capturing group; the
    alternation as a whole is a capturing group, so ``re.findall`` returns
    the matched name alongside the ``number`` group added by
    ``regex_fruit_cost``.

    Returns:
        The pattern string produced by ``regex_fruit_cost``.
    """
    # Escape each name so regex metacharacters in a name are matched literally.
    alternation = fruit_list(format_=lambda word: f"(?:{re.escape(word)})")
    return regex_fruit_cost(f"({alternation})")  # updated: "?:" removed so the name is captured
def findallfruit(regex, text=None):
    """Return every match of ``regex`` in the text.

    Args:
        regex: Pattern passed to ``re.findall``.
        text: String to search. Defaults to the module-level ``verdi50``.

    Returns:
        The list produced by ``re.findall``: strings when the pattern has at
        most one capturing group, tuples when it has several.
    """
    if text is None:
        text = verdi50
    return re.findall(regex, text)
def regex_fruit_cost(subst):
    """Wrap ``subst`` in the pattern that captures the amount after it.

    Args:
        subst: Regex fragment that matches the item name(s).

    Returns:
        A pattern string: ``subst`` in a non-capturing group, a lazy ``.*?``,
        the named group ``number`` (digits, commas and dots) and finally a
        literal ``n``.
    """
    # NOTE(review): the trailing "n" is presumably a newline escape whose
    # backslash was lost; it must agree with the separators used in the
    # searched text -- confirm before changing.
    return rf"(?:{subst}).*?(?P<number>[0-9,.]*)n"
def show_extracted_data_from_file():
    """Return the matches of the fruit-cost regex found in the text.

    Returns:
        The ``re.findall`` result for the first (and only) regex in
        ``regexes``, returned as a list rather than as joined text.
    """
    regexes = [
        verdi_total_fruit_cost_regex(),
    ]
    matches = [findallfruit(regex) for regex in regexes]
    return matches[0]  # updated: return the list of matches itself to get the required format
# Run the extraction and print the result.
print(show_extracted_data_from_file())
Output:
[('Watermeloenen', '123,20'), ('Watermeloenen', '2.772,00'), ('Watermeloenen', '46,20'), ('Watermeloenen', '577,50'), ('Watermeloenen', '69,30'), ('Appels', '3.488,16'), ('Sinaasappels', '137,50'), ('Sinaasappels', '500,00'), ('Sinaasappels', '1.000,00'), ('Sinaasappels', '2.000,00'), ('Sinaasappels', '1.000,00'), ('Sinaasappels', '381,25')]