Set if conditional inside a lambda function depending on whether a value captured using regex is None or ""

Question:

import re

input_text = 'desde el 2022_-_12_-_10 corrimos juntas hasta el 11° nivel de aquella montaña hasta el 2022_-_12_-_13' #example 1
#input_text = 'desde el 2022_-_11_-_10 18:30 pm hasta el 2022_-_12_-_01 21:00 hs' #example 2

# NOTE(review): the regex escapes in the patterns below look stripped (e.g. `s*` is presumably `\s*`
# and `d{2}` presumably `\d{2}`) — confirm against the original post before running.

# text in the middle associated with the date range...
some_text = r"(?:(?!.s*)[^;])*" # ...but it cannot contain ";" or ".s*"

identificate_hours = r"(?:as*las|as*la|)s*(?:(|)s*(d{1,2}):(d{1,2})s*(?:(am)|(pm)|)s*(?:)|)" # author's note (translated from Spanish): does not accept the 'am' or the 'pm' being left out

date_format = r"(?:(|)s*(d*)_-_(d{2})_-_(d{2})s*(?:)|)"

# Candidate separators between the middle text and the second date; each one is tried in turn below.
some_text_limiters = [r",s*hasta", r"hasta", r"al", r"a "]


for some_text_limiter in some_text_limiters:

    # Full pattern: optional lead-in words, first date (+ optional hours), middle text, limiter, second date (+ optional hours).
    identification_re_0 = r"(?:(?<=s)|^)(?:desdes*el|desde|del|des*el|des*la|de |)s*(?:día|dia|fecha|)s*(?:del|des*el|de |)s*" + date_format + r"s*(?:" + identificate_hours + r"|)s*(?:)|)s*(" + some_text + r")s*" + some_text_limiter + r"s*(?:el|la|)s*(?:fecha|d[íi]a|)s*(?:del|des*el|de|)s*" + date_format + r"s*(?:" + identificate_hours + r"|)s*(?:)|)"

    # NOTE: the lambda below is not valid Python: `if ...: ... else: ...` is statement syntax,
    # while a lambda body must be a single expression (`A if cond else B`).
    # NOTE: r"{m[1]}" has no f prefix, so it is the literal text {m[1]}, not the captured group.
    # NOTE: re.IGNORECASE is the fourth positional argument here, which re.sub takes as `count`, not `flags`.
    input_text = re.sub(identification_re_0,
                        lambda m: if(r"{m[1]}" == None or r"{m[1]}" == " " or r"{m[1]}" == "") : (f"({m[1]}_-_{m[2]}_-_({m[3]}({m[4] or '00'}:{m[5] or '00'} {m[6] or m[7] or 'am'})_--_{m[9]}_-_{m[10]}_-_({m[11]}({m[12] or '00'}:{m[13] or '00'} {m[14] or m[15] or 'am'})))").replace(" )", ")").replace("( ", "(") else : (f"({m[1]}_-_{m[2]}_-_({m[3]}({m[4] or '00'}:{m[5] or '00'} {m[6] or m[7] or 'am'})_--_{m[9]}_-_{m[10]}_-_({m[11]}({m[12] or '00'}:{m[13] or '00'} {m[14] or m[15] or 'am'})))({m[8]})").replace(" )", ")").replace("( ", "("),
                        input_text, re.IGNORECASE)

print(repr(input_text))

I get a SyntaxError: invalid syntax with this lambda: lambda m: if(r"{m[8]}" == None or r"{m[8]}" == " " or r"{m[8]}" == "") : (f"({m[1]}_-_{m[2]}_-_({m[3]}({m[4] or '00'}:{m[5] or '00'} {m[6] or m[7] or 'am'})_--_{m[9]}_-_{m[10]}_-_({m[11]}({m[12] or '00'}:{m[13] or '00'} {m[14] or m[15] or 'am'})))").replace(" )", ")").replace("( ", "(") else : (f"({m[1]}_-_{m[2]}_-_({m[3]}({m[4] or '00'}:{m[5] or '00'} {m[6] or m[7] or 'am'})_--_{m[9]}_-_{m[10]}_-_({m[11]}({m[12] or '00'}:{m[13] or '00'} {m[14] or m[15] or 'am'})))({m[8]})").replace(" )", ")").replace("( ", "(")

How should I evaluate the conditions inside the lambda function housed inside the parameter of the re.sub() function?

lambda m: if(r"{m[8]}" == None or r"{m[8]}" == " " or r"{m[8]}" == "") : else:

All the evaluation of the conditional depends on "{m[8]}", and the outputs should be like the following

#for example 1, where {m[8]} is not None
'(2022_-_12_-_(10(00:00 am)_--_2022_-_12_-_(13(00:00 am)))(corrimos juntas hasta el 11° nivel de aquella montaña)'

#for example 2, where {m[8]} is None, and remove the last ()
'(2022_-_11_-_(10(18:30 pm)_--_2022_-_12_-_(01(21:00 am)))()hs' #wrong output
'(2022_-_11_-_(10(18:30 pm)_--_2022_-_12_-_(01(21:00 am)))hs' #correct output

Edit question with the error:

def sub_rule(m):
    """Build the replacement text for one date-range match.

    ``m`` is indexed by group number: groups 1-7 and 9-15 feed the two
    date/time parts of the result, group 8 is appended in parentheses.
    """
    res_true = f"({m[1]}_-_{m[2]}_-_({m[3]}({m[4] or '00'}:{m[5] or '00'} {m[6] or m[7] or 'am'})_--_{m[9]}_-_{m[10]}_-_({m[11]}({m[12] or '00'}:{m[13] or '00'} {m[14] or m[15] or 'am'})))"

    # ternary expression is general, not limited to lambdas
    # NOTE(review): r"{m[8]}" is a raw string without the f prefix, so each comparison
    # tests the fixed text {m[8]} and is always False. The else branch therefore always
    # runs, which appends "()" when group 8 is empty.
    return (
        res_true.replace(" )", ")").replace("( ", "(")
        if (r"{m[8]}" == None or r"{m[8]}" == " " or r"{m[8]}" == "") 
        else 
        (res_true + f"({m[8]})").replace(" )", ")").replace("( ", "(")
        )


# Candidate separators between the middle text and the second date; each one is tried in turn.
# NOTE(review): the regex escapes look stripped (e.g. `s*` is presumably `\s*`) — confirm against the original post.
some_text_limiters = [r",s*hasta", r"hasta", r"al", r"a "]

for some_text_limiter in some_text_limiters:

    # Full pattern: optional lead-in words, first date (+ optional hours), middle text, limiter, second date (+ optional hours).
    identification_re_0 = r"(?:(?<=s)|^)(?:desdes*el|desde|del|des*el|des*la|de |)s*(?:día|dia|fecha|)s*(?:del|des*el|de |)s*" + date_format + r"s*(?:" + identificate_hours + r"|)s*(?:)|)s*(" + some_text + r")s*" + some_text_limiter + r"s*(?:el|la|)s*(?:fecha|d[íi]a|)s*(?:del|des*el|de|)s*" + date_format + r"s*(?:" + identificate_hours + r"|)s*(?:)|)"

    # flags must be passed by keyword: the fourth positional parameter of re.sub is `count`,
    # so a positional re.IGNORECASE would cap the number of replacements instead of
    # enabling case-insensitive matching.
    input_text = re.sub(identification_re_0,
                        sub_rule,
                        input_text, flags=re.IGNORECASE)

And the wrong output:

'(2022_-_12_-_(10(00:00 am)_--_2022_-_12_-_(13(00:00 am)))()'

And the correct output in example 2:

'(2022_-_12_-_(10(00:00 am)_--_2022_-_12_-_(13(00:00 am)))'

EDIT 2: I have managed to perform the conditional, although not within the same lambda

def remove_or_not_parentheses_from_middle_text(m):
    """Build the replacement text for one date-range match.

    ``m`` is indexed by group number (a ``re.Match`` or any sequence works):
    groups 1-7 and 9-15 feed the two date/time parts, group 8 is the middle
    text. The middle text is appended as ``(text)`` only when it is present
    and not blank, so no empty ``()`` is left at the end of the result.
    """
    res_true = f"({m[1]}_-_{m[2]}_-_({m[3]}({m[4] or '00'}:{m[5] or '00'} {m[6] or m[7] or 'am'})_--_{m[9]}_-_{m[10]}_-_({m[11]}({m[12] or '00'}:{m[13] or '00'} {m[14] or m[15] or 'am'})))"

    # Test the captured value itself: str(None) is the text "None", so wrapping
    # the group in str() can never detect a group that did not participate.
    middle_text = m[8]
    if middle_text is not None and middle_text.strip():
        res_true += f"({middle_text})"

    return res_true.replace(" )", ")").replace("( ", "(")

Answers:

Use a conditional (ternary) expression, which has the form (value if true) if (condition) else (value otherwise).

# A conditional expression used as the lambda body: yields True when x > 0, otherwise False.
f = lambda x: True if x > 0 else False
print(f(10))
# True

Nesting is also possible

# Chained conditional expressions: x < 0 is tested first, then x < 10;
# any other x falls through to the final string.
g = lambda x: '-' if x < 0 else '+' if x < 10 else '+ but more than 10'
print(g(5))
# +

Hint: split long expressions into multiple lines using ( and ), for example like this:

# Replacement callback for re.sub, written as one conditional expression.
# The whole body is wrapped in parentheses so the line breaks are legal even
# when the lambda is not already inside another bracketed call.
# The condition tests the captured group 8 itself (None, empty or blank), not a
# string literal, so the trailing "(...)" is added only when there is middle text.
lambda m: (
    (
        f"({m[1]}_-_{m[2]}_-_({m[3]}({m[4] or '00'}:{m[5] or '00'} {m[6] or m[7] or 'am'})_--_{m[9]}_-_{m[10]}_-_({m[11]}({m[12] or '00'}:{m[13] or '00'} {m[14] or m[15] or 'am'})))"
        .replace(" )", ")")
        .replace("( ", "(")
    )
    if not (m[8] or "").strip()
    else (
        f"({m[1]}_-_{m[2]}_-_({m[3]}({m[4] or '00'}:{m[5] or '00'} {m[6] or m[7] or 'am'})_--_{m[9]}_-_{m[10]}_-_({m[11]}({m[12] or '00'}:{m[13] or '00'} {m[14] or m[15] or 'am'})))({m[8]})"
        .replace(" )", ")")
        .replace("( ", "(")
    )
)

In this case a lambda function is not a good choice:

  • as mentioned by chepner, the function depends only on the match object and not on the loop's iteration variables, so it can be defined once outside the loop
  • a lambda cannot hold an intermediate variable assignment, which would be useful here because the two conditional branches produce almost the same result
# outside the loop

def sub_rule(m):
    """Build the replacement text for one date-range match.

    ``m`` is indexed by group number (a ``re.Match`` or any sequence works):
    groups 1-7 and 9-15 feed the two date/time parts, group 8 is the middle
    text. The middle text is appended as ``(text)`` only when it is present
    and not blank; otherwise nothing is appended, so no empty ``()`` remains.
    """
    res_true = f"({m[1]}_-_{m[2]}_-_({m[3]}({m[4] or '00'}:{m[5] or '00'} {m[6] or m[7] or 'am'})_--_{m[9]}_-_{m[10]}_-_({m[11]}({m[12] or '00'}:{m[13] or '00'} {m[14] or m[15] or 'am'})))"

    # ternary expression is general, not limited to lambdas
    # The condition inspects the captured group 8 itself; `or ""` covers a
    # group that did not participate in the match (None).
    return (
        res_true.replace(" )", ")").replace("( ", "(")
        if not (m[8] or "").strip()
        else
        (res_true + f"({m[8]})").replace(" )", ")").replace("( ", "(")
    )
Answered By: cards