Airflow 2.0: tasks getting skipped after BranchPythonOperator

Question:

I'm experimenting with branching in Airflow 2.0, and no matter what I try, all the tasks after the BranchPythonOperator get skipped.

Here is a minimal example of what I've been trying to accomplish:

from airflow.decorators import dag, task
from datetime import timedelta, datetime

from airflow.operators.python import BranchPythonOperator
from airflow.utils.trigger_rule import TriggerRule

import logging
logger = logging.getLogger("airflow.task")

# Example DAG from the question: task_A .. task_D are chained, the result of
# that chain is passed to a BranchPythonOperator, and two branch tasks lead
# into task_final.
@dag(
    schedule_interval="0 0 * * *",
    # NOTE(review): start_date is derived from the current time whenever this
    # file is evaluated, so it is not a fixed date.
    start_date=datetime.today() - timedelta(days=2),
    dagrun_timeout=timedelta(minutes=60),
)
def StackOverflowExample():
    # NOTE: every task below logs through the `logging` module directly rather
    # than through the module-level `logger` object.

    @task
    def task_A():

        logging.info("TASK A")
        

    @task
    def task_B():

        logging.info("TASK B")

    @task
    def task_C():

        logging.info("TASK C")

    @task
    def task_D():
        
        logging.info("TASK D")

        # Fixed value; this is what _choose_task compares against 0.5 below.
        return {"parameter":0.5}

    
    # Callable given to the BranchPythonOperator: returns a list of task ids
    # depending on whether task_parameters["parameter"] is below 0.5.
    # With the constant 0.5 returned by task_D the strict `<` test is False,
    # so the else branch ('branch_2') is the one taken.
    # NOTE(review): 'task_final' is listed in both return values although it
    # is not wired directly after choose_task (see the >> chain at the
    # bottom) - confirm whether listing it here has any effect.
    def _choose_task(task_parameters,**kwargs):

        logging.info(task_parameters["parameter"])
        if task_parameters["parameter"]<0.5:
            logging.info("SUCCESSS ")
            return ['branch_1', 'task_final']
        else:
            logging.info("RIP")
            return ['branch_2', 'task_final']

    @task(task_id="branch_1")
    def branch_1():
        logging.info("branch_1...")

    @task(task_id="branch_2")
    def branch_2():
        logging.info("branch_2")

    # No trigger_rule is passed for this task, so Airflow's default applies.
    # NOTE(review): presumably the default rule is why this task is skipped
    # whenever one of its two upstream branches is skipped - verify against
    # the Airflow trigger-rule documentation.
    @task(task_id="task_final")
    def task_final():
        logging.info("task_final")


    # `parameter` is whatever the whole >> chain evaluates to.
    # assumes >> yields its right-hand operand, i.e. task_D()'s output - TODO confirm
    parameter = task_A() >> task_B() >> task_C() >> task_D()   

    # NOTE(review): the trigger_rule below is set on the branching operator
    # itself, not on the tasks that follow it - confirm this is intended.
    choose_task = BranchPythonOperator(
                                            task_id='choose_best_model',
                                            op_kwargs={"task_parameters":parameter},
                                            python_callable=_choose_task,
                                            trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS
                                            )



    # Fan out to both branch tasks, then join on task_final.
    choose_task >> [branch_1(), branch_2()] >> task_final()


# Calling the decorated function at module level and binding the result to
# `dag`.
dag = StackOverflowExample  ()

Airflow DAG graph

Any clues? I suspect the trigger rule, but I'm an Airflow beginner, so I wouldn't rule out other problems I'm overlooking.

Asked By: ABaron

||

Answers:

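You should set the trigger rule on task_final, not on the branch operator.
You want task_final to run after branch_1 and branch_2 have finished (regardless of which of them was executed and which was skipped). With the default trigger rule (all_success), the skip of the branch that was not chosen propagates to task_final, so you need to set the ALL_DONE trigger rule on it: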

# Join task: ALL_DONE is requested so that it runs once branch_1 and branch_2
# have finished, regardless of which one was executed or skipped.
@task(
    task_id="task_final",
    trigger_rule=TriggerRule.ALL_DONE,
)
def task_final():
    logging.info("task_final")

enter image description here

Answered By: Elad Kalif

Following the same example, I want to pass values between tasks, from task_A through to task_final.

I've simplified the example above so that only task_A and task_D remain in front of the branch.

from airflow.decorators import dag, task
from datetime import timedelta, datetime
from typing import Dict
from airflow.operators.python import BranchPythonOperator
from airflow.utils.trigger_rule import TriggerRule

import logging

logger = logging.getLogger("airflow.task")


# Simplified follow-up DAG: task_A reads a value from the DAG run's conf,
# task_D passes it through, and a BranchPythonOperator is meant to pick
# branch_1 or branch_2 based on that value before task_final runs.
@dag(
    schedule_interval="0 0 * * *",
    # NOTE(review): start_date is derived from the current time whenever this
    # file is evaluated, so it is not a fixed date.
    start_date=datetime.today() - timedelta(days=2),
    dagrun_timeout=timedelta(minutes=60),
)
def StackOverflowExample():
    # Reads the 'dummy' entry from kwargs['dag_run'].conf and returns it
    # wrapped in a dict under the same key.
    # NOTE(review): presumably `inp` is None when the run was triggered
    # without a 'dummy' conf entry - verify; the `< 10` comparison in
    # _choose_task would then fail.
    @task
    def task_A(**kwargs) -> Dict[str, str]:

        inp = kwargs['dag_run'].conf.get('dummy')
        logging.info("TASK A")
        logging.info("dag dummy:" + str(inp))

        return{'dummy':inp}

    # Logs param['dummy'] and returns `param` unchanged.
    @task
    def task_D(param: Dict[str, str]) -> Dict[str, str]:
        logging.info("TASK D")
        logging.info(param['dummy'])
        return param


    # Branch callable: returns a list of task ids depending on whether
    # param["dummy"] is below 10.
    # NOTE(review): `param` is annotated Dict[str, str] but its value is
    # compared with the int 10 - the annotation and the comparison disagree.
    def _choose_task(param: Dict[str, str]):
        logging.info('-----choose_task')
        logging.info(param["dummy"])
        if param["dummy"] < 10:
            logging.info("SUCCESSS ")
            return ['branch_1', 'task_final']
        else:
            logging.info("else")
            return ['branch_2', 'task_final']

    @task(task_id="branch_1")
    #def branch_1(param: Dict[str, str]) -> Dict[str, str]:
    def branch_1():
        logging.info("branch_1...")
        #logging.info('dummy ' + str(param['dummy']))
        # return param

    @task(task_id="branch_2")
    # def branch_2(param: Dict[str, str]) -> Dict[str, str]:
    def branch_2():
        logging.info("branch_2")
        # logging.info('dummy ' + str(param['dummy']))
        # return param

    # ALL_DONE is set here, unlike on the branch tasks above.
    @task(task_id="task_final", trigger_rule=TriggerRule.ALL_DONE)
    def task_final():
        logging.info("task_final")

    parameter = task_D(task_A())
    # NOTE(review): the op_kwargs key below is "task_parameters", but
    # _choose_task's only parameter is named `param`. If op_kwargs is expanded
    # into keyword arguments for python_callable, the call fails with
    # "missing 1 required positional argument: 'param'". The key and the
    # parameter name need to match.
    choose_task = BranchPythonOperator(
        task_id='choose_best_model',
        op_kwargs={"task_parameters": parameter},
        python_callable=_choose_task,
        trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS
    )

    # Fan out to both branch tasks, then join on task_final.
    choose_task >> [branch_1(), branch_2()] >> task_final()

# Calling the decorated function at module level and binding the result to
# `dag`.
dag = StackOverflowExample()

When I run this I get the error:
Traceback (most recent call last):
File "/opt/python3.8/lib/python3.8/site-packages/airflow/operators/python.py", line 206, in execute
branch = super().execute(context)
File "/opt/python3.8/lib/python3.8/site-packages/airflow/operators/python.py", line 174, in execute
return_value = self.execute_callable()
File "/opt/python3.8/lib/python3.8/site-packages/airflow/operators/python.py", line 188, in execute_callable
return self.python_callable(*self.op_args, **self.op_kwargs)
TypeError: _choose_task() missing 1 required positional argument: 'param'

Any idea what I am missing, please? I want to pass the "dummy" value on to "branch_1" and "branch_2".

Answered By: Subi