How to submit a form with a session in Scrapy

Question:

I’m trying to scrape a website using Scrapy. To get the content I want, I need to log in first. The URL is login_url

There, the login form looks like this:

(screenshot of the login form)

My code is as follows:

import scrapy
from scrapy import FormRequest
from scrapy.crawler import CrawlerProcess
from scrapy.shell import inspect_response

LOGIN_URL1 = "https://www.partslink24.com/partslink24/user/login.do"

class PartsSpider(scrapy.Spider):
    name = "parts"
    login_url = LOGIN_URL1
    start_urls = [
        login_url,
    ]

    def parse(self, response):
        # Credentials (COMPANY_ID, USERNAME, PASSWORD) are defined elsewhere.
        form_data = {
            'accountLogin': COMPANY_ID,
            'userLogin': USERNAME,
            'loginBean.password': PASSWORD
        }
        yield FormRequest(url=self.login_url, formdata=form_data, callback=self.parse1)

    def parse1(self, response):
        inspect_response(response, self)
        print("RESPONSE: {}".format(response))


def start_scraper(vin_number):
    process = CrawlerProcess()
    process.crawl(PartsSpider)
    process.start()

But the problem is that the site checks whether a session is already active, so the form cannot be submitted and I get an error.

When I check the response I get after submitting the login form, I see the following error:

(screenshot of the session-check error message)

The code on their site that performs this check is as follows:

var JSSessionChecker = {
    check: function()
    {
        if (!Ajax.getTransport())
        {
            alert('NO_AJAX_IN_BROWSER');
        }
        else
        {
            
            new Ajax.Request('/partslink24/checkSessionCookies.do', {
                method:'post',
                onSuccess: function(transport)
                {
                    if (transport.responseText != 'true')
                    {
                        if (Object.isFunction(JSSessionChecker.showError)) JSSessionChecker.showError(); 
                    }
                },
                onFailure: function(e) 
                { 
                    if (Object.isFunction(JSSessionChecker.showError)) JSSessionChecker.showError(); 
                },
                onException: function (request, e) 
                { 
                    if (Object.isFunction(JSSessionChecker.showError)) JSSessionChecker.showError(); 
                }
            });
        }
    },
    
    showError: function()
    {
        var errorElement = $('sessionCheckError');
        if (errorElement)
        {
            errorElement.show();
        }
    }
};
JSSessionChecker.check();

On success, the check endpoint simply returns true.

Is there any way to activate the session before submitting the form?
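
For illustration, here is a minimal Scrapy sketch of that idea. The assumption (not verified against the site) is that the cookies set by the initial GET of the login page are what /partslink24/checkSessionCookies.do expects, so the spider POSTs to that endpoint first and only then submits the login form; the spider name and callback names are illustrative.

import scrapy
from scrapy import FormRequest

LOGIN_URL = "https://www.partslink24.com/partslink24/user/login.do"
CHECK_URL = "https://www.partslink24.com/partslink24/checkSessionCookies.do"

class SessionCheckSpider(scrapy.Spider):
    name = "parts_session_check"
    start_urls = [LOGIN_URL]

    def parse(self, response):
        # The initial GET of the login page should set the session cookies.
        # Replicate the site's JSSessionChecker by POSTing to the check
        # endpoint before attempting the login.
        yield scrapy.Request(
            CHECK_URL,
            method="POST",
            callback=self.after_check,
            meta={"login_response": response},
        )

    def after_check(self, response):
        # The JS above expects the endpoint to answer with the plain text "true".
        if response.text.strip() != "true":
            self.logger.warning("Session check did not return 'true'")
        login_response = response.meta["login_response"]
        yield FormRequest.from_response(
            login_response,
            formdata={
                "loginBean.accountLogin": "COMPANY_ID",
                "loginBean.userLogin": "USERNAME",
                "loginBean.password": "PASSWORD",
            },
            callback=self.after_login,
        )

    def after_login(self, response):
        self.logger.info("Login response: %s", response)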

Thanks in advance.

EDIT

The error page I get when using the answer from @fam:

(screenshot of the resulting error page)

Asked By: Boky


Answers:

Please check this code.

import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.shell import inspect_response

LOGIN_URL1 = "https://www.partslink24.com/partslink24/user/login.do"

class PartsSpider(scrapy.Spider):
    name = "parts"
    login_url = LOGIN_URL1
    start_urls = [
        login_url,
    ]

    def parse(self, response):
        # Submit the login form with the full set of loginBean.* fields,
        # letting from_response pick up any hidden inputs from the page.
        form_data = {
            'loginBean.accountLogin': "COMPANY_ID",
            'loginBean.userLogin': "USERNAME",
            'loginBean.sessionSqueezeOut': "false",
            'loginBean.password': "PASSWORD",
            'loginBean.userOffsetSec': "18000",
            'loginBean.code2f': ""
        }
        yield scrapy.FormRequest.from_response(response=response, url=self.login_url, formdata=form_data, callback=self.parse1)

    def parse1(self, response):
        # inspect_response(response, self)  # uncomment to debug in the Scrapy shell
        print("RESPONSE: {}".format(response))


def start_scraper(vin_number):
    process = CrawlerProcess()
    process.crawl(PartsSpider)
    process.start()

I am not getting an error and the response is as follows:

RESPONSE: <200 https://www.partslink24.com/partslink24/user/login.do>
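
Note that a 200 response on the login URL does not by itself prove the login succeeded. One way to sanity-check it in parse1, sketched here on the assumption that a failed session check renders the sessionCheckError element referenced in the JavaScript above, is:

    def parse1(self, response):
        # Heuristic: the page shown after a failed session check contains an
        # element with id "sessionCheckError" (see the JSSessionChecker code).
        if response.css("#sessionCheckError"):
            self.logger.warning("Session check error element present; login probably failed")
        else:
            self.logger.info("No session check error element found")
        print("RESPONSE: {}".format(response))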

EDIT:
The following code uses Selenium. It logs you into the page easily; you only need to download ChromeDriver and install Selenium.

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time


chrome_options = Options()
#chrome_options.add_argument("--headless")


driver = webdriver.Chrome(executable_path="./chromedriver", options=chrome_options)
driver.get("https://www.partslink24.com/partslink24/user/login.do")

# enter the form fields
company_ID = "company id"
user_name = "user name"
password = "password"

company_ID_input = driver.find_element_by_xpath("//input[@name='accountLogin']")
company_ID_input.send_keys(company_ID)
time.sleep(1)

user_name_input = driver.find_element_by_xpath("//input[@name='userLogin']")
user_name_input.send_keys(user_name)
time.sleep(1)

password_input = driver.find_element_by_xpath("//input[@id='inputPassword']")
password_input.send_keys(password)
time.sleep(1)

# click the login button and wait for the page to load
click_btn = driver.find_element_by_xpath("//a[@tabindex='5']")
click_btn.click()
time.sleep(5)

Don’t forget to change the credentials.
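
If the rest of the scraping should happen outside the browser, one possible follow-up, sketched here on the assumption that the login state lives entirely in cookies, is to copy the Selenium session cookies into a requests.Session (the target URL below is a placeholder, not a real partslink24 endpoint):

import requests

# Copy the cookies from the logged-in Selenium session into requests.
session = requests.Session()
for cookie in driver.get_cookies():
    session.cookies.set(cookie["name"], cookie["value"], domain=cookie.get("domain"))

# Fetch further pages with the authenticated session.
resp = session.get("https://www.partslink24.com/partslink24/some-protected-page.do")
print(resp.status_code)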

Answered By: fam