PyMySQL database, Issues with filling table

Question:

This is my Python main.py program:

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait
import time

from bs4 import BeautifulSoup
import requests

import mysql.connector

db = mysql.connector.connect(                                                                                           
    host="localhost",                                                                                                   
    user="root",
    passwd="root",
    database="testdatabase"                                                                                             
    )

mycursor = db.cursor()

try:
    #mycursor.execute("CREATE DATABASE animus")
    mycursor.execute("CREATE TABLE animus_list (name VARCHAR(255), voti VARCHAR(10), ranks VARCHAR(50),   popolarità VARCHAR(50), membri VARCHAR(50) )")

except:
    #delete_table = "DROP TABLE animus_list"
    #mycursor.execute(delete_table)
    print("Table already exists")

class Datascraping:
    def __init__(self):
        print("Let's start!")

    def MyAnimeList(self):
    
        '''GIVE SELENIUM THE GECKODRIVER LOCATION AND THE WEBSITE TO WORK ON'''
        #---------------------------------------------------------------------------------------------------------------------
        PATH = "C:Program Files (x86)chromedriver.exe"

        driver = webdriver.Chrome(PATH)

        driver.get("https://myanimelist.net/anime.php")


        print(driver.title)
     
        '''INPUT TO PYTHON THE ANIME YOU ARE SEARCHING. CAREFUL!!! THE SEARCHBAR OF THAT WEBSITE DOESN'T WORK WITH LESS THAN 3 LETTERS IN IT!!!'''
        #-----------------------------------------------------------------------------------------------------------------------

        selected_anime = str(input("Insert the anime you are searching (MIN 3 CHARACTERS):  "))
        while len(selected_anime) < 3:
            print("The search won't work with less than 3 characters!!")
            selected_anime = input("Insert the anime you are searching (MIN 3 CHARACTERS):  ")

        '''COOKIE BUTTON THAT APPEARS AS SOON AS I OPEN THE WEBSITE. IF I DON'T CLOSE IT, THE HTML OF THE PAGE WON'T BE REACHABLE AND I WILL GET AN EXCEPTION'''
        #-----------------------------------------------------------------------------------------------------------------------
        try:
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "css-47sehv")))
            puls_cookie = driver.find_element(By.CLASS_NAME, "css-47sehv")
            puls_cookie.click()
        except:
            print("The cookie window didn't appear this time!")
        #-----------------------------------------------------------------------------------------------------------------------

        '''WRITE IN THE WEBSITE SEARCHBAR'''
        #-----------------------------------------------------------------------------------------------------------------------
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "q")))                       #searchbar has ID = "q"
        search = driver.find_element(By.ID, "q")
        search.click()
        search.send_keys(anime_selezionato)                                                               #anime_selezionato means "selected anime"
        search.send_keys(Keys.RETURN)
        #-----------------------------------------------------------------------------------------------------------------------



        '''TRY TO OBTAIN THE ANIME WITH THE EXACT NAME THAT WE INSERTED IN anime_selezionato'''
        #-----------------------------------------------------------------------------------------------------------------------
        try:
           driver.get(WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, f"//table/tbody/tr/td[2]/div/a[strong='{anime_selezionato}']"))).get_attribute("href"))
        #-----------------------------------------------------------------------------------------------------------------------


            '''IF WE DON'T FIND THE ANIME WITH THE EXACT NAME WE INPUT, WE PRINT THE ANIME(s) THAT THE PAGE FOUND WITH THE KEY WE INSERTED IN THE SEARCHBAR. IS ONE OF THEM WHAT YOU WERE SEARCHING FOR?'''
        except:

        #-----------------------------------------------------------------------------------------------------------------------
            print("No anime with the same name found! Is it one of this, maybe?")

            listaanimu = []                                                 #create an empty list to  fill later with the list of animes found
            numero_di_anime = 1                                             #iterable element that we will need in the for loop. numero di anime means "number of animes"
            search2 = driver.find_elements(By.TAG_NAME, "strong")           #All anime names are stored    in strong tags
            for element in search2:
                nome = element.text                                         #nome means "name"
                print("-" + str(numero_di_anime) + "    " + element.text)   #the result will be like: -1   Demon Slayer
                listaanimu.append(nome)
                numero_di_anime += 1
            print("If one of this animes is what you are looking for, write the respective index number")
            anime = input("The anime I want is number:  ")
            try:
                anime2 = int(anime)                                         #this try/except is needed in case of a wrong input, like a letter, that would create an exception 
            except:
                anime2 = anime

            while anime2 not in range(1, 51):                               #the animes that appear are 50 at maximum. A number too big would not be found and give an exception
                print("Anime not found! Try again!!")
                anime = input("The anime I want is number:  ")
                try:
                    anime2 = int(anime)
                except:
                    anime2 = anime
        #-----------------------------------------------------------------------------------------------------------------------



            '''CLICK THE NAME OF THE ANIME ON THE SITE, TO OPEN ITS PAGE'''
        #-----------------------------------------------------------------------------------------------------------------------
            def anime_alternativo():
                try:
                    anime_selezionato = listaanimu[int(anime) - 1]                                                                      #The list of animes starts at 1, but lists in python start at 0
                    print(anime_selezionato)
                    print("Anime Found!")
                    try:
                        driver.get(WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, f"//table/tbody/tr/td[2]/div/a[strong='{anime_selezionato}']"))).get_attribute("href"))
                        anime_BeautifulSoup = driver.find_element(By.LINK_TEXT, "%s" % anime_selezionato).get_attribute("href")         #BeautifulSoup works with links, let's give it the link of the page of the selected anime
                        print(anime_BeautifulSoup)
                    except:
                        print("Anime not found! Are you sure it is part of the list?")
                except:
                    print("It is not a valid answer!!! ")                                                                                #these excepts should never be activated, shouldn't even be necessary
                BeautifulSoup_Scraping(anime_BeautifulSoup)                                                                              #give the link to beautifulSoup
         #-----------------------------------------------------------------------------------------------------------------------
    
                    '''NOW THAT THE PAGE OF THE SELECTED ANIME IS OPENED, WE DATASCRAPE IT WITH BEAUTIFULSOUP'''
        #-----------------------------------------------------------------------------------------------------------------------
            def BeautifulSoup_Scraping(link):

                html_text = requests.get(link).text
                soup = BeautifulSoup(html_text, 'lxml')

                rangevoti = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'na']                                                          #the range of votes of the website. if an anime is still unreleased, its vote will be na

                for numeri in rangevoti:
                    try:
                        voto = soup.find('div', class_='score-label score-%s' % (numeri)).text                                           #get the vote of the anime from the webpage
                    except:
                        print("Error! They may have invented another 'type' of vote!")

                try:
                    nome = soup.find('h1', class_='title-name h1_bold_none').text                                                        #get the name of the anime. this try/except is needed because some anime names have weird fonts that are not supported by BeautifulSoup
                except:
                    print("The anime could have a non supported font")

                rank = soup.find('span', class_='numbers ranked').text                                                                   #get the anime rank, popularity and number of users
                popularity = soup.find('span', class_='numbers popularity').text        
                users = soup.find('span', class_='numbers members').text

                rank1 = rank.replace('Ranked #', '', 1)                                                                                  #remove unnecessary characters, just give me the numbers
                popularity1 = popularity.replace('Popularity #', '', 1)
                users1 = users.replace('Members', '', 1)

                if rank == 'Ranked N/A':
                    rank1 = rank.replace('Ranked', '', 1)
                if rank == 'Popularity N/A':
                    popularity1 = popularity.replace('Popularity', '', 1)

                print(nome + " " + voto + " " + rank1 + " " + popularity1 + " " + users1)                                                #Print a preview of what the row on the database will look like


                name = str(nome)                                                
                vote = str(voto)
                rank2 = str(rank1)
                popularity2 = str(popularity1)
                users2 = str(users1)

                '''ERROR ZONE! MYSQL SAYS THAT I HAVE AN ERROR IN THE SYNTAX BUT THIS COMMAND WORKS IN ANOTHER PROGRAM, WHERE IT IS WRITTEN IN THE SAME EXACT WAY!'''
#---------------------------------------------------------------------------------------------------------------------------------------------------------------------

                data = "INSERT INTO animus_list (nome, voto, rank1, popularity1, users1) VALUES (%s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE"   #insert all the scraped data in the table
                values = (name, vote, rank2, popularity2, users2)
                mycursor.execute(data, values)
                db.commit()

                for x in mycursor:
                    print(x)

                #except:
                    #print("Cannot insert in database")
#---------------------------------------------------------------------------------------------------------------------------------------------------------------------




                '''CALL THE FUNCTION'''
        #-----------------------------------------------------------------------------------------------------------------------
            anime_alternativo()

        #-----------------------------------------------------------------------------------------------------------------------

        '''FINE PROGRAMMA'''
        #-----------------------------------------------------------------------------------------------------------------------
        time.sleep(5)                                                                                                                #close the browser window and end the program

        driver.quit()
        #-----------------------------------------------------------------------------------------------------------------------


scraping = Datascraping()
scraping.MyAnimeList() 

                                                                                                             

This whole program is inside a class because end result of this project will be: a Tkinter (or Kivi or PyQt, still not sure what to choose) app that scrapes websites

The part of the code that gives an error works perfectly in another program (which is just the beautifoulSoup of this program here).
The exception I get says that I made a mistake on the MySQL syntax, but again. In other programs it works and has the same syntax.

data = "INSERT INTO animus_list (nome, voto, rank1, popularity1, users1) VALUES (%s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE"   #insert all the scraped data in the table
values = (name, vote, rank2, popularity2, users2)
mycursor.execute(data, values)
db.commit()

for x in mycursor:
    print(x)

#except:
    #print("Cannot insert in database")

This is the part that is causing me issues.
What I tried is:
-DELETING the table and creating a new one
-Adding/removing variables to add to the table
-Changing the size of the space reserved for each variable in MySQL (VARCHAR (1), VARCHAR (100)….)

Asked By: JustAMan

||

Answers:

The error was caused by the "ON DUPLICATE KEY UPDATE", and when I removed it the error disappeared.
I thought I needed it to not have duplicate rows but I found another way.
Thank you for the help Markus.
I wish you a great day

Answered By: JustAMan
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.