1064, "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB for the right syntax to use near 'str.' at line 1"
Question:
So, I am using Kafka stream and twitter API to get the tweets and to send them to MySQL, but it’s not working
def insert_tweet(tweet,username,pnr,prediction,tweet_id):
query = "INSERT INTO tweets(tweet,username,pnr,prediction,tweet_id) VALUES ('"+tweet+"','"+username+"',"+str(pnr)+","+str(int(prediction))+","+str(tweet_id)+");"
try:
conn = MySQLdb.connect("localhost","root","","twitter" )
cursor = conn.cursor()
cursor.execute(query)
print("Database insertion SUCCESSFUL!!")
conn.commit()
except MySQLdb.Error as e:
print(e)
print("Database insertion unsuccessful!!")
finally:
conn.close()
def process_data(data):
print("Processing data ...")
if (not data.isEmpty()):
nbModel=bc_model.value
hashingTF = HashingTF(100000)
tf = hashingTF.transform(data.map(lambda x: x[0].encode('utf-8','ignore')))
tf.cache()
idf = IDF(minDocFreq=2).fit(tf)
tfidf = idf.transform(tf)
tfidf.cache()
prediction=nbModel.predict(tfidf)
temp = []
i=0
for p,q,r in data.collect():
temp.append([])
temp[i].append(p.encode('utf-8','ignore'))
temp[i].append(q)
temp[i].append(r)
i+=1
i=0
for p in prediction.collect():
temp[i].append(p)
i+=1
print(temp)
for i in temp:
insert_tweet(str(i[0]),str(i[1]),"0",int(i[3]),int(i[2]))
else:
print("Empty RDD !!!")
pass
twitter=tweets.map(lambda tweet: tweet['data']['id'])
tweet_text = tweets.map(lambda tweet: tweet['data']['text'])
txt = tweets.map(lambda x: (x['data']['text'], x['data']['id'], x['data']['id']))
txt.foreachRDD(process_data)
I tried to run the query manually in MySQL and it worked, so I don’t understand what’s the problem.
I am using XAMPP v 3.3.0
This is my table
Answers:
In your code, if your tweet
contains a literal '
character, it will cause problems with the way you format your SQL.
query = "INSERT INTO tweets(tweet,username,pnr,prediction,tweet_id) VALUES ('"+tweet+"'
If the value of tweet
is:
He said: 'Sir, I am travelling from Rewa to Bhopal...
Then the resulting SQL will look like the following. Notice that the extra '
character appears to MySQL as the end of the string!
INSERT INTO tweets(tweet,username,pnr,prediction,tweet_id) VALUES ('He said: 'Sir, I am travelling from Rewa to Bhopal...
^
This confuses the SQL syntax. MySQL doesn’t understand why the end of the quoted string is followed by "Sir, I am travelling…"
If you use Python string-formatting, it doesn’t help.
query = "INSERT INTO tweets(tweet,username,pnr,prediction,tweet_id) VALUES ('%s','%s',%s,%s,%s);" % (tweet,username,str(pnr),str(int(prediction)))
There will still be mismatched '
characters, and it results in an SQL syntax error when you run the query.
Query parameters are the best fix for this problem. In the Python connector, it also uses the %s
placeholder, but you don’t format the string values into the string. Also, don’t put them in quotes in the SQL query.
query = "INSERT INTO tweets(tweet,username,pnr,prediction,tweet_id) VALUES (%s,%s,%s,%s,%s)"
cursor.execute(query, (tweet, username, str(pnr), str(int(prediction)), str(tweet_id)))
The %s
placeholders remain in the query
string, and you pass a second argument to execute()
— a list of values to use as parameter values. The Python connector will combine these values in a safe way, so if they contain '
or any other special character, they won’t upset the syntax.
This is shown in code examples here: https://dev.mysql.com/doc/connector-python/en/connector-python-example-cursor-transaction.html
Also for future reference, remember that a query parameter can be used only for one scalar SQL value (e.g. where you would otherwise use a quoted string literal or a numeric literal). You can’t use parameters for SQL identifiers like table names or column names, or SQL keywords or expressions, or lists of values.
So, I am using Kafka stream and twitter API to get the tweets and to send them to MySQL, but it’s not working
def insert_tweet(tweet,username,pnr,prediction,tweet_id):
query = "INSERT INTO tweets(tweet,username,pnr,prediction,tweet_id) VALUES ('"+tweet+"','"+username+"',"+str(pnr)+","+str(int(prediction))+","+str(tweet_id)+");"
try:
conn = MySQLdb.connect("localhost","root","","twitter" )
cursor = conn.cursor()
cursor.execute(query)
print("Database insertion SUCCESSFUL!!")
conn.commit()
except MySQLdb.Error as e:
print(e)
print("Database insertion unsuccessful!!")
finally:
conn.close()
def process_data(data):
print("Processing data ...")
if (not data.isEmpty()):
nbModel=bc_model.value
hashingTF = HashingTF(100000)
tf = hashingTF.transform(data.map(lambda x: x[0].encode('utf-8','ignore')))
tf.cache()
idf = IDF(minDocFreq=2).fit(tf)
tfidf = idf.transform(tf)
tfidf.cache()
prediction=nbModel.predict(tfidf)
temp = []
i=0
for p,q,r in data.collect():
temp.append([])
temp[i].append(p.encode('utf-8','ignore'))
temp[i].append(q)
temp[i].append(r)
i+=1
i=0
for p in prediction.collect():
temp[i].append(p)
i+=1
print(temp)
for i in temp:
insert_tweet(str(i[0]),str(i[1]),"0",int(i[3]),int(i[2]))
else:
print("Empty RDD !!!")
pass
twitter=tweets.map(lambda tweet: tweet['data']['id'])
tweet_text = tweets.map(lambda tweet: tweet['data']['text'])
txt = tweets.map(lambda x: (x['data']['text'], x['data']['id'], x['data']['id']))
txt.foreachRDD(process_data)
I tried to run the query manually in MySQL and it worked, so I don’t understand what’s the problem.
I am using XAMPP v 3.3.0
This is my table
In your code, if your tweet
contains a literal '
character, it will cause problems with the way you format your SQL.
query = "INSERT INTO tweets(tweet,username,pnr,prediction,tweet_id) VALUES ('"+tweet+"'
If the value of tweet
is:
He said: 'Sir, I am travelling from Rewa to Bhopal...
Then the resulting SQL will look like the following. Notice that the extra '
character appears to MySQL as the end of the string!
INSERT INTO tweets(tweet,username,pnr,prediction,tweet_id) VALUES ('He said: 'Sir, I am travelling from Rewa to Bhopal...
^
This confuses the SQL syntax. MySQL doesn’t understand why the end of the quoted string is followed by "Sir, I am travelling…"
If you use Python string-formatting, it doesn’t help.
query = "INSERT INTO tweets(tweet,username,pnr,prediction,tweet_id) VALUES ('%s','%s',%s,%s,%s);" % (tweet,username,str(pnr),str(int(prediction)))
There will still be mismatched '
characters, and it results in an SQL syntax error when you run the query.
Query parameters are the best fix for this problem. In the Python connector, it also uses the %s
placeholder, but you don’t format the string values into the string. Also, don’t put them in quotes in the SQL query.
query = "INSERT INTO tweets(tweet,username,pnr,prediction,tweet_id) VALUES (%s,%s,%s,%s,%s)"
cursor.execute(query, (tweet, username, str(pnr), str(int(prediction)), str(tweet_id)))
The %s
placeholders remain in the query
string, and you pass a second argument to execute()
— a list of values to use as parameter values. The Python connector will combine these values in a safe way, so if they contain '
or any other special character, they won’t upset the syntax.
This is shown in code examples here: https://dev.mysql.com/doc/connector-python/en/connector-python-example-cursor-transaction.html
Also for future reference, remember that a query parameter can be used only for one scalar SQL value (e.g. where you would otherwise use a quoted string literal or a numeric literal). You can’t use parameters for SQL identifiers like table names or column names, or SQL keywords or expressions, or lists of values.