Can't regex over created list
Question:
I want to:
-
Pull number from parentheses “javascript:analysis(XXXXXXX)” for each game
-
Create a list with direct hyperlinks like: http://www.nowgoal.cc/analysis/XXXXXXX.html
-
Many many more…
Code:
import bs4 as bs
import sys
import re
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
class Page(QWebEnginePage):
    """Load a URL in a Qt WebEngine page and keep the resulting HTML.

    The constructor blocks: it starts the Qt event loop and returns only
    after the page has finished loading and its HTML has been delivered to
    ``Callable``. The markup is then available as ``self.html``.
    """

    def __init__(self, url):
        """Create the Qt application, start loading *url* and wait for the HTML."""
        # The application object is created before the page is initialised.
        self.app = QApplication(sys.argv)
        QWebEnginePage.__init__(self)
        self.html = ''
        self.loadFinished.connect(self._on_load_finished)
        self.load(QUrl(url))
        # Runs the event loop until Callable() calls quit().
        self.app.exec_()

    def _on_load_finished(self):
        """Ask for the page's HTML once loading has finished."""
        # toHtml() is asynchronous: it hands the markup to the callback and
        # returns nothing, so its return value must not be stored in self.html.
        self.toHtml(self.Callable)
        print('READY!')

    def Callable(self, html_str):
        """Store the HTML passed in by toHtml() and stop the event loop."""
        self.html = html_str
        self.app.quit()
# Entry point: load the page through Page, collect the "Match analyze" links, then run a regex on each.
def main():
page = Page('http://www.nowgoal.cc/')
soup = bs.BeautifulSoup(page.html, 'html.parser')
lista = []
# Collect every <a> element whose title attribute is 'Match analyze'.
for x in soup.find_all("a", attrs={'title' : 'Match analyze'}):
lista.append(x)
for element in lista:
# NOTE(review): element is a Beautiful Soup object, not a str, so this call raises the TypeError quoted below.
# NOTE(review): the parentheses in the pattern are unescaped, so they are capture groups around seven arbitrary characters, not literal "(" and ")".
z = re.search(r"((.......))",element)
if z:
print(z.groups())
if __name__ == '__main__': main()
Python yields: TypeError: expected string or bytes-like object
I have been struggling with this for a few days now and have run out of ideas for fixing it. I dug around, tried things, and searched, but ended up clueless. I really want to write more code and develop my idea. Please help, I’m dying here.
Answers:
According to the Beautiful Soup documentation,
the elements returned by soup.find_all
are not strings; they are objects. Each object does, however, have its __str__
method overridden so that it prints in a human-readable form.
So you are not passing strings into re.search, but Beautiful Soup objects, which is why it raises the TypeError.
# Failing version: element is a Beautiful Soup object, not a str, so re.search raises TypeError.
for element in lista:
z = re.search(r"((.......))",element)
if z:
print(z.groups())
To fix this, convert element
into a string with str(element) before passing it to re.search.
# Beautiful Soup elements are not strings, so convert each one with str()
# before handing it to re.search.
for element in lista:
    # Escape the literal parentheses and capture only the digits between them.
    # The unescaped pattern "((.......))" would treat the parentheses as groups
    # and simply capture the first seven characters of the tag.
    z = re.search(r"analysis\((\d+)\)", str(element))
    if z:
        print(z.groups())
I want to:
-
Pull number from parentheses “javascript:analysis(XXXXXXX)” for each game
-
Create a list with direct hyperlinks like: http://www.nowgoal.cc/analysis/XXXXXXX.html
-
Many many more…
Code:
import bs4 as bs
import sys
import re
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
class Page(QWebEnginePage):
    """Load a URL in a Qt WebEngine page and keep the resulting HTML.

    The constructor blocks: it starts the Qt event loop and returns only
    after the page has finished loading and its HTML has been delivered to
    ``Callable``. The markup is then available as ``self.html``.
    """

    def __init__(self, url):
        """Create the Qt application, start loading *url* and wait for the HTML."""
        # The application object is created before the page is initialised.
        self.app = QApplication(sys.argv)
        QWebEnginePage.__init__(self)
        self.html = ''
        self.loadFinished.connect(self._on_load_finished)
        self.load(QUrl(url))
        # Runs the event loop until Callable() calls quit().
        self.app.exec_()

    def _on_load_finished(self):
        """Ask for the page's HTML once loading has finished."""
        # toHtml() is asynchronous: it hands the markup to the callback and
        # returns nothing, so its return value must not be stored in self.html.
        self.toHtml(self.Callable)
        print('READY!')

    def Callable(self, html_str):
        """Store the HTML passed in by toHtml() and stop the event loop."""
        self.html = html_str
        self.app.quit()
# Entry point: load the page through Page, collect the "Match analyze" links, then run a regex on each.
def main():
page = Page('http://www.nowgoal.cc/')
soup = bs.BeautifulSoup(page.html, 'html.parser')
lista = []
# Collect every <a> element whose title attribute is 'Match analyze'.
for x in soup.find_all("a", attrs={'title' : 'Match analyze'}):
lista.append(x)
for element in lista:
# NOTE(review): element is a Beautiful Soup object, not a str, so this call raises the TypeError quoted below.
# NOTE(review): the parentheses in the pattern are unescaped, so they are capture groups around seven arbitrary characters, not literal "(" and ")".
z = re.search(r"((.......))",element)
if z:
print(z.groups())
if __name__ == '__main__': main()
Python yields: TypeError: expected string or bytes-like object
I have been struggling with this for a few days now and have run out of ideas for fixing it. I dug around, tried things, and searched, but ended up clueless. I really want to write more code and develop my idea. Please help, I’m dying here.
According to the Beautiful Soup documentation,
the elements returned by soup.find_all
are not strings; they are objects. Each object does, however, have its __str__
method overridden so that it prints in a human-readable form.
So you are not passing strings into re.search, but Beautiful Soup objects, which is why it raises the TypeError.
# Failing version: element is a Beautiful Soup object, not a str, so re.search raises TypeError.
for element in lista:
z = re.search(r"((.......))",element)
if z:
print(z.groups())
To fix this, convert element
into a string with str(element) before passing it to re.search.
# Beautiful Soup elements are not strings, so convert each one with str()
# before handing it to re.search.
for element in lista:
    # Escape the literal parentheses and capture only the digits between them.
    # The unescaped pattern "((.......))" would treat the parentheses as groups
    # and simply capture the first seven characters of the tag.
    z = re.search(r"analysis\((\d+)\)", str(element))
    if z:
        print(z.groups())