Ich habe vor kurzem eine Homepage gecrawlt, um die Informationen zu erhalten, die ich benötige, und es danach geschafft, sie in meine MySQL-Datenbank einzutragen. Das Problem ist nun, dass einzelne Werte in Anführungszeichen dargestellt werden.
Header Price Deeplink PartnerID LocationID
'New York CityPASS' 10057 'https://www.ctrip.com/prod' 3 12
Dies ist mein Code:
Code: Alles auswählen
class Crawling(unittest.TestCase):
    """Scrape ctrip.com search results for New York into the MySQL table ``meta``.

    ``setUp`` starts a Firefox session; ``test_sel`` loads the search page,
    empties the ``meta`` table and inserts one row per scraped result entry.
    """

    # Absolute XPath of one result entry; ``{index}`` is the 1-based position
    # of the entry in the result list.
    ITEM_XPATH = (
        "/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]"
        "/ul/li[{index}]/div/div[1]"
    )

    # The placeholders must be bare ``%s``: the connector quotes and escapes
    # the bound values itself. Wrapping a placeholder in extra quotes
    # (``"%s"``) stores those quote characters as part of the data.
    INSERT_SQL = (
        "INSERT INTO meta "
        "(price_id, Header, Price, Deeplink, PartnerID, LocationID) "
        "VALUES (%s, %s, %s, %s, %s, %s)"
    )

    def setUp(self):
        """Start a Firefox session and store the crawl settings on the test."""
        self.driver = webdriver.Firefox()
        # Make sure the browser is shut down even when the test fails.
        self.addCleanup(self.driver.quit)
        self.driver.set_window_size(1024, 768)
        self.base_url = "https://www.ctrip.com/"
        self.accept_next_alert = True

    def test_sel(self):
        """Crawl the search result page and write every complete entry to MySQL.

        Exits the process (status 0) with a message when the database
        connection cannot be established.
        """
        driver = self.driver
        driver.get(self.base_url + "Search/new york")

        # Scroll to the bottom once and give the page time to load more items.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        try:
            connection = mysql.connector.connect(
                host="localhost", user="root", passwd="", db="crawling"
            )
        except mysql.connector.Error:
            print("Keine Verbindung zum Server")
            sys.exit(0)

        try:
            cursor = connection.cursor()
            try:
                # Start from an empty table with a fresh id sequence.
                cursor.execute("TRUNCATE meta;")
                connection.commit()
                cursor.execute("ALTER TABLE meta AUTO_INCREMENT =1;")
                connection.commit()

                partner_ID = 3
                location_ID = "New York"

                for index in range(1, 20):
                    item = self.ITEM_XPATH.format(index=index)
                    # The first six characters of the price text are dropped;
                    # presumably a currency prefix -- TODO confirm.
                    price = driver.find_element_by_xpath(
                        item + "/div[2]/span[1]"
                    ).text[6:]
                    headline = driver.find_element_by_xpath(
                        item + "/div[2]/strong"
                    ).text
                    deeplink = driver.find_element_by_xpath(
                        item + "/div[3]/div/ul/li[1]/a"
                    ).get_attribute("href")

                    if not (headline and price):
                        print(
                            "Header not available  | Price not available  | "
                            f"Deeplink: {deeplink} | PartnerID: {partner_ID} | "
                            f"LocationID: {location_ID}"
                        )
                        # NOTE(review): incomplete entries are reported but not
                        # stored; the original nesting of the INSERT could not
                        # be recovered -- confirm this is the intended behavior.
                        continue

                    print(
                        f"Header: {headline} | Price: {price} | "
                        f"Deeplink: {deeplink} | PartnerID: {partner_ID} | "
                        f"LocationID: {location_ID}"
                    )
                    # None is sent as SQL NULL so MySQL can assign the next id;
                    # assumes price_id is the AUTO_INCREMENT column -- TODO confirm.
                    cursor.execute(
                        self.INSERT_SQL,
                        (None, headline, price, deeplink, partner_ID, location_ID),
                    )
                    connection.commit()
            finally:
                cursor.close()
        finally:
            connection.close()
Danke für eure Hilfe