viele kennen mich schon mit meinem etwas dilettantischen Versuch Online Feeds zu streamen und in eine MySQL DB zu schreiben. So weit klappt alles ganz gut bis auf das Entfernen der Doubletten. Dieses kann problemlos nachträglich gemacht werden, ich hätte aber gerne einen Code der überflüssige Doubletten direkt erkennt und gar nicht er in die DB Tabelle einfügt.
Ausprobiert habe ich bisher folgendes im Create Table Statement:
(1) LSE_CommentNr integer, UNIQUE( LSE_CommentNr)
sowie
(2) LSE_CommentNr integer UNIQUE ON CONFLICT IGNORE,
Bei Variante (1) bekomme ich folgenden Fehlercode:
Code: Alles auswählen
Traceback (most recent call last):
File "C:/Python27/MySQL_finalversion/RSS_common_FV_LSE_v1.py", line 50, in <module>
main()
File "C:/Python27/MySQL_finalversion/RSS_common_FV_LSE_v1.py", line 45, in main
cur.execute("""INSERT INTO feeddata_lse_nodub(LSE_UnixTimesstamp, LSE_Timestamp, LSE_Source, LSE_Title, LSE_Text, LSE_Link, LSE_Epic, LSE_CommentNr, LSE_Author) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)""",item)
File "C:\Python27\lib\site-packages\MySQLdb\cursors.py", line 205, in execute
self.errorhandler(self, exc, value)
File "C:\Python27\lib\site-packages\MySQLdb\connections.py", line 36, in defaulterrorhandler
raise errorclass, errorvalue
IntegrityError: (1062, "Duplicate entry 'Mon, 14 Jul 2014 10:15:53 GMT-9385236' for key 'LSE_Timestamp'")
Code: Alles auswählen
Warning (from warnings module):
File "C:\Python27\MySQL_finalversion\RSS_common_FV_LSE_v1.py", line 16
cur.execute("DROP TABLE IF EXISTS feeddata_lse_nodub")
Warning: Unknown table 'sentimentanalysis_unicode.feeddata_lse_nodub'
Traceback (most recent call last):
File "C:\Python27\MySQL_finalversion\RSS_common_FV_LSE_v1.py", line 20, in <module>
cur.execute(sql_lse)
File "C:\Python27\lib\site-packages\MySQLdb\cursors.py", line 205, in execute
self.errorhandler(self, exc, value)
File "C:\Python27\lib\site-packages\MySQLdb\connections.py", line 36, in defaulterrorhandler
raise errorclass, errorvalue
ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'CONFLICT IGNORE, LSE_Author varchar(255))' at line 1")
Code: Alles auswählen
import feedparser
import MySQLdb
import time
from cookielib import CookieJar
db = MySQLdb.connect(host="localhost", # your host, usually localhost
user="root", # your username - SELECT * FROM mysql.user
passwd="****", # your password
db="sentimentanalysis_unicode",
charset="utf8") # name of the data base
cur = db.cursor()
cur.execute("SET NAMES utf8")
cur.execute("SET CHARACTER SET utf8")
cur.execute("SET character_set_connection=utf8")
cur.execute("DROP TABLE IF EXISTS feeddata_lse_nodub")
sql_lse = """CREATE TABLE feeddata_lse_nodub(LSE_ID INT NOT NULL AUTO_INCREMENT,PRIMARY KEY(LSE_ID), LSE_UnixTimesstamp integer, LSE_Timestamp varchar(255), LSE_Source varchar(255), LSE_Title varchar(255), LSE_Text TEXT, LSE_Link varchar(255), LSE_Epic varchar(255), LSE_CommentNr integer, LSE_Author varchar(255))"""
cur.execute(sql_lse)
def feed_load_lse(feed_lse):
return [(time.time(),
entry.published,
'lse',
entry.title,
entry.summary,
entry.link,
(entry.link.split('ShareTicker=')[1]).split('&post')[0],
entry.link.split('post=')[1],
entry.author)
for entry
in feedparser.parse(feed_lse).entries]
def main():
feed_url_lse = "http://www.lse.co.uk/chat/recent/"
try: feed_lse = feed_load_lse(feed_url_lse)
except IndexError: error = 'ERROR'
try:
for item in feed_lse:
cur.execute("""INSERT INTO feeddata_lse(LSE_UnixTimesstamp, LSE_Timestamp, LSE_Source, LSE_Title, LSE_Text, LSE_Link, LSE_Epic, LSE_CommentNr, LSE_Author) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)""",item)
db.commit()
print feed_lse[0][1]
except: print error
if __name__ == "__main__":
while True:
main()
time.sleep(10)
Ps: Ich weiss, dass meine split Befehle wie auch die Namensgebung meiner Spalten nicht optimal sind. Wer da Hinweise hat - ich freue mich über jede PN - dies soll aber nicht Teil der Fragestellung sein