Code (shell): select all
# Keep only the links that contain an image file extension (.png, .jpg,
# .jpeg or .gif, in any letter case) followed by a non-alphanumeric
# character or the end of the line. The previous pattern matched any line
# that merely contained the letters "png", "jpg" or "gif" anywhere.
grep -iE '\.(png|jpe?g|gif)([^[:alnum:]]|$)' skype_links.txt > image_list.txt
Code (Python): select all
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import codecs, sqlite3, re
# Location of main.db, the SQLite database holding your Skype history.
# Adjust this path for your own account before running the script.
HISTORY_FILE = "/Users/username/Library/Application Support/Skype/username/main.db"
# Text file that receives the extracted links, one per line.
OUTPUT_FILE = "skype_links.txt"
# Query returning the body_xml column of every row in the Messages table.
SQL_STATEMENT = "SELECT body_xml FROM Messages"
# Matches an anchor tag; the named groups capture its href target ("url")
# and the text between the opening and closing tag ("text").
PATTERN = re.compile(r'<a href="(?P<url>.*?)">(?P<text>.*?)</a>')
def main():
    """Extract every link from the Skype history and save them to a file.

    Runs SQL_STATEMENT against the SQLite database at HISTORY_FILE, applies
    PATTERN to the first column of each returned row, and collects the
    ``url`` group of every match.  The unique URLs are written to
    OUTPUT_FILE as UTF-8, sorted and separated by newlines, and the number
    of links is printed.

    Raises:
        sqlite3.Error: if the database cannot be opened or queried.
        IOError: if the output file cannot be written.
    """
    # A set drops duplicate URLs, so no separate de-duplication pass is needed.
    links = set()
    connection = sqlite3.connect(HISTORY_FILE)
    try:
        for row in connection.execute(SQL_STATEMENT):
            body = row[0]
            # Some rows come back with an empty or missing body; skip them,
            # otherwise the regex search would fail on None.
            if not body:
                continue
            # A single message may contain any number of anchors.
            # NOTE(review): URLs are kept exactly as they appear in the
            # markup; entities such as "&amp;" are not unescaped -- confirm
            # whether that is desired.
            links.update(match.group("url") for match in PATTERN.finditer(body))
    finally:
        # Release the database handle even if the query or parsing fails.
        connection.close()
    # codecs.open keeps the UTF-8 output working on both Python 2.x and 3.x.
    with codecs.open(OUTPUT_FILE, "w", "utf-8") as output_file:
        output_file.write("\n".join(sorted(links)))
    print("{} links were written".format(len(links)))
# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()