Er sollte gültigen XHTML-Code erzeugen. Bei Fragen einfach hier posten.
Code: Alles auswählen
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------- #
# BBCode Parser #
# Author: Armin Ronacher <armin.ronacher@active-4.com> #
# licensed under the GPL #
# ---------------------------------------------------------------------------- #
# This program is free software; you can redistribute it and/or #
# modify it under the terms of the GNU General Public License #
# as published by the Free Software Foundation; either version 2 #
# of the License, or (at your option) any later version. #
# ---------------------------------------------------------------------------- #
import re
from random import random
from md5 import md5
from time import time
# Captions used as headings of rendered quote blocks (see parser.do_quote).
LANGUAGE = {
# Heading for a quote without an author ("Zitat:" is German for "Quote:").
"SIMPLE_QUOTE": "Zitat:",
# Heading for a quote with an author; do_quote substitutes the author's name
# for "%s" ("schrieb" is German for "wrote").
"AUTHOR_QUOTE": "%s schrieb:"
}
"""
This module implements a simple BBCode parser for pythonBB.
Here is a small example of how to use it:
text = file("input.txt", "r").read()
p = parser(text)
text = p.parse()
file("output.html", "w").write(text)
"""
class parser:
    """Translate BBCode markup into XHTML.

    Usage: ``parser(text).parse()`` returns the rendered markup.

    The conversion runs in five stages (see ``parse``):

    1. HTML-escape the raw input.
    2. Cut block-level areas (``[code]``, ``[list]``) out of the text and
       replace each with a unique placeholder token, remembering the regex
       match and its handler in ``self.table``.
    3. Apply inline rules (``[b]``, ``[i]``, ``[u]``, ``[url]``, ``[img]``,
       ``[quote]``).
    4. Wrap blank-line separated chunks in ``<p>`` elements.
    5. Replace the placeholder tokens with their rendered blocks.
    """

    # The original patterns carried a trailing "(?uism)" group. Newer
    # versions of ``re`` only accept inline global flags at the very start of
    # a pattern, so the same flags are passed to re.compile() instead.
    _FLAGS = re.IGNORECASE | re.DOTALL | re.MULTILINE | re.UNICODE

    # Block-level areas, extracted before inline formatting runs.
    _CODE_RE = re.compile(r"\[code\](.*?)\[/code\]", _FLAGS)
    _LIST_RE = re.compile(r"\[list(=(.*?))?\](.*?)\[/list\]", _FLAGS)

    # Inline tags.
    _BOLD_RE = re.compile(r"\[b\](.*?)\[/b\]", _FLAGS)
    _ITALIC_RE = re.compile(r"\[i\](.*?)\[/i\]", _FLAGS)
    _UNDERLINE_RE = re.compile(r"\[u\](.*?)\[/u\]", _FLAGS)
    _URL_RE = re.compile(r"\[url(=(.*?))?\](.*?)\[/url\]", _FLAGS)
    _IMG_RE = re.compile(r"\[img(=\"(.*?)\")?\](.*?)\[/img\]", _FLAGS)
    _QUOTE_RE = re.compile(r"\[quote(=\"(.*?)\")?\](.*?)\[/quote\]", _FLAGS)

    # One "[*]item" entry of a list. The item ends at the next newline or at
    # the end of the list body, so a last item without a trailing newline is
    # converted as well.
    _ITEM_RE = re.compile(r"\[\*\](.*?)(\n|\Z)")

    # A paragraph that contains nothing but whitespace.
    _EMPTY_PARAGRAPH_RE = re.compile(r"<p>\s*</p>")

    def __init__(self, text):
        """Store the BBCode source; call ``parse()`` to render it.

        The constructor used to run ``parse()`` once and throw the result
        away; that redundant pass has been removed.
        """
        self.text = text
        # Maps placeholder token -> (handler, match object) for extracted areas.
        self.table = {}

    def parse(self):
        """Render ``self.text`` and return the resulting XHTML string."""
        # Start from a clean table even if an earlier run was aborted midway.
        self.table = {}
        text = self.htmlspecialchars(self.text)
        text = self.extract(text)
        text = self.inlineformat(text)
        text = self.paragraphs(text)
        text = self.assamble(text)
        self.table = {}
        return text

    def htmlspecialchars(self, text):
        """Escape ``&``, ``<`` and ``>`` so raw input cannot inject markup.

        Double quotes are deliberately left alone here because the ``[img]``
        and ``[quote]`` patterns match on literal quotes; attribute values
        are escaped where they are emitted instead.
        """
        # "&" must be replaced first, otherwise the entities produced by the
        # other two rules would be escaped a second time.
        rules = (
            ("&", "&amp;"),
            ("<", "&lt;"),
            (">", "&gt;"),
        )
        for needle, entity in rules:
            text = text.replace(needle, entity)
        return text

    def extract(self, text):
        """Replace every ``[code]`` / ``[list]`` area with a placeholder token.

        The match object and the handler that will render it are stored in
        ``self.table`` under the token. Code areas are taken out first, so
        BBCode inside ``[code]`` is never interpreted.
        """
        areas = (
            (self._CODE_RE, self.do_code),
            (self._LIST_RE, self.do_list),
        )
        for pattern, handler in areas:
            match = pattern.search(text)
            while match is not None:
                token_id = self.__create_token_id()
                self.table[token_id] = (handler, match)
                # Splice by position so exactly the matched span is replaced.
                text = text[:match.start()] + token_id + text[match.end():]
                match = pattern.search(text)
        return text

    def inlineformat(self, text):
        """Apply all inline BBCode rules to ``text`` and return the result."""
        rules = (
            (self._BOLD_RE, r"<strong>\1</strong>"),
            (self._ITALIC_RE, r"<em>\1</em>"),
            (self._UNDERLINE_RE, r"<u>\1</u>"),
            (self._URL_RE, self.do_link),
            (self._IMG_RE, self.do_image),
            (self._QUOTE_RE, self.do_quote),
        )
        for pattern, replacement in rules:
            text = pattern.sub(replacement, text)
        return text

    def paragraphs(self, text):
        """Wrap chunks separated by blank lines in ``<p>`` elements.

        Single newlines inside a chunk become ``<br />`` line breaks.
        """
        chunks = re.split(r"\n{2,}", text)
        return "".join(
            "<p>" + chunk.strip().replace("\n", "<br />\n") + "</p>\n\n"
            for chunk in chunks
        )

    def assamble(self, text):
        """Substitute the rendered blocks for their placeholder tokens.

        (The method name keeps its historical spelling for compatibility.)
        """
        pending = dict(self.table)
        expanded = True
        # A token can sit inside another area (e.g. [code] within [list]) and
        # only becomes visible once the outer block is rendered, so keep
        # making passes until no remaining token can be found in the text.
        while pending and expanded:
            expanded = False
            for token in list(pending):
                if token not in text:
                    continue
                handler, match = pending.pop(token)
                text = text.replace(token, handler(match))
                expanded = True
        # Block handlers close and reopen the surrounding paragraph, which
        # leaves empty paragraphs behind; drop them.
        return self._EMPTY_PARAGRAPH_RE.sub("", text)

    def __create_token_id(self):
        """Return a placeholder token that is not yet used in ``self.table``."""
        import uuid

        while True:
            # uuid4 gives a random 32-digit hex id, the same shape as the
            # md5 hex digest used previously.
            token = "~~" + uuid.uuid4().hex + "~~"
            if token not in self.table:
                return token

    def _attr(self, value):
        """Escape ``value`` for use inside a double-quoted attribute."""
        return value.replace('"', "&quot;")

    def do_link(self, matchobj):
        """Render ``[url]target[/url]`` or ``[url=target]caption[/url]``."""
        caption = matchobj.group(3)
        if matchobj.group(1) is None:
            link = caption
        else:
            link = matchobj.group(2)
        # NOTE(review): the URL scheme is not validated, so "javascript:"
        # targets still pass through; restrict schemes if input is untrusted.
        return "<a href=\"" + self._attr(link) + "\">" + caption + "</a>"

    def do_image(self, matchobj):
        """Render ``[img]src[/img]`` or ``[img="title"]src[/img]``.

        Without an explicit title the source address doubles as title/alt.
        """
        href = self._attr(matchobj.group(3))
        if matchobj.group(1) is None:
            title = href
        else:
            title = self._attr(matchobj.group(2))
        return ("<img src=\"" + href + "\" title=\"" + title
                + "\" alt=\"" + title + "\" />")

    def do_quote(self, matchobj):
        """Render ``[quote]`` / ``[quote="author"]`` as a ``div.quote`` block.

        An author that does not already end with a colon is wrapped in the
        ``AUTHOR_QUOTE`` caption from ``LANGUAGE``.
        """
        text = matchobj.group(3)
        if matchobj.group(1) is None:
            title = LANGUAGE["SIMPLE_QUOTE"]
        else:
            title = matchobj.group(2)
            if not title.endswith(":"):
                title = LANGUAGE["AUTHOR_QUOTE"].replace("%s", title)
        # Close the enclosing paragraph before the div and reopen it after.
        return ("</p><div class=\"quote\"><h2>" + title + "</h2><p>"
                + text + "</p></div><p>")

    def do_code(self, matchobj):
        """Render an extracted ``[code]`` area as a ``<code>`` element."""
        content = matchobj.group(1)
        # Drop the newline that directly follows the opening tag; startswith
        # is safe for an empty [code][/code] area, unlike indexing.
        if content.startswith("\n"):
            content = content[1:]
        return "</p><code>" + content + "</code><p>"

    def do_list(self, matchobj):
        """Render an extracted ``[list]`` area.

        ``[list=1]``, ``[list=a]`` and ``[list=i]`` produce ordered lists with
        the classes ``numeric``, ``alpha`` and ``roman``; anything else
        produces an unordered list.
        """
        content = self.inlineformat(matchobj.group(3))
        content = self._ITEM_RE.sub(r"<li>\1</li>\2", content)
        if matchobj.group(1) is None:
            list_type = "*"
        else:
            list_type = matchobj.group(2)
        ordered_classes = {"1": "numeric", "a": "alpha", "i": "roman"}
        if list_type in ordered_classes:
            result = ("<ol class=\"" + ordered_classes[list_type] + "\">"
                      + content + "</ol>")
        else:
            result = "<ul>" + content + "</ul>"
        return "</p>" + result + "<p>"