Seite 1 von 1

Pango HTML/ASCII Renderer

Verfasst: Samstag 8. März 2008, 11:42
von EnTeQuAk
Hallo,

da ich selber eben gerade vor dem Problem stand nen formatierten Text eines gtk.TextBuffers in HTML beziehungsweise ASCII umzuwandeln, hab ich mir nen Renderer gebastelt, der das kann.

Benutzung ist folgende:

Code: Alles auswählen

import pango, gtk
tb = gtk.TextBuffer()

tb.create_tag('bold', weight=pango.WEIGHT_BOLD)
tb.create_tag('italic', style=pango.STYLE_ITALIC)
tb.create_tag('underline', underline=pango.UNDERLINE_SINGLE)
tb.create_tag('strikethrough', strikethrough=True)
tb.set_text('bold, italic, normal, \n\n\n\n And now some underlined text...'
            '\n\nBut delted text is also possible....')
ao = tb.get_iter_at_offset
tb.apply_tag_by_name('bold', ao(0), ao(4))
tb.apply_tag_by_name('italic', ao(6), ao(12))
tb.apply_tag_by_name('underline', ao(27), ao(55))
tb.apply_tag_by_name('strikethrough', ao(60), ao(92))
renderer = PangoHTMLRenderer(tb)

print renderer.get_text()
Und hier ist die kleine Bibliothek:

Code: Alles auswählen

#-*- coding: utf-8 -*-
from xml.sax.saxutils import quoteattr


#: set of tags that don't want child elements.
EMPTY_TAGS = set(['br', 'img', 'area', 'hr', 'param', 'meta', 'link', 'base',
                  'input', 'embed', 'col', 'frame', 'spacer'])

def _build_html_tag(tag, attrs):
    """Build an HTML opening tag."""
    attrs = u' '.join(iter(
        u'%s=%s' % (k, quoteattr(unicode(v)))
        for k, v in attrs.iteritems()
        if v is not None
    ))
    return u'<%s%s%s>' % (
        tag, attrs and ' ' + attrs or '',
        tag in EMPTY_TAGS and ' /' or ''
    ), tag not in EMPTY_TAGS and u'</%s>' % tag or u''


class PangoHTMLRenderer(object):

    tag_mapping = {
        'bold': {
            'tag': u'strong',
        },
        'italic': {
            'tag': u'em',
        },
        'underline': {
            'tag': u'ins',
        },
        'strikethrough': {
            'tag': u'del',
        },
    }

    def __init__(self, buffer):
        self.buffer = buffer

    def get_tags(self):
        tagdict = {}

        for pos in range(self.buffer.get_char_count()):
            iter = self.buffer.get_iter_at_offset(pos)
            for tag in iter.get_tags():
                if tagdict.has_key(tag):
                    if tagdict[tag][-1][1] == pos - 1:
                        tagdict[tag][-1] = (tagdict[tag][-1][0], pos)
                    else:
                        tagdict[tag].append((pos, pos))
                else:
                    tagdict[tag] = [(pos, pos)]
        return tagdict

    def get_text (self, se_callback=None):
        if se_callback is None:
            tm = self.tag_mapping
            se_callback = lambda k,v: _build_html_tag(
                tm[k.get_property('name')]['tag'],
                tm[k.get_property('name')].get('attrs', {}))
        tagdict = self.get_tags()
        buf = self.buffer
        text = buf.get_text(buf.get_start_iter(), buf.get_end_iter())
        cuts = {}

        for k, v in tagdict.items():
            start_tag, end_tag = se_callback(k, v)
            for start, end in v:
                if start in cuts:
                    cuts[start].append(start_tag)
                else:
                    cuts[start] = start_tag

                if (end+1) in cuts:
                    cuts[end+1] = [end_tag] + cuts[end+1]
                else:
                    cuts[end+1] = [end_tag]

        last_pos = 0
        outbuff = u''
        cut_indices = cuts.keys()
        cut_indices.sort()
        for c in cut_indices:
            if not last_pos == c:
                outbuff += text[last_pos:c]
                last_pos = c
            for tag in cuts[c]:
                outbuff += tag
        outbuff += text[last_pos:]
        return outbuff


class PangoPlainRenderer(PangoHTMLRenderer):

    tag_mapping = {
        'bold': {
            'tag': u'**'
        },
        'italic': {
            'tag': u'*'
        },
        'underline': {
            'tag': u'_'
        },
        'strikethrough': {
            'tag': u'––'
        }
    }

    def get_text(self):
        tm = self.tag_mapping
        se_callback = lambda k,v: 2*[tm[k.get_property('name')]['tag']]
        outbuf = PangoHTMLRenderer.get_text(self, se_callback)
        return outbuf
Ich habe nur die von mir definierten Tags Beispielsweise eingebaut, erweiterungen sind ganz einfach möglich durch editieren der `tag_mapping` dictionaries.

Vielleicht kanns jemand gebrauchen, viel Spaß!


MFG EnTeQuAK :)