Der Assembler ist zeilenbasiert. Eine Zeile ist entweder eine Leer- oder Kommentarzeile (Kommentare werden mit einem # eingeleitet), eine Pseudo-Anweisung oder eine Bytecode-Anweisung. Pseudo-Anweisungen fangen mit einem "." an; es gibt nur ".code" und ".endcode", um Code-Objekte zu erstellen, die dann als Konstanten in anderem Code verwendet werden können, beispielsweise um Funktionen daraus zu erstellen.
Ein "Hallo Welt"-Programm sieht so aus:
Code: Alles auswählen
LOAD_GLOBAL print
LOAD_CONST "Hallo, Welt!"
CALL_FUNCTION 1
RETURN_VALUE
Der Assembler macht daraus eine *.pyc-Datei, die man mit (C)Python ausführen kann:
Code: Alles auswählen
$ python3 pyasm.py hello.pyasm
2 0 LOAD_GLOBAL 0 (print)
3 2 LOAD_CONST 0 ('Hallo, Welt!')
4 4 CALL_FUNCTION 1
5 6 RETURN_VALUE
$ python3 hello.pyc
Hallo, Welt!
Code: Alles auswählen
.code framed_print, text, frame_character
# Build the border line: (len(text) + 2) * frame_character
LOAD_GLOBAL len
LOAD_FAST text
CALL_FUNCTION 1
LOAD_CONST 2
BINARY_ADD
LOAD_FAST frame_character
BINARY_MULTIPLY
# Print the top border. DUP_TOP keeps a copy of the border line on the
# stack for later; ROT_TWO moves `print` below its argument.
DUP_TOP
LOAD_GLOBAL print
ROT_TWO
CALL_FUNCTION 1
POP_TOP
# Print the middle line: frame_character + (text + frame_character)
LOAD_FAST frame_character
LOAD_FAST text
LOAD_FAST frame_character
BINARY_ADD
BINARY_ADD
LOAD_GLOBAL print
ROT_TWO
CALL_FUNCTION 1
POP_TOP
# Print the bottom border using the copy still on the stack and return
# the result of that last print call.
LOAD_GLOBAL print
ROT_TWO
CALL_FUNCTION 1
RETURN_VALUE
.endcode
# Create the function object from the code block constant and its name
# string, then keep it in the local `framed_print`.
LOAD_CONST framed_print
LOAD_CONST "framed_print"
MAKE_FUNCTION 0
STORE_FAST framed_print
# framed_print("Hallo", "*"), discarding the return value.
LOAD_FAST framed_print
LOAD_CONST "Hallo"
LOAD_CONST "*"
CALL_FUNCTION 2
POP_TOP
# framed_print("Welt", "="); its result becomes the module's return value.
LOAD_FAST framed_print
LOAD_CONST "Welt"
LOAD_CONST "="
CALL_FUNCTION 2
RETURN_VALUE
Code: Alles auswählen
*******
*Hallo*
*******
======
=Welt=
======
Code: Alles auswählen
# i = 1
LOAD_CONST 1
STORE_FAST i
# Loop head: leave the loop as soon as i == 11.
loop: LOAD_FAST i
LOAD_CONST 11
COMPARE_OP EQ
POP_JUMP_IF_TRUE exit_loop
# print(i), discarding the call's return value.
LOAD_GLOBAL print
LOAD_FAST i
CALL_FUNCTION 1
POP_TOP
# i = i + 1, then jump back to the loop head.
LOAD_FAST i
LOAD_CONST 1
BINARY_ADD
STORE_FAST i
JUMP_ABSOLUTE loop
# After the loop: return None.
exit_loop:
LOAD_CONST None
RETURN_VALUE
Code: Alles auswählen
# The counter lives only on the stack; start with 1.
LOAD_CONST 1
# Loop head: compare a copy of the counter with 11 and leave when equal.
loop: DUP_TOP
LOAD_CONST 11
COMPARE_OP EQ
POP_JUMP_IF_TRUE exit_loop
# print(counter): duplicate the counter, push `print` and swap the two so
# the function sits below its argument; then drop the call's result.
DUP_TOP
LOAD_GLOBAL print
ROT_TWO
CALL_FUNCTION 1
POP_TOP
# Increment the counter on the stack and jump back to the loop head.
LOAD_CONST 1
BINARY_ADD
JUMP_ABSOLUTE loop
# The counter is still on the stack here and becomes the return value.
exit_loop:
RETURN_VALUE
Code: Alles auswählen
#!/usr/bin/env python3
import dis
import marshal
import re
import sys
from ast import literal_eval
from collections import defaultdict
from importlib.util import MAGIC_NUMBER
from pathlib import Path
from attr import attrib, attrs
from bytecode import Bytecode, Compare, dump_bytecode, Instr, Label, UNSET
# Matches a label definition at the start of a line: an identifier that is
# immediately followed by a colon.  Group 1 is the label name.
LABEL_RE = re.compile(r'([a-zA-Z_][a-zA-Z0-9_]*):')
# Matches an instruction mnemonic (upper-case letters and underscores) plus
# any whitespace after it.  Group 1 is the mnemonic.
# NOTE(review): "MNENONIC" is a misspelling of "MNEMONIC"; the name is left
# unchanged because other code in this file refers to it.
MNENONIC_RE = re.compile(r"([A-Z_]+)\s*")
@attrs
class CodeBlock:
    """Bytecode under construction for the module or for one ``.code`` block.

    Wraps a ``bytecode.Bytecode`` instruction list together with a mapping
    from label names to ``Label`` objects.  Use :meth:`new` to create an
    instance with file name, code name and argument names filled in.
    """

    _bytecode = attrib(factory=Bytecode)
    # Looking up an unknown name creates a fresh ``Label``, so a jump may
    # refer to a label before the line that defines it has been seen.
    _name_to_label = attrib(factory=lambda: defaultdict(Label))
    # Names whose label has already been placed into the instruction list;
    # needed because ``_name_to_label`` also contains merely referenced names.
    _defined_label_names = attrib(factory=set)

    def add_label(self, name):
        """Place the label called *name* at the current end of the bytecode.

        Raises:
            ValueError: If a label with this name was already placed in
                this code block.
        """
        if name in self._defined_label_names:
            raise ValueError(f"duplicate label {name!r}")
        self._defined_label_names.add(name)
        self._bytecode.append(self._name_to_label[name])

    def get_label(self, name):
        """Return the ``Label`` for *name*, creating it on first use."""
        return self._name_to_label[name]

    def add_instruction(self, mnenonic, argument, line_number):
        """Append one instruction, tagged with its source *line_number*."""
        self._bytecode.append(Instr(mnenonic, argument, lineno=line_number))

    def dump(self):
        """Pass the collected bytecode to ``bytecode.dump_bytecode``."""
        dump_bytecode(self._bytecode)

    def to_code(self):
        """Return the result of converting the bytecode with ``to_code()``."""
        return self._bytecode.to_code()

    @classmethod
    def new(cls, file_path, name="<module>", argument_names=()):
        """Create an empty code block.

        Args:
            file_path: Source file path; stored as a string in the
                bytecode's ``filename``.
            name: Name of the code object, ``"<module>"`` by default.
            argument_names: Names of the positional arguments; their count
                is stored as ``argcount``.
        """
        result = cls()
        bytecode = result._bytecode
        bytecode.filename = str(file_path)
        bytecode.name = name
        bytecode.argnames = argument_names
        bytecode.argcount = len(argument_names)
        return result
def assemble(file_path, lines):
    """Assemble source *lines* into the module's code object.

    Each line is stripped and then handled as one of:

    * an empty line or a line starting with ``#`` -- ignored,
    * a pseudo-op ``.code name, arg, ...`` or ``.endcode`` -- opens or
      closes a named code block that later ``LOAD_CONST name``
      instructions can reference,
    * an optional ``label:`` followed by an optional instruction.

    Args:
        file_path: Path of the source file; passed on to the code blocks.
        lines: Iterable of source lines.

    Returns:
        The code object produced from the module-level code block.

    Raises:
        ValueError: For an unknown pseudo-op, nested or duplicate ``.code``
            blocks, an ``.endcode`` without ``.code``, a ``.code`` block
            that is still open at the end of the input, or a line that
            does not start with a mnemonic.
        KeyError: For an unknown mnemonic, an unknown compare operator
            name, or a ``LOAD_CONST``-style argument that is neither a
            Python literal nor the name of a known code block.
    """
    module_code_block = code_block = CodeBlock.new(file_path)
    name_to_code_block = {}
    # Line number of the currently open ``.code`` pseudo-op, for reporting
    # a block that is never closed.
    code_block_start = None

    for line_number, line in enumerate(lines, 1):
        line = line.strip()
        if not line or line.startswith("#"):
            continue

        if line.startswith("."):
            pseudo_op, _, line = line.partition(" ")
            if pseudo_op == ".code":
                if code_block is not module_code_block:
                    raise ValueError("can't nest code blocks")
                name, *argument_names = (
                    part.strip() for part in line.split(",")
                )
                # A second block with the same name would silently replace
                # the first one for all later references.
                if name in name_to_code_block:
                    raise ValueError(
                        f"duplicate code block {name!r} ({line_number})"
                    )
                code_block = CodeBlock.new(file_path, name, argument_names)
                name_to_code_block[name] = code_block
                code_block_start = line_number
            elif pseudo_op == ".endcode":
                if code_block is module_code_block:
                    raise ValueError("missing start of code block")
                code_block = module_code_block
                code_block_start = None
            else:
                raise ValueError(
                    f"unknown pseudo-op {pseudo_op!r} ({line_number})"
                )
            continue

        match = LABEL_RE.match(line)
        if match:
            name = match.group(1)
            # Skip the label name and its colon.
            line = line[len(name) + 1 :].strip()
            code_block.add_label(name)

        if not line:
            # The line held only a label.
            continue

        match = MNENONIC_RE.match(line)
        if not match:
            # Previously such lines were dropped without any notice.
            raise ValueError(
                f"can't parse instruction {line!r} ({line_number})"
            )

        mnenonic = match.group(1)
        line = line[len(mnenonic) :].strip()
        opcode = dis.opmap[mnenonic]
        # The opcode's category decides how the argument text is read.
        if opcode in dis.hascompare:
            argument = Compare[line]
        elif opcode in dis.hasconst:
            try:
                argument = literal_eval(line) if line else UNSET
            except ValueError:
                # Not a literal: treat it as the name of a code block.
                argument = name_to_code_block[line].to_code()
        elif opcode in dis.hasjabs or opcode in dis.hasjrel:
            argument = code_block.get_label(line)
        elif opcode in dis.hasname or opcode in dis.haslocal:
            argument = line if line else UNSET
        else:
            argument = int(line) if line else UNSET
        code_block.add_instruction(mnenonic, argument, line_number)

    if code_block is not module_code_block:
        # Without this check every module-level instruction after the
        # unterminated ``.code`` would end up in that block and be lost.
        raise ValueError(f"missing end of code block ({code_block_start})")

    return module_code_block.to_code()
def main():
    """Assemble the file named on the command line into a ``*.pyc`` file.

    Reads the source file given as the first command line argument,
    prints a disassembly of the resulting module code, and writes the
    code next to the source file with the extension replaced by ``.pyc``.
    Prints a message and does nothing else when no file name is given.
    """
    if len(sys.argv) <= 1:
        print("Missing filename.")
        return

    source_file_path = Path(sys.argv[1])
    with source_file_path.open(encoding="utf-8") as lines:
        code = assemble(source_file_path, lines)
    dis.disassemble(code)

    target_file_path = source_file_path.with_name(
        source_file_path.stem + ".pyc"
    )
    # After the magic number the header holds an mtime and a source size
    # field (8 bytes); Python 3.7+ (PEP 552) puts a 4 byte flags field in
    # front of those.  All fields are left zero.  Writing only 8 bytes on
    # 3.7+ makes the interpreter start reading the marshalled code 4 bytes
    # too late.
    header_padding = bytes(12) if sys.version_info >= (3, 7) else bytes(8)
    with target_file_path.open("wb") as pyc_file:
        pyc_file.write(MAGIC_NUMBER)
        pyc_file.write(header_padding)
        marshal.dump(code, pyc_file)


# Run the assembler only when this file is executed as a script.
if __name__ == "__main__":
    main()