matthieudartiailh / bytecode Goto Github PK

View Code? Open in Web Editor NEW

298.0 298.0 38.0 950 KB

Python module to modify bytecode

Home Page: https://bytecode.readthedocs.io/

License: MIT License

Python 99.83% Shell 0.17%

bytecode's People

Contributors

Stargazers

Watchers

bytecode's Issues

Cannot decompile code with empty try block

Small reproducer: we add a TryBegin followed immediately by the closing TryEnd. The code re-compiles fine, but cannot be decompiled again

def test_bytecode():
    import bytecode as b

    def foo():
        return 42

    bc = b.Bytecode.from_code(foo.__code__)
    label = b.Label()
    try_begin = b.TryBegin(label, push_lasti=True)
    bc[1:1] = [try_begin, b.TryEnd(try_begin), label]

    foo.__code__ = bc.to_code()

    assert foo() == 42

    bc = b.Bytecode.from_code(foo.__code__)

Result:

________________________________ test_bytecode _________________________________

    def test_bytecode():
        import bytecode as b
    
        def foo():
            return 42
    
        bc = b.Bytecode.from_code(foo.__code__)
        label = b.Label()
        try_begin = b.TryBegin(label, push_lasti=True)
        bc[1:1] = [try_begin, b.TryEnd(try_begin), label]
    
        foo.__code__ = bc.to_code()
    
        assert foo() == 42
    
>       bc = b.Bytecode.from_code(foo.__code__)

tests/internal/test_wrapping.py:779: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
.riot/venv_py3120/lib/python3.12/site-packages/bytecode/bytecode.py:283: in from_code
    return concrete.to_bytecode(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <ConcreteBytecode instr#=2>, prune_caches = True
conserve_exception_block_stackdepth = False

    def to_bytecode(
        self,
        prune_caches: bool = True,
        conserve_exception_block_stackdepth: bool = False,
    ) -> _bytecode.Bytecode:
        # On 3.11 we generate pseudo-instruction from the exception table
    
        # Copy instruction and remove extended args if any (in-place)
        c_instructions = self[:]
        self._remove_extended_args(c_instructions)
    
        # Find jump targets
        jump_targets: Set[int] = set()
        offset = 0
        for c_instr in c_instructions:
            if isinstance(c_instr, SetLineno):
                continue
            target = c_instr.get_jump_target(offset)
            if target is not None:
                jump_targets.add(target)
            offset += (c_instr.size // 2) if OFFSET_AS_INSTRUCTION else c_instr.size
    
        # On 3.11+ we need to also look at the exception table for jump targets
        for ex_entry in self.exception_table:
            jump_targets.add(ex_entry.target)
    
        # Create look up dict to find entries based on either exception handling
        # block exit or entry offsets. Several blocks can end on the same instruction
        # so we store a list of entry per offset.
        ex_start: Dict[int, ExceptionTableEntry] = {}
        ex_end: Dict[int, List[ExceptionTableEntry]] = {}
        for entry in self.exception_table:
            # Ensure we do not have more than one entry with identical starting
            # offsets
            assert entry.start_offset not in ex_start
            ex_start[entry.start_offset] = entry
            ex_end.setdefault(entry.stop_offset, []).append(entry)
    
        # Create labels and instructions
        jumps: List[Tuple[int, int]] = []
        instructions: List[Union[Instr, Label, TryBegin, TryEnd, SetLineno]] = []
        labels = {}
        tb_instrs: Dict[ExceptionTableEntry, TryBegin] = {}
        offset = 0
        # In Python 3.11+ cell and varnames can be shared and are indexed in a single
        # array.
        # As a consequence, the instruction argument can be either:
        # - < len(varnames): the name is shared an we can directly use
        #   the index to access the name in cellvars
        # - > len(varnames): the name is not shared and is offset by the
        #   number unshared varname.
        # Free vars are never shared and correspond to index larger than the
        # largest cell var.
        # See PyCode_NewWithPosOnlyArgs
        if sys.version_info >= (3, 11):
            cells_lookup = self.varnames + [
                n for n in self.cellvars if n not in self.varnames
            ]
            ncells = len(cells_lookup)
        else:
            ncells = len(self.cellvars)
            cells_lookup = self.cellvars
    
        for lineno, c_instr in self._normalize_lineno(
            c_instructions, self.first_lineno
        ):
            if offset in jump_targets:
                label = Label()
                labels[offset] = label
                instructions.append(label)
    
            # Handle TryBegin pseudo instructions
            if offset in ex_start:
                entry = ex_start[offset]
                tb_instr = TryBegin(
                    Label(),
                    entry.push_lasti,
                    entry.stack_depth if conserve_exception_block_stackdepth else UNSET,
                )
                # Per entry store the pseudo instruction associated
                tb_instrs[entry] = tb_instr
                instructions.append(tb_instr)
    
            jump_target = c_instr.get_jump_target(offset)
            size = c_instr.size
            # If an instruction uses extended args, those appear before the instruction
            # causing the instruction to appear at offset that accounts for extended
            # args. So we first update the offset to account for extended args, then
            # record the instruction offset and then add the instruction itself to the
            # offset.
            offset += (size // 2 - 1) if OFFSET_AS_INSTRUCTION else (size - 2)
            current_instr_offset = offset
            offset += 1 if OFFSET_AS_INSTRUCTION else 2
    
            # on Python 3.11+ remove CACHE opcodes if we are requested to do so.
            # We are careful to first advance the offset and check that the CACHE
            # is not a jump target. It should never be the case but we double check.
            if prune_caches and c_instr.name == "CACHE":
                assert jump_target is None
    
            # We may need to insert a TryEnd after a CACHE so we need to run the
            # through the last block.
            else:
                arg: InstrArg
                c_arg = c_instr.arg
                # FIXME: better error reporting
                if c_instr.opcode in _opcode.hasconst:
                    arg = self.consts[c_arg]
                elif c_instr.opcode in _opcode.haslocal:
                    arg = self.varnames[c_arg]
                elif c_instr.opcode in _opcode.hasname:
                    if c_instr.name in BITFLAG_INSTRUCTIONS:
                        arg = (bool(c_arg & 1), self.names[c_arg >> 1])
                    elif c_instr.name in BITFLAG2_INSTRUCTIONS:
                        arg = (bool(c_arg & 1), bool(c_arg & 2), self.names[c_arg >> 2])
                    else:
                        arg = self.names[c_arg]
                elif c_instr.opcode in _opcode.hasfree:
                    if c_arg < ncells:
                        name = cells_lookup[c_arg]
                        arg = CellVar(name)
                    else:
                        name = self.freevars[c_arg - ncells]
                        arg = FreeVar(name)
                elif c_instr.opcode in _opcode.hascompare:
                    arg = Compare(
                        (c_arg >> 4) if sys.version_info >= (3, 12) else c_arg
                    )
                elif c_instr.opcode in INTRINSIC_1OP:
                    arg = Intrinsic1Op(c_arg)
                elif c_instr.opcode in INTRINSIC_2OP:
                    arg = Intrinsic2Op(c_arg)
                else:
                    arg = c_arg
    
                location = c_instr.location or InstrLocation(lineno, None, None, None)
    
                if jump_target is not None:
                    arg = PLACEHOLDER_LABEL
                    instr_index = len(instructions)
                    jumps.append((instr_index, jump_target))
    
                instructions.append(Instr(c_instr.name, arg, location=location))
    
            # We now insert the TryEnd entries
            if current_instr_offset in ex_end:
                entries = ex_end[current_instr_offset]
                for entry in reversed(entries):
>                   instructions.append(TryEnd(tb_instrs[entry]))
E                   KeyError: ExceptionTableEntry(start_offset=1, stop_offset=0, target=1, stack_depth=0, push_lasti=True

.riot/venv_py3120/lib/python3.12/site-packages/bytecode/concrete.py:1067: KeyError

Creating functions through bytecode

I'm trying to create a new function / lambda purely through bytecode (no actual Python code). I can't really find an example on how to do this.

The bytecode I want to generate:

import bytecode.tests
import dis

dis.dis(bytecode.tests.get_code("def some_fn(x): return x"))

Which prints out:

  1           0 LOAD_CONST               0 (<code object some_fn at 0x7fbe893c6b30, file "<string>", line 1>)
              2 LOAD_CONST               1 ('some_fn')
              4 MAKE_FUNCTION            0
              6 STORE_NAME               0 (some_fn)
              8 LOAD_CONST               2 (None)
             10 RETURN_VALUE

Disassembly of <code object some_fn at 0x7fbe893c6b30, file "<string>", line 1>:
  1           0 LOAD_FAST                0 (x)
              2 RETURN_VALUE

Here's how I'm trying to create the function:

from bytecode import ConcreteBytecode, ConcreteInstr

# Define the body of the function

bytecode_fn = ConcreteBytecode()
bytecode_fn.varnames = ["x"]
bytecode_fn.extend([ConcreteInstr("LOAD_FAST", 0), # var x
                    ConcreteInstr("RETURN_VALUE")])

# Convert bytecode_fn to code
fn_code_obj = bytecode_fn.to_code()

bytecode = ConcreteBytecode()
bytecode.names = ["some_fn"]
bytecode.consts = [fn_code_obj, "some_fn", None]
bytecode.extend([ConcreteInstr("LOAD_CONST", 2),    # Default x: None
                 ConcreteInstr("LOAD_CONST", 0),    # fn_code_obj
                 ConcreteInstr("LOAD_CONST", 1),    # "some_fn"
                 ConcreteInstr("MAKE_FUNCTION", 1), # 1 arg
                 ConcreteInstr("STORE_NAME", 0),    # "some_fn"
                 ConcreteInstr("LOAD_CONST", 2),    # None
                 ConcreteInstr("RETURN_VALUE")])

# Execute bytecode
code = bytecode.to_code()
exec(code)

# Call created function
# Error:
#
# TypeError: <module>() takes from -16 to 0 positional arguments but 1 was given
some_fn(1)

# Error:
#
# UnboundLocalError: local variable 'x' referenced before assignment
some_fn()

This code created the some_fn function, but with zero arguments. I believe MAKE_FUNCTION requires you to add a default value on TOS (which in my case is None).

How do I adjust this to actually create a function that accepts 1 argument and binds it to x? If we can figure this out I'd be willing to make a write up and place it in the documentation.

I was looking for copyright information for this package (planning to package it for Debian), and got stuck.
doc/conf.py states: copyright = u"2016-2021, Victor Stinner" while COPYING says: Copyright (c) 2016 Red Hat.

These cannot both be correct! My guess is that COPYING should be changed. Also, the setup.py file says that the author is Victor Stinner and the maintainer is Matthieu C. Dartiailh; do both of you share the copyright, or does Victor hold the copyright alone, even though you took over the package in 2017?

(BTW, u"..." can be replaced with just "..." throughout the doc directory, as the Unicode marker is unnecessary in Python 3.)

Thanks for this package!

Provide a way to convert frame.f_lasti into an instruction

Here's the situation - you want to find the bytecode that was just executed in a frame, possibly in a trace func. Here are a pair of things you could want:

The current ConcreteInstr object corresponding to f_lasti:

def get_concrete_index(concrete_bc, code_index):
    at = 0
    concrete_index = 0
    for c in concrete_bc:
        at += c.size
        if at < code_index:
            concrete_index += 1
    return concrete_index

Which can be used as:

concrete_bc = ConcreteBytecode.from_code(frame.code)
ci = get_concrete_index(concrete_bc, frame.f_lasti)
concrete_instr = concrete_bc[ci]

The current Instr object corresponding to ci or f_lasti:

def promote_concrete_index(bc, concrete_index):
    index = None
    at = 0
    for i, b in enumerate(bc):
        if at == concrete_index:
            index = i
        if isinstance(b, bytecode.instr.BaseInstr):
            at += 1
    return index

Used as

bc = concrete_bc.to_bytecode()
i = promote_concrete_index(bc, ci)
instr = bc[concrete_bc]

Does this code look correct for all cases?
Does this make sense as a library addition? If so, how are these operations best exposed in the API?

stack size calculation issue

Hi there,
In a program where I process a bytecode sequence, I'm using Instr.stack_effect() to determine the current stack size at a bytecode instruction. This works well when I use simple statements and any kind of expression, but it fails when I use loops.
Here is a sample without loop:

from bytecode import Bytecode, Instr

def example():
	a = toto()
	b = tutu(b, 'truc')
	#for i in range(5):
	#	c = budu(i)
	boudou()
	budu(a,b,c)

bc = Bytecode.from_code(example.__code__)
stacksize = 0
for instr in bc:
	print(stacksize, instr)
	if isinstance(instr, Instr):	
		stacksize += instr.stack_effect()

We can see that the stack is empty at the start of the call to boudou:

0 <LOAD_GLOBAL arg='boudou' lineno=8>

If we uncomment the lines of the for loop, the stack size is bad after the loop, at the start of the call:

1 <bytecode.instr.Label object at 0x7f286df9c220>
1 <LOAD_GLOBAL arg='boudou' lineno=8>

Can you fix that, or is there already a better way than stack_effect to do that?

The `Compare` enum is broken starting from Python 3.9

Python 3.9.7 (default, Sep  3 2021, 12:37:55)
[Clang 12.0.5 (clang-1205.0.22.9)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from dis import cmp_op
>>> len(cmp_op)
6
>>> cmp_op
('<', '<=', '==', '!=', '>', '>=')

The in and is operators have dedicated opcodes (with an argument for the inverse) and they should be removed from Compare.

Adding support for stack depth calculation

Hi,
I am interested in porting the stack depth calculation found in byteplay to bytecode. Before starting, do you have any pointers to give me about this?
Thanks

Please add support for EXTENDED_ARG in jumps

Hi,

Currently, EXTENDED_ARG in jumps are not supported (see: https://github.com/haypo/bytecode/blob/master/bytecode/concrete.py#L395).
Without this feature, it's impossible to craft Python code objects larger than 64 KB.

Please add support for EXTENDED_ARG in jumps if it's not too difficult to do :)

For example, with this feature, my brainfuck-to-python-bytecode will be able to run any BF program larger than 64 KB (like hanoi.bf).

Flag inference is too aggressive in determining generator

await generates a YIELD_FROM instruction, and the function should not be tagged as a generator in that case (overall, the flag inference needs to be reworked).

EXTENDED_ARG 0 retained from input raises ValueError

EXTENDED_ARG 0 shouldn't be in code, but sometimes they are. Trying to convert a Bytecode back to code ends up raising ValueError("invalid opcode or oparg") because PyCompile_OpcodeStackEffect doesn't handle EXTENDED_ARG.

They are retained when disassembling code to Bytecode due to commit 63f1ec9. That commit should NOT be undone because the disassembly depends on physical instruction sizes, and removing the EXTENDED_ARG 0 then wreaks havoc on all the jump offsets.

A somewhat simple fix is to remove them later in ConcreteBytecode.to_bytecode after the physical sizes are no longer needed.

An alternative would be to modify the stack_effect calls to deal with this. But IMO it's better to just remove them so that people building code analyzers or optimizers are forced to deal with it.

Labels don't seem to be handled correctly with Python<3.9

I am currently working with the following abstract code

            stopiter = Label()
            loop = Label()
            genexit = Label()
            exc = Label()
            propagate = Label()
            instrs[-1:-1] = [
                Instr("DUP_TOP", lineno=lineno),
                Instr("STORE_FAST", "__ddgen", lineno=lineno),
                Instr("LOAD_ATTR", "asend", lineno=lineno),
                Instr("STORE_FAST", "__ddgensend", lineno=lineno),
                Instr("LOAD_FAST", "__ddgen", lineno=lineno),
                Instr("LOAD_ATTR", "__anext__", lineno=lineno),
                Instr("CALL_FUNCTION", 0, lineno=lineno),
                loop,
                Instr("SETUP_EXCEPT" if PY < (3, 8) else "SETUP_FINALLY", stopiter, lineno=lineno),
                Instr("GET_AWAITABLE", lineno=lineno),
                Instr("LOAD_CONST", None, lineno=lineno),
                Instr("YIELD_FROM", lineno=lineno),
                Instr("POP_BLOCK", lineno=lineno),
                Instr("SETUP_EXCEPT" if PY < (3, 8) else "SETUP_FINALLY", genexit, lineno=lineno),
                Instr("YIELD_VALUE", lineno=lineno),
                Instr("POP_BLOCK", lineno=lineno),
                Instr("LOAD_FAST", "__ddgensend", lineno=lineno),
                Instr("ROT_TWO", lineno=lineno),
                Instr("CALL_FUNCTION", 1, lineno=lineno),
                Instr("JUMP_ABSOLUTE", loop, lineno=lineno),
                stopiter,  # except StopAsyncIteration:
                Instr("DUP_TOP", lineno=lineno),
                Instr("LOAD_CONST", StopAsyncIteration, lineno=lineno),
                compare_exc(propagate, lineno),
                jump_if_false(propagate, lineno),
                Instr("POP_TOP", lineno=lineno),
                Instr("POP_TOP", lineno=lineno),
                Instr("POP_TOP", lineno=lineno),
                Instr("POP_EXCEPT", lineno=lineno),
                Instr("LOAD_CONST", None, lineno=lineno),
                Instr("RETURN_VALUE", lineno=lineno),
                propagate,  # finally:
                Instr("END_FINALLY" if PY < (3, 9) else "RERAISE", lineno=lineno),
                genexit,  # except GeneratorExit:
                Instr("DUP_TOP", lineno=lineno),
                Instr("LOAD_CONST", GeneratorExit, lineno=lineno),
                compare_exc(exc, lineno),
                jump_if_false(exc, lineno),
                Instr("POP_TOP", lineno=lineno),
                Instr("POP_TOP", lineno=lineno),
                Instr("POP_TOP", lineno=lineno),
                Instr("LOAD_FAST", "__ddgen", lineno=lineno),
                Instr("LOAD_ATTR", "aclose", lineno=lineno),
                Instr("CALL_FUNCTION", 0, lineno=lineno),
                Instr("GET_AWAITABLE", lineno=lineno),
                Instr("LOAD_CONST", None, lineno=lineno),
                Instr("YIELD_FROM", lineno=lineno),
                Instr("POP_EXCEPT", lineno=lineno),
                Instr("RETURN_VALUE", lineno=lineno),
                exc,  # except:
                Instr("POP_TOP", lineno=lineno),
                Instr("POP_TOP", lineno=lineno),
                Instr("POP_TOP", lineno=lineno),
                Instr("LOAD_FAST", "__ddgen", lineno=lineno),
                Instr("LOAD_ATTR", "athrow", lineno=lineno),
                Instr("LOAD_CONST", sys.exc_info, lineno=lineno),
                Instr("CALL_FUNCTION", 0, lineno=lineno),
                Instr("CALL_FUNCTION_EX", 0, lineno=lineno),
                Instr("GET_AWAITABLE", lineno=lineno),
                Instr("LOAD_CONST", None, lineno=lineno),
                Instr("YIELD_FROM", lineno=lineno),
                Instr("POP_EXCEPT", lineno=lineno),
                Instr("RETURN_VALUE", lineno=lineno),
            ]

When compiled to concrete Python 3.9 bytecode I get what I would expect (note the correct jump to RERAISE)

             12 DUP_TOP
             14 STORE_FAST               1 (__ddgen)
             16 LOAD_ATTR                0 (asend)
             18 STORE_FAST               2 (__ddgensend)
             20 LOAD_FAST                1 (__ddgen)
             22 LOAD_ATTR                1 (__anext__)
             24 CALL_FUNCTION            0
        >>   26 SETUP_FINALLY           22 (to 50)
             28 GET_AWAITABLE
             30 LOAD_CONST               2 (None)
             32 YIELD_FROM
             34 POP_BLOCK
             36 SETUP_FINALLY           34 (to 72)
             38 YIELD_VALUE
             40 POP_BLOCK
             42 LOAD_FAST                2 (__ddgensend)
             44 ROT_TWO
             46 CALL_FUNCTION            1
             48 JUMP_ABSOLUTE           26
        >>   50 DUP_TOP
             52 LOAD_CONST               3 (<class 'StopAsyncIteration'>)
             54 JUMP_IF_NOT_EXC_MATCH    70
             56 NOP
             58 POP_TOP
             60 POP_TOP
             62 POP_TOP
             64 POP_EXCEPT
             66 LOAD_CONST               2 (None)
             68 RETURN_VALUE
        >>   70 RERAISE
        >>   72 DUP_TOP
             74 LOAD_CONST               4 (<class 'GeneratorExit'>)
             76 JUMP_IF_NOT_EXC_MATCH   102
             78 NOP
             80 POP_TOP
             82 POP_TOP
             84 POP_TOP
             86 LOAD_FAST                1 (__ddgen)
             88 LOAD_ATTR                2 (aclose)
             90 CALL_FUNCTION            0
             92 GET_AWAITABLE
             94 LOAD_CONST               2 (None)
             96 YIELD_FROM
             98 POP_EXCEPT
            100 RETURN_VALUE
        >>  102 POP_TOP
            104 POP_TOP
            106 POP_TOP
            108 LOAD_FAST                1 (__ddgen)
            110 LOAD_ATTR                3 (athrow)
            112 LOAD_CONST               5 (<built-in function exc_info>)
            114 CALL_FUNCTION            0
            116 CALL_FUNCTION_EX         0
            118 GET_AWAITABLE
            120 LOAD_CONST               2 (None)
            122 YIELD_FROM
            124 POP_EXCEPT
            126 RETURN_VALUE
            128 RETURN_VALUE

However, with earlier Python versions the jump to the propagate label is not resolved correctly and actually ends up targeting the exc label (END_FINALLY is now in place of the newer RERAISE, but the jump is not there!):

             12 DUP_TOP
             14 STORE_FAST               1 (__ddgen)
             16 LOAD_ATTR                0 (asend)
             18 STORE_FAST               2 (__ddgensend)
             20 LOAD_FAST                1 (__ddgen)
             22 LOAD_ATTR                1 (__anext__)
             24 CALL_FUNCTION            0
        >>   26 SETUP_FINALLY           22 (to 50)
             28 GET_AWAITABLE
             30 LOAD_CONST               2 (None)
             32 YIELD_FROM
             34 POP_BLOCK
             36 SETUP_FINALLY           34 (to 72)
             38 YIELD_VALUE
             40 POP_BLOCK
             42 LOAD_FAST                2 (__ddgensend)
             44 ROT_TWO
             46 CALL_FUNCTION            1
             48 JUMP_ABSOLUTE           26
        >>   50 DUP_TOP
             52 LOAD_CONST               3 (<class 'StopAsyncIteration'>)
             54 COMPARE_OP              10 (exception match)
             56 POP_JUMP_IF_FALSE      102
             58 POP_TOP
             60 POP_TOP
             62 POP_TOP
             64 POP_EXCEPT
             66 LOAD_CONST               2 (None)
             68 RETURN_VALUE
             70 END_FINALLY
        >>   72 DUP_TOP
             74 LOAD_CONST               4 (<class 'GeneratorExit'>)
             76 COMPARE_OP              10 (exception match)
             78 POP_JUMP_IF_FALSE      102
             80 POP_TOP
             82 POP_TOP
             84 POP_TOP
             86 LOAD_FAST                1 (__ddgen)
             88 LOAD_ATTR                2 (aclose)
             90 CALL_FUNCTION            0
             92 GET_AWAITABLE
             94 LOAD_CONST               2 (None)
             96 YIELD_FROM
             98 POP_EXCEPT
            100 RETURN_VALUE
        >>  102 POP_TOP
            104 POP_TOP
            106 POP_TOP
            108 LOAD_FAST                1 (__ddgen)
            110 LOAD_ATTR                3 (athrow)
            112 LOAD_CONST               5 (<built-in function exc_info>)
            114 CALL_FUNCTION            0
            116 CALL_FUNCTION_EX         0
            118 GET_AWAITABLE
            120 LOAD_CONST               2 (None)
            122 YIELD_FROM
            124 POP_EXCEPT
            126 RETURN_VALUE
            128 RETURN_VALUE

This seems to point to a wrong resolution of the branching label.

treating `const_key` of a code object as mutable one

https://github.com/vstinner/bytecode/blob/45d643f6a706ae95586a874ea0fbcb18e797bfa1/bytecode/instr.py#L39

If the code object's constant pool contains mutable data (even lists), the returned const key cannot be hashed.
I for one got this:

  File ".../bytecode/concrete.py", line 466, in add_const
    if key in self.consts_indices:
TypeError: unhashable type: 'list'

Get rid of the dependency of aenum

Currently bytecode requires a third-party module aenum. But IntFlag is included in the stdlib's enum module since Python 3.6. aenum is not needed for Python 3.6+.

Can't call function with no arguments (py312)

I want to call a function with no arguments, e.g. input(), but I keep getting a RuntimeError saying the stack size is negative. These are the instructions that I have used:

<LOAD_NAME arg='input' location=None>
<CALL arg=0 location=None>
<POP_TOP location=None>
<LOAD_CONST arg=1 location=None>
<RETURN_CONST arg=1 location=None>

Causes:

  File "\venv\Lib\site-packages\bytecode\bytecode.py", line 306, in to_code
    stacksize = cfg.compute_stacksize(
                ^^^^^^^^^^^^^^^^^^^^^^
  File "\venv\Lib\site-packages\bytecode\cfg.py", line 547, in compute_stacksize
    args = coro.send(None)  # type: ignore
           ^^^^^^^^^^^^^^^
  File "\venv\Lib\site-packages\bytecode\cfg.py", line 381, in run
    self._update_size(*effect)
  File "\venv\Lib\site-packages\bytecode\cfg.py", line 419, in _update_size
    size, maxsize, minsize = _update_size(
                             ^^^^^^^^^^^^^
  File "\venv\Lib\site-packages\bytecode\cfg.py", line 159, in _update_size
    raise RuntimeError(msg)
RuntimeError: Failed to compute stacksize, got negative size

However when I call the function with an argument I get the expected output, with the bytecode:

<LOAD_NAME arg='input' location=None>
<LOAD_CONST arg='Some argument' location=None>
<CALL arg=0 location=None>
<POP_TOP location=None>
<LOAD_CONST arg=1 location=None>
<RETURN_CONST arg=1 location=None>

I also looked at the dis module to try and find out how python 3.12 does function calls with no arguments.

  1           0 RESUME                   0


  2           2 LOAD_GLOBAL              1 (NULL + input) # < with no arguments
             12 CALL                     0
             20 POP_TOP

  3          22 LOAD_GLOBAL              1 (NULL + input) # < with arguments
             32 LOAD_CONST               1 ('ok')
             34 CALL                     1
             42 POP_TOP
             44 RETURN_CONST             0 (None)

How could I call input with no arguments?

Python 3.11 support

Now that Python 3.11 is in beta (and should have no new features), are there plans to support Python 3.11?

fails to load very big constant numbers

Hey, I'm back again to ask for your help!

When I was loading very big numbers, I got an OverflowError:

bc = Bytecode([Instr('LOAD_CONST', 0xFFFFFFFF), Instr('RETURN_VALUE')])
bc.to_code()
>> OverflowError: Python int too large to convert to C long

Let's avoid performing dis.stack_effect when the instruction is something like LOAD_CONST?

Possible "hysteresis" in bytecode recompilation with 3.12

We've started investigating support for CPython 3.12 in our project that makes use of bytecode and we have observed a potential "hysteresis" in the following test

https://github.com/DataDog/dd-trace-py/blob/db7372d249de118a48b78d64327b9a903a388068/tests/debugging/function/test_store.py#L183-L206

The test is manipulating a bytecode object by adding extra instructions, and then removing them, in different orders. We want to check that we get an equal, albeit not identical, code object. Up until CPython 3.11 the last equality assertion would pass, but with 3.12 it fails. Using the dis module we can confirm that the bytecode content of the two code objects being tested is essentially the same, so the equality check must be failing for some other attribute(s) of the code object

Disassembly of original code object:
  5           0 RESUME                   0

  6           2 LOAD_FAST                0 (snafu)
              4 RETURN_VALUE
Disassembly of new code object:
  5           0 RESUME                   0

  6           2 LOAD_FAST                0 (snafu)
              4 RETURN_VALUE

For completeness, the function is defined as

def modulestuff(snafu):
    return snafu

cannot directly set argnames when creating a new code object from an existing one

Hi, community!
This is such an awesome project that enables people to try something amazing in Python; it really helps me.

Currently, the following problem just troubled me:

def f(x):
    y = 1
    print(y)

codeobj: types.CodeType = f.__code__
print(codeobj.co_varnames)       # (x, y) 
bc = Bytecode.from_code(codeobj)
bc.to_code() == codeobj  # -> True
new_bc = Bytecode([each for each in bc])
new_code_obj = new_bc.to_code()
new_code_obj == codeobj # False
new_code_obj.co_varnames # (y, )

I cannot correctly set the arguments now if I do not add new_bc.argnames = bc.argnames, because the Bytecode constructor takes no argument other than the instructions.
I wonder if I can rewrite the bytecode of a function and immediately create a new one with a constructor like:

new_bc = Bytecode([each for each in bc], metadata_from=bc)

The new argument metadata_from could make the new code object consistent with the older one

new_code_object.argcount == code_object.argcount,  # True
new_code_object.kwonlyargcount  == code_object.kwonlyargcount, # True
...

Why are extended line offsets -127 and 126, and not -128 and 127?

Regarding this part here, emitting extended line offsets:
https://github.com/vstinner/bytecode/blob/master/bytecode/concrete.py#L289-L295

But CPython uses -128 and 127 as the range, which is the (inclusive) range of a signed byte:
https://github.com/python/cpython/blob/3.9/Python/compile.c#L5638-L5648

Bytecode doesn't properly set linenumbers (in corner case in Python 3.10rc1)

For some context, I use bytecode to set programmatic breakpoints in the pydev debugger and while using it with Python 3.10, I found a corner case where the line isn't being properly mapped back to bytecode.

-- note that it works in Python 3.9 and it also works if the target file is a bit different -- say with less lines or with smaller lines -- so, it seems I got extremely lucky that I have a test case which got into this situation...

I'm attaching a test case which shows the issue. What the test does is load the code for a function (in this case long_lines_example.long_lines) and change the 2nd line of that function to include bytecode which does something as the code below and generates back a code object:

    label = Label()
    return [
        # -- if _pydev_needs_stop_at_break():
        Instr("LOAD_CONST", _pydev_needs_stop_at_break, lineno=stop_at_line),
        Instr("LOAD_CONST", stop_at_line, lineno=stop_at_line),
        Instr("CALL_FUNCTION", 1, lineno=stop_at_line),
        Instr("POP_JUMP_IF_FALSE", label, lineno=stop_at_line),

        #     -- _pydev_stop_at_break()
        #
        # Note that this has line numbers -1 so that when the NOP just below
        # is executed we have a spurious line event.
        Instr("LOAD_CONST", _pydev_stop_at_break, lineno=stop_at_line - 1),
        Instr("LOAD_CONST", stop_at_line, lineno=stop_at_line - 1),
        Instr("CALL_FUNCTION", 1, lineno=stop_at_line - 1),
        Instr("POP_TOP", lineno=stop_at_line - 1),

        # Put NOP in new line so that Python given a line event for the debugger.
        Instr("NOP", lineno=stop_at_line),
        label,
    ]

This works well enough in general, but for this specific use case in Python 3.10 it's not putting the NOP in a new line.

test_bytecode_line_not_correct.zip

Any ideas on what may be wrong there?

Docs: update examples for Python 3.8

cf comment by @serhiy-storchaka

#41 (comment)

`_encode_varint` cannot handle 0

The following exception is thrown when a TryBegin is the very first entry in a Bytecode object

value = 0, set_begin_marker = True

    @staticmethod
    def _encode_varint(value: int, set_begin_marker: bool = False) -> Iterator[int]:
        # Encode value as a varint on 7 bits (MSB should come first) and set
        # the begin marker if requested.
        temp: List[int] = []
        assert value >= 0
        while value:
            temp.append(value & 63 | (64 if temp else 0))
            value >>= 6
        if set_begin_marker:
>           temp[-1] |= 128
E           IndexError: list index out of range

It looks like the _encode_varint helper is failing to handle a value of 0 in this case. In Python 3.11 it is likely that the first opcode is e.g. RESUME, so in real Python code this issue might not occur.

Remove `Compare.EXC_MATCH` on Python>=3.9

Python 3.9 introduced the JUMP_IF_NOT_EXC_MATCH opcode, and the use of Compare.EXC_MATCH produces bad code that results in

TypeError: 'UH��H��]�l�' not supported between instances of 'type' and 'type'

therefore I would like to suggest that the attribute Compare.EXC_MATCH be defined only under the condition sys.version_info[:2] < (3, 9)

Is there any way to get the bytecode offsets along with the line number ?

x = 0

Consider the above code snippet. The dis module outputs the line numbers along with the bytecode offsets.

  1           0 LOAD_CONST               0 (0)
              2 STORE_NAME               0 (x)
              4 LOAD_CONST               1 (None)
              6 RETURN_VALUE

Using the bytecode module I get the following, except the offset=<> part. Is there any way I can get these offsets, similar to the dis module's output?

<LOAD_CONST arg=0 lineno=1 offset=0>
<STORE_NAME arg='x' lineno=1 offset=2>
<LOAD_CONST arg=None lineno=1 offset=4>
<RETURN_VALUE lineno=1 offset=6>>

I modified the bytecode module source to get the above output. If there is no other way except modifying the source, should I submit a PR?

EXTENDED_ARG + NOP Error

When running Pynguin (https://github.com/se2p/pynguin) on certain programs (like https://github.com/bottlepy/bottle) , I am running into this error (python/cpython#89918), but obviously arising from bytecode, not dis.

The trace and exact error change between 0.13/0.14 of bytecode, but the issue stems from the same area.

You may reproduce it with the following modified example (from the above CPython error), either as a script or in a REPL:

from types import CodeType
from bytecode import Bytecode

constants = [None] * (0x000129 + 1)
constants[0x000129] = "Hello world!"

code = CodeType(
    0,  # argcount
    0,  # posonlyargcount
    0,  # kwonlyargcount
    0,  # nlocals
    1,  # stacksize
    64,  # flags
    bytes([
        0x90, 0x01,  # EXTENDED_ARG 0x01
        0x09, 0xFF,  # NOP 0xFF
        0x90, 0x01,  # EXTENDED_ARG 0x01
        0x64, 0x29,  # LOAD_CONST 0x29
        0x53, 0x00,  # RETURN_VALUE 0x00
    ]),  # codestring=
    tuple(constants),  # constants
    (),  # names
    (),  # varnames
    '<no file>',  # filename
    'code',  # name
    1,  # firstlineno
    b''  # linetable
)

print("Output:", eval(code))

print(list(Bytecode.from_code(code)))

On 0.14, this will give:

Output: Hello world!
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[3], line 32
      7 code = CodeType(
      8     0,  # argcount
      9     0,  # posonlyargcount
   (...)
     27     b''  # linetable
     28 )
     30 print("Output:", eval(code))
---> 32 print(list(Bytecode.from_code(code)))

File ~/.local/lib/python3.10/site-packages/bytecode/bytecode.py:282, in Bytecode.from_code(code, prune_caches, conserve_exception_block_stackdepth)
    276 @staticmethod
    277 def from_code(
    278     code: types.CodeType,
    279     prune_caches: bool = True,
    280     conserve_exception_block_stackdepth: bool = False,
    281 ) -> "Bytecode":
--> 282     concrete = _bytecode.ConcreteBytecode.from_code(code)
    283     return concrete.to_bytecode(
    284         prune_caches=prune_caches,
    285         conserve_exception_block_stackdepth=conserve_exception_block_stackdepth,
    286     )

File ~/.local/lib/python3.10/site-packages/bytecode/concrete.py:346, in ConcreteBytecode.from_code(code, extended_arg)
    339 # HINT : in some cases Python generate useless EXTENDED_ARG opcode
    340 # with a value of zero. Such opcodes do not increases the size of the
    341 # following opcode the way a normal EXTENDED_ARG does. As a
    342 # consequence, they need to be tracked manually as otherwise the
    343 # offsets in jump targets can end up being wrong.
    344 if not extended_arg:
    345     # The list is modified in place
--> 346     bytecode._remove_extended_args(instructions)
    348 bytecode.name = code.co_name
    349 bytecode.filename = code.co_filename

File ~/.local/lib/python3.10/site-packages/bytecode/concrete.py:749, in ConcreteBytecode._remove_extended_args(instructions)
    746 arg = (extended_arg << 8) + instr.arg
    747 extended_arg = None
--> 749 instr = ConcreteInstr(
    750     instr.name,
    751     arg,
    752     location=instr.location,
    753     extended_args=nb_extended_args,
    754 )
    755 instructions[index] = instr
    756 nb_extended_args = 0

File ~/.local/lib/python3.10/site-packages/bytecode/concrete.py:90, in ConcreteInstr.__init__(self, name, arg, lineno, location, extended_args)
     77 def __init__(
     78     self,
     79     name: str,
   (...)
     87     # Python to properly compute the size and avoid messing up the jump
     88     # targets
     89     self._extended_args = extended_args
---> 90     super().__init__(name, arg, lineno=lineno, location=location)

File ~/.local/lib/python3.10/site-packages/bytecode/instr.py:448, in BaseInstr.__init__(self, name, arg, lineno, location)
    440 def __init__(
    441     self,
    442     name: str,
   (...)
    446     location: Optional[InstrLocation] = None,
    447 ) -> None:
--> 448     self._set(name, arg)
    449     if location:
    450         self._location = location

File ~/.local/lib/python3.10/site-packages/bytecode/concrete.py:111, in ConcreteInstr._set(self, name, arg)
    106 def _set(
    107     self,
    108     name: str,
    109     arg: int,
    110 ) -> None:
--> 111     super()._set(name, arg)
    112     size = 2
    113     if arg is not UNSET:

File ~/.local/lib/python3.10/site-packages/bytecode/instr.py:645, in BaseInstr._set(self, name, arg)
    642 except KeyError:
    643     raise ValueError("invalid operation name")
--> 645 self._check_arg(name, opcode, arg)
    647 self._name = name
    648 self._opcode = opcode

File ~/.local/lib/python3.10/site-packages/bytecode/concrete.py:104, in ConcreteInstr._check_arg(self, name, opcode, arg)
    102 else:
    103     if arg is not UNSET:
--> 104         raise ValueError("operation %s has no argument" % name)

ValueError: operation NOP has no argument

For the reproducer at least, the following addition seems to have worked around the problem:

bytecode/src/bytecode/concrete.py

Lines 741 to 752 in 75948ed

 if extended_arg is not None: 

 arg = (extended_arg << 8) + instr.arg 

 extended_arg = None 

 instr = ConcreteInstr( 

 instr.name, 

 arg, 

 location=instr.location, 

 extended_args=nb_extended_args, 

 ) 

 instructions[index] = instr 

 nb_extended_args = 0

            if extended_arg is not None:
                if instr.name == "NOP":
                    arg = UNSET
                else:
                    arg = (extended_arg << 8) + instr.arg
                 instr = ConcreteInstr( 
                 ...

It would be nice if this could be backported to at least 0.13 as well, which would instead be lines 370 to 372 of concrete.py.

Invalid operation name under python 3.6.0

Hello, I tried a very simple snippet and I get this:

>>> from bytecode import Bytecode
>>> Bytecode.from_code((lambda: x).__code__)
Traceback (most recent call last):
  File "/home/alancristhian/py360/lib/python3.6/site-packages/bytecode/instr.py", line 199, in _set
    opcode = _opcode.opmap[name]
KeyError: '<0>'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/home/alancristhian/py360/lib/python3.6/site-packages/bytecode/bytecode.py", line 117, in from_code
    concrete = _bytecode.ConcreteBytecode.from_code(code)
  File "/home/alancristhian/py360/lib/python3.6/site-packages/bytecode/concrete.py", line 143, in from_code
    instr = ConcreteInstr.disassemble(lineno, code.co_code, offset)
  File "/home/alancristhian/py360/lib/python3.6/site-packages/bytecode/concrete.py", line 91, in disassemble
    return cls(name, arg, lineno=lineno)
  File "/home/alancristhian/py360/lib/python3.6/site-packages/bytecode/concrete.py", line 36, in __init__
    self._set(name, arg, lineno)
  File "/home/alancristhian/py360/lib/python3.6/site-packages/bytecode/concrete.py", line 49, in _set
    super()._set(name, arg, lineno)
  File "/home/alancristhian/py360/lib/python3.6/site-packages/bytecode/instr.py", line 201, in _set
    raise ValueError("invalid operation name")
ValueError: invalid operation name

`LOAD_METHOD` is still a valid opname but doesn't seem to be fully supported

Porting some code to 3.12 and I get

ddtrace/internal/wrapping/__init__.py:520: in wrap
    f.__code__ = code.to_code()
.riot/venv_py3120/lib/python3.12/site-packages/bytecode/bytecode.py:312: in to_code
    bc = self.to_concrete_bytecode(
.riot/venv_py3120/lib/python3.12/site-packages/bytecode/bytecode.py:327: in to_concrete_bytecode
    return converter.to_concrete_bytecode(
.riot/venv_py3120/lib/python3.12/site-packages/bytecode/concrete.py:1401: in to_concrete_bytecode
    self.concrete_instructions()
.riot/venv_py3120/lib/python3.12/site-packages/bytecode/concrete.py:1275: in concrete_instructions
    self.required_caches = c_instr.use_cache_opcodes()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <LOAD_METHOD arg=1 location=InstrLocation(lineno=105, end_lineno=None, col_offset=None, end_col_offset=None)>

    def use_cache_opcodes(self) -> int:
        print(self._opcode)
        return (
            # Not supposed to be used but we need it
>           dis._inline_cache_entries[self._opcode]  # type: ignore
            if sys.version_info >= (3, 11)
            else 0
        )
E       IndexError: list index out of range

.riot/venv_py3120/lib/python3.12/site-packages/bytecode/concrete.py:183: IndexError

so it looks like the LOAD_METHOD opcode is not supported by 0.15.0. However, LOAD_ATTR should provide a workaround by setting the first argument of the tuple to True.

Support for Python 2

Hi,

I would be interested in adding support for Python 2 bytecode to be able to use bytecode in a project that must retain 2/3 compatibility and for which the better Python 3 support provided by bytecode would be interesting.

Would you merge such a PR, and do you have any advice as to how to structure the code?

Thanks

Travis fails (pep8) on master

Due to this line not having a second newline in setup.py

Using special optimization to get rid of recursion limitations when compiling really huge code

The problem got raised in some to-python compiler, if you need I can provide more details.

In that case, a generated file can sometimes be larger than 2 MB, with a function containing thousands of basic blocks. I failed at using bytecode even though I had done things like sys.setrecursionlimit(5000).

The recursion exception is raised at
https://github.com/vstinner/bytecode/blob/df5aa489eff54f04e7f6edb1abf9b448351021ea/bytecode/cfg.py#L89-L99

Currently I work around it with an evil patch, which modifies the global member bytecode.cfg._compute_stack_size to migrate from the use of function stacks to the use of a Python list, which makes things work.

Could I merge this optimization to bytecode library?
At least as an option, to support compiling very large bytecode.

Running pytest segfaults with Python 3.12 on armhf

This is a very strange one, and might not be the fault of bytecode:

(sid_armhf-dchroot)jdg@abel:~/python-bytecode-0.15.1$ python3.12 -m pytest tests
============================= test session starts ==============================
platform linux -- Python 3.12.1, pytest-7.4.3, pluggy-1.3.0
rootdir: /home/jdg/python-bytecode-0.15.1
configfile: pyproject.toml
collected 165 items                                                            

tests/test_bytecode.py ...s...s......Fatal Python error: Segmentation fault

Current thread 0xb6d47020 (most recent call first):
  File "/usr/lib/python3/dist-packages/bytecode/cfg.py", line 744 in from_bytecode
  File "/usr/lib/python3/dist-packages/bytecode/bytecode.py", line 305 in to_code
  File "/home/jdg/python-bytecode-0.15.1/tests/test_bytecode.py", line 493 in test_negative_size_binary_with_disable_check_of_pre_and_post
  File "/usr/lib/python3.12/unittest/case.py", line 589 in _callTestMethod
  File "/usr/lib/python3.12/unittest/case.py", line 636 in run
  File "/usr/lib/python3.12/unittest/case.py", line 692 in __call__
  File "/usr/lib/python3/dist-packages/_pytest/unittest.py", line 333 in runtest
  File "/usr/lib/python3/dist-packages/_pytest/runner.py", line 169 in pytest_runtest_call
  File "/usr/lib/python3/dist-packages/pluggy/_callers.py", line 77 in _multicall
  File "/usr/lib/python3/dist-packages/pluggy/_manager.py", line 115 in _hookexec
  File "/usr/lib/python3/dist-packages/pluggy/_hooks.py", line 493 in __call__
  File "/usr/lib/python3/dist-packages/_pytest/runner.py", line 262 in <lambda>
  File "/usr/lib/python3/dist-packages/_pytest/runner.py", line 341 in from_call
  File "/usr/lib/python3/dist-packages/_pytest/runner.py", line 261 in call_runtest_hook
  File "/usr/lib/python3/dist-packages/_pytest/runner.py", line 222 in call_and_report
  File "/usr/lib/python3/dist-packages/_pytest/runner.py", line 133 in runtestprotocol
  File "/usr/lib/python3/dist-packages/_pytest/runner.py", line 114 in pytest_runtest_protocol
  File "/usr/lib/python3/dist-packages/pluggy/_callers.py", line 77 in _multicall
  File "/usr/lib/python3/dist-packages/pluggy/_manager.py", line 115 in _hookexec
  File "/usr/lib/python3/dist-packages/pluggy/_hooks.py", line 493 in __call__
  File "/usr/lib/python3/dist-packages/_pytest/main.py", line 350 in pytest_runtestloop
  File "/usr/lib/python3/dist-packages/pluggy/_callers.py", line 77 in _multicall
  File "/usr/lib/python3/dist-packages/pluggy/_manager.py", line 115 in _hookexec
  File "/usr/lib/python3/dist-packages/pluggy/_hooks.py", line 493 in __call__
  File "/usr/lib/python3/dist-packages/_pytest/main.py", line 325 in _main
  File "/usr/lib/python3/dist-packages/_pytest/main.py", line 271 in wrap_session
  File "/usr/lib/python3/dist-packages/_pytest/main.py", line 318 in pytest_cmdline_main
  File "/usr/lib/python3/dist-packages/pluggy/_callers.py", line 77 in _multicall
  File "/usr/lib/python3/dist-packages/pluggy/_manager.py", line 115 in _hookexec
  File "/usr/lib/python3/dist-packages/pluggy/_hooks.py", line 493 in __call__
  File "/usr/lib/python3/dist-packages/_pytest/config/__init__.py", line 169 in main
  File "/usr/lib/python3/dist-packages/_pytest/config/__init__.py", line 192 in console_main
  File "/usr/lib/python3/dist-packages/pytest/__main__.py", line 5 in <module>
  File "<frozen runpy>", line 88 in _run_code
  File "<frozen runpy>", line 198 in _run_module_as_main
Segmentation fault

It seems to run fine on other architectures, so I don't know what's up here.

Update pre_and_post_stack_effect

It appears that a number of new (some old) opcodes have not been included in the calculations of pre_and_post_stack_effect which weakens the guarantee we make on the absence of segfault from recompiled bytecode. The relevant opcodes are:

DUP_TOP_TWO (covered with DUP_TOP)
WITH_EXCEPT_START
COPY_DICT_WITHOUT_KEYS
MATCH_*

Handled as push back opcodes

Ideally we should try to come up with a way to test for this but I am not sure it is possible. Help is welcome.

read .pyc

Does it support reading .pyc files? Is there a way to get all the bytecode instructions in a .pyc file, similar to Bytecode.from_code(code_obj)?

Python 3.6 compatibility

I'd like to know if there is any plan to make bytecode compatible with Python 3.6.

The main reason it's important to me is that I'm using this package in my PyScanPrev project.

`ValueError: ('TryBegin target must a BasicBlock, got %s', 'Label')` when using `Label` with `TryBegin`

I am trying to use a TryBegin with a Label. According to the documentation, the target argument can either be a Label or a BasicBlock. However, when using a Label I get

ValueError: ('TryBegin target must a BasicBlock, got %s', 'Label')

This is with bytecode 0.15.0.

Question: how stable is it to round-trip code?

I'm experimenting on using bytecode to add programmatic breakpoints in pydevd (https://github.com/fabioz/PyDev.Debugger/).

The use case is getting the existing bytecode, adding some code to activate the pydevd breakpoint and then save it back (I'm trying to migrate from the existing code which does that but fails on some corner cases).

i.e.: something as:

b = bytecode.Bytecode.from_code(code_to_modify)
# modify to add new instructions at breakpoints ... something as:
b.insert(i, Instr("LOAD_GLOBAL", '_pydev_stop_at_break'))
b.insert(i + 1, Instr("LOAD_CONST", stop_at_line))
b.insert(i + 2, Instr("CALL_FUNCTION", 1))
b.insert(i + 3, Instr("POP_TOP"))
new_code = b.to_code()

On my experiments it seems to be working well, but I was wondering if you know of any corner case where doing so would not be safe or if something else would need to be taken into account for such a round-trip to work.

p.s.: Sorry for using the tracker to ask a question, I wasn't sure what was the appropriate channel here.

Update jump address handling for 3.10

Python 3.10.0a7 moved away from encoding offsets in terms of bytes and is now encoding them in terms of instructions

Re-implement bytecode on top of codetype

Using the builtin code type can guard against future changes to the bytecode format. This is from an SO question:

MyCode= CodeType(
        0,
        0,
        0,
        3,
        64,
        bytes([101, 0, 0,    #Load print function
               101, 1, 0,    #Load name 'a'
               101, 2, 0,    #Load name 'b'
               23,           #Take first two stack elements and store their sum
               131, 1, 0,    #Call first element in the stack with one positional argument
               1,            #Pop top of stack
               101, 0, 0,    #Load print function
               101, 1, 0,    #Load name 'a'
               101, 2, 0,    #Load name 'b'
               20,           #Take first two stack elements and store their product
               131, 1, 0,    #Call first element in the stack with one positional argument
               1,            #Pop top of stack
               100, 0, 0,    #Load constant None
               83]),         #Return top of stack
        (None,),
        ('print', 'a', 'b'),
        (),
        'PersonalCodeObject',
        'MyCode',
        1,
        bytes([14,1]),
        (),
        () )

Support general constants

Hello,
I've found that the code object could not take unhashable object as its co_consts, for you use a dictionary to store the constants.
I think that we may not follow the implementation of _PyCode_ConstantKey for further usage. Actually my project needs making more kinds of constants than CPython already has.
https://github.com/vstinner/bytecode/blob/a7cc7a52ca10e58a8aee48052edd069fac3c5a01/bytecode/concrete.py#L439

    self.consts = {}
    ...
    def add_const(self, value):
        key = const_key(value)
        if key in self.consts:
            return self.consts[key]
        index = len(self.consts)
        self.consts[key] = index
        return index

Could we add a compiler flag to support unhashable constants?
Just consider using an association list (List[Tuple[K, V]]) to store unhashable ones.

except Exception as e fails on Python 3.11

import textwrap
from bytecode import Bytecode, ControlFlowGraph


source = '''
try:
    pass
except Exception as e:
    pass
'''
source = textwrap.dedent(source).strip()
code = compile(source, '<string>', 'exec')
bytecode = Bytecode.from_code(code)
cfg = ControlFlowGraph.from_bytecode(bytecode)
cfg.to_bytecode()

Gives the following traceback:

Traceback (most recent call last):
  File "c:\Users\lbhb\projects\psi-nafc\src\test_bytecode.py", line 15, in <module>
    cfg.to_bytecode()
  File "c:\Users\lbhb\anaconda3\envs\psi-nafc\Lib\site-packages\bytecode\cfg.py", line 992, in to_bytecode
    byt_te.entry.stack_depth = min(
                               ^^^^
TypeError: '<' not supported between instances of '_UNSET' and '_UNSET'

Errors in compute stacksize on 3.10

People have been seeing a few errors on python 3.10 with inkcut

  File "/home/himbeere/.local/lib/python3.10/site-packages/inkcut/core/plugin.py", line 80, in start_default_workspace
    ui.select_workspace('inkcut.workspace')
  File "/home/himbeere/.local/lib/python3.10/site-packages/enaml/workbench/ui/ui_plugin.py", line 157, in select_workspace
    new_workspace.start()
  File "/home/himbeere/.local/lib/python3.10/site-packages/inkcut/ui/workspace.py", line 46, in start
    self.workbench.get_plugin('inkcut.ui')
  File "/home/himbeere/.local/lib/python3.10/site-packages/enaml/workbench/workbench.py", line 151, in get_plugin
    plugin.start()
  File "/home/himbeere/.local/lib/python3.10/site-packages/inkcut/ui/plugin.py", line 91, in start
    self._refresh_dock_items()
  File "/home/himbeere/.local/lib/python3.10/site-packages/inkcut/ui/plugin.py", line 194, in _refresh_dock_items
    DockItem = declaration.factory()
  File "/home/himbeere/.local/lib/python3.10/site-packages/inkcut/preview/manifest.enaml", line 30, in preview_factory
    from .view import PreviewDockItem
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
  File "/home/himbeere/.local/lib/python3.10/site-packages/enaml/core/import_hooks.py", line 140, in exec_module
    code, _ = self.get_code()
  File "/home/himbeere/.local/lib/python3.10/site-packages/enaml/core/import_hooks.py", line 401, in get_code
    return self.compile_code()
  File "/home/himbeere/.local/lib/python3.10/site-packages/enaml/core/import_hooks.py", line 366, in compile_code
    code = EnamlCompiler.compile(ast, file_info.src_path)
  File "/home/himbeere/.local/lib/python3.10/site-packages/enaml/core/enaml_compiler.py", line 178, in compile
    return compiler.visit(node)
  File "/home/himbeere/.local/lib/python3.10/site-packages/enaml/core/enaml_ast.py", line 359, in visit
    result = visitor(node, *args, **kwargs)
  File "/home/himbeere/.local/lib/python3.10/site-packages/enaml/core/enaml_compiler.py", line 206, in visit_Module
    return cg.to_code()
  File "/home/himbeere/.local/lib/python3.10/site-packages/enaml/core/code_generator.py", line 119, in to_code
    return bc_code.to_code()
  File "/home/himbeere/.local/lib/python3.10/site-packages/bytecode/bytecode.py", line 204, in to_code
    stacksize = self.compute_stacksize(check_pre_and_post=check_pre_and_post)
  File "/home/himbeere/.local/lib/python3.10/site-packages/bytecode/bytecode.py", line 195, in compute_stacksize
    cfg = _bytecode.ControlFlowGraph.from_bytecode(self)
  File "/home/himbeere/.local/lib/python3.10/site-packages/bytecode/cfg.py", line 391, in from_bytecode
    if not block[-1].is_final():
  File "/home/himbeere/.local/lib/python3.10/site-packages/bytecode/cfg.py", line 44, in __getitem__
    value = super().__getitem__(index)
IndexError: list index out of range

See inkcut/inkcut#320

Missing tag on 0.12.0 release

The commit 826344e seems to be missing a tag. It would be handy to have this tagged like other release commits.

Discussion : Improving handling of co_flags

Currently the flags for a code object need to be specified manually as a single integer. This approach offers maximum flexibility but is also error prone, and could, I believe, be improved. The following are the 'official' flags (i.e. excluding future flags such as CO_FUTURE_BARRY_AS_BDFL and CO_FUTURE_GENERATOR_STOP).

From dis.COMPILER_FLAG_NAMES

 1 OPTIMIZED
  2 NEWLOCALS
  4 VARARGS
  8 VARKEYWORDS
 16 NESTED
 32 GENERATOR
 64 NOFREE
128 COROUTINE
256 ITERABLE_COROUTINE

Among those we can identify kind of three families (please correct me if I got this wrong):

the flags completely independent of the underlying code : NEWLOCALS, VARARGS, VARKEYWORDS, ITERABLE_COROUTINE
the flags completely dependent on the underlying code: OPTIMIZED, NOFREE, GENERATOR
and in between flags:
- NESTED: apply only to function like code defined in another function (and honestly I have difficulty understanding what it does...)
- COROUTINE: can be obvious if GET_AWAITABLE is used but it is not always so

Furthermore COROUTINE and ITERABLE_COROUTINE are incompatible.

I hence believe that it would be profitable to be able to:

specify manually the value for the first kind through a higher level construct
have the proper value be computed in an automatic fashion for the second kind
be able to specify if the code is nested (byteplay does this by passing a keyword arg to to_code) and if the code is from a function (byteplay only guesses here).
be able to force the coroutine behavior or have it inferred.

Looking at how byteplay handles this, there are a number of attributes on the code object itself allowing to specify the flags (and the from_function keyword arg in to_code). Moreover generator behavior can be forced.

I do not have a specific implementation in mind but I think that keeping the flag logic in a separate class differentiating between default values (from the original code or guessed) and forced user values may help. The conversion to an int would obviously require the code.

What do people think ?

Stack size computation issue

For example:

This will cause a stack underflow, but no error was given until the segfault.
Expecting something like:

Stack size with EXTENDED_ARG

Hi,
I stumbled upon a problem with the stack size computation in combination with the EXTENDED_ARG instruction. Take for example this code snippet:

p = [1, 2, 3, 4, 5, 6]
q, r, *s, t = p
print(q, r, s, t)

>>> 1 2 [3, 4, 5] 6

The disassembly looks like this:

1           0 LOAD_CONST               1 (1)
            2 LOAD_CONST               2 (2)
            4 LOAD_CONST               3 (3)
            6 LOAD_CONST               4 (4)
            8 LOAD_CONST               5 (5)
            10 LOAD_CONST              6 (6)
            12 BUILD_LIST              6
            14 STORE_FAST              1 (p)
                  
2          16 LOAD_FAST                1 (p)
           18 EXTENDED_ARG             1
           20 UNPACK_EX              258
           22 STORE_FAST               2 (q)
           24 STORE_FAST               3 (r)
           26 STORE_FAST               4 (s)
           28 STORE_FAST               5 (t)
                  
3          30 LOAD_GLOBAL              0 (print)
           32 LOAD_FAST                2 (q)
           34 LOAD_FAST                3 (r)
           36 LOAD_FAST                4 (s)
           38 LOAD_FAST                5 (t)
           40 CALL_FUNCTION            4
           42 POP_TOP
           44 LOAD_CONST               0 (None)
           46 RETURN_VALUE

Now generating the bytecode from this code and converting back to a code object fails due to wrong stack size computation, when the bytecode is generated with the parameter to include extended arguments here set to True (I specifically need this behavior for the bytecode to match the disassembly).

The problem seems to be the use of the EXTENDED_ARG instruction, which is not handled correctly. The stack size computation treats UNPACK_EX as a single instruction and takes into account only the lower byte (corresponding to the number of variables before the list value, i.e. variables q, r), but not the higher byte provided by EXTENDED_ARG (corresponding to the number of values after the list value, i.e. variable t). This leads to a negative stack size.

Is this a known problem and is there a solution or a workaround for this, except just excluding the EXTENDED_ARG instruction?

(For reference: UNPACK_EX documentation, stack effect of UNPACK_EX)

EDIT: Ok, an easy workaround would be to generate two bytecode objects, one without and one with the parameter for extended arguments. I can generate a code object from the one without EXTENDED_ARG and keep the other one for reference and comparison with the disassembly. Nevertheless, I don't think that is the nicest option here.

	if extended_arg is not None:
	arg = (extended_arg << 8) + instr.arg
	extended_arg = None

	instr = ConcreteInstr(
	instr.name,
	arg,
	location=instr.location,
	extended_args=nb_extended_args,
	)
	instructions[index] = instr
	nb_extended_args = 0

matthieudartiailh / bytecode Goto Github PK

bytecode's People

Contributors

Stargazers

Watchers

Forkers

bytecode's Issues

Recommend Projects

Recommend Topics

Recommend Org