1#!/usr/bin/env python3
2#
3# This file is part of the MicroPython project, http://micropython.org/
4#
5# The MIT License (MIT)
6#
7# Copyright (c) 2016-2019 Damien P. George
8#
9# Permission is hereby granted, free of charge, to any person obtaining a copy
10# of this software and associated documentation files (the "Software"), to deal
11# in the Software without restriction, including without limitation the rights
12# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the Software is
14# furnished to do so, subject to the following conditions:
15#
16# The above copyright notice and this permission notice shall be included in
17# all copies or substantial portions of the Software.
18#
19# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25# THE SOFTWARE.
26
27# Python 2/3 compatibility code
28from __future__ import print_function
29import platform
30
if platform.python_version_tuple()[0] == "2":
    # Python 2: str is already a byte string and bytearray stands in for bytes.

    def str_cons(val, enc=None):
        """Return val unchanged (the encoding argument is ignored)."""
        return val

    def bytes_cons(val, enc=None):
        """Return val as a bytearray (the encoding argument is ignored)."""
        return bytearray(val)

    def is_bytes_type(o):
        """Return True if o is exactly a bytearray."""
        return type(o) is bytearray

    def is_int_type(o):
        """Return True if o is exactly an int or a long."""
        return type(o) is int or type(o) is long


else:
    # Python 3: the built-in constructors already accept (value, encoding).
    str_cons = str
    bytes_cons = bytes

    def is_bytes_type(o):
        """Return True if o is exactly a bytes object."""
        return type(o) is bytes

    def is_int_type(o):
        """Return True if o is exactly an int (bool is excluded)."""
        return type(o) is int


def is_str_type(o):
    """Return True if o is exactly a str (same test on both Python versions)."""
    return type(o) is str


# end compatibility code
44
45import sys
46import struct
47from collections import namedtuple
48
49sys.path.append(sys.path[0] + "/../py")
50import makeqstrdata as qstrutil
51
52
class FreezeError(Exception):
    """Error raised when a raw code object cannot be frozen.

    Attributes:
        rawcode: the raw code object that was being frozen.
        msg: human-readable description of the problem.
    """

    def __init__(self, rawcode, msg):
        self.rawcode = rawcode
        self.msg = msg

    def __str__(self):
        # Raw code objects only get a source_file attribute when they have a
        # prelude, so it may be missing; never let the error report itself fail.
        source_file = getattr(self.rawcode, "source_file", None)
        if source_file is None:
            name = "<unknown>"
        else:
            # source_file is normally a qstr wrapper whose text is in ".str";
            # accept a plain string as well.
            name = getattr(source_file, "str", source_file)
        return "error while freezing %s: %s" % (name, self.msg)
60
61
class Config:
    """Namespace for tool-wide settings.

    Only the constants below are defined here; other attributes (for example
    the ones assigned in read_mpy) are attached to the shared instance at
    run time.
    """

    # .mpy format version; read_mpy compares it with the second header byte.
    MPY_VERSION = 5

    # Possible values of MICROPY_LONGINT_IMPL.
    (
        MICROPY_LONGINT_IMPL_NONE,
        MICROPY_LONGINT_IMPL_LONGLONG,
        MICROPY_LONGINT_IMPL_MPZ,
    ) = range(3)


# Single shared configuration object used throughout this module.
config = Config()
70
71
class QStrType:
    """A qstr: its text, its escaped form and the matching C identifier."""

    def __init__(self, str):
        escaped = qstrutil.qstr_escape(str)
        self.str = str  # original text of the qstr
        self.qstr_esc = escaped  # text as escaped by qstrutil.qstr_escape
        self.qstr_id = "MP_QSTR_" + escaped  # identifier emitted in C output
77
78
# Global qstr table, seeded with the static qstrs; qstr values read from .mpy
# files are used as indices into it.
global_qstrs = [None]  # MP_QSTRnull should never be referenced
global_qstrs.extend(QStrType(static_name) for static_name in qstrutil.static_qstr_list)
83
84
class QStrWindow:
    """Most-recently-used window of qstr values, newest first."""

    def __init__(self, size):
        self.size = size
        self.window = []

    def push(self, val):
        """Insert val at the front, keeping at most size entries."""
        survivors = self.window[: self.size - 1]
        self.window = [val] + survivors

    def access(self, idx):
        """Return the entry at idx and move it to the front of the window."""
        val = self.window[idx]
        before = self.window[:idx]
        after = self.window[idx + 1 :]
        self.window = [val] + before + after
        return val
97
98
# Kinds of raw code. read_raw_code derives the kind by adding the low two bits
# of the raw-code header to MP_CODE_BYTECODE.
MP_CODE_BYTECODE = 2
MP_CODE_NATIVE_PY = 3
MP_CODE_NATIVE_VIPER = 4
MP_CODE_NATIVE_ASM = 5

# Native architecture identifiers. read_mpy takes the value from the .mpy
# feature byte (feature_byte >> 2).
MP_NATIVE_ARCH_NONE = 0
MP_NATIVE_ARCH_X86 = 1
MP_NATIVE_ARCH_X64 = 2
MP_NATIVE_ARCH_ARMV6 = 3
MP_NATIVE_ARCH_ARMV6M = 4
MP_NATIVE_ARCH_ARMV7M = 5
MP_NATIVE_ARCH_ARMV7EM = 6
MP_NATIVE_ARCH_ARMV7EMSP = 7
MP_NATIVE_ARCH_ARMV7EMDP = 8
MP_NATIVE_ARCH_XTENSA = 9
MP_NATIVE_ARCH_XTENSAWIN = 10

# For non-qstr opcode formats, an opcode with none of these bits set is
# followed by one extra byte (see mp_opcode_format).
MP_BC_MASK_EXTRA_BYTE = 0x9E

# Operand format codes, as returned by mp_opcode_format.
MP_BC_FORMAT_BYTE = 0
MP_BC_FORMAT_QSTR = 1
MP_BC_FORMAT_VAR_UINT = 2
MP_BC_FORMAT_OFFSET = 3

# extra byte if caching enabled:
# (these opcodes take one more byte when
# config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE is set)
MP_BC_LOAD_NAME = 0x11
MP_BC_LOAD_GLOBAL = 0x12
MP_BC_LOAD_ATTR = 0x13
MP_BC_STORE_ATTR = 0x18
128
129# this function mirrors that in py/bc.c
def mp_opcode_format(bytecode, ip, count_var_uint):
    """Return (format, size) for the opcode located at bytecode[ip].

    format is one of the MP_BC_FORMAT_* codes and size is the number of bytes
    the instruction occupies.  For MP_BC_FORMAT_VAR_UINT the bytes of the
    variable-length integer are only included when count_var_uint is true.
    """
    opcode = bytecode[ip]
    # 0x3A4 is a packed table of 2-bit format codes indexed by the opcode's
    # high nibble.
    fmt = (0x000003A4 >> (2 * (opcode >> 4))) & 3

    if fmt == MP_BC_FORMAT_QSTR:
        # opcode byte + 2-byte qstr, plus a cache byte for the lookup opcodes
        size = 3
        if config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE and opcode in (
            MP_BC_LOAD_NAME,
            MP_BC_LOAD_GLOBAL,
            MP_BC_LOAD_ATTR,
            MP_BC_STORE_ATTR,
        ):
            size += 1
        return fmt, size

    size = 1  # the opcode byte itself
    if fmt == MP_BC_FORMAT_VAR_UINT:
        if count_var_uint:
            # every byte with the top bit set is followed by another one
            last = ip + 1
            while (bytecode[last] & 0x80) != 0:
                last += 1
            size = last + 1 - ip
    elif fmt == MP_BC_FORMAT_OFFSET:
        size += 2
    if (opcode & MP_BC_MASK_EXTRA_BYTE) == 0:
        size += 1
    return fmt, size
156
157
def read_prelude_sig(read_byte):
    """Decode the variable-length prelude signature.

    read_byte is a zero-argument callable returning the next byte as an int.
    Returns the tuple (S, E, F, A, K, D); extract_prelude unpacks these as
    n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args and
    n_def_pos_args.
    """
    byte = read_byte()
    # first byte: xSSSSEAA
    n_state = (byte >> 3) & 0xF
    n_exc_stack = (byte >> 2) & 0x1
    n_pos_args = byte & 0x3
    scope_flags = n_kwonly_args = n_def_pos_args = 0

    round_no = 0
    while byte & 0x80:
        # continuation byte: xFSSKAED, each field contributes its next bit(s)
        byte = read_byte()
        n_state |= ((byte >> 4) & 0x3) << (2 * round_no + 4)
        n_exc_stack |= ((byte >> 1) & 0x1) << (round_no + 1)
        scope_flags |= ((byte >> 6) & 0x1) << round_no
        n_pos_args |= ((byte >> 2) & 0x1) << (round_no + 2)
        n_kwonly_args |= ((byte >> 3) & 0x1) << round_no
        n_def_pos_args |= (byte & 0x1) << round_no
        round_no += 1

    # the state count is stored minus one
    return (
        n_state + 1,
        n_exc_stack,
        scope_flags,
        n_pos_args,
        n_kwonly_args,
        n_def_pos_args,
    )
180
181
def read_prelude_size(read_byte):
    """Decode the variable-length prelude size field.

    read_byte is a zero-argument callable returning the next byte as an int.
    Returns (I, C); extract_prelude unpacks these as n_info and n_cell.
    """
    n_info = 0
    n_cell = 0
    round_no = 0
    more = True
    while more:
        byte = read_byte()
        # xIIIIIIC: six bits of I and one bit of C per byte, top bit = continue
        n_info |= ((byte >> 1) & 0x3F) << (6 * round_no)
        n_cell |= (byte & 1) << round_no
        more = bool(byte & 0x80)
        round_no += 1
    return n_info, n_cell
195
196
def extract_prelude(bytecode, ip):
    """Parse the prelude stored in bytecode starting at offset ip.

    Returns (ip, ip2, prelude) where ip is the offset of the first opcode,
    ip2 is the offset of the simple_name qstr and prelude is the tuple
    (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args,
    n_def_pos_args).
    """
    # mutable cell so the nested reader can advance the position
    # (works in Python 2 and 3)
    cursor = {"pos": ip}

    def next_byte():
        value = bytecode[cursor["pos"]]
        cursor["pos"] += 1
        return value

    (
        n_state,
        n_exc_stack,
        scope_flags,
        n_pos_args,
        n_kwonly_args,
        n_def_pos_args,
    ) = read_prelude_sig(next_byte)
    n_info, n_cell = read_prelude_size(next_byte)

    # the simple_name qstr sits right after the signature and size fields
    name_offset = cursor["pos"]
    # the first opcode follows the info and cell sections
    opcode_offset = name_offset + n_info + n_cell
    prelude = (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args)
    return opcode_offset, name_offset, prelude
220
221
class MPFunTable:
    """Marker placed in a constant-object list; emitted as &mp_fun_table.

    The class object itself is the marker (compared with ``is``); it is never
    instantiated in the code visible here.
    """
224
225
class RawCode(object):
    """Base class for one unit of code loaded from a .mpy file.

    Holds the function data together with the inputs of its constant table
    (qstrs, constant objects and child raw codes) and provides the helpers
    that print the corresponding C definitions to stdout.  Subclasses supply
    ``freeze(parent_name)``, which is what ``freeze_children`` calls.
    """

    # a set of all escaped names, to make sure they are unique
    escaped_names = set()

    # convert code kind number to string
    code_kind_str = {
        MP_CODE_BYTECODE: "MP_CODE_BYTECODE",
        MP_CODE_NATIVE_PY: "MP_CODE_NATIVE_PY",
        MP_CODE_NATIVE_VIPER: "MP_CODE_NATIVE_VIPER",
        MP_CODE_NATIVE_ASM: "MP_CODE_NATIVE_ASM",
    }

    def __init__(self, code_kind, bytecode, prelude_offset, qstrs, objs, raw_codes):
        """Store the raw code data and, if there is a prelude, decode it.

        prelude_offset is the offset of the prelude inside bytecode, or None
        when the code has no prelude.  In the latter case simple_name is set
        to a dummy qstr and no source_file / prelude attributes are created
        here.
        """
        # set core variables
        self.code_kind = code_kind
        self.bytecode = bytecode
        self.prelude_offset = prelude_offset
        self.qstrs = qstrs
        self.objs = objs
        self.raw_codes = raw_codes

        if self.prelude_offset is None:
            # no prelude, assign a dummy simple_name
            self.prelude_offset = 0
            self.simple_name = global_qstrs[1]
        else:
            # extract prelude
            self.ip, self.ip2, self.prelude = extract_prelude(self.bytecode, self.prelude_offset)
            # the two 16-bit qstrs at ip2 are the block name and source file
            self.simple_name = self._unpack_qstr(self.ip2)
            self.source_file = self._unpack_qstr(self.ip2 + 2)
            self.line_info_offset = self.ip2 + 4

    def _unpack_qstr(self, ip):
        """Return the global qstr referenced by the little-endian 16-bit value at ip."""
        qst = self.bytecode[ip] | self.bytecode[ip + 1] << 8
        return global_qstrs[qst]

    @staticmethod
    def _float_bits_repr_c(value):
        """Return the 32-bit object word encoding value for MICROPY_OBJ_REPR_C.

        The result is reduced modulo 2**32 so the printed constant always fits
        in 32 bits; without the mask, floats with the sign bit set produce a
        value wider than 32 bits.
        """
        try:
            packed = struct.pack("<f", value)
        except OverflowError:
            # struct refuses finite values outside the float32 range.
            # NOTE(review): saturating to +/-infinity is assumed to match how
            # a single-precision target would represent such a constant.
            packed = struct.pack("<f", float("inf") if value > 0 else float("-inf"))
        n = struct.unpack("<I", packed)[0]
        return (((n & ~0x3) | 2) + 0x80800000) & 0xFFFFFFFF

    @staticmethod
    def _float_bits_repr_d(value):
        """Return the 64-bit object word encoding value for MICROPY_OBJ_REPR_D.

        The result is reduced modulo 2**64 so the printed constant always fits
        in 64 bits (negative doubles would otherwise overflow it).
        """
        n = struct.unpack("<Q", struct.pack("<d", value))[0]
        return (n + 0x8004000000000000) & 0xFFFFFFFFFFFFFFFF

    def dump(self):
        """Dump this raw code (unfinished: currently only processes children)."""
        # dump children first
        # NOTE(review): this calls freeze("") on the children rather than
        # dump(); left unchanged because dumping itself is still a TODO.
        for rc in self.raw_codes:
            rc.freeze("")
        # TODO

    def freeze_children(self, parent_name):
        """Pick a unique escaped name for this code, then freeze all children."""
        self.escaped_name = parent_name + self.simple_name.qstr_esc

        # make sure the escaped name is unique
        i = 2
        while self.escaped_name in RawCode.escaped_names:
            self.escaped_name = parent_name + self.simple_name.qstr_esc + str(i)
            i += 1
        RawCode.escaped_names.add(self.escaped_name)

        # emit children first
        for rc in self.raw_codes:
            rc.freeze(self.escaped_name + "_")

    def freeze_constants(self):
        """Print the C definitions of the constant objects and the constant table.

        Raises FreezeError for integer constants when the configured long-int
        implementation cannot represent them, and for object types that are
        not handled.
        """
        # generate constant objects
        for i, obj in enumerate(self.objs):
            obj_name = "const_obj_%s_%u" % (self.escaped_name, i)
            if obj is MPFunTable:
                # no object is emitted; the table entry refers to mp_fun_table
                pass
            elif obj is Ellipsis:
                print("#define %s mp_const_ellipsis_obj" % obj_name)
            elif is_str_type(obj) or is_bytes_type(obj):
                if is_str_type(obj):
                    obj = bytes_cons(obj, "utf8")
                    obj_type = "mp_type_str"
                else:
                    obj_type = "mp_type_bytes"
                print(
                    'STATIC const mp_obj_str_t %s = {{&%s}, %u, %u, (const byte*)"%s"};'
                    % (
                        obj_name,
                        obj_type,
                        qstrutil.compute_hash(obj, config.MICROPY_QSTR_BYTES_IN_HASH),
                        len(obj),
                        "".join(("\\x%02x" % b) for b in obj),
                    )
                )
            elif is_int_type(obj):
                if config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_NONE:
                    # TODO check if we can actually fit this long-int into a small-int
                    raise FreezeError(self, "target does not support long int")
                elif config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_LONGLONG:
                    # TODO
                    raise FreezeError(self, "freezing int to long-long is not implemented")
                elif config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_MPZ:
                    neg = 0
                    if obj < 0:
                        obj = -obj
                        neg = 1
                    # split the magnitude into digits, least significant first
                    bits_per_dig = config.MPZ_DIG_SIZE
                    digs = []
                    z = obj
                    while z:
                        digs.append(z & ((1 << bits_per_dig) - 1))
                        z >>= bits_per_dig
                    ndigs = len(digs)
                    digs = ",".join(("%#x" % d) for d in digs)
                    print(
                        "STATIC const mp_obj_int_t %s = {{&mp_type_int}, "
                        "{.neg=%u, .fixed_dig=1, .alloc=%u, .len=%u, .dig=(uint%u_t*)(const uint%u_t[]){%s}}};"
                        % (obj_name, neg, ndigs, ndigs, bits_per_dig, bits_per_dig, digs)
                    )
            elif type(obj) is float:
                # only representations A and B store floats as separate objects
                print(
                    "#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A || MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_B"
                )
                print(
                    "STATIC const mp_obj_float_t %s = {{&mp_type_float}, (mp_float_t)%.16g};"
                    % (obj_name, obj)
                )
                print("#endif")
            elif type(obj) is complex:
                print(
                    "STATIC const mp_obj_complex_t %s = {{&mp_type_complex}, (mp_float_t)%.16g, (mp_float_t)%.16g};"
                    % (obj_name, obj.real, obj.imag)
                )
            else:
                raise FreezeError(self, "freezing of object %r is not implemented" % (obj,))

        # generate constant table, if it has any entries
        const_table_len = len(self.qstrs) + len(self.objs) + len(self.raw_codes)
        if const_table_len:
            print(
                "STATIC const mp_rom_obj_t const_table_data_%s[%u] = {"
                % (self.escaped_name, const_table_len)
            )
            for qst in self.qstrs:
                print("    MP_ROM_QSTR(%s)," % global_qstrs[qst].qstr_id)
            for i in range(len(self.objs)):
                if self.objs[i] is MPFunTable:
                    print("    &mp_fun_table,")
                elif type(self.objs[i]) is float:
                    print(
                        "#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A || MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_B"
                    )
                    print("    MP_ROM_PTR(&const_obj_%s_%u)," % (self.escaped_name, i))
                    print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C")
                    # float packed directly into the 32-bit object word
                    n = self._float_bits_repr_c(self.objs[i])
                    print("    (mp_rom_obj_t)(0x%08x)," % (n,))
                    print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D")
                    # double packed directly into the 64-bit object word
                    n = self._float_bits_repr_d(self.objs[i])
                    print("    (mp_rom_obj_t)(0x%016x)," % (n,))
                    print("#endif")
                else:
                    print("    MP_ROM_PTR(&const_obj_%s_%u)," % (self.escaped_name, i))
            for rc in self.raw_codes:
                print("    MP_ROM_PTR(&raw_code_%s)," % rc.escaped_name)
            print("};")

    def freeze_module(self, qstr_links=(), type_sig=0):
        """Print the mp_raw_code_t definition for this code.

        qstr_links is only used for its length (.n_qstr) and type_sig is
        written to .type_sig.
        """
        # generate module
        if self.simple_name.str != "<module>":
            # only the top-level "<module>" raw code is emitted without STATIC
            print("STATIC ", end="")
        print("const mp_raw_code_t raw_code_%s = {" % self.escaped_name)
        print("    .kind = %s," % RawCode.code_kind_str[self.code_kind])
        print("    .scope_flags = 0x%02x," % self.prelude[2])
        print("    .n_pos_args = %u," % self.prelude[3])
        print("    .fun_data = fun_data_%s," % self.escaped_name)
        if len(self.qstrs) + len(self.objs) + len(self.raw_codes):
            print("    .const_table = (mp_uint_t*)const_table_data_%s," % self.escaped_name)
        else:
            print("    .const_table = NULL,")
        print("    #if MICROPY_PERSISTENT_CODE_SAVE")
        print("    .fun_data_len = %u," % len(self.bytecode))
        print("    .n_obj = %u," % len(self.objs))
        print("    .n_raw_code = %u," % len(self.raw_codes))
        if self.code_kind == MP_CODE_BYTECODE:
            print("    #if MICROPY_PY_SYS_SETTRACE")
            print("    .prelude = {")
            print("        .n_state = %u," % self.prelude[0])
            print("        .n_exc_stack = %u," % self.prelude[1])
            print("        .scope_flags = %u," % self.prelude[2])
            print("        .n_pos_args = %u," % self.prelude[3])
            print("        .n_kwonly_args = %u," % self.prelude[4])
            print("        .n_def_pos_args = %u," % self.prelude[5])
            print("        .qstr_block_name = %s," % self.simple_name.qstr_id)
            print("        .qstr_source_file = %s," % self.source_file.qstr_id)
            print(
                "        .line_info = fun_data_%s + %u,"
                % (self.escaped_name, self.line_info_offset)
            )
            print("        .opcodes = fun_data_%s + %u," % (self.escaped_name, self.ip))
            print("    },")
            print("    .line_of_definition = %u," % 0)  # TODO
            print("    #endif")
        print("    #if MICROPY_EMIT_MACHINE_CODE")
        print("    .prelude_offset = %u," % self.prelude_offset)
        print("    .n_qstr = %u," % len(qstr_links))
        print("    .qstr_link = NULL,")  # TODO
        print("    #endif")
        print("    #endif")
        print("    #if MICROPY_EMIT_MACHINE_CODE")
        print("    .type_sig = %u," % type_sig)
        print("    #endif")
        print("};")
426
427
class RawCodeBytecode(RawCode):
    """Raw code holding bytecode; its prelude starts at offset 0."""

    def __init__(self, bytecode, qstrs, objs, raw_codes):
        super(RawCodeBytecode, self).__init__(
            MP_CODE_BYTECODE, bytecode, 0, qstrs, objs, raw_codes
        )

    def freeze(self, parent_name):
        """Print the C source for this bytecode function and its children."""
        self.freeze_children(parent_name)

        code = self.bytecode

        def hex_row(start, stop):
            # one indented row of literal bytes taken from code[start:stop]
            return "   " + "".join(" 0x%02x," % code[pos] for pos in range(start, stop))

        # generate bytecode data
        print()
        print(
            "// frozen bytecode for file %s, scope %s%s"
            % (self.source_file.str, parent_name, self.simple_name.str)
        )
        # the array is const unless the lookup cache is enabled
        qualifiers = "STATIC "
        if not config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE:
            qualifiers += "const "
        print("%sbyte fun_data_%s[%u] = {" % (qualifiers, self.escaped_name, len(code)))

        # prelude: literal bytes up to the two qstrs, the qstrs by symbolic
        # name, then the remaining prelude bytes
        name_id = self.simple_name.qstr_id
        file_id = self.source_file.qstr_id
        print(hex_row(0, self.ip2))
        print("    %s & 0xff, %s >> 8," % (name_id, name_id))
        print("    %s & 0xff, %s >> 8," % (file_id, file_id))
        print(hex_row(self.ip2 + 4, self.ip))

        # opcodes, one instruction per output line
        pos = self.ip
        end = len(code)
        while pos < end:
            fmt, size = mp_opcode_format(code, pos, True)
            if fmt == MP_BC_FORMAT_QSTR:
                # the qstr operand is emitted by symbolic name
                qst = self._unpack_qstr(pos + 1).qstr_id
                trailing = "" if size == 3 else " 0x%02x," % code[pos + 3]
                print("    0x%02x, %s & 0xff, %s >> 8, %s" % (code[pos], qst, qst, trailing))
            else:
                print("    " + "".join("0x%02x, " % code[pos + k] for k in range(size)))
            pos += size
        print("};")

        self.freeze_constants()
        self.freeze_module()
471
472
class RawCodeNative(RawCode):
    """Raw code holding native machine code (native, viper or inline assembler)."""

    def __init__(
        self,
        code_kind,
        fun_data,
        prelude_offset,
        prelude,
        qstr_links,
        qstrs,
        objs,
        raw_codes,
        type_sig,
    ):
        """Store the native code and pick the C attributes for its data array.

        qstr_links is a sequence of (offset, kind, qstr) tuples describing
        where qstr values must be patched into the machine code.  The section
        and alignment attributes depend on config.native_arch.
        """
        super(RawCodeNative, self).__init__(
            code_kind, fun_data, prelude_offset, qstrs, objs, raw_codes
        )
        # overrides any prelude decoded by the base class
        self.prelude = prelude
        self.qstr_links = qstr_links
        self.type_sig = type_sig
        # the two variants differ only in the characters used after the
        # section name ("@progbits # " versus "%progbits @ ")
        if config.native_arch in (
            MP_NATIVE_ARCH_X86,
            MP_NATIVE_ARCH_X64,
            MP_NATIVE_ARCH_XTENSA,
            MP_NATIVE_ARCH_XTENSAWIN,
        ):
            self.fun_data_attributes = '__attribute__((section(".text,\\"ax\\",@progbits # ")))'
        else:
            self.fun_data_attributes = '__attribute__((section(".text,\\"ax\\",%progbits @ ")))'

        # Allow single-byte alignment by default for x86/x64.
        # ARM needs word alignment, ARM Thumb needs halfword, due to instruction size.
        # Xtensa needs word alignment due to the 32-bit constant table embedded in the code.
        if config.native_arch in (
            MP_NATIVE_ARCH_ARMV6,
            MP_NATIVE_ARCH_XTENSA,
            MP_NATIVE_ARCH_XTENSAWIN,
        ):
            # ARMV6 or Xtensa -- four byte align.
            self.fun_data_attributes += " __attribute__ ((aligned (4)))"
        elif MP_NATIVE_ARCH_ARMV6M <= config.native_arch <= MP_NATIVE_ARCH_ARMV7EMDP:
            # ARMVxxM -- two byte align.
            self.fun_data_attributes += " __attribute__ ((aligned (2)))"

    def _asm_thumb_rewrite_mov(self, pc, val):
        """Print the four bytes at pc with the C expression val merged into them.

        val is a string containing a C expression; the masks keep some bits of
        the existing bytes and fill the others from val (byte 2 is taken
        entirely from val).
        """
        print("    (%u & 0xf0) | (%s >> 12)," % (self.bytecode[pc], val), end="")
        print(" (%u & 0xfb) | (%s >> 9 & 0x04)," % (self.bytecode[pc + 1], val), end="")
        print(" (%s & 0xff)," % (val,), end="")
        print(" (%u & 0x07) | (%s >> 4 & 0x70)," % (self.bytecode[pc + 3], val))

    def _link_qstr(self, pc, kind, qst):
        """Print the bytes for one qstr link and return how many bytes it covers.

        kind 0 is a generic 16-bit link; any other kind is architecture
        specific, with kind 2 meaning the qstr is linked as an object
        (wrapped in MP_OBJ_NEW_QSTR).
        """
        if kind == 0:
            # Generic 16-bit link
            print("    %s & 0xff, %s >> 8," % (qst, qst))
            return 2
        else:
            # Architecture-specific link
            is_obj = kind == 2
            if is_obj:
                qst = "((uintptr_t)MP_OBJ_NEW_QSTR(%s))" % qst
            if config.native_arch in (
                MP_NATIVE_ARCH_X86,
                MP_NATIVE_ARCH_X64,
                MP_NATIVE_ARCH_ARMV6,
                MP_NATIVE_ARCH_XTENSA,
                MP_NATIVE_ARCH_XTENSAWIN,
            ):
                # 32-bit little-endian value
                print(
                    "    %s & 0xff, (%s >> 8) & 0xff, (%s >> 16) & 0xff, %s >> 24,"
                    % (qst, qst, qst, qst)
                )
                return 4
            elif MP_NATIVE_ARCH_ARMV6M <= config.native_arch <= MP_NATIVE_ARCH_ARMV7EMDP:
                if is_obj:
                    # qstr object, movw and movt
                    self._asm_thumb_rewrite_mov(pc, qst)
                    self._asm_thumb_rewrite_mov(pc + 4, "(%s >> 16)" % qst)
                    return 8
                else:
                    # qstr number, movw instruction
                    self._asm_thumb_rewrite_mov(pc, qst)
                    return 4
            else:
                assert 0

    def freeze(self, parent_name):
        """Print the C source for this native function and its children.

        Raises FreezeError if scope_flags (prelude[2]) has any bit set outside
        the low four, which is reported as code needing relocations.
        """
        if self.prelude[2] & ~0x0F:
            # FreezeError takes the raw code first and the message second
            raise FreezeError(self, "unable to freeze code with relocations")

        self.freeze_children(parent_name)

        # generate native code data
        print()
        if self.code_kind == MP_CODE_NATIVE_PY:
            print(
                "// frozen native code for file %s, scope %s%s"
                % (self.source_file.str, parent_name, self.simple_name.str)
            )
        elif self.code_kind == MP_CODE_NATIVE_VIPER:
            print("// frozen viper code for scope %s" % (parent_name,))
        else:
            print("// frozen assembler code for scope %s" % (parent_name,))
        print(
            "STATIC const byte fun_data_%s[%u] %s = {"
            % (self.escaped_name, len(self.bytecode), self.fun_data_attributes)
        )

        # for MP_CODE_NATIVE_PY the machine code ends where the prelude starts;
        # the other kinds are machine code all the way
        if self.code_kind == MP_CODE_NATIVE_PY:
            i_top = self.prelude_offset
        else:
            i_top = len(self.bytecode)
        i = 0
        qi = 0
        while i < i_top:
            if qi < len(self.qstr_links) and i == self.qstr_links[qi][0]:
                # link qstr
                qi_off, qi_kind, qi_val = self.qstr_links[qi]
                qst = global_qstrs[qi_val].qstr_id
                i += self._link_qstr(i, qi_kind, qst)
                qi += 1
            else:
                # copy machine code (max 16 bytes)
                i16 = min(i + 16, i_top)
                if qi < len(self.qstr_links):
                    # stop at the next link so it starts its own line
                    i16 = min(i16, self.qstr_links[qi][0])
                print("   ", end="")
                for ii in range(i, i16):
                    print(" 0x%02x," % self.bytecode[ii], end="")
                print()
                i = i16

        if self.code_kind == MP_CODE_NATIVE_PY:
            # prelude: literal bytes, the two qstrs by symbolic name, then the
            # remaining prelude bytes
            print("   ", end="")
            for i in range(self.prelude_offset, self.ip2):
                print(" 0x%02x," % self.bytecode[i], end="")
            print()

            print("   ", self.simple_name.qstr_id, "& 0xff,", self.simple_name.qstr_id, ">> 8,")
            print("   ", self.source_file.qstr_id, "& 0xff,", self.source_file.qstr_id, ">> 8,")

            print("   ", end="")
            for i in range(self.ip2 + 4, self.ip):
                print(" 0x%02x," % self.bytecode[i], end="")
            print()

        print("};")

        self.freeze_constants()
        self.freeze_module(self.qstr_links, self.type_sig)
621
622
class BytecodeBuffer:
    """Fixed-size byte buffer that is filled sequentially through append()."""

    def __init__(self, size):
        self.idx = 0  # position of the next byte to be written
        self.buf = bytearray(size)

    def is_full(self):
        """Return True once the write position has reached the end of the buffer."""
        return len(self.buf) == self.idx

    def append(self, b):
        """Store byte b at the current position and advance by one."""
        pos = self.idx
        self.buf[pos] = b
        self.idx = pos + 1
634
635
def read_byte(f, out=None):
    """Read one byte from f and return it as an int.

    When out is given, the byte is also appended to it.
    """
    value = bytes_cons(f.read(1))[0]
    if out is None:
        return value
    out.append(value)
    return value
641
642
def read_uint(f, out=None):
    """Read a variable-length unsigned integer from f.

    Each byte supplies seven bits, most significant group first; a set top
    bit means another byte follows.  Bytes are also appended to out if given.
    """
    value = 0
    more = True
    while more:
        byte = read_byte(f, out)
        value = (value << 7) | (byte & 0x7F)
        more = (byte & 0x80) != 0
    return value
651
652
def read_qstr(f, qstr_win):
    """Read one qstr reference from f and return its index in global_qstrs.

    A zero header means a static qstr whose index is the next byte; an odd
    header refers to an entry of qstr_win; otherwise the header (shifted
    right by one) is the length of a new string, which is added to
    global_qstrs and pushed onto qstr_win.
    """
    header = read_uint(f)
    if header == 0:
        # static qstr
        return bytes_cons(f.read(1))[0]

    payload = header >> 1
    if header & 1:
        # qstr in table
        return qstr_win.access(payload)

    # new qstr: payload is its length in bytes
    text = str_cons(f.read(payload), "utf8")
    new_index = len(global_qstrs)
    global_qstrs.append(QStrType(text))
    qstr_win.push(new_index)
    return new_index
666
667
def read_obj(f):
    """Read one constant object from f.

    The first byte is a type tag: "e" (Ellipsis, no payload), or "s", "b",
    "i", "f", "c" followed by a length-prefixed payload decoded as str,
    bytes, int, float or complex respectively.
    """
    tag = f.read(1)
    if tag == b"e":
        return Ellipsis

    payload = f.read(read_uint(f))
    if tag == b"s":
        return str_cons(payload, "utf8")
    if tag == b"b":
        return bytes_cons(payload)
    if tag == b"i":
        return int(str_cons(payload, "ascii"), 10)
    if tag == b"f":
        return float(str_cons(payload, "ascii"))
    if tag == b"c":
        return complex(str_cons(payload, "ascii"))
    # unknown type tag
    assert 0
686
687
def read_prelude(f, bytecode, qstr_win):
    """Read a bytecode prelude from f, copying it into the bytecode buffer.

    The two qstrs (simple_name, source_file) are stored in the buffer as
    16-bit global qstr indices.  Returns (n_state, n_exc_stack, scope_flags,
    n_pos_args, n_kwonly_args, n_def_pos_args).
    """

    def next_byte():
        # every byte consumed is mirrored into the output buffer
        return read_byte(f, bytecode)

    (
        n_state,
        n_exc_stack,
        scope_flags,
        n_pos_args,
        n_kwonly_args,
        n_def_pos_args,
    ) = read_prelude_sig(next_byte)
    n_info, n_cell = read_prelude_size(next_byte)

    read_qstr_and_pack(f, bytecode, qstr_win)  # simple_name
    read_qstr_and_pack(f, bytecode, qstr_win)  # source_file

    # the two packed qstrs account for 4 bytes of the info section
    remaining = n_info - 4 + n_cell
    for _ in range(remaining):
        next_byte()
    return n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args
703
704
def read_qstr_and_pack(f, bytecode, qstr_win):
    """Read a qstr from f and append its index to bytecode, low byte first."""
    index = read_qstr(f, qstr_win)
    low, high = index & 0xFF, index >> 8
    bytecode.append(low)
    bytecode.append(high)
709
710
def read_bytecode(file, bytecode, qstr_win):
    """Read opcodes from file into the bytecode buffer until it is full.

    Qstr operands are replaced by 16-bit global qstr indices; all other
    bytes are copied unchanged.
    """
    while not bytecode.is_full():
        read_byte(file, bytecode)  # the opcode itself
        opcode_pos = bytecode.idx - 1
        fmt, size = mp_opcode_format(bytecode.buf, opcode_pos, False)
        remaining = size - 1  # the opcode byte has already been consumed
        if fmt == MP_BC_FORMAT_QSTR:
            read_qstr_and_pack(file, bytecode, qstr_win)
            remaining -= 2
        elif fmt == MP_BC_FORMAT_VAR_UINT:
            # copy the variable-length integer: stop after a byte with a clear top bit
            more = True
            while more:
                more = read_byte(file, bytecode) & 0x80
        for _ in range(remaining):
            read_byte(file, bytecode)
724
725
def read_raw_code(f, qstr_win):
    """Read one raw code (and, recursively, its children) from f.

    Returns a RawCodeBytecode for bytecode and a RawCodeNative for the native
    kinds.  qstr_win is the QStrWindow shared by all qstr reads of the file.
    """
    # header: low two bits select the kind, the rest is the function data length
    kind_len = read_uint(f)
    kind = (kind_len & 3) + MP_CODE_BYTECODE
    fun_data_len = kind_len >> 2
    fun_data = BytecodeBuffer(fun_data_len)

    if kind == MP_CODE_BYTECODE:
        # bytecode is decoded piece by piece so that qstrs can be rewritten
        prelude = read_prelude(f, fun_data, qstr_win)
        read_bytecode(f, fun_data, qstr_win)
    else:
        # native code is copied verbatim
        fun_data.buf[:] = f.read(fun_data_len)

        qstr_links = []
        if kind in (MP_CODE_NATIVE_PY, MP_CODE_NATIVE_VIPER):
            # load qstr link table
            n_qstr_link = read_uint(f)
            for _ in range(n_qstr_link):
                # each entry: (offset << 2 | link kind) followed by the qstr
                off = read_uint(f)
                qst = read_qstr(f, qstr_win)
                qstr_links.append((off >> 2, off & 3, qst))

        type_sig = 0
        if kind == MP_CODE_NATIVE_PY:
            prelude_offset = read_uint(f)
            _, name_idx, prelude = extract_prelude(fun_data.buf, prelude_offset)
            fun_data.idx = name_idx  # rewind to where qstrs are in prelude
            read_qstr_and_pack(f, fun_data, qstr_win)  # simple_name
            read_qstr_and_pack(f, fun_data, qstr_win)  # source_file
        else:
            # viper / assembler: no prelude in the data, build a minimal one
            prelude_offset = None
            scope_flags = read_uint(f)
            n_pos_args = 0
            if kind == MP_CODE_NATIVE_ASM:
                n_pos_args = read_uint(f)
                type_sig = read_uint(f)
            # same index layout as a real prelude for entries 2..4 (no n_def_pos_args)
            prelude = (None, None, scope_flags, n_pos_args, 0)

    qstrs = []
    objs = []
    raw_codes = []
    if kind != MP_CODE_NATIVE_ASM:
        # load constant table
        n_obj = read_uint(f)
        n_raw_code = read_uint(f)
        # one qstr per positional and keyword-only argument
        qstrs = [read_qstr(f, qstr_win) for _ in range(prelude[3] + prelude[4])]
        if kind != MP_CODE_BYTECODE:
            # native code gets the MPFunTable marker as its first object
            objs.append(MPFunTable)
        objs.extend([read_obj(f) for _ in range(n_obj)])
        raw_codes = [read_raw_code(f, qstr_win) for _ in range(n_raw_code)]

    if kind == MP_CODE_BYTECODE:
        return RawCodeBytecode(fun_data.buf, qstrs, objs, raw_codes)
    else:
        return RawCodeNative(
            kind,
            fun_data.buf,
            prelude_offset,
            prelude,
            qstr_links,
            qstrs,
            objs,
            raw_codes,
            type_sig,
        )
790
791
def read_mpy(filename):
    """Load the .mpy file at filename and return its top-level raw code.

    Settings taken from the file header are stored on the global config
    object; config.native_arch must already be set by the caller.  The
    returned object additionally gets mpy_source_file and qstr_win_size
    attributes.

    Raises:
        Exception: if the file is truncated or is not a .mpy file, if its
            version differs from config.MPY_VERSION, or if its native
            architecture conflicts with the one already configured.
    """
    with open(filename, "rb") as f:
        header = bytes_cons(f.read(4))
        # a short read means the file cannot hold a complete header; report it
        # as an invalid file instead of failing with IndexError below
        if len(header) < 4 or header[0] != ord("M"):
            raise Exception("not a valid .mpy file")
        if header[1] != config.MPY_VERSION:
            raise Exception("incompatible .mpy version")
        feature_byte = header[2]
        qw_size = read_uint(f)
        # feature byte: bit 0 and bit 1 are flags, the remaining bits are the
        # native architecture
        config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE = (feature_byte & 1) != 0
        config.MICROPY_PY_BUILTINS_STR_UNICODE = (feature_byte & 2) != 0
        mpy_native_arch = feature_byte >> 2
        if mpy_native_arch != MP_NATIVE_ARCH_NONE:
            if config.native_arch == MP_NATIVE_ARCH_NONE:
                # the first file containing native code fixes the architecture
                config.native_arch = mpy_native_arch
            elif config.native_arch != mpy_native_arch:
                raise Exception("native architecture mismatch")
        config.mp_small_int_bits = header[3]
        qstr_win = QStrWindow(qw_size)
        rc = read_raw_code(f, qstr_win)
        rc.mpy_source_file = filename
        rc.qstr_win_size = qw_size
        return rc
815
816
def dump_mpy(raw_codes):
    """Call dump() on every raw code in raw_codes, in order."""
    for raw_code in raw_codes:
        raw_code.dump()
820
821
def freeze_mpy(base_qstrs, raw_codes):
    """Print the C source that freezes ``raw_codes`` to standard output.

    The generated text is, in order: the ``#include`` lines, ``#if``/``#error``
    guards that compare the target's build configuration with the values held
    in ``config``, the ``mp_obj_float_t`` / ``mp_obj_complex_t`` definitions,
    an enum plus the ``mp_qstr_frozen_const_pool`` for every qstr in
    ``global_qstrs`` that is not already in ``base_qstrs``, then (after each
    raw code's ``freeze()`` method has been called) the ``mp_frozen_mpy_names``
    and ``mp_frozen_mpy_content`` tables and the optional
    ``MICROPY_FROZEN_LIST_ITEM`` listing.

    base_qstrs -- container of escaped qstr names that must not be emitted again
    raw_codes  -- objects providing ``freeze(prefix)``, ``source_file.str`` and
                  ``escaped_name``
    """

    def emit(*lines):
        # One output line per argument; an empty string gives a blank line.
        for line in lines:
            print(line)

    # Gather the qstrs that still need defining, in first-seen order and
    # without duplicates.
    seen_escs = set()
    extra_qstrs = []  # (escaped name, original string) pairs
    for q in global_qstrs:
        if q is None:
            continue
        esc = q.qstr_esc
        if esc in base_qstrs or esc in seen_escs:
            continue
        seen_escs.add(esc)
        extra_qstrs.append((esc, q.str))

    emit(
        '#include "py/mpconfig.h"',
        '#include "py/objint.h"',
        '#include "py/objstr.h"',
        '#include "py/emitglue.h"',
        '#include "py/nativeglue.h"',
        "",
    )

    # Compile-time guards: the frozen data is only valid for a matching build.
    emit(
        "#if MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE != %u"
        % config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE,
        '#error "incompatible MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE"',
        "#endif",
        "",
    )
    emit(
        "#if MICROPY_LONGINT_IMPL != %u" % config.MICROPY_LONGINT_IMPL,
        '#error "incompatible MICROPY_LONGINT_IMPL"',
        "#endif",
        "",
    )
    if config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_MPZ:
        emit(
            "#if MPZ_DIG_SIZE != %u" % config.MPZ_DIG_SIZE,
            '#error "incompatible MPZ_DIG_SIZE"',
            "#endif",
            "",
        )

    emit(
        "#if MICROPY_PY_BUILTINS_FLOAT",
        "typedef struct _mp_obj_float_t {",
        "    mp_obj_base_t base;",
        "    mp_float_t value;",
        "} mp_obj_float_t;",
        "#endif",
        "",
    )
    emit(
        "#if MICROPY_PY_BUILTINS_COMPLEX",
        "typedef struct _mp_obj_complex_t {",
        "    mp_obj_base_t base;",
        "    mp_float_t real;",
        "    mp_float_t imag;",
        "} mp_obj_complex_t;",
        "#endif",
        "",
    )

    # The first new qstr continues numbering from MP_QSTRnumber_of.
    if extra_qstrs:
        emit("enum {")
        for position, (esc, _) in enumerate(extra_qstrs):
            tail = " = MP_QSTRnumber_of," if position == 0 else ","
            emit("    MP_QSTR_%s%s" % (esc, tail))
        emit("};")

    # As in qstr.c, set so that the first dynamically allocated pool is twice
    # this size; must be <= the number of entries.
    pool_alloc = min(len(extra_qstrs), 10)

    emit(
        "",
        "extern const qstr_pool_t mp_qstr_const_pool;",
        "const qstr_pool_t mp_qstr_frozen_const_pool = {",
        "    (qstr_pool_t*)&mp_qstr_const_pool, // previous pool",
        "    MP_QSTRnumber_of, // previous pool size",
        "    %u, // allocated entries" % pool_alloc,
        "    %u, // used entries" % len(extra_qstrs),
        "    {",
    )
    for _, text in extra_qstrs:
        emit(
            "        %s,"
            % qstrutil.make_bytes(
                config.MICROPY_QSTR_BYTES_IN_LEN, config.MICROPY_QSTR_BYTES_IN_HASH, text
            )
        )
    emit("    },", "};")

    # Each raw code is frozen under a prefix derived from its source path:
    # "/" becomes "_" and the last three characters are dropped.
    for raw_code in raw_codes:
        flattened = raw_code.source_file.str.replace("/", "_")
        raw_code.freeze(flattened[:-3] + "_")

    emit("", "const char mp_frozen_mpy_names[] = {")
    for raw_code in raw_codes:
        emit('"%s\\0"' % raw_code.source_file.str)
    emit('"\\0"};')

    emit("const mp_raw_code_t *const mp_frozen_mpy_content[] = {")
    for raw_code in raw_codes:
        emit("    &raw_code_%s," % raw_code.escaped_name)
    emit("};")

    # If a port defines MICROPY_FROZEN_LIST_ITEM then list all modules wrapped in that macro.
    emit("#ifdef MICROPY_FROZEN_LIST_ITEM")
    for raw_code in raw_codes:
        module_name = raw_code.source_file.str
        suffix = "/__init__.py" if module_name.endswith("/__init__.py") else ".py"
        emit(
            'MICROPY_FROZEN_LIST_ITEM("%s", "%s")' % (module_name[: -len(suffix)], module_name)
        )
    emit("#endif")
930
931
def _encode_mpy_uint(value):
    """Return ``value`` (a non-negative int) as a variable-length unsigned int.

    The result is a bytearray holding 7 bits of ``value`` per byte, most
    significant group first; every byte except the last has bit 7 set to
    signal that more bytes follow.  Values below 128 encode to a single byte
    equal to the value itself.
    """
    encoded = bytearray([value & 0x7F])
    value >>= 7
    while value:
        encoded.insert(0, 0x80 | (value & 0x7F))
        value >>= 7
    return encoded


def merge_mpy(raw_codes, output_file):
    """Merge the .mpy files behind ``raw_codes`` into a single .mpy image.

    With exactly one raw code its source file is copied through unchanged.
    Otherwise a new header is written from the current ``config`` values,
    followed by a small wrapper function that makes and calls each merged
    module in order and then returns None, followed by the body (everything
    after the header and qstr window size) of every input file.

    All variable-length fields of the wrapper (its length, the raw-code
    indices and the raw-code count) are written with ``_encode_mpy_uint``, so
    the wrapper stays well-formed however many files are merged.

    raw_codes   -- objects whose ``mpy_source_file`` names the .mpy file to merge
    output_file -- path to write the result to, or None to write to stdout
    """
    merged_mpy = bytearray()

    if len(raw_codes) == 1:
        with open(raw_codes[0].mpy_source_file, "rb") as f:
            merged_mpy.extend(f.read())
    else:
        header = bytearray(5)
        header[0] = ord("M")
        header[1] = config.MPY_VERSION
        # Feature byte: native arch in the upper bits, unicode flag in bit 1,
        # map-lookup-cache flag in bit 0.
        header[2] = (
            config.native_arch << 2
            | config.MICROPY_PY_BUILTINS_STR_UNICODE << 1
            | config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE
        )
        header[3] = config.mp_small_int_bits
        header[4] = 32  # qstr_win_size
        merged_mpy.extend(header)

        # Build the wrapper's bytecode first so its real length can be
        # measured instead of predicted.
        bytecode = bytearray()
        bytecode.append(0b00000000)  # signature prelude
        bytecode.append(0b00001000)  # size prelude
        bytecode.extend(b"\x00\x01")  # MP_QSTR_
        bytecode.extend(b"\x00\x01")  # MP_QSTR_
        for idx in range(len(raw_codes)):
            bytecode.append(0x32)  # MP_BC_MAKE_FUNCTION
            bytecode.extend(_encode_mpy_uint(idx))  # index raw code
            bytecode.extend(b"\x34\x00\x59")  # MP_BC_CALL_FUNCTION, 0 args, MP_BC_POP_TOP
        bytecode.extend(b"\x51\x63")  # MP_BC_LOAD_NONE, MP_BC_RETURN_VALUE

        # Kind (low two bits, 0 here) and length.  This is a var-uint: a
        # single raw byte only works while the value is below 128, i.e. for
        # at most four merged files.
        merged_mpy.extend(_encode_mpy_uint(len(bytecode) << 2))
        merged_mpy.extend(bytecode)
        merged_mpy.extend(_encode_mpy_uint(0))  # n_obj
        merged_mpy.extend(_encode_mpy_uint(len(raw_codes)))  # n_raw_code

        for rc in raw_codes:
            with open(rc.mpy_source_file, "rb") as f:
                f.read(4)  # skip header
                read_uint(f)  # skip qstr_win_size
                merged_mpy.extend(f.read())  # rest of mpy file

    if output_file is None:
        # Python 3 needs the underlying binary stream; Python 2's stdout has
        # no ``buffer`` attribute and takes the bytes directly.
        binary_stdout = getattr(sys.stdout, "buffer", sys.stdout)
        binary_stdout.write(bytes(merged_mpy))
    else:
        with open(output_file, "wb") as f:
            f.write(merged_mpy)
982
983
def main():
    """Command-line entry point.

    Parses the command line, stores the target-related settings in ``config``,
    loads every input file with ``read_mpy`` and then performs the first
    requested action out of dump, freeze and merge (checked in that order).
    A ``FreezeError`` raised while freezing is printed to stderr and the
    process exits with status 1.
    """
    import argparse

    # (option strings, add_argument keyword arguments), in registration order.
    arg_specs = (
        (("-d", "--dump"), dict(action="store_true", help="dump contents of files")),
        (("-f", "--freeze"), dict(action="store_true", help="freeze files")),
        (("--merge",), dict(action="store_true", help="merge multiple .mpy files into one")),
        (("-q", "--qstr-header"), dict(help="qstr header file to freeze against")),
        (
            ("-mlongint-impl",),
            dict(
                choices=["none", "longlong", "mpz"],
                default="mpz",
                help="long-int implementation used by target (default mpz)",
            ),
        ),
        (
            ("-mmpz-dig-size",),
            dict(
                metavar="N",
                type=int,
                default=16,
                help="mpz digit size used by target (default 16)",
            ),
        ),
        (("-o", "--output"), dict(default=None, help="output file")),
        (("files",), dict(nargs="+", help="input .mpy files")),
    )
    parser = argparse.ArgumentParser(description="A tool to work with MicroPython .mpy files.")
    for option_strings, settings in arg_specs:
        parser.add_argument(*option_strings, **settings)
    options = parser.parse_args()

    # set config values relevant to target machine
    longint_impls = {
        "none": config.MICROPY_LONGINT_IMPL_NONE,
        "longlong": config.MICROPY_LONGINT_IMPL_LONGLONG,
        "mpz": config.MICROPY_LONGINT_IMPL_MPZ,
    }
    config.MICROPY_LONGINT_IMPL = longint_impls[options.mlongint_impl]
    config.MPZ_DIG_SIZE = options.mmpz_dig_size
    config.native_arch = MP_NATIVE_ARCH_NONE

    # set config values for qstrs, and get the existing base set of qstrs
    if not options.qstr_header:
        config.MICROPY_QSTR_BYTES_IN_LEN = 1
        config.MICROPY_QSTR_BYTES_IN_HASH = 1
        base_qstrs = {}
    else:
        qcfgs, base_qstrs = qstrutil.parse_input_headers([options.qstr_header])
        config.MICROPY_QSTR_BYTES_IN_LEN = int(qcfgs["BYTES_IN_LEN"])
        config.MICROPY_QSTR_BYTES_IN_HASH = int(qcfgs["BYTES_IN_HASH"])

    raw_codes = []
    for path in options.files:
        raw_codes.append(read_mpy(path))

    # Only the first matching action runs.
    if options.dump:
        dump_mpy(raw_codes)
        return

    if options.freeze:
        try:
            freeze_mpy(base_qstrs, raw_codes)
        except FreezeError as er:
            print(er, file=sys.stderr)
            sys.exit(1)
        return

    if options.merge:
        merge_mpy(raw_codes, options.output)
1042
1043
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
1046