#!/usr/bin/env python3
"""
Transplant extra tensors (e.g. MTP layers) from one GGUF file into another,
producing a mixed-quantization GGUF.

Note: Tested with ik_llama.cpp GGUF Python module.

Usage:
    python convert.py <target> <source> <output>

Arguments:
    target  — base GGUF (tensors + metadata kept as-is)
    source  — GGUF with extra blocks to transplant (e.g. blk.64.* for MTP)
    output  — resulting mixed-quantization GGUF

The script preserves the exact on-disk layout including per-row metadata
for quantization types like IQ4_KS that have row_meta_size > 0. This is
critical for GPU inference to work correctly.

Example:
    # Transplant MTP block from Q8_0 into IQ4_KS base model
    python convert.py Qwen3.6-27B-IQ4_KS.gguf Qwen3.6-27B-MTP-Q8_0.gguf Qwen3.6-27B-MTP-IQ4_KS.gguf
"""

import hashlib
import sys
import struct
from pathlib import Path

from gguf import GGUFReader, GGUFValueType


def get_field_value(reader: GGUFReader, key: str):
    """Safely get a field value from GGUFReader.

    Looks the key up with ``reader.get_field(key)`` and returns the result of
    the field's ``contents()`` call. Returns ``None`` when the lookup yields a
    falsy result (e.g. the key is not present).
    """
    field = reader.get_field(key)
    return field.contents() if field else None


def calculate_on_disk_sizes(tensors, file_size):
    """Calculate on-disk size for each tensor (including per-row metadata/padding).

    Each size is the distance from a tensor's ``data_offset`` to the next
    tensor's ``data_offset``; the last tensor is measured up to ``file_size``.
    Anything stored between two consecutive offsets is therefore counted as
    part of the earlier tensor.

    Assumes ``tensors`` is ordered by ascending ``data_offset`` and that the
    offsets are in the same coordinate system as ``file_size`` — TODO confirm
    against the reader in use.

    Returns a list with one size per tensor, in input order (empty for an
    empty input).
    """
    starts = [t.data_offset for t in tensors]
    # A tensor ends where the next one begins; the final one ends at EOF.
    ends = starts[1:] + [file_size]
    return [end - start for start, end in zip(starts, ends)]


# NOTE(review): the rest of this source line is kept verbatim below as a comment.
# It appears to have lost text (the struct.pack format string, the remainder of
# write_kv_value, and the usage placeholders), so it is not reformatted or
# reconstructed here — restore it from the original file.
# def write_kv_value(fout, kv_type, value): """Write a KV value to the output file.""" if kv_type == GGUFValueType.STRING: value_bytes = value.encode("utf-8") fout.write(struct.pack(" None: if len(sys.argv) != 4: print( f"Usage: {sys.argv[0]} ", file=sys.stderr, ) sys.exit(1) target_path, source_path, output_path = sys.argv[1], sys.argv[2], sys.argv[3] # ------------------------------------------------------------------ # 1. 
Open both files # ------------------------------------------------------------------ print(f"Reading target: {target_path}") target_reader = GGUFReader(target_path) print(f"Reading source: {source_path}") source_reader = GGUFReader(source_path) target_file_size = Path(target_path).stat().st_size source_file_size = Path(source_path).stat().st_size print( f" Target tensors: {len(target_reader.tensors)}, KVs: {len([k for k in target_reader.fields if not k.startswith('GGUF.')])}" ) print( f" Source tensors: {len(source_reader.tensors)}, KVs: {len([k for k in source_reader.fields if not k.startswith('GGUF.')])}" ) # ------------------------------------------------------------------ # 2. Read architecture and MTP metadata from source # ------------------------------------------------------------------ arch = get_field_value(target_reader, "general.architecture") if arch is None: print("ERROR: Target GGUF has no general.architecture key") sys.exit(1) source_block_count = get_field_value(source_reader, f"{arch}.block_count") source_nextn = get_field_value(source_reader, f"{arch}.nextn_predict_layers") if source_nextn is None: print("ERROR: Source GGUF has no nextn_predict_layers key") sys.exit(1) target_block_count = get_field_value(target_reader, f"{arch}.block_count") print(f"\n Arch: {arch}") print(f" Target block_count: {target_block_count}") print( f" Source block_count: {source_block_count}, nextn_predict_layers: {source_nextn}" ) # Identify extra tensors in the source (blocks beyond target's count) source_extra = [ t for t in source_reader.tensors if t.name.startswith(f"blk.{target_block_count}.") ] print(f"\n Extra tensors to transplant: {len(source_extra)}") if not source_extra: print( f"ERROR: No tensors found with prefix 'blk.{target_block_count}.' in source" ) sys.exit(1) # ------------------------------------------------------------------ # 3. 
Prepare tensor lists and calculate sizes # ------------------------------------------------------------------ # Combine tensors: all from target + extra from source all_tensors = list(target_reader.tensors) + source_extra # Calculate on-disk sizes for source tensors (including per-row metadata) target_on_disk_sizes = calculate_on_disk_sizes( target_reader.tensors, target_file_size ) source_on_disk_sizes = calculate_on_disk_sizes( source_reader.tensors, source_file_size ) # Create mapping for source tensors source_tensor_map = { t.name: (t, size) for t, size in zip(source_reader.tensors, source_on_disk_sizes) } # ------------------------------------------------------------------ # 4. Write output file # ------------------------------------------------------------------ print(f"\nWriting output: {output_path}") with ( open(target_path, "rb") as target_fin, open(source_path, "rb") as source_fin, open(output_path, "wb") as fout, ): # 4.1 Write header # Magic (4 bytes) fout.write(b"GGUF") # Version (4 bytes) fout.write(struct.pack(" 1 else GGUFValueType.FLOAT32 ) write_array_value(fout, sub_type, field.contents()) else: write_kv_value(fout, kv_type, field.contents()) written_keys.add(key) # Add block_count from source key = f"{arch}.block_count" key_bytes = key.encode("utf-8") fout.write(struct.pack(" 1 else GGUFValueType.FLOAT32 ) write_array_value(fout, sub_type, field.contents()) else: write_kv_value(fout, kv_type, field.contents()) # 4.3 Write tensor info # Calculate offsets for all tensors current_offset = 0 tensor_offsets = [] for i, tensor in enumerate(all_tensors): if i < len(target_reader.tensors): size = target_on_disk_sizes[i] else: _, size = source_tensor_map[tensor.name] tensor_offsets.append(current_offset) current_offset += size # Write tensor info for each tensor for i, tensor in enumerate(all_tensors): # Tensor name name_bytes = tensor.name.encode("utf-8") fout.write(struct.pack("