| | |
| | """ |
| | Script to reconstruct the original model file from shards |
| | """ |
| | import json |
| | import hashlib |
| | from pathlib import Path |
| |
|
def reconstruct_file(shards_dir="."):
    """Reassemble the original model file from its shards.

    Looks for a ``*.shards.json`` metadata file in *shards_dir*, then
    concatenates the listed shard files in metadata order, verifying each
    shard's SHA-256 hash before writing it, and finally checks that the
    total number of bytes written matches the size recorded in the metadata.

    Args:
        shards_dir: Directory containing the shard files and the
            ``*.shards.json`` metadata file. Defaults to the current
            directory.

    Returns:
        bool: True on success; False if the metadata or a shard is missing,
        a shard's hash does not match, or the final size disagrees with
        the metadata.
    """
    shards_dir = Path(shards_dir)

    # Sort so the choice is deterministic when several metadata files exist
    # (glob order is filesystem-dependent).
    metadata_files = sorted(shards_dir.glob("*.shards.json"))
    if not metadata_files:
        print("Error: No shards metadata file found")
        return False
    if len(metadata_files) > 1:
        print(f"Warning: multiple metadata files found, using {metadata_files[0]}")

    metadata_path = metadata_files[0]
    print(f"Loading metadata: {metadata_path}")

    with open(metadata_path, 'r') as f:
        metadata = json.load(f)

    # NOTE(review): the output path comes straight from the metadata; a
    # relative path resolves against the current working directory, not
    # shards_dir — confirm that is the intended behavior for callers.
    output_file = metadata["original_file"]
    print(f"Reconstructing: {output_file}")
    print(f"  Expected size: {metadata['file_size'] / (1024**3):.2f} GB")
    print(f"  Number of shards: {metadata['num_shards']}")

    bytes_written = 0
    with open(output_file, 'wb') as f_out:
        for shard_info in metadata["shards"]:
            shard_path = shards_dir / shard_info["filename"]
            print(f"  Processing shard {shard_info['index'] + 1}/{metadata['num_shards']}: {shard_info['filename']}")

            if not shard_path.exists():
                print(f"Error: Shard not found: {shard_path}")
                return False

            with open(shard_path, 'rb') as f_in:
                chunk_data = f_in.read()

            # Verify integrity BEFORE appending this shard to the output.
            chunk_hash = hashlib.sha256(chunk_data).hexdigest()
            if chunk_hash != shard_info["sha256"]:
                print(f"Error: Hash mismatch for {shard_info['filename']}")
                print(f"  Expected: {shard_info['sha256']}")
                print(f"  Got: {chunk_hash}")
                return False

            f_out.write(chunk_data)
            bytes_written += len(chunk_data)

    # Final safety net: per-shard hashes cannot catch a missing or extra
    # shard entry, but the total size recorded in the metadata can.
    if bytes_written != metadata["file_size"]:
        print(f"Error: Size mismatch: expected {metadata['file_size']} bytes, got {bytes_written}")
        return False

    print(f"\n✓ Reconstruction complete: {output_file}")
    return True
| |
|
if __name__ == "__main__":
    import sys
    # Optional first CLI argument selects the shards directory.
    shards_dir = sys.argv[1] if len(sys.argv) > 1 else "."
    success = reconstruct_file(shards_dir)
    # sys.exit is the canonical way to set the process exit code; the
    # builtin exit() is a site.py convenience meant for interactive use
    # and is not guaranteed to exist (e.g. under `python -S`).
    sys.exit(0 if success else 1)
| |
|