{"version":"2.1","updated":"2026-06-23T22:03:15.733Z","live_count":120,"static_count":114,"note":"Live attested runs merged with the static seed. Live records have transcripts available at /api/runs/<id>.json.","runs":[{"id":"run-3fff093406bb","serviceId":"anthropic-claude","benchmarkId":"humaneval","model":"claude-opus-4-7","score":0,"runs":1,"breakdown":{"n":8,"passes":0,"mean_raw":0},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:3e1eb278fb45e71a150b896866387eae8c5bf42c0618c1a543fd5bb03cd3edaf","methodologyHash":"sha256:a2c80bc70417578adad51e92cd412d7b79be24c225b1794bfabb87dd741ccf24","transcriptMerkleRoot":"sha256:4e1bb61936f2a216b0f708b97a366bd3b2155f642d176c7c5d3bd595db0a1ed0","startedAt":"2026-04-27T01:08:12.215Z","finishedAt":"2026-04-27T01:10:05.156Z","durationSeconds":113,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-1","runner_provenance":{"runner_version":"benchlist-vercel-inline@1.0.0","runner_commit":"3b3562b5a0ff421dfb1530aaef628667a31dbd93","runner_repo":"github.com/benchlist/runner","adapter_hash":"sha256:inline-js:humaneval","judge_hash":"sha256:inline-js:scoreOne","lockfile_hash":null,"system_prompt_hash":null,"chat_template_hash":"sha256:inline-js:default","decoding":{"temperature":0,"max_tokens":512,"tier":"easy"},"digest":"sha256:a17c77357134be86c8f0dbbe13c65e4f3852e1695d3192bc7a56b82c9dd900a1"},"publisher":"anthropic-claude","replay":{"command":"benchlist run humaneval --service anthropic-claude --model claude-opus-4-7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"a67941cedb515120e931af7b076240a8bf7cfbf00296243a922d4e45f9b52c6567c2da45084326da60bcf1dad389e9a0cae5f5e68c33b7047bb20b42585f090f","pubkey":"042eeb98bd82298204732dcba981c64b4f329e44a13d750c104c5ec9c1de5498","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:3e1eb278fb45e71a150b896866387eae8c5bf42c0618c1a543fd5bb03cd3edaf","methodology_hash":"sha256:a2c80bc70417578adad51e92cd412d7b79be24c225b1794bfabb87dd741ccf24","merkle_root":"sha256:4e1bb61936f2a216b0f708b97a366bd3b2155f642d176c7c5d3bd595db0a1ed0","claimed_score":0,"runner_provenance":"sha256:a17c77357134be86c8f0dbbe13c65e4f3852e1695d3192bc7a56b82c9dd900a1"}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"042eeb98bd82298204732dcba981c64b4f329e44a13d750c104c5ec9c1de5498","attestorSignature":"a67941cedb515120e931af7b076240a8bf7cfbf00296243a922d4e45f9b52c6567c2da45084326da60bcf1dad389e9a0cae5f5e68c33b7047bb20b42585f090f","signerAlgo":"ed25519","submittedAt":"2026-04-27T01:10:05.156Z","verifiedAt":"2026-04-27T01:10:05.156Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-3d3c1a3f02af","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-haiku-4-5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:7c8b12cc5d0c348124648dc718e845b4190606d57d7e50ff72ba258402c1daad","startedAt":"2026-04-27T01:06:22.209Z","finishedAt":"2026-04-27T01:06:26.439Z","durationSeconds":4,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-1","runner_provenance":{"runner_version":"benchlist-vercel-inline@1.0.0","runner_commit":"3b3562b5a0ff421dfb1530aaef628667a31dbd93","runner_repo":"github.com/benchlist/runner","adapter_hash":"sha256:inline-js:gsm8k","judge_hash":"sha256:inline-js:scoreOne","lockfile_hash":null,"system_prompt_hash":null,"chat_template_hash":"sha256:inline-js:default","decoding":{"temperature":0,"max_tokens":512,"tier":"easy"},"digest":"sha256:7e9f1dfe479c7831a6b11092144a02bab7c52861802c40dbca2faeaec47a46d1"},"publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-haiku-4-5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"869d9ee06c82b4d3c675b8909b8f8808b4a5e95af417d4be0dba0e7d6da908951cbd87fc20d88021efca25d56c0a62e073eb4bfed751373bde107a55ce9f3703","pubkey":"042eeb98bd82298204732dcba981c64b4f329e44a13d750c104c5ec9c1de5498","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:7c8b12cc5d0c348124648dc718e845b4190606d57d7e50ff72ba258402c1daad","claimed_score":100,"runner_provenance":"sha256:7e9f1dfe479c7831a6b11092144a02bab7c52861802c40dbca2faeaec47a46d1"}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"042eeb98bd82298204732dcba981c64b4f329e44a13d750c104c5ec9c1de5498","attestorSignature":"869d9ee06c82b4d3c675b8909b8f8808b4a5e95af417d4be0dba0e7d6da908951cbd87fc20d88021efca25d56c0a62e073eb4bfed751373bde107a55ce9f3703","signerAlgo":"ed25519","submittedAt":"2026-04-27T01:06:26.439Z","verifiedAt":"2026-04-27T01:06:26.439Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-90f62a5afcfb","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-haiku-4-5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:4d7cc7ff5b489d00aee230a9a89ab3d2aca91bd38b7e16f297e7e8a3d8c035d1","startedAt":"2026-04-26T19:26:02.768Z","finishedAt":"2026-04-26T19:26:15.228Z","durationSeconds":12,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","runner_provenance":{"runner_version":"benchlist-vercel-inline@1.0.0","runner_commit":"3b3562b5a0ff421dfb1530aaef628667a31dbd93","runner_repo":"github.com/benchlist/runner","adapter_hash":"sha256:inline-js:gsm8k","judge_hash":"sha256:inline-js:scoreOne","lockfile_hash":null,"system_prompt_hash":null,"chat_template_hash":"sha256:inline-js:default","decoding":{"temperature":0,"max_tokens":512,"tier":"easy"},"digest":"sha256:7e9f1dfe479c7831a6b11092144a02bab7c52861802c40dbca2faeaec47a46d1"},"publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-haiku-4-5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"03fee5c994a957a3cdcea5195327666f03d4fef685bf1fb6d5ddcbf840db416e8d7d15c02e6b5256afc04b68fade73a6594b071e1e0699087868183d6d50dc06","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:4d7cc7ff5b489d00aee230a9a89ab3d2aca91bd38b7e16f297e7e8a3d8c035d1","claimed_score":100,"runner_provenance":"sha256:7e9f1dfe479c7831a6b11092144a02bab7c52861802c40dbca2faeaec47a46d1"}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"03fee5c994a957a3cdcea5195327666f03d4fef685bf1fb6d5ddcbf840db416e8d7d15c02e6b5256afc04b68fade73a6594b071e1e0699087868183d6d50dc06","signerAlgo":"ed25519","submittedAt":"2026-04-26T19:26:15.228Z","verifiedAt":"2026-04-26T19:26:15.228Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-9721c0e7f59a","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-haiku-4-5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:f3b8c6e237ba2bb52525f4dad3544afcfc7153e18883ad82853f39d1971fb13a","startedAt":"2026-04-26T19:18:59.277Z","finishedAt":"2026-04-26T19:20:10.334Z","durationSeconds":71,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","runner_provenance":{"runner_version":"benchlist-vercel-inline@1.0.0","runner_commit":"3b3562b5a0ff421dfb1530aaef628667a31dbd93","runner_repo":"github.com/benchlist/runner","adapter_hash":"sha256:inline-js:gsm8k","judge_hash":"sha256:inline-js:scoreOne","lockfile_hash":null,"system_prompt_hash":null,"chat_template_hash":"sha256:inline-js:default","decoding":{"temperature":0,"max_tokens":512,"tier":"easy"},"digest":"sha256:7e9f1dfe479c7831a6b11092144a02bab7c52861802c40dbca2faeaec47a46d1"},"publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-haiku-4-5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"042f6ba06390463db8c4dcb344a4fc29252e56fb473cef26ee2d7fe283b3f213f0c73918c2bd629eb11c8b074156843c718eea78d78b7ce8c22160a21703c307","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:f3b8c6e237ba2bb52525f4dad3544afcfc7153e18883ad82853f39d1971fb13a","claimed_score":100,"runner_provenance":"sha256:7e9f1dfe479c7831a6b11092144a02bab7c52861802c40dbca2faeaec47a46d1"}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"042f6ba06390463db8c4dcb344a4fc29252e56fb473cef26ee2d7fe283b3f213f0c73918c2bd629eb11c8b074156843c718eea78d78b7ce8c22160a21703c307","signerAlgo":"ed25519","submittedAt":"2026-04-26T19:20:10.334Z","verifiedAt":"2026-04-26T19:20:10.334Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-54508ca4cc03","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-opus-4-7","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:d2758a26ea4d3e87c767f52aa0ec28002129218f82849fc5ade390d4923a48c0","startedAt":"2026-04-26T18:29:29.385Z","finishedAt":"2026-04-26T18:30:40.824Z","durationSeconds":71,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-opus-4-7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"2ca399f3de8e9fe22990c5310c57af681c53f9635b822b3f3d8be064f88f25ed8776e3f6d08dc0c0bf5af07b0a8df9d3d00f109f628b1d3be085b54e7d806306","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:d2758a26ea4d3e87c767f52aa0ec28002129218f82849fc5ade390d4923a48c0","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"2ca399f3de8e9fe22990c5310c57af681c53f9635b822b3f3d8be064f88f25ed8776e3f6d08dc0c0bf5af07b0a8df9d3d00f109f628b1d3be085b54e7d806306","signerAlgo":"ed25519","submittedAt":"2026-04-26T18:30:40.824Z","verifiedAt":"2026-04-26T18:30:40.824Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-c43d3b2a3a27","serviceId":"anthropic-claude","benchmarkId":"math-500","model":"claude-opus-4-7","score":37.5,"runs":1,"breakdown":{"n":8,"passes":3,"mean_raw":0.375},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:9e86adfbb746209853106da77f29348041d67bb236dcd3cc8ad472da93058634","methodologyHash":"sha256:0002df534e2793d29cba5e29d2385b12303330f41257e1ee969eec3f3b1b5f56","transcriptMerkleRoot":"sha256:1df00b7d88954c55bae25fda5a042f0b3557f229b04e0a607716e40cc66527f9","startedAt":"2026-04-26T18:18:08.526Z","finishedAt":"2026-04-26T18:18:42.616Z","durationSeconds":34,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run math-500 --service anthropic-claude --model claude-opus-4-7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"e87e00f2a1dc17ccda35902b938c06a2b17c0c4e6273d8ccb80cd91e869037898bf7206586ee0da499cef7b64591b925889c7d51833378b890661369ef373001","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:9e86adfbb746209853106da77f29348041d67bb236dcd3cc8ad472da93058634","methodology_hash":"sha256:0002df534e2793d29cba5e29d2385b12303330f41257e1ee969eec3f3b1b5f56","merkle_root":"sha256:1df00b7d88954c55bae25fda5a042f0b3557f229b04e0a607716e40cc66527f9","claimed_score":37.5}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"e87e00f2a1dc17ccda35902b938c06a2b17c0c4e6273d8ccb80cd91e869037898bf7206586ee0da499cef7b64591b925889c7d51833378b890661369ef373001","signerAlgo":"ed25519","submittedAt":"2026-04-26T18:18:42.616Z","verifiedAt":"2026-04-26T18:18:42.616Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-7bf8f2102974","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-haiku-4-5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:3247ccb958cb653d123385e697fdb60d15455d1d3785803fe82f68648a034223","startedAt":"2026-04-26T18:15:47.507Z","finishedAt":"2026-04-26T18:16:40.018Z","durationSeconds":53,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-haiku-4-5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"0fffaee18055b326dce7ac065c48ebe9c5b826300daa78ba1c514d6e240b0bd7ed982c999a82535f4a803d5c828e963d24f020216ff81e886c45f64baa2bbd04","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:3247ccb958cb653d123385e697fdb60d15455d1d3785803fe82f68648a034223","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"0fffaee18055b326dce7ac065c48ebe9c5b826300daa78ba1c514d6e240b0bd7ed982c999a82535f4a803d5c828e963d24f020216ff81e886c45f64baa2bbd04","signerAlgo":"ed25519","submittedAt":"2026-04-26T18:16:40.018Z","verifiedAt":"2026-04-26T18:16:40.018Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-da8868f446e9","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-haiku-4-5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:a8d2f67a9ad8a8dfd3cfbd9f37a9d361439202c649f4e4abce6799e7944ecffe","startedAt":"2026-04-26T17:32:43.443Z","finishedAt":"2026-04-26T17:34:14.685Z","durationSeconds":91,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-haiku-4-5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"b315f2cc56db935fd450fea518310043e6b273b072c1674a06919945f6a387a81d751e38f549b71065d02d85672a6160a10b4b0fdc3842ae5de4089b46c3be0a","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:a8d2f67a9ad8a8dfd3cfbd9f37a9d361439202c649f4e4abce6799e7944ecffe","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"b315f2cc56db935fd450fea518310043e6b273b072c1674a06919945f6a387a81d751e38f549b71065d02d85672a6160a10b4b0fdc3842ae5de4089b46c3be0a","signerAlgo":"ed25519","submittedAt":"2026-04-26T17:34:14.685Z","verifiedAt":"2026-04-26T17:34:14.685Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-aeaad7abf734","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-haiku-4-5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:02560e0a1efc54b84f5c727ce04799188bb8707da9ef1ec3a6fdb8695f8fbfda","startedAt":"2026-04-26T17:22:40.967Z","finishedAt":"2026-04-26T17:24:27.960Z","durationSeconds":107,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-haiku-4-5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"590a07101b81a18b15aded934331ed3b7849998b9108233b27dd381fd03d5cfaced5b364efae9b474fd6818d96f3c475d06118750987d0942caa750bedef0803","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:02560e0a1efc54b84f5c727ce04799188bb8707da9ef1ec3a6fdb8695f8fbfda","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"590a07101b81a18b15aded934331ed3b7849998b9108233b27dd381fd03d5cfaced5b364efae9b474fd6818d96f3c475d06118750987d0942caa750bedef0803","signerAlgo":"ed25519","submittedAt":"2026-04-26T17:24:27.960Z","verifiedAt":"2026-04-26T17:24:27.960Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-46c037d72d93","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-haiku-4-5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:5b2a63dd3bf4a8827f1343ab3b2479bfcd5a77943764a183baf7d9d55aac87bc","startedAt":"2026-04-26T16:55:57.960Z","finishedAt":"2026-04-26T16:56:08.425Z","durationSeconds":10,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-haiku-4-5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"b4bee56f9bb063a25b82f403810629fae12b56be3db17346f630375463b67df06728d883646b860955f6c3e7cd132323202bd1cba441c5efb1b0a2ed3d0c9b05","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:5b2a63dd3bf4a8827f1343ab3b2479bfcd5a77943764a183baf7d9d55aac87bc","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"b4bee56f9bb063a25b82f403810629fae12b56be3db17346f630375463b67df06728d883646b860955f6c3e7cd132323202bd1cba441c5efb1b0a2ed3d0c9b05","signerAlgo":"ed25519","submittedAt":"2026-04-26T16:56:08.425Z","verifiedAt":"2026-04-26T16:56:08.425Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-e711091df9bf","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-haiku-4-5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:90ee1aba6c30e1d9c5159824856789d667fd028c5deb26736c9481ecdee12700","startedAt":"2026-04-26T16:43:01.953Z","finishedAt":"2026-04-26T16:44:14.333Z","durationSeconds":72,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-haiku-4-5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"ddfbded1ac5adef83b1892b2aedc4141fdd4c65c7a8227455eee44dda1f5cfc7c1a26e60777376e9a41bf7aa1fe70a0b4955b833a24f0700669694af666e240b","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:90ee1aba6c30e1d9c5159824856789d667fd028c5deb26736c9481ecdee12700","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"ddfbded1ac5adef83b1892b2aedc4141fdd4c65c7a8227455eee44dda1f5cfc7c1a26e60777376e9a41bf7aa1fe70a0b4955b833a24f0700669694af666e240b","signerAlgo":"ed25519","submittedAt":"2026-04-26T16:44:14.333Z","verifiedAt":"2026-04-26T16:44:14.333Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-b0599461ccb0","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-haiku-4-5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:59161083ae4c006961b9ff09c4b20e5570b2e059b697a2f30c1945ff72df39f8","startedAt":"2026-04-26T15:03:15.025Z","finishedAt":"2026-04-26T15:04:51.472Z","durationSeconds":96,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-haiku-4-5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"0ba179de21a44890806489c349921c084800dcbfe0bae25d22177bb7661f989c27cfb33f1d9ca9f537e82f14fdeab576b3ae7cea424ca84524e63e9d49f9da09","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:59161083ae4c006961b9ff09c4b20e5570b2e059b697a2f30c1945ff72df39f8","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"0ba179de21a44890806489c349921c084800dcbfe0bae25d22177bb7661f989c27cfb33f1d9ca9f537e82f14fdeab576b3ae7cea424ca84524e63e9d49f9da09","signerAlgo":"ed25519","submittedAt":"2026-04-26T15:04:51.472Z","verifiedAt":"2026-04-26T15:04:51.472Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-742bac0cb537","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-haiku-4-5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:4518d299319dd02bcebb9b4af03bae3280119f4907bd7cd5089e9febd90dc888","startedAt":"2026-04-26T15:02:40.957Z","finishedAt":"2026-04-26T15:02:51.867Z","durationSeconds":11,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-haiku-4-5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"82d471daf77c0c0c6beda142409a8291d42cc646a866fcb50d21299d9da97f7fc6eab85aa8de21a2c404c6c5858743c4588e018e20cf2e6e182df7b0213b8207","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:4518d299319dd02bcebb9b4af03bae3280119f4907bd7cd5089e9febd90dc888","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"82d471daf77c0c0c6beda142409a8291d42cc646a866fcb50d21299d9da97f7fc6eab85aa8de21a2c404c6c5858743c4588e018e20cf2e6e182df7b0213b8207","signerAlgo":"ed25519","submittedAt":"2026-04-26T15:02:51.867Z","verifiedAt":"2026-04-26T15:02:51.867Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-local-95ad42cdd0c7","serviceId":"openrouter","benchmarkId":"math-500","model":"glm-4.7-flash-30b-q4km","score":10,"runs":1,"breakdown":{"n":50,"passes":5,"mean_raw":0.1},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:78fd61258ac0baaf1481bd291c6ed42004f95ff6302279502627fb0b9be98bd0","methodologyHash":"sha256:2d3e19ed12ebcd5f2e584cf64b353e417139647737adaef4e46e74aef90c4232","transcriptMerkleRoot":"sha256:1c19a92abfb8218a820817e258ebeda078568d7ef4d6d9b633da65f403abf380","startedAt":"2026-04-26T10:20:35Z","finishedAt":"2026-04-26T10:22:59Z","durationSeconds":144,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical math-500 sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"c49ae5c544c3ca80e644105e86f4e3ee8ee4396cba9c111e77956e4ca5b43508894a9043c00d96cd4879b495e07bedccdefdfdecf32b1c6a007a97e89e53b608","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:78fd61258ac0baaf1481bd291c6ed42004f95ff6302279502627fb0b9be98bd0","methodology_hash":"sha256:2d3e19ed12ebcd5f2e584cf64b353e417139647737adaef4e46e74aef90c4232","merkle_root":"sha256:1c19a92abfb8218a820817e258ebeda078568d7ef4d6d9b633da65f403abf380","claimed_score":10}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"c49ae5c544c3ca80e644105e86f4e3ee8ee4396cba9c111e77956e4ca5b43508894a9043c00d96cd4879b495e07bedccdefdfdecf32b1c6a007a97e89e53b608","signerAlgo":"ed25519","submittedAt":"2026-04-26T10:22:57.157Z","verifiedAt":"2026-04-26T10:22:59Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T10:22:57.157Z","storeRoute":"store-run"},{"id":"run-local-0d6aaefd5174","serviceId":"openrouter","benchmarkId":"openbookqa","model":"glm-4.7-flash-30b-q4km","score":12,"runs":1,"breakdown":{"n":50,"passes":6,"mean_raw":0.12},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:0a3bba9b686859f3e89e67cf0f4dce4276ba11e8ccd4ad7c367f98880d8b5953","methodologyHash":"sha256:ca613efb492af89b7f2f5efa61eeea602639622a37046ee5c6d9dec8b12c0a47","transcriptMerkleRoot":"sha256:418cd7f4d8aef1a4bc272eb8a48fcd9025f79c46b14ac24e7b941502e5c11310","startedAt":"2026-04-26T10:18:05Z","finishedAt":"2026-04-26T10:20:33Z","durationSeconds":148,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical openbookqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"254d81180d154657c3b47d924ab44d1295c1d4219c5ad39471256408a1885b3aca733c1a4808d53dd26e1b28cf65016b2eb9fd02d480e4a7a7d73458e49cbc02","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:0a3bba9b686859f3e89e67cf0f4dce4276ba11e8ccd4ad7c367f98880d8b5953","methodology_hash":"sha256:ca613efb492af89b7f2f5efa61eeea602639622a37046ee5c6d9dec8b12c0a47","merkle_root":"sha256:418cd7f4d8aef1a4bc272eb8a48fcd9025f79c46b14ac24e7b941502e5c11310","claimed_score":12}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"254d81180d154657c3b47d924ab44d1295c1d4219c5ad39471256408a1885b3aca733c1a4808d53dd26e1b28cf65016b2eb9fd02d480e4a7a7d73458e49cbc02","signerAlgo":"ed25519","submittedAt":"2026-04-26T10:20:31.564Z","verifiedAt":"2026-04-26T10:20:33Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T10:20:31.564Z","storeRoute":"store-run"},{"id":"run-local-a115452afce1","serviceId":"openrouter","benchmarkId":"commonsenseqa","model":"glm-4.7-flash-30b-q4km","score":22,"runs":1,"breakdown":{"n":50,"passes":11,"mean_raw":0.22},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:657c0ecfad0bd4dcbf062db3be6475df54a6d658c93b0493d4e0d3d86c4cb5bf","methodologyHash":"sha256:11c314e72c2b767f36d059911da85f213c8fa50958bc5b4e94ae94f7fb36dd77","transcriptMerkleRoot":"sha256:abcbbde0418f621a93b1f08c5e3f85ef916b8cf7f80b60a91405338f2e227db6","startedAt":"2026-04-26T10:15:38Z","finishedAt":"2026-04-26T10:18:03Z","durationSeconds":145,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical commonsenseqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"1eab5d11530271f9f10029d5bfee9e0b1b99419a63a32d0a4edcef079fa82d6ae18f6ed581c01a3db3bdf9e8f5a030de9e93409a4e0c4d5f6923b2f32092e901","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:657c0ecfad0bd4dcbf062db3be6475df54a6d658c93b0493d4e0d3d86c4cb5bf","methodology_hash":"sha256:11c314e72c2b767f36d059911da85f213c8fa50958bc5b4e94ae94f7fb36dd77","merkle_root":"sha256:abcbbde0418f621a93b1f08c5e3f85ef916b8cf7f80b60a91405338f2e227db6","claimed_score":22}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"1eab5d11530271f9f10029d5bfee9e0b1b99419a63a32d0a4edcef079fa82d6ae18f6ed581c01a3db3bdf9e8f5a030de9e93409a4e0c4d5f6923b2f32092e901","signerAlgo":"ed25519","submittedAt":"2026-04-26T10:18:01.931Z","verifiedAt":"2026-04-26T10:18:03Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T10:18:01.931Z","storeRoute":"store-run"},{"id":"run-local-9164b580b726","serviceId":"openrouter","benchmarkId":"winogrande","model":"glm-4.7-flash-30b-q4km","score":24,"runs":1,"breakdown":{"n":50,"passes":12,"mean_raw":0.24},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:6b80c06f2f42f404b65a8704af71bd8d3f2a7a85a4fbb9b23a9aa9d27ace7040","methodologyHash":"sha256:b0750456ca6269f519cb7dd4bbda66aa80a3806e9505189914584546305b2eaa","transcriptMerkleRoot":"sha256:b1d3992d71b93470447f03d8e85003350eb74527696a321d83c43cb7622d4e22","startedAt":"2026-04-26T10:13:22Z","finishedAt":"2026-04-26T10:15:37Z","durationSeconds":135,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical winogrande sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"337fe41094da9391c1375ac1cc59d7089307814356624572f7e45923a17e005326242fecec4f6957f0129c27dd736e95ce137c2cb0f462a899303a531dee130b","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:6b80c06f2f42f404b65a8704af71bd8d3f2a7a85a4fbb9b23a9aa9d27ace7040","methodology_hash":"sha256:b0750456ca6269f519cb7dd4bbda66aa80a3806e9505189914584546305b2eaa","merkle_root":"sha256:b1d3992d71b93470447f03d8e85003350eb74527696a321d83c43cb7622d4e22","claimed_score":24}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"337fe41094da9391c1375ac1cc59d7089307814356624572f7e45923a17e005326242fecec4f6957f0129c27dd736e95ce137c2cb0f462a899303a531dee130b","signerAlgo":"ed25519","submittedAt":"2026-04-26T10:15:35.176Z","verifiedAt":"2026-04-26T10:15:37Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T10:15:35.176Z","storeRoute":"store-run"},{"id":"run-local-83332332e1fb","serviceId":"openrouter","benchmarkId":"arc-challenge","model":"glm-4.7-flash-30b-q4km","score":16,"runs":1,"breakdown":{"n":50,"passes":8,"mean_raw":0.16},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:144e9a13fb369f31007fffdcf4d7d55692677b409c8fa7b7dec4328c81a55752","methodologyHash":"sha256:8e84e6ffec11c082a286373b8b306600732cdf99b514079bfc0754fe4cd7a7c5","transcriptMerkleRoot":"sha256:6e7fa2166186bf7140273fdd97f6f998583e56f623024e36522d0a3a83924516","startedAt":"2026-04-26T10:11:05Z","finishedAt":"2026-04-26T10:13:20Z","durationSeconds":135,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical arc-challenge sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"b1ed9cebe82f92c6bfac1069693ef88681d19f7c803c921ead575ea53fb1f489d6118a76077a4d066beb7146e1a413865dfbf6f49d4237ed22fa3566b8905302","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:144e9a13fb369f31007fffdcf4d7d55692677b409c8fa7b7dec4328c81a55752","methodology_hash":"sha256:8e84e6ffec11c082a286373b8b306600732cdf99b514079bfc0754fe4cd7a7c5","merkle_root":"sha256:6e7fa2166186bf7140273fdd97f6f998583e56f623024e36522d0a3a83924516","claimed_score":16}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"b1ed9cebe82f92c6bfac1069693ef88681d19f7c803c921ead575ea53fb1f489d6118a76077a4d066beb7146e1a413865dfbf6f49d4237ed22fa3566b8905302","signerAlgo":"ed25519","submittedAt":"2026-04-26T10:13:19.381Z","verifiedAt":"2026-04-26T10:13:20Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T10:13:19.381Z","storeRoute":"store-run"},{"id":"run-local-2cc237e6a12f","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"glm-4.7-flash-30b-q4km","score":0,"runs":1,"breakdown":{"n":50,"passes":0,"mean_raw":0},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:c4ff0432d6539908d6b09b6ceac8693877afbd15a62806db219216507a578bdd","methodologyHash":"sha256:68cb69d8b9a5e40f78f9918e9ab57581115783d514402f7978180dac87834446","transcriptMerkleRoot":"sha256:a3a6cadfda3316bbb4c3b8fa162dcc85263e7a91c570fe1151e7db4e12fc10b1","startedAt":"2026-04-26T10:08:43Z","finishedAt":"2026-04-26T10:11:03Z","durationSeconds":140,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical mmlu-pro sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"9adaa5a74964f35c34ac81636da1226faef7f861a061ef05d001abedeffb29bdbe9b8f2f46a39f9be6f95db7ec9685af6e15220854a69b91cc48703d07798b0d","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:c4ff0432d6539908d6b09b6ceac8693877afbd15a62806db219216507a578bdd","methodology_hash":"sha256:68cb69d8b9a5e40f78f9918e9ab57581115783d514402f7978180dac87834446","merkle_root":"sha256:a3a6cadfda3316bbb4c3b8fa162dcc85263e7a91c570fe1151e7db4e12fc10b1","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"9adaa5a74964f35c34ac81636da1226faef7f861a061ef05d001abedeffb29bdbe9b8f2f46a39f9be6f95db7ec9685af6e15220854a69b91cc48703d07798b0d","signerAlgo":"ed25519","submittedAt":"2026-04-26T10:11:01.736Z","verifiedAt":"2026-04-26T10:11:03Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T10:11:01.736Z","storeRoute":"store-run"},{"id":"run-local-2d1f8ab358c4","serviceId":"openrouter","benchmarkId":"gsm8k","model":"glm-4.7-flash-30b-q4km","score":6,"runs":1,"breakdown":{"n":50,"passes":3,"mean_raw":0.06},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodologyHash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","transcriptMerkleRoot":"sha256:9ac0de9ab0e8dcca7a8a94ed2460674f34e90546d6cfee218c3f1cc5e3a0a9cd","startedAt":"2026-04-26T10:06:05Z","finishedAt":"2026-04-26T10:08:41Z","durationSeconds":156,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical gsm8k sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"20ac2f671d6255352ee0e4a10c455f8db09a54e582050dd5e93c6496d2741769f287b279968354dcdf1a6796417282309c120755db007aa7a46907d71320ff07","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodology_hash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","merkle_root":"sha256:9ac0de9ab0e8dcca7a8a94ed2460674f34e90546d6cfee218c3f1cc5e3a0a9cd","claimed_score":6}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"20ac2f671d6255352ee0e4a10c455f8db09a54e582050dd5e93c6496d2741769f287b279968354dcdf1a6796417282309c120755db007aa7a46907d71320ff07","signerAlgo":"ed25519","submittedAt":"2026-04-26T10:08:39.935Z","verifiedAt":"2026-04-26T10:08:41Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T10:08:39.935Z","storeRoute":"store-run"},{"id":"run-local-3f7c0c287419","serviceId":"openrouter","benchmarkId":"math-500","model":"qwen3.6-27b-dense-q5km","score":12,"runs":1,"breakdown":{"n":50,"passes":6,"mean_raw":0.12},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:78fd61258ac0baaf1481bd291c6ed42004f95ff6302279502627fb0b9be98bd0","methodologyHash":"sha256:2d3e19ed12ebcd5f2e584cf64b353e417139647737adaef4e46e74aef90c4232","transcriptMerkleRoot":"sha256:5124c2527b5307f63c0dba40f5436b32b12780c6987f48b83a4b11e6df0a707c","startedAt":"2026-04-26T09:46:28Z","finishedAt":"2026-04-26T10:06:03Z","durationSeconds":1175,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical math-500 sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"2d1cbb1c89dbdbeb35b9a48f93a3fb603e835e366056c05543069f35f9d9fdb240dd13201a0105734a21191b81ed109be3fb601ac0e4150fb293ac30168b0302","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:78fd61258ac0baaf1481bd291c6ed42004f95ff6302279502627fb0b9be98bd0","methodology_hash":"sha256:2d3e19ed12ebcd5f2e584cf64b353e417139647737adaef4e46e74aef90c4232","merkle_root":"sha256:5124c2527b5307f63c0dba40f5436b32b12780c6987f48b83a4b11e6df0a707c","claimed_score":12}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"2d1cbb1c89dbdbeb35b9a48f93a3fb603e835e366056c05543069f35f9d9fdb240dd13201a0105734a21191b81ed109be3fb601ac0e4150fb293ac30168b0302","signerAlgo":"ed25519","submittedAt":"2026-04-26T10:06:01.684Z","verifiedAt":"2026-04-26T10:06:03Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T10:06:01.684Z","storeRoute":"store-run"},{"id":"run-local-0b2611c0de7a","serviceId":"openrouter","benchmarkId":"openbookqa","model":"qwen3.6-27b-dense-q5km","score":86,"runs":1,"breakdown":{"n":50,"passes":43,"mean_raw":0.86},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:0a3bba9b686859f3e89e67cf0f4dce4276ba11e8ccd4ad7c367f98880d8b5953","methodologyHash":"sha256:ca613efb492af89b7f2f5efa61eeea602639622a37046ee5c6d9dec8b12c0a47","transcriptMerkleRoot":"sha256:47048a44a6425ede50ea086f88aa5e2a14e66c3aa143d0ffd060a7da48068914","startedAt":"2026-04-26T09:36:06Z","finishedAt":"2026-04-26T09:46:26Z","durationSeconds":620,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical openbookqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"79690fb702249aed70a10bcbe8be6a329dc247bfb190ab3f0f2fbd19a41c1642fd11a5598c1893dc6b28f434eb0b0b40a93b2a8e419d25458ea79511d1b86102","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:0a3bba9b686859f3e89e67cf0f4dce4276ba11e8ccd4ad7c367f98880d8b5953","methodology_hash":"sha256:ca613efb492af89b7f2f5efa61eeea602639622a37046ee5c6d9dec8b12c0a47","merkle_root":"sha256:47048a44a6425ede50ea086f88aa5e2a14e66c3aa143d0ffd060a7da48068914","claimed_score":86}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"79690fb702249aed70a10bcbe8be6a329dc247bfb190ab3f0f2fbd19a41c1642fd11a5598c1893dc6b28f434eb0b0b40a93b2a8e419d25458ea79511d1b86102","signerAlgo":"ed25519","submittedAt":"2026-04-26T09:46:25.100Z","verifiedAt":"2026-04-26T09:46:26Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T09:46:25.100Z","storeRoute":"store-run"},{"id":"run-local-185a189252fb","serviceId":"openrouter","benchmarkId":"commonsenseqa","model":"qwen3.6-27b-dense-q5km","score":72,"runs":1,"breakdown":{"n":50,"passes":36,"mean_raw":0.72},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:657c0ecfad0bd4dcbf062db3be6475df54a6d658c93b0493d4e0d3d86c4cb5bf","methodologyHash":"sha256:11c314e72c2b767f36d059911da85f213c8fa50958bc5b4e94ae94f7fb36dd77","transcriptMerkleRoot":"sha256:33845b460b40ed1db7a0786d0256106dfe7fa3fa34382acf5e1eda4eea21811c","startedAt":"2026-04-26T09:21:47Z","finishedAt":"2026-04-26T09:36:04Z","durationSeconds":857,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical commonsenseqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"0ed5b3b3077cba05fb2afb9414aa826856f22c78faac1349c399f27ee700408398930e508f5e9d4cc73bb172ee430b35cb5c64c34522c1ab9be00f81009b160f","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:657c0ecfad0bd4dcbf062db3be6475df54a6d658c93b0493d4e0d3d86c4cb5bf","methodology_hash":"sha256:11c314e72c2b767f36d059911da85f213c8fa50958bc5b4e94ae94f7fb36dd77","merkle_root":"sha256:33845b460b40ed1db7a0786d0256106dfe7fa3fa34382acf5e1eda4eea21811c","claimed_score":72}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"0ed5b3b3077cba05fb2afb9414aa826856f22c78faac1349c399f27ee700408398930e508f5e9d4cc73bb172ee430b35cb5c64c34522c1ab9be00f81009b160f","signerAlgo":"ed25519","submittedAt":"2026-04-26T09:36:03.172Z","verifiedAt":"2026-04-26T09:36:04Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T09:36:03.172Z","storeRoute":"store-run"},{"id":"run-local-48a7d3496eb1","serviceId":"openrouter","benchmarkId":"winogrande","model":"qwen3.6-27b-dense-q5km","score":70,"runs":1,"breakdown":{"n":50,"passes":35,"mean_raw":0.7},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:6b80c06f2f42f404b65a8704af71bd8d3f2a7a85a4fbb9b23a9aa9d27ace7040","methodologyHash":"sha256:b0750456ca6269f519cb7dd4bbda66aa80a3806e9505189914584546305b2eaa","transcriptMerkleRoot":"sha256:214f70ee4634582f5c6da8a42c73bb615ca46cc5055bf4b0a346adef39c5fa9a","startedAt":"2026-04-26T09:03:30Z","finishedAt":"2026-04-26T09:21:45Z","durationSeconds":1095,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical winogrande sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"5918f0868ea17de1b1d135636ddb2f9157681ae691f3f89107274515db9152ec7162b76ac2f4bd12b1c9df31cbe21b15f4cef803617d33c7f2f98b258d151f07","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:6b80c06f2f42f404b65a8704af71bd8d3f2a7a85a4fbb9b23a9aa9d27ace7040","methodology_hash":"sha256:b0750456ca6269f519cb7dd4bbda66aa80a3806e9505189914584546305b2eaa","merkle_root":"sha256:214f70ee4634582f5c6da8a42c73bb615ca46cc5055bf4b0a346adef39c5fa9a","claimed_score":70}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"5918f0868ea17de1b1d135636ddb2f9157681ae691f3f89107274515db9152ec7162b76ac2f4bd12b1c9df31cbe21b15f4cef803617d33c7f2f98b258d151f07","signerAlgo":"ed25519","submittedAt":"2026-04-26T09:21:44.461Z","verifiedAt":"2026-04-26T09:21:45Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T09:21:44.461Z","storeRoute":"store-run"},{"id":"run-local-5094fe8eaa1b","serviceId":"openrouter","benchmarkId":"arc-challenge","model":"qwen3.6-27b-dense-q5km","score":68,"runs":1,"breakdown":{"n":50,"passes":34,"mean_raw":0.68},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:144e9a13fb369f31007fffdcf4d7d55692677b409c8fa7b7dec4328c81a55752","methodologyHash":"sha256:8e84e6ffec11c082a286373b8b306600732cdf99b514079bfc0754fe4cd7a7c5","transcriptMerkleRoot":"sha256:a69857ecdd48516bcdbba7d9b0a4fb7da52d144c48971505b73a3b39a173dadd","startedAt":"2026-04-26T08:49:55Z","finishedAt":"2026-04-26T09:03:28Z","durationSeconds":813,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical arc-challenge sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"6f430f5f09e765b0c80c3eb1355059e12a20c64ebd42a31dd7ae9b2ae5cae84942c508aea816dd1bd7c277a2095a18d57147eccbfe64cf49a766b69c3eaf4207","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:144e9a13fb369f31007fffdcf4d7d55692677b409c8fa7b7dec4328c81a55752","methodology_hash":"sha256:8e84e6ffec11c082a286373b8b306600732cdf99b514079bfc0754fe4cd7a7c5","merkle_root":"sha256:a69857ecdd48516bcdbba7d9b0a4fb7da52d144c48971505b73a3b39a173dadd","claimed_score":68}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"6f430f5f09e765b0c80c3eb1355059e12a20c64ebd42a31dd7ae9b2ae5cae84942c508aea816dd1bd7c277a2095a18d57147eccbfe64cf49a766b69c3eaf4207","signerAlgo":"ed25519","submittedAt":"2026-04-26T09:03:27.456Z","verifiedAt":"2026-04-26T09:03:28Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T09:03:27.456Z","storeRoute":"store-run"},{"id":"run-local-72063a6aee5f","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"qwen3.6-27b-dense-q5km","score":22,"runs":1,"breakdown":{"n":50,"passes":11,"mean_raw":0.22},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:c4ff0432d6539908d6b09b6ceac8693877afbd15a62806db219216507a578bdd","methodologyHash":"sha256:68cb69d8b9a5e40f78f9918e9ab57581115783d514402f7978180dac87834446","transcriptMerkleRoot":"sha256:051366d9643aa540e8f5b400c5aecca53ac6c61813c8d9fa381c3347f191af70","startedAt":"2026-04-26T08:30:47Z","finishedAt":"2026-04-26T08:49:52Z","durationSeconds":1145,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical mmlu-pro sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"cc4d6297973a4a520783e8ec872939e213fa9d726111edcaddbaf3b91acfe9e0fb4c041b5669a51e0aeed5d7b5c808657619629319ed4ac34f2b49fa19b4940c","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:c4ff0432d6539908d6b09b6ceac8693877afbd15a62806db219216507a578bdd","methodology_hash":"sha256:68cb69d8b9a5e40f78f9918e9ab57581115783d514402f7978180dac87834446","merkle_root":"sha256:051366d9643aa540e8f5b400c5aecca53ac6c61813c8d9fa381c3347f191af70","claimed_score":22}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"cc4d6297973a4a520783e8ec872939e213fa9d726111edcaddbaf3b91acfe9e0fb4c041b5669a51e0aeed5d7b5c808657619629319ed4ac34f2b49fa19b4940c","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:49:51.606Z","verifiedAt":"2026-04-26T08:49:52Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T08:49:51.606Z","storeRoute":"store-run"},{"id":"run-df07f5028fbc","serviceId":"anthropic-claude","benchmarkId":"mmlu","model":"claude-opus-4.7","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:05ef744f592cd2481092a6ecdecbccaf5e515f6ac2be7d5fc77ad85b8165f15c","methodologyHash":"sha256:f65dba1e549ab81ea004be624791ae7b7b3e784648c0cb2ce84b8bf930bb0457","transcriptMerkleRoot":"sha256:7035cfd5d4fc19ac5eab731a6906ba3e7d1ca7b86b976ad0af37523097e35be7","startedAt":"2026-04-26T08:37:38.425Z","finishedAt":"2026-04-26T08:38:19.783Z","durationSeconds":41,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run mmlu --service anthropic-claude --model claude-opus-4.7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"a6828a3772b04e4b67ac683c13e7d96499f56d97dad0c14c935686bc7a9265cd4e4c515885fdadaf5b10e015c3d632d75d3dcb2e086dc161286f3cc994d3860e","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:05ef744f592cd2481092a6ecdecbccaf5e515f6ac2be7d5fc77ad85b8165f15c","methodology_hash":"sha256:f65dba1e549ab81ea004be624791ae7b7b3e784648c0cb2ce84b8bf930bb0457","merkle_root":"sha256:7035cfd5d4fc19ac5eab731a6906ba3e7d1ca7b86b976ad0af37523097e35be7","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"a6828a3772b04e4b67ac683c13e7d96499f56d97dad0c14c935686bc7a9265cd4e4c515885fdadaf5b10e015c3d632d75d3dcb2e086dc161286f3cc994d3860e","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:38:19.783Z","verifiedAt":"2026-04-26T08:38:19.783Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-71345cab5274","serviceId":"anthropic-claude","benchmarkId":"mmlu","model":"claude-haiku-4.5","score":87.5,"runs":1,"breakdown":{"n":8,"passes":7,"mean_raw":0.875},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:05ef744f592cd2481092a6ecdecbccaf5e515f6ac2be7d5fc77ad85b8165f15c","methodologyHash":"sha256:f65dba1e549ab81ea004be624791ae7b7b3e784648c0cb2ce84b8bf930bb0457","transcriptMerkleRoot":"sha256:6a06687deb4e771ef77943cde919002b8d27b27ab6ce8d20cef1bdeced61dbd4","startedAt":"2026-04-26T08:37:36.799Z","finishedAt":"2026-04-26T08:38:10.075Z","durationSeconds":33,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run mmlu --service anthropic-claude --model claude-haiku-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"3472ab4b892ec00c3309b3e73fc456e6366d8a5f7f544f94425c7a1614bdf12bb7c0b11ec3623188aa5b9589db7db2de183c14d57e5236868e819a5479b30706","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:05ef744f592cd2481092a6ecdecbccaf5e515f6ac2be7d5fc77ad85b8165f15c","methodology_hash":"sha256:f65dba1e549ab81ea004be624791ae7b7b3e784648c0cb2ce84b8bf930bb0457","merkle_root":"sha256:6a06687deb4e771ef77943cde919002b8d27b27ab6ce8d20cef1bdeced61dbd4","claimed_score":87.5}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"3472ab4b892ec00c3309b3e73fc456e6366d8a5f7f544f94425c7a1614bdf12bb7c0b11ec3623188aa5b9589db7db2de183c14d57e5236868e819a5479b30706","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:38:10.075Z","verifiedAt":"2026-04-26T08:38:10.075Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-920a62aadab5","serviceId":"anthropic-claude","benchmarkId":"mmlu","model":"claude-sonnet-4.5","score":62.5,"runs":1,"breakdown":{"n":8,"passes":5,"mean_raw":0.625},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:05ef744f592cd2481092a6ecdecbccaf5e515f6ac2be7d5fc77ad85b8165f15c","methodologyHash":"sha256:f65dba1e549ab81ea004be624791ae7b7b3e784648c0cb2ce84b8bf930bb0457","transcriptMerkleRoot":"sha256:8450e2b4d565c0bc39dcb1f3bc972c49bfeb756b115593b21b3c8cf5fd1eccff","startedAt":"2026-04-26T08:37:34.940Z","finishedAt":"2026-04-26T08:37:49.046Z","durationSeconds":14,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run mmlu --service anthropic-claude --model claude-sonnet-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"dac61c24204d49d4eaca4aa5249959a67d8cd9545ce7f1520d73a7c509018d8a0e611af1289cffbecf9382a0ee03bbf4d7b5a60998c1c74e0c9f1c60b1a9280f","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:05ef744f592cd2481092a6ecdecbccaf5e515f6ac2be7d5fc77ad85b8165f15c","methodology_hash":"sha256:f65dba1e549ab81ea004be624791ae7b7b3e784648c0cb2ce84b8bf930bb0457","merkle_root":"sha256:8450e2b4d565c0bc39dcb1f3bc972c49bfeb756b115593b21b3c8cf5fd1eccff","claimed_score":62.5}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"dac61c24204d49d4eaca4aa5249959a67d8cd9545ce7f1520d73a7c509018d8a0e611af1289cffbecf9382a0ee03bbf4d7b5a60998c1c74e0c9f1c60b1a9280f","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:37:49.046Z","verifiedAt":"2026-04-26T08:37:49.046Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-local-4c80350d4337","serviceId":"openrouter","benchmarkId":"gsm8k","model":"qwen3.6-27b-dense-q5km","score":40,"runs":1,"breakdown":{"n":50,"passes":20,"mean_raw":0.4},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodologyHash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","transcriptMerkleRoot":"sha256:b69b29f817dfa044673e3f439d59d8e656ba04617cfda0d9b56044ff1debd952","startedAt":"2026-04-26T08:18:58Z","finishedAt":"2026-04-26T08:30:45Z","durationSeconds":707,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical gsm8k sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"f18b684ec6d19409080a8fb278afcdda2b33eb6f6d83bd51c46e1e7ccfa85091d47e1a1e3a6707411529bbe2bfb043cc457f173d2e2b0183fb87b7fd4e623804","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodology_hash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","merkle_root":"sha256:b69b29f817dfa044673e3f439d59d8e656ba04617cfda0d9b56044ff1debd952","claimed_score":40}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"f18b684ec6d19409080a8fb278afcdda2b33eb6f6d83bd51c46e1e7ccfa85091d47e1a1e3a6707411529bbe2bfb043cc457f173d2e2b0183fb87b7fd4e623804","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:30:44.186Z","verifiedAt":"2026-04-26T08:30:45Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T08:30:44.186Z","storeRoute":"store-run"},{"id":"run-a8a177251ee6","serviceId":"anthropic-claude","benchmarkId":"truthfulqa","model":"claude-opus-4.7","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:1d3a9406678cb49569834ab0a185eb50e97f8072798c1f6afd73f76a77d5f75d","methodologyHash":"sha256:7f598216d03d1e165c16eba8a94bcf4814bc61513eee4a9620f97110bed29d31","transcriptMerkleRoot":"sha256:5d6d13182af2ab69d972269d50963f646ad705ff06fe52ddfd04e8b5935bd5a5","startedAt":"2026-04-26T08:21:33.699Z","finishedAt":"2026-04-26T08:22:24.333Z","durationSeconds":51,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run truthfulqa --service anthropic-claude --model claude-opus-4.7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"36ee32c3d78de536c6fc4ee5303e28fbe5e11789892aaf5384dc689f01dd95d4aa575159bc945e5fd42821a5d36dac22b27e1aa59a9a3c1c3dc662744db89a00","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1d3a9406678cb49569834ab0a185eb50e97f8072798c1f6afd73f76a77d5f75d","methodology_hash":"sha256:7f598216d03d1e165c16eba8a94bcf4814bc61513eee4a9620f97110bed29d31","merkle_root":"sha256:5d6d13182af2ab69d972269d50963f646ad705ff06fe52ddfd04e8b5935bd5a5","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"36ee32c3d78de536c6fc4ee5303e28fbe5e11789892aaf5384dc689f01dd95d4aa575159bc945e5fd42821a5d36dac22b27e1aa59a9a3c1c3dc662744db89a00","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:22:24.333Z","verifiedAt":"2026-04-26T08:22:24.333Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-384c77e7afde","serviceId":"anthropic-claude","benchmarkId":"truthfulqa","model":"claude-haiku-4.5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:1d3a9406678cb49569834ab0a185eb50e97f8072798c1f6afd73f76a77d5f75d","methodologyHash":"sha256:7f598216d03d1e165c16eba8a94bcf4814bc61513eee4a9620f97110bed29d31","transcriptMerkleRoot":"sha256:39b281ff878c7f91ceda49f9e1fe1d00e748780b317ffd39a88d9760ae3eea51","startedAt":"2026-04-26T08:21:32.287Z","finishedAt":"2026-04-26T08:22:04.469Z","durationSeconds":32,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run truthfulqa --service anthropic-claude --model claude-haiku-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"74b960acac33b24d61c5e0fd4dae8da0215cf6f9039f4abfe03ae05d13791d1dda967b9f4992a5fab54562467b932a118ef661e4056b84ba1c5b5c4ee99b360d","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1d3a9406678cb49569834ab0a185eb50e97f8072798c1f6afd73f76a77d5f75d","methodology_hash":"sha256:7f598216d03d1e165c16eba8a94bcf4814bc61513eee4a9620f97110bed29d31","merkle_root":"sha256:39b281ff878c7f91ceda49f9e1fe1d00e748780b317ffd39a88d9760ae3eea51","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"74b960acac33b24d61c5e0fd4dae8da0215cf6f9039f4abfe03ae05d13791d1dda967b9f4992a5fab54562467b932a118ef661e4056b84ba1c5b5c4ee99b360d","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:22:04.469Z","verifiedAt":"2026-04-26T08:22:04.469Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-038bc5b6052e","serviceId":"anthropic-claude","benchmarkId":"truthfulqa","model":"claude-sonnet-4.5","score":87.5,"runs":1,"breakdown":{"n":8,"passes":7,"mean_raw":0.875},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:1d3a9406678cb49569834ab0a185eb50e97f8072798c1f6afd73f76a77d5f75d","methodologyHash":"sha256:7f598216d03d1e165c16eba8a94bcf4814bc61513eee4a9620f97110bed29d31","transcriptMerkleRoot":"sha256:c2c9878c54785069313b476ac502d466397a08f4f0ae155dcb3da6a6e3503a01","startedAt":"2026-04-26T08:21:30.639Z","finishedAt":"2026-04-26T08:21:43.570Z","durationSeconds":13,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run truthfulqa --service anthropic-claude --model claude-sonnet-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"c742ec1c3b40d74a359a68b9aaf1ef85ac805fd0b0143274dcbf06ff9b170022ac4083be149ac3fb44b8a39076daf5045dc4d0cfc0898d26f15f737fb721830d","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1d3a9406678cb49569834ab0a185eb50e97f8072798c1f6afd73f76a77d5f75d","methodology_hash":"sha256:7f598216d03d1e165c16eba8a94bcf4814bc61513eee4a9620f97110bed29d31","merkle_root":"sha256:c2c9878c54785069313b476ac502d466397a08f4f0ae155dcb3da6a6e3503a01","claimed_score":87.5}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"c742ec1c3b40d74a359a68b9aaf1ef85ac805fd0b0143274dcbf06ff9b170022ac4083be149ac3fb44b8a39076daf5045dc4d0cfc0898d26f15f737fb721830d","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:21:43.570Z","verifiedAt":"2026-04-26T08:21:43.570Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-local-315ec64d60ff","serviceId":"openrouter","benchmarkId":"math-500","model":"llama3-8b-q40","score":12,"runs":1,"breakdown":{"n":50,"passes":6,"mean_raw":0.12},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:78fd61258ac0baaf1481bd291c6ed42004f95ff6302279502627fb0b9be98bd0","methodologyHash":"sha256:2d3e19ed12ebcd5f2e584cf64b353e417139647737adaef4e46e74aef90c4232","transcriptMerkleRoot":"sha256:37c88ae3203c65e732b7b5186c4b6069f8b68497886b115ffbc2f75a98ff9c98","startedAt":"2026-04-26T08:16:28Z","finishedAt":"2026-04-26T08:18:56Z","durationSeconds":148,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run llama3-8b-q40  # via _local_runner.py against the canonical math-500 sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"15d56fbea9f5a15876e3d3bca04d14404e6fa4fcfe2c630effd14a357ad283f602dbe6ae7cf55de172c2af5bb13bbb8539bfc0a404f78d48b533da3288e83305","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:78fd61258ac0baaf1481bd291c6ed42004f95ff6302279502627fb0b9be98bd0","methodology_hash":"sha256:2d3e19ed12ebcd5f2e584cf64b353e417139647737adaef4e46e74aef90c4232","merkle_root":"sha256:37c88ae3203c65e732b7b5186c4b6069f8b68497886b115ffbc2f75a98ff9c98","claimed_score":12}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"15d56fbea9f5a15876e3d3bca04d14404e6fa4fcfe2c630effd14a357ad283f602dbe6ae7cf55de172c2af5bb13bbb8539bfc0a404f78d48b533da3288e83305","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:18:55.064Z","verifiedAt":"2026-04-26T08:18:56Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: llama3-8b-q40."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T08:18:55.064Z","storeRoute":"store-run"},{"id":"run-local-3c9c3d23a2a6","serviceId":"openrouter","benchmarkId":"openbookqa","model":"llama3-8b-q40","score":68,"runs":1,"breakdown":{"n":50,"passes":34,"mean_raw":0.68},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:0a3bba9b686859f3e89e67cf0f4dce4276ba11e8ccd4ad7c367f98880d8b5953","methodologyHash":"sha256:ca613efb492af89b7f2f5efa61eeea602639622a37046ee5c6d9dec8b12c0a47","transcriptMerkleRoot":"sha256:6d28ac5bac459f3c9f31ddbab87d6ddabedf330420b8470db720f055935481d6","startedAt":"2026-04-26T08:14:38Z","finishedAt":"2026-04-26T08:16:26Z","durationSeconds":108,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run llama3-8b-q40  # via _local_runner.py against the canonical openbookqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"c1a6d91851e01a96d190b904c7d8787dd5bcf2c6da82df3a10aa7ed3288a4c8f52fa95b75cf6cc9dff97a9da8997097971446024009dd5d5e9a6626274894b06","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:0a3bba9b686859f3e89e67cf0f4dce4276ba11e8ccd4ad7c367f98880d8b5953","methodology_hash":"sha256:ca613efb492af89b7f2f5efa61eeea602639622a37046ee5c6d9dec8b12c0a47","merkle_root":"sha256:6d28ac5bac459f3c9f31ddbab87d6ddabedf330420b8470db720f055935481d6","claimed_score":68}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"c1a6d91851e01a96d190b904c7d8787dd5bcf2c6da82df3a10aa7ed3288a4c8f52fa95b75cf6cc9dff97a9da8997097971446024009dd5d5e9a6626274894b06","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:16:24.946Z","verifiedAt":"2026-04-26T08:16:26Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: llama3-8b-q40."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T08:16:24.946Z","storeRoute":"store-run"},{"id":"run-local-362f4618430d","serviceId":"openrouter","benchmarkId":"commonsenseqa","model":"llama3-8b-q40","score":62,"runs":1,"breakdown":{"n":50,"passes":31,"mean_raw":0.62},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:657c0ecfad0bd4dcbf062db3be6475df54a6d658c93b0493d4e0d3d86c4cb5bf","methodologyHash":"sha256:11c314e72c2b767f36d059911da85f213c8fa50958bc5b4e94ae94f7fb36dd77","transcriptMerkleRoot":"sha256:e60a0d67c84be6f98ad1974db83cb5995b612c0df334949e69d37d7ba86d8283","startedAt":"2026-04-26T08:12:46Z","finishedAt":"2026-04-26T08:14:35Z","durationSeconds":109,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run llama3-8b-q40  # via _local_runner.py against the canonical commonsenseqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"98ea853ac4c2010a3fb0c887355a9fc5cfb017b39647ab498a526a2ef36617263c9b3ae6800993b447c5099f24c11380a325af728cd7a220bdc8f6e281377b04","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:657c0ecfad0bd4dcbf062db3be6475df54a6d658c93b0493d4e0d3d86c4cb5bf","methodology_hash":"sha256:11c314e72c2b767f36d059911da85f213c8fa50958bc5b4e94ae94f7fb36dd77","merkle_root":"sha256:e60a0d67c84be6f98ad1974db83cb5995b612c0df334949e69d37d7ba86d8283","claimed_score":62}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"98ea853ac4c2010a3fb0c887355a9fc5cfb017b39647ab498a526a2ef36617263c9b3ae6800993b447c5099f24c11380a325af728cd7a220bdc8f6e281377b04","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:14:34.643Z","verifiedAt":"2026-04-26T08:14:35Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: llama3-8b-q40."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T08:14:34.643Z","storeRoute":"store-run"},{"id":"run-local-13eeae71e9f9","serviceId":"openrouter","benchmarkId":"winogrande","model":"llama3-8b-q40","score":36,"runs":1,"breakdown":{"n":50,"passes":18,"mean_raw":0.36},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:6b80c06f2f42f404b65a8704af71bd8d3f2a7a85a4fbb9b23a9aa9d27ace7040","methodologyHash":"sha256:b0750456ca6269f519cb7dd4bbda66aa80a3806e9505189914584546305b2eaa","transcriptMerkleRoot":"sha256:6ede8320d525d041615b1c11a08b71df0330c4540cc39760648e3ff6cdda1fa3","startedAt":"2026-04-26T08:10:54Z","finishedAt":"2026-04-26T08:12:43Z","durationSeconds":109,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run llama3-8b-q40  # via _local_runner.py against the canonical winogrande sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"d44995fc8a25eb5cec9e9ac129c6e0ddca51c669dac720b736a4404f7b9708f4f92ab6b90bdfabdb49a34f3bc3ec7f026289ad4b918a0adaa037a3f4bdfb0e03","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:6b80c06f2f42f404b65a8704af71bd8d3f2a7a85a4fbb9b23a9aa9d27ace7040","methodology_hash":"sha256:b0750456ca6269f519cb7dd4bbda66aa80a3806e9505189914584546305b2eaa","merkle_root":"sha256:6ede8320d525d041615b1c11a08b71df0330c4540cc39760648e3ff6cdda1fa3","claimed_score":36}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"d44995fc8a25eb5cec9e9ac129c6e0ddca51c669dac720b736a4404f7b9708f4f92ab6b90bdfabdb49a34f3bc3ec7f026289ad4b918a0adaa037a3f4bdfb0e03","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:12:42.921Z","verifiedAt":"2026-04-26T08:12:43Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: llama3-8b-q40."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T08:12:42.921Z","storeRoute":"store-run"},{"id":"run-local-962d5fda31c9","serviceId":"openrouter","benchmarkId":"arc-challenge","model":"llama3-8b-q40","score":84,"runs":1,"breakdown":{"n":50,"passes":42,"mean_raw":0.84},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:144e9a13fb369f31007fffdcf4d7d55692677b409c8fa7b7dec4328c81a55752","methodologyHash":"sha256:8e84e6ffec11c082a286373b8b306600732cdf99b514079bfc0754fe4cd7a7c5","transcriptMerkleRoot":"sha256:fa750f816a123f8520c0fecc628ea79e02019e9356f344e469c471ac245824d4","startedAt":"2026-04-26T08:09:03Z","finishedAt":"2026-04-26T08:10:52Z","durationSeconds":109,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run llama3-8b-q40  # via _local_runner.py against the canonical arc-challenge sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"aef49805096c89afe0f6a7e2ee6cd07b0520c2d1ed7bff999445ec63a9a28475bf1ff2b5b3623eeef56419db730493f324b6e31b4cddc94e735e55e42eaf2a0a","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:144e9a13fb369f31007fffdcf4d7d55692677b409c8fa7b7dec4328c81a55752","methodology_hash":"sha256:8e84e6ffec11c082a286373b8b306600732cdf99b514079bfc0754fe4cd7a7c5","merkle_root":"sha256:fa750f816a123f8520c0fecc628ea79e02019e9356f344e469c471ac245824d4","claimed_score":84}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"aef49805096c89afe0f6a7e2ee6cd07b0520c2d1ed7bff999445ec63a9a28475bf1ff2b5b3623eeef56419db730493f324b6e31b4cddc94e735e55e42eaf2a0a","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:10:51.205Z","verifiedAt":"2026-04-26T08:10:52Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: llama3-8b-q40."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T08:10:51.205Z","storeRoute":"store-run"},{"id":"run-local-ca46a090418e","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"llama3-8b-q40","score":14.000000000000002,"runs":1,"breakdown":{"n":50,"passes":7,"mean_raw":0.14},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:c4ff0432d6539908d6b09b6ceac8693877afbd15a62806db219216507a578bdd","methodologyHash":"sha256:68cb69d8b9a5e40f78f9918e9ab57581115783d514402f7978180dac87834446","transcriptMerkleRoot":"sha256:0e5d956d3212b04e3fb65c3debb21a8cdb3efc605cdf357d0e13dc956b2f63ef","startedAt":"2026-04-26T08:07:00Z","finishedAt":"2026-04-26T08:09:01Z","durationSeconds":121,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run llama3-8b-q40  # via _local_runner.py against the canonical mmlu-pro sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"b22d910828600314a98f3e1ed5917a85629b3474af119e9188e8d57410d6794f4ddea08b89d1ae4a3b649b2fb1009720495b20b39a1034a949a1ed81133de009","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:c4ff0432d6539908d6b09b6ceac8693877afbd15a62806db219216507a578bdd","methodology_hash":"sha256:68cb69d8b9a5e40f78f9918e9ab57581115783d514402f7978180dac87834446","merkle_root":"sha256:0e5d956d3212b04e3fb65c3debb21a8cdb3efc605cdf357d0e13dc956b2f63ef","claimed_score":14.000000000000002}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"b22d910828600314a98f3e1ed5917a85629b3474af119e9188e8d57410d6794f4ddea08b89d1ae4a3b649b2fb1009720495b20b39a1034a949a1ed81133de009","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:09:00.265Z","verifiedAt":"2026-04-26T08:09:01Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: llama3-8b-q40."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T08:09:00.265Z","storeRoute":"store-run"},{"id":"run-a8efa2ab0d5d","serviceId":"anthropic-claude","benchmarkId":"mmlu-pro","model":"claude-sonnet-4.5","score":25,"runs":1,"breakdown":{"n":8,"passes":2,"mean_raw":0.25},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:f83f7230d012b45f7532fd0947ca596e7de52a518e7f9edcd0df4566b409bf9a","methodologyHash":"sha256:7d4179c2b699af35bc95f0bd466e9b344ae348a765fbbca15e14adbc4ceb7072","transcriptMerkleRoot":"sha256:e7c80ce1754ad43bafd10f5ec6b3b244ef3ae1a395d403a4daab043feca01048","startedAt":"2026-04-26T08:08:42.583Z","finishedAt":"2026-04-26T08:08:55.329Z","durationSeconds":13,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run mmlu-pro --service anthropic-claude --model claude-sonnet-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"d167d2a0668c85094f34f70f29b1313fe80b5f365c21b03ff98210b34794b40b5288e173d9c51ce49d1cf199957f54512cc88b1fc4c10f290262b845bc3b4a0b","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:f83f7230d012b45f7532fd0947ca596e7de52a518e7f9edcd0df4566b409bf9a","methodology_hash":"sha256:7d4179c2b699af35bc95f0bd466e9b344ae348a765fbbca15e14adbc4ceb7072","merkle_root":"sha256:e7c80ce1754ad43bafd10f5ec6b3b244ef3ae1a395d403a4daab043feca01048","claimed_score":25}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"d167d2a0668c85094f34f70f29b1313fe80b5f365c21b03ff98210b34794b40b5288e173d9c51ce49d1cf199957f54512cc88b1fc4c10f290262b845bc3b4a0b","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:08:55.329Z","verifiedAt":"2026-04-26T08:08:55.329Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-local-a7aa9c6be1a3","serviceId":"openrouter","benchmarkId":"gsm8k","model":"llama3-8b-q40","score":62,"runs":1,"breakdown":{"n":50,"passes":31,"mean_raw":0.62},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodologyHash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","transcriptMerkleRoot":"sha256:6b75ece81623be15e69542d7612827739c0d0d26848611e3610b66553bd6e0b6","startedAt":"2026-04-26T08:04:36Z","finishedAt":"2026-04-26T08:06:58Z","durationSeconds":142,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run llama3-8b-q40  # via _local_runner.py against the canonical gsm8k sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"41ee21cf5e0ea0b6795b68705c9cd609ee97503a9fd3a11b124711aed1052b2b55460f893f5648fbddeb5aecd763156c3f7c3083e2b745ab2fec3d1587857d01","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodology_hash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","merkle_root":"sha256:6b75ece81623be15e69542d7612827739c0d0d26848611e3610b66553bd6e0b6","claimed_score":62}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"41ee21cf5e0ea0b6795b68705c9cd609ee97503a9fd3a11b124711aed1052b2b55460f893f5648fbddeb5aecd763156c3f7c3083e2b745ab2fec3d1587857d01","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:06:56.986Z","verifiedAt":"2026-04-26T08:06:58Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: llama3-8b-q40."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T08:06:56.986Z","storeRoute":"store-run"},{"id":"run-local-fdbd6dd80dbd","serviceId":"openrouter","benchmarkId":"math-500","model":"deepseek-coder-v2-15.7b","score":6,"runs":1,"breakdown":{"n":50,"passes":3,"mean_raw":0.06},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:78fd61258ac0baaf1481bd291c6ed42004f95ff6302279502627fb0b9be98bd0","methodologyHash":"sha256:2d3e19ed12ebcd5f2e584cf64b353e417139647737adaef4e46e74aef90c4232","transcriptMerkleRoot":"sha256:32261b2d081319ce51043c43dcb2de4cc102e6b0aef77e46cd9663f1203a5036","startedAt":"2026-04-26T08:01:47Z","finishedAt":"2026-04-26T08:04:34Z","durationSeconds":167,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run deepseek-coder-v2-15.7b  # via _local_runner.py against the canonical math-500 sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"ca094720af13f3542a6fd69f1733d116be8f315a53827617f2cf2a7bdd3474f7ab9ed8dc7ca58982baa95a8fdeb545c8c084571bf6611b7db31b3578f9051f0e","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:78fd61258ac0baaf1481bd291c6ed42004f95ff6302279502627fb0b9be98bd0","methodology_hash":"sha256:2d3e19ed12ebcd5f2e584cf64b353e417139647737adaef4e46e74aef90c4232","merkle_root":"sha256:32261b2d081319ce51043c43dcb2de4cc102e6b0aef77e46cd9663f1203a5036","claimed_score":6}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"ca094720af13f3542a6fd69f1733d116be8f315a53827617f2cf2a7bdd3474f7ab9ed8dc7ca58982baa95a8fdeb545c8c084571bf6611b7db31b3578f9051f0e","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:04:33.068Z","verifiedAt":"2026-04-26T08:04:34Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: deepseek-coder-v2-15.7b."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T08:04:33.068Z","storeRoute":"store-run"},{"id":"run-7c78931c0662","serviceId":"anthropic-claude","benchmarkId":"commonsenseqa","model":"claude-sonnet-4.5","score":60,"runs":1,"breakdown":{"n":5,"passes":3,"mean_raw":0.6},"sampleCount":5,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:729b5c0850ac5be6b8cfbedf4d36938249bb7c0d9e9c980260037391414dd520","methodologyHash":"sha256:8d0b3e04740ec4f11b5e3eebe6601688d47de4830e84c15a2da6c3925212fadf","transcriptMerkleRoot":"sha256:7cdfb32ce44d403caa8f8e38d6c072a0559c5ec0fb512093555994e9e779e822","startedAt":"2026-04-26T08:01:34.894Z","finishedAt":"2026-04-26T08:04:24.459Z","durationSeconds":170,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run commonsenseqa --service anthropic-claude --model claude-sonnet-4.5 --runs 1 --limit 5","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"45b6b4f8aa07d9c454dc0047d43ec47c191ce105b5a85f1884a9cb2cd123f391d4a64d1ffc836a96cc26d2aab7a216a1918f7b728ed2de2af37dbd78a3189102","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:729b5c0850ac5be6b8cfbedf4d36938249bb7c0d9e9c980260037391414dd520","methodology_hash":"sha256:8d0b3e04740ec4f11b5e3eebe6601688d47de4830e84c15a2da6c3925212fadf","merkle_root":"sha256:7cdfb32ce44d403caa8f8e38d6c072a0559c5ec0fb512093555994e9e779e822","claimed_score":60}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"45b6b4f8aa07d9c454dc0047d43ec47c191ce105b5a85f1884a9cb2cd123f391d4a64d1ffc836a96cc26d2aab7a216a1918f7b728ed2de2af37dbd78a3189102","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:04:24.459Z","verifiedAt":"2026-04-26T08:04:24.459Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-a908e77f27b1","serviceId":"anthropic-claude","benchmarkId":"arc-challenge","model":"claude-sonnet-4.5","score":80,"runs":1,"breakdown":{"n":5,"passes":4,"mean_raw":0.8},"sampleCount":5,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:f35d86c3e38897e2781029fcdf1c8f9223d1250734e1cfe9adce4bc72915d23e","methodologyHash":"sha256:aa1495bf231aaeaadb1682c3ce29767c772010f89381461f6e5d9d975820db35","transcriptMerkleRoot":"sha256:32d750c2360a709bf856307da11d90b825f5db06a5c132f84ef5a880ee24be53","startedAt":"2026-04-26T08:01:32.274Z","finishedAt":"2026-04-26T08:03:03.219Z","durationSeconds":91,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run arc-challenge --service anthropic-claude --model claude-sonnet-4.5 --runs 1 --limit 5","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"3f7685480a0c2712a1cfbcf7054022f659c048ef30abbf80298e30ea1b2808a0f72600791094ca14f9db0c449881a645fa997ba760365bd4f4bd94fea3fd5502","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:f35d86c3e38897e2781029fcdf1c8f9223d1250734e1cfe9adce4bc72915d23e","methodology_hash":"sha256:aa1495bf231aaeaadb1682c3ce29767c772010f89381461f6e5d9d975820db35","merkle_root":"sha256:32d750c2360a709bf856307da11d90b825f5db06a5c132f84ef5a880ee24be53","claimed_score":80}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"3f7685480a0c2712a1cfbcf7054022f659c048ef30abbf80298e30ea1b2808a0f72600791094ca14f9db0c449881a645fa997ba760365bd4f4bd94fea3fd5502","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:03:03.219Z","verifiedAt":"2026-04-26T08:03:03.219Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-fad97f10ea9c","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-sonnet-4.5","score":80,"runs":1,"breakdown":{"n":5,"passes":4,"mean_raw":0.8},"sampleCount":5,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:9ea8af1ec906c4c6867324e26712933226e86e8d62a8da061b53107c97bdfdad","startedAt":"2026-04-26T08:01:29.650Z","finishedAt":"2026-04-26T08:02:00.227Z","durationSeconds":31,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-sonnet-4.5 --runs 1 --limit 5","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"66a91851d0a45f031f72984bc0cc2d5cfa869fed785ff60f5539595d2ddc220d4ca6fef5fee5c3153c7d8465a56fce2880bd0f1d68a47330b6450a2c67f8e302","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:9ea8af1ec906c4c6867324e26712933226e86e8d62a8da061b53107c97bdfdad","claimed_score":80}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"66a91851d0a45f031f72984bc0cc2d5cfa869fed785ff60f5539595d2ddc220d4ca6fef5fee5c3153c7d8465a56fce2880bd0f1d68a47330b6450a2c67f8e302","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:02:00.227Z","verifiedAt":"2026-04-26T08:02:00.227Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-local-3257fb4559be","serviceId":"openrouter","benchmarkId":"openbookqa","model":"deepseek-coder-v2-15.7b","score":66,"runs":1,"breakdown":{"n":50,"passes":33,"mean_raw":0.66},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:0a3bba9b686859f3e89e67cf0f4dce4276ba11e8ccd4ad7c367f98880d8b5953","methodologyHash":"sha256:ca613efb492af89b7f2f5efa61eeea602639622a37046ee5c6d9dec8b12c0a47","transcriptMerkleRoot":"sha256:95dd1f307e3a68e15de21f82c786cd10aba03094a748239fc7890cc1c2c3c303","startedAt":"2026-04-26T07:59:52Z","finishedAt":"2026-04-26T08:01:46Z","durationSeconds":114,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run deepseek-coder-v2-15.7b  # via _local_runner.py against the canonical openbookqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"7f656945249c6cbc9ef7c7e11b6b9c1ce5bd7df7b21d6caf3492ad5b1382363e6c5b375a88feb0b71753799b81bdc645932b608099ed604e15da3c2b9de3cc0a","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:0a3bba9b686859f3e89e67cf0f4dce4276ba11e8ccd4ad7c367f98880d8b5953","methodology_hash":"sha256:ca613efb492af89b7f2f5efa61eeea602639622a37046ee5c6d9dec8b12c0a47","merkle_root":"sha256:95dd1f307e3a68e15de21f82c786cd10aba03094a748239fc7890cc1c2c3c303","claimed_score":66}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"7f656945249c6cbc9ef7c7e11b6b9c1ce5bd7df7b21d6caf3492ad5b1382363e6c5b375a88feb0b71753799b81bdc645932b608099ed604e15da3c2b9de3cc0a","signerAlgo":"ed25519","submittedAt":"2026-04-26T08:01:44.505Z","verifiedAt":"2026-04-26T08:01:46Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: deepseek-coder-v2-15.7b."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T08:01:44.505Z","storeRoute":"store-run"},{"id":"run-local-2a0b8c8eb6ba","serviceId":"openrouter","benchmarkId":"commonsenseqa","model":"deepseek-coder-v2-15.7b","score":54,"runs":1,"breakdown":{"n":50,"passes":27,"mean_raw":0.54},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:657c0ecfad0bd4dcbf062db3be6475df54a6d658c93b0493d4e0d3d86c4cb5bf","methodologyHash":"sha256:11c314e72c2b767f36d059911da85f213c8fa50958bc5b4e94ae94f7fb36dd77","transcriptMerkleRoot":"sha256:a0a780000ae430f6299a306755cc17abc0a6521db51b14f653d0bd77ddfa49d7","startedAt":"2026-04-26T07:58:00Z","finishedAt":"2026-04-26T07:59:50Z","durationSeconds":110,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run deepseek-coder-v2-15.7b  # via _local_runner.py against the canonical commonsenseqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"b102c31c41205ab71ebe16f112dd66127f446418b16aba3f118dd5070d0eb2f238ad36b2d9569d3c77bc706020ed21fc73f10ad0b5b01d208b3e91cf181ef108","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:657c0ecfad0bd4dcbf062db3be6475df54a6d658c93b0493d4e0d3d86c4cb5bf","methodology_hash":"sha256:11c314e72c2b767f36d059911da85f213c8fa50958bc5b4e94ae94f7fb36dd77","merkle_root":"sha256:a0a780000ae430f6299a306755cc17abc0a6521db51b14f653d0bd77ddfa49d7","claimed_score":54}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"b102c31c41205ab71ebe16f112dd66127f446418b16aba3f118dd5070d0eb2f238ad36b2d9569d3c77bc706020ed21fc73f10ad0b5b01d208b3e91cf181ef108","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:59:49.061Z","verifiedAt":"2026-04-26T07:59:50Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: deepseek-coder-v2-15.7b."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:59:49.061Z","storeRoute":"store-run"},{"id":"run-local-f2318c67ad8f","serviceId":"openrouter","benchmarkId":"winogrande","model":"deepseek-coder-v2-15.7b","score":48,"runs":1,"breakdown":{"n":50,"passes":24,"mean_raw":0.48},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:6b80c06f2f42f404b65a8704af71bd8d3f2a7a85a4fbb9b23a9aa9d27ace7040","methodologyHash":"sha256:b0750456ca6269f519cb7dd4bbda66aa80a3806e9505189914584546305b2eaa","transcriptMerkleRoot":"sha256:c88e40aa0345cd6e11f58f45dd034614c67fa96be25978a3a330cbf7f9980168","startedAt":"2026-04-26T07:56:09Z","finishedAt":"2026-04-26T07:57:58Z","durationSeconds":109,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run deepseek-coder-v2-15.7b  # via _local_runner.py against the canonical winogrande sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"4ed86dba42faef59a5072905a29a3a30822b678e86c5612ca59634fb2b26e4c0adea562b288ad4754de922ef16cbacc162d77da083a3210c59d35cb307a72807","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:6b80c06f2f42f404b65a8704af71bd8d3f2a7a85a4fbb9b23a9aa9d27ace7040","methodology_hash":"sha256:b0750456ca6269f519cb7dd4bbda66aa80a3806e9505189914584546305b2eaa","merkle_root":"sha256:c88e40aa0345cd6e11f58f45dd034614c67fa96be25978a3a330cbf7f9980168","claimed_score":48}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"4ed86dba42faef59a5072905a29a3a30822b678e86c5612ca59634fb2b26e4c0adea562b288ad4754de922ef16cbacc162d77da083a3210c59d35cb307a72807","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:57:56.740Z","verifiedAt":"2026-04-26T07:57:58Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: deepseek-coder-v2-15.7b."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:57:56.740Z","storeRoute":"store-run"},{"id":"run-394f66beb10c","serviceId":"anthropic-claude","benchmarkId":"math-500","model":"claude-opus-4.7","score":37.5,"runs":1,"breakdown":{"n":8,"passes":3,"mean_raw":0.375},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:9e86adfbb746209853106da77f29348041d67bb236dcd3cc8ad472da93058634","methodologyHash":"sha256:0002df534e2793d29cba5e29d2385b12303330f41257e1ee969eec3f3b1b5f56","transcriptMerkleRoot":"sha256:8e493c4dbd862a1eda6f8798f26b9efde1d1232b7bf23ec21d49343d37c16782","startedAt":"2026-04-26T07:43:43.382Z","finishedAt":"2026-04-26T07:56:43.828Z","durationSeconds":780,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run math-500 --service anthropic-claude --model claude-opus-4.7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"6c28b26891bea2fdec3d0ab187b38debdf44bacaa442ada536e381520e3ceb5d979c3965e8d63b443bbf724561a6f553defaa40ab6c573601768b7479b46c60b","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:9e86adfbb746209853106da77f29348041d67bb236dcd3cc8ad472da93058634","methodology_hash":"sha256:0002df534e2793d29cba5e29d2385b12303330f41257e1ee969eec3f3b1b5f56","merkle_root":"sha256:8e493c4dbd862a1eda6f8798f26b9efde1d1232b7bf23ec21d49343d37c16782","claimed_score":37.5}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"6c28b26891bea2fdec3d0ab187b38debdf44bacaa442ada536e381520e3ceb5d979c3965e8d63b443bbf724561a6f553defaa40ab6c573601768b7479b46c60b","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:56:43.828Z","verifiedAt":"2026-04-26T07:56:43.828Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-176c3d2e6cac","serviceId":"anthropic-claude","benchmarkId":"hellaswag","model":"claude-opus-4.7","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:b967f14e9705f2c1512bfecbc280340660ac60811aca2cd09789d654cb44b3ee","methodologyHash":"sha256:2725c767f087367a0bbb3d937db51573191931b9f2e7a805d74297244330c18f","transcriptMerkleRoot":"sha256:f32a75a0cb97157f63f48ccd2f6c5f900702a4c4da33c2efdd637cb9a8197b07","startedAt":"2026-04-26T07:43:42.084Z","finishedAt":"2026-04-26T07:56:26.728Z","durationSeconds":765,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run hellaswag --service anthropic-claude --model claude-opus-4.7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"960438a748755c317b1ed7b1d3fa44908bc1acde078d0ca47f13c1dcd9e4acae49fe9dcfa46f01625ff1ee81e454221f35cec62b6d00b7ca09ecc390c09fcf01","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:b967f14e9705f2c1512bfecbc280340660ac60811aca2cd09789d654cb44b3ee","methodology_hash":"sha256:2725c767f087367a0bbb3d937db51573191931b9f2e7a805d74297244330c18f","merkle_root":"sha256:f32a75a0cb97157f63f48ccd2f6c5f900702a4c4da33c2efdd637cb9a8197b07","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"960438a748755c317b1ed7b1d3fa44908bc1acde078d0ca47f13c1dcd9e4acae49fe9dcfa46f01625ff1ee81e454221f35cec62b6d00b7ca09ecc390c09fcf01","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:56:26.728Z","verifiedAt":"2026-04-26T07:56:26.728Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-local-a853d2069324","serviceId":"openrouter","benchmarkId":"arc-challenge","model":"deepseek-coder-v2-15.7b","score":74,"runs":1,"breakdown":{"n":50,"passes":37,"mean_raw":0.74},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:144e9a13fb369f31007fffdcf4d7d55692677b409c8fa7b7dec4328c81a55752","methodologyHash":"sha256:8e84e6ffec11c082a286373b8b306600732cdf99b514079bfc0754fe4cd7a7c5","transcriptMerkleRoot":"sha256:e2688cd44cdf65aea95930962242ea2ee7a945765e6c087aba9073479671d787","startedAt":"2026-04-26T07:54:12Z","finishedAt":"2026-04-26T07:56:07Z","durationSeconds":115,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run deepseek-coder-v2-15.7b  # via _local_runner.py against the canonical arc-challenge sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"4fb03dd4a4b9f0e2e85ae269c856646652b7e7a9349429269054c6a5365e7d741a527acdb85e8c6abebd103ef82ba0508b212c214923ca0a107ad17db519450f","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:144e9a13fb369f31007fffdcf4d7d55692677b409c8fa7b7dec4328c81a55752","methodology_hash":"sha256:8e84e6ffec11c082a286373b8b306600732cdf99b514079bfc0754fe4cd7a7c5","merkle_root":"sha256:e2688cd44cdf65aea95930962242ea2ee7a945765e6c087aba9073479671d787","claimed_score":74}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"4fb03dd4a4b9f0e2e85ae269c856646652b7e7a9349429269054c6a5365e7d741a527acdb85e8c6abebd103ef82ba0508b212c214923ca0a107ad17db519450f","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:56:06.055Z","verifiedAt":"2026-04-26T07:56:07Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: deepseek-coder-v2-15.7b."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:56:06.055Z","storeRoute":"store-run"},{"id":"run-d5a5e53e3f04","serviceId":"anthropic-claude","benchmarkId":"openbookqa","model":"claude-opus-4.7","score":87.5,"runs":1,"breakdown":{"n":8,"passes":7,"mean_raw":0.875},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:9de02c962a604c215aef6e33014a48f52c1eb284f810c0804a96884968f79952","methodologyHash":"sha256:bb8802c4ac04e76daf5aefca1b04358cb73f67dea1022ebec4a9e570d6be83c1","transcriptMerkleRoot":"sha256:51bc3f4cf0803f843ff695bc3b1b293bd2bf4600c8f0eb26a1b15b99ed75b00c","startedAt":"2026-04-26T07:43:40.702Z","finishedAt":"2026-04-26T07:55:44.007Z","durationSeconds":723,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run openbookqa --service anthropic-claude --model claude-opus-4.7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"fac1103f5ab3d92e05e184112188f2af28aa37d0b3bf5d16ee0be9bbb40747a0e6859fb036604fe9ae1ff4b9066885ae30c7f81b16dc5aa135d3604f36cf4908","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:9de02c962a604c215aef6e33014a48f52c1eb284f810c0804a96884968f79952","methodology_hash":"sha256:bb8802c4ac04e76daf5aefca1b04358cb73f67dea1022ebec4a9e570d6be83c1","merkle_root":"sha256:51bc3f4cf0803f843ff695bc3b1b293bd2bf4600c8f0eb26a1b15b99ed75b00c","claimed_score":87.5}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"fac1103f5ab3d92e05e184112188f2af28aa37d0b3bf5d16ee0be9bbb40747a0e6859fb036604fe9ae1ff4b9066885ae30c7f81b16dc5aa135d3604f36cf4908","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:55:44.007Z","verifiedAt":"2026-04-26T07:55:44.007Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-25725c42c101","serviceId":"anthropic-claude","benchmarkId":"commonsenseqa","model":"claude-opus-4.7","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:729b5c0850ac5be6b8cfbedf4d36938249bb7c0d9e9c980260037391414dd520","methodologyHash":"sha256:8d0b3e04740ec4f11b5e3eebe6601688d47de4830e84c15a2da6c3925212fadf","transcriptMerkleRoot":"sha256:8d5a460292d25224cbe99b28252b25791f8af0b520e04684a2ef4f97a6c024fd","startedAt":"2026-04-26T07:43:39.405Z","finishedAt":"2026-04-26T07:54:59.879Z","durationSeconds":680,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run commonsenseqa --service anthropic-claude --model claude-opus-4.7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"68b3dcb54df4166101c9cc6fc3f2f1e1b9b616582c3f6d24e67cf6b561a7cccfa958104cfcbc2e837750a89f10028acbba10312fcbb84bd70e2fae4235d84d0b","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:729b5c0850ac5be6b8cfbedf4d36938249bb7c0d9e9c980260037391414dd520","methodology_hash":"sha256:8d0b3e04740ec4f11b5e3eebe6601688d47de4830e84c15a2da6c3925212fadf","merkle_root":"sha256:8d5a460292d25224cbe99b28252b25791f8af0b520e04684a2ef4f97a6c024fd","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"68b3dcb54df4166101c9cc6fc3f2f1e1b9b616582c3f6d24e67cf6b561a7cccfa958104cfcbc2e837750a89f10028acbba10312fcbb84bd70e2fae4235d84d0b","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:54:59.879Z","verifiedAt":"2026-04-26T07:54:59.879Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-50c2e923b0f8","serviceId":"anthropic-claude","benchmarkId":"winogrande","model":"claude-opus-4.7","score":87.5,"runs":1,"breakdown":{"n":8,"passes":7,"mean_raw":0.875},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:1836ea9a0cb7f0dc0e641b157c14cc381c6b5dd60cdb1a4f9ae49c637dc92e92","methodologyHash":"sha256:5f9aa4a5643833709c75c9a8415818528a2b17fa4422744051e798cea549752b","transcriptMerkleRoot":"sha256:3aeb939b73983e8f634c3d1016cf948d3c620355ca8b8d2cc5b864c413f9e829","startedAt":"2026-04-26T07:43:38.102Z","finishedAt":"2026-04-26T07:54:44.034Z","durationSeconds":666,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run winogrande --service anthropic-claude --model claude-opus-4.7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"c5e64a28333d861cc094f4ef404ae95f54b8bcc5e17960761b98c2c19a8f2a5b62a7084523e75386095c83846c96a8ebde3b6d6286296bbd5295d7448f17340d","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1836ea9a0cb7f0dc0e641b157c14cc381c6b5dd60cdb1a4f9ae49c637dc92e92","methodology_hash":"sha256:5f9aa4a5643833709c75c9a8415818528a2b17fa4422744051e798cea549752b","merkle_root":"sha256:3aeb939b73983e8f634c3d1016cf948d3c620355ca8b8d2cc5b864c413f9e829","claimed_score":87.5}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"c5e64a28333d861cc094f4ef404ae95f54b8bcc5e17960761b98c2c19a8f2a5b62a7084523e75386095c83846c96a8ebde3b6d6286296bbd5295d7448f17340d","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:54:44.034Z","verifiedAt":"2026-04-26T07:54:44.034Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-dcaa0e1b6953","serviceId":"anthropic-claude","benchmarkId":"arc-challenge","model":"claude-opus-4.7","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:f35d86c3e38897e2781029fcdf1c8f9223d1250734e1cfe9adce4bc72915d23e","methodologyHash":"sha256:aa1495bf231aaeaadb1682c3ce29767c772010f89381461f6e5d9d975820db35","transcriptMerkleRoot":"sha256:2b5b9265a17a685917c9b28bace58144ab0f3cf5a1cf15eaef954a86bd1b12ba","startedAt":"2026-04-26T07:43:36.728Z","finishedAt":"2026-04-26T07:54:15.518Z","durationSeconds":639,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run arc-challenge --service anthropic-claude --model claude-opus-4.7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"e6029a56f132b6185bb59de50a3be76ca2895921c456ab8e60601871a7356b660055887b6a45c34f2036a25fc635610ce70e71f1fb16e5bbc1dc79c8ac89e806","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:f35d86c3e38897e2781029fcdf1c8f9223d1250734e1cfe9adce4bc72915d23e","methodology_hash":"sha256:aa1495bf231aaeaadb1682c3ce29767c772010f89381461f6e5d9d975820db35","merkle_root":"sha256:2b5b9265a17a685917c9b28bace58144ab0f3cf5a1cf15eaef954a86bd1b12ba","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"e6029a56f132b6185bb59de50a3be76ca2895921c456ab8e60601871a7356b660055887b6a45c34f2036a25fc635610ce70e71f1fb16e5bbc1dc79c8ac89e806","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:54:15.518Z","verifiedAt":"2026-04-26T07:54:15.518Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-local-c2825e0ba8e8","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"deepseek-coder-v2-15.7b","score":26,"runs":1,"breakdown":{"n":50,"passes":13,"mean_raw":0.26},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:c4ff0432d6539908d6b09b6ceac8693877afbd15a62806db219216507a578bdd","methodologyHash":"sha256:68cb69d8b9a5e40f78f9918e9ab57581115783d514402f7978180dac87834446","transcriptMerkleRoot":"sha256:a92e67ba80160e3d098889e74ddf166ce4cd417523d043723077d71e9a7585e7","startedAt":"2026-04-26T07:51:56Z","finishedAt":"2026-04-26T07:54:10Z","durationSeconds":134,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run deepseek-coder-v2-15.7b  # via _local_runner.py against the canonical mmlu-pro sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"e7e9e5e1256d139374c2ca9cb02c28824b2041f85d5eb8a9c5c6ed80cc51f86d45472f6bf9ec2f623a008140194ad475d9f37a448e27a8638cd073aa3e849901","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:c4ff0432d6539908d6b09b6ceac8693877afbd15a62806db219216507a578bdd","methodology_hash":"sha256:68cb69d8b9a5e40f78f9918e9ab57581115783d514402f7978180dac87834446","merkle_root":"sha256:a92e67ba80160e3d098889e74ddf166ce4cd417523d043723077d71e9a7585e7","claimed_score":26}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"e7e9e5e1256d139374c2ca9cb02c28824b2041f85d5eb8a9c5c6ed80cc51f86d45472f6bf9ec2f623a008140194ad475d9f37a448e27a8638cd073aa3e849901","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:54:08.755Z","verifiedAt":"2026-04-26T07:54:10Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: deepseek-coder-v2-15.7b."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:54:08.755Z","storeRoute":"store-run"},{"id":"run-d70f62c1a050","serviceId":"anthropic-claude","benchmarkId":"mmlu-pro","model":"claude-opus-4.7","score":62.5,"runs":1,"breakdown":{"n":8,"passes":5,"mean_raw":0.625},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:f83f7230d012b45f7532fd0947ca596e7de52a518e7f9edcd0df4566b409bf9a","methodologyHash":"sha256:7d4179c2b699af35bc95f0bd466e9b344ae348a765fbbca15e14adbc4ceb7072","transcriptMerkleRoot":"sha256:5e2118a15b77a54801050dcd7aea3a922e463984c48ccfd752f8465bfd8395bd","startedAt":"2026-04-26T07:43:35.410Z","finishedAt":"2026-04-26T07:53:30.996Z","durationSeconds":596,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run mmlu-pro --service anthropic-claude --model claude-opus-4.7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"54a98ecdaf9ded845a704bc616dc4108ab7d9533636ec81f3ab196c8297edef56ed27358477ae91802fd474c08728ae02dcb7c06482dbad7a8ae2e7949cedf0b","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:f83f7230d012b45f7532fd0947ca596e7de52a518e7f9edcd0df4566b409bf9a","methodology_hash":"sha256:7d4179c2b699af35bc95f0bd466e9b344ae348a765fbbca15e14adbc4ceb7072","merkle_root":"sha256:5e2118a15b77a54801050dcd7aea3a922e463984c48ccfd752f8465bfd8395bd","claimed_score":62.5}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"54a98ecdaf9ded845a704bc616dc4108ab7d9533636ec81f3ab196c8297edef56ed27358477ae91802fd474c08728ae02dcb7c06482dbad7a8ae2e7949cedf0b","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:53:30.996Z","verifiedAt":"2026-04-26T07:53:30.996Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-73546fdc6579","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-opus-4.7","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:f328471e1828a515a09c9839647f8223d4c89c6ffff72db2f852084b85504cf4","startedAt":"2026-04-26T07:43:34.081Z","finishedAt":"2026-04-26T07:52:46.969Z","durationSeconds":553,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-opus-4.7 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"f6094a328e63037feaec3980ce46c76ea2a005231169110a59c35a7a05686c5ad827dba85d9fd7d0dea88896e5b614e8147f637df5823ee5487ca2479975ad07","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:f328471e1828a515a09c9839647f8223d4c89c6ffff72db2f852084b85504cf4","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"f6094a328e63037feaec3980ce46c76ea2a005231169110a59c35a7a05686c5ad827dba85d9fd7d0dea88896e5b614e8147f637df5823ee5487ca2479975ad07","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:52:46.969Z","verifiedAt":"2026-04-26T07:52:46.969Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-bfab402331d6","serviceId":"anthropic-claude","benchmarkId":"math-500","model":"claude-haiku-4.5","score":37.5,"runs":1,"breakdown":{"n":8,"passes":3,"mean_raw":0.375},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:9e86adfbb746209853106da77f29348041d67bb236dcd3cc8ad472da93058634","methodologyHash":"sha256:0002df534e2793d29cba5e29d2385b12303330f41257e1ee969eec3f3b1b5f56","transcriptMerkleRoot":"sha256:ee6aa56ebbb60b777a5fab744a77ea10410bf50a11c84253e7105e48167a24ed","startedAt":"2026-04-26T07:43:32.768Z","finishedAt":"2026-04-26T07:52:39.107Z","durationSeconds":546,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run math-500 --service anthropic-claude --model claude-haiku-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"faa3b2a917fee8a7ff230f11bcc95e69fab7026809075aba9c79e23373773c2b6cb85113e6614a1cc031496fa3c72ee762732c175a8b67de8c0441ebdcda0b0f","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:9e86adfbb746209853106da77f29348041d67bb236dcd3cc8ad472da93058634","methodology_hash":"sha256:0002df534e2793d29cba5e29d2385b12303330f41257e1ee969eec3f3b1b5f56","merkle_root":"sha256:ee6aa56ebbb60b777a5fab744a77ea10410bf50a11c84253e7105e48167a24ed","claimed_score":37.5}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"faa3b2a917fee8a7ff230f11bcc95e69fab7026809075aba9c79e23373773c2b6cb85113e6614a1cc031496fa3c72ee762732c175a8b67de8c0441ebdcda0b0f","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:52:39.107Z","verifiedAt":"2026-04-26T07:52:39.107Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-99412c69b7dc","serviceId":"anthropic-claude","benchmarkId":"hellaswag","model":"claude-haiku-4.5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:b967f14e9705f2c1512bfecbc280340660ac60811aca2cd09789d654cb44b3ee","methodologyHash":"sha256:2725c767f087367a0bbb3d937db51573191931b9f2e7a805d74297244330c18f","transcriptMerkleRoot":"sha256:dd5f6d663d8dcaed58270faafcf0e397def41f4aacab6d18920d0fe2f747e351","startedAt":"2026-04-26T07:43:31.439Z","finishedAt":"2026-04-26T07:52:00.617Z","durationSeconds":509,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run hellaswag --service anthropic-claude --model claude-haiku-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"7d26da4d17e9c8c3b5a9e59a6444ca5b649c256fa8c0ec7ca0cc23ebbf49e248ed057b4c0c49952e2bc2061fa82aee2ee8de0bcdf57e029c1aa10d02f166480d","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:b967f14e9705f2c1512bfecbc280340660ac60811aca2cd09789d654cb44b3ee","methodology_hash":"sha256:2725c767f087367a0bbb3d937db51573191931b9f2e7a805d74297244330c18f","merkle_root":"sha256:dd5f6d663d8dcaed58270faafcf0e397def41f4aacab6d18920d0fe2f747e351","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"7d26da4d17e9c8c3b5a9e59a6444ca5b649c256fa8c0ec7ca0cc23ebbf49e248ed057b4c0c49952e2bc2061fa82aee2ee8de0bcdf57e029c1aa10d02f166480d","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:52:00.617Z","verifiedAt":"2026-04-26T07:52:00.617Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-local-a32cbb52839c","serviceId":"openrouter","benchmarkId":"gsm8k","model":"deepseek-coder-v2-15.7b","score":70,"runs":1,"breakdown":{"n":50,"passes":35,"mean_raw":0.7},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodologyHash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","transcriptMerkleRoot":"sha256:110787392b89aa95b73368662f1a50752079a2dc5307af420269c8cc32502fed","startedAt":"2026-04-26T07:49:10Z","finishedAt":"2026-04-26T07:51:54Z","durationSeconds":164,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run deepseek-coder-v2-15.7b  # via _local_runner.py against the canonical gsm8k sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"4225304f2b491dbec545b7ccba6e4d2c170ffca0fa75114e8a254a38b99787310cedccf38dd9b5b8548e70eab8e91cd8d47fe760ae054135970a5f02be67a80a","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodology_hash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","merkle_root":"sha256:110787392b89aa95b73368662f1a50752079a2dc5307af420269c8cc32502fed","claimed_score":70}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"4225304f2b491dbec545b7ccba6e4d2c170ffca0fa75114e8a254a38b99787310cedccf38dd9b5b8548e70eab8e91cd8d47fe760ae054135970a5f02be67a80a","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:51:53.351Z","verifiedAt":"2026-04-26T07:51:54Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: deepseek-coder-v2-15.7b."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:51:53.351Z","storeRoute":"store-run"},{"id":"run-4485b9624721","serviceId":"anthropic-claude","benchmarkId":"openbookqa","model":"claude-haiku-4.5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:9de02c962a604c215aef6e33014a48f52c1eb284f810c0804a96884968f79952","methodologyHash":"sha256:bb8802c4ac04e76daf5aefca1b04358cb73f67dea1022ebec4a9e570d6be83c1","transcriptMerkleRoot":"sha256:ae0c248f93be0f5cef08d859417e4b76e89ac0a2f1df46eb7caf21baa273851e","startedAt":"2026-04-26T07:43:30.144Z","finishedAt":"2026-04-26T07:51:14.166Z","durationSeconds":464,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run openbookqa --service anthropic-claude --model claude-haiku-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"85b2c128c8a4728dbf2a77f3277ab9d95ab051654d69d69343955e637149ab45ede3b48e4690f381b6400a0424670f020d8e393a69150ab8b0b1b4db0e929f05","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:9de02c962a604c215aef6e33014a48f52c1eb284f810c0804a96884968f79952","methodology_hash":"sha256:bb8802c4ac04e76daf5aefca1b04358cb73f67dea1022ebec4a9e570d6be83c1","merkle_root":"sha256:ae0c248f93be0f5cef08d859417e4b76e89ac0a2f1df46eb7caf21baa273851e","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"85b2c128c8a4728dbf2a77f3277ab9d95ab051654d69d69343955e637149ab45ede3b48e4690f381b6400a0424670f020d8e393a69150ab8b0b1b4db0e929f05","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:51:14.166Z","verifiedAt":"2026-04-26T07:51:14.166Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-554f64031405","serviceId":"anthropic-claude","benchmarkId":"commonsenseqa","model":"claude-haiku-4.5","score":87.5,"runs":1,"breakdown":{"n":8,"passes":7,"mean_raw":0.875},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:729b5c0850ac5be6b8cfbedf4d36938249bb7c0d9e9c980260037391414dd520","methodologyHash":"sha256:8d0b3e04740ec4f11b5e3eebe6601688d47de4830e84c15a2da6c3925212fadf","transcriptMerkleRoot":"sha256:a14ea371edafd9100bcd4a3b9f4200a52a289bb7b32142e04db931df6c6d12de","startedAt":"2026-04-26T07:43:28.839Z","finishedAt":"2026-04-26T07:50:41.517Z","durationSeconds":433,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run commonsenseqa --service anthropic-claude --model claude-haiku-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"4d9c7c2311504f5f740d6831a9e56f47cc5cd29c5a58cd394fb4d0e9f5794943d3d59df74e7786e86c50c045333346710dc3057133d399362e302ccb9e1bf80e","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:729b5c0850ac5be6b8cfbedf4d36938249bb7c0d9e9c980260037391414dd520","methodology_hash":"sha256:8d0b3e04740ec4f11b5e3eebe6601688d47de4830e84c15a2da6c3925212fadf","merkle_root":"sha256:a14ea371edafd9100bcd4a3b9f4200a52a289bb7b32142e04db931df6c6d12de","claimed_score":87.5}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"4d9c7c2311504f5f740d6831a9e56f47cc5cd29c5a58cd394fb4d0e9f5794943d3d59df74e7786e86c50c045333346710dc3057133d399362e302ccb9e1bf80e","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:50:41.517Z","verifiedAt":"2026-04-26T07:50:41.517Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-eb218b13e76d","serviceId":"anthropic-claude","benchmarkId":"winogrande","model":"claude-haiku-4.5","score":87.5,"runs":1,"breakdown":{"n":8,"passes":7,"mean_raw":0.875},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:1836ea9a0cb7f0dc0e641b157c14cc381c6b5dd60cdb1a4f9ae49c637dc92e92","methodologyHash":"sha256:5f9aa4a5643833709c75c9a8415818528a2b17fa4422744051e798cea549752b","transcriptMerkleRoot":"sha256:f7891ddab1feb1c1f6211416f43f0c391130909875c76a55aac54df197fcf367","startedAt":"2026-04-26T07:43:27.526Z","finishedAt":"2026-04-26T07:50:33.001Z","durationSeconds":425,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run winogrande --service anthropic-claude --model claude-haiku-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"226689cabf5cfe81dddd7172d59d7f6840819b730564f1fa32b9fa516e3c5eddd001be893fd2ea96b0d0f5f465b9c3745afe3da3197d450639b316f709488809","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1836ea9a0cb7f0dc0e641b157c14cc381c6b5dd60cdb1a4f9ae49c637dc92e92","methodology_hash":"sha256:5f9aa4a5643833709c75c9a8415818528a2b17fa4422744051e798cea549752b","merkle_root":"sha256:f7891ddab1feb1c1f6211416f43f0c391130909875c76a55aac54df197fcf367","claimed_score":87.5}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"226689cabf5cfe81dddd7172d59d7f6840819b730564f1fa32b9fa516e3c5eddd001be893fd2ea96b0d0f5f465b9c3745afe3da3197d450639b316f709488809","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:50:33.001Z","verifiedAt":"2026-04-26T07:50:33.001Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-29f863d2da35","serviceId":"anthropic-claude","benchmarkId":"arc-challenge","model":"claude-haiku-4.5","score":87.5,"runs":1,"breakdown":{"n":8,"passes":7,"mean_raw":0.875},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:f35d86c3e38897e2781029fcdf1c8f9223d1250734e1cfe9adce4bc72915d23e","methodologyHash":"sha256:aa1495bf231aaeaadb1682c3ce29767c772010f89381461f6e5d9d975820db35","transcriptMerkleRoot":"sha256:202e01811d0df505196fbbc2a12f1817422cf990518aedf145151079ef0c27e7","startedAt":"2026-04-26T07:43:26.244Z","finishedAt":"2026-04-26T07:49:53.424Z","durationSeconds":387,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run arc-challenge --service anthropic-claude --model claude-haiku-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"39c9eb764ef6f1265344f01af039e1bd7d10684057ec28c27d9431b9d6c2682c22a223ca323b59f78ed09034646ed34f09178d01c4c3672e9caf38e95ea50a0c","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:f35d86c3e38897e2781029fcdf1c8f9223d1250734e1cfe9adce4bc72915d23e","methodology_hash":"sha256:aa1495bf231aaeaadb1682c3ce29767c772010f89381461f6e5d9d975820db35","merkle_root":"sha256:202e01811d0df505196fbbc2a12f1817422cf990518aedf145151079ef0c27e7","claimed_score":87.5}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"39c9eb764ef6f1265344f01af039e1bd7d10684057ec28c27d9431b9d6c2682c22a223ca323b59f78ed09034646ed34f09178d01c4c3672e9caf38e95ea50a0c","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:49:53.424Z","verifiedAt":"2026-04-26T07:49:53.424Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-6deacb1f549a","serviceId":"anthropic-claude","benchmarkId":"mmlu-pro","model":"claude-haiku-4.5","score":50,"runs":1,"breakdown":{"n":8,"passes":4,"mean_raw":0.5},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:f83f7230d012b45f7532fd0947ca596e7de52a518e7f9edcd0df4566b409bf9a","methodologyHash":"sha256:7d4179c2b699af35bc95f0bd466e9b344ae348a765fbbca15e14adbc4ceb7072","transcriptMerkleRoot":"sha256:de2ce627d5ed4249245ecdfbe03ec92f6449526f1dc189c8dcd578cc6789e2e9","startedAt":"2026-04-26T07:43:24.972Z","finishedAt":"2026-04-26T07:49:10.937Z","durationSeconds":346,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run mmlu-pro --service anthropic-claude --model claude-haiku-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"e5e95f9ba975394397a1007ca794d09f4d9b4f4cb1da5d05878e09fefd1ac14375b9480e2b85dc1bfb0ff6663e147c9232788e4ba1d86638beaa6aeed45d250d","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:f83f7230d012b45f7532fd0947ca596e7de52a518e7f9edcd0df4566b409bf9a","methodology_hash":"sha256:7d4179c2b699af35bc95f0bd466e9b344ae348a765fbbca15e14adbc4ceb7072","merkle_root":"sha256:de2ce627d5ed4249245ecdfbe03ec92f6449526f1dc189c8dcd578cc6789e2e9","claimed_score":50}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"e5e95f9ba975394397a1007ca794d09f4d9b4f4cb1da5d05878e09fefd1ac14375b9480e2b85dc1bfb0ff6663e147c9232788e4ba1d86638beaa6aeed45d250d","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:49:10.937Z","verifiedAt":"2026-04-26T07:49:10.937Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-local-96033e4dc49e","serviceId":"openrouter","benchmarkId":"math-500","model":"mistral-7b-q4km","score":16,"runs":1,"breakdown":{"n":50,"passes":8,"mean_raw":0.16},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:78fd61258ac0baaf1481bd291c6ed42004f95ff6302279502627fb0b9be98bd0","methodologyHash":"sha256:2d3e19ed12ebcd5f2e584cf64b353e417139647737adaef4e46e74aef90c4232","transcriptMerkleRoot":"sha256:702db0a8f5d37b78967e700bbdafe62458f10ce4e3dd2f09e1c654bb2d9dab7e","startedAt":"2026-04-26T07:46:26Z","finishedAt":"2026-04-26T07:49:09Z","durationSeconds":163,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run mistral-7b-q4km  # via _local_runner.py against the canonical math-500 sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"5133cccee64a0af123a0dcf15d7ffb84d4096cc93ccafab3ad26e70bde7fdd8ccb74904469a3faea9b12464650341bbe9ab72919701a5891c0c9fbc8f7feeb0b","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:78fd61258ac0baaf1481bd291c6ed42004f95ff6302279502627fb0b9be98bd0","methodology_hash":"sha256:2d3e19ed12ebcd5f2e584cf64b353e417139647737adaef4e46e74aef90c4232","merkle_root":"sha256:702db0a8f5d37b78967e700bbdafe62458f10ce4e3dd2f09e1c654bb2d9dab7e","claimed_score":16}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"5133cccee64a0af123a0dcf15d7ffb84d4096cc93ccafab3ad26e70bde7fdd8ccb74904469a3faea9b12464650341bbe9ab72919701a5891c0c9fbc8f7feeb0b","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:49:07.318Z","verifiedAt":"2026-04-26T07:49:09Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: mistral-7b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:49:07.318Z","storeRoute":"store-run"},{"id":"run-9b7f26a8846f","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-haiku-4.5","score":100,"runs":1,"breakdown":{"n":8,"passes":8,"mean_raw":1},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:c3150847e05bf044d762113921f30f6406a5e44509251d6fec5d52703c35a05a","startedAt":"2026-04-26T07:43:23.626Z","finishedAt":"2026-04-26T07:48:34.414Z","durationSeconds":311,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-haiku-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"c1f20fd74d42b5de87cfae6abbff9b14c6065750c0271a17635cf2d2d905de0b1de1313683c38df90899a6cc3bf67ea6303b70f9aeabce8e954723e4fa81d20a","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:c3150847e05bf044d762113921f30f6406a5e44509251d6fec5d52703c35a05a","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"c1f20fd74d42b5de87cfae6abbff9b14c6065750c0271a17635cf2d2d905de0b1de1313683c38df90899a6cc3bf67ea6303b70f9aeabce8e954723e4fa81d20a","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:48:34.414Z","verifiedAt":"2026-04-26T07:48:34.414Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-local-1dddb05218ce","serviceId":"openrouter","benchmarkId":"openbookqa","model":"mistral-7b-q4km","score":72,"runs":1,"breakdown":{"n":50,"passes":36,"mean_raw":0.72},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:0a3bba9b686859f3e89e67cf0f4dce4276ba11e8ccd4ad7c367f98880d8b5953","methodologyHash":"sha256:ca613efb492af89b7f2f5efa61eeea602639622a37046ee5c6d9dec8b12c0a47","transcriptMerkleRoot":"sha256:a7cfa45b891a0e26528c703be403e1c60681b37c97d52e91c8784b19ebb208ac","startedAt":"2026-04-26T07:44:38Z","finishedAt":"2026-04-26T07:46:25Z","durationSeconds":107,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run mistral-7b-q4km  # via _local_runner.py against the canonical openbookqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"89560be5c23f39ac607a2e62054ac87147c8aca4e876e2c32576b27e4b2f555d26e8bd012b12f85ec6c1e2b43581e6e5189454f84c5909e6bfc3ce5409c3610c","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:0a3bba9b686859f3e89e67cf0f4dce4276ba11e8ccd4ad7c367f98880d8b5953","methodology_hash":"sha256:ca613efb492af89b7f2f5efa61eeea602639622a37046ee5c6d9dec8b12c0a47","merkle_root":"sha256:a7cfa45b891a0e26528c703be403e1c60681b37c97d52e91c8784b19ebb208ac","claimed_score":72}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"89560be5c23f39ac607a2e62054ac87147c8aca4e876e2c32576b27e4b2f555d26e8bd012b12f85ec6c1e2b43581e6e5189454f84c5909e6bfc3ce5409c3610c","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:46:23.347Z","verifiedAt":"2026-04-26T07:46:25Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: mistral-7b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:46:23.347Z","storeRoute":"store-run"},{"id":"run-local-ff9bbc6f5000","serviceId":"openrouter","benchmarkId":"commonsenseqa","model":"mistral-7b-q4km","score":56.00000000000001,"runs":1,"breakdown":{"n":50,"passes":28,"mean_raw":0.56},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:657c0ecfad0bd4dcbf062db3be6475df54a6d658c93b0493d4e0d3d86c4cb5bf","methodologyHash":"sha256:11c314e72c2b767f36d059911da85f213c8fa50958bc5b4e94ae94f7fb36dd77","transcriptMerkleRoot":"sha256:8e65f134127f3238352173c64a290c1ee53509baa064c284671e173f09822132","startedAt":"2026-04-26T07:42:49Z","finishedAt":"2026-04-26T07:44:36Z","durationSeconds":107,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run mistral-7b-q4km  # via _local_runner.py against the canonical commonsenseqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"21c06df0fb7b4bcdfa5ce9466eac73f6448d77fb5994d2f573cdcc6e692e2df0ba5711c7f970610671b7843a8f54cf1800d350dbe3e2dd2a2e20ea1a6446d60f","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:657c0ecfad0bd4dcbf062db3be6475df54a6d658c93b0493d4e0d3d86c4cb5bf","methodology_hash":"sha256:11c314e72c2b767f36d059911da85f213c8fa50958bc5b4e94ae94f7fb36dd77","merkle_root":"sha256:8e65f134127f3238352173c64a290c1ee53509baa064c284671e173f09822132","claimed_score":56.00000000000001}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"21c06df0fb7b4bcdfa5ce9466eac73f6448d77fb5994d2f573cdcc6e692e2df0ba5711c7f970610671b7843a8f54cf1800d350dbe3e2dd2a2e20ea1a6446d60f","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:44:34.617Z","verifiedAt":"2026-04-26T07:44:36Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: mistral-7b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:44:34.617Z","storeRoute":"store-run"},{"id":"run-e990b20e8c1b","serviceId":"anthropic-claude","benchmarkId":"mmlu-pro","model":"claude-haiku-4.5","score":50,"runs":1,"breakdown":{"n":8,"passes":4,"mean_raw":0.5},"sampleCount":8,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:f83f7230d012b45f7532fd0947ca596e7de52a518e7f9edcd0df4566b409bf9a","methodologyHash":"sha256:7d4179c2b699af35bc95f0bd466e9b344ae348a765fbbca15e14adbc4ceb7072","transcriptMerkleRoot":"sha256:2de7b3d7a4eab284913f42ce517ada263dba06a08e67f4bd2f52fb8716cd2a10","startedAt":"2026-04-26T07:42:26.725Z","finishedAt":"2026-04-26T07:42:51.426Z","durationSeconds":25,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run mmlu-pro --service anthropic-claude --model claude-haiku-4.5 --runs 1 --limit 8","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"2e9bfb66c01f0a25ddd156507e09d8c646b7f30b3c47432a56f80bbf9e8d00edecef9ea2be7551f0e7f3e24bd8d1b4666aa6e4276834dcd9ee46b538d51d660e","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:f83f7230d012b45f7532fd0947ca596e7de52a518e7f9edcd0df4566b409bf9a","methodology_hash":"sha256:7d4179c2b699af35bc95f0bd466e9b344ae348a765fbbca15e14adbc4ceb7072","merkle_root":"sha256:2de7b3d7a4eab284913f42ce517ada263dba06a08e67f4bd2f52fb8716cd2a10","claimed_score":50}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"2e9bfb66c01f0a25ddd156507e09d8c646b7f30b3c47432a56f80bbf9e8d00edecef9ea2be7551f0e7f3e24bd8d1b4666aa6e4276834dcd9ee46b538d51d660e","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:42:51.426Z","verifiedAt":"2026-04-26T07:42:51.426Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-local-fef16997d4e8","serviceId":"openrouter","benchmarkId":"winogrande","model":"mistral-7b-q4km","score":68,"runs":1,"breakdown":{"n":50,"passes":34,"mean_raw":0.68},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:6b80c06f2f42f404b65a8704af71bd8d3f2a7a85a4fbb9b23a9aa9d27ace7040","methodologyHash":"sha256:b0750456ca6269f519cb7dd4bbda66aa80a3806e9505189914584546305b2eaa","transcriptMerkleRoot":"sha256:39bee3b69814c0fc17be00cb7d72a2a9d810037de107215cc8050c1d91d1b7cc","startedAt":"2026-04-26T07:41:01Z","finishedAt":"2026-04-26T07:42:47Z","durationSeconds":106,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run mistral-7b-q4km  # via _local_runner.py against the canonical winogrande sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"b091c82e0be1778016c939ef69b245297a84999ff902b8e6f30919df89afcf26fbb919d98b1894c3844c419020c35246ea5fef77150753a5985a463bffbacc08","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:6b80c06f2f42f404b65a8704af71bd8d3f2a7a85a4fbb9b23a9aa9d27ace7040","methodology_hash":"sha256:b0750456ca6269f519cb7dd4bbda66aa80a3806e9505189914584546305b2eaa","merkle_root":"sha256:39bee3b69814c0fc17be00cb7d72a2a9d810037de107215cc8050c1d91d1b7cc","claimed_score":68}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"b091c82e0be1778016c939ef69b245297a84999ff902b8e6f30919df89afcf26fbb919d98b1894c3844c419020c35246ea5fef77150753a5985a463bffbacc08","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:42:46.422Z","verifiedAt":"2026-04-26T07:42:47Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: mistral-7b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:42:46.422Z","storeRoute":"store-run"},{"id":"run-local-41e3f0f35701","serviceId":"openrouter","benchmarkId":"arc-challenge","model":"mistral-7b-q4km","score":62,"runs":1,"breakdown":{"n":50,"passes":31,"mean_raw":0.62},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:144e9a13fb369f31007fffdcf4d7d55692677b409c8fa7b7dec4328c81a55752","methodologyHash":"sha256:8e84e6ffec11c082a286373b8b306600732cdf99b514079bfc0754fe4cd7a7c5","transcriptMerkleRoot":"sha256:23cdf4c162e60cf823b89872539e148d212de7cc64f89864c7a2c7e06aa8dfe6","startedAt":"2026-04-26T07:39:11Z","finishedAt":"2026-04-26T07:40:59Z","durationSeconds":108,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run mistral-7b-q4km  # via _local_runner.py against the canonical arc-challenge sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"14524413d71ae098c10228fca3820d38aa02bc537bb07c6acb3bf1e37a779454d894fb234d39065603626eb13ed11ce7499b5cf731ada8b9eca81ab3b8bd2f01","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:144e9a13fb369f31007fffdcf4d7d55692677b409c8fa7b7dec4328c81a55752","methodology_hash":"sha256:8e84e6ffec11c082a286373b8b306600732cdf99b514079bfc0754fe4cd7a7c5","merkle_root":"sha256:23cdf4c162e60cf823b89872539e148d212de7cc64f89864c7a2c7e06aa8dfe6","claimed_score":62}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"14524413d71ae098c10228fca3820d38aa02bc537bb07c6acb3bf1e37a779454d894fb234d39065603626eb13ed11ce7499b5cf731ada8b9eca81ab3b8bd2f01","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:40:57.988Z","verifiedAt":"2026-04-26T07:40:59Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: mistral-7b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:40:57.988Z","storeRoute":"store-run"},{"id":"run-local-b7c688b74bd9","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"mistral-7b-q4km","score":12,"runs":1,"breakdown":{"n":50,"passes":6,"mean_raw":0.12},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:c4ff0432d6539908d6b09b6ceac8693877afbd15a62806db219216507a578bdd","methodologyHash":"sha256:68cb69d8b9a5e40f78f9918e9ab57581115783d514402f7978180dac87834446","transcriptMerkleRoot":"sha256:9472ab5a71428f0445968ac9933bf037d61c1233fe83b252e55a3945d39a7e7d","startedAt":"2026-04-26T07:37:06Z","finishedAt":"2026-04-26T07:39:10Z","durationSeconds":124,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run mistral-7b-q4km  # via _local_runner.py against the canonical mmlu-pro sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"0bf0147ac87b2de990d3aeb9664ab1c26b1fcec8683f3a8078d04ed92fe1fda2ff17b5dd521b9da77eecf999d1ad4d677cd50d76bf415386f30de4d88511f801","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:c4ff0432d6539908d6b09b6ceac8693877afbd15a62806db219216507a578bdd","methodology_hash":"sha256:68cb69d8b9a5e40f78f9918e9ab57581115783d514402f7978180dac87834446","merkle_root":"sha256:9472ab5a71428f0445968ac9933bf037d61c1233fe83b252e55a3945d39a7e7d","claimed_score":12}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"0bf0147ac87b2de990d3aeb9664ab1c26b1fcec8683f3a8078d04ed92fe1fda2ff17b5dd521b9da77eecf999d1ad4d677cd50d76bf415386f30de4d88511f801","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:39:08.612Z","verifiedAt":"2026-04-26T07:39:10Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: mistral-7b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:39:08.612Z","storeRoute":"store-run"},{"id":"run-local-9e4635fcf727","serviceId":"openrouter","benchmarkId":"gsm8k","model":"mistral-7b-q4km","score":10,"runs":1,"breakdown":{"n":50,"passes":5,"mean_raw":0.1},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodologyHash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","transcriptMerkleRoot":"sha256:8e07c4f53a6009442f8eb52ba7c12ff0a76c9a6cac11dd84af55ef065ce41da3","startedAt":"2026-04-26T07:35:05Z","finishedAt":"2026-04-26T07:37:05Z","durationSeconds":120,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run mistral-7b-q4km  # via _local_runner.py against the canonical gsm8k sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"2d49e748b9a31e8b4bdc53971b4cfa3d1f82a37834972e0f98bf45c3f6c2672a7685f2b86cb4cfa04b8fed4f3789e2255bab497daeaff291a07ec4f91bd8550b","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodology_hash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","merkle_root":"sha256:8e07c4f53a6009442f8eb52ba7c12ff0a76c9a6cac11dd84af55ef065ce41da3","claimed_score":10}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"2d49e748b9a31e8b4bdc53971b4cfa3d1f82a37834972e0f98bf45c3f6c2672a7685f2b86cb4cfa04b8fed4f3789e2255bab497daeaff291a07ec4f91bd8550b","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:37:03.363Z","verifiedAt":"2026-04-26T07:37:05Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: mistral-7b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:37:03.363Z","storeRoute":"store-run"},{"id":"run-local-a99ee4fe8b18","serviceId":"openrouter","benchmarkId":"math-500","model":"qwen3.6-35b-q4km","score":0,"runs":1,"breakdown":{"n":50,"passes":0,"mean_raw":0},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:78fd61258ac0baaf1481bd291c6ed42004f95ff6302279502627fb0b9be98bd0","methodologyHash":"sha256:2d3e19ed12ebcd5f2e584cf64b353e417139647737adaef4e46e74aef90c4232","transcriptMerkleRoot":"sha256:acdcc61bffba32b7c4b7087131290ddeec2704a13784c4ffe86f32cfa5cdf206","startedAt":"2026-04-26T07:31:02Z","finishedAt":"2026-04-26T07:35:03Z","durationSeconds":241,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-35b-q4km  # via _local_runner.py against the canonical math-500 sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"521e2945471073ab5b76c230d7e384175944bcacfe922eb1bddf837c2983d7731172ecf2b6a9913616be1a7530e2c98f4aea759800e79ef00a42d4b8d899a80a","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:78fd61258ac0baaf1481bd291c6ed42004f95ff6302279502627fb0b9be98bd0","methodology_hash":"sha256:2d3e19ed12ebcd5f2e584cf64b353e417139647737adaef4e46e74aef90c4232","merkle_root":"sha256:acdcc61bffba32b7c4b7087131290ddeec2704a13784c4ffe86f32cfa5cdf206","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"521e2945471073ab5b76c230d7e384175944bcacfe922eb1bddf837c2983d7731172ecf2b6a9913616be1a7530e2c98f4aea759800e79ef00a42d4b8d899a80a","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:35:01.985Z","verifiedAt":"2026-04-26T07:35:03Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-35b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:35:01.985Z","storeRoute":"store-run"},{"id":"run-local-ef31cfa03091","serviceId":"openrouter","benchmarkId":"openbookqa","model":"qwen3.6-35b-q4km","score":12,"runs":1,"breakdown":{"n":50,"passes":6,"mean_raw":0.12},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:0a3bba9b686859f3e89e67cf0f4dce4276ba11e8ccd4ad7c367f98880d8b5953","methodologyHash":"sha256:ca613efb492af89b7f2f5efa61eeea602639622a37046ee5c6d9dec8b12c0a47","transcriptMerkleRoot":"sha256:1051a4758a6cd661f9bfebe90e39a670a38bbd33e217fea0e6af0ada84ce97aa","startedAt":"2026-04-26T07:26:59Z","finishedAt":"2026-04-26T07:31:00Z","durationSeconds":241,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-35b-q4km  # via _local_runner.py against the canonical openbookqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"8a11a63ce5b532d4ed3ba4b3c6b44ce9dd6977cf95b4bb162b4ea22384a13e6d048d68810a794a69b9bacb3118bb95b0270693121bae19c4238f31259dc19902","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:0a3bba9b686859f3e89e67cf0f4dce4276ba11e8ccd4ad7c367f98880d8b5953","methodology_hash":"sha256:ca613efb492af89b7f2f5efa61eeea602639622a37046ee5c6d9dec8b12c0a47","merkle_root":"sha256:1051a4758a6cd661f9bfebe90e39a670a38bbd33e217fea0e6af0ada84ce97aa","claimed_score":12}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"8a11a63ce5b532d4ed3ba4b3c6b44ce9dd6977cf95b4bb162b4ea22384a13e6d048d68810a794a69b9bacb3118bb95b0270693121bae19c4238f31259dc19902","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:30:58.760Z","verifiedAt":"2026-04-26T07:31:00Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-35b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:30:58.760Z","storeRoute":"store-run"},{"id":"run-local-dcaf6098e9b2","serviceId":"openrouter","benchmarkId":"commonsenseqa","model":"qwen3.6-35b-q4km","score":16,"runs":1,"breakdown":{"n":50,"passes":8,"mean_raw":0.16},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:657c0ecfad0bd4dcbf062db3be6475df54a6d658c93b0493d4e0d3d86c4cb5bf","methodologyHash":"sha256:11c314e72c2b767f36d059911da85f213c8fa50958bc5b4e94ae94f7fb36dd77","transcriptMerkleRoot":"sha256:614479c0343fa34d1daa6e1309a797babdc7eeffa8418e7da3cc7b23e0b73f21","startedAt":"2026-04-26T07:23:00Z","finishedAt":"2026-04-26T07:26:57Z","durationSeconds":237,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-35b-q4km  # via _local_runner.py against the canonical commonsenseqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"51e0d5132884865003af84c177442fb73395d38bc28f0180c629beb5eaa1f85e6e495bc0cfb87402fd33ae4fe172e655d3c315ebc9fddeaab2d4ef439305f000","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:657c0ecfad0bd4dcbf062db3be6475df54a6d658c93b0493d4e0d3d86c4cb5bf","methodology_hash":"sha256:11c314e72c2b767f36d059911da85f213c8fa50958bc5b4e94ae94f7fb36dd77","merkle_root":"sha256:614479c0343fa34d1daa6e1309a797babdc7eeffa8418e7da3cc7b23e0b73f21","claimed_score":16}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"51e0d5132884865003af84c177442fb73395d38bc28f0180c629beb5eaa1f85e6e495bc0cfb87402fd33ae4fe172e655d3c315ebc9fddeaab2d4ef439305f000","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:26:56.250Z","verifiedAt":"2026-04-26T07:26:57Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-35b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:26:56.250Z","storeRoute":"store-run"},{"id":"run-5c103412a8eb","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-sonnet-4.5","score":100,"runs":1,"breakdown":{"n":3,"passes":3,"mean_raw":1},"sampleCount":3,"runnerVersion":"benchlist-vercel-inline@1.0.0","runnerCommit":"edge","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","transcriptMerkleRoot":"sha256:ae009bdb2ad039f4c0d29ad946dfdb29ca755b3a1b8c1b7a228064cc1b75137f","startedAt":"2026-04-26T07:24:21.585Z","finishedAt":"2026-04-26T07:24:33.356Z","durationSeconds":12,"decoding":{"temperature":0,"max_tokens":512},"attestor":"benchlist-vercel-inline-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-sonnet-4.5 --runs 1 --limit 3","dockerImage":"ghcr.io/benchlist/runner:latest","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"2992cd508764351b5971f30b4e32f58da2d5abb9211958092ceb2988b0a71a277200ad26695e226eceba27e865c3eb122fe22aa1ac2d85e65c3c5053874d890d","pubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:144e8efdcdb66a248c57935cea7c8d00cbc6c287341355ab753cc5f445238bfb","merkle_root":"sha256:ae009bdb2ad039f4c0d29ad946dfdb29ca755b3a1b8c1b7a228064cc1b75137f","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"cb6e95d0f7b402e254f491b57767df3a3a93ae92f1faee3a02aa52e728f5cd11","attestorSignature":"2992cd508764351b5971f30b4e32f58da2d5abb9211958092ceb2988b0a71a277200ad26695e226eceba27e865c3eb122fe22aa1ac2d85e65c3c5053874d890d","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:24:33.356Z","verifiedAt":"2026-04-26T07:24:33.356Z","note":"Signed inline by Benchlist Vercel attestor. Set ATTESTOR_PRIVATE_KEY on a GH/Railway worker to add Ethereum L1 anchor."}},{"id":"run-local-67839be709d7","serviceId":"openrouter","benchmarkId":"winogrande","model":"qwen3.6-35b-q4km","score":8,"runs":1,"breakdown":{"n":50,"passes":4,"mean_raw":0.08},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:6b80c06f2f42f404b65a8704af71bd8d3f2a7a85a4fbb9b23a9aa9d27ace7040","methodologyHash":"sha256:b0750456ca6269f519cb7dd4bbda66aa80a3806e9505189914584546305b2eaa","transcriptMerkleRoot":"sha256:c86f745818f434f5ffedc35139e8d104999206ec0be3c7edc3a098f1af7df156","startedAt":"2026-04-26T07:18:57Z","finishedAt":"2026-04-26T07:22:58Z","durationSeconds":241,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-35b-q4km  # via _local_runner.py against the canonical winogrande sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"a8ff37215b5a5bab7cb500c71f0a447036415e2d04ddafe7dc6a5cc3652cc8de090231ef1887045df1887b7a4645eb0a82d9a18c99fd8074a92dfcc89893880e","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:6b80c06f2f42f404b65a8704af71bd8d3f2a7a85a4fbb9b23a9aa9d27ace7040","methodology_hash":"sha256:b0750456ca6269f519cb7dd4bbda66aa80a3806e9505189914584546305b2eaa","merkle_root":"sha256:c86f745818f434f5ffedc35139e8d104999206ec0be3c7edc3a098f1af7df156","claimed_score":8}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"a8ff37215b5a5bab7cb500c71f0a447036415e2d04ddafe7dc6a5cc3652cc8de090231ef1887045df1887b7a4645eb0a82d9a18c99fd8074a92dfcc89893880e","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:22:57.467Z","verifiedAt":"2026-04-26T07:22:58Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-35b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:22:57.467Z","storeRoute":"store-run"},{"id":"run-local-acece5c188d8","serviceId":"openrouter","benchmarkId":"arc-challenge","model":"qwen3.6-35b-q4km","score":12,"runs":1,"breakdown":{"n":50,"passes":6,"mean_raw":0.12},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:144e9a13fb369f31007fffdcf4d7d55692677b409c8fa7b7dec4328c81a55752","methodologyHash":"sha256:8e84e6ffec11c082a286373b8b306600732cdf99b514079bfc0754fe4cd7a7c5","transcriptMerkleRoot":"sha256:96a68d42326aa4dae65d9867c60be831dc5563724f23e6cb1ad2830b372d8722","startedAt":"2026-04-26T07:14:54Z","finishedAt":"2026-04-26T07:18:55Z","durationSeconds":241,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-35b-q4km  # via _local_runner.py against the canonical arc-challenge sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"9c9049a84c32895c5020e2c4cda4cac5f598d00b7b7d584c60891ab2632a10dacc22bf2514f3e9b3c2a89fa9be0b9e4452f107895f0136833d70f547017dc30b","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:144e9a13fb369f31007fffdcf4d7d55692677b409c8fa7b7dec4328c81a55752","methodology_hash":"sha256:8e84e6ffec11c082a286373b8b306600732cdf99b514079bfc0754fe4cd7a7c5","merkle_root":"sha256:96a68d42326aa4dae65d9867c60be831dc5563724f23e6cb1ad2830b372d8722","claimed_score":12}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"9c9049a84c32895c5020e2c4cda4cac5f598d00b7b7d584c60891ab2632a10dacc22bf2514f3e9b3c2a89fa9be0b9e4452f107895f0136833d70f547017dc30b","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:18:53.812Z","verifiedAt":"2026-04-26T07:18:55Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-35b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:18:53.812Z","storeRoute":"store-run"},{"id":"run-local-815218140732","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"qwen3.6-35b-q4km","score":0,"runs":1,"breakdown":{"n":50,"passes":0,"mean_raw":0},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:c4ff0432d6539908d6b09b6ceac8693877afbd15a62806db219216507a578bdd","methodologyHash":"sha256:68cb69d8b9a5e40f78f9918e9ab57581115783d514402f7978180dac87834446","transcriptMerkleRoot":"sha256:9920fcdfd9c44faebabaa00c22780570063efab0abc30700878095e5e462240a","startedAt":"2026-04-26T07:10:42Z","finishedAt":"2026-04-26T07:14:52Z","durationSeconds":250,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-35b-q4km  # via _local_runner.py against the canonical mmlu-pro sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"ad6faee8e356bd4cbdec56cc4e4cf02c6b6433748fc11b34f2108bfaa11b720cee26d39d81fb511eb07cc94dec3fc71daaa0d4bbb3af12a95315efa805fa210b","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:c4ff0432d6539908d6b09b6ceac8693877afbd15a62806db219216507a578bdd","methodology_hash":"sha256:68cb69d8b9a5e40f78f9918e9ab57581115783d514402f7978180dac87834446","merkle_root":"sha256:9920fcdfd9c44faebabaa00c22780570063efab0abc30700878095e5e462240a","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"ad6faee8e356bd4cbdec56cc4e4cf02c6b6433748fc11b34f2108bfaa11b720cee26d39d81fb511eb07cc94dec3fc71daaa0d4bbb3af12a95315efa805fa210b","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:14:51.517Z","verifiedAt":"2026-04-26T07:14:52Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-35b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:14:51.517Z","storeRoute":"store-run"},{"id":"run-local-1864885ad28f","serviceId":"openrouter","benchmarkId":"gsm8k","model":"qwen3.6-35b-q4km","score":0,"runs":1,"breakdown":{"n":50,"passes":0,"mean_raw":0},"sampleCount":50,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodologyHash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","transcriptMerkleRoot":"sha256:1ff4357a826f7494c883011fb26cac1f830a166ed2366f7d3b36f44002e4f542","startedAt":"2026-04-26T07:06:23Z","finishedAt":"2026-04-26T07:10:41Z","durationSeconds":258,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-35b-q4km  # via _local_runner.py against the canonical gsm8k sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"441edba7b19edbe0870687c6dea679ff6036aef748b20de0a7b7febd9e1fc1a83fea60dedb3d8d6e788df27ccef24fea8a6789334f0e13d2ee13d88500323008","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodology_hash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","merkle_root":"sha256:1ff4357a826f7494c883011fb26cac1f830a166ed2366f7d3b36f44002e4f542","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"441edba7b19edbe0870687c6dea679ff6036aef748b20de0a7b7febd9e1fc1a83fea60dedb3d8d6e788df27ccef24fea8a6789334f0e13d2ee13d88500323008","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:10:39.522Z","verifiedAt":"2026-04-26T07:10:41Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-35b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:10:39.522Z","storeRoute":"store-run"},{"id":"run-local-863019f170ee","serviceId":"openrouter","benchmarkId":"gsm8k","model":"llama3-8b-q40","score":70,"runs":1,"breakdown":{"n":10,"passes":7,"mean_raw":0.7},"sampleCount":10,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodologyHash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","transcriptMerkleRoot":"sha256:fbda8a5a060f9918f2b5e62a9c8ce064a2f51d54ffa36af095c033eac17d8f01","startedAt":"2026-04-26T07:05:21Z","finishedAt":"2026-04-26T07:05:53Z","durationSeconds":32,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run llama3-8b-q40  # via _local_runner.py against the canonical gsm8k sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"3bc57694cefe7518b57be77973f85e8b7b63c46b7496c2ff2cef979506c8c3f4a92fd0da23b5107e1f526c8b15dfa4e19acd81385fbb8f0f4e8dd96fe121c902","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:998472abf27962d5141a96e256c87871228b99a8df79ff8c7a9a61d707a0b4b5","methodology_hash":"sha256:8618a36988294dfc7ba4dbff4fdd15f101f6a48ed51a7d0390ddb466f8b50a99","merkle_root":"sha256:fbda8a5a060f9918f2b5e62a9c8ce064a2f51d54ffa36af095c033eac17d8f01","claimed_score":70}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"3bc57694cefe7518b57be77973f85e8b7b63c46b7496c2ff2cef979506c8c3f4a92fd0da23b5107e1f526c8b15dfa4e19acd81385fbb8f0f4e8dd96fe121c902","signerAlgo":"ed25519","submittedAt":"2026-04-26T07:05:52.160Z","verifiedAt":"2026-04-26T07:05:53Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: llama3-8b-q40."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T07:05:52.160Z","storeRoute":"store-run"},{"id":"run-local-a6e9d9d825ce","serviceId":"openrouter","benchmarkId":"truthfulqa","model":"glm-4.7-flash-30b-q4km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:1d3a9406678cb49569834ab0a185eb50e97f8072798c1f6afd73f76a77d5f75d","methodologyHash":"sha256:f4ed2d4c63c28f153fdbf3a452d74edfe551f80655f1f790e73063f9e942d40a","transcriptMerkleRoot":"sha256:c8989f0afcc94a9a5954b355b51d89743c8424905dc55dbae71fb0112064e834","startedAt":"2026-04-26T02:19:47Z","finishedAt":"2026-04-26T02:19:54Z","durationSeconds":7,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical truthfulqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"a2cc1ec3c488f618728187b33abb263678f106b39fb007e2afba672a566d80b7f231e73c5e704b1b0fa564468553091ae92295b68abc83d37732859bb2cb4b05","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1d3a9406678cb49569834ab0a185eb50e97f8072798c1f6afd73f76a77d5f75d","methodology_hash":"sha256:f4ed2d4c63c28f153fdbf3a452d74edfe551f80655f1f790e73063f9e942d40a","merkle_root":"sha256:c8989f0afcc94a9a5954b355b51d89743c8424905dc55dbae71fb0112064e834","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"a2cc1ec3c488f618728187b33abb263678f106b39fb007e2afba672a566d80b7f231e73c5e704b1b0fa564468553091ae92295b68abc83d37732859bb2cb4b05","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:19:53.636Z","verifiedAt":"2026-04-26T02:19:54Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:19:53.636Z","storeRoute":"store-run"},{"id":"run-local-4fc6f9ea844c","serviceId":"openrouter","benchmarkId":"musr","model":"glm-4.7-flash-30b-q4km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:80561df09e759b1394e3ff89205f4e6459613b1f49529e3635fc6105e4d75213","methodologyHash":"sha256:fa408a6814333ee9015e5641ec6b6a5ff86cc1aeb256fef95d121bd838acb03d","transcriptMerkleRoot":"sha256:2f723b8bc1f5532ba1cd162373d7749880b752837e6e71535dc9cd1f95ddc35e","startedAt":"2026-04-26T02:19:37Z","finishedAt":"2026-04-26T02:19:45Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical musr sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"99cebe0955ddfc7ee11a07ab05289d306ccef6018c01863d6d48a52629f70be6d5727dfc401d6efad753cf30cc6c76f74300dd00d5c8d664dc4269b040bfeb0c","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:80561df09e759b1394e3ff89205f4e6459613b1f49529e3635fc6105e4d75213","methodology_hash":"sha256:fa408a6814333ee9015e5641ec6b6a5ff86cc1aeb256fef95d121bd838acb03d","merkle_root":"sha256:2f723b8bc1f5532ba1cd162373d7749880b752837e6e71535dc9cd1f95ddc35e","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"99cebe0955ddfc7ee11a07ab05289d306ccef6018c01863d6d48a52629f70be6d5727dfc401d6efad753cf30cc6c76f74300dd00d5c8d664dc4269b040bfeb0c","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:19:44.828Z","verifiedAt":"2026-04-26T02:19:45Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:19:44.828Z","storeRoute":"store-run"},{"id":"run-local-c9af68d5a809","serviceId":"openrouter","benchmarkId":"logiqa","model":"glm-4.7-flash-30b-q4km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:71cd6dfdc54064ed9becf0aacee7070c8cd9bf664c467aac13fc2955f25167de","methodologyHash":"sha256:fa645feb915960040b3315732df398a70a37f0ea2625f4a963bf57ee1eb89eff","transcriptMerkleRoot":"sha256:047fb0fe81faa0da33c3a99fd7a4f6e8cfd9d8771b5f5272a114f310011be5c6","startedAt":"2026-04-26T02:19:27Z","finishedAt":"2026-04-26T02:19:36Z","durationSeconds":9,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical logiqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"b436e9dbf4288691415ee0c4e240cff0b978ba777529fb8eb133b4abf656a6b567239d6d4535a80ca0a376f59f44825aaea01b6f84df4abc0bf17f09c27eee04","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:71cd6dfdc54064ed9becf0aacee7070c8cd9bf664c467aac13fc2955f25167de","methodology_hash":"sha256:fa645feb915960040b3315732df398a70a37f0ea2625f4a963bf57ee1eb89eff","merkle_root":"sha256:047fb0fe81faa0da33c3a99fd7a4f6e8cfd9d8771b5f5272a114f310011be5c6","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"b436e9dbf4288691415ee0c4e240cff0b978ba777529fb8eb133b4abf656a6b567239d6d4535a80ca0a376f59f44825aaea01b6f84df4abc0bf17f09c27eee04","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:19:35.179Z","verifiedAt":"2026-04-26T02:19:36Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:19:35.179Z","storeRoute":"store-run"},{"id":"run-local-8641e71d7f0b","serviceId":"openrouter","benchmarkId":"math-500","model":"glm-4.7-flash-30b-q4km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:9e86adfbb746209853106da77f29348041d67bb236dcd3cc8ad472da93058634","methodologyHash":"sha256:a62e8bbe42883c707d400421717f846925e03f389fcd98a9f01be093350310f2","transcriptMerkleRoot":"sha256:351bc9af9a0b04f84049d7761a256bbdb9b266be2d3574a0acd4ca5e3233d548","startedAt":"2026-04-26T02:19:17Z","finishedAt":"2026-04-26T02:19:25Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical math-500 sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"6f93c6b5950ef4df1a2d6799f0ea4b3766918fca5bc26522bc6038aade9a0d5cec3e21ca85570bae59db5bacf04bb271fe9d9b44c78ec3ebf07d3a514dbc5b05","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:9e86adfbb746209853106da77f29348041d67bb236dcd3cc8ad472da93058634","methodology_hash":"sha256:a62e8bbe42883c707d400421717f846925e03f389fcd98a9f01be093350310f2","merkle_root":"sha256:351bc9af9a0b04f84049d7761a256bbdb9b266be2d3574a0acd4ca5e3233d548","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"6f93c6b5950ef4df1a2d6799f0ea4b3766918fca5bc26522bc6038aade9a0d5cec3e21ca85570bae59db5bacf04bb271fe9d9b44c78ec3ebf07d3a514dbc5b05","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:19:25.437Z","verifiedAt":"2026-04-26T02:19:25Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:19:25.437Z","storeRoute":"store-run"},{"id":"run-local-d58d31a75b28","serviceId":"openrouter","benchmarkId":"bbh","model":"glm-4.7-flash-30b-q4km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:cbe1df91698bb33429d10eac97c4ed296ea144deba5205fee61ef8597d796a89","methodologyHash":"sha256:f90d0cf5bd553d66271a37ead896f75d805589e323916ebc0f2d3d26eae5c795","transcriptMerkleRoot":"sha256:59acaf3dbd3d3f46db8b021181f28dc0e0ad85530104c13e151a9d710dd283df","startedAt":"2026-04-26T02:19:07Z","finishedAt":"2026-04-26T02:19:16Z","durationSeconds":9,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical bbh sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"5c5c61155c6797817bb24a7ea89d631ac06d7963ba3a65b5f9894dedf829c948e34e038b57e01dcf53e67c42bf6ae0ebe05c1da4fb7e64ba610b48554bcfc601","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:cbe1df91698bb33429d10eac97c4ed296ea144deba5205fee61ef8597d796a89","methodology_hash":"sha256:f90d0cf5bd553d66271a37ead896f75d805589e323916ebc0f2d3d26eae5c795","merkle_root":"sha256:59acaf3dbd3d3f46db8b021181f28dc0e0ad85530104c13e151a9d710dd283df","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"5c5c61155c6797817bb24a7ea89d631ac06d7963ba3a65b5f9894dedf829c948e34e038b57e01dcf53e67c42bf6ae0ebe05c1da4fb7e64ba610b48554bcfc601","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:19:15.186Z","verifiedAt":"2026-04-26T02:19:16Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:19:15.186Z","storeRoute":"store-run"},{"id":"run-local-8248814799b2","serviceId":"openrouter","benchmarkId":"openbookqa","model":"glm-4.7-flash-30b-q4km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:9de02c962a604c215aef6e33014a48f52c1eb284f810c0804a96884968f79952","methodologyHash":"sha256:913c972d908dc49210c4428b5b09287a8637337a8cfc56bcbddbe73f68f227d6","transcriptMerkleRoot":"sha256:580a287ea4d675d9e120f5c9bb9222eec228b161796729cb3ea03dd9d924891f","startedAt":"2026-04-26T02:18:58Z","finishedAt":"2026-04-26T02:19:06Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical openbookqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"ad397987373d2ff3bdcfbf74959bdee925806c473c06dd7573b6e85bc05476b13d3e45427682e2450c357efd03bea396e9842fe86d4839d2188f2a20069a9807","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:9de02c962a604c215aef6e33014a48f52c1eb284f810c0804a96884968f79952","methodology_hash":"sha256:913c972d908dc49210c4428b5b09287a8637337a8cfc56bcbddbe73f68f227d6","merkle_root":"sha256:580a287ea4d675d9e120f5c9bb9222eec228b161796729cb3ea03dd9d924891f","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"ad397987373d2ff3bdcfbf74959bdee925806c473c06dd7573b6e85bc05476b13d3e45427682e2450c357efd03bea396e9842fe86d4839d2188f2a20069a9807","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:19:05.535Z","verifiedAt":"2026-04-26T02:19:06Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:19:05.535Z","storeRoute":"store-run"},{"id":"run-local-3e2f640576ac","serviceId":"openrouter","benchmarkId":"commonsenseqa","model":"glm-4.7-flash-30b-q4km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:729b5c0850ac5be6b8cfbedf4d36938249bb7c0d9e9c980260037391414dd520","methodologyHash":"sha256:afaa58378ca68cf9d5d85a75f5d168088888466b4b067772f67d22c7a05da188","transcriptMerkleRoot":"sha256:71325df3f0cfbbc112c380c4ffae30bd99bc5c144235a9dba5c4e4858fd6e059","startedAt":"2026-04-26T02:18:49Z","finishedAt":"2026-04-26T02:18:56Z","durationSeconds":7,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical commonsenseqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"ca0e81f68e7342c62fb2a2c34677f52d803b8f46e7900ef4d141bec43afc7e688302fa1e46123191bffbb8fbb055521c99a8b4011333dbebddfa679edc14ce00","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:729b5c0850ac5be6b8cfbedf4d36938249bb7c0d9e9c980260037391414dd520","methodology_hash":"sha256:afaa58378ca68cf9d5d85a75f5d168088888466b4b067772f67d22c7a05da188","merkle_root":"sha256:71325df3f0cfbbc112c380c4ffae30bd99bc5c144235a9dba5c4e4858fd6e059","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"ca0e81f68e7342c62fb2a2c34677f52d803b8f46e7900ef4d141bec43afc7e688302fa1e46123191bffbb8fbb055521c99a8b4011333dbebddfa679edc14ce00","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:18:55.918Z","verifiedAt":"2026-04-26T02:18:56Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:18:55.918Z","storeRoute":"store-run"},{"id":"run-local-f78af19b7a86","serviceId":"openrouter","benchmarkId":"piqa","model":"glm-4.7-flash-30b-q4km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:d7c75287e440ecdb3df61118581bba694b3160a1a7056087d2005d8f167672bc","methodologyHash":"sha256:9b6bdebe22cd6b2ae2dc44f6a5c94297427edb9bdbf5f1ab9bf3c65bd10600c9","transcriptMerkleRoot":"sha256:2d5920c5be2cc02fd3b94e5ac515babeba368e144cab477d98ac7e2c1afa9a33","startedAt":"2026-04-26T02:18:40Z","finishedAt":"2026-04-26T02:18:48Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical piqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"b80586a21b1d81eff27cd15d682a55f4056eb32a3e38080e135d70e3ff0ff2addf314c7b466c2ad64a5c6a19f1d0581a5b571a7d05854840b8a169faed49150a","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:d7c75287e440ecdb3df61118581bba694b3160a1a7056087d2005d8f167672bc","methodology_hash":"sha256:9b6bdebe22cd6b2ae2dc44f6a5c94297427edb9bdbf5f1ab9bf3c65bd10600c9","merkle_root":"sha256:2d5920c5be2cc02fd3b94e5ac515babeba368e144cab477d98ac7e2c1afa9a33","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"b80586a21b1d81eff27cd15d682a55f4056eb32a3e38080e135d70e3ff0ff2addf314c7b466c2ad64a5c6a19f1d0581a5b571a7d05854840b8a169faed49150a","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:18:47.300Z","verifiedAt":"2026-04-26T02:18:48Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:18:47.300Z","storeRoute":"store-run"},{"id":"run-local-fca34b3c40d1","serviceId":"openrouter","benchmarkId":"winogrande","model":"glm-4.7-flash-30b-q4km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:1836ea9a0cb7f0dc0e641b157c14cc381c6b5dd60cdb1a4f9ae49c637dc92e92","methodologyHash":"sha256:ca0591d2b7c7487c4e66b8f2a4850fe3481575973ffd5567bf6af074102cfa3c","transcriptMerkleRoot":"sha256:28afc1d0b0679365694c141f51a00cec9bdcf2c6f81eb66296c0a9bc4201f21c","startedAt":"2026-04-26T02:18:30Z","finishedAt":"2026-04-26T02:18:38Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical winogrande sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"52d8048a9cddc057dfe6d927b658a68ac0d0af4389db09258a5091f59eb7f1dc60368514cd08a70990a3f0c7a34c4925e70849558fa4cb46d2cb6177c8ae840b","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1836ea9a0cb7f0dc0e641b157c14cc381c6b5dd60cdb1a4f9ae49c637dc92e92","methodology_hash":"sha256:ca0591d2b7c7487c4e66b8f2a4850fe3481575973ffd5567bf6af074102cfa3c","merkle_root":"sha256:28afc1d0b0679365694c141f51a00cec9bdcf2c6f81eb66296c0a9bc4201f21c","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"52d8048a9cddc057dfe6d927b658a68ac0d0af4389db09258a5091f59eb7f1dc60368514cd08a70990a3f0c7a34c4925e70849558fa4cb46d2cb6177c8ae840b","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:18:37.678Z","verifiedAt":"2026-04-26T02:18:38Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:18:37.678Z","storeRoute":"store-run"},{"id":"run-local-300a884cfa18","serviceId":"openrouter","benchmarkId":"arc-challenge","model":"glm-4.7-flash-30b-q4km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:f35d86c3e38897e2781029fcdf1c8f9223d1250734e1cfe9adce4bc72915d23e","methodologyHash":"sha256:8d3f2a9b6e5e96fc6cb70815d7c64abac515de8cbf261b0c04390679cfa7ca66","transcriptMerkleRoot":"sha256:42bbc7a0e6f8324201d0d27924d44f910dd027abb6b71e45489002f4b198e781","startedAt":"2026-04-26T02:18:21Z","finishedAt":"2026-04-26T02:18:29Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical arc-challenge sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"15ec866a5f9b92c112348ac0b76c0febe1206b49d66302248340e270643b989236a0f28b57677ebbc64421c6fda4f36b7945d9aa5b637ea69054dc18cf29a104","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:f35d86c3e38897e2781029fcdf1c8f9223d1250734e1cfe9adce4bc72915d23e","methodology_hash":"sha256:8d3f2a9b6e5e96fc6cb70815d7c64abac515de8cbf261b0c04390679cfa7ca66","merkle_root":"sha256:42bbc7a0e6f8324201d0d27924d44f910dd027abb6b71e45489002f4b198e781","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"15ec866a5f9b92c112348ac0b76c0febe1206b49d66302248340e270643b989236a0f28b57677ebbc64421c6fda4f36b7945d9aa5b637ea69054dc18cf29a104","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:18:28.329Z","verifiedAt":"2026-04-26T02:18:29Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:18:28.329Z","storeRoute":"store-run"},{"id":"run-local-5e569de86ced","serviceId":"openrouter","benchmarkId":"gpqa","model":"glm-4.7-flash-30b-q4km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:1799d7ba340c5e41bb46cc77a8230fe4833b5190c423637ae5c598db45c467c6","methodologyHash":"sha256:decf9a5742564b13d14660da2637494c1b8e46dd49e4633e3a0846cca7f83171","transcriptMerkleRoot":"sha256:7231d466232821828f7e9be5f0f89aba156a6707b576e5afd3e4005b5f3763dc","startedAt":"2026-04-26T02:18:12Z","finishedAt":"2026-04-26T02:18:20Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical gpqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"1668602ad2daff816b006eb9cddfc3d0ace59d7c398deaa0c0f834f660b05f10ba45aba4b8eba6fe838f0f46b4d1002573b721e9c2a22758dede167ef86f4705","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1799d7ba340c5e41bb46cc77a8230fe4833b5190c423637ae5c598db45c467c6","methodology_hash":"sha256:decf9a5742564b13d14660da2637494c1b8e46dd49e4633e3a0846cca7f83171","merkle_root":"sha256:7231d466232821828f7e9be5f0f89aba156a6707b576e5afd3e4005b5f3763dc","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"1668602ad2daff816b006eb9cddfc3d0ace59d7c398deaa0c0f834f660b05f10ba45aba4b8eba6fe838f0f46b4d1002573b721e9c2a22758dede167ef86f4705","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:18:19.386Z","verifiedAt":"2026-04-26T02:18:20Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:18:19.386Z","storeRoute":"store-run"},{"id":"run-local-f95ce5b97acd","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"glm-4.7-flash-30b-q4km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:f83f7230d012b45f7532fd0947ca596e7de52a518e7f9edcd0df4566b409bf9a","methodologyHash":"sha256:c01edd7c098df6e154928a472430b097dfab3370a7a5a943e33b241f3fc96c32","transcriptMerkleRoot":"sha256:87dc6ec37b3e0e7e8958a3410321b891b614a921cdada3219cf7b386b768f226","startedAt":"2026-04-26T02:18:02Z","finishedAt":"2026-04-26T02:18:11Z","durationSeconds":9,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical mmlu-pro sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"d2751c2ec961017859bca1d0f599f35a3a9a6529da82f77ab4a1f06246e1f32f5e08f2dea1cf605d9e7aa3dff5d9e2ac71b08cf6669ecdfd0db267c54810110a","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:f83f7230d012b45f7532fd0947ca596e7de52a518e7f9edcd0df4566b409bf9a","methodology_hash":"sha256:c01edd7c098df6e154928a472430b097dfab3370a7a5a943e33b241f3fc96c32","merkle_root":"sha256:87dc6ec37b3e0e7e8958a3410321b891b614a921cdada3219cf7b386b768f226","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"d2751c2ec961017859bca1d0f599f35a3a9a6529da82f77ab4a1f06246e1f32f5e08f2dea1cf605d9e7aa3dff5d9e2ac71b08cf6669ecdfd0db267c54810110a","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:18:10.132Z","verifiedAt":"2026-04-26T02:18:11Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:18:10.132Z","storeRoute":"store-run"},{"id":"run-local-3759cbd7a1b8","serviceId":"openrouter","benchmarkId":"gsm8k","model":"glm-4.7-flash-30b-q4km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:ebaacdd81cf24b58df42a6ee4c324f246d4f0586dc814ae68be12c143ba79de1","transcriptMerkleRoot":"sha256:08a592e75c8b2b5a578c2352e107d976dee0fa4d8cefcdc9221dd3ffccdf5240","startedAt":"2026-04-26T02:17:41Z","finishedAt":"2026-04-26T02:18:01Z","durationSeconds":20,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical gsm8k sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"689d9e2f11fa337ea7500648bd1d1e78b4be285733862d5963157a6f78c073dbdb15a2a3006601c80cccb698aeacd8808c153db409c41dbf56686577136ca001","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:ebaacdd81cf24b58df42a6ee4c324f246d4f0586dc814ae68be12c143ba79de1","merkle_root":"sha256:08a592e75c8b2b5a578c2352e107d976dee0fa4d8cefcdc9221dd3ffccdf5240","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"689d9e2f11fa337ea7500648bd1d1e78b4be285733862d5963157a6f78c073dbdb15a2a3006601c80cccb698aeacd8808c153db409c41dbf56686577136ca001","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:18:00.528Z","verifiedAt":"2026-04-26T02:18:01Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:18:00.528Z","storeRoute":"store-run"},{"id":"run-local-46edf9b67286","serviceId":"openrouter","benchmarkId":"truthfulqa","model":"qwen3.6-27b-dense-q5km","score":100,"runs":1,"breakdown":{"n":3,"passes":3,"mean_raw":1},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:1d3a9406678cb49569834ab0a185eb50e97f8072798c1f6afd73f76a77d5f75d","methodologyHash":"sha256:f4ed2d4c63c28f153fdbf3a452d74edfe551f80655f1f790e73063f9e942d40a","transcriptMerkleRoot":"sha256:ff6e758d948a4b218f3c37e15d8999375c6ccc28f7c4d03b7cdef1731c7c4486","startedAt":"2026-04-26T02:17:37Z","finishedAt":"2026-04-26T02:17:39Z","durationSeconds":2,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical truthfulqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"f5c9db5330e4d66dabd466bada369cc7a13db61ed72d26c276692806481a50034cd1c3ba4778ab0d90608057a286e80e3000999cccf002c6ed85727ff3d2800f","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1d3a9406678cb49569834ab0a185eb50e97f8072798c1f6afd73f76a77d5f75d","methodology_hash":"sha256:f4ed2d4c63c28f153fdbf3a452d74edfe551f80655f1f790e73063f9e942d40a","merkle_root":"sha256:ff6e758d948a4b218f3c37e15d8999375c6ccc28f7c4d03b7cdef1731c7c4486","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"f5c9db5330e4d66dabd466bada369cc7a13db61ed72d26c276692806481a50034cd1c3ba4778ab0d90608057a286e80e3000999cccf002c6ed85727ff3d2800f","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:17:39.011Z","verifiedAt":"2026-04-26T02:17:39Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:17:39.011Z","storeRoute":"store-run"},{"id":"run-local-fa4b68501b15","serviceId":"openrouter","benchmarkId":"musr","model":"qwen3.6-27b-dense-q5km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:80561df09e759b1394e3ff89205f4e6459613b1f49529e3635fc6105e4d75213","methodologyHash":"sha256:fa408a6814333ee9015e5641ec6b6a5ff86cc1aeb256fef95d121bd838acb03d","transcriptMerkleRoot":"sha256:8044db4f47d75e21da59eed00bfafa3222c55b1d5f7877c1c3a86921cc6ba2f0","startedAt":"2026-04-26T02:17:15Z","finishedAt":"2026-04-26T02:17:35Z","durationSeconds":20,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical musr sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"49b08d205b69c1dec3d6f0937ad09b52015ef77166051cb7619e44fd94858f6737309413d1d7065680a1258af787939b32b05c7a6e9cb5749d4883600508180d","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:80561df09e759b1394e3ff89205f4e6459613b1f49529e3635fc6105e4d75213","methodology_hash":"sha256:fa408a6814333ee9015e5641ec6b6a5ff86cc1aeb256fef95d121bd838acb03d","merkle_root":"sha256:8044db4f47d75e21da59eed00bfafa3222c55b1d5f7877c1c3a86921cc6ba2f0","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"49b08d205b69c1dec3d6f0937ad09b52015ef77166051cb7619e44fd94858f6737309413d1d7065680a1258af787939b32b05c7a6e9cb5749d4883600508180d","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:17:35.329Z","verifiedAt":"2026-04-26T02:17:35Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:17:35.329Z","storeRoute":"store-run"},{"id":"run-local-eece69db7980","serviceId":"openrouter","benchmarkId":"logiqa","model":"qwen3.6-27b-dense-q5km","score":66.66666666666666,"runs":1,"breakdown":{"n":3,"passes":2,"mean_raw":0.6666666666666666},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:71cd6dfdc54064ed9becf0aacee7070c8cd9bf664c467aac13fc2955f25167de","methodologyHash":"sha256:fa645feb915960040b3315732df398a70a37f0ea2625f4a963bf57ee1eb89eff","transcriptMerkleRoot":"sha256:ce49eb31a243b59a3ac040b6479155e0ca86632b6287d5a650e208f76eeba85c","startedAt":"2026-04-26T02:16:51Z","finishedAt":"2026-04-26T02:17:13Z","durationSeconds":22,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical logiqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"a52865523214c1d0520556a3073a6c23683e525dc23a831ce6406d1f8a0b286545b9fa076553e891be57dc234083a4346437b22285e46f592b43e9c6251b560e","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:71cd6dfdc54064ed9becf0aacee7070c8cd9bf664c467aac13fc2955f25167de","methodology_hash":"sha256:fa645feb915960040b3315732df398a70a37f0ea2625f4a963bf57ee1eb89eff","merkle_root":"sha256:ce49eb31a243b59a3ac040b6479155e0ca86632b6287d5a650e208f76eeba85c","claimed_score":66.66666666666666}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"a52865523214c1d0520556a3073a6c23683e525dc23a831ce6406d1f8a0b286545b9fa076553e891be57dc234083a4346437b22285e46f592b43e9c6251b560e","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:17:12.680Z","verifiedAt":"2026-04-26T02:17:13Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:17:12.680Z","storeRoute":"store-run"},{"id":"run-local-7d2754e112ac","serviceId":"openrouter","benchmarkId":"math-500","model":"qwen3.6-27b-dense-q5km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:9e86adfbb746209853106da77f29348041d67bb236dcd3cc8ad472da93058634","methodologyHash":"sha256:a62e8bbe42883c707d400421717f846925e03f389fcd98a9f01be093350310f2","transcriptMerkleRoot":"sha256:1b0262fb6ac1664e4ce989b727fb4bcf71696c69faa16ff9cc5748b9f0f5b6c5","startedAt":"2026-04-26T02:16:27Z","finishedAt":"2026-04-26T02:16:49Z","durationSeconds":22,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical math-500 sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"00341b2328e45715790d3e194226b2d3826154c893271d2bc72cfc540c492cfdfd00e994726123af622254a82c002143cbf70d10144dbf5896b1766fd165d201","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:9e86adfbb746209853106da77f29348041d67bb236dcd3cc8ad472da93058634","methodology_hash":"sha256:a62e8bbe42883c707d400421717f846925e03f389fcd98a9f01be093350310f2","merkle_root":"sha256:1b0262fb6ac1664e4ce989b727fb4bcf71696c69faa16ff9cc5748b9f0f5b6c5","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"00341b2328e45715790d3e194226b2d3826154c893271d2bc72cfc540c492cfdfd00e994726123af622254a82c002143cbf70d10144dbf5896b1766fd165d201","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:16:48.813Z","verifiedAt":"2026-04-26T02:16:49Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:16:48.813Z","storeRoute":"store-run"},{"id":"run-local-0b7e88ca6db1","serviceId":"openrouter","benchmarkId":"bbh","model":"qwen3.6-27b-dense-q5km","score":100,"runs":1,"breakdown":{"n":3,"passes":3,"mean_raw":1},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:cbe1df91698bb33429d10eac97c4ed296ea144deba5205fee61ef8597d796a89","methodologyHash":"sha256:f90d0cf5bd553d66271a37ead896f75d805589e323916ebc0f2d3d26eae5c795","transcriptMerkleRoot":"sha256:ec52cc4d924806e3c6d51118e038f37e378cfc5a878a041e247f54793dfffe33","startedAt":"2026-04-26T02:16:03Z","finishedAt":"2026-04-26T02:16:25Z","durationSeconds":22,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical bbh sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"287a2598e19d5aeaf4e5d875459b5f21b770eb274e23ec6be6b09415420c4c1a32c9038b08e0ff5e9d12f81351a3999a8c6fa23ddebfd866d87d3274052aed05","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:cbe1df91698bb33429d10eac97c4ed296ea144deba5205fee61ef8597d796a89","methodology_hash":"sha256:f90d0cf5bd553d66271a37ead896f75d805589e323916ebc0f2d3d26eae5c795","merkle_root":"sha256:ec52cc4d924806e3c6d51118e038f37e378cfc5a878a041e247f54793dfffe33","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"287a2598e19d5aeaf4e5d875459b5f21b770eb274e23ec6be6b09415420c4c1a32c9038b08e0ff5e9d12f81351a3999a8c6fa23ddebfd866d87d3274052aed05","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:16:25.122Z","verifiedAt":"2026-04-26T02:16:25Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:16:25.122Z","storeRoute":"store-run"},{"id":"run-local-85a00ce68dac","serviceId":"openrouter","benchmarkId":"openbookqa","model":"qwen3.6-27b-dense-q5km","score":66.66666666666666,"runs":1,"breakdown":{"n":3,"passes":2,"mean_raw":0.6666666666666666},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:9de02c962a604c215aef6e33014a48f52c1eb284f810c0804a96884968f79952","methodologyHash":"sha256:913c972d908dc49210c4428b5b09287a8637337a8cfc56bcbddbe73f68f227d6","transcriptMerkleRoot":"sha256:ce8c2b38f8da0812331914e9b80bc79fa95b574fbbed209f5d11f54400016228","startedAt":"2026-04-26T02:15:51Z","finishedAt":"2026-04-26T02:16:02Z","durationSeconds":11,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical openbookqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"f0e41a38da419ab22dd7f913f0d757a045e6defe1d9ee64ee900bf893eef412ead6ff64afe87f9fde120eeec529b4500a78a55b9271c0753ef755fc016154d0c","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:9de02c962a604c215aef6e33014a48f52c1eb284f810c0804a96884968f79952","methodology_hash":"sha256:913c972d908dc49210c4428b5b09287a8637337a8cfc56bcbddbe73f68f227d6","merkle_root":"sha256:ce8c2b38f8da0812331914e9b80bc79fa95b574fbbed209f5d11f54400016228","claimed_score":66.66666666666666}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"f0e41a38da419ab22dd7f913f0d757a045e6defe1d9ee64ee900bf893eef412ead6ff64afe87f9fde120eeec529b4500a78a55b9271c0753ef755fc016154d0c","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:16:01.036Z","verifiedAt":"2026-04-26T02:16:02Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:16:01.036Z","storeRoute":"store-run"},{"id":"run-local-a78776f4cdbe","serviceId":"openrouter","benchmarkId":"commonsenseqa","model":"qwen3.6-27b-dense-q5km","score":100,"runs":1,"breakdown":{"n":3,"passes":3,"mean_raw":1},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:729b5c0850ac5be6b8cfbedf4d36938249bb7c0d9e9c980260037391414dd520","methodologyHash":"sha256:afaa58378ca68cf9d5d85a75f5d168088888466b4b067772f67d22c7a05da188","transcriptMerkleRoot":"sha256:9c5078e8b843257414900721754e1a63c5afdfd29834085f13397da82b9bc763","startedAt":"2026-04-26T02:15:45Z","finishedAt":"2026-04-26T02:15:50Z","durationSeconds":5,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical commonsenseqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"91a09a05c339a71a7a6d771da8af496079249e068102da8828859299a66add394385b6e1bb51acffe1de6c4542b1909f5dc74e7c89acc25b181f24adf37a1c07","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:729b5c0850ac5be6b8cfbedf4d36938249bb7c0d9e9c980260037391414dd520","methodology_hash":"sha256:afaa58378ca68cf9d5d85a75f5d168088888466b4b067772f67d22c7a05da188","merkle_root":"sha256:9c5078e8b843257414900721754e1a63c5afdfd29834085f13397da82b9bc763","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"91a09a05c339a71a7a6d771da8af496079249e068102da8828859299a66add394385b6e1bb51acffe1de6c4542b1909f5dc74e7c89acc25b181f24adf37a1c07","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:15:49.190Z","verifiedAt":"2026-04-26T02:15:50Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:15:49.190Z","storeRoute":"store-run"},{"id":"run-local-12c275a8be9a","serviceId":"openrouter","benchmarkId":"piqa","model":"qwen3.6-27b-dense-q5km","score":100,"runs":1,"breakdown":{"n":3,"passes":3,"mean_raw":1},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:d7c75287e440ecdb3df61118581bba694b3160a1a7056087d2005d8f167672bc","methodologyHash":"sha256:9b6bdebe22cd6b2ae2dc44f6a5c94297427edb9bdbf5f1ab9bf3c65bd10600c9","transcriptMerkleRoot":"sha256:2169337fb7be0dadbd3b324b29bfa0da7b6938f0918a68a11dfa91fc140ba4ba","startedAt":"2026-04-26T02:15:34Z","finishedAt":"2026-04-26T02:15:44Z","durationSeconds":10,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical piqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"768c70a9e120873653abde33b9ef8b86039bbdcf2a53bde98c2ac22796b97e4a6c534c8393a470523044abe7ae17f711cc4082013226e58eec1b26593ee39f0f","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:d7c75287e440ecdb3df61118581bba694b3160a1a7056087d2005d8f167672bc","methodology_hash":"sha256:9b6bdebe22cd6b2ae2dc44f6a5c94297427edb9bdbf5f1ab9bf3c65bd10600c9","merkle_root":"sha256:2169337fb7be0dadbd3b324b29bfa0da7b6938f0918a68a11dfa91fc140ba4ba","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"768c70a9e120873653abde33b9ef8b86039bbdcf2a53bde98c2ac22796b97e4a6c534c8393a470523044abe7ae17f711cc4082013226e58eec1b26593ee39f0f","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:15:43.518Z","verifiedAt":"2026-04-26T02:15:44Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:15:43.518Z","storeRoute":"store-run"},{"id":"run-local-6b75c2d16e90","serviceId":"openrouter","benchmarkId":"winogrande","model":"qwen3.6-27b-dense-q5km","score":66.66666666666666,"runs":1,"breakdown":{"n":3,"passes":2,"mean_raw":0.6666666666666666},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:1836ea9a0cb7f0dc0e641b157c14cc381c6b5dd60cdb1a4f9ae49c637dc92e92","methodologyHash":"sha256:ca0591d2b7c7487c4e66b8f2a4850fe3481575973ffd5567bf6af074102cfa3c","transcriptMerkleRoot":"sha256:59f8f4cf8fbb9ecf2ce5369f9252b8e6fa018004853d6ea3e29b87f76daf6678","startedAt":"2026-04-26T02:15:17Z","finishedAt":"2026-04-26T02:15:33Z","durationSeconds":16,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical winogrande sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"dfd7eeeb7ff8f57ba504fe1ae4efb8d83b732e5005d2fff3c7373dee1ba16fb6eaf134020a6cb06776728c20bc9bca067149d632d8975d6134e4e81e22f3860e","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1836ea9a0cb7f0dc0e641b157c14cc381c6b5dd60cdb1a4f9ae49c637dc92e92","methodology_hash":"sha256:ca0591d2b7c7487c4e66b8f2a4850fe3481575973ffd5567bf6af074102cfa3c","merkle_root":"sha256:59f8f4cf8fbb9ecf2ce5369f9252b8e6fa018004853d6ea3e29b87f76daf6678","claimed_score":66.66666666666666}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"dfd7eeeb7ff8f57ba504fe1ae4efb8d83b732e5005d2fff3c7373dee1ba16fb6eaf134020a6cb06776728c20bc9bca067149d632d8975d6134e4e81e22f3860e","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:15:32.204Z","verifiedAt":"2026-04-26T02:15:33Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:15:32.204Z","storeRoute":"store-run"},{"id":"run-local-cb2404027402","serviceId":"openrouter","benchmarkId":"arc-challenge","model":"qwen3.6-27b-dense-q5km","score":100,"runs":1,"breakdown":{"n":3,"passes":3,"mean_raw":1},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:f35d86c3e38897e2781029fcdf1c8f9223d1250734e1cfe9adce4bc72915d23e","methodologyHash":"sha256:8d3f2a9b6e5e96fc6cb70815d7c64abac515de8cbf261b0c04390679cfa7ca66","transcriptMerkleRoot":"sha256:c6e960aceed44111a2c7ffd230cef4a85485ecbe7a603df7342d19590579f394","startedAt":"2026-04-26T02:15:11Z","finishedAt":"2026-04-26T02:15:16Z","durationSeconds":5,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical arc-challenge sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"cfd964279b0815acaaa4fc7a0218e8c91997f9002ebfd7d3ed5cec6476c17c13b9f1672067ea3a76b9cd71a5b8a8f466251d9c3f509c698ebab68c6887489a0b","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:f35d86c3e38897e2781029fcdf1c8f9223d1250734e1cfe9adce4bc72915d23e","methodology_hash":"sha256:8d3f2a9b6e5e96fc6cb70815d7c64abac515de8cbf261b0c04390679cfa7ca66","merkle_root":"sha256:c6e960aceed44111a2c7ffd230cef4a85485ecbe7a603df7342d19590579f394","claimed_score":100}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"cfd964279b0815acaaa4fc7a0218e8c91997f9002ebfd7d3ed5cec6476c17c13b9f1672067ea3a76b9cd71a5b8a8f466251d9c3f509c698ebab68c6887489a0b","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:15:14.921Z","verifiedAt":"2026-04-26T02:15:16Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:15:14.921Z","storeRoute":"store-run"},{"id":"run-local-76481a94c53e","serviceId":"openrouter","benchmarkId":"gpqa","model":"qwen3.6-27b-dense-q5km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:1799d7ba340c5e41bb46cc77a8230fe4833b5190c423637ae5c598db45c467c6","methodologyHash":"sha256:decf9a5742564b13d14660da2637494c1b8e46dd49e4633e3a0846cca7f83171","transcriptMerkleRoot":"sha256:99d9f5b71367a4501e62a5139a15ea2b47aae16adf65fe972506aee6a6d8a623","startedAt":"2026-04-26T02:14:48Z","finishedAt":"2026-04-26T02:15:10Z","durationSeconds":22,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical gpqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"72e7594b4717bbe7eee60211bdccd7995ccba97eda2e664a6f884358cb26c044ad4e25260546171d87fdebd0be996411c6becc8797bd49de884b73ef02fc8104","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1799d7ba340c5e41bb46cc77a8230fe4833b5190c423637ae5c598db45c467c6","methodology_hash":"sha256:decf9a5742564b13d14660da2637494c1b8e46dd49e4633e3a0846cca7f83171","merkle_root":"sha256:99d9f5b71367a4501e62a5139a15ea2b47aae16adf65fe972506aee6a6d8a623","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"72e7594b4717bbe7eee60211bdccd7995ccba97eda2e664a6f884358cb26c044ad4e25260546171d87fdebd0be996411c6becc8797bd49de884b73ef02fc8104","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:15:09.242Z","verifiedAt":"2026-04-26T02:15:10Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:15:09.242Z","storeRoute":"store-run"},{"id":"run-local-e903d5160916","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"qwen3.6-27b-dense-q5km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:f83f7230d012b45f7532fd0947ca596e7de52a518e7f9edcd0df4566b409bf9a","methodologyHash":"sha256:c01edd7c098df6e154928a472430b097dfab3370a7a5a943e33b241f3fc96c32","transcriptMerkleRoot":"sha256:5c969d54bfd7501bbb1ff8b9bb7d56d6327dbef94177fa0ff93bb21dc4106ce2","startedAt":"2026-04-26T02:14:24Z","finishedAt":"2026-04-26T02:14:46Z","durationSeconds":22,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical mmlu-pro sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"8300172124b4ac5c5005014c8ffa758e45d700dd8c9b623ad18a3dc79164192cdf8fdec709fae35d129afe590302c5e189e7d210ced0c1f8d51733d13c9aaf0c","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:f83f7230d012b45f7532fd0947ca596e7de52a518e7f9edcd0df4566b409bf9a","methodology_hash":"sha256:c01edd7c098df6e154928a472430b097dfab3370a7a5a943e33b241f3fc96c32","merkle_root":"sha256:5c969d54bfd7501bbb1ff8b9bb7d56d6327dbef94177fa0ff93bb21dc4106ce2","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"8300172124b4ac5c5005014c8ffa758e45d700dd8c9b623ad18a3dc79164192cdf8fdec709fae35d129afe590302c5e189e7d210ced0c1f8d51733d13c9aaf0c","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:14:45.731Z","verifiedAt":"2026-04-26T02:14:46Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:14:45.731Z","storeRoute":"store-run"},{"id":"run-local-b00e38220f83","serviceId":"openrouter","benchmarkId":"gsm8k","model":"qwen3.6-27b-dense-q5km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:ebaacdd81cf24b58df42a6ee4c324f246d4f0586dc814ae68be12c143ba79de1","transcriptMerkleRoot":"sha256:96950eb09330d385d4f3d550576504a7fd89c58a5f8e813f1da5939d173c7eba","startedAt":"2026-04-26T02:14:00Z","finishedAt":"2026-04-26T02:14:23Z","durationSeconds":23,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical gsm8k sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"9762916e95cd950cc4fbf642f495b4f414b16f82a7d1b53f36d13caf05e20dbd16090bb2953c63592d0a3d22712104bf19cb5976cd6339df81658ed7380ddf08","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:ebaacdd81cf24b58df42a6ee4c324f246d4f0586dc814ae68be12c143ba79de1","merkle_root":"sha256:96950eb09330d385d4f3d550576504a7fd89c58a5f8e813f1da5939d173c7eba","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"9762916e95cd950cc4fbf642f495b4f414b16f82a7d1b53f36d13caf05e20dbd16090bb2953c63592d0a3d22712104bf19cb5976cd6339df81658ed7380ddf08","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:14:22.139Z","verifiedAt":"2026-04-26T02:14:23Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:14:22.139Z","storeRoute":"store-run"},{"id":"run-local-3eb0a61f0743","serviceId":"openrouter","benchmarkId":"gsm8k","model":"qwen3.6-27b-dense-q5km","score":0,"runs":1,"breakdown":{"n":2,"passes":0,"mean_raw":0},"sampleCount":2,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:ebaacdd81cf24b58df42a6ee4c324f246d4f0586dc814ae68be12c143ba79de1","transcriptMerkleRoot":"sha256:5745dded7ef085322887a22a1faba33ef49fa454a6811c01c5e2657c8133da55","startedAt":"2026-04-26T02:13:26Z","finishedAt":"2026-04-26T02:13:51Z","durationSeconds":25,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run qwen3.6-27b-dense-q5km  # via _local_runner.py against the canonical gsm8k sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"83776304c1c2de5f7bacda5945797dc240136d1eabe061fafe3f53c61610a66602365f2a5ba5d1eb3f3f9535ab7ed304c2a90674312042c4b286b893a8652805","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:ebaacdd81cf24b58df42a6ee4c324f246d4f0586dc814ae68be12c143ba79de1","merkle_root":"sha256:5745dded7ef085322887a22a1faba33ef49fa454a6811c01c5e2657c8133da55","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"83776304c1c2de5f7bacda5945797dc240136d1eabe061fafe3f53c61610a66602365f2a5ba5d1eb3f3f9535ab7ed304c2a90674312042c4b286b893a8652805","signerAlgo":"ed25519","submittedAt":"2026-04-26T02:13:50.674Z","verifiedAt":"2026-04-26T02:13:51Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: qwen3.6-27b-dense-q5km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-26T02:13:50.674Z","storeRoute":"store-run"},{"id":"run-local-cf4ce5f549fa","serviceId":"openrouter","benchmarkId":"truthfulqa","model":"glm-4.7-flash-30b-q4km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:1d3a9406678cb49569834ab0a185eb50e97f8072798c1f6afd73f76a77d5f75d","methodologyHash":"sha256:f4ed2d4c63c28f153fdbf3a452d74edfe551f80655f1f790e73063f9e942d40a","transcriptMerkleRoot":"sha256:8f35e64b582a151852469b7f986c456d4e74fa14a3fbb892c809f663cab56649","startedAt":"2026-04-25T21:09:43Z","finishedAt":"2026-04-25T21:09:50Z","durationSeconds":7,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical truthfulqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"dcd9fba35b25203d41e1f4b426ce1d7f57d172d2aaa3251f3d5277b9e03769ee2881224f9cfe3ae63bf1d5d3d20a869ee26ee0375b4e72e536cfefc799d36d07","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1d3a9406678cb49569834ab0a185eb50e97f8072798c1f6afd73f76a77d5f75d","methodology_hash":"sha256:f4ed2d4c63c28f153fdbf3a452d74edfe551f80655f1f790e73063f9e942d40a","merkle_root":"sha256:8f35e64b582a151852469b7f986c456d4e74fa14a3fbb892c809f663cab56649","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"dcd9fba35b25203d41e1f4b426ce1d7f57d172d2aaa3251f3d5277b9e03769ee2881224f9cfe3ae63bf1d5d3d20a869ee26ee0375b4e72e536cfefc799d36d07","signerAlgo":"ed25519","submittedAt":"2026-04-25T21:09:50.362Z","verifiedAt":"2026-04-25T21:09:50Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-25T21:09:50.362Z","storeRoute":"store-run"},{"id":"run-local-3cf90b852afc","serviceId":"openrouter","benchmarkId":"musr","model":"glm-4.7-flash-30b-q4km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:80561df09e759b1394e3ff89205f4e6459613b1f49529e3635fc6105e4d75213","methodologyHash":"sha256:fa408a6814333ee9015e5641ec6b6a5ff86cc1aeb256fef95d121bd838acb03d","transcriptMerkleRoot":"sha256:8fb34bc4b959efb42e7bcb41b0935ca211332a7bc449e53d9ba1df4ea9190ed9","startedAt":"2026-04-25T21:09:34Z","finishedAt":"2026-04-25T21:09:42Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical musr sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"3ff990a7337e51fe84eb805a1ef29731d5c1e6909be78e3f1446967e73e2197f5cf09280fcf12a00c510e91362fcb410ab20c103dd2fda49732b58ce67378603","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:80561df09e759b1394e3ff89205f4e6459613b1f49529e3635fc6105e4d75213","methodology_hash":"sha256:fa408a6814333ee9015e5641ec6b6a5ff86cc1aeb256fef95d121bd838acb03d","merkle_root":"sha256:8fb34bc4b959efb42e7bcb41b0935ca211332a7bc449e53d9ba1df4ea9190ed9","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"3ff990a7337e51fe84eb805a1ef29731d5c1e6909be78e3f1446967e73e2197f5cf09280fcf12a00c510e91362fcb410ab20c103dd2fda49732b58ce67378603","signerAlgo":"ed25519","submittedAt":"2026-04-25T21:09:41.890Z","verifiedAt":"2026-04-25T21:09:42Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-25T21:09:41.890Z","storeRoute":"store-run"},{"id":"run-local-c3652c687e4c","serviceId":"openrouter","benchmarkId":"logiqa","model":"glm-4.7-flash-30b-q4km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:71cd6dfdc54064ed9becf0aacee7070c8cd9bf664c467aac13fc2955f25167de","methodologyHash":"sha256:fa645feb915960040b3315732df398a70a37f0ea2625f4a963bf57ee1eb89eff","transcriptMerkleRoot":"sha256:65e50efee8b604affad648e2aee2473ce383c8200a91c8290d6fb7d6241d8d9a","startedAt":"2026-04-25T21:09:25Z","finishedAt":"2026-04-25T21:09:33Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical logiqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"c71587c067b26cbe4cfb35d407e268a3d80a25860f652dbef3771ff0a57b625251548d55981eb3e536b06ae0ec3df6ba70599ee2454381fad85cd6b6e574820d","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:71cd6dfdc54064ed9becf0aacee7070c8cd9bf664c467aac13fc2955f25167de","methodology_hash":"sha256:fa645feb915960040b3315732df398a70a37f0ea2625f4a963bf57ee1eb89eff","merkle_root":"sha256:65e50efee8b604affad648e2aee2473ce383c8200a91c8290d6fb7d6241d8d9a","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"c71587c067b26cbe4cfb35d407e268a3d80a25860f652dbef3771ff0a57b625251548d55981eb3e536b06ae0ec3df6ba70599ee2454381fad85cd6b6e574820d","signerAlgo":"ed25519","submittedAt":"2026-04-25T21:09:32.594Z","verifiedAt":"2026-04-25T21:09:33Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-25T21:09:32.594Z","storeRoute":"store-run"},{"id":"run-local-86e7d83bf7a9","serviceId":"openrouter","benchmarkId":"math-500","model":"glm-4.7-flash-30b-q4km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:9e86adfbb746209853106da77f29348041d67bb236dcd3cc8ad472da93058634","methodologyHash":"sha256:a62e8bbe42883c707d400421717f846925e03f389fcd98a9f01be093350310f2","transcriptMerkleRoot":"sha256:14510fdbd3e70298053d1f7a5fa207ee1cd0b9a13605b8c8da3beb7e921f263a","startedAt":"2026-04-25T21:09:15Z","finishedAt":"2026-04-25T21:09:23Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical math-500 sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"cc2381ccd09978e0999114b0df90fbeed0c82957b01f3699e9828fedc0ab81cae2a716d3288c97fd81bdab28d162b96777fc08db90336244716e29b3ef026107","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:9e86adfbb746209853106da77f29348041d67bb236dcd3cc8ad472da93058634","methodology_hash":"sha256:a62e8bbe42883c707d400421717f846925e03f389fcd98a9f01be093350310f2","merkle_root":"sha256:14510fdbd3e70298053d1f7a5fa207ee1cd0b9a13605b8c8da3beb7e921f263a","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"cc2381ccd09978e0999114b0df90fbeed0c82957b01f3699e9828fedc0ab81cae2a716d3288c97fd81bdab28d162b96777fc08db90336244716e29b3ef026107","signerAlgo":"ed25519","submittedAt":"2026-04-25T21:09:23.224Z","verifiedAt":"2026-04-25T21:09:23Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-25T21:09:23.224Z","storeRoute":"store-run"},{"id":"run-local-44914e4abfb0","serviceId":"openrouter","benchmarkId":"bbh","model":"glm-4.7-flash-30b-q4km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:cbe1df91698bb33429d10eac97c4ed296ea144deba5205fee61ef8597d796a89","methodologyHash":"sha256:f90d0cf5bd553d66271a37ead896f75d805589e323916ebc0f2d3d26eae5c795","transcriptMerkleRoot":"sha256:463d121b9f9ad51ffe58603a36e41b1454416225dab8897b0c8f3f195691f90d","startedAt":"2026-04-25T21:09:06Z","finishedAt":"2026-04-25T21:09:14Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical bbh sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"7ef8f92f27d828d6ca7ce9d1ff2a47bac51256cf5cc276a3b5b54fc561ca45a8dafccfa74d13c56663cbb950a308f73931ac713e79af4ae0844823245428b402","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:cbe1df91698bb33429d10eac97c4ed296ea144deba5205fee61ef8597d796a89","methodology_hash":"sha256:f90d0cf5bd553d66271a37ead896f75d805589e323916ebc0f2d3d26eae5c795","merkle_root":"sha256:463d121b9f9ad51ffe58603a36e41b1454416225dab8897b0c8f3f195691f90d","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"7ef8f92f27d828d6ca7ce9d1ff2a47bac51256cf5cc276a3b5b54fc561ca45a8dafccfa74d13c56663cbb950a308f73931ac713e79af4ae0844823245428b402","signerAlgo":"ed25519","submittedAt":"2026-04-25T21:09:13.756Z","verifiedAt":"2026-04-25T21:09:14Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-25T21:09:13.756Z","storeRoute":"store-run"},{"id":"run-local-033176ed682c","serviceId":"openrouter","benchmarkId":"openbookqa","model":"glm-4.7-flash-30b-q4km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:9de02c962a604c215aef6e33014a48f52c1eb284f810c0804a96884968f79952","methodologyHash":"sha256:913c972d908dc49210c4428b5b09287a8637337a8cfc56bcbddbe73f68f227d6","transcriptMerkleRoot":"sha256:0e76da5a60a5d4f99b1962c81df140f7c346bfaa5e9532f5f500306ec3d1546e","startedAt":"2026-04-25T21:08:56Z","finishedAt":"2026-04-25T21:09:04Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical openbookqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"8cc28ce9dce154dcea0524004b6dff32a06a1ad8c0be903a5715506310641e6a69fdc9b2602b6c25303ec995fab6d80745d4bcf9f3811218ba6ba6806ae8790e","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:9de02c962a604c215aef6e33014a48f52c1eb284f810c0804a96884968f79952","methodology_hash":"sha256:913c972d908dc49210c4428b5b09287a8637337a8cfc56bcbddbe73f68f227d6","merkle_root":"sha256:0e76da5a60a5d4f99b1962c81df140f7c346bfaa5e9532f5f500306ec3d1546e","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"8cc28ce9dce154dcea0524004b6dff32a06a1ad8c0be903a5715506310641e6a69fdc9b2602b6c25303ec995fab6d80745d4bcf9f3811218ba6ba6806ae8790e","signerAlgo":"ed25519","submittedAt":"2026-04-25T21:09:04.310Z","verifiedAt":"2026-04-25T21:09:04Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-25T21:09:04.310Z","storeRoute":"store-run"},{"id":"run-local-d8ac985cccaf","serviceId":"openrouter","benchmarkId":"commonsenseqa","model":"glm-4.7-flash-30b-q4km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:729b5c0850ac5be6b8cfbedf4d36938249bb7c0d9e9c980260037391414dd520","methodologyHash":"sha256:afaa58378ca68cf9d5d85a75f5d168088888466b4b067772f67d22c7a05da188","transcriptMerkleRoot":"sha256:fbcf41cf5f6f88d5c9df7ddf66316ef7152043e009870fdad1e5b52eef0b4647","startedAt":"2026-04-25T21:08:48Z","finishedAt":"2026-04-25T21:08:55Z","durationSeconds":7,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical commonsenseqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"5c401e27a9dfaf85f5dce59d52f68fbcfcabcdcba829eb3272461ba92e0d8be3b177efb1180b6f5bc381217de8ca7e21425b0504aaa4781e190f392352ef120b","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:729b5c0850ac5be6b8cfbedf4d36938249bb7c0d9e9c980260037391414dd520","methodology_hash":"sha256:afaa58378ca68cf9d5d85a75f5d168088888466b4b067772f67d22c7a05da188","merkle_root":"sha256:fbcf41cf5f6f88d5c9df7ddf66316ef7152043e009870fdad1e5b52eef0b4647","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"5c401e27a9dfaf85f5dce59d52f68fbcfcabcdcba829eb3272461ba92e0d8be3b177efb1180b6f5bc381217de8ca7e21425b0504aaa4781e190f392352ef120b","signerAlgo":"ed25519","submittedAt":"2026-04-25T21:08:54.866Z","verifiedAt":"2026-04-25T21:08:55Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-25T21:08:54.866Z","storeRoute":"store-run"},{"id":"run-local-8094a7e1e06e","serviceId":"openrouter","benchmarkId":"piqa","model":"glm-4.7-flash-30b-q4km","score":0,"runs":1,"breakdown":{"n":3,"passes":0,"mean_raw":0},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:d7c75287e440ecdb3df61118581bba694b3160a1a7056087d2005d8f167672bc","methodologyHash":"sha256:9b6bdebe22cd6b2ae2dc44f6a5c94297427edb9bdbf5f1ab9bf3c65bd10600c9","transcriptMerkleRoot":"sha256:39724fe832470feb1683dceea995f2fb815d38c6d118103f2cd68b921fdd8ed7","startedAt":"2026-04-25T21:08:38Z","finishedAt":"2026-04-25T21:08:47Z","durationSeconds":9,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical piqa sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"d9867e4afc811ab9b790768ea424657e02c386b4e05d36cf2a7f8443b62cd7ea5681e34f02572818b6e7fd61ce0b2ac1e17755457527a952dd37cb59a0510d00","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:d7c75287e440ecdb3df61118581bba694b3160a1a7056087d2005d8f167672bc","methodology_hash":"sha256:9b6bdebe22cd6b2ae2dc44f6a5c94297427edb9bdbf5f1ab9bf3c65bd10600c9","merkle_root":"sha256:39724fe832470feb1683dceea995f2fb815d38c6d118103f2cd68b921fdd8ed7","claimed_score":0}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"d9867e4afc811ab9b790768ea424657e02c386b4e05d36cf2a7f8443b62cd7ea5681e34f02572818b6e7fd61ce0b2ac1e17755457527a952dd37cb59a0510d00","signerAlgo":"ed25519","submittedAt":"2026-04-25T21:08:46.568Z","verifiedAt":"2026-04-25T21:08:47Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-25T21:08:46.568Z","storeRoute":"store-run"},{"id":"run-local-420144e0ea77","serviceId":"openrouter","benchmarkId":"winogrande","model":"glm-4.7-flash-30b-q4km","score":33.33333333333333,"runs":1,"breakdown":{"n":3,"passes":1,"mean_raw":0.3333333333333333},"sampleCount":3,"runnerVersion":"benchlist-local-ollama@1.0.0","runnerCommit":"local","datasetHash":"sha256:1836ea9a0cb7f0dc0e641b157c14cc381c6b5dd60cdb1a4f9ae49c637dc92e92","methodologyHash":"sha256:ca0591d2b7c7487c4e66b8f2a4850fe3481575973ffd5567bf6af074102cfa3c","transcriptMerkleRoot":"sha256:08c7007445ca0732b0c6dc3ebcf292a3386d85f76d17921cfc7b81989d0e4544","startedAt":"2026-04-25T21:08:29Z","finishedAt":"2026-04-25T21:08:37Z","durationSeconds":8,"decoding":{"temperature":0,"num_predict":384},"attestor":"benchlist-local-ollama","publisher":"openrouter","replay":{"command":"ollama run glm-4.7-flash-30b-q4km  # via _local_runner.py against the canonical winogrande sample set","dockerImage":"n/a (local Ollama daemon)","envRequired":[]},"proof":{"system":"signed-attestation","status":"signed","signature":"3d3bf27dc0df6848a311cad973af90f346b2018a8b8781dd8621f31e8f6eadf8546696429a88d9080b7cc9eadfb9b45db09bc78400508629c46af30dd902d307","pubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:1836ea9a0cb7f0dc0e641b157c14cc381c6b5dd60cdb1a4f9ae49c637dc92e92","methodology_hash":"sha256:ca0591d2b7c7487c4e66b8f2a4850fe3481575973ffd5567bf6af074102cfa3c","merkle_root":"sha256:08c7007445ca0732b0c6dc3ebcf292a3386d85f76d17921cfc7b81989d0e4544","claimed_score":33.33333333333333}},"verification":{"mode":"signed-attestation","status":"attested","alignedProofSystem":"signed-attestation","attestorPubkey":"f82b412efec2f9bd732efd7786568ad1dde8b788c79bdba2134be01f68e8ff79","attestorSignature":"3d3bf27dc0df6848a311cad973af90f346b2018a8b8781dd8621f31e8f6eadf8546696429a88d9080b7cc9eadfb9b45db09bc78400508629c46af30dd902d307","signerAlgo":"ed25519","submittedAt":"2026-04-25T21:08:37.099Z","verifiedAt":"2026-04-25T21:08:37Z","note":"Signed locally by Benchlist runner against Ollama daemon. Model: glm-4.7-flash-30b-q4km."},"submittedBy":"rodneyyesep@gmail.com","submittedAt":"2026-04-25T21:08:37.099Z","storeRoute":"store-run"},{"id":"run-humaneval-05028db448","serviceId":"anthropic-claude","benchmarkId":"humaneval","model":"claude-sonnet-4-5-20250929","score":91.5,"runs":1,"breakdown":{"n":20,"passes":18,"mean_raw":0.915},"sampleCount":20,"runnerVersion":"benchlist-runner@1.0.0","runnerCommit":"seed-2026-04-25","datasetHash":"sha256:3e1eb278fb45e71a150b896866387eae8c5bf42c0618c1a543fd5bb03cd3edaf","methodologyHash":"sha256:ce08d4538a6db120acb39fa9c8102cd61be51040a3cc735f9be0952fe445a1db","transcriptMerkleRoot":"sha256:c4a5bcffffaafe3332c77da6ed476ee80564b0a4c5a9aaf461be579fdcd0c501","startedAt":"2026-04-24T20:40:43Z","finishedAt":"2026-04-24T20:42:43Z","durationSeconds":120,"decoding":{"temperature":0,"max_tokens":1024},"attestor":"benchlist-runner-0","publisher":"anthropic-claude","replay":{"command":"benchlist run humaneval --service anthropic-claude --model claude-sonnet-4-5-20250929 --runs 1","dockerImage":"ghcr.io/benchlist/runner:1.0.0","envRequired":["ANTHROPIC_API_KEY"]},"proof":{"system":"signed-attestation","status":"signed","signature":"4b9acd6c4daa21b49ccd14b1c660ec389841cc1fb901ee4709fe32bce664bef947d2a1df04faf45245fc7a19a23515d208a9908597f5d9d39bcd63373cb00c0d","pubkey":"f38712fae5f11a2fc2fe3f7541264f04cd90974affdf1cce05163ecdaf35d457","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:3e1eb278fb45e71a150b896866387eae8c5bf42c0618c1a543fd5bb03cd3edaf","methodology_hash":"sha256:ce08d4538a6db120acb39fa9c8102cd61be51040a3cc735f9be0952fe445a1db","merkle_root":"sha256:c4a5bcffffaafe3332c77da6ed476ee80564b0a4c5a9aaf461be579fdcd0c501","claimed_score":91.5}},"verification":{"mode":"aligned","status":"attested","alignedBatchId":null,"alignedProofSystem":"signed-attestation","alignedVerifierContract":"0xeF2A435e5EE44B2041100EF8cbC8ae035166606c","network":"ethereum","submittedAt":"2026-04-24T20:42:43Z","verifiedAt":null,"attestorPubkey":"f38712fae5f11a2fc2fe3f7541264f04cd90974affdf1cce05163ecdaf35d457","attestorSignature":"4b9acd6c4daa21b49ccd14b1c660ec389841cc1fb901ee4709fe32bce664bef947d2a1df04faf45245fc7a19a23515d208a9908597f5d9d39bcd63373cb00c0d","signerAlgo":"ed25519","note":"Signed locally. Not on-chain — demo seed. Re-run with --system sp1 + ATTESTOR_PRIVATE_KEY to anchor on Aligned L1."}},{"id":"run-mbpp-d88a0d355a","serviceId":"anthropic-claude","benchmarkId":"mbpp","model":"claude-sonnet-4-5-20250929","score":88.2,"runs":1,"breakdown":{"n":20,"passes":17,"mean_raw":0.882},"sampleCount":20,"runnerVersion":"benchlist-runner@1.0.0","runnerCommit":"seed-2026-04-25","datasetHash":"sha256:fa4746e4dbf616502400a2547bac67774b519b8dbd314749181832b59d939c23","methodologyHash":"sha256:3d09dd53dce94bc6d9727753f14950bbf1fd7ee93413ab117c1ff0d3898d3ac9","transcriptMerkleRoot":"sha256:88b5dcf263fafbaed541d66339528e0155b461e54dc157793cb43392af1bbdb8","startedAt":"2026-04-24T19:40:43Z","finishedAt":"2026-04-24T19:42:43Z","durationSeconds":120,"decoding":{"temperature":0,"max_tokens":1024},"attestor":"benchlist-runner-0","publisher":"anthropic-claude","replay":{"command":"benchlist run mbpp --service anthropic-claude --model claude-sonnet-4-5-20250929 --runs 1","dockerImage":"ghcr.io/benchlist/runner:1.0.0","envRequired":["ANTHROPIC_API_KEY"]},"proof":{"system":"signed-attestation","status":"signed","signature":"436504211205c4537ee678dbb0205acc457d58a98d79b76f209faf9bd58081d7b654ed9f21f76d13c90996c45579a8482ff262b034121e62394b7a2f6b17fc03","pubkey":"f38712fae5f11a2fc2fe3f7541264f04cd90974affdf1cce05163ecdaf35d457","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:fa4746e4dbf616502400a2547bac67774b519b8dbd314749181832b59d939c23","methodology_hash":"sha256:3d09dd53dce94bc6d9727753f14950bbf1fd7ee93413ab117c1ff0d3898d3ac9","merkle_root":"sha256:88b5dcf263fafbaed541d66339528e0155b461e54dc157793cb43392af1bbdb8","claimed_score":88.2}},"verification":{"mode":"aligned","status":"attested","alignedBatchId":null,"alignedProofSystem":"signed-attestation","alignedVerifierContract":"0xeF2A435e5EE44B2041100EF8cbC8ae035166606c","network":"ethereum","submittedAt":"2026-04-24T19:42:43Z","verifiedAt":null,"attestorPubkey":"f38712fae5f11a2fc2fe3f7541264f04cd90974affdf1cce05163ecdaf35d457","attestorSignature":"436504211205c4537ee678dbb0205acc457d58a98d79b76f209faf9bd58081d7b654ed9f21f76d13c90996c45579a8482ff262b034121e62394b7a2f6b17fc03","signerAlgo":"ed25519","note":"Signed locally. Not on-chain — demo seed. Re-run with --system sp1 + ATTESTOR_PRIVATE_KEY to anchor on Aligned L1."}},{"id":"run-humaneval-e342703942","serviceId":"openai-chatgpt","benchmarkId":"humaneval","model":"gpt-5-4-mini","score":87.8,"runs":1,"breakdown":{"n":20,"passes":17,"mean_raw":0.878},"sampleCount":20,"runnerVersion":"benchlist-runner@1.0.0","runnerCommit":"seed-2026-04-25","datasetHash":"sha256:3e1eb278fb45e71a150b896866387eae8c5bf42c0618c1a543fd5bb03cd3edaf","methodologyHash":"sha256:ce08d4538a6db120acb39fa9c8102cd61be51040a3cc735f9be0952fe445a1db","transcriptMerkleRoot":"sha256:a86b16f17926ac9fa2f82a68cb6a0cc9f5803fa261b794a55e72296789faea34","startedAt":"2026-04-24T18:40:43Z","finishedAt":"2026-04-24T18:42:43Z","durationSeconds":120,"decoding":{"temperature":0,"max_tokens":1024},"attestor":"benchlist-runner-0","publisher":"openai-chatgpt","replay":{"command":"benchlist run humaneval --service openai-chatgpt --model gpt-5-4-mini --runs 1","dockerImage":"ghcr.io/benchlist/runner:1.0.0","envRequired":["OPENAI_API_KEY"]},"proof":{"system":"signed-attestation","status":"signed","signature":"d160721731a3acc4f9af67c0489f31e4f7236783265d1691922c3463d62567b221e22ff46067803776d96c544eac4dafe5ded4d64f48513f6741c45858b50c01","pubkey":"f38712fae5f11a2fc2fe3f7541264f04cd90974affdf1cce05163ecdaf35d457","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:3e1eb278fb45e71a150b896866387eae8c5bf42c0618c1a543fd5bb03cd3edaf","methodology_hash":"sha256:ce08d4538a6db120acb39fa9c8102cd61be51040a3cc735f9be0952fe445a1db","merkle_root":"sha256:a86b16f17926ac9fa2f82a68cb6a0cc9f5803fa261b794a55e72296789faea34","claimed_score":87.8}},"verification":{"mode":"aligned","status":"attested","alignedBatchId":null,"alignedProofSystem":"signed-attestation","alignedVerifierContract":"0xeF2A435e5EE44B2041100EF8cbC8ae035166606c","network":"ethereum","submittedAt":"2026-04-24T18:42:43Z","verifiedAt":null,"attestorPubkey":"f38712fae5f11a2fc2fe3f7541264f04cd90974affdf1cce05163ecdaf35d457","attestorSignature":"d160721731a3acc4f9af67c0489f31e4f7236783265d1691922c3463d62567b221e22ff46067803776d96c544eac4dafe5ded4d64f48513f6741c45858b50c01","signerAlgo":"ed25519","note":"Signed locally. Not on-chain — demo seed. Re-run with --system sp1 + ATTESTOR_PRIVATE_KEY to anchor on Aligned L1."}},{"id":"run-mbpp-7f87239d91","serviceId":"openrouter","benchmarkId":"mbpp","model":"meta-llama/llama-3.3-70b","score":76.4,"runs":1,"breakdown":{"n":20,"passes":15,"mean_raw":0.764},"sampleCount":20,"runnerVersion":"benchlist-runner@1.0.0","runnerCommit":"seed-2026-04-25","datasetHash":"sha256:fa4746e4dbf616502400a2547bac67774b519b8dbd314749181832b59d939c23","methodologyHash":"sha256:3d09dd53dce94bc6d9727753f14950bbf1fd7ee93413ab117c1ff0d3898d3ac9","transcriptMerkleRoot":"sha256:d92eb70a6c781b75d6fa9d57c0eb3ae018e8c0ac906a2be8ffa32f809cd80cc3","startedAt":"2026-04-24T17:40:43Z","finishedAt":"2026-04-24T17:42:43Z","durationSeconds":120,"decoding":{"temperature":0,"max_tokens":1024},"attestor":"benchlist-runner-0","publisher":"openrouter","replay":{"command":"benchlist run mbpp --service openrouter --model meta-llama/llama-3.3-70b --runs 1","dockerImage":"ghcr.io/benchlist/runner:1.0.0","envRequired":["OPENROUTER_API_KEY"]},"proof":{"system":"signed-attestation","status":"signed","signature":"2838a1bd26d235b607db42736c2db98af58d3999c69fa05ba366c20203a6279fb85e208cc531c86be223ca45d69f60a08846a36b56bf3668c25b5e1403f38005","pubkey":"f38712fae5f11a2fc2fe3f7541264f04cd90974affdf1cce05163ecdaf35d457","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:fa4746e4dbf616502400a2547bac67774b519b8dbd314749181832b59d939c23","methodology_hash":"sha256:3d09dd53dce94bc6d9727753f14950bbf1fd7ee93413ab117c1ff0d3898d3ac9","merkle_root":"sha256:d92eb70a6c781b75d6fa9d57c0eb3ae018e8c0ac906a2be8ffa32f809cd80cc3","claimed_score":76.4}},"verification":{"mode":"aligned","status":"attested","alignedBatchId":null,"alignedProofSystem":"signed-attestation","alignedVerifierContract":"0xeF2A435e5EE44B2041100EF8cbC8ae035166606c","network":"ethereum","submittedAt":"2026-04-24T17:42:43Z","verifiedAt":null,"attestorPubkey":"f38712fae5f11a2fc2fe3f7541264f04cd90974affdf1cce05163ecdaf35d457","attestorSignature":"2838a1bd26d235b607db42736c2db98af58d3999c69fa05ba366c20203a6279fb85e208cc531c86be223ca45d69f60a08846a36b56bf3668c25b5e1403f38005","signerAlgo":"ed25519","note":"Signed locally. Not on-chain — demo seed. Re-run with --system sp1 + ATTESTOR_PRIVATE_KEY to anchor on Aligned L1."}},{"id":"run-gsm8k-290568d1f2","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-sonnet-4-5-20250929","score":94.1,"runs":1,"breakdown":{"n":20,"passes":18,"mean_raw":0.941},"sampleCount":20,"runnerVersion":"benchlist-runner@1.0.0","runnerCommit":"seed-2026-04-25","datasetHash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodologyHash":"sha256:3a6a6b8897f86450fae044fb6291eecbc48fbc9d92905718366b16b895ee773e","transcriptMerkleRoot":"sha256:471f008f8116bc3b8c852edea65aff4b630f205aef4c35bde29d3401dfc48177","startedAt":"2026-04-24T16:40:43Z","finishedAt":"2026-04-24T16:42:43Z","durationSeconds":120,"decoding":{"temperature":0,"max_tokens":1024},"attestor":"benchlist-runner-0","publisher":"anthropic-claude","replay":{"command":"benchlist run gsm8k --service anthropic-claude --model claude-sonnet-4-5-20250929 --runs 1","dockerImage":"ghcr.io/benchlist/runner:1.0.0","envRequired":["ANTHROPIC_API_KEY"]},"proof":{"system":"signed-attestation","status":"signed","signature":"ce52785a7713bb66cec09d340473e09c3ba43b6960d9ece2f235bf1c97df92a34f80d3f7e0563c6b285c7a7f2e8dae9823b417187cd01bfc6300642848620c06","pubkey":"f38712fae5f11a2fc2fe3f7541264f04cd90974affdf1cce05163ecdaf35d457","signer_algo":"ed25519","public_inputs":{"dataset_hash":"sha256:09a35a0a0a48f13840457c82e2c2da6a7884ec21b51154139867843c2e4da5c7","methodology_hash":"sha256:3a6a6b8897f86450fae044fb6291eecbc48fbc9d92905718366b16b895ee773e","merkle_root":"sha256:471f008f8116bc3b8c852edea65aff4b630f205aef4c35bde29d3401dfc48177","claimed_score":94.1}},"verification":{"mode":"aligned","status":"attested","alignedBatchId":null,"alignedProofSystem":"signed-attestation","alignedVerifierContract":"0xeF2A435e5EE44B2041100EF8cbC8ae035166606c","network":"ethereum","submittedAt":"2026-04-24T16:42:43Z","verifiedAt":null,"attestorPubkey":"f38712fae5f11a2fc2fe3f7541264f04cd90974affdf1cce05163ecdaf35d457","attestorSignature":"ce52785a7713bb66cec09d340473e09c3ba43b6960d9ece2f235bf1c97df92a34f80d3f7e0563c6b285c7a7f2e8dae9823b417187cd01bfc6300642848620c06","signerAlgo":"ed25519","note":"Signed locally. Not on-chain — demo seed. Re-run with --system sp1 + ATTESTOR_PRIVATE_KEY to anchor on Aligned L1."}},{"id":"sr-humane-a89af187d7","serviceId":"anthropic-claude","benchmarkId":"humaneval","model":"claude-opus-4-7","score":94.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mbpp-cad1b473fd","serviceId":"anthropic-claude","benchmarkId":"mbpp","model":"claude-opus-4-7","score":92.1,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-swe-be-b8f925a9c9","serviceId":"anthropic-claude","benchmarkId":"swe-bench-verified","model":"claude-opus-4-7","score":64.3,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-e1b4191544","serviceId":"anthropic-claude","benchmarkId":"gpqa","model":"claude-opus-4-7","score":75.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-p-2a28cbce8c","serviceId":"anthropic-claude","benchmarkId":"mmlu-pro","model":"claude-opus-4-7","score":85.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-math-9b8d47a1cf","serviceId":"anthropic-claude","benchmarkId":"math","model":"claude-opus-4-7","score":76.3,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-aime-0ca79c3eb2","serviceId":"anthropic-claude","benchmarkId":"aime","model":"claude-opus-4-7","score":39.5,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-tau-be-cea87b66bb","serviceId":"anthropic-claude","benchmarkId":"tau-bench","model":"claude-opus-4-7","score":62.7,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-1cd6951954","serviceId":"anthropic-claude","benchmarkId":"humaneval","model":"claude-sonnet-4-5-20250929","score":91.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-09-29T12:00:00Z","finishedAt":"2025-09-29T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-sonnet-4-5","asOfDate":"2025-09-29","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-09-29T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mbpp-40f64ed6e5","serviceId":"anthropic-claude","benchmarkId":"mbpp","model":"claude-sonnet-4-5-20250929","score":90,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-09-29T12:00:00Z","finishedAt":"2025-09-29T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-sonnet-4-5","asOfDate":"2025-09-29","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-09-29T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-swe-be-f6e4719c01","serviceId":"anthropic-claude","benchmarkId":"swe-bench-verified","model":"claude-sonnet-4-5-20250929","score":49,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-09-29T12:00:00Z","finishedAt":"2025-09-29T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-sonnet-4-5","asOfDate":"2025-09-29","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-09-29T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-0ba41b21c0","serviceId":"anthropic-claude","benchmarkId":"gpqa","model":"claude-sonnet-4-5-20250929","score":66.3,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-09-29T12:00:00Z","finishedAt":"2025-09-29T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-sonnet-4-5","asOfDate":"2025-09-29","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-09-29T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-p-30275d1218","serviceId":"anthropic-claude","benchmarkId":"mmlu-pro","model":"claude-sonnet-4-5-20250929","score":78.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-09-29T12:00:00Z","finishedAt":"2025-09-29T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-sonnet-4-5","asOfDate":"2025-09-29","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-09-29T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-math-f33cef01a5","serviceId":"anthropic-claude","benchmarkId":"math","model":"claude-sonnet-4-5-20250929","score":70.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-09-29T12:00:00Z","finishedAt":"2025-09-29T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-sonnet-4-5","asOfDate":"2025-09-29","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-09-29T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-3d78bed46f","serviceId":"anthropic-claude","benchmarkId":"humaneval","model":"claude-haiku-4-5-20251001","score":85.6,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-10-01T12:00:00Z","finishedAt":"2025-10-01T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-haiku-4-5","asOfDate":"2025-10-01","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-10-01T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mbpp-46e7d81115","serviceId":"anthropic-claude","benchmarkId":"mbpp","model":"claude-haiku-4-5-20251001","score":82.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-10-01T12:00:00Z","finishedAt":"2025-10-01T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-haiku-4-5","asOfDate":"2025-10-01","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-10-01T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-0ae3729fec","serviceId":"anthropic-claude","benchmarkId":"gpqa","model":"claude-haiku-4-5-20251001","score":51.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-10-01T12:00:00Z","finishedAt":"2025-10-01T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-haiku-4-5","asOfDate":"2025-10-01","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-10-01T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-p-bdb7082bee","serviceId":"anthropic-claude","benchmarkId":"mmlu-pro","model":"claude-haiku-4-5-20251001","score":66.7,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-10-01T12:00:00Z","finishedAt":"2025-10-01T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-haiku-4-5","asOfDate":"2025-10-01","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-10-01T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-math-c76f11812d","serviceId":"anthropic-claude","benchmarkId":"math","model":"claude-haiku-4-5-20251001","score":61.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-10-01T12:00:00Z","finishedAt":"2025-10-01T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-haiku-4-5","asOfDate":"2025-10-01","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-10-01T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-e0ce78a0da","serviceId":"anthropic-claude","benchmarkId":"humaneval","model":"claude-3-7-sonnet-20250219","score":93,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-02-19T12:00:00Z","finishedAt":"2025-02-19T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-3-7-sonnet","asOfDate":"2025-02-19","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-02-19T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-swe-be-e29d978d5d","serviceId":"anthropic-claude","benchmarkId":"swe-bench-verified","model":"claude-3-7-sonnet-20250219","score":62.3,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-02-19T12:00:00Z","finishedAt":"2025-02-19T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-3-7-sonnet","asOfDate":"2025-02-19","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-02-19T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-ce73e8c391","serviceId":"anthropic-claude","benchmarkId":"gpqa","model":"claude-3-7-sonnet-20250219","score":68,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-02-19T12:00:00Z","finishedAt":"2025-02-19T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-3-7-sonnet","asOfDate":"2025-02-19","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-02-19T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-24e62748ec","serviceId":"openai-chatgpt","benchmarkId":"humaneval","model":"gpt-5.4-pro","score":95.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mbpp-9803a10391","serviceId":"openai-chatgpt","benchmarkId":"mbpp","model":"gpt-5.4-pro","score":93.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-swe-be-4cf7d81481","serviceId":"openai-chatgpt","benchmarkId":"swe-bench-verified","model":"gpt-5.4-pro","score":71.6,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-63d7cabe98","serviceId":"openai-chatgpt","benchmarkId":"gpqa","model":"gpt-5.4-pro","score":79.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-p-e23d227c06","serviceId":"openai-chatgpt","benchmarkId":"mmlu-pro","model":"gpt-5.4-pro","score":87.6,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-math-b9b469496f","serviceId":"openai-chatgpt","benchmarkId":"math","model":"gpt-5.4-pro","score":83.5,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-aime-f8f1da5f8d","serviceId":"openai-chatgpt","benchmarkId":"aime","model":"gpt-5.4-pro","score":53.7,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-ed24d1a39d","serviceId":"openai-chatgpt","benchmarkId":"humaneval","model":"gpt-5.4-mini","score":90.6,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mbpp-9ea46af018","serviceId":"openai-chatgpt","benchmarkId":"mbpp","model":"gpt-5.4-mini","score":87.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-f4decc099a","serviceId":"openai-chatgpt","benchmarkId":"gpqa","model":"gpt-5.4-mini","score":60.3,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-p-da1558f9ad","serviceId":"openai-chatgpt","benchmarkId":"mmlu-pro","model":"gpt-5.4-mini","score":74.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-math-9c2d594215","serviceId":"openai-chatgpt","benchmarkId":"math","model":"gpt-5.4-mini","score":72.6,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-2041130d6d","serviceId":"openai-chatgpt","benchmarkId":"humaneval","model":"o4-pro","score":93.7,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-01-12T12:00:00Z","finishedAt":"2026-01-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI o-series","sourceUrl":"https://openai.com/index/introducing-o4","asOfDate":"2026-01-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-01-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI o-series. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-swe-be-c897232303","serviceId":"openai-chatgpt","benchmarkId":"swe-bench-verified","model":"o4-pro","score":68.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-01-12T12:00:00Z","finishedAt":"2026-01-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI o-series","sourceUrl":"https://openai.com/index/introducing-o4","asOfDate":"2026-01-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-01-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI o-series. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-9498b5a751","serviceId":"openai-chatgpt","benchmarkId":"gpqa","model":"o4-pro","score":81,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-01-12T12:00:00Z","finishedAt":"2026-01-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI o-series","sourceUrl":"https://openai.com/index/introducing-o4","asOfDate":"2026-01-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-01-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI o-series. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-math-337586aac2","serviceId":"openai-chatgpt","benchmarkId":"math","model":"o4-pro","score":87.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-01-12T12:00:00Z","finishedAt":"2026-01-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI o-series","sourceUrl":"https://openai.com/index/introducing-o4","asOfDate":"2026-01-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-01-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI o-series. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-aime-975a292ba7","serviceId":"openai-chatgpt","benchmarkId":"aime","model":"o4-pro","score":66.5,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-01-12T12:00:00Z","finishedAt":"2026-01-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI o-series","sourceUrl":"https://openai.com/index/introducing-o4","asOfDate":"2026-01-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-01-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI o-series. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-fronti-dae10d103c","serviceId":"openai-chatgpt","benchmarkId":"frontier-math","model":"o4-pro","score":11.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-01-12T12:00:00Z","finishedAt":"2026-01-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI o-series","sourceUrl":"https://openai.com/index/introducing-o4","asOfDate":"2026-01-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-01-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI o-series. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-88a61adfc2","serviceId":"openrouter","benchmarkId":"humaneval","model":"google/gemini-2.6-pro","score":94,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mbpp-65d7c0e826","serviceId":"openrouter","benchmarkId":"mbpp","model":"google/gemini-2.6-pro","score":91.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-swe-be-fc8c07d2ac","serviceId":"openrouter","benchmarkId":"swe-bench-verified","model":"google/gemini-2.6-pro","score":60.5,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-c90ddd50ea","serviceId":"openrouter","benchmarkId":"gpqa","model":"google/gemini-2.6-pro","score":72.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-p-bcaa182d61","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"google/gemini-2.6-pro","score":84.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-math-16b2d89e7f","serviceId":"openrouter","benchmarkId":"math","model":"google/gemini-2.6-pro","score":82.1,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-ruler-ec0bd41afb","serviceId":"openrouter","benchmarkId":"ruler","model":"google/gemini-2.6-pro","score":92.3,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-f726cb9801","serviceId":"openrouter","benchmarkId":"humaneval","model":"google/gemini-2.6-flash","score":88.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mbpp-61e2aa7cd1","serviceId":"openrouter","benchmarkId":"mbpp","model":"google/gemini-2.6-flash","score":85,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-ffecb032c9","serviceId":"openrouter","benchmarkId":"gpqa","model":"google/gemini-2.6-flash","score":58,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-p-7769ba547b","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"google/gemini-2.6-flash","score":70.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-b1872faeab","serviceId":"openrouter","benchmarkId":"humaneval","model":"mistral/large-3","score":88.6,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-04T12:00:00Z","finishedAt":"2026-03-04T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Mistral release","sourceUrl":"https://mistral.ai/news/mistral-large-3","asOfDate":"2026-03-04","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-04T12:00:00Z","verifiedAt":null,"note":"Self-reported by Mistral release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mbpp-18a6cdad25","serviceId":"openrouter","benchmarkId":"mbpp","model":"mistral/large-3","score":86.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-04T12:00:00Z","finishedAt":"2026-03-04T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Mistral release","sourceUrl":"https://mistral.ai/news/mistral-large-3","asOfDate":"2026-03-04","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-04T12:00:00Z","verifiedAt":null,"note":"Self-reported by Mistral release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-c81f779ecb","serviceId":"openrouter","benchmarkId":"gpqa","model":"mistral/large-3","score":54.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-04T12:00:00Z","finishedAt":"2026-03-04T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Mistral release","sourceUrl":"https://mistral.ai/news/mistral-large-3","asOfDate":"2026-03-04","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-04T12:00:00Z","verifiedAt":null,"note":"Self-reported by Mistral release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-p-d0444adea2","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"mistral/large-3","score":76.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-04T12:00:00Z","finishedAt":"2026-03-04T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Mistral release","sourceUrl":"https://mistral.ai/news/mistral-large-3","asOfDate":"2026-03-04","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-04T12:00:00Z","verifiedAt":null,"note":"Self-reported by Mistral release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-5e75dcc6e6","serviceId":"openrouter","benchmarkId":"humaneval","model":"deepseek/deepseek-v4","score":92.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-12T12:00:00Z","finishedAt":"2026-03-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"DeepSeek-V4 paper","sourceUrl":"https://arxiv.org/abs/2503.12345","asOfDate":"2026-03-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by DeepSeek-V4 paper. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mbpp-bf115f84c5","serviceId":"openrouter","benchmarkId":"mbpp","model":"deepseek/deepseek-v4","score":89.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-12T12:00:00Z","finishedAt":"2026-03-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"DeepSeek-V4 paper","sourceUrl":"https://arxiv.org/abs/2503.12345","asOfDate":"2026-03-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by DeepSeek-V4 paper. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-swe-be-fcaa3ce599","serviceId":"openrouter","benchmarkId":"swe-bench-verified","model":"deepseek/deepseek-v4","score":56.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-12T12:00:00Z","finishedAt":"2026-03-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"DeepSeek-V4 paper","sourceUrl":"https://arxiv.org/abs/2503.12345","asOfDate":"2026-03-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by DeepSeek-V4 paper. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-e3f8702cae","serviceId":"openrouter","benchmarkId":"gpqa","model":"deepseek/deepseek-v4","score":68.7,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-12T12:00:00Z","finishedAt":"2026-03-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"DeepSeek-V4 paper","sourceUrl":"https://arxiv.org/abs/2503.12345","asOfDate":"2026-03-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by DeepSeek-V4 paper. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-math-1651ccd850","serviceId":"openrouter","benchmarkId":"math","model":"deepseek/deepseek-v4","score":81,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-12T12:00:00Z","finishedAt":"2026-03-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"DeepSeek-V4 paper","sourceUrl":"https://arxiv.org/abs/2503.12345","asOfDate":"2026-03-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by DeepSeek-V4 paper. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-b6db9f4cd1","serviceId":"openrouter","benchmarkId":"humaneval","model":"qwen/qwen3.6-72b-instruct","score":87.9,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-08T12:00:00Z","finishedAt":"2026-04-08T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Qwen 3.6 release","sourceUrl":"https://qwenlm.github.io/blog/qwen3-6","asOfDate":"2026-04-08","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-08T12:00:00Z","verifiedAt":null,"note":"Self-reported by Qwen 3.6 release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mbpp-45422addb4","serviceId":"openrouter","benchmarkId":"mbpp","model":"qwen/qwen3.6-72b-instruct","score":85.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-08T12:00:00Z","finishedAt":"2026-04-08T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Qwen 3.6 release","sourceUrl":"https://qwenlm.github.io/blog/qwen3-6","asOfDate":"2026-04-08","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-08T12:00:00Z","verifiedAt":null,"note":"Self-reported by Qwen 3.6 release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-adab10d6eb","serviceId":"openrouter","benchmarkId":"gpqa","model":"qwen/qwen3.6-72b-instruct","score":57.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-08T12:00:00Z","finishedAt":"2026-04-08T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Qwen 3.6 release","sourceUrl":"https://qwenlm.github.io/blog/qwen3-6","asOfDate":"2026-04-08","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-08T12:00:00Z","verifiedAt":null,"note":"Self-reported by Qwen 3.6 release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-math-18c0f653a5","serviceId":"openrouter","benchmarkId":"math","model":"qwen/qwen3.6-72b-instruct","score":74.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-08T12:00:00Z","finishedAt":"2026-04-08T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Qwen 3.6 release","sourceUrl":"https://qwenlm.github.io/blog/qwen3-6","asOfDate":"2026-04-08","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-08T12:00:00Z","verifiedAt":null,"note":"Self-reported by Qwen 3.6 release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-decd6332fa","serviceId":"openrouter","benchmarkId":"humaneval","model":"meta-llama/llama-4-405b","score":85.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-12T12:00:00Z","finishedAt":"2026-02-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Llama 4 release","sourceUrl":"https://ai.meta.com/blog/llama-4","asOfDate":"2026-02-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by Llama 4 release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mbpp-7a53669895","serviceId":"openrouter","benchmarkId":"mbpp","model":"meta-llama/llama-4-405b","score":82,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-12T12:00:00Z","finishedAt":"2026-02-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Llama 4 release","sourceUrl":"https://ai.meta.com/blog/llama-4","asOfDate":"2026-02-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by Llama 4 release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-p-029b6af6a6","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"meta-llama/llama-4-405b","score":71.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-12T12:00:00Z","finishedAt":"2026-02-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Llama 4 release","sourceUrl":"https://ai.meta.com/blog/llama-4","asOfDate":"2026-02-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by Llama 4 release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-d934943c9d","serviceId":"openrouter","benchmarkId":"humaneval","model":"moonshotai/kimi-k2.6","score":90.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-22T12:00:00Z","finishedAt":"2026-03-22T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Moonshot Kimi K2.6","sourceUrl":"https://moonshot.cn/news/kimi-k2-6","asOfDate":"2026-03-22","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-22T12:00:00Z","verifiedAt":null,"note":"Self-reported by Moonshot Kimi K2.6. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mbpp-1c6833f45c","serviceId":"openrouter","benchmarkId":"mbpp","model":"moonshotai/kimi-k2.6","score":87.6,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-22T12:00:00Z","finishedAt":"2026-03-22T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Moonshot Kimi K2.6","sourceUrl":"https://moonshot.cn/news/kimi-k2-6","asOfDate":"2026-03-22","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-22T12:00:00Z","verifiedAt":null,"note":"Self-reported by Moonshot Kimi K2.6. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-swe-be-68b807492e","serviceId":"openrouter","benchmarkId":"swe-bench-verified","model":"moonshotai/kimi-k2.6","score":47.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-22T12:00:00Z","finishedAt":"2026-03-22T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Moonshot Kimi K2.6","sourceUrl":"https://moonshot.cn/news/kimi-k2-6","asOfDate":"2026-03-22","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-22T12:00:00Z","verifiedAt":null,"note":"Self-reported by Moonshot Kimi K2.6. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-math-1b8c0070b7","serviceId":"openrouter","benchmarkId":"math","model":"moonshotai/kimi-k2.6","score":73,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-22T12:00:00Z","finishedAt":"2026-03-22T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Moonshot Kimi K2.6","sourceUrl":"https://moonshot.cn/news/kimi-k2-6","asOfDate":"2026-03-22","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-22T12:00:00Z","verifiedAt":null,"note":"Self-reported by Moonshot Kimi K2.6. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-8e895f7255","serviceId":"openrouter","benchmarkId":"humaneval","model":"x-ai/grok-4","score":88,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-30T12:00:00Z","finishedAt":"2026-03-30T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"xAI Grok 4","sourceUrl":"https://x.ai/news/grok-4","asOfDate":"2026-03-30","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-30T12:00:00Z","verifiedAt":null,"note":"Self-reported by xAI Grok 4. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gpqa-209ba0baee","serviceId":"openrouter","benchmarkId":"gpqa","model":"x-ai/grok-4","score":64.5,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-30T12:00:00Z","finishedAt":"2026-03-30T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"xAI Grok 4","sourceUrl":"https://x.ai/news/grok-4","asOfDate":"2026-03-30","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-30T12:00:00Z","verifiedAt":null,"note":"Self-reported by xAI Grok 4. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-math-70a33cce66","serviceId":"openrouter","benchmarkId":"math","model":"x-ai/grok-4","score":75,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-30T12:00:00Z","finishedAt":"2026-03-30T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"xAI Grok 4","sourceUrl":"https://x.ai/news/grok-4","asOfDate":"2026-03-30","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-30T12:00:00Z","verifiedAt":null,"note":"Self-reported by xAI Grok 4. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-humane-d527315317","serviceId":"openrouter","benchmarkId":"humaneval","model":"cohere/command-r-plus","score":76.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-08-15T12:00:00Z","finishedAt":"2025-08-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Cohere Command","sourceUrl":"https://cohere.com/blog/command-r-plus","asOfDate":"2025-08-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-08-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by Cohere Command. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-p-02eaeca54d","serviceId":"openrouter","benchmarkId":"mmlu-pro","model":"cohere/command-r-plus","score":62,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-08-15T12:00:00Z","finishedAt":"2025-08-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Cohere Command","sourceUrl":"https://cohere.com/blog/command-r-plus","asOfDate":"2025-08-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-08-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by Cohere Command. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gsm8k-f684538470","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-opus-4-7","score":96.7,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gsm8k-bc262ea3df","serviceId":"anthropic-claude","benchmarkId":"gsm8k","model":"claude-sonnet-4-5-20250929","score":95.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-09-29T12:00:00Z","finishedAt":"2025-09-29T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-sonnet-4-5","asOfDate":"2025-09-29","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-09-29T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gsm8k-f913b58bb8","serviceId":"openai-chatgpt","benchmarkId":"gsm8k","model":"gpt-5.4-pro","score":97.5,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gsm8k-dd4dcd8f16","serviceId":"openai-chatgpt","benchmarkId":"gsm8k","model":"o4-pro","score":96.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-01-12T12:00:00Z","finishedAt":"2026-01-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI o-series","sourceUrl":"https://openai.com/index/introducing-o4","asOfDate":"2026-01-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-01-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI o-series. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gsm8k-0162edb8ad","serviceId":"openrouter","benchmarkId":"gsm8k","model":"google/gemini-2.6-pro","score":96.1,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gsm8k-e63633af58","serviceId":"openrouter","benchmarkId":"gsm8k","model":"deepseek/deepseek-v4","score":95,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-12T12:00:00Z","finishedAt":"2026-03-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"DeepSeek-V4 paper","sourceUrl":"https://arxiv.org/abs/2503.12345","asOfDate":"2026-03-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by DeepSeek-V4 paper. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gsm8k-98a1ee7c54","serviceId":"openrouter","benchmarkId":"gsm8k","model":"qwen/qwen3.6-72b-instruct","score":94.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-08T12:00:00Z","finishedAt":"2026-04-08T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Qwen 3.6 release","sourceUrl":"https://qwenlm.github.io/blog/qwen3-6","asOfDate":"2026-04-08","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-08T12:00:00Z","verifiedAt":null,"note":"Self-reported by Qwen 3.6 release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gsm8k-99d776de1b","serviceId":"openrouter","benchmarkId":"gsm8k","model":"meta-llama/llama-4-405b","score":93,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-12T12:00:00Z","finishedAt":"2026-02-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Llama 4 release","sourceUrl":"https://ai.meta.com/blog/llama-4","asOfDate":"2026-02-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by Llama 4 release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-arc-ag-ebbe66dd5f","serviceId":"openai-chatgpt","benchmarkId":"arc-agi","model":"o4-pro","score":27.5,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-01-12T12:00:00Z","finishedAt":"2026-01-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI o-series","sourceUrl":"https://openai.com/index/introducing-o4","asOfDate":"2026-01-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-01-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI o-series. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-arc-ag-f7b93bf619","serviceId":"anthropic-claude","benchmarkId":"arc-agi","model":"claude-opus-4-7","score":21,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-arc-ag-3390fde46c","serviceId":"openrouter","benchmarkId":"arc-agi","model":"google/gemini-2.6-pro","score":19.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-hellas-5d0e425518","serviceId":"anthropic-claude","benchmarkId":"hellaswag","model":"claude-opus-4-7","score":95.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-hellas-1fc8af7517","serviceId":"openai-chatgpt","benchmarkId":"hellaswag","model":"gpt-5.4-pro","score":95.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-hellas-fb675c45db","serviceId":"openrouter","benchmarkId":"hellaswag","model":"google/gemini-2.6-pro","score":94.7,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-hellas-cd17a2c289","serviceId":"openrouter","benchmarkId":"hellaswag","model":"meta-llama/llama-4-405b","score":92.6,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-12T12:00:00Z","finishedAt":"2026-02-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Llama 4 release","sourceUrl":"https://ai.meta.com/blog/llama-4","asOfDate":"2026-02-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by Llama 4 release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-bigcod-4edb49c9c4","serviceId":"anthropic-claude","benchmarkId":"bigcodebench","model":"claude-opus-4-7","score":47.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-liveco-362d8daa90","serviceId":"anthropic-claude","benchmarkId":"livecodebench","model":"claude-opus-4-7","score":54,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-liveco-1a8b2cc7b8","serviceId":"openai-chatgpt","benchmarkId":"livecodebench","model":"gpt-5.4-pro","score":61.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-liveco-14def7bf05","serviceId":"openrouter","benchmarkId":"livecodebench","model":"deepseek/deepseek-v4","score":58.6,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-12T12:00:00Z","finishedAt":"2026-03-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"DeepSeek-V4 paper","sourceUrl":"https://arxiv.org/abs/2503.12345","asOfDate":"2026-03-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by DeepSeek-V4 paper. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-liveco-5092e228b8","serviceId":"openrouter","benchmarkId":"livecodebench","model":"google/gemini-2.6-pro","score":55.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gaia-0099f95bad","serviceId":"anthropic-claude","benchmarkId":"gaia","model":"claude-opus-4-7","score":39.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gaia-f60115460d","serviceId":"openai-chatgpt","benchmarkId":"gaia","model":"o4-pro","score":47,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-01-12T12:00:00Z","finishedAt":"2026-01-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI o-series","sourceUrl":"https://openai.com/index/introducing-o4","asOfDate":"2026-01-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-01-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI o-series. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-gaia-9cd7fed476","serviceId":"openrouter","benchmarkId":"gaia","model":"google/gemini-2.6-pro","score":36.5,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-tau-be-a33a474ab4","serviceId":"openai-chatgpt","benchmarkId":"tau-bench","model":"gpt-5.4-pro","score":65.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-tau-be-b6da2d8828","serviceId":"openrouter","benchmarkId":"tau-bench","model":"deepseek/deepseek-v4","score":41,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-12T12:00:00Z","finishedAt":"2026-03-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"DeepSeek-V4 paper","sourceUrl":"https://arxiv.org/abs/2503.12345","asOfDate":"2026-03-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by DeepSeek-V4 paper. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-e580a886b2","serviceId":"anthropic-claude","benchmarkId":"mmlu","model":"claude-opus-4-7","score":91.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-86b69ab56f","serviceId":"anthropic-claude","benchmarkId":"mmlu","model":"claude-sonnet-4-5-20250929","score":89.7,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2025-09-29T12:00:00Z","finishedAt":"2025-09-29T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-sonnet-4-5","asOfDate":"2025-09-29","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2025-09-29T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-114fa90a88","serviceId":"openai-chatgpt","benchmarkId":"mmlu","model":"gpt-5.4-pro","score":92.6,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-423d44ce42","serviceId":"openrouter","benchmarkId":"mmlu","model":"google/gemini-2.6-pro","score":91,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-mmlu-6b2f4b90c8","serviceId":"openrouter","benchmarkId":"mmlu","model":"meta-llama/llama-4-405b","score":86.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-12T12:00:00Z","finishedAt":"2026-02-12T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Llama 4 release","sourceUrl":"https://ai.meta.com/blog/llama-4","asOfDate":"2026-02-12","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-12T12:00:00Z","verifiedAt":null,"note":"Self-reported by Llama 4 release. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-truthf-1459c70d5b","serviceId":"anthropic-claude","benchmarkId":"truthfulqa","model":"claude-opus-4-7","score":67.4,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-04-10T12:00:00Z","finishedAt":"2026-04-10T12:00:00Z","decoding":{},"attestor":null,"publisher":"anthropic-claude","selfReported":true,"sourceLabel":"Anthropic model card","sourceUrl":"https://www.anthropic.com/news/claude-opus-4-7","asOfDate":"2026-04-10","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-04-10T12:00:00Z","verifiedAt":null,"note":"Self-reported by Anthropic model card. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-truthf-784fee919e","serviceId":"openai-chatgpt","benchmarkId":"truthfulqa","model":"gpt-5.4-pro","score":70.2,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-03-15T12:00:00Z","finishedAt":"2026-03-15T12:00:00Z","decoding":{},"attestor":null,"publisher":"openai-chatgpt","selfReported":true,"sourceLabel":"OpenAI announcement","sourceUrl":"https://openai.com/index/gpt-5-4","asOfDate":"2026-03-15","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-03-15T12:00:00Z","verifiedAt":null,"note":"Self-reported by OpenAI announcement. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}},{"id":"sr-truthf-31cea63283","serviceId":"openrouter","benchmarkId":"truthfulqa","model":"google/gemini-2.6-pro","score":64.8,"runs":1,"sampleCount":null,"runnerVersion":"self-reported@1","runnerCommit":"vendor-disclosure","datasetHash":null,"methodologyHash":null,"transcriptMerkleRoot":null,"startedAt":"2026-02-20T12:00:00Z","finishedAt":"2026-02-20T12:00:00Z","decoding":{},"attestor":null,"publisher":"openrouter","selfReported":true,"sourceLabel":"Gemini 2.6 report","sourceUrl":"https://blog.google/technology/google-deepmind/gemini-2-6","asOfDate":"2026-02-20","verification":{"mode":"self-reported","status":"self-reported","alignedProofSystem":null,"attestorPubkey":null,"attestorSignature":null,"submittedAt":"2026-02-20T12:00:00Z","verifiedAt":null,"note":"Self-reported by Gemini 2.6 report. Not cryptographically attested. Vendors can upgrade to Attested by publishing a signed run via /v1/run."},"proof":{"system":"self-reported","status":"unattested","signature":null,"pubkey":null,"signer_algo":null,"public_inputs":{}}}]}