Skip to content

Commit 2590146

Browse files
authored
fix: Avoid overflows when saving files with large outputs (#239)
1 parent b8684a9 commit 2590146

File tree

4 files changed

+196
-14
lines changed

4 files changed

+196
-14
lines changed

src/notebooks/deepnote/deepnoteDataConverter.ts

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,9 +243,9 @@ export class DeepnoteDataConverter {
243243
} else if (item.mime === 'application/json') {
244244
data['application/json'] = JSON.parse(new TextDecoder().decode(item.data));
245245
} else if (item.mime === 'image/png') {
246-
data['image/png'] = btoa(String.fromCharCode(...new Uint8Array(item.data)));
246+
data['image/png'] = this.uint8ArrayToBase64(item.data);
247247
} else if (item.mime === 'image/jpeg') {
248-
data['image/jpeg'] = btoa(String.fromCharCode(...new Uint8Array(item.data)));
248+
data['image/jpeg'] = this.uint8ArrayToBase64(item.data);
249249
} else if (item.mime === 'application/vnd.deepnote.dataframe.v3+json') {
250250
data['application/vnd.deepnote.dataframe.v3+json'] = JSON.parse(
251251
new TextDecoder().decode(item.data)
@@ -523,4 +523,21 @@ export class DeepnoteDataConverter {
523523
return new NotebookCellOutput([]);
524524
});
525525
}
526+
527+
/**
528+
* Converts a Uint8Array to a base64 string without causing stack overflow.
529+
* Uses chunked processing to avoid call stack limits with large arrays.
530+
*/
531+
private uint8ArrayToBase64(data: Uint8Array): string {
532+
// Process in chunks to avoid stack overflow from spread operator
533+
const chunkSize = 8192;
534+
let binaryString = '';
535+
536+
for (let i = 0; i < data.length; i += chunkSize) {
537+
const chunk = data.subarray(i, Math.min(i + chunkSize, data.length));
538+
binaryString += String.fromCharCode(...chunk);
539+
}
540+
541+
return btoa(binaryString);
542+
}
526543
}

src/notebooks/deepnote/deepnoteDataConverter.unit.test.ts

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { assert } from 'chai';
2-
import { NotebookCellKind, type NotebookCellData } from 'vscode';
2+
import { NotebookCellKind, NotebookCellOutput, NotebookCellOutputItem, type NotebookCellData } from 'vscode';
33

44
import { DeepnoteDataConverter } from './deepnoteDataConverter';
55
import type { DeepnoteBlock, DeepnoteOutput } from '../../platform/deepnote/deepnoteTypes';
@@ -797,4 +797,51 @@ suite('DeepnoteDataConverter', () => {
797797
assert.deepStrictEqual(roundTripBlocks, originalBlocks);
798798
});
799799
});
800+
801+
suite('large data handling', () => {
802+
test('should handle cells with large image data without stack overflow', () => {
803+
// Create a large image data buffer (1MB) - this reproduces the bug where
804+
// saving fails with "Maximum call stack size exceeded" because
805+
// btoa(String.fromCharCode(...largeArray)) causes stack overflow
806+
const largeImageSize = 1000000; // 1MB
807+
const largeImageData = new Uint8Array(largeImageSize);
808+
809+
for (let i = 0; i < largeImageSize; i++) {
810+
largeImageData[i] = i % 256;
811+
}
812+
813+
const cellWithLargeImage: NotebookCellData = {
814+
kind: NotebookCellKind.Code,
815+
value: 'display_image()',
816+
languageId: 'python',
817+
metadata: {
818+
__deepnotePocket: {
819+
type: 'code',
820+
sortingKey: 'a0'
821+
},
822+
id: 'block-1'
823+
},
824+
outputs: [
825+
new NotebookCellOutput([new NotebookCellOutputItem(largeImageData, 'image/png')], {
826+
cellId: 'block-1',
827+
cellIndex: 0
828+
})
829+
]
830+
};
831+
832+
// This should not throw - it should handle large data gracefully
833+
const blocks = converter.convertCellsToBlocks([cellWithLargeImage]);
834+
835+
assert.strictEqual(blocks.length, 1);
836+
assert.strictEqual(blocks[0].type, 'code');
837+
assert.isDefined(blocks[0].outputs);
838+
assert.strictEqual(blocks[0].outputs!.length, 1);
839+
840+
// Verify the output data is properly base64 encoded
841+
const output = blocks[0].outputs![0] as { data?: Record<string, unknown> };
842+
assert.isDefined(output.data);
843+
assert.isDefined(output.data!['image/png']);
844+
assert.isString(output.data!['image/png']);
845+
});
846+
});
800847
});

src/notebooks/deepnote/deepnoteSerializer.ts

Lines changed: 58 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,43 @@ import { l10n, workspace, type CancellationToken, type NotebookData, type Notebo
55
import { logger } from '../../platform/logging';
66
import { IDeepnoteNotebookManager } from '../types';
77
import { DeepnoteDataConverter } from './deepnoteDataConverter';
8-
import type { DeepnoteFile, DeepnoteNotebook } from '../../platform/deepnote/deepnoteTypes';
8+
import type { DeepnoteBlock, DeepnoteFile, DeepnoteNotebook } from '../../platform/deepnote/deepnoteTypes';
99

1010
export { DeepnoteBlock, DeepnoteNotebook, DeepnoteOutput, DeepnoteFile } from '../../platform/deepnote/deepnoteTypes';
1111

12+
/**
13+
* Deep clones an object while removing circular references.
14+
* Uses a recursion stack pattern to only drop true cycles, preserving shared references.
15+
*/
16+
function cloneWithoutCircularRefs<T>(obj: T, seen = new WeakSet<object>()): T {
17+
if (obj === null || typeof obj !== 'object') {
18+
return obj;
19+
}
20+
21+
if (seen.has(obj as object)) {
22+
// True circular reference on the current path - drop it
23+
return undefined as T;
24+
}
25+
26+
seen.add(obj as object);
27+
28+
try {
29+
if (Array.isArray(obj)) {
30+
return obj.map((item) => cloneWithoutCircularRefs(item, seen)) as T;
31+
}
32+
33+
const clone: Record<string, unknown> = {};
34+
35+
for (const key of Object.keys(obj as Record<string, unknown>)) {
36+
clone[key] = cloneWithoutCircularRefs((obj as Record<string, unknown>)[key], seen);
37+
}
38+
39+
return clone as T;
40+
} finally {
41+
seen.delete(obj as object);
42+
}
43+
}
44+
1245
/**
1346
* Serializer for converting between Deepnote YAML files and VS Code notebook format.
1447
* Handles reading/writing .deepnote files and manages project state persistence.
@@ -108,49 +141,63 @@ export class DeepnoteNotebookSerializer implements NotebookSerializer {
108141
}
109142

110143
try {
144+
logger.debug('SerializeNotebook: Starting serialization');
145+
111146
const projectId = data.metadata?.deepnoteProjectId;
112147

113148
if (!projectId) {
114149
throw new Error('Missing Deepnote project ID in notebook metadata');
115150
}
116151

152+
logger.debug(`SerializeNotebook: Project ID: ${projectId}`);
153+
117154
const originalProject = this.notebookManager.getOriginalProject(projectId) as DeepnoteFile | undefined;
118155

119156
if (!originalProject) {
120157
throw new Error('Original Deepnote project not found. Cannot save changes.');
121158
}
122159

160+
logger.debug('SerializeNotebook: Got original project');
161+
123162
const notebookId =
124163
data.metadata?.deepnoteNotebookId || this.notebookManager.getTheSelectedNotebookForAProject(projectId);
125164

126165
if (!notebookId) {
127166
throw new Error('Cannot determine which notebook to save');
128167
}
129168

130-
const notebookIndex = originalProject.project.notebooks.findIndex(
131-
(nb: { id: string }) => nb.id === notebookId
132-
);
169+
logger.debug(`SerializeNotebook: Notebook ID: ${notebookId}`);
170+
171+
const notebook = originalProject.project.notebooks.find((nb: { id: string }) => nb.id === notebookId);
133172

134-
if (notebookIndex === -1) {
173+
if (!notebook) {
135174
throw new Error(`Notebook with ID ${notebookId} not found in project`);
136175
}
137176

138-
const updatedProject = JSON.parse(JSON.stringify(originalProject)) as DeepnoteFile;
177+
logger.debug(`SerializeNotebook: Found notebook, converting ${data.cells.length} cells to blocks`);
178+
179+
// Clone blocks while removing circular references that may have been
180+
// introduced by VS Code's notebook cell/output handling
181+
const blocks = this.converter.convertCellsToBlocks(data.cells);
182+
183+
logger.debug(`SerializeNotebook: Converted to ${blocks.length} blocks, now cloning without circular refs`);
184+
185+
notebook.blocks = cloneWithoutCircularRefs<DeepnoteBlock[]>(blocks);
139186

140-
const updatedBlocks = this.converter.convertCellsToBlocks(data.cells);
187+
logger.debug('SerializeNotebook: Cloned blocks, updating modifiedAt');
141188

142-
updatedProject.project.notebooks[notebookIndex].blocks = updatedBlocks;
189+
originalProject.metadata.modifiedAt = new Date().toISOString();
143190

144-
updatedProject.metadata.modifiedAt = new Date().toISOString();
191+
logger.debug('SerializeNotebook: Starting yaml.dump');
145192

146-
const yamlString = yaml.dump(updatedProject, {
193+
const yamlString = yaml.dump(originalProject, {
147194
indent: 2,
148195
lineWidth: -1,
149196
noRefs: true,
150197
sortKeys: false
151198
});
152199

153-
this.notebookManager.storeOriginalProject(projectId, updatedProject, notebookId);
200+
logger.debug(`SerializeNotebook: yaml.dump complete, ${yamlString.length} chars`);
154201

155202
return new TextEncoder().encode(yamlString);
156203
} catch (error) {

src/notebooks/deepnote/deepnoteSerializer.unit.test.ts

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,77 @@ project:
325325
});
326326
});
327327

328+
suite('circular reference handling', () => {
329+
test('should serialize notebook with circular references in output metadata', async () => {
330+
// Create output with circular reference - this reproduces the bug
331+
// where saving fails with "Maximum call stack size exceeded"
332+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
333+
const circularOutput: any = {
334+
output_type: 'execute_result',
335+
execution_count: 1,
336+
data: { 'text/plain': 'test' },
337+
metadata: {}
338+
};
339+
// Create circular reference
340+
circularOutput.metadata.self = circularOutput;
341+
342+
const projectWithCircularRef: DeepnoteFile = {
343+
version: '1.0',
344+
metadata: {
345+
createdAt: '2023-01-01T00:00:00Z',
346+
modifiedAt: '2023-01-02T00:00:00Z'
347+
},
348+
project: {
349+
id: 'project-circular',
350+
name: 'Circular Test',
351+
notebooks: [
352+
{
353+
id: 'notebook-1',
354+
name: 'Test Notebook',
355+
blocks: [
356+
{
357+
blockGroup: 'group-1',
358+
id: 'block-1',
359+
content: 'test',
360+
sortingKey: 'a0',
361+
type: 'code',
362+
outputs: [circularOutput]
363+
}
364+
],
365+
executionMode: 'block',
366+
isModule: false
367+
}
368+
],
369+
settings: {}
370+
}
371+
};
372+
373+
manager.storeOriginalProject('project-circular', projectWithCircularRef, 'notebook-1');
374+
375+
const notebookData = {
376+
cells: [
377+
{
378+
kind: 2, // NotebookCellKind.Code
379+
value: 'test',
380+
languageId: 'python',
381+
metadata: {}
382+
}
383+
],
384+
metadata: {
385+
deepnoteProjectId: 'project-circular',
386+
deepnoteNotebookId: 'notebook-1'
387+
}
388+
};
389+
390+
// Should successfully serialize even with circular references
391+
const result = await serializer.serializeNotebook(notebookData as any, {} as any);
392+
393+
assert.instanceOf(result, Uint8Array);
394+
const yamlString = new TextDecoder().decode(result);
395+
assert.include(yamlString, 'project-circular');
396+
});
397+
});
398+
328399
suite('integration scenarios', () => {
329400
test('should maintain independence between serializer instances', () => {
330401
const manager1 = new DeepnoteNotebookManager();

0 commit comments

Comments
 (0)