Skip to main content
Export your Prem API key as API_KEY before running any script. Optionally set API_BASE_URL (it defaults to https://localhost). Place your PDF file as By-Laws.pdf in the same directory, or update the PDF_FILE variable to point to your file. The full example below is written to run with Bun.
1

Create project and generate synthetic dataset from PDF

// Register the project first; only its id is needed by the following steps.
const projectPayload = JSON.stringify({
  name: 'PepsiCo By-Laws Project',
  goal: 'Train on corporate governance'
});
const createdProject = await api('/api/v1/public/projects/create', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: projectPayload
});
const project_id = createdProject.project_id;

// Build the multipart form for synthetic generation: project and dataset name,
// then the PDF itself, then the generation settings (field order is preserved).
const pdfFile = file(PDF_FILE);
const formData = new FormData();
formData.append('project_id', project_id);
formData.append('name', 'PepsiCo By-Laws Dataset');
formData.append('files[]', pdfFile, PDF_FILE);

const generationSettings: [string, string][] = [
  ['pairs_to_generate', '100'],
  ['pair_type', 'qa'],
  ['user_instructions', 'Generate question-answer pairs about PepsiCo internal rules, governance policies, corporate by-laws, board procedures, voting requirements, and organizational structure. Focus on specific rules, requirements, and procedures mentioned in the document.']
];
for (const [field, value] of generationSettings) {
  formData.append(field, value);
}

// The form is sent as the request body as-is; no Content-Type header is passed here.
const createdDataset = await api('/api/v1/public/datasets/create-synthetic', {
  method: 'POST',
  body: formData
});
const dataset_id = createdDataset.dataset_id;
Create a project, then upload PDF files to generate synthetic Q&A pairs. The generation can take 5-10 minutes depending on document size.
2

Wait for dataset generation

// Keep fetching the dataset until its status is no longer 'processing'.
let dataset;
let checks = 0;
while (true) {
  // Pause 5 s before every request, including the first one.
  await sleep(5000);
  dataset = await api(`/api/v1/public/datasets/${dataset_id}`);

  // Report on polls 0, 6, 12, … so the log gets one line per six polls.
  const shouldReport = checks % 6 === 0;
  checks += 1;
  if (shouldReport) {
    console.log(`Status: ${dataset.status}, ${dataset.datapoints_count} datapoints`);
  }

  if (dataset.status !== 'processing') break;
}
Poll the dataset status every 5 seconds. Log progress every 30 seconds to avoid spam.
3

Create snapshot and get recommendations

// Both requests below are JSON POSTs, so their fetch options are built in one place.
const jsonPost = (payload: Record<string, unknown>) => ({
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(payload)
});

// Snapshot the dataset with split_percentage 80.
const createdSnapshot = await api(
  '/api/v1/public/snapshots/create',
  jsonPost({ dataset_id, split_percentage: 80 })
);
const snapshot_id = createdSnapshot.snapshot_id;

// Kick off recommendation generation for that snapshot (response is not used).
await api('/api/v1/public/recommendations/generate', jsonPost({ snapshot_id, reasoning: false }));

// Poll every 5 s until the recommendations are no longer 'processing'.
let recs;
for (;;) {
  await sleep(5000);
  recs = await api(`/api/v1/public/recommendations/${snapshot_id}?reasoning=false`);
  if (recs.status !== 'processing') break;
}
Create a snapshot with an 80/20 train/validation split, request model recommendations, then poll every 5 seconds until the recommendations are no longer processing.
4

Launch fine-tuning job

// Turn every model flagged `recommended` into one experiment, copying the
// LoRA hyperparameters suggested for it into the request's snake_case fields.
const experiments = recs.recommended_models
  .filter((m: any) => m.recommended)
  .map((m: any) => ({
    base_model_id: m.baseModelId,
    batch_size: m.lora_hyperparameters.batchSize,
    learning_rate_multiplier: m.lora_hyperparameters.learningRateMultiplier,
    n_epochs: m.lora_hyperparameters.nEpochs,
    lora: true
  }));

// Stop before calling the API when nothing was recommended, instead of
// submitting a job with an empty experiment list (same guard as the Full Example).
if (experiments.length === 0) {
  console.error('\n✗ Error: No recommended models found. Cannot create finetuning job.');
  process.exit(1);
}

// Create the fine-tuning job; its id is used to monitor progress.
const { job_id } = await api('/api/v1/public/finetuning/create', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ snapshot_id, name: 'PepsiCo By-Laws Model', reasoning: false, experiments })
});
Filter recommended models and create a fine-tuning job using LoRA hyperparameters.
5

Monitor job progress

// Check on the job at most 30 times, pausing 10 s before each check.
for (let attempt = 1; attempt <= 30; attempt += 1) {
  await sleep(10000);
  const job = await api(`/api/v1/public/finetuning/${job_id}`);
  console.log(`Status: ${job.status}`);

  // One line per experiment; the model id is printed only when it is set.
  for (const experiment of job.experiments) {
    const modelId = experiment.model_id || '';
    console.log(`  - Exp #${experiment.experiment_number}: ${experiment.status} ${modelId}`);
  }

  // Any status other than 'processing' ends the monitoring loop.
  const stillProcessing = job.status === 'processing';
  if (!stillProcessing) break;
}
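Poll the job status every 10 seconds for at most 30 polls (about 5 minutes), stopping as soon as the job is no longer processing. Each poll prints every experiment's status and, once it is available, its model ID.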

Full Example

#!/usr/bin/env bun

/**
 * Example 3: PDF synthetic dataset workflow
 * 1. Create project → 2. Generate synthetic data from PDF → 3. Create snapshot → 4. Get recommendations → 5. Run finetuning → 6. Monitor job
 */

import { file } from 'bun';

/** Base URL of the API; falls back to https://localhost when API_BASE_URL is unset or empty. */
const API_BASE_URL = process.env.API_BASE_URL || 'https://localhost';
/** API key sent as a Bearer token with every request; required (checked below). */
const API_KEY = process.env.API_KEY;
/** Path of the PDF that is uploaded for synthetic data generation. */
const PDF_FILE = 'By-Laws.pdf';

// Self-signed certificates are only expected on a local development server, so
// TLS verification is relaxed only when the API host is local. A value the user
// already exported for NODE_TLS_REJECT_UNAUTHORIZED is left untouched, which
// keeps an explicit opt-in (or opt-out) possible for any other host.
const LOCAL_API_HOSTS = ['localhost', '127.0.0.1', '[::1]'];
let apiHost = '';
try {
	apiHost = new URL(API_BASE_URL).hostname;
} catch {
	// Malformed API_BASE_URL: keep TLS verification on and let the first request fail.
}
if (process.env.NODE_TLS_REJECT_UNAUTHORIZED === undefined && LOCAL_API_HOSTS.includes(apiHost)) {
	process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';
}

// Nothing can be done without credentials: explain and exit with a non-zero code.
if (!API_KEY) {
	console.error('Error: API_KEY environment variable is required');
	console.error('Please create a .env file based on .env.example');
	process.exit(1);
}

/**
 * Sends a request to the API and returns the parsed JSON response.
 *
 * The bearer token is added to every request; headers passed in
 * `options.headers` are merged on top of it.
 *
 * @param endpoint Path appended to API_BASE_URL (e.g. `/api/v1/public/...`).
 * @param options  Extra fetch options (method, headers, body, ...).
 * @returns The response body parsed as JSON.
 * @throws Error with message `<status>: <detail>` when the response is not OK.
 *         `<detail>` is the body's string `error` field, otherwise the JSON body
 *         re-serialized, otherwise the raw body text, otherwise the status text.
 */
async function api(endpoint: string, options: any = {}): Promise<any> {
	const res = await fetch(`${API_BASE_URL}${endpoint}`, {
		...options,
		headers: { Authorization: `Bearer ${API_KEY}`, ...options.headers },
	});
	if (!res.ok) {
		// Read the body as text first so a non-JSON error page is not lost.
		const raw: string = await res.text().catch(() => '');
		let errorMsg = raw;
		try {
			const err: any = JSON.parse(raw);
			// `err` may be null or a primitive, hence the optional chaining.
			errorMsg = typeof err?.error === 'string' ? err.error : JSON.stringify(err);
		} catch {
			// Body is not JSON: keep the raw text as the detail.
		}
		throw new Error(`${res.status}: ${errorMsg || res.statusText}`);
	}
	return res.json();
}

/** Returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires. */
function sleep(ms: number) {
	return new Promise((wake) => {
		setTimeout(wake, ms);
	});
}

/**
 * Runs the PDF synthetic-data workflow end to end: creates a project, generates
 * a synthetic Q&A dataset from PDF_FILE, snapshots it, requests model
 * recommendations, launches a fine-tuning job for every recommended model and
 * monitors that job for up to 5 minutes.
 *
 * Progress is written to the console. API failures reject the returned promise;
 * the process exits with code 1 when no model is recommended.
 */
async function main() {
	console.log('\n=== PDF Synthetic Workflow ===\n');

	// 1. Create project
	console.log('1. Creating project...');
	const { project_id } = await api('/api/v1/public/projects/create', {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify({ name: 'PepsiCo By-Laws Project', goal: 'Train on corporate governance' }),
	});
	console.log(`   ✓ Project: ${project_id}\n`);

	// 2. Generate synthetic dataset from PDF
	console.log('2. Generating synthetic dataset from PDF...');
	console.log(`   File: ${PDF_FILE}`);
	const formData = new FormData();
	formData.append('project_id', project_id);
	formData.append('name', 'PepsiCo By-Laws Dataset');
	const pdfFile = file(PDF_FILE);
	formData.append('files[]', pdfFile, PDF_FILE);
	formData.append('pairs_to_generate', '100');
	formData.append('pair_type', 'qa');
	formData.append(
		'user_instructions',
		'Generate question-answer pairs about PepsiCo internal rules, governance policies, corporate by-laws, board procedures, voting requirements, and organizational structure. Focus on specific rules, requirements, and procedures mentioned in the document.'
	);

	// The form is sent as the body as-is; no Content-Type header is passed here.
	const { dataset_id } = await api('/api/v1/public/datasets/create-synthetic', {
		method: 'POST',
		body: formData,
	});
	console.log(`   ✓ Dataset: ${dataset_id}`);

	// Wait for dataset (can take several minutes): poll every 5 s, log every 6th poll.
	console.log('   Waiting for generation (may take 5-10 minutes)...');
	let dataset;
	let checks = 0;
	do {
		await sleep(5000);
		dataset = await api(`/api/v1/public/datasets/${dataset_id}`);
		if (checks++ % 6 === 0) {
			console.log(`   Status: ${dataset.status}, ${dataset.datapoints_count} datapoints`);
		}
	} while (dataset.status === 'processing');
	console.log(`   ✓ Ready: ${dataset.datapoints_count} datapoints\n`);

	// 3. Create snapshot
	console.log('3. Creating snapshot...');
	const { snapshot_id } = await api('/api/v1/public/snapshots/create', {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify({ dataset_id, split_percentage: 80 }),
	});
	console.log(`   ✓ Snapshot: ${snapshot_id}\n`);

	// 4. Generate recommendations, then poll every 5 s until they stop processing
	console.log('4. Generating recommendations...');
	await api('/api/v1/public/recommendations/generate', {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify({ snapshot_id, reasoning: false }),
	});

	let recs;
	do {
		await sleep(5000);
		recs = await api(`/api/v1/public/recommendations/${snapshot_id}?reasoning=false`);
	} while (recs.status === 'processing');

	// Filter once; the same list drives the summary and the experiments below.
	const recommended = recs.recommended_models.filter((m: any) => m.recommended);

	console.log(`   ✓ Recommended models:`);
	console.log(`   Total models: ${recs.recommended_models.length}, Recommended: ${recommended.length}`);
	for (const m of recommended) {
		console.log(`     - ${m.baseModelId}`);
	}
	console.log();

	// 5. Create finetuning job: one LoRA experiment per recommended model
	console.log('5. Creating finetuning job...');
	const experiments = recommended.map((m: any) => ({
		base_model_id: m.baseModelId,
		batch_size: m.lora_hyperparameters.batchSize,
		learning_rate_multiplier: m.lora_hyperparameters.learningRateMultiplier,
		n_epochs: m.lora_hyperparameters.nEpochs,
		lora: true,
	}));

	if (experiments.length === 0) {
		console.error('\n✗ Error: No recommended models found. Cannot create finetuning job.');
		process.exit(1);
	}

	const { job_id } = await api('/api/v1/public/finetuning/create', {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify({ snapshot_id, name: 'PepsiCo By-Laws Model', reasoning: false, experiments }),
	});
	console.log(`   ✓ Job: ${job_id}\n`);

	// 6. Monitor (5 minutes max): 30 polls, 10 s apart
	console.log('6. Monitoring job...');
	let lastStatus: unknown;
	for (let i = 0; i < 30; i++) {
		await sleep(10000);
		const job = await api(`/api/v1/public/finetuning/${job_id}`);
		lastStatus = job.status;
		console.log(`   Status: ${job.status}`);
		job.experiments.forEach((e: any) => {
			console.log(`     - Exp #${e.experiment_number}: ${e.status} ${e.model_id || ''}`);
		});
		if (job.status !== 'processing') break;
	}

	// The loop also ends when the 5-minute budget runs out; do not claim success then.
	if (lastStatus === 'processing') {
		console.log(`\n… Job ${job_id} is still processing after 5 minutes; stopped monitoring.\n`);
		return;
	}

	// NOTE(review): every status other than 'processing' ends up here, including
	// any failure status the API may report — confirm the terminal status values.
	console.log('\n✓ Done!\n');
}

// Entry point: run the workflow; on rejection print the error's message and exit with code 1.
main().catch((failure) => {
	const { message } = failure;
	console.error('\n✗ Error:', message);
	process.exit(1);
});