Export your Prem API key as API_KEY before running any script. Optionally set API_BASE_URL (defaults to https://localhost).
Place your PDF file as By-Laws.pdf in the same directory, or update the PDF_FILE variable.
1
Create project and generate synthetic dataset from PDF
Copy
Ask AI
// Create the project first; its id is attached to the dataset upload below.
const projectPayload = { name: 'PepsiCo By-Laws Project', goal: 'Train on corporate governance' };
const { project_id } = await api('/api/v1/public/projects/create', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(projectPayload)
});

// Guidance passed along with the PDF in the `user_instructions` form field.
const generationInstructions =
  'Generate question-answer pairs about PepsiCo internal rules, governance policies, corporate by-laws, board procedures, voting requirements, and organizational structure. Focus on specific rules, requirements, and procedures mentioned in the document.';

// Multipart form: the PDF plus the generation settings (100 pairs, 'qa' pair type).
const upload = new FormData();
upload.append('project_id', project_id);
upload.append('name', 'PepsiCo By-Laws Dataset');
upload.append('files[]', file(PDF_FILE), PDF_FILE);
upload.append('pairs_to_generate', '100');
upload.append('pair_type', 'qa');
upload.append('user_instructions', generationInstructions);

// No explicit Content-Type header is passed here; the body is the FormData itself.
const { dataset_id } = await api('/api/v1/public/datasets/create-synthetic', {
  method: 'POST',
  body: upload
});
2
Wait for dataset generation
Copy
Ask AI
// Poll the dataset every 5 seconds until its status is no longer 'processing'.
// Progress is printed on the first poll and every 6th poll after that (about every 30 s).
let dataset;
for (let poll = 0; ; poll += 1) {
  await sleep(5000);
  dataset = await api(`/api/v1/public/datasets/${dataset_id}`);

  const shouldReport = poll % 6 === 0;
  if (shouldReport) {
    console.log(`Status: ${dataset.status}, ${dataset.datapoints_count} datapoints`);
  }

  if (dataset.status !== 'processing') break;
}
3
Create snapshot and get recommendations
Copy
Ask AI
// Snapshot the dataset, passing split_percentage: 80.
const snapshotRequest = { dataset_id, split_percentage: 80 };
const { snapshot_id } = await api('/api/v1/public/snapshots/create', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(snapshotRequest)
});

// Start recommendation generation for that snapshot (reasoning disabled).
const recommendationRequest = { snapshot_id, reasoning: false };
await api('/api/v1/public/recommendations/generate', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(recommendationRequest)
});

// Poll every 5 seconds until the recommendations are no longer 'processing'.
let recs;
while (true) {
  await sleep(5000);
  recs = await api(`/api/v1/public/recommendations/${snapshot_id}?reasoning=false`);
  if (recs.status !== 'processing') break;
}
4
Launch fine-tuning job
Copy
Ask AI
// Build one LoRA experiment per model flagged `recommended`, mapping the
// camelCase hyperparameters from the recommendation onto the snake_case
// fields sent to the fine-tuning endpoint.
const experiments = recs.recommended_models
  .filter((m: any) => m.recommended)
  .map((m: any) => ({
    base_model_id: m.baseModelId,
    batch_size: m.lora_hyperparameters.batchSize,
    learning_rate_multiplier: m.lora_hyperparameters.learningRateMultiplier,
    n_epochs: m.lora_hyperparameters.nEpochs,
    lora: true
  }));

// Stop before calling the API when nothing was recommended, instead of
// submitting a job with an empty experiment list.
if (experiments.length === 0) {
  throw new Error('No recommended models found. Cannot create finetuning job.');
}

const { job_id } = await api('/api/v1/public/finetuning/create', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ snapshot_id, name: 'PepsiCo By-Laws Model', reasoning: false, experiments })
});
5
Monitor job progress
Copy
Ask AI
// Check the job every 10 seconds, at most 30 times (about 5 minutes),
// stopping early once its status is no longer 'processing'.
for (let attempt = 1; attempt <= 30; attempt += 1) {
  await sleep(10000);
  const job = await api(`/api/v1/public/finetuning/${job_id}`);

  console.log(`Status: ${job.status}`);
  for (const experiment of job.experiments) {
    console.log(` - Exp #${experiment.experiment_number}: ${experiment.status} ${experiment.model_id || ''}`);
  }

  if (job.status !== 'processing') break;
}
Full Example
Copy
Ask AI
#!/usr/bin/env bun
/**
* Example 3: PDF synthetic dataset workflow
* 1. Create project → 2. Generate synthetic data from PDF → 3. Create snapshot → 4. Get recommendations → 5. Run finetuning
*/
import { file } from 'bun';
// Base URL of the API; an unset or empty API_BASE_URL falls back to https://localhost.
const API_BASE_URL = process.env.API_BASE_URL || 'https://localhost';
// Bearer token sent with every request (validated below).
const API_KEY = process.env.API_KEY;
// PDF uploaded as the source document for synthetic data generation.
const PDF_FILE = 'By-Laws.pdf';

// Disable TLS verification only when the target is a loopback host, so
// self-signed certificates work for local development without sending the
// API key to a remote host over an unverified connection. To bypass
// verification for another host, set NODE_TLS_REJECT_UNAUTHORIZED=0 yourself.
const LOOPBACK_HOSTNAMES = new Set(['localhost', '127.0.0.1', '[::1]']);
let targetsLoopback = false;
try {
  targetsLoopback = LOOPBACK_HOSTNAMES.has(new URL(API_BASE_URL).hostname);
} catch {
  // Malformed API_BASE_URL: leave certificate verification enabled.
}
if (targetsLoopback) {
  process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';
}

// Fail fast when no API key is configured.
if (!API_KEY) {
  console.error('Error: API_KEY environment variable is required');
  console.error('Please create a .env file based on .env.example');
  process.exit(1);
}
/**
 * Sends a request to `API_BASE_URL + endpoint` with the bearer token attached
 * and returns the parsed JSON response body.
 *
 * @param endpoint Path appended verbatim to `API_BASE_URL`.
 * @param options  Options forwarded to `fetch`. `options.headers` is merged
 *                 over the default `Authorization` header.
 * @returns The parsed JSON body, or `undefined` when a successful response
 *          has an empty body.
 * @throws Error with message `"<status>: <detail>"` for non-OK responses.
 *         `<detail>` is the body's string `error` field, else the JSON body,
 *         else the raw body text (truncated), else the HTTP status text.
 * @throws Error when a successful response carries a non-empty body that is
 *         not valid JSON.
 */
async function api(endpoint: string, options: any = {}): Promise<any> {
  const res = await fetch(`${API_BASE_URL}${endpoint}`, {
    ...options,
    headers: { Authorization: `Bearer ${API_KEY}`, ...options.headers },
  });

  // Read the body once as text so a non-JSON payload (an HTML error page, an
  // empty body) can still be reported instead of being lost to a parse failure.
  const raw = (await res.text()).trim();
  let parsed: unknown;
  let isJson = false;
  if (raw !== '') {
    try {
      parsed = JSON.parse(raw);
      isJson = true;
    } catch {
      // Not JSON; handled below using the raw text.
    }
  }

  if (!res.ok) {
    let detail: string;
    if (isJson) {
      // Optional chaining guards against a JSON `null` body.
      const errorField = (parsed as { error?: unknown } | null)?.error;
      detail = typeof errorField === 'string' ? errorField : JSON.stringify(parsed);
    } else {
      detail = raw.slice(0, 200) || res.statusText || 'request failed';
    }
    throw new Error(`${res.status}: ${detail}`);
  }

  if (raw === '') return undefined;
  if (!isJson) {
    throw new Error(`Expected a JSON response from ${endpoint} but received: ${raw.slice(0, 200)}`);
  }
  return parsed;
}
/**
 * Returns a promise that settles after roughly `ms` milliseconds.
 * Used to pace the polling loops.
 */
function sleep(ms: number) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Runs the end-to-end PDF workflow: create a project, generate a synthetic
 * dataset from `PDF_FILE`, snapshot it, request model recommendations, launch
 * a fine-tuning job for every recommended model, and monitor that job for up
 * to 5 minutes.
 *
 * Exits the process with code 1 when no model is recommended. Errors thrown
 * by `api` propagate to the caller.
 */
async function main(): Promise<void> {
  console.log('\n=== PDF Synthetic Workflow ===\n');

  // 1. Create project
  console.log('1. Creating project...');
  const { project_id } = await api('/api/v1/public/projects/create', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ name: 'PepsiCo By-Laws Project', goal: 'Train on corporate governance' }),
  });
  console.log(` ✓ Project: ${project_id}\n`);

  // 2. Generate synthetic dataset from PDF
  console.log('2. Generating synthetic dataset from PDF...');
  console.log(` File: ${PDF_FILE}`);
  const formData = new FormData();
  formData.append('project_id', project_id);
  formData.append('name', 'PepsiCo By-Laws Dataset');
  const pdfFile = file(PDF_FILE);
  formData.append('files[]', pdfFile, PDF_FILE);
  formData.append('pairs_to_generate', '100');
  formData.append('pair_type', 'qa');
  formData.append(
    'user_instructions',
    'Generate question-answer pairs about PepsiCo internal rules, governance policies, corporate by-laws, board procedures, voting requirements, and organizational structure. Focus on specific rules, requirements, and procedures mentioned in the document.'
  );
  // No explicit Content-Type header is passed here; the body is the FormData itself.
  const { dataset_id } = await api('/api/v1/public/datasets/create-synthetic', {
    method: 'POST',
    body: formData,
  });
  console.log(` ✓ Dataset: ${dataset_id}`);

  // Wait for dataset (can take several minutes): poll every 5 s and print
  // progress on the first poll and every 6th poll after that (about every 30 s).
  // NOTE(review): this loop has no upper bound, and any status other than
  // 'processing' is treated as ready. Confirm which failure statuses the API
  // can return and handle them here.
  console.log(' Waiting for generation (may take 5-10 minutes)...');
  let dataset;
  let checks = 0;
  do {
    await sleep(5000);
    dataset = await api(`/api/v1/public/datasets/${dataset_id}`);
    if (checks++ % 6 === 0) {
      console.log(` Status: ${dataset.status}, ${dataset.datapoints_count} datapoints`);
    }
  } while (dataset.status === 'processing');
  console.log(` ✓ Ready: ${dataset.datapoints_count} datapoints\n`);

  // 3. Create snapshot
  console.log('3. Creating snapshot...');
  const { snapshot_id } = await api('/api/v1/public/snapshots/create', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ dataset_id, split_percentage: 80 }),
  });
  console.log(` ✓ Snapshot: ${snapshot_id}\n`);

  // 4. Generate recommendations, then poll every 5 s until they are ready.
  console.log('4. Generating recommendations...');
  await api('/api/v1/public/recommendations/generate', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ snapshot_id, reasoning: false }),
  });
  let recs;
  do {
    await sleep(5000);
    recs = await api(`/api/v1/public/recommendations/${snapshot_id}?reasoning=false`);
  } while (recs.status === 'processing');

  // Filter once and reuse the result for the summary and the experiment list.
  const recommended = recs.recommended_models.filter((m: any) => m.recommended);
  console.log(` ✓ Recommended models:`);
  console.log(` Total models: ${recs.recommended_models.length}, Recommended: ${recommended.length}`);
  for (const model of recommended) {
    console.log(` - ${model.baseModelId}`);
  }
  console.log();

  // 5. Create finetuning job: one LoRA experiment per recommended model, using
  // the hyperparameters returned with the recommendation.
  console.log('5. Creating finetuning job...');
  const experiments = recommended.map((m: any) => ({
    base_model_id: m.baseModelId,
    batch_size: m.lora_hyperparameters.batchSize,
    learning_rate_multiplier: m.lora_hyperparameters.learningRateMultiplier,
    n_epochs: m.lora_hyperparameters.nEpochs,
    lora: true,
  }));
  if (experiments.length === 0) {
    console.error('\n✗ Error: No recommended models found. Cannot create finetuning job.');
    process.exit(1);
  }
  const { job_id } = await api('/api/v1/public/finetuning/create', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ snapshot_id, name: 'PepsiCo By-Laws Model', reasoning: false, experiments }),
  });
  console.log(` ✓ Job: ${job_id}\n`);

  // 6. Monitor (5 minutes max): 30 polls, 10 s apart.
  console.log('6. Monitoring job...');
  let lastStatus: unknown = 'processing';
  for (let i = 0; i < 30; i++) {
    await sleep(10000);
    const job = await api(`/api/v1/public/finetuning/${job_id}`);
    console.log(` Status: ${job.status}`);
    job.experiments.forEach((e: any) => {
      console.log(` - Exp #${e.experiment_number}: ${e.status} ${e.model_id || ''}`);
    });
    lastStatus = job.status;
    if (job.status !== 'processing') break;
  }

  // Only report completion when the job actually left the 'processing' state.
  // Otherwise say that monitoring stopped while the job is still running.
  if (lastStatus === 'processing') {
    console.log(`\n⚠ Job ${job_id} is still processing after 5 minutes of monitoring; check its status later.\n`);
  } else {
    console.log('\n✓ Done!\n');
  }
}
// Entry point: run the workflow; on any rejection, print the error's message
// and exit with status 1.
main().catch(({ message }) => {
  console.error('\n✗ Error:', message);
  process.exit(1);
});