Skip to main content
Export your Prem API key as API_KEY before running any script. Optionally set API_BASE_URL to your API host (it defaults to https://localhost). Customize USER_INSTRUCTIONS based on your video’s subject matter for better results.
1

Create project and generate synthetic dataset

// Create the project; its id is attached to the dataset request below.
const projectPayload = { name: 'YouTube AI Project', goal: 'Train from video content' };
const { project_id } = await api('/api/v1/public/projects/create', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(projectPayload)
});

// Multipart form fields, listed in the order they are appended.
const datasetFields: Array<[string, string]> = [
  ['project_id', project_id],
  ['name', 'YouTube Dataset'],
  ['youtube_urls[0]', YOUTUBE_URL],
  ['pairs_to_generate', '50'],
  ['pair_type', 'qa'],
  ['user_instructions', USER_INSTRUCTIONS]
];

const formData = new FormData();
for (const [field, value] of datasetFields) {
  formData.append(field, value);
}

// No explicit Content-Type header is set for the FormData body.
const { dataset_id } = await api('/api/v1/public/datasets/create-synthetic', {
  method: 'POST',
  body: formData
});
Create a project, then submit YouTube URLs to generate synthetic Q&A pairs. The generation can take 5-10 minutes.
2

Wait for dataset generation

// Re-fetch the dataset every 5 seconds until its status is no longer 'processing'.
let dataset;
let checks = 0;
while (true) {
  await sleep(5000);
  dataset = await api(`/api/v1/public/datasets/${dataset_id}`);

  // Log on polls 0, 6, 12, ... only.
  const isLoggingPoll = checks % 6 === 0;
  checks += 1;
  if (isLoggingPoll) {
    console.log(`Status: ${dataset.status}, ${dataset.datapoints_count} datapoints`);
  }

  if (dataset.status !== 'processing') break;
}
Poll the dataset status every 5 seconds until it is no longer processing. Progress is logged on every sixth poll (roughly every 30 seconds) to avoid spam.
3

Create snapshot and get recommendations

// Builds the request options shared by the two JSON POST calls below.
const postJson = (payload: unknown) => ({
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(payload)
});

// Snapshot the dataset with split_percentage 80.
const { snapshot_id } = await api(
  '/api/v1/public/snapshots/create',
  postJson({ dataset_id, split_percentage: 80 })
);

// Request recommendations for that snapshot.
await api('/api/v1/public/recommendations/generate', postJson({ snapshot_id, reasoning: false }));

// Re-fetch every 5 seconds until the status is no longer 'processing'.
let recs;
for (;;) {
  await sleep(5000);
  recs = await api(`/api/v1/public/recommendations/${snapshot_id}?reasoning=false`);
  if (recs.status !== 'processing') break;
}
Create a snapshot with an 80/20 train/validation split, request model recommendations, then poll every 5 seconds until the recommendations are no longer processing.
4

Launch fine-tuning job

// Keep only the models whose `recommended` flag is set, and copy their LoRA
// hyperparameters into the snake_case fields sent in the request below.
const experiments = recs.recommended_models
  .filter((m: any) => m.recommended)
  .map((m: any) => ({
    base_model_id: m.baseModelId,
    batch_size: m.lora_hyperparameters.batchSize,
    learning_rate_multiplier: m.lora_hyperparameters.learningRateMultiplier,
    n_epochs: m.lora_hyperparameters.nEpochs,
    lora: true
  }));

// Stop with a clear message instead of submitting a job that has no experiments.
if (experiments.length === 0) {
  throw new Error('No recommended models found. Cannot create finetuning job.');
}

const { job_id } = await api('/api/v1/public/finetuning/create', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ snapshot_id, name: 'YouTube Model', reasoning: false, experiments })
});
Filter recommended models and create a fine-tuning job using LoRA hyperparameters.
5

Monitor job progress

// Poll at most 30 times, 10 seconds apart; stop early once the job's status
// is no longer 'processing'.
for (let attempt = 1; attempt <= 30; attempt += 1) {
  await sleep(10000);
  const job = await api(`/api/v1/public/finetuning/${job_id}`);
  console.log(`Status: ${job.status}`);

  // One line per experiment; the model id is printed only when present.
  for (const e of job.experiments) {
    console.log(`  - Exp #${e.experiment_number}: ${e.status} ${e.model_id || ''}`);
  }

  if (job.status !== 'processing') break;
}
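Poll job status every 10 seconds for up to 30 polls (about 5 minutes), stopping early once the job is no longer processing. Each poll prints every experiment’s status and, once available, its model ID.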

Full Example

#!/usr/bin/env bun

/**
 * Example 2: YouTube synthetic dataset workflow
 * 1. Create project → 2. Generate synthetic data from YouTube → 3. Create snapshot → 4. Get recommendations → 5. Run finetuning
 */

// Base URL of the API; an unset or empty API_BASE_URL falls back to https://localhost.
const API_BASE_URL = process.env.API_BASE_URL || 'https://localhost';
// Bearer token sent with every request.
const API_KEY = process.env.API_KEY;
// Video submitted for synthetic dataset generation.
const YOUTUBE_URL = 'https://www.youtube.com/watch?v=51y4KatMBFI';

// Free-form instructions sent with the dataset generation request.
const USER_INSTRUCTIONS =
	'Generate detailed question-answer pairs about the key concepts, main topics, important details, and technical information presented in the video. Focus on extracting specific facts, explanations, definitions, examples, and insights that are directly mentioned or demonstrated. Create questions that test understanding of the core subject matter and provide comprehensive answers based on the video content.';

/**
 * Reports whether `url` parses as a URL whose host is a loopback name or address
 * (`localhost`, `127.0.0.1` or `[::1]`). Unparseable input yields `false`.
 */
function isLoopbackUrl(url: string): boolean {
	try {
		const { hostname } = new URL(url);
		return hostname === 'localhost' || hostname === '127.0.0.1' || hostname === '[::1]';
	} catch {
		return false;
	}
}

// Disable TLS verification only for local development with self-signed certs.
// Doing this unconditionally would also switch off certificate checks when
// API_BASE_URL points at a remote host, while the API key is being sent to it.
// To target a remote host with a self-signed certificate, export
// NODE_TLS_REJECT_UNAUTHORIZED=0 yourself.
if (isLoopbackUrl(API_BASE_URL)) {
	process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';
}

// Abort before any request is made when no API key was provided (unset or empty).
if (API_KEY === undefined || API_KEY === '') {
	const lines = [
		'Error: API_KEY environment variable is required',
		'Please create a .env file based on .env.example',
	];
	for (const line of lines) {
		console.error(line);
	}
	process.exit(1);
}

/**
 * Sends a request to `API_BASE_URL + endpoint` with a Bearer `Authorization`
 * header and returns the parsed JSON response body.
 *
 * @param endpoint Path appended verbatim to `API_BASE_URL`.
 * @param options  Options forwarded to `fetch`. `options.headers` is spread after
 *                 the `Authorization` header, so a caller-supplied header of the
 *                 same name wins.
 * @returns The response body parsed as JSON.
 * @throws Error with message `"<status>: <detail>"` when the response is not ok.
 *         `<detail>` is the body's string `error` field, otherwise the body
 *         re-serialized as JSON, otherwise (non-JSON body) the first 500
 *         characters of the raw text, falling back to the status text.
 */
async function api(endpoint: string, options: any = {}): Promise<any> {
	const res = await fetch(`${API_BASE_URL}${endpoint}`, {
		...options,
		headers: { Authorization: `Bearer ${API_KEY}`, ...options.headers },
	});
	if (!res.ok) {
		// Read the body once as text so a non-JSON error body (for example a
		// proxy error page) is still reported instead of collapsing to "{}".
		const raw = await res.text().catch(() => '');
		let errorMsg: string;
		try {
			const err: any = JSON.parse(raw);
			// `?.` keeps a JSON `null` body from raising a TypeError here.
			errorMsg = typeof err?.error === 'string' ? err.error : JSON.stringify(err);
		} catch {
			errorMsg = raw.trim().slice(0, 500) || res.statusText || 'request failed';
		}
		throw new Error(`${res.status}: ${errorMsg}`);
	}
	return res.json();
}

/**
 * Returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires.
 */
function sleep(ms: number) {
	return new Promise((wake) => {
		setTimeout(wake, ms);
	});
}

/**
 * Runs the whole workflow in order: create a project, generate a synthetic
 * dataset from the YouTube URL, snapshot it, fetch model recommendations,
 * launch a fine-tuning job, then monitor that job for a bounded time.
 *
 * Progress is written to the console. The returned promise rejects when any
 * API call fails or when no recommended model is available.
 */
async function main() {
	console.log('\n=== YouTube Synthetic Workflow ===\n');

	// 1. Create project
	console.log('1. Creating project...');
	const { project_id } = await api('/api/v1/public/projects/create', {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify({ name: 'YouTube AI Project', goal: 'Train from video content' }),
	});
	console.log(`   ✓ Project: ${project_id}\n`);

	// 2. Generate synthetic dataset
	console.log('2. Generating synthetic dataset from YouTube...');
	console.log(`   URL: ${YOUTUBE_URL}`);
	const formData = new FormData();
	formData.append('project_id', project_id);
	formData.append('name', 'YouTube Dataset');
	formData.append('youtube_urls[0]', YOUTUBE_URL);
	formData.append('pairs_to_generate', '50');
	formData.append('pair_type', 'qa');
	formData.append('user_instructions', USER_INSTRUCTIONS);

	const { dataset_id } = await api('/api/v1/public/datasets/create-synthetic', {
		method: 'POST',
		body: formData,
	});
	console.log(`   ✓ Dataset: ${dataset_id}`);

	// Wait for dataset (can take several minutes): poll every 5 seconds and
	// log on every sixth poll.
	// NOTE(review): the loop ends on ANY status other than 'processing', so a
	// failure status (if the API has one) would still print "Ready" — confirm
	// the status values and handle failures explicitly.
	console.log('   Waiting for generation (may take 5-10 minutes)...');
	let dataset;
	let checks = 0;
	do {
		await sleep(5000);
		dataset = await api(`/api/v1/public/datasets/${dataset_id}`);
		if (checks++ % 6 === 0) {
			console.log(`   Status: ${dataset.status}, ${dataset.datapoints_count} datapoints`);
		}
	} while (dataset.status === 'processing');
	console.log(`   ✓ Ready: ${dataset.datapoints_count} datapoints\n`);

	// 3. Create snapshot
	console.log('3. Creating snapshot...');
	const { snapshot_id } = await api('/api/v1/public/snapshots/create', {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify({ dataset_id, split_percentage: 80 }),
	});
	console.log(`   ✓ Snapshot: ${snapshot_id}\n`);

	// 4. Generate recommendations
	console.log('4. Generating recommendations...');
	await api('/api/v1/public/recommendations/generate', {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify({ snapshot_id, reasoning: false }),
	});

	let recs;
	do {
		await sleep(5000);
		recs = await api(`/api/v1/public/recommendations/${snapshot_id}?reasoning=false`);
	} while (recs.status === 'processing');

	// Filter once; the same list feeds both the summary and the job request.
	const recommended = recs.recommended_models.filter((m: any) => m.recommended);

	console.log(`   ✓ Recommended models:`);
	console.log(`   Total models: ${recs.recommended_models.length}, Recommended: ${recommended.length}`);
	recommended.forEach((m: any) => {
		console.log(`     - ${m.baseModelId}`);
	});
	console.log();

	// 5. Create finetuning job
	console.log('5. Creating finetuning job...');
	const experiments = recommended.map((m: any) => ({
		base_model_id: m.baseModelId,
		batch_size: m.lora_hyperparameters.batchSize,
		learning_rate_multiplier: m.lora_hyperparameters.learningRateMultiplier,
		n_epochs: m.lora_hyperparameters.nEpochs,
		lora: true,
	}));

	if (experiments.length === 0) {
		// Reject instead of exiting here, so the caller decides how to report
		// the failure and which exit code to use.
		throw new Error('No recommended models found. Cannot create finetuning job.');
	}

	const { job_id } = await api('/api/v1/public/finetuning/create', {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify({ snapshot_id, name: 'YouTube Model', reasoning: false, experiments }),
	});
	console.log(`   ✓ Job: ${job_id}\n`);

	// 6. Monitor (5 minutes max): 30 polls, 10 seconds apart
	console.log('6. Monitoring job...');
	let lastStatus = 'processing';
	for (let i = 0; i < 30; i++) {
		await sleep(10000);
		const job = await api(`/api/v1/public/finetuning/${job_id}`);
		lastStatus = job.status;
		console.log(`   Status: ${job.status}`);
		job.experiments.forEach((e: any) => {
			console.log(`     - Exp #${e.experiment_number}: ${e.status} ${e.model_id || ''}`);
		});
		if (job.status !== 'processing') break;
	}

	// Only claim completion when the job actually left 'processing'; otherwise
	// the monitoring window simply ran out.
	if (lastStatus === 'processing') {
		console.log('\n… Monitoring window elapsed; job is still processing.\n');
	} else {
		console.log('\n✓ Done!\n');
	}
}

// Start the workflow; if it rejects, print the error message and exit with status 1.
main().then(undefined, (failure) => {
	console.error('\n✗ Error:', failure.message);
	process.exit(1);
});