Building Your Own Kernel in C
A detailed guide to building a functional operating system kernel from scratch in C, covering bootloading, VGA display, keyboard input, preemptive multitasking, memory management, FAT16 filesystem, and system calls.
The author of this article documents the development of a functional operating system kernel written in C, marking a transition from previous Rust-based attempts. The author emphasizes that C provides greater flexibility compared to the Rust approach, enabling faster development cycles and more direct hardware control without borrow checker constraints.
Architecture and Boot Process
The kernel uses a custom assembly bootloader with Multiboot header compliance. The boot process begins with an assembly entry point that sets up the stack and transfers control to the C kernel main function.
; kernel.asm — entry point, Multiboot header
bits 32
section .text
;multiboot spec
align 4
dd 0x1BADB002 ; magic Multiboot
dd 0x00 ; flags
dd -(0x1BADB002 + 0x00) ; checksum
global start
extern kmain
start:
cli ; disable interrupts
mov esp, stack_top ; set up stack
call kmain ; jump to C kernel
;hlt ; halt processor
section .bss
resb 8192 ; reserve 8 KiB for stack
stack_top:
section .note.GNU-stack
; emptyThe initial C kernel is straightforward — just a main function with an infinite loop:
/* kernel.c */
void kmain(void)
{
/* Main infinite kernel loop */
for (;;)
{
asm volatile("hlt");
}
}Memory Layout
Memory is organized into distinct sections using a custom linker script. The .text section holds code, .data holds initialized data, .bss holds uninitialized data, .heap provides approximately 32 MB for kernel allocation, and .user reserves approximately 128 MB for user programs.
/* link.ld */
OUTPUT_FORMAT(elf32-i386)
ENTRY(start)
PHDRS
{
text PT_LOAD FLAGS(5); /* PF_R | PF_X */
data PT_LOAD FLAGS(6); /* PF_R | PF_W */
}
SECTIONS
{
. = 0x00100000;
.multiboot ALIGN(4) : { KEEP(*(.multiboot)) } :text
.text : {
*(.text)
*(.text*)
*(.rodata)
*(.rodata*)
} :text
.data : {
*(.data)
*(.data*)
} :data
.bss : {
*(.bss)
*(.bss*)
*(COMMON)
} :data
/* Simple heap area: 32 MiB right after .bss */
.heap : {
_heap_start = .;
. = . + 32 * 1024 * 1024; /* 32 MiB */
_heap_end = .;
} :data
/* User program space: 128 MiB reserved after heap */
.user : {
_user_start = .;
. = . + 128 * 1024 * 1024; /* 128 MiB */
_user_end = .;
} :data
}Display System
Text output leverages VGA memory at address 0xB8000, with each character occupying two bytes — one for the ASCII code and one for color attributes. The hardware cursor follows text output dynamically.
// vga/vga.c
void clean_screen(void)
{
uint8_t *vid = VGA_BUF;
for (unsigned int i = 0; i < 80 * 25 * 2; i += 2)
{
vid[i] = ' '; // character itself
vid[i + 1] = 0x07; // color attribute
}
}
uint8_t make_color(const uint8_t fore, const uint8_t back)
{
return (back << 4) | (fore & 0x0F);
}
void print_char(const char c,
const unsigned int x,
const unsigned int y,
const uint8_t fore,
const uint8_t back)
{
if (x >= VGA_WIDTH || y >= VGA_HEIGHT)
return;
uint8_t *vid = VGA_BUF;
uint8_t color = make_color(fore, back);
unsigned int offset = (y * VGA_WIDTH + x) * 2;
vid[offset] = (uint8_t)c;
vid[offset + 1] = color;
}
void print_string(const char *str,
const unsigned int x,
const unsigned int y,
const uint8_t fore,
const uint8_t back)
{
uint8_t *vid = VGA_BUF;
unsigned int offset = (y * VGA_WIDTH + x) * 2;
uint8_t color = make_color(fore, back);
unsigned int col = x;
for (uint32_t i = 0; str[i]; ++i)
{
char c = str[i];
if (c == '\t')
{
unsigned int spaces = TAB_SIZE - (col % TAB_SIZE);
for (unsigned int s = 0; s < spaces; s++)
{
vid[offset] = ' ';
vid[offset + 1] = color;
offset += 2;
col++;
}
}
else
{
vid[offset] = (uint8_t)c;
vid[offset + 1] = color;
offset += 2;
col++;
}
if (col >= VGA_WIDTH)
break;
}
}The hardware cursor position is updated through VGA I/O ports:
// vga/vga.c
#define VGA_CTRL 0x3D4
#define VGA_DATA 0x3D5
#define CURSOR_HIGH 0x0E
#define CURSOR_LOW 0x0F
#define VGA_WIDTH 80
#define VGA_HEIGHT 25
void update_hardware_cursor(uint8_t x, uint8_t y)
{
uint16_t pos = y * VGA_WIDTH + x;
outb(VGA_CTRL, CURSOR_HIGH);
outb(VGA_DATA, (pos >> 8) & 0xFF);
outb(VGA_CTRL, CURSOR_LOW);
outb(VGA_DATA, pos & 0xFF);
}Keyboard Input
Keyboard input feeds into a ring buffer rather than directly to the display. This decouples hardware input from applications, allowing multiple programs to access keyboard data through system calls. The keyboard handler processes scan codes, handles Shift and Caps Lock states, converts them to ASCII, and pushes characters into the buffer.
// keyboard/keyboard.c
#define KBD_BUF_SIZE 256
static bool shift_down = false;
static bool caps_lock = false;
static char kbd_buf[KBD_BUF_SIZE];
static volatile int kbd_head = 0;
static volatile int kbd_tail = 0;
char get_ascii_char(uint8_t scancode)
{
if (is_alpha(scancode))
{
bool upper = shift_down ^ caps_lock;
char base = scancode_to_ascii[(uint8_t)scancode];
return upper ? my_toupper(base) : base;
}
if (shift_down)
return scancode_to_ascii_shifted[(uint8_t)scancode];
else
return scancode_to_ascii[(uint8_t)scancode];
}
static inline unsigned long irq_save_flags(void)
{
unsigned long flags;
asm volatile("pushf; pop %0; cli" : "=g"(flags)::"memory");
return flags;
}
static inline void irq_restore_flags(unsigned long flags)
{
asm volatile("push %0; popf" ::"g"(flags) : "memory", "cc");
}
void kbd_buffer_push(char c)
{
unsigned long flags = irq_save_flags();
int next = (kbd_head + 1) % KBD_BUF_SIZE;
if (next != kbd_tail)
{
kbd_buf[kbd_head] = c;
kbd_head = next;
}
irq_restore_flags(flags);
}
char kbd_getchar(void)
{
unsigned long flags = irq_save_flags();
if (kbd_head == kbd_tail)
{
irq_restore_flags(flags);
return -1;
}
char c = (char)kbd_buf[kbd_tail];
kbd_tail = (kbd_tail + 1) % KBD_BUF_SIZE;
irq_restore_flags(flags);
return c;
}
void keyboard_handler(void)
{
uint8_t code = inb(KEYBOARD_PORT);
bool released = code & 0x80;
uint8_t key = code & 0x7F;
if (key == KEY_LSHIFT || key == KEY_RSHIFT)
{
shift_down = !released;
pic_send_eoi(1);
return;
}
if (key == KEY_CAPSLOCK && !released)
{
caps_lock = !caps_lock;
pic_send_eoi(1);
return;
}
if (!released)
{
char ch = get_ascii_char(key);
if (ch)
kbd_buffer_push(ch);
}
pic_send_eoi(1);
}The keyboard interrupt service routine is defined in assembly:
; interrupt/isr33.asm
[bits 32]
extern keyboard_handler
global isr33
isr33:
pusha
call keyboard_handler
popa
mov al, 0x20
out 0x20, al
iretdTimer and Timing
IRQ 32 (timer) fires at a configurable frequency, serving dual purposes: maintaining system time and triggering task switching for preemptive multitasking. The Programmable Interval Timer (PIT) is initialized as follows:
// time/timer.c
void init_timer(uint32_t frequency)
{
uint32_t divisor = 1193180 / frequency;
outb(0x43, 0x36);
outb(0x40, divisor & 0xFF);
outb(0x40, (divisor >> 8) & 0xFF);
}
init_timer(1000);The timer ISR handles context saving and switching via assembly:
; interrupt/isr32.asm
[bits 32]
global isr32
extern isr_timer_dispatch
isr32:
cli
push ds
push es
push fs
push gs
pusha
push dword 0
push dword 32
mov eax, esp
push eax
call isr_timer_dispatch
add esp, 4
mov esp, eax
pop eax
pop eax
popa
pop gs
pop fs
pop es
pop ds
iretdSystem Calls
Interrupt 0x80 provides the interface between user applications and kernel services, supporting I/O, memory management, process control, and filesystem access. The system call numbers are defined in a header:
// syscall/syscall.h
#define SYSCALL_PRINT_CHAR 0
#define SYSCALL_PRINT_STRING 1
#define SYSCALL_GET_TIME 2
#define SYSCALL_MALLOC 10
#define SYSCALL_REALLOC 11
#define SYSCALL_FREE 12
#define SYSCALL_KMALLOC_STATS 13
#define SYSCALL_GETCHAR 30
#define SYSCALL_SETPOSCURSOR 31
#define SYSCALL_POWER_OFF 100
#define SYSCALL_REBOOT 101
#define SYSCALL_TASK_CREATE 200
#define SYSCALL_TASK_LIST 201
#define SYSCALL_TASK_STOP 202
#define SYSCALL_REAP_ZOMBIES 203
#define SYSCALL_TASK_EXIT 204The trap gate assembly stub saves registers and passes six arguments to the C handler:
; interrupt/isr80.asm
[bits 32]
extern syscall_handler
global isr80
isr80:
cli
push edi
push esi
push ebp
push ebx
push edx
push ecx
push ebp ; a6
push edi ; a5
push esi ; a4
push edx ; a3
push ecx ; a2
push ebx ; a1
push eax ; num
call syscall_handler
add esp, 28
pop ecx
pop edx
pop ebx
pop ebp
pop esi
pop edi
iretThe C-side system call handler dispatches based on the system call number:
// syscall/syscall.c
uint32_t syscall_handler(
uint32_t num, uint32_t a1, uint32_t a2,
uint32_t a3, uint32_t a4, uint32_t a5, uint32_t a6)
{
switch (num)
{
case SYSCALL_PRINT_CHAR:
print_char((char)a1, a2, a3, (uint8_t)a4, (uint8_t)a5);
return 0;
case SYSCALL_PRINT_STRING:
print_string((const char *)a1, a2, a3, (uint8_t)a4, (uint8_t)a5);
return 0;
case SYSCALL_GET_TIME:
uint_to_str(seconds, str);
return (uint32_t)str;
case SYSCALL_MALLOC:
return (uint32_t)malloc((size_t)a1);
case SYSCALL_FREE:
free((void *)a1);
return 0;
case SYSCALL_REALLOC:
return (uint32_t)realloc((void *)a1, (size_t)a2);
case SYSCALL_KMALLOC_STATS:
if (a1)
get_kmalloc_stats((kmalloc_stats_t *)a1);
return 0;
case SYSCALL_GETCHAR:
{
char c = kbd_getchar();
if (c == -1) return '\0';
return c;
}
case SYSCALL_SETPOSCURSOR:
update_hardware_cursor((uint8_t)a1, (uint8_t)a2);
return 0;
case SYSCALL_POWER_OFF:
power_off();
return 0;
case SYSCALL_REBOOT:
reboot_system();
return 0;
case SYSCALL_TASK_CREATE:
task_create((void (*)(void))a1, (size_t)a2);
return 0;
case SYSCALL_TASK_LIST:
return task_list((task_info_t *)a1, a2);
case SYSCALL_TASK_STOP:
return task_stop((int)a1);
case SYSCALL_REAP_ZOMBIES:
reap_zombies();
return 0;
case SYSCALL_TASK_EXIT:
task_exit((int)a1);
return 0;
default:
return (uint32_t)-1;
}
}Memory Management
A custom allocator manages heap fragmentation through block headers containing magic numbers for corruption detection, size tracking, and coalescing of adjacent free blocks.
// malloc/malloc.c
#define ALIGN 8
#define MAGIC 0xB16B00B5U
typedef struct block_header
{
uint32_t magic;
size_t size; /* payload size in bytes */
int free; /* 1 if free, 0 if occupied */
struct block_header *prev;
struct block_header *next;
} block_header_t;Preemptive Multitasking
Tasks switch via timer interrupts using context-saving mechanisms. The round-robin scheduler maintains a circular task list, with each task having dedicated stack space and register preservation structures.
// multitask/multitask.c
void scheduler_init(void)
{
memset(&init_task, 0, sizeof(init_task));
init_task.pid = 0;
init_task.state = TASK_RUNNING;
init_task.regs = NULL;
init_task.kstack = NULL;
init_task.kstack_size = 0;
init_task.next = &init_task;
task_ring = &init_task;
current = NULL;
next_pid = 1;
}Each new task gets a stack frame set up with initial register values:
// multitask/multitask.c
sp[0] = 32; /* int_no (dummy) */
sp[1] = 0; /* err_code */
sp[2] = 0; /* EDI */
sp[3] = 0; /* ESI */
sp[4] = 0; /* EBP */
sp[5] = (uint32_t)sp; /* ESP_saved */
sp[6] = 0; /* EBX */
sp[7] = 0; /* EDX */
sp[8] = 0; /* ECX */
sp[9] = 0; /* EAX */
sp[10] = 0x10; /* DS */
sp[11] = 0x10; /* ES */
sp[12] = 0x10; /* FS */
sp[13] = 0x10; /* GS */
sp[14] = (uint32_t)entry; /* EIP */
sp[15] = 0x08; /* CS */
sp[16] = 0x202; /* EFLAGS: IF = 1 */The scheduler API is exposed through a header:
// multitask/multitask.h
void scheduler_init(void);
void task_create(void (*entry)(void), size_t stack_size);
void schedule_from_isr(uint32_t *regs, uint32_t **out_regs_ptr);
int task_list(task_info_t *buf, size_t max);
int task_stop(int pid);
void reap_zombies(void);
task_t *get_current_task(void);
void task_exit(int exit_code);File System
A FAT16 implementation on a RAM disk provides file storage with cluster chains and allocation tables. Programs load from disk into the .user memory region via dynamic allocation.
// fat16/fs.h
typedef struct
{
char name[FS_NAME_MAX];
char ext[FS_EXT_MAX];
int16_t parent;
uint16_t first_cluster;
uint32_t size;
uint8_t used;
uint8_t is_dir;
} fs_entry_t;The filesystem is initialized with a root directory entry:
// fat16/fs.c
void fs_init(void)
{
memset(entries, 0, sizeof(entries));
memset(&fat, 0, sizeof(fat));
entries[FS_ROOT_IDX].used = 1;
entries[FS_ROOT_IDX].is_dir = 1;
entries[FS_ROOT_IDX].parent = -1;
strncpy(entries[FS_ROOT_IDX].name, "/", FS_NAME_MAX - 1);
entries[FS_ROOT_IDX].name[FS_NAME_MAX - 1] = '\0';
entries[FS_ROOT_IDX].ext[0] = '\0';
entries[FS_ROOT_IDX].first_cluster = 0;
entries[FS_ROOT_IDX].size = 0;
}Cluster allocation finds the first free entry in the FAT table:
// fat16/fs.c
static uint16_t alloc_cluster(void)
{
for (uint16_t i = 2; i < FAT_ENTRIES; i++)
{
if (fat.entries[i] == 0)
{
fat.entries[i] = 0xFFFF;
return i;
}
}
return 0;
}The write function handles data across cluster chains:
// fat16/fs.c
size_t fs_write(uint16_t first_cluster, const void *buf, size_t size)
{
uint8_t *data = (uint8_t *)buf;
size_t cluster_size = BYTES_PER_SECTOR * SECTORS_PER_CLUSTER;
if (first_cluster < 2 || first_cluster >= FAT_ENTRIES)
return 0;
uint16_t cur = first_cluster;
size_t written = 0;
while (written < size)
{
uint8_t *clptr = get_cluster(cur);
size_t to_write = size - written;
if (to_write > cluster_size)
to_write = cluster_size;
memcpy(clptr, data + written, to_write);
written += to_write;
if (written < size)
{
if (fat.entries[cur] == 0xFFFF)
{
uint16_t nc = alloc_cluster();
if (nc == 0)
{
fat.entries[cur] = 0xFFFF;
return written;
}
fat.entries[cur] = nc;
cur = nc;
}
else
cur = fat.entries[cur];
}
else
{
fat.entries[cur] = 0xFFFF;
break;
}
}
return written;
}High-level file operations are provided:
// fat16/fs.h
int fs_write_file_in_dir(const char *name, const char *ext, int parent, const void *data, size_t size);
int fs_read_file_in_dir(const char *name, const char *ext, int parent, void *buf, size_t bufsize, size_t *out_size);
int fs_get_all_in_dir(fs_entry_t *out_files, int max_files, int parent);The RAM disk provides the underlying storage:
// ramdisk/ramdisk.c
#define RAMDISK_SIZE (64 * 1024 * 1024) // 64 MiB
static uint8_t ramdisk[RAMDISK_SIZE] = {0};
uint8_t *ramdisk_base(void)
{
return ramdisk;
}Conclusion
This project demonstrates that building a kernel from scratch in C is entirely achievable. The resulting kernel supports screen output, keyboard input, a timer, preemptive multitasking with round-robin scheduling, dynamic memory allocation, a FAT16 file system on a RAM disk, and system calls for user-space programs. The complete source code is available on GitHub.
FAQ
What is this article about in one sentence?
This article explains the core idea in practical terms and focuses on what you can apply in real work.
Who is this article for?
It is written for engineers, technical leaders, and curious readers who want a clear, implementation-focused explanation.
What should I read next?
Use the related articles below to continue with closely connected topics and concrete examples.