// initialize the kernel's memory model and the page level allocator

#include <string.h>
#include <assert.h>
#include <stdio.h>
#include <flux/page.h>

#include <flux/machine/seg.h>
#include <flux/machine/proc_reg.h>
#include <flux/machine/gate_init.h>

#include <flux/machine/base_vm.h>

#include "entry.h"
#include "globals.h"
#include "config.h"
#include "irq.h"
#include "kmem.h"

//
// helper functions
//

// allocate a page from the end of the physical memory
//
// Moves *mem_border down by one page, zero-fills that page and returns
// its address (i.e. the new value of *mem_border).  The address is
// written through directly, so the page has to be accessible at that
// address in the current address space.  No lower bound is checked.
static inline vm_offset_t stupid_alloc(vm_offset_t *mem_border)
{
  *mem_border -= PAGE_SIZE;

  // vm_offset_t is an integer type, so converting it to a pointer
  // needs reinterpret_cast; static_cast between integers and pointers
  // is ill-formed.
  memset(reinterpret_cast<void *>(*mem_border), 0, PAGE_SIZE);

  // memset() returns its first argument, so the address of the cleared
  // page is simply the updated border.
  return *mem_border;
}

// carve `size' bytes off the front of the area that *from points at
//
// Returns the old value of *from and advances *from by `size' rounded
// up to the next multiple of 16 bytes.  There is no check that the
// advanced cursor still lies within the page.
static inline vm_offset_t alloc_from_page(vm_offset_t *from, vm_offset_t size)
{
  const vm_offset_t start = *from;
  const vm_offset_t padded = (size + 0xf) & ~0xf;

  *from = start + padded;

  return start;
}

// set CS (missing from OSKIT)
//
// Reloads the code segment register with the selector `cs' by doing a
// far jump (ljmp) to the local label placed directly behind the jump.
// `cs' is passed with the "i" constraint, so it has to be a
// compile-time constant.
//
// The expansion intentionally carries no trailing semicolon: the
// caller writes `set_cs(x);', which then is exactly one statement and
// stays safe inside an unbraced if/else.
#define set_cs(cs) 				\
  asm volatile					\
    ("ljmp %0,$1f \n1:"				\
     : : "i" (cs))

//
// class kmem
//

// Set up the kernel's memory model during early boot.
//
// mbi_pa: physical address of the multiboot info structure; it is read
//         through phys_to_virt() and copied into kmbi.
//
// In order, this function
//  - copies the multiboot info and the kernel command line,
//  - computes the upper end of memory (mem_max / himem),
//  - builds the kernel page directory (kdir) and switches to it,
//  - maps one page for CPU data structures directly below io_bitmap,
//  - places IDT, GDT and TSS on that page and loads them,
//  - allocates the zero page and fills in the kernel info page,
//  - hands the kernel's share of memory to the lmm allocator.
//
// Pages are taken top-down from himem via stupid_alloc(); the order of
// these allocations matters (see the cpu_page comment below).
//
// NOTE(review): several static_casts in this function convert between
// vm_offset_t (used as an integer here) and pointer types; standard
// C++ requires reinterpret_cast for that.  Left untouched because the
// declarations of the involved types are not visible in this file.
void kmem::init(vm_offset_t mbi_pa)
{
  // there are several things to note here: first, we run before
  // main() has been started, i.e., before any constructors for static
  // objects have been run.  second, we can assume here that we still
  // have the physical memory mapped in one-to-one from bootstrap.c.
  // third, we can't allocate memory dynamically before we have set up
  // the kernel memory and init page table.
  // we also can assume that the "cpu" global variable has already
  // been initialized.

  // make a copy of the multiboot parameters
  kmbi = *static_cast<multiboot_info *>(phys_to_virt(mbi_pa));
  if (kmbi.flags & MULTIBOOT_CMDLINE)
    {
      // strncpy() does not terminate the destination when the source
      // is too long, hence the explicit terminator below
      strncpy(kcmdline, static_cast<char*>(phys_to_virt(kmbi.cmdline)),
	      sizeof(kcmdline));
      kcmdline[sizeof(kcmdline) - 1] = 0;
    }
  else
    kcmdline[0] = 0;

  // address of physical memory in our address space; this variable is
  // shared with the OSKIT
  phys_mem_va = mem_phys;

  // find the highest memory address: 1024 * 1024 bytes plus
  // kmbi.mem_upper scaled by 1024 (presumably mem_upper is the amount
  // of memory above 1MB in KB, as in the Multiboot spec -- confirm).
  // himem is the allocation border that stupid_alloc() moves downwards.
  himem = mem_max = 1024 * ( 1024 + kmbi.mem_upper );

  // allocate a page for processor data structures.  this page is
  // filled in later; however, because we start allocating from the
  // end of memory, we allocate this page early to increase the chance
  // that it is on the end of a 4MB page.  if this happens, we can map
  // it in as an 4MB page later; otherwise, we need to allocate a page
  // table just for this page.  the reason we need this page near the
  // end is that it must refer to the io_bitmap on the next 4MB-page
  // with a 16-bit pointer.
  vm_offset_t cpu_page = stupid_alloc(& himem);

  // the kernel page directory; stupid_alloc() has already zeroed it.
  // kd_entry is the directory index at which the mapping of physical
  // memory (starting at mem_phys) begins.
  kdir = static_cast<pd_entry_t *>(phys_to_virt(stupid_alloc(& himem)));
  unsigned kd_entry = mem_phys >> 22;

  // the global bit is only added to cpu_global if the CPU reports
  // support for it
  cpu_global = flag_global | ((cpu.feature_flags & CPUF_PAGE_GLOBAL_EXT) ? 
			      INTEL_PDE_GLOBAL : 0);

  // set up the kernel mapping for physical memory.  mark all pages as
  // referenced and modified (so when touching the respective pages
  // later, we save the CPU overhead of marking the pd/pt entries like
  // this)

  // we also set up a one-to-one virt-to-phys mapping for two reasons:
  // (1) so that we switch to the new page table early and re-use the
  // segment descriptors set up by bootstrap.c.  (we'll set up our own
  // descriptors later.)  (2) a one-to-one phys-to-virt mapping in the
  // kernel's page directory sometimes comes in handy
  for (vm_offset_t address = 0; address < mem_max; 
       address += SUPERPAGE_SIZE, kd_entry++)
    {
      if (cpu.feature_flags & CPUF_4MB_PAGES)
	{
	  // one directory entry maps the whole superpage
	  kdir[kd_entry] = address | INTEL_PDE_SUPERPAGE 
	    | INTEL_PDE_VALID | INTEL_PDE_WRITE | INTEL_PDE_REF
	    | INTEL_PDE_MOD | cpu_global;
	}
      else
	{
	  // no 4MB pages: allocate a page table for this superpage-sized
	  // range.  the value returned by stupid_alloc() is used both as
	  // a pointer and as the address stored in the directory entry,
	  // which relies on the one-to-one mapping mentioned above.
	  pt_entry_t *t = static_cast<pt_entry_t *>(stupid_alloc(& himem));

	  kdir[kd_entry] = static_cast<vm_offset_t>(t)
	    | INTEL_PDE_VALID | INTEL_PDE_WRITE | INTEL_PDE_REF;
	  // fill one page table entry per page of the range (the
	  // INTEL_PDE_* flag constants are used for these entries, too)
	  for (vm_offset_t a = address; a < address + SUPERPAGE_SIZE;
	       a += PAGE_SIZE)
	    {
	      t[(a >> PTESHIFT) & PTEMASK] = a | INTEL_PDE_VALID 
		| INTEL_PDE_WRITE | INTEL_PDE_REF
		| INTEL_PDE_MOD | cpu_global;
	    }
	}

      // add a one-to-one mapping
      kdir[(address >> PDESHIFT) & PDEMASK] = kdir[kd_entry];
    }

  // kernel mode should acknowledge write-protected page table entries
  set_cr0(get_cr0() | CR0_WP);

  // now switch to our new page table
  set_pdbr(virt_to_phys(kdir));

  // map the cpu_page we allocated earlier just before io_bitmap
  vm_offset_t cpu_page_vm;

  // io_bitmap has to start on a superpage boundary; the directory
  // entry in front of it (index - 1) is used for the cpu_page mapping
  assert((static_cast<vm_offset_t>(io_bitmap) & SUPERPAGE_MASK) == 0);

  // the superpage variant is only taken if cpu_page starts less than
  // 0x10000 bytes below the end of the superpage it lies in
  if ((cpu.feature_flags & CPUF_4MB_PAGES)
      && SUPERPAGE_SIZE - (cpu_page & SUPERPAGE_MASK) < 0x10000)
    {
      // can map as 4MB page because the cpu_page will land within a
      // 16-bit range from io_bitmap
      kdir[((static_cast<vm_offset_t>(io_bitmap) >> PDESHIFT) & PDEMASK) - 1]
	= (cpu_page & ~SUPERPAGE_MASK) | INTEL_PDE_SUPERPAGE 
	  | INTEL_PDE_VALID | INTEL_PDE_WRITE | INTEL_PDE_REF
	  | INTEL_PDE_MOD | cpu_global;

      // same offset within the superpage, but in the superpage that
      // ends where io_bitmap begins
      cpu_page_vm = (cpu_page & SUPERPAGE_MASK) 
	+ (static_cast<vm_offset_t>(io_bitmap) - SUPERPAGE_SIZE);
    }
  else
    {
      // need a dedicated page table; only its entry at index PTEMASK
      // is filled in (presumably the last entry of the table, so that
      // cpu_page ends up in the page right below io_bitmap -- this
      // matches cpu_page_vm computed below)
      pt_entry_t *t = static_cast<pt_entry_t *>(stupid_alloc(& himem));
      kdir[((static_cast<vm_offset_t>(io_bitmap) >> PDESHIFT) & PDEMASK) - 1]
	= static_cast<vm_offset_t>(t)
	  | INTEL_PDE_VALID | INTEL_PDE_WRITE | INTEL_PDE_REF;
      
      t[PTEMASK] = cpu_page | INTEL_PDE_VALID 
	| INTEL_PDE_WRITE | INTEL_PDE_REF
	| INTEL_PDE_MOD | cpu_global;

      cpu_page_vm = static_cast<vm_offset_t>(io_bitmap) - PAGE_SIZE;
    }

  // from here on cpu_page_vm serves as the allocation cursor for
  // alloc_from_page(): IDT, GDT and TSS are placed on the cpu page in
  // this order

  // set up the x86 CPU's memory model
  pseudo_descriptor desc;

  // first initialize the interrupt descriptor table
  irq_t::init(0x20, 0x28);	// map hardware irqs to interrupt vecs 0x20-30

  // XXX we later want the IDT on a read-only page (preferably a code
  // page which is used often, for instance the page the entry.S code
  // is on) because of the Pentium F00F bug.

  // idt_max gates of 8 bytes each
  idt = static_cast<x86_gate *>
    (alloc_from_page(& cpu_page_vm, idt_max * 8));

  gate_init(idt, idt_init_table, gdt_code_kernel);

  desc.limit = idt_max * 8 - 1;
  desc.linear_base = static_cast<vm_offset_t>(idt);
  set_idt(&desc);


  // now initialize the global descriptor table
  // (gdt_max is used as a size in bytes here and for desc.limit below)
  gdt = static_cast<x86_desc *>(alloc_from_page(& cpu_page_vm, gdt_max));
  tss = static_cast<x86_tss *>(alloc_from_page(& cpu_page_vm, 
					       sizeof(x86_tss)));
  
  // make sure kernel cs/ds and user cs/ds are placed in the same
  // cache line, respectively; pre-set all "accessed" flags so that
  // the CPU doesn't need to do this later
  // (the gdt_* constants are divided by 8 to index the x86_desc array)
  fill_descriptor(gdt + gdt_tss/8, static_cast<vm_offset_t>(tss), 
		  sizeof(x86_tss) - 1, 
		  ACC_PL_K | ACC_TSS | ACC_A, 0);
  fill_descriptor(gdt + gdt_code_kernel/8, 0, 0xffffffff,
		  ACC_PL_K | ACC_CODE_R | ACC_A, SZ_32);
  fill_descriptor(gdt + gdt_data_kernel/8, 0, 0xffffffff,
		  ACC_PL_K | ACC_DATA_W | ACC_A, SZ_32);
  fill_descriptor(gdt + gdt_code_user/8, 0, 0xffffffff,
		  ACC_PL_U | ACC_CODE_R | ACC_A, SZ_32);
  fill_descriptor(gdt + gdt_data_user/8, 0, 0xffffffff,
		  ACC_PL_U | ACC_DATA_W | ACC_A, SZ_32);

  desc.limit = gdt_max - 1;
  desc.linear_base = static_cast<vm_offset_t>(gdt);
  set_gdt(&desc);
  set_ldt(0);

  // reload the segment registers from the new GDT.  note that ds, es,
  // fs and gs get the *user* data selector; only cs and ss use kernel
  // selectors.
  set_cs(gdt_code_kernel);
  set_ds(gdt_data_user | SEL_PL_U);
  set_es(gdt_data_user | SEL_PL_U);
  set_fs(gdt_data_user | SEL_PL_U);
  set_gs(gdt_data_user | SEL_PL_U);
  set_ss(gdt_data_kernel);

  // and finally initialize the TSS
  // the I/O bitmap offset is the distance from the TSS to io_bitmap;
  // this is why the TSS has to lie close below io_bitmap (see the
  // cpu_page comment at the top)
  tss->ss0 = gdt_data_kernel;
  tss->io_bit_map_offset = static_cast<vm_offset_t>(io_bitmap) 
    - static_cast<vm_offset_t>(tss);
  set_tr(gdt_tss);

  // CPU initialization done

  // allocate a generic zero page
  zero_page = stupid_alloc(& himem);

  // allocate the kernel info page
  extern char __crt_dummy__, _end; // defined by linker and in crt0.S

  kinfo = static_cast<l4_kernel_info_t*>(phys_to_virt(stupid_alloc(& himem)));

  // initialize kernel info page from prototype: if the command line
  // contains " proto=<number>" and the number is non-zero, it is taken
  // as a physical address and one page is copied from there.
  // strtoul() is called with base 0.
  // NOTE(review): strtoul() is declared in <stdlib.h>, which this file
  // does not include directly -- presumably it arrives via another
  // header; confirm.
  // NOTE(review): the proto address is used without any validation.
  char *sub = strstr(kcmdline, " proto=");
  if (sub)
    {
      vm_offset_t proto;
      proto = strtoul(sub + 7, 0, 0);
      if (proto)
	{
	  memcpy(kinfo, phys_to_virt(proto), PAGE_SIZE);
	}
    }

  // the following fields are always overwritten, with or without a
  // prototype page
  kinfo->magic = L4_KERNEL_INFO_MAGIC;
  kinfo->version = config::kernel_version_id;
  kinfo->main_memory.low = 0;
  kinfo->main_memory.high = mem_max;
  // reserved0: the page-aligned range from __crt_dummy__ to _end
  kinfo->reserved0.low = trunc_page(virt_to_phys(&__crt_dummy__));
  kinfo->reserved0.high = round_page(virt_to_phys(&_end));
  // semi_reserved: from 1024 * kmbi.mem_lower up to 1MB
  kinfo->semi_reserved.low = 1024 * kmbi.mem_lower;
  kinfo->semi_reserved.high = 1024 * 1024;

  // the version string is copied to byte offset
  // offset_version_strings << 4 (= 0x100) within the info page
  kinfo->offset_version_strings = 0x10;
  strcpy(static_cast<char*>(kinfo) + (kinfo->offset_version_strings << 4),
	 config::kernel_version_string);
  
  kinfo->clock = 0;

  // now set up the kernel memory allocator

  // the kernel's memory share starts config::kernel_mem_per_cent
  // percent of mem_max below mem_max and ends at himem, i.e. below
  // everything stupid_alloc() has handed out so far.
  // NOTE(review): mem_max * kernel_mem_per_cent is evaluated before
  // the division; if vm_offset_t is 32 bits wide this could overflow
  // for large mem_max -- confirm.
  void *kmem_base = 
    phys_to_virt(mem_max - mem_max * config::kernel_mem_per_cent / 100);
  vm_size_t kmem_size = phys_to_virt(himem) - kmem_base;

  // one lmm region spanning the whole address space (base 0, size -1),
  // with only [kmem_base, kmem_base + kmem_size) added as free memory
  lmm_init(&lmm);
  lmm_add_region(&lmm, &lmm_region_all, (void*)0, (vm_size_t)-1, 0, 0);
  lmm_add_free(&lmm, kmem_base, kmem_size);

  // reserved1 covers everything from the (page-truncated) start of the
  // kernel memory up to the end of memory
  kinfo->reserved1.low = trunc_page(virt_to_phys(kmem_base));
  kinfo->reserved1.high = mem_max;
}
