/*
 * Copyright (c) 1995, 1996, 1997, 1998 The University of Utah and
 * the Computer Systems Laboratory at the University of Utah (CSL).
 *
 * This file is part of Flick, the Flexible IDL Compiler Kit.
 *
 * Flick is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Flick is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Flick; see the file COPYING.  If not, write to
 * the Free Software Foundation, 59 Temple Place #330, Boston, MA 02111, USA.
 */

#include <assert.h>
#include <mom/libmint.h>
#include <mom/c/libcast.h>
#include <mom/c/pbe_mem.hh>
#include <string.h>

/* This method overrides (actually extends)
   the default behavior of mu_array
   in order to generate more optimal marshaling/unmarshaling code
   for simple arrays of bytes that need no translation
   (i.e. a byte of message data maps to a byte in memory).
   It handles such arrays all in one shot
   by simply emitting a block copy statement.
   Arrays that don't meet the eligibility requirements
   simply get passed on to the default mu_array implementation,
   which iterates through the array elements individually.

   This code should really be able to optimize more varied data types this way:
   for example, a struct that contains only bytes should work too
   (or, for that matter, most any C data type
   that consists of just one chunk of memory
   and happens to be laid out in the same format
   as the message being marshaled/unmarshaled).
   This could be done by creating a generic method
   to be implemented (partly?) by more-specific code
   that recursively tests a complete itype/ctype/mapping subtree
   and returns true if the message and memory layouts are equivalent
   for that entire type subtree.
*/

/* Forward declaration of the file-local helper, defined later in this file,
   that mem_mu_state::mu_array calls to emit the m/u code for the array or
   pointer data itself.  `must' is the state object the code is emitted
   through; the remaining parameters are the ones mu_array was given. */
static void mu_pointer(
	mem_mu_state *must,
	cast_expr ptr_expr,
	cast_type ptr_ctype,
	pres_c_allocation *ptr_alloc,
	cast_type target_ctype,
	mint_ref target_itype,
	pres_c_mapping target_map,
	cast_expr len_expr, cast_type len_ctype,
	unsigned long len_min, unsigned long len_max);

/*
 * Emit marshal/unmarshal code for an array (or a plain pointer) and keep
 * the glob bookkeeping of this state object consistent while doing so.
 *
 *   array_expr/array_ctype  expression and C type of the array or pointer
 *   array_alloc             allocation semantics for the storage
 *   elem_ctype/elem_itype/elem_map
 *                           C type, MINT type and mapping of one element
 *   len_expr/len_ctype      expression and C type of the length; a null
 *                           len_ctype means "just a pointer, not an array"
 *   len_min/len_max         length bounds; (unsigned long) -1 as len_max
 *                           means unbounded
 *
 * The steps are: emit a run-time bounds check, short-cut the out-of-line
 * and plain-pointer cases, take a one-element dry run on a clone of this
 * state to learn how an element globs, pick a globbing strategy, hand the
 * actual m/u work to mu_pointer(), and finally patch up the glob size and
 * max_msg_size according to the chosen strategy.
 */
void mem_mu_state::mu_array(
	cast_expr array_expr,
	cast_type array_ctype,
	pres_c_allocation *array_alloc,
	cast_type elem_ctype,
	mint_ref elem_itype,
	pres_c_mapping elem_map,
	cast_expr len_expr, cast_type len_ctype,
	unsigned long len_min, unsigned long len_max)
{
	/* Bounds check the array length */
	if (len_ctype /* must be an actual array, not just a pointer */
	    && len_min != len_max /* and not a fixed array */) {
		if (len_max != (unsigned long) -1 /* not an unbounded array*/)
			/* Emits: if ((unsigned long)(len - min) > max - min)
			   error; the subtraction is skipped when min is 0. */
			add_stmt(cast_new_if(cast_new_binary_expr(
				CAST_BINARY_GT,
				(len_min != 0 ?
				 cast_new_expr_cast(
					 cast_new_binary_expr(
						 CAST_BINARY_SUB,
						 len_expr,
						 cast_new_expr_lit_int(
							 len_min,
							 CAST_MOD_UNSIGNED)),
					 cast_new_prim_type(CAST_PRIM_INT,
							    CAST_MOD_UNSIGNED
							    | CAST_MOD_LONG))
				 : len_expr),
				cast_new_expr_lit_int(
					len_max - len_min, CAST_MOD_UNSIGNED)),
					     make_error("FLICK_ERROR_OUT_OF_BOUNDS"),
					     0));
		else if (len_min != 0 /* bounded below */)
			/* Unbounded above: only the lower bound is checked. */
			add_stmt(cast_new_if(cast_new_binary_expr(
				CAST_BINARY_LT,
				len_expr,
				cast_new_expr_lit_int(
					len_min, CAST_MOD_UNSIGNED)),
					     make_error("FLICK_ERROR_OUT_OF_BOUNDS"),
					     0));
	}
	
	/* Before we do anything, see if we even need to...
	   out-of-line data can be marshaled very easily */
	if (strcmp(array_alloc->allocator, "out_of_line") == 0) {
		/* Out-of-line or "pass-by-pointer" m/u */
		assert (array_ctype->kind == CAST_TYPE_POINTER);
		if (elem_itype != 0) {
			/* Marshal the pointer itself
			   (as an unsigned 32-bit int) */
			mu_mapping_simple(
				array_expr,
				cast_new_prim_type(CAST_PRIM_INT,
						   CAST_MOD_UNSIGNED),
				pres->mint.standard_refs.unsigned32_ref);
			return;
		}
	}
	
	/* If this is just a pointer that needs to be dereferenced (and
	   possibly allocated), go ahead and do that now */
	if (!len_ctype) {
		/* Allocate space */
		if ((op & MUST_ALLOCATE) &&
		    (array_ctype->kind == CAST_TYPE_POINTER))
		    mu_pointer_alloc(array_expr, elem_ctype,
				     len_expr, array_alloc);
		
		mu_mapping(cast_new_unary_expr(CAST_UNARY_DEREF, array_expr),
			   elem_ctype, elem_itype, elem_map);
		
		/* Deallocate space */
		if ((op & MUST_DEALLOCATE) &&
		    (array_ctype->kind == CAST_TYPE_POINTER))
			mu_pointer_free(array_expr, elem_ctype,
					len_expr, array_alloc);
		return;
	}
	
	/* okay, we're handling some other type of pointer or array...
	   we need to figure out the optimal globbing and chunking. */
	
	int old_array_one_glob = array_one_glob;
	int elem_size;
	int elem_align_bits;
	unsigned array_glob_size;
	
	/* see if a bcopy is possible for this data */
	cast_expr bcopy_poss = mu_get_sizeof(elem_itype,
					     elem_ctype,
					     elem_map,
					     &elem_size,
					     &elem_align_bits);
	
	mem_mu_state *sub = 0;
	cast_expr *element_glob_size_expr;
	
	/* Take a dry run (unconditionally) to determine whether or not
	   we can marshal the array as one big glob,
	   and if so, whether it can be merged into the current glob
	   or we need to start a new glob before the array.
	   The dry run happens on a clone, which is deleted at the end of
	   this function.  */
	
	sub = (mem_mu_state*)clone();
	sub->break_glob();
	sub->make_glob();
	/* Remember which glob the element started in; if the clone ends up
	   with a different glob_size_expr, the element broke the glob. */
	element_glob_size_expr = sub->glob_size_expr;
	assert(element_glob_size_expr);
	sub->array_one_glob = 1;
	/* we call the "stupid" mu_array here, since the optimizations
	   may not give us the correct size for the elements */
	sub->mu_state::mu_array(array_expr, array_ctype, array_alloc,
				elem_ctype, elem_itype, elem_map,
				len_expr, len_ctype,
				1, 1 /* just do one element */);
	
	/* mu_get_sizeof() left a negative size: fall back on what the
	   dry run measured for one element. */
	if (elem_size < 0) 
		elem_size = sub->glob_size;
	
	/* Worst-case size of the whole array.  The product is saturated
	   instead of being allowed to wrap: with a huge or unbounded len_max
	   (e.g. (unsigned long) -1) a wrapped value could look small -- even
	   zero -- and make the array appear to fit into a single glob. */
	const unsigned glob_size_limit = (unsigned) -1;
	if (elem_size > 0 && len_max > glob_size_limit / (unsigned) elem_size)
		array_glob_size = glob_size_limit;
	else
		array_glob_size = elem_size * len_max;
	
	/* Classify the array:
	 *
	 * UNRESTRICTED
	 *	the element is not globbable, or it is a multi-chunk glob
	 *	and (one-glob * max > max_glob_size):
	 *		end-glob
	 *		for (;;) { new-glob; mu; end-glob }
	 *
	 * CHUNK_PER_ELEMENT
	 *	the element is one chunk, but (one-chunk * max > max_glob_size);
	 *	the outer glob is (element size * actual length of array):
	 *		new-glob(_length * one-chunk)
	 *		for (;;) { mu }
	 *
	 * BOUNDED_GLOBBABLE
	 *	the element is one glob or one chunk and the whole array fits;
	 *	the outer glob is (element glob size * max length of array):
	 *		new-glob(_max * one-glob)
	 *		for (;;) { mu }
	 */
	enum {UNRESTRICTED, CHUNK_PER_ELEMENT, BOUNDED_GLOBBABLE} arr_type;
	
	if (len_max <= 1) {
		/* 0 or 1 elements means we can fit it in a glob */
		arr_type = BOUNDED_GLOBBABLE;
	} else if (bcopy_poss && len_max >= max_glob_size) {
		arr_type = CHUNK_PER_ELEMENT;
	} else if (sub->glob_size_expr != element_glob_size_expr) {
		/* the element glob was broken during the dry run */
		arr_type = UNRESTRICTED;
	} else if (!sub->elem_one_chunk) {
		arr_type = (array_glob_size > max_glob_size) ?
			   UNRESTRICTED :
			   BOUNDED_GLOBBABLE;
	} else {
		arr_type = (array_glob_size > max_glob_size) ?
			   CHUNK_PER_ELEMENT :
			   BOUNDED_GLOBBABLE;
	}
	
	/* Set up the glob state the array is going to be emitted into. */
	array_one_glob = (arr_type != UNRESTRICTED);
	switch (arr_type) {
	case UNRESTRICTED:
		break_glob();
		break;
	case CHUNK_PER_ELEMENT:
		break_glob();
		if (array_glob_size > 0)
			make_glob();
		break;
	case BOUNDED_GLOBBABLE:
		if (glob_size + array_glob_size > max_glob_size)
			break_glob();
		if (array_glob_size > 0)
			make_glob();
		break;
	}
	
	/* glob size just before the array data is emitted */
	int orig_glob_size = glob_size;
	
	/* m/u the pointer/array, looking for optimizations */
	mu_pointer(this, array_expr, array_ctype, array_alloc,
		   elem_ctype, elem_itype, elem_map,
		   len_expr, len_ctype,
		   len_min, len_max);
	
	/* Fix up the glob size (expression) and the message size bound. */
	switch (arr_type) {
	case UNRESTRICTED:
		break_glob();
		max_msg_size = MAXUINT_MAX;
		break;
		
	case CHUNK_PER_ELEMENT:
	{
		int arr_size;
		/* Grab the glob's size expression and size before the glob
		   is broken, so the expression can be overwritten below. */
		cast_expr *orig_glob_size_expr = glob_size_expr;
		unsigned pre_break_glob_size = glob_size;
		break_glob();
		// we change the glob expression to be the real glob expression
		cast_expr var_size;
		if (len_expr->kind == CAST_EXPR_LIT_PRIM &&
		    len_expr->cast_expr_u_u.lit_prim.u.kind == CAST_PRIM_INT) {
			/* literal length: the array size is a constant */
			var_size = cast_new_expr_lit_int(
				arr_size =
				len_expr->cast_expr_u_u.lit_prim.u.cast_lit_prim_u_u.i * elem_size,
				0);
			/* NOTE(review): unlike the variable-length branch
			   below, which ADDS the array size to the existing
			   glob expression, this replaces var_size with the
			   existing literal alone and so drops arr_size.
			   Looks suspicious -- confirm intent before
			   changing. */
			if (bcopy_poss && pre_break_glob_size &&
			    (*orig_glob_size_expr)->kind == CAST_EXPR_LIT_PRIM &&
			    (*orig_glob_size_expr)->cast_expr_u_u.lit_prim.u.kind == CAST_PRIM_INT) {
				var_size = cast_new_expr_lit_int(
					(*orig_glob_size_expr)->cast_expr_u_u.lit_prim.u.cast_lit_prim_u_u.i,
					0);
			}
			*orig_glob_size_expr = var_size;
			if ((max_msg_size + arr_size) < max_msg_size)
				/* overflow */
				max_msg_size = MAXUINT_MAX;
			else
				max_msg_size += arr_size;
		} else {
			/* run-time length: glob size is elem_size * length */
			var_size = cast_new_binary_expr(
				CAST_BINARY_MUL,
				cast_new_expr_lit_int(elem_size, 0),
				len_expr);
			if (bcopy_poss && pre_break_glob_size)
				*orig_glob_size_expr = cast_new_binary_expr(
					CAST_BINARY_ADD,
					*orig_glob_size_expr,
					var_size);
			else
				*orig_glob_size_expr = var_size;
			
			if ((max_msg_size + array_glob_size) < max_msg_size)
				/* overflow */
				max_msg_size = MAXUINT_MAX;
			else
				max_msg_size += array_glob_size;
		}
		break;
	}
	
	case BOUNDED_GLOBBABLE:
		if (bcopy_poss) {
			/* bcopy may add extra padding for alignment, but will
			   not add the array size to the glob, so do it now */
			glob_size += array_glob_size;
		} else {
			/* non-bcopy will update the glob_size for a single
			   element.  What we want is the whole array. */
			glob_size = orig_glob_size + array_glob_size;
		}
		break;
	}
	
	array_one_glob = old_array_one_glob;
	delete sub;
}


#ifdef min
#undef min
#endif

#define min(a, b) ((a) < (b)) ? (a) : (b)

/*
 * Emit the m/u code for the data of an array or pointer through `must'.
 *
 * When mu_get_sizeof() reports that a block copy is possible, a call to a
 * `flick_<encode>_<buf>_bcopy' macro (or, for suitable `in' parameters
 * being decoded, `flick_<encode>_<buf>_msgptr') is emitted.  Unless
 * mu_bit_translation_necessary(0) yields a directive starting with
 * "#if 0", the generic element-wise code from mu_state::mu_array() is
 * emitted as well, with the two variants separated by the if/else/endif
 * directives that mu_bit_translation_necessary() supplies.  When no block
 * copy is possible only the element-wise code is emitted.
 *
 * The parameters are the ones mem_mu_state::mu_array() received; a
 * len_max of 0 means there is nothing to m/u and only allocation or
 * deallocation is emitted.
 */
static void mu_pointer(
	mem_mu_state *must,
	cast_expr ptr_expr,
	cast_type ptr_ctype,
	pres_c_allocation *ptr_alloc,
	cast_type target_ctype,
	mint_ref target_itype,
	pres_c_mapping target_map,
	cast_expr len_expr, cast_type len_ctype,
	unsigned long len_min, unsigned long len_max)
{
	cast_expr bcopy_poss;
	int target_size;
	int target_align_bits;

	/* special case of nothing to m/u */
	if (len_max == 0) {
		/* Allocate or dealloc space,
		   just don't do any marshaling */
		if ((must->op & MUST_ALLOCATE) &&
		    (ptr_ctype->kind == CAST_TYPE_POINTER))
			must->mu_pointer_alloc(ptr_expr, target_ctype,
					       len_expr, ptr_alloc);
		if ((must->op & MUST_DEALLOCATE) &&
		    (ptr_ctype->kind == CAST_TYPE_POINTER))
			must->mu_pointer_free(ptr_expr, target_ctype,
					      len_expr, ptr_alloc);
		
		/* Done -- no marshaling! */
		return;
	}
	
	/* see if a bcopy is possible for this data */
	bcopy_poss = must->mu_get_sizeof(target_itype, target_ctype,
					 target_map,
					 &target_size, &target_align_bits);
	
	/* Preprocessor directives separating the two code variants; all
	   three stay null when only one variant is emitted. */
	cast_stmt bcopy_if = 0, bcopy_else = 0, bcopy_endif = 0;
	int starting_glob_size = -1;
	
	if (bcopy_poss) {
		/* Possible optimization for a bcopy.
		   Depending on transport and byte-ordering at compile
		   time, this optimization may or may not be used */
		
		bcopy_if = must->mu_bit_translation_necessary(0);
		assert(bcopy_if);
		assert(bcopy_if->kind == CAST_STMT_TEXT);
		
		/* If we're just going to spit out an #if 0, then
		   there's no reason to spit out this code. */
		if (strncmp("#if 0", bcopy_if->cast_stmt_u_u.text, 5) == 0) {
			bcopy_if = bcopy_else = bcopy_endif = 0;
		} else {
			bcopy_else = must->mu_bit_translation_necessary(1);
			assert(bcopy_else);
			assert(bcopy_else->kind == CAST_STMT_TEXT);
			bcopy_endif = must->mu_bit_translation_necessary(2);
			assert(bcopy_endif);
			assert(bcopy_endif->kind == CAST_STMT_TEXT);
		}
	}
	
	if (!bcopy_poss || bcopy_if) {
		/* Always create a chunk break before and after arrays;
		   each array element must be treated as one or more
		   separate chunks.
		   This really is only necessary when generating loops;
		   no loops are generated for pointers or 1-element arrays. */
		if (len_max > 1)
			must->break_chunk();
		starting_glob_size = must->glob_size;
	}
	
	if (bcopy_poss && bcopy_if) {
		must->add_stmt(bcopy_if);
		/* insert the same "#if" around the array_iter definition */
		must->add_direct_code(
			flick_asprintf("%s\n",
				       bcopy_if->cast_stmt_u_u.text));
	}
	
	/* only do it if we have to */
	if (!bcopy_poss || bcopy_if) {
		must->mu_state::mu_array(ptr_expr, ptr_ctype, ptr_alloc,
					 target_ctype, target_itype,
					 target_map,
					 len_expr, len_ctype,
					 len_min, len_max);
		if (bcopy_poss)
			/* to ensure we end up in the same chunking
			   state as the bcopy, make sure the chunk
			   is broken here */
			must->break_chunk();
	}
	
	if (bcopy_poss &&
	    (must->op & (MUST_ENCODE | MUST_DECODE))) {
		cast_expr macro, call = 0;
		
		if (bcopy_else) {
			must->add_stmt(bcopy_else);
			assert(bcopy_endif);
			assert(bcopy_endif->kind == CAST_STMT_TEXT);
			/* close, in the direct code, the conditional that was
			   opened around the array_iter definition above */
			must->add_direct_code(
				flick_asprintf(
					"%s\n",
					bcopy_endif->cast_stmt_u_u.text));
			/* set the glob size back to what it was
			   before we m/u-ed the array
			   (this ensures the correct glob size) */
			assert(starting_glob_size >= 0);
			must->glob_size = starting_glob_size;
		}
		
		/* Number of bytes to copy: folded to a constant when the
		   length is an integer literal, otherwise size * length. */
		cast_expr length_expr;
		if (len_expr->kind == CAST_EXPR_LIT_PRIM &&
		    len_expr->cast_expr_u_u.lit_prim.u.kind == CAST_PRIM_INT) {
			length_expr = cast_new_expr_lit_int(
				target_size *
				len_expr->cast_expr_u_u.lit_prim.u.cast_lit_prim_u_u.i,
				0);
		} else {
			length_expr = 			
				cast_new_binary_expr(CAST_BINARY_MUL,
						     bcopy_poss,
						     len_expr);
		}
		
		/* if we're decoding a variable-length array, we
		   probably just unmarshaled the length, and thus
		   can't use the same chunk we are in (doing so
		   would mean we use the array length to start
		   the same chunk in which we decode the length!) */
		if ((len_min != len_max) &&
		    (must->op & MUST_DECODE) &&
		    (must->chunk_size > 0))
			must->break_chunk();
		
		/* align ourselves for the upcoming data */
		assert(target_size >= 0);
		assert(target_align_bits >= 0);
		int offset = must->chunk_prim(target_align_bits, 0);
		cast_expr ofs_expr = cast_new_expr_lit_int(offset, 0);
		
		if ((must->op & MUST_DECODE)
		    /*
		     * ...and this object is `in'.  (XXX --- Bogus check.  What
		     * we really want to know is that the receiver won't try to
		     * modify or reallocate the object.)
		     */
		    && (must->current_param_dir == PRES_C_DIRECTION_IN)
		    /*
		     * ...and there's no special allocator
		     * NOTE: The reason we do not directly check the allocator
		     * ourselves is because of the `auto_alloc' hack.  We
		     * leverage off of get_allocator() to find the `true'
		     * allocator that will be used.
		     */
		    && (strcmp(must->get_allocator(ptr_alloc),
			       "auto_flick_alloc") == 0)
		    /*
		     * ...and we are responsible for allocating the object.
		     */
		    && ((ptr_alloc->flags & PRES_C_ALLOC_EVER)
			!= PRES_C_ALLOC_NEVER)
		    /*
		     * ...and we are responsible for later deallocating the
		     * object, too.
		     */
		    && ((ptr_alloc->flags & PRES_C_DEALLOC_EVER)
			== PRES_C_DEALLOC_ALWAYS)
		    /*
		     * ...and the presentation is through a pointer.
		     */
		    && (ptr_ctype->kind == CAST_TYPE_POINTER)) {
		       	/* Optimization for a pointer into the
			   message buffer */
			
			/* There is no need to allocate space here */
			
			macro = cast_new_expr_name(
				flick_asprintf("flick_%s_%s_msgptr",
					       must->get_encode_name(),
					       must->get_buf_name()));
			
			call = cast_new_expr_call(macro, 2);
			call->cast_expr_u_u.call.params.params_val[0]
				= ofs_expr;
			call->cast_expr_u_u.call.params.params_val[1]
				= ptr_expr;
			must->add_stmt(cast_new_stmt_expr(call));
			
		} else {
			/* Optimization for a bcopy */
			
			/* Allocate space */
			if ((must->op & MUST_ALLOCATE) &&
			    (ptr_ctype->kind == CAST_TYPE_POINTER))
				must->mu_pointer_alloc(ptr_expr, target_ctype,
						       len_expr, ptr_alloc);
			
			macro = cast_new_expr_name(
				flick_asprintf("flick_%s_%s_bcopy",
					       must->get_encode_name(),
					       must->get_buf_name()));
			
			call = cast_new_expr_call(macro, 3);
			call->cast_expr_u_u.call.params.params_val[0]
				= ofs_expr;
			call->cast_expr_u_u.call.params.params_val[1]
				= ptr_expr; /* destination array */
			call->cast_expr_u_u.call.params.params_val[2]
				= length_expr;
			must->add_stmt(cast_new_stmt_expr(call));
			
			/* Deallocate space */
			if ((must->op & MUST_DEALLOCATE) &&
			    (ptr_ctype->kind == CAST_TYPE_POINTER))
				must->mu_pointer_free(ptr_expr, target_ctype,
						      len_expr, ptr_alloc);
		}
		
		/* figure out what we know about the alignment at this point.
		   (This is normally taken care of by mu_array_elem(),
		   but the bcopy/msgptr optimizations sometimes
		   eliminate the need to ever run it, so do it now) */
		
		if (len_min == len_max) {
			/* constant length, so we can calculate
			   the exact size of the whole array */
			int mask = (1 << must->align_bits) - 1;
			int ofs = target_size * len_max + must->align_ofs;
			must->align_ofs = ofs & mask;
			must->chunk_size = offset + target_size * len_max;
		} else {
			/* variable length, but we know the
			   element size, so judge from that:
			   the alignment we can still rely on is limited by
			   the lowest set bit of the element size.
			   The search stops at the current align_bits -- a
			   larger value could never lower it -- which also
			   keeps a zero element size (permitted by the assert
			   above) from shifting past the width of an int. */
			int bits = 0;
			while (bits < must->align_bits &&
			       (target_size & (1 << bits)) == 0)
				bits++;
			if (bits < must->align_bits)
				must->align_bits = bits;
			must->align_ofs &= ((1 << must->align_bits) - 1);
			/* keep a handle on the current chunk's size
			   expression; it is filled in after the break */
			cast_expr *chunk = must->chunk_size_expr;
			must->break_chunk();
			/* add to current chunk size if > 0 */
			if (offset > 0)
				*chunk = cast_new_binary_expr(CAST_BINARY_ADD,
							      *chunk,
							      length_expr);
			else
				*chunk = length_expr;
		}
		
		if (bcopy_endif) {
			/* have to break the chunk here so we can be
			   in the same chunking state as the other
			   case of the #if */
			must->break_chunk();
			must->add_stmt(bcopy_endif);
		}
	}
}

/* End of file. */
