changeset 9023:4d49ebf4b433

8168849: aarch32: support soft-float platform Reviewed-by: enevill
author snazarki
date Thu, 27 Oct 2016 18:42:29 +0300
parents 51039f5f10ba
children bbff923bf024
files src/cpu/aarch32/vm/assembler_aarch32.hpp src/cpu/aarch32/vm/c1_Defs_aarch32.hpp src/cpu/aarch32/vm/c1_FrameMap_aarch32.cpp src/cpu/aarch32/vm/c1_LIRAssembler_aarch32.cpp src/cpu/aarch32/vm/c1_LIRGenerator_aarch32.cpp src/cpu/aarch32/vm/c1_LinearScan_aarch32.hpp src/cpu/aarch32/vm/c1_Runtime1_aarch32.cpp src/cpu/aarch32/vm/globals_aarch32.hpp src/cpu/aarch32/vm/interp_masm_aarch32.cpp src/cpu/aarch32/vm/interpreter_aarch32.cpp src/cpu/aarch32/vm/jniFastGetField_aarch32.cpp src/cpu/aarch32/vm/macroAssembler_aarch32.cpp src/cpu/aarch32/vm/macroAssembler_aarch32.hpp src/cpu/aarch32/vm/sharedRuntime_aarch32.cpp src/cpu/aarch32/vm/stubGenerator_aarch32.cpp src/cpu/aarch32/vm/templateInterpreter_aarch32.cpp src/cpu/aarch32/vm/templateTable_aarch32.cpp src/cpu/aarch32/vm/vm_version_aarch32.cpp src/cpu/aarch32/vm/vm_version_aarch32.hpp src/share/vm/c1/c1_LIR.hpp src/share/vm/c1/c1_LIRGenerator.cpp src/share/vm/c1/c1_LIRGenerator.hpp src/share/vm/c1/c1_LinearScan.cpp
diffstat 23 files changed, 1277 insertions(+), 580 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/aarch32/vm/assembler_aarch32.hpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/assembler_aarch32.hpp	Thu Oct 27 18:42:29 2016 +0300
@@ -588,8 +588,7 @@
 };
 
 
-const int FPUStateSizeInWords = 16 * 2;
-
+const int FPUStateSizeInWords = FloatRegisterImpl::number_of_registers;
 
 class Assembler : public AbstractAssembler {
   void emit_long(jint x) {
--- a/src/cpu/aarch32/vm/c1_Defs_aarch32.hpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/c1_Defs_aarch32.hpp	Thu Oct 27 18:42:29 2016 +0300
@@ -62,8 +62,8 @@
 
   // Number of registers killed by calls
   pd_nof_caller_save_cpu_regs_frame_map = 8,
-  pd_nof_caller_save_fpu_regs_frame_map = 32,
 
+  pd_nof_caller_save_fpu_regs_frame_map = pd_nof_fpu_regs_frame_map,
   // The following two constants need to be defined since they are referenced
   // from c1_FrameMap.hpp, but actually they are never used, so can be set to
   // arbitrary values.
@@ -81,15 +81,14 @@
   pd_first_cpu_reg = 0,
   pd_last_cpu_reg = 7,
   pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,
-  pd_last_fpu_reg = pd_first_fpu_reg + 31,
-
+  pd_last_fpu_reg = pd_first_fpu_reg + pd_nof_fpu_regs_frame_map - 1,
   // Register allocator specific register numbers corresponding to first/last
   // CPU/FPU callee-saved registers. These constants are used in
   // LinearScan::is_caller_save() only.
   pd_first_callee_saved_cpu_reg = 4,
   pd_last_callee_saved_cpu_reg = 11,
-  pd_first_callee_saved_fpu_reg = pd_first_fpu_reg + 16,
-  pd_last_callee_saved_fpu_reg = pd_first_fpu_reg + 31
+  pd_first_callee_saved_fpu_reg = pd_first_fpu_reg + pd_nof_fpu_regs_frame_map/2,
+  pd_last_callee_saved_fpu_reg = pd_first_fpu_reg + pd_nof_fpu_regs_frame_map - 1
 };
 
 // This flag must be in sync with how the floating point registers are stored
--- a/src/cpu/aarch32/vm/c1_FrameMap_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/c1_FrameMap_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -202,7 +202,12 @@
     opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type));
   } else if (r_1->is_Register()) {
     Register reg1 = r_1->as_Register();
-    if (type == T_LONG) {
+#ifdef HARD_FLOAT_CC
+    if (type == T_DOUBLE || type == T_FLOAT) {
+        ShouldNotReachHere();
+    } else
+#endif
+    if (type == T_LONG || type == T_DOUBLE) {
       assert(r_2->is_Register(), "wrong VMReg");
       Register reg2 = r_2->as_Register();
       opr = as_long_opr(reg1, reg2);
@@ -214,7 +219,6 @@
       opr = as_opr(reg1);
     }
   } else if (r_1->is_FloatRegister()) {
-    assert(type == T_DOUBLE || type == T_FLOAT, "wrong type");
     int num = r_1->as_FloatRegister()->encoding();
     if (type == T_FLOAT) {
       opr = LIR_OprFact::single_fpu(num);
--- a/src/cpu/aarch32/vm/c1_LIRAssembler_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/c1_LIRAssembler_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -524,7 +524,7 @@
   switch (c->type()) {
     case T_INT: {
       assert(patch_code == lir_patch_none, "no patching handled here");
-      __ mov(dest->as_register(), c->as_jint());
+      __ mov(dest->as_register(), c->as_jint_bits());
       break;
     }
 
@@ -536,8 +536,8 @@
 
     case T_LONG: {
       assert(patch_code == lir_patch_none, "no patching handled here");
-      __ mov(dest->as_register_lo(), c->as_jint_lo());
-      __ mov(dest->as_register_hi(), c->as_jint_hi());
+      __ mov(dest->as_register_lo(), c->as_jint_lo_bits());
+      __ mov(dest->as_register_hi(), c->as_jint_hi_bits());
       break;
     }
 
@@ -560,30 +560,33 @@
     }
 
     case T_FLOAT: {
-#ifdef __ARM_PCS_VFP
-        if (__ operand_valid_for_float_immediate(c->as_jfloat())) {
-            __ vmov_f32(dest->as_float_reg(), c->as_jfloat());
+        if(dest->is_single_fpu()) {
+            if (__ operand_valid_for_float_immediate(c->as_jfloat())) {
+                __ vmov_f32(dest->as_float_reg(), c->as_jfloat());
+            } else {
+                __ lea(rscratch1, InternalAddress(float_constant(c->as_jfloat())));
+                __ vldr_f32(dest->as_float_reg(), Address(rscratch1));
+            }
         } else {
-            __ lea(rscratch1, InternalAddress(float_constant(c->as_jfloat())));
-            __ vldr_f32(dest->as_float_reg(), Address(rscratch1));
+            assert(patch_code == lir_patch_none, "no patching handled here");
+            __ mov(dest->as_register(), c->as_jint_bits());
         }
-#else
-#error "unimplemented"
-#endif
       break;
     }
 
     case T_DOUBLE: {
-#ifdef __ARM_PCS_VFP
-        if (__ operand_valid_for_double_immediate(c->as_jdouble())) {
-            __ vmov_f64(dest->as_double_reg(), c->as_jdouble());
+        if(dest->is_double_fpu()) {
+            if (__ operand_valid_for_double_immediate(c->as_jdouble())) {
+                __ vmov_f64(dest->as_double_reg(), c->as_jdouble());
+            } else {
+                __ lea(rscratch1, InternalAddress(double_constant(c->as_jdouble())));
+                __ vldr_f64(dest->as_double_reg(), Address(rscratch1));
+            }
         } else {
-            __ lea(rscratch1, InternalAddress(double_constant(c->as_jdouble())));
-            __ vldr_f64(dest->as_double_reg(), Address(rscratch1));
+            assert(patch_code == lir_patch_none, "no patching handled here");
+            __ mov(dest->as_register_lo(), c->as_jint_lo_bits());
+            __ mov(dest->as_register_hi(), c->as_jint_hi_bits());
         }
-#else
-#error "unimplemented"
-#endif
       break;
     }
 
@@ -705,36 +708,42 @@
       move_regs(src->as_register_lo(), dest->as_register());
       return;
     }
-    assert(src->is_single_cpu(), "must match");
-    if (src->type() == T_OBJECT) {
-      __ verify_oop(src->as_register());
-    }
-    move_regs(src->as_register(), dest->as_register());
-
-  } else if (dest->is_double_cpu()) {
-    if (src->type() == T_OBJECT || src->type() == T_ARRAY) {
-      // Surprising to me but we can see move of a long to t_object
-      __ verify_oop(src->as_register());
-      move_regs(src->as_register(), dest->as_register_lo());
-      __ mov(dest->as_register_hi(), 0);
-      return;
+    if(src->is_single_fpu()) {
+        __ vmov_f32(dest->as_register(), src->as_float_reg());
+    } else {
+        assert(src->is_single_cpu(), "must match");
+        if (src->type() == T_OBJECT) {
+          __ verify_oop(src->as_register());
+        }
+        move_regs(src->as_register(), dest->as_register());
     }
-    assert(src->is_double_cpu(), "must match");
-    Register f_lo = src->as_register_lo();
-    Register f_hi = src->as_register_hi();
-    Register t_lo = dest->as_register_lo();
-    Register t_hi = dest->as_register_hi();
-    assert(f_hi != f_lo, "must be different");
-    assert(t_hi != t_lo, "must be different");
-    check_register_collision(t_lo, &f_hi);
-    move_regs(f_lo, t_lo);
-    move_regs(f_hi, t_hi);
+  } else if (dest->is_double_cpu()) {
+      if(src->is_double_fpu()) {
+        __ vmov_f64(dest->as_register_lo(), dest->as_register_hi(), src->as_double_reg());
+      } else {
+        assert(src->is_double_cpu(), "must match");
+        Register f_lo = src->as_register_lo();
+        Register f_hi = src->as_register_hi();
+        Register t_lo = dest->as_register_lo();
+        Register t_hi = dest->as_register_hi();
+        assert(f_hi != f_lo, "must be different");
+        assert(t_hi != t_lo, "must be different");
+        check_register_collision(t_lo, &f_hi);
+        move_regs(f_lo, t_lo);
+        move_regs(f_hi, t_hi);
+      }
   } else if (dest->is_single_fpu()) {
-    __ vmov_f32(dest->as_float_reg(), src->as_float_reg());
-
+      if(src->is_single_cpu()) {
+        __ vmov_f32(dest->as_float_reg(), src->as_register());
+      } else {
+        __ vmov_f32(dest->as_float_reg(), src->as_float_reg());
+      }
   } else if (dest->is_double_fpu()) {
-    __ vmov_f64(dest->as_double_reg(), src->as_double_reg());
-
+      if(src->is_double_cpu()) {
+        __ vmov_f64(dest->as_double_reg(), src->as_register_lo(), src->as_register_hi());
+      } else {
+        __ vmov_f64(dest->as_double_reg(), src->as_double_reg());
+      }
   } else {
     ShouldNotReachHere();
   }
@@ -752,21 +761,12 @@
   } else if (src->is_double_cpu()) {
     Address dest_addr_LO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes);
     __ strd(src->as_register_lo(), src->as_register_hi(), dest_addr_LO);
-
   } else if (src->is_single_fpu()) {
     Address dest_addr = frame_map()->address_for_slot(dest->single_stack_ix());
-#ifdef __ARM_PCS_VFP
     __ vstr_f32(src->as_float_reg(), dest_addr.safe_for(Address::IDT_FLOAT, _masm, rscratch1));
-#else
-#error "unimplemented"
-#endif
   } else if (src->is_double_fpu()) {
     Address dest_addr = frame_map()->address_for_slot(dest->double_stack_ix());
-#ifdef __ARM_PCS_VFP
     __ vstr_f64(src->as_double_reg(), dest_addr.safe_for(Address::IDT_DOUBLE, _masm, rscratch1));
-#else
-#error "unimplemented"
-#endif
   } else {
     ShouldNotReachHere();
   }
@@ -795,29 +795,13 @@
 
   int null_check_here = code_offset();
   switch (type) {
-    case T_FLOAT: {
-#ifdef __ARM_PCS_VFP
-      Address addr = as_Address(to_addr, Address::IDT_FLOAT);
-      null_check_here = code_offset();
-      __ vstr_f32(src->as_float_reg(), addr);
-#else
-#error "unimplemented"
-#endif
-      break;
-    }
-
-    case T_DOUBLE: {
-#ifdef __ARM_PCS_VFP
-      Address addr = as_Address(to_addr, Address::IDT_DOUBLE);
-      null_check_here = code_offset();
-      __ vstr_f64(src->as_double_reg(), addr);
-#else
-#error "unimplemented"
-#endif
-
-      break;
-    }
-
+    case T_FLOAT:
+        if(src->is_single_fpu()) {
+            Address addr = as_Address(to_addr, Address::IDT_FLOAT);
+            null_check_here = code_offset();
+            __ vstr_f32(src->as_float_reg(), addr);
+            break;
+        } // fall through at FPUless system
     case T_ARRAY:   // fall through
     case T_OBJECT:  // fall through
     case T_ADDRESS: // fall though
@@ -836,6 +820,13 @@
 //      __ str(src->as_register(), as_Address(to_addr));
       break;
 
+    case T_DOUBLE:
+        if(src->is_double_fpu()) {
+            Address addr = as_Address(to_addr, Address::IDT_DOUBLE);
+            null_check_here = code_offset();
+            __ vstr_f64(src->as_double_reg(), addr);
+            break;
+        } // fall through at FPUless system
     case T_LONG: {
       Address addr = as_Address_lo(to_addr, Address::IDT_LONG);
       null_check_here = code_offset();
@@ -882,21 +873,12 @@
   } else if (dest->is_double_cpu()) {
     Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes);
     __ ldrd(dest->as_register_lo(), dest->as_register_hi(), src_addr_LO);
-
   } else if (dest->is_single_fpu()) {
-#ifdef __ARM_PCS_VFP
     Address src_addr = frame_map()->address_for_slot(src->single_stack_ix());
     __ vldr_f32(dest->as_float_reg(), src_addr.safe_for(Address::IDT_FLOAT, _masm, rscratch1));
-#else
-#error "unimplemented"
-#endif
   } else if (dest->is_double_fpu()) {
-#ifdef __ARM_PCS_VFP
     Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
     __ vldr_f64(dest->as_double_reg(), src_addr.safe_for(Address::IDT_DOUBLE, _masm, rscratch1));
-#else
-#error "unimplemented"
-#endif
   } else {
     ShouldNotReachHere();
   }
@@ -944,28 +926,13 @@
   int null_check_here = code_offset();
 
   switch (type) {
-    case T_FLOAT: {
-#ifdef __ARM_PCS_VFP
-    Address addr = as_Address(from_addr, Address::IDT_FLOAT);
-    null_check_here = code_offset();
-    __ vldr_f32(dest->as_float_reg(), addr);
-#else
-#error "unimplemented"
-#endif
-      break;
-    }
-
-    case T_DOUBLE: {
-#ifdef __ARM_PCS_VFP
-    Address addr = as_Address(from_addr, Address::IDT_DOUBLE);
-    null_check_here = code_offset();
-    __ vldr_f64(dest->as_double_reg(), addr);
-#else
-#error "unimplemented"
-#endif
-      break;
-    }
-
+    case T_FLOAT:
+        if(dest->is_single_fpu()){
+            Address addr = as_Address(from_addr, Address::IDT_FLOAT);
+            null_check_here = code_offset();
+            __ vldr_f32(dest->as_float_reg(), addr);
+              break;
+        }  // fall through at FPUless systems
     case T_ARRAY:   // fall through
     case T_OBJECT:  // fall through
     case T_ADDRESS: // fall through
@@ -983,7 +950,13 @@
       ShouldNotReachHere();
 //      __ ldr(dest->as_register(), as_Address(from_addr));
       break;
-
+    case T_DOUBLE:
+        if(dest->is_double_fpu()){
+            Address addr = as_Address(from_addr, Address::IDT_DOUBLE);
+            null_check_here = code_offset();
+            __ vldr_f64(dest->as_double_reg(), addr);
+              break;
+        } // fall through at FPUless systems
     case T_LONG: {
       Address addr = as_Address_lo(from_addr, Address::IDT_LONG);
       null_check_here = code_offset();
@@ -1668,6 +1641,10 @@
   assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method");
 
   if (left->is_single_cpu()) {
+    assert(left->type() != T_FLOAT, "expect integer type");
+    assert(right->type() != T_FLOAT, "expect integer type");
+    assert(dest->type() != T_FLOAT, "expect integer type");
+
     Register lreg = left->as_register();
     Register dreg = as_reg(dest);
 
@@ -1717,6 +1694,10 @@
     }
 
   } else if (left->is_double_cpu()) {
+    assert(left->type() != T_DOUBLE, "expect integer type");
+    assert(right->type() != T_DOUBLE, "expect integer type");
+    assert(dest->type() != T_DOUBLE, "expect integer type");
+
     Register lreg_lo = left->as_register_lo();
     Register lreg_hi = left->as_register_hi();
 
@@ -1891,6 +1872,10 @@
 
 void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
   if (opr1->is_single_cpu()) {
+
+    assert(opr1->type() != T_FLOAT, "expect integer type");// softfp guard
+    assert(opr2->type() != T_FLOAT, "expect integer type");
+
     Register reg1 = as_reg(opr1);
     if (opr2->is_single_cpu()) {
       // cpu register - cpu register
@@ -2880,9 +2865,13 @@
 
 void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
   if (left->is_single_cpu()) {
+    assert(left->type() != T_FLOAT, "expect integer type");
+    assert(dest->type() != T_FLOAT, "expect integer type");
     assert(dest->is_single_cpu(), "expect single result reg");
     __ neg(dest->as_register(), left->as_register());
   } else if (left->is_double_cpu()) {
+    assert(left->type() != T_DOUBLE, "expect integer type");
+    assert(dest->type() != T_DOUBLE, "expect integer type");
     assert(dest->is_double_cpu(), "expect double result reg");
     const Register l_lo = left->as_register_lo();
     Register l_hi = left->as_register_hi();
@@ -2892,10 +2881,12 @@
   } else if (left->is_single_fpu()) {
     assert(dest->is_single_fpu(), "expect single float result reg");
     __ vneg_f32(dest->as_float_reg(), left->as_float_reg());
-  } else {
+  } else if (left->is_double_fpu()) {
     assert(left->is_double_fpu(), "expect double float operand reg");
     assert(dest->is_double_fpu(), "expect double float result reg");
     __ vneg_f64(dest->as_double_reg(), left->as_double_reg());
+  } else {
+      ShouldNotReachHere();
   }
 }
 
@@ -2931,7 +2922,9 @@
       const LIR_Opr long_tmp = FrameMap::long1_opr;
       __ lea(rscratch1, as_Address_lo(dest->as_address_ptr(), Address::IDT_LEA));
 
-      if (type == T_DOUBLE) {
+
+      if (src->is_double_fpu()) {
+        assert(type == T_DOUBLE, "invalid register allocation");
         // long0 reserved as temp by LinearScan::pd_add_temps
         __ vmov_f64(long_val->as_register_lo(), long_val->as_register_hi(), src->as_double_reg());
       } else {
@@ -2948,7 +2941,7 @@
       null_check_offset = __ offset();
       __ atomic_ldrd(long_val->as_register_lo(), long_val->as_register_hi(), rscratch1);
 
-      if (type == T_DOUBLE) {
+      if (dest->is_double_fpu()) {
         __ vmov_f64(dest->as_double_reg(), long_val->as_register_lo(), long_val->as_register_hi());
       } else {
         assert(type != T_LONG || dest->is_same_register(long_val), "T_LONG dest should be in long0 (by LIRGenerator)");
--- a/src/cpu/aarch32/vm/c1_LIRGenerator_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/c1_LIRGenerator_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -44,6 +44,7 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "vmreg_aarch32.inline.hpp"
+#include "vm_version_aarch32.hpp"
 
 #ifdef ASSERT
 #define __ gen()->lir(__FILE__, __LINE__)->
@@ -81,20 +82,44 @@
 LIR_Opr LIRGenerator::getThreadTemp()   { return LIR_OprFact::illegalOpr; }
 
 
+LIR_Opr LIRGenerator::java_result_register_for(ValueType* type, bool callee) {
+  LIR_Opr opr;
+  switch (type->tag()) {
+    case floatTag:
+        if(hasFPU()) {
+            opr = FrameMap::fpu0_float_opr;  break;;
+        }
+    case doubleTag:
+        if(hasFPU()) {
+            opr = FrameMap::fpu0_double_opr;  break;
+        }
+    default: opr = result_register_for(type, callee);
+  }
+  return opr;
+}
 LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
   LIR_Opr opr;
   switch (type->tag()) {
+    case floatTag:
+#ifdef HARD_FLOAT_CC
+        opr = FrameMap::fpu0_float_opr;  break;
+#endif
     case intTag:     opr = FrameMap::r0_opr;          break;
     case objectTag:  opr = FrameMap::r0_oop_opr;      break;
+    case doubleTag:
+#ifdef HARD_FLOAT_CC
+        opr = FrameMap::fpu0_double_opr;  break;
+#endif
     case longTag:    opr = FrameMap::long0_opr;        break;
-    case floatTag:   opr = FrameMap::fpu0_float_opr;  break;
-    case doubleTag:  opr = FrameMap::fpu0_double_opr;  break;
 
     case addressTag:
     default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr;
   }
-
+#ifndef HARD_FLOAT_CC
+  assert(type->is_float_kind() || opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+#else
   assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+#endif
   return opr;
 }
 
@@ -151,9 +176,17 @@
     return c->as_metadata() == (Metadata*) NULL;
 
   case T_FLOAT:
-    return Assembler::operand_valid_for_float_immediate(c->as_jfloat());
+    if( hasFPU()) {
+        return Assembler::operand_valid_for_float_immediate(c->as_jfloat());
+    } else {
+       return Assembler::operand_valid_for_add_sub_immediate(c->as_jint());
+    }
   case T_DOUBLE:
-    return Assembler::operand_valid_for_float_immediate(c->as_jdouble());
+    if( hasFPU()) {
+        return Assembler::operand_valid_for_float_immediate(c->as_jdouble());
+    } else {
+        return Assembler::operand_valid_for_add_sub_immediate(c->as_jlong());
+    }
   }
   return false;
 }
@@ -445,12 +478,24 @@
 
 
 void LIRGenerator::do_NegateOp(NegateOp* x) {
-
+#ifdef __SOFTFP__
+  if(x->x()->type()->is_float_kind() && !(hasFPU())) {
+      address entry;
+      if (x->x()->type()->is_float()) {
+          entry = CAST_FROM_FN_PTR(address, SharedRuntime::fneg);
+      } else {
+          entry = CAST_FROM_FN_PTR(address, SharedRuntime::dneg);
+      }
+      LIR_Opr result = call_runtime(x->x(), entry, x->type(), NULL);
+      set_result(x, result);
+  } else
+#endif
+  {
   LIRItem from(x->x(), this);
   from.load_item();
   LIR_Opr result = rlock_result(x);
   __ negate (from.result(), result);
-
+  }
 }
 
 // for  _fadd, _fmul, _fsub, _fdiv, _frem
@@ -458,60 +503,77 @@
 void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
 
   if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) {
-    // float remainder is implemented as a direct call into the runtime
-    LIRItem right(x->x(), this);
-    LIRItem left(x->y(), this);
-
-    BasicTypeList signature(2);
-    if (x->op() == Bytecodes::_frem) {
-      signature.append(T_FLOAT);
-      signature.append(T_FLOAT);
-    } else {
-      signature.append(T_DOUBLE);
-      signature.append(T_DOUBLE);
-    }
-    CallingConvention* cc = frame_map()->c_calling_convention(&signature);
-
-    const LIR_Opr result_reg = result_register_for(x->type());
-    left.load_item_force(cc->at(1));
-    right.load_item();
-
-    __ move(right.result(), cc->at(0));
-
     address entry;
     if (x->op() == Bytecodes::_frem) {
       entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem);
     } else {
       entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem);
     }
-
-    LIR_Opr result = rlock_result(x);
-    __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args());
-    __ move(result_reg, result);
+    LIR_Opr result = call_runtime(x->x(), x->y(), entry, x->type(), NULL);
+    set_result(x, result);
 
     return;
   }
 
-  LIRItem left(x->x(),  this);
-  LIRItem right(x->y(), this);
-  LIRItem* left_arg  = &left;
-  LIRItem* right_arg = &right;
+  if(hasFPU()) {
+        LIRItem left(x->x(),  this);
+        LIRItem right(x->y(), this);
+        LIRItem* left_arg  = &left;
+        LIRItem* right_arg = &right;
+
+        // Always load right hand side.
+        right.load_item();
+
+        if (!left.is_register())
+          left.load_item();
 
-  // Always load right hand side.
-  right.load_item();
+        LIR_Opr reg = rlock(x);
+        LIR_Opr tmp = LIR_OprFact::illegalOpr;
+        if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) {
+          tmp = new_register(T_DOUBLE);
+        }
+
+        arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), NULL);
+
+        set_result(x, round_item(reg));
+  } else {
+#ifdef __SOFTFP__
+    address entry;
 
-  if (!left.is_register())
-    left.load_item();
-
-  LIR_Opr reg = rlock(x);
-  LIR_Opr tmp = LIR_OprFact::illegalOpr;
-  if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) {
-    tmp = new_register(T_DOUBLE);
+    switch (x->op()) {
+      case Bytecodes::_fmul:
+        entry = CAST_FROM_FN_PTR(address, SharedRuntime::fmul);
+        break;
+      case Bytecodes::_dmul:
+        entry = CAST_FROM_FN_PTR(address, SharedRuntime::dmul);
+        break;
+      case Bytecodes::_fdiv:
+        entry = CAST_FROM_FN_PTR(address, SharedRuntime::fdiv);
+        break;
+      case Bytecodes::_ddiv:
+        entry = CAST_FROM_FN_PTR(address, SharedRuntime::ddiv);
+        break;
+      case Bytecodes::_fadd:
+        entry = CAST_FROM_FN_PTR(address, SharedRuntime::fadd);
+        break;
+      case Bytecodes::_dadd:
+        entry = CAST_FROM_FN_PTR(address, SharedRuntime::dadd);
+        break;
+      case Bytecodes::_fsub:
+        entry = CAST_FROM_FN_PTR(address, SharedRuntime::fsub);
+        break;
+      case Bytecodes::_dsub:
+        entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsub);
+        break;
+      default:
+          ShouldNotReachHere();
+    }
+    LIR_Opr result = call_runtime(x->x(), x->y(),  entry,  x->type(), NULL);
+    set_result(x, result);
+#else
+    ShouldNotReachHere();// check your compiler settings
+#endif
   }
-
-  arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), NULL);
-
-  set_result(x, round_item(reg));
 }
 
 // for  _ladd, _lmul, _lsub, _ldiv, _lrem
@@ -782,12 +844,40 @@
   ValueTag tag = x->x()->type()->tag();
   left.load_item();
   right.load_item();
-  LIR_Opr reg = rlock_result(x);
 
   if (x->x()->type()->is_float_kind()) {
     Bytecodes::Code code = x->op();
-    __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl));
+    if(hasFPU()) {
+        LIR_Opr reg = rlock_result(x);
+        __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl));
+    } else {
+#ifdef __SOFTFP__
+        address entry;
+        switch (code) {
+        case Bytecodes::_fcmpl:
+          entry = CAST_FROM_FN_PTR(address, SharedRuntime::fcmpl);
+          break;
+        case Bytecodes::_fcmpg:
+          entry = CAST_FROM_FN_PTR(address, SharedRuntime::fcmpg);
+          break;
+        case Bytecodes::_dcmpl:
+          entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcmpl);
+          break;
+        case Bytecodes::_dcmpg:
+          entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcmpg);
+          break;
+        default:
+          ShouldNotReachHere();
+        }
+
+        LIR_Opr result = call_runtime(x->x(), x->y(),  entry,  x->type(), NULL);
+        set_result(x, result);
+#else
+        ShouldNotReachHere(); // check your compiler settings
+#endif
+    }
   } else if (x->x()->type()->tag() == longTag) {
+    LIR_Opr reg = rlock_result(x);
     __ lcmp2int(left.result(), right.result(), reg);
   } else {
     Unimplemented();
@@ -866,25 +956,29 @@
 
 void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
   switch (x->id()) {
+    default:
+        ShouldNotReachHere();
+        break;
     case vmIntrinsics::_dabs:
-    case vmIntrinsics::_dsqrt: {
-      assert(x->number_of_arguments() == 1, "wrong type");
-      LIRItem value(x->argument_at(0), this);
-      value.load_item();
-      LIR_Opr dst = rlock_result(x);
+    case vmIntrinsics::_dsqrt:
+        if(hasFPU()) {
+            assert(x->number_of_arguments() == 1, "wrong type");
+            LIRItem value(x->argument_at(0), this);
+            value.load_item();
+            LIR_Opr dst = rlock_result(x);
 
-      switch (x->id()) {
-      case vmIntrinsics::_dsqrt: {
-        __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
-        break;
-      }
-      case vmIntrinsics::_dabs: {
-        __ abs(value.result(), dst, LIR_OprFact::illegalOpr);
-        break;
-      }
-      }
-      break;
-    }
+            switch (x->id()) {
+            case vmIntrinsics::_dsqrt: {
+              __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
+              break;
+            }
+            case vmIntrinsics::_dabs: {
+              __ abs(value.result(), dst, LIR_OprFact::illegalOpr);
+              break;
+            }
+            }
+            break;
+      }// fall through for FPU less cores
     case vmIntrinsics::_dlog10: // fall through
     case vmIntrinsics::_dlog: // fall through
     case vmIntrinsics::_dsin: // fall through
@@ -895,6 +989,14 @@
 
       address runtime_entry = NULL;
       switch (x->id()) {
+#ifdef __SOFTFP__
+      case vmIntrinsics::_dabs:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dabs);
+        break;
+      case vmIntrinsics::_dsqrt:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt);
+        break;
+#endif
       case vmIntrinsics::_dsin:
         runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
         break;
@@ -1044,15 +1146,43 @@
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
 void LIRGenerator::do_Convert(Convert* x) {
-  // insired by sparc port
+    address entry = NULL;
   switch (x->op()) {
+  case Bytecodes::_d2i:
+  case Bytecodes::_f2i:
+  case Bytecodes::_i2f:
+  case Bytecodes::_i2d:
+  case Bytecodes::_f2d:
+  case Bytecodes::_d2f:
+      if(hasFPU()) {
+          break;
+      }// fall through for FPU-less cores
   case Bytecodes::_d2l:
   case Bytecodes::_f2l:
   case Bytecodes::_l2d:
   case Bytecodes::_l2f: {
-    address entry;
 
     switch (x->op()) {
+#ifdef __SOFTFP__
+    case Bytecodes::_i2f:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::i2f);
+      break;
+    case Bytecodes::_i2d:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::i2d);
+      break;
+    case Bytecodes::_f2d:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::f2d);
+      break;
+    case Bytecodes::_d2f:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::d2f);
+      break;
+    case Bytecodes::_d2i:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::d2i);
+      break;
+    case Bytecodes::_f2i:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::f2i);
+      break;
+#endif
     case Bytecodes::_d2l:
       entry = CAST_FROM_FN_PTR(address, SharedRuntime::d2l);
       break;
@@ -1075,6 +1205,9 @@
   break;
 
   default:
+    break;
+}
+    if(NULL == entry) {
     LIRItem value(x->value(), this);
     value.load_item();
 
@@ -1277,7 +1410,6 @@
 void LIRGenerator::do_If(If* x) {
   assert(x->number_of_sux() == 2, "inconsistency");
   ValueTag tag = x->x()->type()->tag();
-  bool is_safepoint = x->is_safepoint();
 
   If::Condition cond = x->cond();
 
@@ -1317,15 +1449,41 @@
 
   LIR_Opr left = xin->result();
   LIR_Opr right = yin->result();
+  LIR_Condition lir_c = lir_cond(cond);
 
-  __ cmp(lir_cond(cond), left, right);
+#ifdef __SOFTFP__
+  if(x->x()->type()->is_float_kind() && !(hasFPU())) {// FPU-less cores
+    address entry;
+    bool unordered_flag = x->unordered_is_true() != (lir_c == lir_cond_greater || lir_c == lir_cond_lessEqual);
+    if (x->x()->type()->is_float()) {
+      entry = CAST_FROM_FN_PTR(address, unordered_flag ? SharedRuntime::fcmpg : SharedRuntime::fcmpl);
+    } else if (x->x()->type()->is_double()) {
+      entry = CAST_FROM_FN_PTR(address, unordered_flag ? SharedRuntime::dcmpg : SharedRuntime::dcmpl);
+    } else {
+        ShouldNotReachHere();
+    }
+
+    LIR_Opr fcmp_res = call_runtime(x->x(), x->y(), entry, intType, NULL);
+    LIR_Opr zero = LIR_OprFact::intConst(0);
+    __ cmp(lir_c, fcmp_res, zero);
+  } else
+#endif
+  {
+  __ cmp(lir_c, left, right);
+  }
+
   // Generate branch profiling. Profiling code doesn't kill flags.
   profile_branch(x, cond);
   move_to_phi(x->state());
   if (x->x()->type()->is_float_kind()) {
-    __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux());
-  } else {
-    __ branch(lir_cond(cond), right->type(), x->tsux());
+      if(hasFPU()) {
+        __ branch(lir_c, right->type(), x->tsux(), x->usux());
+      } else {
+        __ branch(lir_c, T_INT, x->tsux());
+      }
+  } else
+  {
+    __ branch(lir_c, right->type(), x->tsux());
   }
   assert(x->default_sux() == x->fsux(), "wrong destination above");
   __ jump(x->default_sux());
--- a/src/cpu/aarch32/vm/c1_LinearScan_aarch32.hpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/c1_LinearScan_aarch32.hpp	Thu Oct 27 18:42:29 2016 +0300
@@ -94,7 +94,16 @@
 }
 
 inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) {
-  // The default logic is good enough for AArch32.
+#ifndef HARD_FLOAT_CC
+    BasicType type = cur->type();
+    if(!hasFPU()) {
+        if (type == T_FLOAT || type == T_DOUBLE) {
+            _first_reg = pd_first_cpu_reg;
+            _last_reg = FrameMap::last_cpu_reg();;
+            return true;
+        }
+    }
+#endif
   return false;
 }
 
--- a/src/cpu/aarch32/vm/c1_Runtime1_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/c1_Runtime1_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -50,6 +50,7 @@
 #include "vmreg_aarch32.inline.hpp"
 #if INCLUDE_ALL_GCS
 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "vm_version_aarch32.hpp"
 #endif
 
 // Implementation of StubAssembler
@@ -211,7 +212,6 @@
 
 #define __ sasm->
 
-const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2;
 
 // Stack layout for saving/restoring  all the registers needed during a runtime
 // call (this includes deoptimization)
@@ -223,7 +223,7 @@
 
 enum reg_save_layout {
   reg_save_s0,
-  reg_save_s31 = reg_save_s0 + 31,
+  reg_save_s31 = reg_save_s0 + FrameMap::nof_fpu_regs - 1,
   reg_save_pad, // to align to doubleword to simplify conformance to APCS
   reg_save_r0,
   reg_save_r1,
@@ -276,10 +276,11 @@
   oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r10), r10->as_VMReg());
   oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r11), r11->as_VMReg());
   oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r12), r12->as_VMReg());
-
-  for (int i = 0; i < 32; ++i) {
+  if (hasFPU()) {
+  for (int i = 0; i < FrameMap::nof_fpu_regs; ++i) {
     oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_s0 + i), as_FloatRegister(i)->as_VMReg());
   }
+  }
 
   return oop_map;
 }
@@ -291,7 +292,7 @@
   __ push(RegSet::range(r0, r12), sp);         // integer registers except lr & sp
   __ sub(sp, sp, 4);                           // align to 8 bytes
 
-  if (save_fpu_registers) {
+  if (save_fpu_registers && hasFPU()) {
     __ vstmdb_f64(sp, (1 << FrameMap::nof_fpu_regs / 2) - 1);
   } else {
     __ sub(sp, sp, FrameMap::nof_fpu_regs * 4);
@@ -301,7 +302,8 @@
 }
 
 static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
-  if (restore_fpu_registers) {
+
+  if (restore_fpu_registers  && hasFPU()) {
     __ vldmia_f64(sp, (1 << FrameMap::nof_fpu_regs / 2) - 1);
   } else {
     __ add(sp, sp, FrameMap::nof_fpu_regs * 4);
@@ -313,7 +315,7 @@
 
 static void restore_live_registers_except_r0(StubAssembler* sasm, bool restore_fpu_registers = true)  {
 
-  if (restore_fpu_registers) {
+  if (restore_fpu_registers  && hasFPU()) {
     __ vldmia_f64(sp, (1 << FrameMap::nof_fpu_regs / 2) - 1);
   } else {
     __ add(sp, sp, FrameMap::nof_fpu_regs * 4);
@@ -1313,4 +1315,22 @@
 
 #undef __
 
-const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; }
+const char *Runtime1::pd_name_for_address(address entry) {
+#ifdef __SOFTFP__
+#define FUNCTION_CASE(a, f) \
+  if ((intptr_t)a == CAST_FROM_FN_PTR(intptr_t, f))  return #f
+
+  FUNCTION_CASE(entry, SharedRuntime::i2f);
+  FUNCTION_CASE(entry, SharedRuntime::i2d);
+  FUNCTION_CASE(entry, SharedRuntime::f2d);
+  FUNCTION_CASE(entry, SharedRuntime::fcmpg);
+  FUNCTION_CASE(entry, SharedRuntime::fcmpl);
+  FUNCTION_CASE(entry, SharedRuntime::dcmpg);
+  FUNCTION_CASE(entry, SharedRuntime::dcmpl);
+  FUNCTION_CASE(entry, SharedRuntime::unordered_fcmple);
+  FUNCTION_CASE(entry, SharedRuntime::unordered_dcmple);
+#undef FUNCTION_CASE
+#endif
+
+  return "Unknown_Func_Ptr";
+}
--- a/src/cpu/aarch32/vm/globals_aarch32.hpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/globals_aarch32.hpp	Thu Oct 27 18:42:29 2016 +0300
@@ -96,7 +96,9 @@
   product(bool, UseNeon, false,                                         \
           "Use Neon for CRC32 computation")                             \
   product(bool, UseCRC32, false,                                        \
-          "Use CRC32 instructions for CRC32 computation")
+          "Use CRC32 instructions for CRC32 computation")               \
+  product(bool, UseFPU, true, "Enable FPU utilization at floating point ops. " \
+      "Affects SoftFP mode only.")
 
 
 #endif // CPU_AARCH32_VM_GLOBALS_AARCH32_HPP
--- a/src/cpu/aarch32/vm/interp_masm_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/interp_masm_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -39,6 +39,8 @@
 #include "runtime/biasedLocking.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/thread.inline.hpp"
+#include "vm_version_aarch32.hpp"
+#include "register_aarch32.hpp"
 
 
 // Implementation of InterpreterMacroAssembler
@@ -107,14 +109,20 @@
                mov(rscratch1, 0);
                str(rscratch1, oop_addr);
                verify_oop(r0, state);               break;
+    case dtos:
+        if(hasFPU()) {
+            vldr_f64(d0, val_addr);              break;
+        }//fall through  otherwise
     case ltos: ldrd(r0, val_addr);                  break;
+    case ftos:
+        if(hasFPU()) {
+            vldr_f32(d0, val_addr);              break;
+        } //fall through  otherwise
     case btos:                                   // fall through
     case ztos:                                   // fall through
     case ctos:                                   // fall through
     case stos:                                   // fall through
     case itos: ldr(r0, val_addr);                   break;
-    case ftos: vldr_f32(d0, val_addr);              break;
-    case dtos: vldr_f64(d0, val_addr);              break;
     case vtos: /* nothing to do */                  break;
     default  : ShouldNotReachHere();
   }
@@ -353,8 +361,20 @@
   case stos:
   case itos: pop_i();                   break;
   case ltos: pop_l();                   break;
-  case ftos: pop_f();                   break;
-  case dtos: pop_d();                   break;
+  case ftos:
+    if(hasFPU()) {
+        pop_f();
+    } else {
+        pop_i();
+    }
+    break;
+  case dtos:
+    if(hasFPU()) {
+        pop_d();
+    } else {
+        pop_l();
+    }
+    break;
   case vtos: /* nothing to do */        break;
   default:   ShouldNotReachHere();
   }
@@ -371,8 +391,20 @@
   case stos:
   case itos: push_i();                  break;
   case ltos: push_l();                  break;
-  case ftos: push_f();                  break;
-  case dtos: push_d();                  break;
+  case ftos:
+    if(hasFPU()) {
+        push_f();
+    } else {
+        push_i();
+    }
+    break;
+  case dtos:
+    if(hasFPU()) {
+        push_d();
+    } else {
+        push_l();
+    }
+    break;
   case vtos: /* nothing to do */        break;
   default  : ShouldNotReachHere();
   }
--- a/src/cpu/aarch32/vm/interpreter_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/interpreter_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -48,6 +48,7 @@
 #include "runtime/timer.hpp"
 #include "runtime/vframeArray.hpp"
 #include "utilities/debug.hpp"
+#include "vm_version_aarch32.hpp"
 #ifdef COMPILER1
 #include "c1/c1_Runtime1.hpp"
 #endif
@@ -161,18 +162,28 @@
 
   address entry_point = NULL;
   Register continuation = lr;
+  bool transcendental_entry = false;
+
   switch (kind) {
   case Interpreter::java_lang_math_abs:
     entry_point = __ pc();
-    __ vldr_f64(d0, Address(sp));
-    __ mov(sp, r4);
-    __ vabs_f64(d0, d0);
+      if(hasFPU()) {
+        __ vldr_f64(d0, Address(sp));
+        __ vabs_f64(d0, d0);
+      } else {
+        __ ldrd(r0, Address(sp));
+        transcendental_entry = true;
+      }
     break;
   case Interpreter::java_lang_math_sqrt:
     entry_point = __ pc();
-    __ vldr_f64(d0, Address(sp));
-    __ mov(sp, r4);
-    __ vsqrt_f64(d0, d0);
+    if(hasFPU()) {
+        __ vldr_f64(d0, Address(sp));
+        __ vsqrt_f64(d0, d0);
+    } else {
+        __ ldrd(r0, Address(sp));
+        transcendental_entry = true;
+    }
     break;
   case Interpreter::java_lang_math_sin :
   case Interpreter::java_lang_math_cos :
@@ -181,24 +192,40 @@
   case Interpreter::java_lang_math_log10 :
   case Interpreter::java_lang_math_exp :
     entry_point = __ pc();
+    transcendental_entry = true;
+#ifndef HARD_FLOAT_CC
+    __ ldrd(r0, Address(sp));
+#else
     __ vldr_f64(d0, Address(sp));
-    __ mov(sp, r4);
-    __ mov(r4, lr);
-    continuation = r4;  // The first callee-saved register
-    generate_transcendental_entry(kind);
+#endif //HARD_FLOAT_CC
     break;
   case Interpreter::java_lang_math_pow :
     entry_point = __ pc();
+    transcendental_entry = true;
+#ifndef HARD_FLOAT_CC
+    __ ldrd(r0, Address(sp, 2*Interpreter::stackElementSize));
+    __ ldrd(r2, Address(sp));
+#else
     __ vldr_f64(d0, Address(sp, 2*Interpreter::stackElementSize));
     __ vldr_f64(d1, Address(sp));
-    __ mov(sp, r4);
-    __ mov(r4, lr);
-    continuation = r4;
-    generate_transcendental_entry(kind);
+#endif //HARD_FLOAT_CC
     break;
   default:
-    ;
+    ShouldNotReachHere();
   }
+
+   __ mov(sp, r4);
+  if(transcendental_entry) {
+        __ mov(r4, lr);
+        continuation = r4;
+        generate_transcendental_entry(kind);
+#ifndef HARD_FLOAT_CC
+        if(hasFPU()) {
+            __ vmov_f64(d0, r0, r1);
+        }
+#endif
+  }
+
   if (entry_point) {
     __ b(continuation);
   }
@@ -218,6 +245,14 @@
 void InterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind) {
   address fn;
   switch (kind) {
+#ifdef __SOFTFP__
+  case  Interpreter::java_lang_math_abs:
+    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dabs);
+    break;
+  case Interpreter::java_lang_math_sqrt:
+    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt);
+    break;
+#endif //__SOFTFP__
   case Interpreter::java_lang_math_sin :
     fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
     break;
--- a/src/cpu/aarch32/vm/jniFastGetField_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/jniFastGetField_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -114,11 +114,14 @@
   __ ldr(rscratch2, rscratch2);
   __ cmp(rcounter, rscratch2);
 
+#ifdef HARD_FLOAT_CC
   switch (type) {
     case T_FLOAT:   __ vmov_f32(d0, result, Assembler::EQ); break;
     case T_DOUBLE:  __ vmov_f64(d0, r0, r1, Assembler::EQ); break; // Change me if result changes
     default:                                                break;
   }
+#endif//HARD_FLOAT_CC
+
   __ add(sp, sp, nargs * wordSize, Assembler::EQ); // Pop args if we don't need them.
   __ b(lr, Assembler::EQ);
 
--- a/src/cpu/aarch32/vm/macroAssembler_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/macroAssembler_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -49,6 +49,7 @@
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/g1/heapRegion.hpp"
+#include "vm_version_aarch32.hpp"
 #endif
 
 #ifdef PRODUCT
@@ -1703,13 +1704,22 @@
   // if fix this, update also RegisterSaved::save_live_registers and it's map
   push(0x1fff, sp); // integer registers except lr & sp & (aarch32 pc)
 
-  int nfloat = 16;
-  vstmdb_f64(sp, (1 << nfloat) - 1);
+  if(hasFPU()) {
+    const int nfloat = FPUStateSizeInWords / 2; // saved by pairs
+    vstmdb_f64(sp, (1 << nfloat) - 1);
+  } else {
+    sub(sp, sp, FPUStateSizeInWords * wordSize);
+  }
 }
 
 void MacroAssembler::pop_CPU_state() {
-  int nfloat = 16;
-  vldmia_f64(sp, (1 << nfloat) - 1);
+  if(hasFPU()) {
+    const int nfloat = FloatRegisterImpl::number_of_registers / 2;
+    vldmia_f64(sp, (1 << nfloat) - 1);
+  } else {
+    add(sp, sp, FPUStateSizeInWords * wordSize);
+  }
+
   pop(0x1fff, sp); // integer registers except lr & sp & (aarch32 pc)
   add(sp, sp, 4);
 }
@@ -2628,15 +2638,19 @@
 int machine_state_float_regset = 0b11;
 
 void MacroAssembler::save_machine_state() {
-  stmdb(sp, machine_state_regset);
-  vstmdb_f64(sp, machine_state_float_regset);
-  enter();
+    stmdb(sp, machine_state_regset);
+    if(hasFPU()) {
+        vstmdb_f64(sp, machine_state_float_regset);
+    }
+    enter();
 }
 
 void MacroAssembler::restore_machine_state() {
-  leave();
-  vldmia_f64(sp, machine_state_float_regset);
-  ldmia(sp, machine_state_regset);
+    leave();
+    if(hasFPU()) {
+        vldmia_f64(sp, machine_state_float_regset);
+    }
+    ldmia(sp, machine_state_regset);
 }
 
 void internal_internal_printf(const char *fmt, ...) {
@@ -3113,6 +3127,7 @@
 
   BIND(L_align_exit);
 
+  if(VM_Version::features() & FT_AdvSIMD) {
   if (UseNeon) {
       cmp(len, 32+12); // account for possible need for alignment
       b(L_cpu, Assembler::LT);
@@ -3201,6 +3216,7 @@
 
       add(len, len, 16);
   }
+  } // if FT_AdvSIMD
 
   BIND(L_cpu);
     subs(len, len, 8);
--- a/src/cpu/aarch32/vm/macroAssembler_aarch32.hpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/macroAssembler_aarch32.hpp	Thu Oct 27 18:42:29 2016 +0300
@@ -580,9 +580,11 @@
 
 #define should_not_reach_here() should_not_reach_here_line(__FILE__, __LINE__)
   void should_not_reach_here_line(const char *file, int line) {
+#ifdef ASSERT
     mov(rscratch1, line);
     reg_printf_important(file);
     reg_printf_important(": %d", rscratch1);
+#endif
     stop("should_not_reach_here");
   }
 
--- a/src/cpu/aarch32/vm/sharedRuntime_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/sharedRuntime_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -37,6 +37,8 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/vframeArray.hpp"
 #include "vmreg_aarch32.inline.hpp"
+#include "register_aarch32.hpp"
+#include "vm_version_aarch32.hpp"
 #ifdef COMPILER1
 #include "c1/c1_Runtime1.hpp"
 #endif
@@ -147,11 +149,12 @@
   oop_map->set_callee_saved(VMRegImpl::stack2reg(r10_off + additional_frame_slots), r10->as_VMReg());
   // r11 saved in frame header as rfp, not map it here
   // r11 & r14 have special meaning (can't hold oop), so not map them
-
-  for (int i = 0; i < 31; ++i) {
+  if(hasFPU()) {
+  for (int i = 0; i < FPUStateSizeInWords; ++i) {
     oop_map->set_callee_saved(VMRegImpl::stack2reg(fpu_state_off + i + additional_frame_slots),
     as_FloatRegister(i)->as_VMReg());
   }
+  }
 
   return oop_map;
 }
@@ -170,9 +173,11 @@
   // restoration so only result registers need to be restored here.
 
 
-
+  if(hasFPU()) {
   // Restore fp result register
   __ vldr_f64(d0, Address(sp, offset_in_bytes(fpu_state_off)));
+  }
+
   // Restore integer result register
   __ ldr(r0, Address(sp, offset_in_bytes(r0_off)));
   __ ldr(r1, Address(sp, offset_in_bytes(r1_off)));
@@ -246,6 +251,16 @@
 
   for (int i = 0; i < total_args_passed; i++) {
     switch (sig_bt[i]) {
+    case T_FLOAT:
+        if(hasFPU()) {
+            if (fp_args < FP_ArgReg_N) {
+              regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
+            } else {
+              regs[i].set1(VMRegImpl::stack2reg(stk_args));
+              stk_args += 1;
+            }
+            break;
+        } // fallthrough for no-FPU system
     case T_BOOLEAN:
     case T_CHAR:
     case T_BYTE:
@@ -266,6 +281,19 @@
       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
       regs[i].set_bad();
       break;
+    case T_DOUBLE:
+        if(hasFPU()) {
+            assert(sig_bt[i + 1] == T_VOID, "expecting half");
+            fp_args = round_to(fp_args, 2);
+            if (fp_args < FP_ArgReg_N) {
+              regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
+              fp_args += 2;
+            } else {
+              regs[i].set2(VMRegImpl::stack2reg(stk_args));
+              stk_args += 2;
+            }
+            break;
+        } //fallthrough for no-FPU system
     case T_LONG:
       assert(sig_bt[i + 1] == T_VOID, "expecting half");
       if (int_args + 1 < Argument::n_int_register_parameters_j) {
@@ -276,25 +304,6 @@
         stk_args += 2;
       }
       break;
-    case T_FLOAT:
-      if (fp_args < FP_ArgReg_N) {
-        regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
-      } else {
-        regs[i].set1(VMRegImpl::stack2reg(stk_args));
-        stk_args += 1;
-      }
-      break;
-    case T_DOUBLE:
-      assert(sig_bt[i + 1] == T_VOID, "expecting half");
-      fp_args = round_to(fp_args, 2);
-      if (fp_args < FP_ArgReg_N) {
-        regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
-        fp_args += 2;
-      } else {
-        regs[i].set2(VMRegImpl::stack2reg(stk_args));
-        stk_args += 2;
-      }
-      break;
     default:
       ShouldNotReachHere();
       break;
@@ -751,6 +760,7 @@
     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
       c_rarg0, c_rarg1, c_rarg2, c_rarg3
     };
+#ifdef HARD_FLOAT_CC
     const int FP_ArgReg_N = 16;
     static const FloatRegister FP_ArgReg[] = {
       f0, f1, f2, f3,
@@ -759,9 +769,10 @@
       f12, f13, f14, f15,
     };
     unsigned long fp_free_mask = (1 << FP_ArgReg_N) - 1;
+    uint fp_args = 0;
+#endif //HARD_FLOAT_CC
 
     uint int_args = 0;
-    uint fp_args = 0;
     uint stk_args = 0;
 
     for (int i = 0; i < total_args_passed; i++) {
@@ -775,6 +786,10 @@
       case T_ARRAY:
       case T_ADDRESS:
       case T_METADATA:
+#ifndef HARD_FLOAT_CC
+      // soft FP case
+      case T_FLOAT:
+#endif
         if (int_args < Argument::n_int_register_parameters_c) {
           regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
         } else {
@@ -782,6 +797,10 @@
           stk_args += 1;
         }
         break;
+#ifndef HARD_FLOAT_CC
+      // soft FP case
+      case  T_DOUBLE:
+#endif
       case T_LONG:
         assert(sig_bt[i + 1] == T_VOID, "expecting half");
         if (int_args + 1 < Argument::n_int_register_parameters_c) {
@@ -799,6 +818,7 @@
           int_args = Argument::n_int_register_parameters_c;
         }
         break;
+#ifdef HARD_FLOAT_CC
       case T_FLOAT:
         if (fp_free_mask & ((1 << FP_ArgReg_N)-1)) {
           unsigned index = __builtin_ctz(fp_free_mask);
@@ -821,6 +841,7 @@
           stk_args += 2;
         }
         break;
+#endif //HARD_FLOAT_CC
       case T_VOID: // Halves of longs and doubles
         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
         regs[i].set_bad();
@@ -931,24 +952,33 @@
 
 // A float arg may have to do float reg int reg conversion
 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
-  if (src.first()->is_stack()) {
-    if (dst.first()->is_stack()) {
-      // stack to stack
-      // Have no vfp scratch registers, so copy via gpr
-      __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
-      __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
+    if(hasFPU()) {
+        if (src.first()->is_stack()) {
+          if (dst.first()->is_stack()) {
+            // stack to stack
+            // Have no vfp scratch registers, so copy via gpr
+            __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
+            __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
+          } else {
+            // stack to reg
+            __ vldr_f32(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first())));
+          }
+        } else if (dst.first()->is_stack()) {
+          // reg to stack
+          __ vstr_f32(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first())));
+        } else {
+#ifndef HARD_FLOAT_CC
+            if(dst.first()->is_Register()) {
+                __ vmov_f32(dst.first()->as_Register(), src.first()->as_FloatRegister());
+            } else
+#endif
+            if (dst.first() != src.first()) {
+                 __ vmov_f32(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
+            }
+        }
     } else {
-      // stack to reg
-      __ vldr_f32(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first())));
+        move_int(masm, src, dst);
     }
-  } else if (dst.first()->is_stack()) {
-    // reg to stack
-    __ vstr_f32(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first())));
-  } else {
-    if (dst.first() != src.first()) {
-      __ vmov_f32(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
-    }
-  }
 }
 
 // A long move
@@ -983,23 +1013,32 @@
 
 // A double move
 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
-  if (src.first()->is_stack()) {
-    if (dst.first()->is_stack()) {
-      // stack to stack
-      // Have no vfp scratch registers, so copy via gpr
-      __ ldrd(rscratch1, rscratch2, Address(rfp, reg2offset_in(src.first())));
-      __ strd(rscratch1, rscratch2, Address(sp, reg2offset_out(dst.first())));
+  if(hasFPU()) {
+    if (src.first()->is_stack()) {
+      if (dst.first()->is_stack()) {
+        // stack to stack
+        // Have no vfp scratch registers, so copy via gpr
+        __ ldrd(rscratch1, rscratch2, Address(rfp, reg2offset_in(src.first())));
+        __ strd(rscratch1, rscratch2, Address(sp, reg2offset_out(dst.first())));
+      } else {
+        // stack to reg
+        __ vldr_f64(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first())));
+      }
+    } else if (dst.first()->is_stack()) {
+      // reg to stack
+      __ vstr_f64(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first())));
     } else {
-      // stack to reg
-      __ vldr_f64(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first())));
-    }
-  } else if (dst.first()->is_stack()) {
-    // reg to stack
-    __ vstr_f64(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first())));
+#ifndef HARD_FLOAT_CC
+        if(dst.first()->is_Register()) {
+            __ vmov_f64(dst.first()->as_Register(), dst.second()->as_Register(), src.first()->as_FloatRegister());
+        } else
+#endif
+        if (dst.first() != src.first()) {
+           __ vmov_f64(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
+        }
+      }
   } else {
-    if (dst.first() != src.first()) {
-      __ vmov_f64(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
-    }
+    long_move(masm, src, dst);
   }
 }
 
@@ -1008,17 +1047,21 @@
   // We always ignore the frame_slots arg and just use the space just below frame pointer
   // which by this time is free to use
   switch (ret_type) {
-  case T_FLOAT:
-    __ vstr_f32(f0, Address(rfp, -2 * wordSize));
-    break;
   case T_DOUBLE:
+#ifdef HARD_FLOAT_CC
     __ vstr_f64(d0, Address(rfp, -3 * wordSize));
     break;
+#endif//fall through otherwise
   case T_LONG:
     __ strd(r0, r1, Address(rfp, -3 * wordSize));
     break;
   case T_VOID:
     break;
+  case T_FLOAT:
+#ifdef HARD_FLOAT_CC
+    __ vstr_f32(f0, Address(rfp, -2 * wordSize));
+    break;
+#endif//fall through otherwise
   default:
     __ str(r0, Address(rfp, -2 * wordSize));
     break;
@@ -1029,17 +1072,21 @@
   // We always ignore the frame_slots arg and just use the space just below frame pointer
   // which by this time is free to use
   switch (ret_type) {
-  case T_FLOAT:
-    __ vldr_f32(d0, Address(rfp, -2 * wordSize));
-    break;
   case T_DOUBLE:
+#ifdef HARD_FLOAT_CC
     __ vldr_f64(d0, Address(rfp, -3 * wordSize));
     break;
+#endif//fall through otherwise
   case T_LONG:
     __ ldrd(r0, r1, Address(rfp, -3 * wordSize));
     break;
   case T_VOID:
     break;
+  case T_FLOAT:
+#ifdef HARD_FLOAT_CC
+    __ vldr_f32(d0, Address(rfp, -2 * wordSize));
+    break;
+#endif//fall through otherwise
   default:
     __ ldr(r0, Address(rfp, -2 * wordSize));
     break;
@@ -1053,7 +1100,9 @@
     if (args[i].first()->is_Register()) {
       x = x + args[i].first()->as_Register();
       ++saved_slots;
-    } else if (args[i].first()->is_FloatRegister()) {
+    }
+#ifdef HARD_FLOAT_CC
+    else if (args[i].first()->is_FloatRegister()) {
       FloatRegister fr = args[i].first()->as_FloatRegister();
 
       if (args[i].second()->is_FloatRegister()) {
@@ -1067,6 +1116,7 @@
         ++saved_slots;
       }
     }
+#endif//HARD_FLOAT_CC
   }
   __ push(x, sp);
   return saved_slots;
@@ -1085,7 +1135,9 @@
   for ( int i = first_arg ; i < arg_count ; i++ ) {
     if (args[i].first()->is_Register()) {
       ;
-    } else if (args[i].first()->is_FloatRegister()) {
+    }
+#ifdef HARD_FLOAT_CC
+    else if (args[i].first()->is_FloatRegister()) {
       FloatRegister fr = args[i].first()->as_FloatRegister();
 
       if (args[i].second()->is_FloatRegister()) {
@@ -1097,6 +1149,7 @@
         __ increment(sp, wordSize);
       }
     }
+#endif//HARD_FLOAT_CC
   }
 }
 
@@ -1443,9 +1496,11 @@
           case T_LONG: double_slots++; break;
           default:  ShouldNotReachHere();
         }
-      } else if (in_regs[i].first()->is_FloatRegister()) {
-        ShouldNotReachHere();
-      }
+      } else
+#ifdef HARD_FLOAT_CC
+          if (in_regs[i].first()->is_FloatRegister())
+#endif // HARD_FLOAT_CC
+            ShouldNotReachHere();
     }
     total_save_slots = double_slots * 2 + single_slots;
     // align the save area
@@ -1867,10 +1922,20 @@
   case T_BYTE   : __ sxtb(r0, r0);           break;
   case T_SHORT  : __ sxth(r0, r0);           break;
   case T_INT    :                                    break;
+  case T_FLOAT  :
+#ifndef HARD_FLOAT_CC
+      if(hasFPU()) {
+          __ vmov_f32(d0, r0);
+      }
+#endif
+      break;
   case T_DOUBLE :
-  case T_FLOAT  :
-    // Result is in d0 we'll save as needed
-    break;
+#ifndef HARD_FLOAT_CC
+      if(hasFPU()) {
+          __ vmov_f64(d0, r0, r1);
+      }
+#endif
+      break;
   case T_ARRAY:                 // Really a handle
   case T_OBJECT:                // Really a handle
       break; // can't de-handlize until after safepoint check
@@ -2424,7 +2489,9 @@
   __ sub(sp, sp, (frame_size_in_words - 2) * wordSize);
 
   // Restore frame locals after moving the frame
-  __ vstr_f64(d0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::fpu_state_off)));
+  if(hasFPU()) {
+    __ vstr_f64(d0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::fpu_state_off)));
+  }
   __ strd(r0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::r0_off)));
 
   // Call C code.  Need thread but NOT official VM entry
@@ -2452,7 +2519,9 @@
   __ reset_last_Java_frame(true, true);
 
   // Collect return values
-  __ vldr_f64(d0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::fpu_state_off)));
+  if(hasFPU()) {
+    __ vldr_f64(d0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::fpu_state_off)));
+  }
   __ ldrd(r0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::r0_off)));
   // I think this is useless (throwing pc?)
   // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
--- a/src/cpu/aarch32/vm/stubGenerator_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/stubGenerator_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -41,6 +41,7 @@
 #include "runtime/stubRoutines.hpp"
 #include "runtime/thread.inline.hpp"
 #include "utilities/top.hpp"
+#include "vm_version_aarch32.hpp"
 #ifdef COMPILER2
 #include "opto/runtime.hpp"
 #endif
@@ -274,10 +275,17 @@
     __ b(is_object, Assembler::EQ);
     __ cmp(c_rarg3, T_LONG);
     __ b(is_long, Assembler::EQ);
-    __ cmp(c_rarg3, T_FLOAT);
-    __ b(is_float, Assembler::EQ);
+    if(hasFPU()) {
+        // soft FP fall through T_INT case
+        __ cmp(c_rarg3, T_FLOAT);
+        __ b(is_float, Assembler::EQ);
+    }
     __ cmp(c_rarg3, T_DOUBLE);
-    __ b(is_double, Assembler::EQ);
+    if(hasFPU()) {
+        __ b(is_double, Assembler::EQ);
+    } else {
+        __ b(is_long, Assembler::EQ);
+    }
 
     // handle T_INT case
     __ str(r0, Address(c_rarg2));
@@ -298,14 +306,15 @@
     __ strd(r0, r1, Address(c_rarg2, 0));
     __ b(exit, Assembler::AL);
 
-    __ BIND(is_float);
-    __ vstr_f32(f0, Address(c_rarg2, 0));
-    __ b(exit, Assembler::AL);
+    if(hasFPU()) {
+        __ BIND(is_float);
+        __ vstr_f32(f0, Address(c_rarg2, 0));
+        __ b(exit, Assembler::AL);
 
-    __ BIND(is_double);
-    __ vstr_f64(d0, Address(c_rarg2, 0));
-    __ b(exit, Assembler::AL);
-
+        __ BIND(is_double);
+        __ vstr_f64(d0, Address(c_rarg2, 0));
+        __ b(exit, Assembler::AL);
+    }
     return start;
   }
 
@@ -748,13 +757,10 @@
     // if destination is unaliged, copying by words is the only option
     __ tst(d, 3);
     __ b(small, Assembler::NE);
-#ifndef __SOFTFP__
-    if (UseSIMDForMemoryOps) {
+    if (UseSIMDForMemoryOps && (VM_Version::features() & FT_AdvSIMD)) {
       copy_memory_simd(s, d, count, tmp2, step, DoubleFloatRegSet::range(d0, d7), 64);
       copy_memory_simd(s, d, count, tmp2, step, DoubleFloatRegSet::range(d0, d1), 16);
-    } else
-#endif //__SOFTFP__
-    {
+    } else {
       const RegSet tmp_set = RegSet::range(r4, r7);
       const int tmp_set_size = 16;
       Label ldm_loop;
@@ -1122,8 +1128,8 @@
     *entry = __ pc();
 
     // Load *adr into c_rarg1, may fault.
+    *fault_pc = __ pc();
     __ mov(c_rarg2, c_rarg0);
-    *fault_pc = __ pc();
     switch (size) {
       case 4:
         // int32_t
--- a/src/cpu/aarch32/vm/templateInterpreter_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/templateInterpreter_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -53,6 +53,7 @@
 
 #ifndef PRODUCT
 #include "oops/method.hpp"
+#include "vm_version_aarch32.hpp"
 #endif // !PRODUCT
 
 #define __ _masm->
@@ -298,8 +299,20 @@
   case T_INT    : /* nothing to do */    break;
   case T_LONG   : /* nothing to do */    break;
   case T_VOID   : /* nothing to do */    break;
-  case T_FLOAT  : /* nothing to do */    break;
-  case T_DOUBLE : /* nothing to do */    break;
+  case T_FLOAT  :
+#ifndef HARD_FLOAT_CC
+      if(hasFPU()) {
+          __ vmov_f32(d0, r0);
+      }
+#endif
+      break;
+  case T_DOUBLE :
+#ifndef HARD_FLOAT_CC
+      if(hasFPU()) {
+          __ vmov_f64(d0, r0, r1);
+      }
+#endif
+    break;
   case T_OBJECT :
     // retrieve result from frame
     __ reg_printf("In object result handler\n");
@@ -1972,9 +1985,15 @@
   assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
   Label L;
   aep = __ pc();  __ push_ptr();  __ b(L);
-  fep = __ pc();  __ push_f();    __ b(L);
-  dep = __ pc();  __ push_d();    __ b(L);
+  dep = __ pc();
+  if(hasFPU()){
+    __ push_d(); __ b(L);
+  }
   lep = __ pc();  __ push_l();    __ b(L);
+  fep = __ pc();
+  if(hasFPU()){
+    __ push_f();    __ b(L);
+  }
   bep = cep = sep =
   iep = __ pc();  __ push_i();
   vep = __ pc();
--- a/src/cpu/aarch32/vm/templateTable_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/templateTable_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -39,6 +39,7 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/synchronizer.hpp"
+#include "vm_version_aarch32.hpp"
 
 #ifndef CC_INTERP
 
@@ -339,11 +340,15 @@
   transition(vtos, ftos);
   float fval = value;
   assert(value == 0 || value == 1 || value == 2, "invalid float const");
-  if(__ operand_valid_for_float_immediate(fval)) {
-    __ vmov_f32(d0, fval);
+  if (hasFPU()) {
+    if(__ operand_valid_for_float_immediate(fval)) {
+      __ vmov_f32(d0, fval);
+    } else {
+      __ mov(r0, *((uint32_t*)&fval));
+      __ vmov_f32(d0, r0);
+    }
   } else {
     __ mov(r0, *((uint32_t*)&fval));
-    __ vmov_f32(d0, r0);
   }
 }
 
@@ -352,13 +357,19 @@
   transition(vtos, dtos);
   double dval = value;
   assert(value == 0 || value == 1 || value == 2, "invalid double const");
-  if(__ operand_valid_for_double_immediate(dval)) {
-    __ vmov_f64(d0, dval);
+  if (hasFPU()) {
+    if(__ operand_valid_for_double_immediate(dval)) {
+      __ vmov_f64(d0, dval);
+    } else {
+      uint32_t* ptr = (uint32_t*)&dval;
+      __ mov(r0, *ptr);
+      __ mov(r1, *(ptr + 1));
+      __ vmov_f64(d0, r0, r1);
+    }
   } else {
     uint32_t* ptr = (uint32_t*)&dval;
     __ mov(r0, *ptr);
     __ mov(r1, *(ptr + 1));
-    __ vmov_f64(d0, r0, r1);
   }
 }
 
@@ -416,17 +427,25 @@
   __ b(Done);
 
   __ bind(notClass);
-  __ cmp(r3, JVM_CONSTANT_Float);
-  __ b(notFloat, Assembler::NE);
-  // ftos
-  __ adds(r1, r2, r1, lsl(2));
-  __ vldr_f32(d0, Address(r1, base_offset));
-
-  __ push_f();
-
-  __ b(Done);
-
-  __ bind(notFloat);
+  if (hasFPU()) {
+    __ cmp(r3, JVM_CONSTANT_Float);
+    __ b(notFloat, Assembler::NE);
+    // ftos
+    __ adds(r1, r2, r1, lsl(2));
+    __ vldr_f32(d0, Address(r1, base_offset));
+
+    __ push_f();
+
+    __ b(Done);
+
+    __ bind(notFloat);
+  } else {
+        // Soft FP pass through T_INT case.
+#ifdef ASSERT
+        __ cmp(r3, JVM_CONSTANT_Float);
+        __ mov(r3, JVM_CONSTANT_Integer,  Assembler::EQ);
+#endif // ASSERT
+  }
 #ifdef ASSERT
   {
     Label L;
@@ -478,7 +497,7 @@
 void TemplateTable::ldc2_w()
 {
   transition(vtos, vtos);
-  Label Long, Done;
+  Label Done;
   __ get_unsigned_2_byte_index_at_bcp(r0, 1);
 
   __ get_cpool_and_tags(r1, r2);
@@ -488,15 +507,18 @@
   // get type
   __ lea(r2, Address(r2, r0, lsl(0)));
   __ load_unsigned_byte(r2, Address(r2, tags_offset));
-  __ cmp(r2, (int)JVM_CONSTANT_Double);
-  __ b(Long, Assembler::NE);
-  // dtos
-  __ lea (r2, Address(r1, r0, lsl(2)));
-  __ vldr_f64(d0, Address(r2, base_offset));
-  __ push_d();
-  __ b(Done);
-
-  __ bind(Long);
+  if (hasFPU()) {
+    Label Long;
+    __ cmp(r2, (int)JVM_CONSTANT_Double);
+    __ b(Long, Assembler::NE);
+    // dtos
+    __ lea (r2, Address(r1, r0, lsl(2)));
+    __ vldr_f64(d0, Address(r2, base_offset));
+    __ push_d();
+    __ b(Done);
+
+    __ bind(Long);
+  }
   // ltos
   __ lea(r1, Address(r1, r0, lsl(2)));
   __ ldr(r0, Address(r1, base_offset));
@@ -583,15 +605,24 @@
 {
   transition(vtos, ftos);
   locals_index(r1);
-  __ vldr_f32(d0, faddress(r1, r2, _masm));
+  if (hasFPU()) {
+      __ vldr_f32(d0, faddress(r1, r2, _masm));
+  } else {
+    __ ldr(r0, faddress(r1, r2, _masm));
+  }
 }
 
 void TemplateTable::dload()
 {
   transition(vtos, dtos);
-  __ ldrb(r1, at_bcp(1));
-  __ sub(r1, rlocals, r1, lsl(LogBytesPerWord));
-  __ vldr_f64(d0, Address(r1, Interpreter::local_offset_in_bytes(1)));
+  if (hasFPU()) {
+    __ ldrb(r1, at_bcp(1));
+    __ sub(r1, rlocals, r1, lsl(LogBytesPerWord));
+    __ vldr_f64(d0, Address(r1, Interpreter::local_offset_in_bytes(1)));
+  } else {
+    locals_index(r2);
+    __ ldrd(r0, r1, daddress(r2, r3, _masm));
+  }
 }
 
 void TemplateTable::aload()
@@ -624,16 +655,25 @@
 {
   transition(vtos, ftos);
   locals_index_wide(r1);
-  __ vldr_f32(d0, faddress(r1, rscratch1, _masm));
+  if (hasFPU()) {
+      __ vldr_f32(d0, faddress(r1, rscratch1, _masm));
+  } else {
+  __ ldr (r0, faddress(r1, rscratch1, _masm));
+  }
 }
 
 void TemplateTable::wide_dload()
 {
   transition(vtos, dtos);
-  __ ldrh(r1, at_bcp(2));
-  __ rev16(r1, r1);
-  __ sub(r1, rlocals, r1, lsl(LogBytesPerWord));
-  __ vldr_f64(d0, Address(r1, Interpreter::local_offset_in_bytes(1)));
+  if (hasFPU()) {
+    __ ldrh(r1, at_bcp(2));
+    __ rev16(r1, r1);
+    __ sub(r1, rlocals, r1, lsl(LogBytesPerWord));
+    __ vldr_f64(d0, Address(r1, Interpreter::local_offset_in_bytes(1)));
+  } else {
+    locals_index_wide(r2);
+    __ ldrd(r0, r1, daddress(r2, r3, _masm));
+  }
 }
 
 void TemplateTable::wide_aload()
@@ -701,7 +741,11 @@
   // r2: index
   index_check(r0, r2); // leaves index in r2, kills rscratch1
   __ lea(r2,  Address(r0, r2, lsl(2)));
-  __ vldr_f32(d0, Address(r2,  arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
+  if (hasFPU()) {
+      __ vldr_f32(d0, Address(r2,  arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
+  } else {
+    __ ldr(r0, Address(r2,  arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
+  }
 }
 
 void TemplateTable::daload()
@@ -715,7 +759,9 @@
   __ lea(r2,  Address(r0, r2, lsl(3)));
   __ add(r2, r2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE));
   __ atomic_ldrd(r0, r1, r2);
-  __ vmov_f64(d0, r0, r1);
+  if (hasFPU()) {
+      __ vmov_f64(d0, r0, r1);
+  }
 }
 
 void TemplateTable::aaload()
@@ -798,17 +844,21 @@
 void TemplateTable::fload(int n)
 {
   transition(vtos, ftos);
-  __ vldr_f32(d0, faddress(n));
-  __ vmov_f32(rscratch1, d0);
-  __ reg_printf("Just loaded float 0x%08x\n", rscratch1);
-  __ vmov_f32(rscratch1, d0);
-  __ reg_printf("Just loaded float, confirm 0x%08x\n", rscratch1);
+  if (hasFPU()) {
+      __ vldr_f32(d0, faddress(n));
+  } else {
+    __ ldr(r0, faddress(n));
+  }
 }
 
 void TemplateTable::dload(int n)
 {
   transition(vtos, dtos);
-  __ vldr_f64(d0, daddress(n));
+  if (hasFPU()) {
+    __ vldr_f64(d0, daddress(n));
+  } else {
+    __ ldrd(r0, r1, daddress(n));
+  }
 }
 
 void TemplateTable::aload(int n)
@@ -906,13 +956,22 @@
   transition(ftos, vtos);
   locals_index(r1);
   __ lea(rscratch1, iaddress(r1));
-  __ vstr_f32(d0, Address(rscratch1));
+  if (hasFPU()) {
+      __ vstr_f32(d0, Address(rscratch1));
+  } else {
+    __ str(r0, Address(rscratch1));
+  }
 }
 
 void TemplateTable::dstore() {
   transition(dtos, vtos);
-  locals_index(r1);
-  __ vstr_f64(d0, daddress(r1, rscratch1, _masm));
+  if (hasFPU()) {
+    locals_index(r1);
+    __ vstr_f64(d0, daddress(r1, rscratch1, _masm));
+  } else {
+    locals_index(r2);
+    __ strd(r0, r1, daddress(r2, rscratch1, _masm));
+  }
 }
 
 void TemplateTable::astore()
@@ -941,17 +1000,28 @@
 
 void TemplateTable::wide_fstore() {
   transition(vtos, vtos);
-  __ pop_f();
   locals_index_wide(r1);
   __ lea(rscratch1, faddress(r1, rscratch1, _masm));
-  __ vstr_f32(d0, rscratch1);
+  if (hasFPU()) {
+      __ pop_f();
+    __ vstr_f32(d0, rscratch1);
+  } else {
+    __ pop_i();
+    __ str(r0, Address(rscratch1));
+  }
 }
 
 void TemplateTable::wide_dstore() {
   transition(vtos, vtos);
-  __ pop_d();
-  locals_index_wide(r1);
-  __ vstr_f64(d0, daddress(r1, rscratch1, _masm));
+  if (hasFPU()) {
+    __ pop_d();
+    locals_index_wide(r1);
+    __ vstr_f64(d0, daddress(r1, rscratch1, _masm));
+  } else {
+    __ pop_l();
+    locals_index_wide(r2);
+    __ strd(r0, r1, daddress(r2, rscratch1, _masm));
+  }
 }
 
 void TemplateTable::wide_astore() {
@@ -992,27 +1062,34 @@
   transition(ftos, vtos);
   __ pop_i(r2);
   __ pop_ptr(r3);
-  // d0: value
+  // d0/r0: value
   // r2:  index
   // r3:  array
   index_check(r3, r2); // prefer index in r2
   __ lea(rscratch1, Address(r3, r2, lsl(2)));
-  __ vstr_f32(d0, Address(rscratch1,
-                      arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
+  if (hasFPU()) {
+    __ vstr_f32(d0, Address(rscratch1,
+                        arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
+  } else {
+        __ str(r0, Address(rscratch1,
+                           arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
+  }
 }
 
 void TemplateTable::dastore() {
   transition(dtos, vtos);
   __ pop_i(r2);
   __ pop_ptr(r3);
-  // d0: value
+  // d0/r0:r1: value
   // r2:  index
   // r3:  array
   index_check(r3, r2); // prefer index in r2
   __ lea(rscratch1, Address(r3, r2, lsl(3)));
   __ lea(rscratch1, Address(rscratch1,
                             arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
-  __ vmov_f64(r0, r1, d0);
+    if (hasFPU()) {
+        __ vmov_f64(r0, r1, d0);
+    }
   __ atomic_strd(r0, r1, rscratch1, r2, r3);
 }
 
@@ -1131,13 +1208,21 @@
 void TemplateTable::fstore(int n)
 {
   transition(ftos, vtos);
-  __ vstr_f32(d0, faddress(n));
+  if (hasFPU()) {
+      __ vstr_f32(d0, faddress(n));
+  } else {
+    __ str(r0, faddress(n));
+  }
 }
 
 void TemplateTable::dstore(int n)
 {
   transition(dtos, vtos);
-  __ vstr_f64(d0, daddress(n));
+  if (hasFPU()) {
+      __ vstr_f64(d0, daddress(n));
+  } else {
+    __ strd(r0, r1, daddress(n));
+  }
 }
 
 void TemplateTable::astore(int n)
@@ -1413,74 +1498,139 @@
 void TemplateTable::fop2(Operation op)
 {
   transition(ftos, ftos);
-  switch (op) {
-  case add:
-    __ pop_f(d1);
-    __ vadd_f32(d0, d1, d0);
-    break;
-  case sub:
-    __ pop_f(d1);
-    __ vsub_f32(d0, d1, d0);
-    break;
-  case mul:
-    __ pop_f(d1);
-    __ vmul_f32(d0, d1, d0);
-    break;
-  case div:
-    __ pop_f(d1);
-    __ vdiv_f32(d0, d1, d0);
-    break;
-  case rem:
-    __ vcvt_f64_f32(d1, d0);
-    __ pop_f(d0);
-    __ vcvt_f64_f32(d0, d0);
-#ifndef HARD_FLOAT_CC
-    __ vmov_f64(r0, r1, d0);
-    __ vmov_f64(r2, r3, d1);
+  if(hasFPU()) {
+    switch (op) {
+    case add:
+      __ pop_f(d1);
+      __ vadd_f32(d0, d1, d0);
+      break;
+    case sub:
+      __ pop_f(d1);
+      __ vsub_f32(d0, d1, d0);
+      break;
+    case mul:
+      __ pop_f(d1);
+      __ vmul_f32(d0, d1, d0);
+      break;
+    case div:
+      __ pop_f(d1);
+      __ vdiv_f32(d0, d1, d0);
+      break;
+    case rem:
+      __ vmov_f32(f1, f0);
+      __ pop_f(f0);
+      #ifndef HARD_FLOAT_CC
+      __ vmov_f32(r0, f0);
+      __ vmov_f32(r1, f1);
+      #endif
+      __ mov(rscratch1, (address)fmodf);
+      __ bl(rscratch1);
+      #ifndef HARD_FLOAT_CC
+      __ vmov_f32(f0, r0);
+      #endif
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  } else {
+#ifdef __SOFTFP__
+    __ mov(r1, r0);
+    __ pop_i(r0);
+    switch (op) {
+    case add:
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fadd), 0);
+      break;
+    case sub:
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fsub), 0);
+      break;
+    case mul:
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fmul), 0);
+      break;
+    case div:
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fdiv), 0);
+      break;
+    case rem:
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 0);
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  #else
+      // expected -mfloat-abi=soft
+      ShouldNotReachHere();
 #endif
-    __ mov(rscratch1, (address)(double (*)(double, double))fmod);
-    __ bl(rscratch1);
-    __ vcvt_f32_f64(d0, d0);
-    break;
-  default:
-    ShouldNotReachHere();
-    break;
-  }
+  }
 }
 
 void TemplateTable::dop2(Operation op)
 {
   transition(dtos, dtos);
-  switch (op) {
-  case add:
-    __ pop_d(d1);
-    __ vadd_f64(d0, d1, d0);
-    break;
-  case sub:
-    __ pop_d(d1);
-    __ vsub_f64(d0, d1, d0);
-    break;
-  case mul:
-    __ pop_d(d1);
-    __ vmul_f64(d0, d1, d0);
-    break;
-  case div:
-    __ pop_d(d1);
-    __ vdiv_f64(d0, d1, d0);
-    break;
-  case rem:
-    __ vmov_f64(d1, d0);
-    __ pop_d(d0);
-#ifndef HARD_FLOAT_CC
-    __ vmov_f64(r0, r1, d0);
-    __ vmov_f64(r2, r3, d1);
+  if (hasFPU()) {
+    switch (op) {
+    case add:
+      __ pop_d(d1);
+      __ vadd_f64(d0, d1, d0);
+      break;
+    case sub:
+      __ pop_d(d1);
+      __ vsub_f64(d0, d1, d0);
+      break;
+    case mul:
+      __ pop_d(d1);
+      __ vmul_f64(d0, d1, d0);
+      break;
+    case div:
+      __ pop_d(d1);
+      __ vdiv_f64(d0, d1, d0);
+      break;
+    case rem:
+      __ vmov_f64(d1, d0);
+      __ pop_d(d0);
+      #ifndef HARD_FLOAT_CC
+      __ vmov_f64(r0, r1, d0);
+      __ vmov_f64(r2, r3, d1);
+      #endif
+      __ mov(rscratch1, (address)fmod);
+      __ bl(rscratch1);
+      #ifndef HARD_FLOAT_CC
+      __ vmov_f64(d0, r0, r1);
+      #endif
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  } else {
+#ifdef __SOFTFP__
+    __ push_l(r0, r1);
+    __ pop_l(r2, r3);
+    __ pop_l(r0, r1);
+    switch (op) {
+    case add:
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dadd), 0);
+      break;
+    case sub:
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsub), 0);
+      break;
+    case mul:
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dmul), 0);
+      break;
+    case div:
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::ddiv), 0);
+      break;
+    case rem:
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 0);
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+#else
+      // expected -mfloat-abi=soft
+      ShouldNotReachHere();
 #endif
-    __ mov(rscratch1, (address)(double (*)(double, double))fmod);
-    __ bl(rscratch1);
-    break;
-  default:
-    ShouldNotReachHere();
-    break;
   }
 }
 
@@ -1501,13 +1651,31 @@
 void TemplateTable::fneg()
 {
   transition(ftos, ftos);
-  __ vneg_f32(d0, d0);
+  if(hasFPU()) {
+      __ vneg_f32(d0, d0);
+  } else {
+#ifdef __SOFTFP__
+    __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fneg), 0);
+#else
+      // expected -mfloat-abi=soft
+      ShouldNotReachHere();
+#endif
+  }
 }
 
 void TemplateTable::dneg()
 {
   transition(dtos, dtos);
-  __ vneg_f64(d0, d0);
+  if(hasFPU()) {
+      __ vneg_f64(d0, d0);
+  } else {
+#ifdef __SOFTFP__
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dneg), 0);
+#else
+      // expected -mfloat-abi=soft
+      ShouldNotReachHere();
+#endif
+  }
 }
 
 void TemplateTable::iinc()
@@ -1592,20 +1760,32 @@
     __ reg_printf("Convert i2l (after) 0x%08x%08x\n", r1, r0);
     break;
   case Bytecodes::_i2f:
-    //__ bkpt(735);
-    //__ scvtfws(d0, r0);
-    //__ reg_printf("VCVT Convert i2f, (before) 0x%08x\n", r0);
-    __ vmov_f32(d0, r0);
-    //__ vmov_f32(r0, d0);
-    //__ reg_printf("VCVT Convert i2f, (before) 0x%08x\n", r0);
-    __ vcvt_f32_s32(d0, d0);
-    //__ vmov_f32(rscratch1, d0);
-    //__ reg_printf("VCVT Convert i2f, (after ) 0x%08x\n", rscratch1);
+    if(hasFPU()) {
+      __ vmov_f32(d0, r0);
+      __ vcvt_f32_s32(d0, d0);
+    } else {
+#ifdef __SOFTFP__
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::i2f), 0);
+#else
+      // expected -mfloat-abi=soft
+      ShouldNotReachHere();
+#endif
+    }
     break;
   case Bytecodes::_i2d:
-    //__ scvtfwd(d0, r0);
-    __ vmov_f32(d0, r0);
-    __ vcvt_f64_s32(d0, d0);
+    if(hasFPU()) {
+      //__ scvtfwd(d0, r0);
+      __ vmov_f32(d0, r0);
+      __ vcvt_f64_s32(d0, d0);
+    } else {
+#ifdef __SOFTFP__
+        // r0 -> <r1:r0>
+      __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::i2d), 0);
+#else
+      // expected -mfloat-abi=soft
+      ShouldNotReachHere();
+#endif
+    }
     break;
   case Bytecodes::_i2b:
     __ sxtb(r0, r0);
@@ -1621,72 +1801,96 @@
     break;
   case Bytecodes::_l2f:
     // <r1:r0> -> d0
+    // or <r1:r0> -> r0 for softfp
     __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::l2f), 0);
+#ifndef HARD_FLOAT_CC
+    if(hasFPU()) {
+        __ vmov_f32(d0, r0);
+    }
+#endif
     break;
   case Bytecodes::_l2d:
     // <r1:r0> -> d0
+    // or <r1:r0> -> <r1:r0> for softfp
     __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::l2d), 0);
+#ifndef HARD_FLOAT_CC
+    if(hasFPU()) {
+        __ vmov_f64(d0, r0, r1);
+    }
+#endif
     break;
-   //FIXME these instructions have a fallback in aarch64 but not sure why these especially
   case Bytecodes::_f2i:
   {
-    /*Label L_Okay;
-    __ clear_fpsr();
-    __ fcvtzsw(r0, d0);
-    __ get_fpsr(r1);
-    __ cmp(r1, 0);
-    __ b(L_Okay, Assmembler::EQ);
-    //__ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::f2i),
-    //                      0, 1, MacroAssembler::ret_type_integral);
-    __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 0);
-    //TODO why float not counted
-    __ bind(L_Okay);*/
-    __ vcvt_s32_f32(d0, d0);
-    __ vmov_f32(r0, d0);
+      if(hasFPU()) {
+        __ vcvt_s32_f32(d0, d0);
+        __ vmov_f32(r0, d0);
+      } else {
+        __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 0);
+      }
   }
     break;
   case Bytecodes::_f2l:
   {
+#if !defined(HARD_FLOAT_CC)
     //float already in d0 long goes to <r1:r0>
-#ifndef HARD_FLOAT_CC
-    //Need to move float in d0 to r0
-    __ vmov_f32(r0, d0);
-#endif
+    if(hasFPU()) {
+        //Need to move float in d0 to r0
+        __ vmov_f32(r0, d0);
+    }
+#endif //!defined(HARD_FLOAT_CC)
     __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 0);
   }
     break;
   case Bytecodes::_f2d:
-    __ vcvt_f64_f32(d0, d0);
+    if(hasFPU()) {
+        __ vcvt_f64_f32(d0, d0);
+    } else {
+#ifdef __SOFTFP__
+        __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::f2d), 0);
+#else
+      // expected -mfloat-abi=soft
+      ShouldNotReachHere();
+#endif
+    }
     break;
   case Bytecodes::_d2i:
   {
-    /*Label L_Okay;
-    __ clear_fpsr();
-    __ fcvtzdw(r0, d0);
-    __ get_fpsr(r1);
-    __ cmp(r1, 0);
-    __ b(L_Okay, Assmembler::EQ);
-    // __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::d2i),
-    //                      0, 1, MacroAssembler::ret_type_integral);
-    __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 0);
-    // TODO why float not counted?
-    __ bind(L_Okay);*/
-    __ vcvt_s32_f64(d0, d0);
-    __ vmov_f32(r0, d0);
+    if(hasFPU()) {
+        __ vcvt_s32_f64(d0, d0);
+        __ vmov_f32(r0, d0);
+    } else {
+#ifdef __SOFTFP__
+        __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 0);
+#else
+        // expected -mfloat-abi=soft
+        ShouldNotReachHere();
+#endif
+    }
   }
     break;
   case Bytecodes::_d2l:
   {
     // d0 -> <r1:r0>
-#ifndef HARD_FLOAT_CC
-    //Need to move float in d0 to r0
-    __ vmov_f64(r0, r1, d0);
-#endif
+#if !defined(HARD_FLOAT_CC)
+    if(hasFPU()) {
+        //Need to move float in d0 to r0
+        __ vmov_f64(r0, r1, d0);
+    }
+#endif //!defined(HARD_FLOAT_CC)
     __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 0);
   }
     break;
   case Bytecodes::_d2f:
-    __ vcvt_f32_f64(d0, d0);
+    if(hasFPU()) {
+        __ vcvt_f32_f64(d0, d0);
+    } else {
+#ifdef __SOFTFP__
+        __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::d2f), 0);
+#else
+      // expected -mfloat-abi=soft
+      ShouldNotReachHere();
+#endif
+    }
     break;
   default:
     ShouldNotReachHere();
@@ -1721,40 +1925,65 @@
 
 void TemplateTable::float_cmp(bool is_float, int unordered_result)
 {
-  //__ bkpt(400);
-  if (is_float) {
-    __ pop_f(d1);
-    __ vcmp_f32(d1, d0);
-  } else {
-    __ pop_d(d1);
-    /*__ vmov_f64(r0, r1, d0);
-    __ vmov_f64(r2, r3, d1);
-    __ reg_printf("Doing comparison cmp( 0x%08x%08x,\n", r3, r2);
-    __ reg_printf("                      0x%08x%08x)\n", r1, r0);*/
-    __ vcmp_f64(d1, d0);
-  }
-  __ vmrs(rscratch1);
-  __ andr(rscratch1, rscratch1, Assembler::FP_MASK);
-  __ reg_printf("Masked comparison result is %08x\n", rscratch1);
-
-  if (unordered_result < 0) {
-    // we want -1 for unordered or less than, 0 for equal and 1 for
-    // greater than.
-    __ mov(r0, -1);
-    __ cmp(rscratch1, Assembler::FP_EQ);
-    __ mov(r0, 0, Assembler::EQ);
-    __ cmp(rscratch1, Assembler::FP_GT);
-    __ mov(r0, 1, Assembler::EQ);
-    __ reg_printf("un_res < 0, comparison result is %d\n", r0);
-  } else {
-    // we want -1 for less than, 0 for equal and 1 for unordered or
-    // greater than.
-    __ mov(r0, 1);
-    __ cmp(rscratch1, Assembler::FP_LT);
-    __ sub(r0, r0, 2, Assembler::EQ); //Load -1 - but one less instruction
-    __ cmp(rscratch1, Assembler::FP_EQ);
-    __ mov(r0, 0, Assembler::EQ);
-    __ reg_printf("un_res >= 0, comparison result is %d\n", r0);
+    if(hasFPU()) {
+        if (is_float) {
+         __ pop_f(d1);
+         __ vcmp_f32(d1, d0);
+       } else {
+         __ pop_d(d1);
+         /*__ vmov_f64(r0, r1, d0);
+         __ vmov_f64(r2, r3, d1);
+         __ reg_printf("Doing comparison cmp( 0x%08x%08x,\n", r3, r2);
+         __ reg_printf("                      0x%08x%08x)\n", r1, r0);*/
+         __ vcmp_f64(d1, d0);
+       }
+       __ vmrs(rscratch1);
+       __ andr(rscratch1, rscratch1, Assembler::FP_MASK);
+       __ reg_printf("Masked comparison result is %08x\n", rscratch1);
+
+       if (unordered_result < 0) {
+         // we want -1 for unordered or less than, 0 for equal and 1 for
+         // greater than.
+         __ mov(r0, -1);
+         __ cmp(rscratch1, Assembler::FP_EQ);
+         __ mov(r0, 0, Assembler::EQ);
+         __ cmp(rscratch1, Assembler::FP_GT);
+         __ mov(r0, 1, Assembler::EQ);
+         __ reg_printf("un_res < 0, comparison result is %d\n", r0);
+       } else {
+         // we want -1 for less than, 0 for equal and 1 for unordered or
+         // greater than.
+         __ mov(r0, 1);
+         __ cmp(rscratch1, Assembler::FP_LT);
+         __ sub(r0, r0, 2, Assembler::EQ); //Load -1 - but one less instruction
+         __ cmp(rscratch1, Assembler::FP_EQ);
+         __ mov(r0, 0, Assembler::EQ);
+         __ reg_printf("un_res >= 0, comparison result is %d\n", r0);
+       }
+    } else { // !hasFPU
+#ifdef __SOFTFP__
+        if (is_float) {
+            __ mov(r1, r0);
+            __ pop_i(r0);
+            if (unordered_result < 0) {
+                __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fcmpl), 0);
+            } else {
+                __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fcmpg), 0);
+            }
+        } else {
+            __ mov(r2, r0);
+            __ mov(r3, r1);
+            __ pop_l(r0);
+            if (unordered_result < 0) {
+                __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcmpl), 0);
+            } else {
+                __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcmpg), 0);
+            }
+        }
+#else
+        // expected -mfloat-abi=soft
+        ShouldNotReachHere();
+#endif
   }
 }
 
@@ -2441,7 +2670,6 @@
                                                bool is_invokevirtual,
                                                bool is_invokevfinal, /*unused*/
                                                bool is_invokedynamic) {
-  //__ create_breakpoint();
   // setup registers
   const Register cache = rscratch1;
   const Register index = r14;
@@ -2642,7 +2870,11 @@
   __ b(notFloat, Assembler::NE);
   // ftos
   __ lea(rscratch1, field);
-  __ vldr_f32(d0, Address(rscratch1));
+  if(hasFPU()) {
+    __ vldr_f32(d0, Address(rscratch1));
+  } else {
+    __ ldr(r0, Address(rscratch1));
+  }
   __ push(ftos);
   // Rewrite bytecode to be faster
   if (!is_static) {
@@ -2658,7 +2890,9 @@
   // dtos
   __ lea(rscratch1, field);
   __ atomic_ldrd(r0, r1, rscratch1);
-  __ vmov_f64(d0, r0, r1);
+  if(hasFPU()) {
+    __ vmov_f64(d0, r0, r1);
+  }
   __ push(dtos);
   // Rewrite bytecode to be faster
   if (!is_static) {
@@ -2908,7 +3142,11 @@
       pop_and_check_object(obj);
     }
     __ lea(rscratch1, field);
-    __ vstr_f32(d0, Address(rscratch1));
+    if(hasFPU()) {
+        __ vstr_f32(d0, Address(rscratch1));
+    } else {
+        __ str(r0, Address(rscratch1));
+    }
     if (!is_static) {
       patch_bytecode(Bytecodes::_fast_fputfield, bc, r1, true, byte_no);
     }
@@ -2928,7 +3166,9 @@
       pop_and_check_object(obj);
     }
     __ lea(rscratch1, field);
-    __ vmov_f64(r0, r1, d0);
+    if(hasFPU()) {
+        __ vmov_f64(r0, r1, d0);
+    }
     __ atomic_strd(r0, r1, rscratch1, r2, r3);
     if (!is_static) {
       patch_bytecode(Bytecodes::_fast_dputfield, bc, r1, true, byte_no);
@@ -2950,6 +3190,8 @@
     __ membar(MacroAssembler::StoreLoad);
     __ bind(notVolatile);
   }
+  //FIXME find a more elegant way!
+  __ get_dispatch();
 }
 
 void TemplateTable::putfield(int byte_no) {
@@ -2983,8 +3225,20 @@
     case Bytecodes::_fast_sputfield: // fall through
     case Bytecodes::_fast_cputfield: // fall through
     case Bytecodes::_fast_iputfield: __ push_i(r0); break;
-    case Bytecodes::_fast_dputfield: __ push_d(); break;
-    case Bytecodes::_fast_fputfield: __ push_f(); break;
+    case Bytecodes::_fast_dputfield:
+        if(hasFPU()) {
+            __ push_d();
+        } else {
+            __ push_l();
+        }
+        break;
+    case Bytecodes::_fast_fputfield:
+        if(hasFPU()) {
+            __ push_f();
+        } else {
+            __ push_i();
+        }
+        break;
     case Bytecodes::_fast_lputfield: __ push_l(r0); break;
 
     default:
@@ -3008,9 +3262,15 @@
     case Bytecodes::_fast_zputfield: // fall through
     case Bytecodes::_fast_sputfield: // fall through
     case Bytecodes::_fast_cputfield: // fall through
+    case Bytecodes::_fast_fputfield:
+        if(hasFPU()) {
+            __ pop_f(); break;
+        }
     case Bytecodes::_fast_iputfield: __ pop_i(r0); break;
-    case Bytecodes::_fast_dputfield: __ pop_d(); break;
-    case Bytecodes::_fast_fputfield: __ pop_f(); break;
+    case Bytecodes::_fast_dputfield:
+        if(hasFPU()) {
+            __ pop_d(); break;
+        }
     case Bytecodes::_fast_lputfield: __ pop_l(r0); break;
     }
     __ bind(L2);
@@ -3055,10 +3315,20 @@
   case Bytecodes::_fast_aputfield:
     do_oop_store(_masm, field, r0, _bs->kind(), false);
     break;
+  case Bytecodes::_fast_dputfield:
+    if(hasFPU()) {
+        __ vmov_f64(r0, r1, d0);
+    }
   case Bytecodes::_fast_lputfield:
     __ lea(rscratch1, field);
     __ atomic_strd(r0, r1, rscratch1, r2, r3);
     break;
+  case Bytecodes::_fast_fputfield:
+    if(hasFPU()) {
+    __ lea(rscratch1, field);
+    __ vstr_f32(d0, Address(rscratch1));
+    break;
+    }
   case Bytecodes::_fast_iputfield:
     __ str(r0, field);
     break;
@@ -3073,15 +3343,6 @@
   case Bytecodes::_fast_cputfield:
     __ strh(r0, field);
     break;
-  case Bytecodes::_fast_fputfield:
-    __ lea(rscratch1, field);
-    __ vstr_f32(d0, Address(rscratch1));
-    break;
-  case Bytecodes::_fast_dputfield:
-    __ lea(rscratch1, field);
-    __ vmov_f64(r0, r1, d0);
-    __ atomic_strd(r0, r1, rscratch1, r2, r3);
-    break;
   default:
     ShouldNotReachHere();
   }
@@ -3140,10 +3401,23 @@
     __ load_heap_oop(r0, field);
     __ verify_oop(r0);
     break;
+  case Bytecodes::_fast_dgetfield:
+    if(hasFPU()) {
+    __ lea(rscratch1, field); // rscratch1 <= field
+    __ atomic_ldrd(r0, r1, rscratch1);
+    __ vmov_f64(d0, r0, r1);
+    break;
+    }
   case Bytecodes::_fast_lgetfield:
     __ lea(rscratch1, field);
     __ atomic_ldrd(r0, r1, rscratch1);
     break;
+  case Bytecodes::_fast_fgetfield:
+    if(hasFPU()) {
+    __ lea(r0, field); // r0 <= field
+    __ vldr_f32(d0, Address(r0));
+      break;
+    }
   case Bytecodes::_fast_igetfield:
     __ ldr(r0, field);
     break;
@@ -3156,16 +3430,6 @@
   case Bytecodes::_fast_cgetfield:
     __ load_unsigned_short(r0, field);
     break;
-  case Bytecodes::_fast_fgetfield:
-    __ lea(r0, field); // r0 <= field
-    __ vldr_f32(d0, Address(r0));
-    __ vmov_f32(rscratch1, d0);
-    break;
-  case Bytecodes::_fast_dgetfield:
-    __ lea(rscratch1, field); // r0 <= field
-    __ atomic_ldrd(r0, r1, rscratch1);
-    __ vmov_f64(d0, r0, r1);
-    break;
   default:
     ShouldNotReachHere();
   }
@@ -3194,6 +3458,12 @@
 
   Address field(r0, r1);
   switch (state) {
+  case ftos:
+    if(hasFPU()) {
+    __ lea(r0, field);
+    __ vldr_f32(d0, Address(r0));
+    break;
+    }
   case itos:
     __ ldr(r0, field);
     break;
@@ -3201,10 +3471,6 @@
     __ load_heap_oop(r0, field);
     __ verify_oop(r0);
     break;
-  case ftos:
-    __ lea(r0, field);
-    __ vldr_f32(d0, Address(r0));
-    break;
   default:
     ShouldNotReachHere();
   }
--- a/src/cpu/aarch32/vm/vm_version_aarch32.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/vm_version_aarch32.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -219,4 +219,12 @@
 
   //FIXME: turning off CriticalJNINatives flag while it is not implemented
   FLAG_SET_DEFAULT(CriticalJNINatives, false);
+#ifndef HARD_FLOAT_CC
+  if( !(VM_Version::features() & (FT_VFPV2 | FT_VFPV3)) ) {
+      if(FLAG_IS_CMDLINE(UseFPU)) {
+          warning("FPU is not present on this core");
+      }
+      FLAG_SET_DEFAULT(UseFPU, false);
+  }
+#endif
 }
--- a/src/cpu/aarch32/vm/vm_version_aarch32.hpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/cpu/aarch32/vm/vm_version_aarch32.hpp	Thu Oct 27 18:42:29 2016 +0300
@@ -85,4 +85,11 @@
     static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
 };
 
+#ifdef HARD_FLOAT_CC
+inline const bool hasFPU(void) { return true; }
+#else
+inline bool hasFPU(void) { return (UseFPU); }
+#endif
+
+
 #endif // CPU_AARCH32_VM_VM_VERSION_AARCH32_HPP
--- a/src/share/vm/c1/c1_LIR.hpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/share/vm/c1/c1_LIR.hpp	Thu Oct 27 18:42:29 2016 +0300
@@ -706,18 +706,40 @@
 
 #ifdef __SOFTFP__
       case T_FLOAT:
-        res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) |
-                                  LIR_OprDesc::float_type  |
-                                  LIR_OprDesc::cpu_register |
-                                  LIR_OprDesc::single_size |
-                                  LIR_OprDesc::virtual_mask);
+#ifdef AARCH32
+        if (hasFPU()) {
+            res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) |
+                                      LIR_OprDesc::float_type           |
+                                      LIR_OprDesc::fpu_register         |
+                                      LIR_OprDesc::single_size          |
+                                      LIR_OprDesc::virtual_mask);
+        } else
+#endif // AARCH32
+        {
+            res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) |
+                                      LIR_OprDesc::float_type  |
+                                      LIR_OprDesc::cpu_register |
+                                      LIR_OprDesc::single_size |
+                                      LIR_OprDesc::virtual_mask);
+        }
         break;
       case T_DOUBLE:
-        res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) |
-                                  LIR_OprDesc::double_type |
-                                  LIR_OprDesc::cpu_register |
-                                  LIR_OprDesc::double_size |
-                                  LIR_OprDesc::virtual_mask);
+#ifdef AARCH32
+        if(hasFPU()) {
+            res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) |
+                                                LIR_OprDesc::double_type           |
+                                                LIR_OprDesc::fpu_register          |
+                                                LIR_OprDesc::double_size           |
+                                                LIR_OprDesc::virtual_mask);
+        } else
+#endif
+        {
+            res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) |
+                                      LIR_OprDesc::double_type |
+                                      LIR_OprDesc::cpu_register |
+                                      LIR_OprDesc::double_size |
+                                      LIR_OprDesc::virtual_mask);
+        }
         break;
 #else // __SOFTFP__
       case T_FLOAT:
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -1197,7 +1197,11 @@
   if (x->type()->is_void()) {
     __ return_op(LIR_OprFact::illegalOpr);
   } else {
+#ifdef AARCH32
+    LIR_Opr reg = java_result_register_for(x->type(), /*callee=*/true);
+#else
     LIR_Opr reg = result_register_for(x->type(), /*callee=*/true);
+#endif
     LIRItem result(x->result(), this);
 
     result.load_item_force(reg);
@@ -2919,7 +2923,11 @@
   // setup result register
   LIR_Opr result_register = LIR_OprFact::illegalOpr;
   if (x->type() != voidType) {
+#ifdef AARCH32
+    result_register = java_result_register_for(x->type());
+#else
     result_register = result_register_for(x->type());
+#endif
   }
 
   CodeEmitInfo* info = state_for(x, x->state());
--- a/src/share/vm/c1/c1_LIRGenerator.hpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp	Thu Oct 27 18:42:29 2016 +0300
@@ -284,6 +284,10 @@
 
   static LIR_Opr result_register_for(ValueType* type, bool callee = false);
 
+#ifdef AARCH32
+  static LIR_Opr java_result_register_for(ValueType* type, bool callee = false);
+#endif
+
   ciObject* get_jobject_constant(Value value);
 
   LIRItemList* invoke_visit_arguments(Invoke* x);
--- a/src/share/vm/c1/c1_LinearScan.cpp	Tue Oct 04 13:21:19 2016 +0300
+++ b/src/share/vm/c1/c1_LinearScan.cpp	Thu Oct 27 18:42:29 2016 +0300
@@ -32,6 +32,7 @@
 #include "c1/c1_LinearScan.hpp"
 #include "c1/c1_ValueStack.hpp"
 #include "utilities/bitMap.inline.hpp"
+#include "vm_version_aarch32.hpp"
 #ifdef TARGET_ARCH_x86
 # include "vmreg_x86.inline.hpp"
 #endif
@@ -196,10 +197,10 @@
 }
 
 bool LinearScan::is_virtual_cpu_interval(const Interval* i) {
-#if defined(__SOFTFP__) || defined(E500V2)
+#if !defined(AARCH32) && (defined(__SOFTFP__) || defined(E500V2))
   return i->reg_num() >= LIR_OprDesc::vreg_base;
 #else
-  return i->reg_num() >= LIR_OprDesc::vreg_base && (i->type() != T_FLOAT && i->type() != T_DOUBLE);
+  return i->reg_num() >= LIR_OprDesc::vreg_base && (AARCH32_ONLY(!hasFPU() ||) (i->type() != T_FLOAT && i->type() != T_DOUBLE));
 #endif // __SOFTFP__ or E500V2
 }
 
@@ -208,10 +209,10 @@
 }
 
 bool LinearScan::is_virtual_fpu_interval(const Interval* i) {
-#if defined(__SOFTFP__) || defined(E500V2)
+#if !defined(AARCH32) && (defined(__SOFTFP__) || defined(E500V2))
   return false;
 #else
-  return i->reg_num() >= LIR_OprDesc::vreg_base && (i->type() == T_FLOAT || i->type() == T_DOUBLE);
+  return i->reg_num() >= LIR_OprDesc::vreg_base && (i->type() == T_FLOAT || i->type() == T_DOUBLE) AARCH32_ONLY(&& hasFPU());
 #endif // __SOFTFP__ or E500V2
 }
 
@@ -2077,7 +2078,14 @@
 
 #ifdef __SOFTFP__
       case T_FLOAT:  // fall through
-#endif // __SOFTFP__
+#if defined(AARCH32)
+      if(hasFPU()) {
+        assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register");
+        assert(interval->assigned_regHi() == any_reg, "must not have hi register");
+        return LIR_OprFact::single_fpu(assigned_reg - pd_first_fpu_reg);
+      }
+#endif // AARCH32
+#endif // __SOFTFP__
       case T_INT: {
         assert(assigned_reg >= pd_first_cpu_reg && assigned_reg <= pd_last_cpu_reg, "no cpu register");
         assert(interval->assigned_regHi() == any_reg, "must not have hi register");
@@ -2086,7 +2094,15 @@
 
 #ifdef __SOFTFP__
       case T_DOUBLE:  // fall through
-#endif // __SOFTFP__
+#if defined(AARCH32)
+        if(hasFPU()) {
+            assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register");
+            assert(interval->assigned_regHi() >= pd_first_fpu_reg && interval->assigned_regHi() <= pd_last_fpu_reg, "no fpu register");
+            assert(assigned_reg % 2 == 0 && assigned_reg + 1 == interval->assigned_regHi(), "must be sequential and even");
+            return LIR_OprFact::double_fpu(assigned_reg - pd_first_fpu_reg, interval->assigned_regHi() - pd_first_fpu_reg);
+        }
+#endif // AARCH32
+#endif // __SOFTFP__
       case T_LONG: {
         int assigned_regHi = interval->assigned_regHi();
         assert(assigned_reg >= pd_first_cpu_reg && assigned_reg <= pd_last_cpu_reg, "no cpu register");