changeset 2:264be8bc2c98

Small fixes to build Zero and OpenJDK 8, and a large refactoring for invokedynamic.
author Roman Kennke <rkennke@redhat.com>
date Wed, 08 Aug 2012 15:22:13 +0200
parents cef23de210a4
children 9b527bc96254
files meth-lazy-7023639.patch remove_zeroonly-return.patch series zero-meth.patch
diffstat 4 files changed, 28619 insertions(+), 0 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/meth-lazy-7023639.patch	Wed Aug 08 15:22:13 2012 +0200
@@ -0,0 +1,27165 @@
+7023639: JSR 292 method handle invocation needs a fast path for compiled code
+6984705: JSR 292 method handle creation should not go through JNI
+Summary: remove assembly code for JDK 7 chained method handles
+Reviewed-by: jrose, twisti, kvn, mhaupt
+Contributed-by: John Rose <john.r.rose@oracle.com>, Christian Thalinger <christian.thalinger@oracle.com>, Michael Haupt <michael.haupt@oracle.com>
+
+diff --git a/agent/src/share/classes/sun/jvm/hotspot/code/CodeBlob.java b/agent/src/share/classes/sun/jvm/hotspot/code/CodeBlob.java
+--- a/agent/src/share/classes/sun/jvm/hotspot/code/CodeBlob.java
++++ b/agent/src/share/classes/sun/jvm/hotspot/code/CodeBlob.java
+@@ -93,7 +93,6 @@
+   public boolean isUncommonTrapStub()   { return false; }
+   public boolean isExceptionStub()      { return false; }
+   public boolean isSafepointStub()      { return false; }
+-  public boolean isRicochetBlob()       { return false; }
+   public boolean isAdapterBlob()        { return false; }
+ 
+   // Fine grain nmethod support: isNmethod() == isJavaMethod() || isNativeMethod() || isOSRMethod()
+diff --git a/agent/src/share/classes/sun/jvm/hotspot/code/CodeCache.java b/agent/src/share/classes/sun/jvm/hotspot/code/CodeCache.java
+--- a/agent/src/share/classes/sun/jvm/hotspot/code/CodeCache.java
++++ b/agent/src/share/classes/sun/jvm/hotspot/code/CodeCache.java
+@@ -57,7 +57,6 @@
+     virtualConstructor.addMapping("BufferBlob", BufferBlob.class);
+     virtualConstructor.addMapping("nmethod", NMethod.class);
+     virtualConstructor.addMapping("RuntimeStub", RuntimeStub.class);
+-    virtualConstructor.addMapping("RicochetBlob", RicochetBlob.class);
+     virtualConstructor.addMapping("AdapterBlob", AdapterBlob.class);
+     virtualConstructor.addMapping("MethodHandlesAdapterBlob", MethodHandlesAdapterBlob.class);
+     virtualConstructor.addMapping("SafepointBlob", SafepointBlob.class);
+@@ -127,10 +126,6 @@
+       Assert.that(result.blobContains(start) || result.blobContains(start.addOffsetTo(8)),
+                                                                     "found wrong CodeBlob");
+     }
+-    if (result.isRicochetBlob()) {
+-      // This should probably be done for other SingletonBlobs
+-      return VM.getVM().ricochetBlob();
+-    }
+     return result;
+   }
+ 
+diff --git a/agent/src/share/classes/sun/jvm/hotspot/code/RicochetBlob.java b/agent/src/share/classes/sun/jvm/hotspot/code/RicochetBlob.java
+deleted file mode 100644
+--- a/agent/src/share/classes/sun/jvm/hotspot/code/RicochetBlob.java
++++ /dev/null
+@@ -1,70 +0,0 @@
+-/*
+- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+- *
+- * This code is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License version 2 only, as
+- * published by the Free Software Foundation.
+- *
+- * This code is distributed in the hope that it will be useful, but WITHOUT
+- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+- * version 2 for more details (a copy is included in the LICENSE file that
+- * accompanied this code).
+- *
+- * You should have received a copy of the GNU General Public License version
+- * 2 along with this work; if not, write to the Free Software Foundation,
+- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+- *
+- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+- * or visit www.oracle.com if you need additional information or have any
+- * questions.
+- *
+- */
+-
+-package sun.jvm.hotspot.code;
+-
+-import java.util.*;
+-import sun.jvm.hotspot.debugger.*;
+-import sun.jvm.hotspot.runtime.*;
+-import sun.jvm.hotspot.types.*;
+-
+-/** RicochetBlob (currently only used by Compiler 2) */
+-
+-public class RicochetBlob extends SingletonBlob {
+-  static {
+-    VM.registerVMInitializedObserver(new Observer() {
+-        public void update(Observable o, Object data) {
+-          initialize(VM.getVM().getTypeDataBase());
+-        }
+-      });
+-  }
+-
+-  private static void initialize(TypeDataBase db) {
+-    Type type = db.lookupType("RicochetBlob");
+-
+-    bounceOffsetField                = type.getCIntegerField("_bounce_offset");
+-    exceptionOffsetField             = type.getCIntegerField("_exception_offset");
+-  }
+-
+-  private static CIntegerField bounceOffsetField;
+-  private static CIntegerField exceptionOffsetField;
+-
+-  public RicochetBlob(Address addr) {
+-    super(addr);
+-  }
+-
+-  public boolean isRicochetBlob() {
+-    return true;
+-  }
+-
+-  public Address bounceAddr() {
+-    return codeBegin().addOffsetTo(bounceOffsetField.getValue(addr));
+-  }
+-
+-  public boolean returnsToBounceAddr(Address pc) {
+-    Address bouncePc = bounceAddr();
+-    return (pc.equals(bouncePc) || pc.addOffsetTo(Frame.pcReturnOffset()).equals(bouncePc));
+-  }
+-
+-}
+diff --git a/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java b/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java
+--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java
++++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java
+@@ -147,12 +147,6 @@
+     }
+   }
+ 
+-  public boolean isRicochetFrame() {
+-    CodeBlob cb = VM.getVM().getCodeCache().findBlob(getPC());
+-    RicochetBlob rcb = VM.getVM().ricochetBlob();
+-    return (cb == rcb && rcb != null && rcb.returnsToBounceAddr(getPC()));
+-  }
+-
+   public boolean isCompiledFrame() {
+     if (Assert.ASSERTS_ENABLED) {
+       Assert.that(!VM.getVM().isCore(), "noncore builds only");
+@@ -216,8 +210,7 @@
+   public Frame realSender(RegisterMap map) {
+     if (!VM.getVM().isCore()) {
+       Frame result = sender(map);
+-      while (result.isRuntimeFrame() ||
+-             result.isRicochetFrame()) {
++      while (result.isRuntimeFrame()) {
+         result = result.sender(map);
+       }
+       return result;
+@@ -631,9 +624,6 @@
+     if (Assert.ASSERTS_ENABLED) {
+       Assert.that(cb != null, "sanity check");
+     }
+-    if (cb == VM.getVM().ricochetBlob()) {
+-      oopsRicochetDo(oopVisitor, regMap);
+-    }
+     if (cb.getOopMaps() != null) {
+       OopMapSet.oopsDo(this, cb, regMap, oopVisitor, VM.getVM().isDebugging());
+ 
+@@ -650,10 +640,6 @@
+     //    }
+   }
+ 
+-  private void oopsRicochetDo      (AddressVisitor oopVisitor, RegisterMap regMap) {
+-    // XXX Empty for now
+-  }
+-
+   // FIXME: implement the above routines, plus add
+   // oops_interpreted_arguments_do and oops_compiled_arguments_do
+ }
+diff --git a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java
+--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java
++++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java
+@@ -87,8 +87,6 @@
+   private StubRoutines stubRoutines;
+   private Bytes        bytes;
+ 
+-  private RicochetBlob ricochetBlob;
+-
+   /** Flags indicating whether we are attached to a core, C1, or C2 build */
+   private boolean      usingClientCompiler;
+   private boolean      usingServerCompiler;
+@@ -628,18 +626,6 @@
+     return stubRoutines;
+   }
+ 
+-  public RicochetBlob ricochetBlob() {
+-    if (ricochetBlob == null) {
+-      Type ricochetType  = db.lookupType("SharedRuntime");
+-      AddressField ricochetBlobAddress = ricochetType.getAddressField("_ricochet_blob");
+-      Address addr = ricochetBlobAddress.getValue();
+-      if (addr != null) {
+-        ricochetBlob = new RicochetBlob(addr);
+-      }
+-    }
+-    return ricochetBlob;
+-  }
+-
+   public VMRegImpl getVMRegImplInfo() {
+     if (vmregImpl == null) {
+       vmregImpl = new VMRegImpl();
+diff --git a/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCFrame.java b/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCFrame.java
+--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCFrame.java
++++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCFrame.java
+@@ -571,8 +571,6 @@
+     //        registers callee-saved, then we will have to copy over
+     //        the RegisterMap update logic from the Intel code.
+ 
+-    if (isRicochetFrame()) return senderForRicochetFrame(map);
+-
+     // The constructor of the sender must know whether this frame is interpreted so it can set the
+     // sender's _interpreter_sp_adjustment field.
+     if (VM.getVM().getInterpreter().contains(pc)) {
+@@ -945,20 +943,6 @@
+   }
+ 
+ 
+-  private Frame senderForRicochetFrame(SPARCRegisterMap map) {
+-    if (DEBUG) {
+-      System.out.println("senderForRicochetFrame");
+-    }
+-    //RicochetFrame* f = RicochetFrame::from_frame(fr);
+-    // Cf. is_interpreted_frame path of frame::sender
+-    Address youngerSP = getSP();
+-    Address sp        = getSenderSP();
+-    map.makeIntegerRegsUnsaved();
+-    map.shiftWindow(sp, youngerSP);
+-    boolean thisFrameAdjustedStack = true;  // I5_savedSP is live in this RF
+-    return new SPARCFrame(biasSP(sp), biasSP(youngerSP), thisFrameAdjustedStack);
+-  }
+-
+   private Frame senderForEntryFrame(RegisterMap regMap) {
+     SPARCRegisterMap map = (SPARCRegisterMap) regMap;
+ 
+diff --git a/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCRicochetFrame.java b/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCRicochetFrame.java
+deleted file mode 100644
+--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCRicochetFrame.java
++++ /dev/null
+@@ -1,77 +0,0 @@
+-/*
+- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+- *
+- * This code is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License version 2 only, as
+- * published by the Free Software Foundation.
+- *
+- * This code is distributed in the hope that it will be useful, but WITHOUT
+- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+- * version 2 for more details (a copy is included in the LICENSE file that
+- * accompanied this code).
+- *
+- * You should have received a copy of the GNU General Public License version
+- * 2 along with this work; if not, write to the Free Software Foundation,
+- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+- *
+- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+- * or visit www.oracle.com if you need additional information or have any
+- * questions.
+- *
+- */
+-
+-package sun.jvm.hotspot.runtime.sparc;
+-
+-import java.util.*;
+-import sun.jvm.hotspot.asm.sparc.SPARCRegister;
+-import sun.jvm.hotspot.asm.sparc.SPARCRegisters;
+-import sun.jvm.hotspot.debugger.*;
+-import sun.jvm.hotspot.runtime.*;
+-import sun.jvm.hotspot.types.*;
+-
+-public class SPARCRicochetFrame {
+-  static {
+-    VM.registerVMInitializedObserver(new Observer() {
+-        public void update(Observable o, Object data) {
+-          initialize(VM.getVM().getTypeDataBase());
+-        }
+-      });
+-  }
+-
+-  private SPARCFrame frame;
+-
+-  private static void initialize(TypeDataBase db) {
+-    // Type type = db.lookupType("MethodHandles::RicochetFrame");
+-
+-  }
+-
+-  static SPARCRicochetFrame fromFrame(SPARCFrame f) {
+-    return new SPARCRicochetFrame(f);
+-  }
+-
+-  private SPARCRicochetFrame(SPARCFrame f) {
+-    frame = f;
+-  }
+-
+-  private Address registerValue(SPARCRegister reg) {
+-    return frame.getSP().addOffsetTo(reg.spOffsetInSavedWindow()).getAddressAt(0);
+-  }
+-
+-  public Address savedArgsBase() {
+-    return registerValue(SPARCRegisters.L4);
+-  }
+-  public Address exactSenderSP() {
+-    return registerValue(SPARCRegisters.I5);
+-  }
+-  public Address senderLink() {
+-    return frame.getSenderSP();
+-  }
+-  public Address senderPC() {
+-    return frame.getSenderPC();
+-  }
+-  public Address extendedSenderSP() {
+-    return savedArgsBase();
+-  }
+-}
+diff --git a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java
+--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java
++++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java
+@@ -269,7 +269,6 @@
+ 
+     if (isEntryFrame())       return senderForEntryFrame(map);
+     if (isInterpretedFrame()) return senderForInterpreterFrame(map);
+-    if (isRicochetFrame())    return senderForRicochetFrame(map);
+ 
+     if(cb == null) {
+       cb = VM.getVM().getCodeCache().findBlob(getPC());
+@@ -288,16 +287,6 @@
+     return new X86Frame(getSenderSP(), getLink(), getSenderPC());
+   }
+ 
+-  private Frame senderForRicochetFrame(X86RegisterMap map) {
+-    if (DEBUG) {
+-      System.out.println("senderForRicochetFrame");
+-    }
+-    X86RicochetFrame f = X86RicochetFrame.fromFrame(this);
+-    if (map.getUpdateMap())
+-      updateMapWithSavedLink(map, f.senderLinkAddress());
+-    return new X86Frame(f.extendedSenderSP(), f.exactSenderSP(), f.senderLink(), f.senderPC());
+-  }
+-
+   private Frame senderForEntryFrame(X86RegisterMap map) {
+     if (DEBUG) {
+       System.out.println("senderForEntryFrame");
+diff --git a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86RicochetFrame.java b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86RicochetFrame.java
+deleted file mode 100644
+--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86RicochetFrame.java
++++ /dev/null
+@@ -1,81 +0,0 @@
+-/*
+- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+- *
+- * This code is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License version 2 only, as
+- * published by the Free Software Foundation.
+- *
+- * This code is distributed in the hope that it will be useful, but WITHOUT
+- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+- * version 2 for more details (a copy is included in the LICENSE file that
+- * accompanied this code).
+- *
+- * You should have received a copy of the GNU General Public License version
+- * 2 along with this work; if not, write to the Free Software Foundation,
+- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+- *
+- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+- * or visit www.oracle.com if you need additional information or have any
+- * questions.
+- *
+- */
+-
+-package sun.jvm.hotspot.runtime.x86;
+-
+-import java.util.*;
+-import sun.jvm.hotspot.debugger.*;
+-import sun.jvm.hotspot.runtime.*;
+-import sun.jvm.hotspot.types.*;
+-
+-public class X86RicochetFrame extends VMObject {
+-  static {
+-    VM.registerVMInitializedObserver(new Observer() {
+-        public void update(Observable o, Object data) {
+-          initialize(VM.getVM().getTypeDataBase());
+-        }
+-      });
+-  }
+-
+-  private static void initialize(TypeDataBase db) {
+-    Type type = db.lookupType("MethodHandles::RicochetFrame");
+-
+-    senderLinkField    = type.getAddressField("_sender_link");
+-    savedArgsBaseField = type.getAddressField("_saved_args_base");
+-    exactSenderSPField = type.getAddressField("_exact_sender_sp");
+-    senderPCField      = type.getAddressField("_sender_pc");
+-  }
+-
+-  private static AddressField senderLinkField;
+-  private static AddressField savedArgsBaseField;
+-  private static AddressField exactSenderSPField;
+-  private static AddressField senderPCField;
+-
+-  static X86RicochetFrame fromFrame(X86Frame f) {
+-    return new X86RicochetFrame(f.getFP().addOffsetTo(- senderLinkField.getOffset()));
+-  }
+-
+-  private X86RicochetFrame(Address addr) {
+-    super(addr);
+-  }
+-
+-  public Address senderLink() {
+-    return senderLinkField.getValue(addr);
+-  }
+-  public Address senderLinkAddress() {
+-    return addr.addOffsetTo(senderLinkField.getOffset());
+-  }
+-  public Address savedArgsBase() {
+-    return savedArgsBaseField.getValue(addr);
+-  }
+-  public Address extendedSenderSP() {
+-    return savedArgsBase();
+-  }
+-  public Address exactSenderSP() {
+-    return exactSenderSPField.getValue(addr);
+-  }
+-  public Address senderPC() {
+-    return senderPCField.getValue(addr);
+-  }
+-}
+diff --git a/make/solaris/makefiles/fastdebug.make b/make/solaris/makefiles/fastdebug.make
+--- a/make/solaris/makefiles/fastdebug.make
++++ b/make/solaris/makefiles/fastdebug.make
+@@ -36,6 +36,14 @@
+ ifeq ("${Platform_compiler}", "sparcWorks")
+ OPT_CFLAGS/SLOWER = -xO2
+ 
++ifeq ($(COMPILER_REV_NUMERIC), 510)
++# CC 5.10 has bug XXXXX with -xO4
++OPT_CFLAGS/jvmtiClassFileReconstituter.o = $(OPT_CFLAGS/SLOWER)
++# jvm98 crashes on solaris-i586-fastdebug and solaris-sparc-fastdebug with stack overflow
++OPT_CFLAGS/escape.o = $(OPT_CFLAGS) -xspace
++OPT_CFLAGS/matcher.o = $(OPT_CFLAGS) -xspace
++endif # COMPILER_REV_NUMERIC == 510
++
+ ifeq ($(COMPILER_REV_NUMERIC), 509)
+ # To avoid jvm98 crash
+ OPT_CFLAGS/instanceKlass.o = $(OPT_CFLAGS/SLOWER)
+diff --git a/make/solaris/makefiles/optimized.make b/make/solaris/makefiles/optimized.make
+--- a/make/solaris/makefiles/optimized.make
++++ b/make/solaris/makefiles/optimized.make
+@@ -32,6 +32,11 @@
+ # (OPT_CFLAGS/SLOWER is also available, to alter compilation of buggy files)
+ ifeq ("${Platform_compiler}", "sparcWorks")
+ 
++ifeq ($(COMPILER_REV_NUMERIC), 510)
++# CC 5.10 has bug XXXXX with -xO4
++OPT_CFLAGS/jvmtiClassFileReconstituter.o = $(OPT_CFLAGS/O2)
++endif # COMPILER_REV_NUMERIC == 510
++
+ ifeq ($(shell expr $(COMPILER_REV_NUMERIC) \>= 509), 1)
+ # dtrace cannot handle tail call optimization (6672627, 6693876)
+ OPT_CFLAGS/jni.o = $(OPT_CFLAGS/DEFAULT) $(OPT_CCFLAGS/NO_TAIL_CALL_OPT)
+diff --git a/make/solaris/makefiles/product.make b/make/solaris/makefiles/product.make
+--- a/make/solaris/makefiles/product.make
++++ b/make/solaris/makefiles/product.make
+@@ -40,6 +40,11 @@
+ # (OPT_CFLAGS/SLOWER is also available, to alter compilation of buggy files)
+ ifeq ("${Platform_compiler}", "sparcWorks")
+ 
++ifeq ($(COMPILER_REV_NUMERIC), 510)
++# CC 5.10 has bug XXXXX with -xO4
++OPT_CFLAGS/jvmtiClassFileReconstituter.o = $(OPT_CFLAGS/O2)
++endif # COMPILER_REV_NUMERIC == 510
++
+ ifeq ($(shell expr $(COMPILER_REV_NUMERIC) \>= 509), 1)
+ # dtrace cannot handle tail call optimization (6672627, 6693876)
+ OPT_CFLAGS/jni.o = $(OPT_CFLAGS/DEFAULT) $(OPT_CCFLAGS/NO_TAIL_CALL_OPT)
+diff --git a/src/cpu/sparc/vm/assembler_sparc.cpp b/src/cpu/sparc/vm/assembler_sparc.cpp
+--- a/src/cpu/sparc/vm/assembler_sparc.cpp
++++ b/src/cpu/sparc/vm/assembler_sparc.cpp
+@@ -44,8 +44,10 @@
+ 
+ #ifdef PRODUCT
+ #define BLOCK_COMMENT(str) /* nothing */
++#define STOP(error) stop(error)
+ #else
+ #define BLOCK_COMMENT(str) block_comment(str)
++#define STOP(error) block_comment(error); stop(error)
+ #endif
+ 
+ // Convert the raw encoding form into the form expected by the
+@@ -992,7 +994,7 @@
+   save_frame(0);                // to avoid clobbering O0
+   ld_ptr(pc_addr, L0);
+   br_null_short(L0, Assembler::pt, PcOk);
+-  stop("last_Java_pc not zeroed before leaving Java");
++  STOP("last_Java_pc not zeroed before leaving Java");
+   bind(PcOk);
+ 
+   // Verify that flags was zeroed on return to Java
+@@ -1001,7 +1003,7 @@
+   tst(L0);
+   br(Assembler::zero, false, Assembler::pt, FlagsOk);
+   delayed() -> restore();
+-  stop("flags not zeroed before leaving Java");
++  STOP("flags not zeroed before leaving Java");
+   bind(FlagsOk);
+ #endif /* ASSERT */
+   //
+@@ -1021,7 +1023,7 @@
+   andcc(last_java_sp, 0x01, G0);
+   br(Assembler::notZero, false, Assembler::pt, StackOk);
+   delayed()->nop();
+-  stop("Stack Not Biased in set_last_Java_frame");
++  STOP("Stack Not Biased in set_last_Java_frame");
+   bind(StackOk);
+ #endif // ASSERT
+   assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame");
+@@ -1650,23 +1652,28 @@
+ 
+ 
+ void RegistersForDebugging::print(outputStream* s) {
++  FlagSetting fs(Debugging, true);
+   int j;
+-  for ( j = 0;  j < 8;  ++j )
+-    if ( j != 6 ) s->print_cr("i%d = 0x%.16lx", j, i[j]);
+-    else          s->print_cr( "fp = 0x%.16lx",    i[j]);
++  for (j = 0; j < 8; ++j) {
++    if (j != 6) { s->print("i%d = ", j); os::print_location(s, i[j]); }
++    else        { s->print( "fp = "   ); os::print_location(s, i[j]); }
++  }
+   s->cr();
+ 
+-  for ( j = 0;  j < 8;  ++j )
+-    s->print_cr("l%d = 0x%.16lx", j, l[j]);
++  for (j = 0;  j < 8;  ++j) {
++    s->print("l%d = ", j); os::print_location(s, l[j]);
++  }
+   s->cr();
+ 
+-  for ( j = 0;  j < 8;  ++j )
+-    if ( j != 6 ) s->print_cr("o%d = 0x%.16lx", j, o[j]);
+-    else          s->print_cr( "sp = 0x%.16lx",    o[j]);
++  for (j = 0; j < 8; ++j) {
++    if (j != 6) { s->print("o%d = ", j); os::print_location(s, o[j]); }
++    else        { s->print( "sp = "   ); os::print_location(s, o[j]); }
++  }
+   s->cr();
+ 
+-  for ( j = 0;  j < 8;  ++j )
+-    s->print_cr("g%d = 0x%.16lx", j, g[j]);
++  for (j = 0; j < 8; ++j) {
++    s->print("g%d = ", j); os::print_location(s, g[j]);
++  }
+   s->cr();
+ 
+   // print out floats with compression
+@@ -2020,8 +2027,8 @@
+   char* b = new char[1024];
+   sprintf(b, "untested: %s", what);
+ 
+-  if ( ShowMessageBoxOnError )   stop(b);
+-  else                           warn(b);
++  if (ShowMessageBoxOnError) { STOP(b); }
++  else                       { warn(b); }
+ }
+ 
+ 
+@@ -2998,26 +3005,60 @@
+ }
+ 
+ 
++// virtual method calling
++void MacroAssembler::lookup_virtual_method(Register recv_klass,
++                                           RegisterOrConstant vtable_index,
++                                           Register method_result) {
++  assert_different_registers(recv_klass, method_result, vtable_index.register_or_noreg());
++  Register sethi_temp = method_result;
++  const int base = (instanceKlass::vtable_start_offset() * wordSize +
++                    // method pointer offset within the vtable entry:
++                    vtableEntry::method_offset_in_bytes());
++  RegisterOrConstant vtable_offset = vtable_index;
++  // Each of the following three lines potentially generates an instruction.
++  // But the total number of address formation instructions will always be
++  // at most two, and will often be zero.  In any case, it will be optimal.
++  // If vtable_index is a register, we will have (sll_ptr N,x; inc_ptr B,x; ld_ptr k,x).
++  // If vtable_index is a constant, we will have at most (set B+X<<N,t; ld_ptr k,t).
++  vtable_offset = regcon_sll_ptr(vtable_index, exact_log2(vtableEntry::size() * wordSize), vtable_offset);
++  vtable_offset = regcon_inc_ptr(vtable_offset, base, vtable_offset, sethi_temp);
++  Address vtable_entry_addr(recv_klass, ensure_simm13_or_reg(vtable_offset, sethi_temp));
++  ld_ptr(vtable_entry_addr, method_result);
++}
++
++
+ void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                          Register super_klass,
+                                          Register temp_reg,
+                                          Register temp2_reg,
+                                          Label& L_success) {
+-  Label L_failure, L_pop_to_failure;
+-  check_klass_subtype_fast_path(sub_klass, super_klass,
+-                                temp_reg, temp2_reg,
+-                                &L_success, &L_failure, NULL);
+   Register sub_2 = sub_klass;
+   Register sup_2 = super_klass;
+   if (!sub_2->is_global())  sub_2 = L0;
+   if (!sup_2->is_global())  sup_2 = L1;
+-
+-  save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
++  bool did_save = false;
++  if (temp_reg == noreg || temp2_reg == noreg) {
++    temp_reg = L2;
++    temp2_reg = L3;
++    save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
++    sub_klass = sub_2;
++    super_klass = sup_2;
++    did_save = true;
++  }
++  Label L_failure, L_pop_to_failure, L_pop_to_success;
++  check_klass_subtype_fast_path(sub_klass, super_klass,
++                                temp_reg, temp2_reg,
++                                (did_save ? &L_pop_to_success : &L_success),
++                                (did_save ? &L_pop_to_failure : &L_failure), NULL);
++
++  if (!did_save)
++    save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
+   check_klass_subtype_slow_path(sub_2, sup_2,
+                                 L2, L3, L4, L5,
+                                 NULL, &L_pop_to_failure);
+ 
+   // on success:
++  bind(L_pop_to_success);
+   restore();
+   ba_short(L_success);
+ 
+@@ -3234,54 +3275,6 @@
+ }
+ 
+ 
+-void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
+-                                              Register temp_reg,
+-                                              Label& wrong_method_type) {
+-  assert_different_registers(mtype_reg, mh_reg, temp_reg);
+-  // compare method type against that of the receiver
+-  RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg);
+-  load_heap_oop(mh_reg, mhtype_offset, temp_reg);
+-  cmp_and_brx_short(temp_reg, mtype_reg, Assembler::notEqual, Assembler::pn, wrong_method_type);
+-}
+-
+-
+-// A method handle has a "vmslots" field which gives the size of its
+-// argument list in JVM stack slots.  This field is either located directly
+-// in every method handle, or else is indirectly accessed through the
+-// method handle's MethodType.  This macro hides the distinction.
+-void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
+-                                                Register temp_reg) {
+-  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
+-  // load mh.type.form.vmslots
+-  Register temp2_reg = vmslots_reg;
+-  load_heap_oop(Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)),      temp2_reg);
+-  load_heap_oop(Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)),        temp2_reg);
+-  ld(           Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)), vmslots_reg);
+-}
+-
+-
+-void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg, bool emit_delayed_nop) {
+-  assert(mh_reg == G3_method_handle, "caller must put MH object in G3");
+-  assert_different_registers(mh_reg, temp_reg);
+-
+-  // pick out the interpreted side of the handler
+-  // NOTE: vmentry is not an oop!
+-  ld_ptr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg), temp_reg);
+-
+-  // off we go...
+-  ld_ptr(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes(), temp_reg);
+-  jmp(temp_reg, 0);
+-
+-  // for the various stubs which take control at this point,
+-  // see MethodHandles::generate_method_handle_stub
+-
+-  // Some callers can fill the delay slot.
+-  if (emit_delayed_nop) {
+-    delayed()->nop();
+-  }
+-}
+-
+-
+ RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot,
+                                                    Register temp_reg,
+                                                    int extra_slot_offset) {
+@@ -3914,7 +3907,7 @@
+     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
+     or3(t1, t2, t3);
+     cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next);
+-    stop("assert(top >= start)");
++    STOP("assert(top >= start)");
+     should_not_reach_here();
+ 
+     bind(next);
+@@ -3922,13 +3915,13 @@
+     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
+     or3(t3, t2, t3);
+     cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2);
+-    stop("assert(top <= end)");
++    STOP("assert(top <= end)");
+     should_not_reach_here();
+ 
+     bind(next2);
+     and3(t3, MinObjAlignmentInBytesMask, t3);
+     cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok);
+-    stop("assert(aligned)");
++    STOP("assert(aligned)");
+     should_not_reach_here();
+ 
+     bind(ok);
+@@ -3976,7 +3969,7 @@
+       btst(MinObjAlignmentInBytesMask, obj);
+       br(Assembler::zero, false, Assembler::pt, L);
+       delayed()->nop();
+-      stop("eden top is not properly aligned");
++      STOP("eden top is not properly aligned");
+       bind(L);
+     }
+ #endif // ASSERT
+@@ -4013,7 +4006,7 @@
+       btst(MinObjAlignmentInBytesMask, top_addr);
+       br(Assembler::zero, false, Assembler::pt, L);
+       delayed()->nop();
+-      stop("eden top is not properly aligned");
++      STOP("eden top is not properly aligned");
+       bind(L);
+     }
+ #endif // ASSERT
+@@ -4066,7 +4059,7 @@
+     btst(MinObjAlignmentInBytesMask, free);
+     br(Assembler::zero, false, Assembler::pt, L);
+     delayed()->nop();
+-    stop("updated TLAB free is not properly aligned");
++    STOP("updated TLAB free is not properly aligned");
+     bind(L);
+   }
+ #endif // ASSERT
+@@ -4164,7 +4157,7 @@
+     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
+     sll_ptr(t2, LogHeapWordSize, t2);
+     cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok);
+-    stop("assert(t1 == tlab_size)");
++    STOP("assert(t1 == tlab_size)");
+     should_not_reach_here();
+ 
+     bind(ok);
+diff --git a/src/cpu/sparc/vm/assembler_sparc.hpp b/src/cpu/sparc/vm/assembler_sparc.hpp
+--- a/src/cpu/sparc/vm/assembler_sparc.hpp
++++ b/src/cpu/sparc/vm/assembler_sparc.hpp
+@@ -2538,6 +2538,11 @@
+                                Register temp_reg, Register temp2_reg,
+                                Label& no_such_interface);
+ 
++  // virtual method calling
++  void lookup_virtual_method(Register recv_klass,
++                             RegisterOrConstant vtable_index,
++                             Register method_result);
++
+   // Test sub_klass against super_klass, with fast and slow paths.
+ 
+   // The fast path produces a tri-state answer: yes / no / maybe-slow.
+@@ -2577,12 +2582,6 @@
+                            Label& L_success);
+ 
+   // method handles (JSR 292)
+-  void check_method_handle_type(Register mtype_reg, Register mh_reg,
+-                                Register temp_reg,
+-                                Label& wrong_method_type);
+-  void load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
+-                                  Register temp_reg);
+-  void jump_to_method_handle_entry(Register mh_reg, Register temp_reg, bool emit_delayed_nop = true);
+   // offset relative to Gargs of argument at tos[arg_slot].
+   // (arg_slot == 0 means the last argument, not the first).
+   RegisterOrConstant argument_offset(RegisterOrConstant arg_slot,
+@@ -2590,7 +2589,7 @@
+                                      int extra_slot_offset = 0);
+   // Address of Gargs and argument_offset.
+   Address            argument_address(RegisterOrConstant arg_slot,
+-                                      Register temp_reg,
++                                      Register temp_reg = noreg,
+                                       int extra_slot_offset = 0);
+ 
+   // Stack overflow checking
+diff --git a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
+--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
++++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
+@@ -2956,6 +2956,7 @@
+ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
+   ciMethod* method = op->profiled_method();
+   int bci          = op->profiled_bci();
++  ciMethod* callee = op->profiled_callee();
+ 
+   // Update counter for all call types
+   ciMethodData* md = method->method_data_or_null();
+@@ -2984,9 +2985,11 @@
+ 
+   Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
+   Bytecodes::Code bc = method->java_code_at_bci(bci);
++  const bool callee_is_static = callee->is_loaded() && callee->is_static();
+   // Perform additional virtual call profiling for invokevirtual and
+   // invokeinterface bytecodes
+   if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
++      !callee_is_static &&  // required for optimized MH invokes
+       C1ProfileVirtualCalls) {
+     assert(op->recv()->is_single_cpu(), "recv must be allocated");
+     Register recv = op->recv()->as_register();
+diff --git a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp
+--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp
++++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp
+@@ -515,9 +515,9 @@
+     // Need to differentiate between igetfield, agetfield, bgetfield etc.
+     // because they are different sizes.
+     // Get the type from the constant pool cache
+-    __ srl(G1_scratch, ConstantPoolCacheEntry::tosBits, G1_scratch);
+-    // Make sure we don't need to mask G1_scratch for tosBits after the above shift
+-    ConstantPoolCacheEntry::verify_tosBits();
++    __ srl(G1_scratch, ConstantPoolCacheEntry::tos_state_shift, G1_scratch);
++    // Make sure we don't need to mask G1_scratch after the above shift
++    ConstantPoolCacheEntry::verify_tos_state_shift();
+     __ cmp(G1_scratch, atos );
+     __ br(Assembler::equal, true, Assembler::pt, xreturn_path);
+     __ delayed()->ld_ptr(Otos_i, G3_scratch, Otos_i);
+diff --git a/src/cpu/sparc/vm/frame_sparc.cpp b/src/cpu/sparc/vm/frame_sparc.cpp
+--- a/src/cpu/sparc/vm/frame_sparc.cpp
++++ b/src/cpu/sparc/vm/frame_sparc.cpp
+@@ -514,7 +514,6 @@
+   // interpreted but its pc is in the code cache (for c1 -> osr_frame_return_id stub), so it must be
+   // explicitly recognized.
+ 
+-  if (is_ricochet_frame())    return sender_for_ricochet_frame(map);
+ 
+   bool frame_is_interpreted = is_interpreted_frame();
+   if (frame_is_interpreted) {
+@@ -821,9 +820,7 @@
+     values.describe(frame_no, sp() + w, err_msg("register save area word %d", w), 1);
+   }
+ 
+-  if (is_ricochet_frame()) {
+-    MethodHandles::RicochetFrame::describe(this, values, frame_no);
+-  } else if (is_interpreted_frame()) {
++  if (is_interpreted_frame()) {
+     DESCRIBE_FP_OFFSET(interpreter_frame_d_scratch_fp);
+     DESCRIBE_FP_OFFSET(interpreter_frame_l_scratch_fp);
+     DESCRIBE_FP_OFFSET(interpreter_frame_padding);
+diff --git a/src/cpu/sparc/vm/interp_masm_sparc.cpp b/src/cpu/sparc/vm/interp_masm_sparc.cpp
+--- a/src/cpu/sparc/vm/interp_masm_sparc.cpp
++++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp
+@@ -505,7 +505,7 @@
+ void InterpreterMacroAssembler::load_receiver(Register param_count,
+                                               Register recv) {
+   sll(param_count, Interpreter::logStackElementSize, param_count);
+-  ld_ptr(Lesp, param_count, recv);                      // gets receiver Oop
++  ld_ptr(Lesp, param_count, recv);  // gets receiver oop
+ }
+ 
+ void InterpreterMacroAssembler::empty_expression_stack() {
+@@ -767,8 +767,12 @@
+   get_cache_and_index_at_bcp(cache, temp, bcp_offset, index_size);
+   ld_ptr(cache, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset(), bytecode);
+   const int shift_count = (1 + byte_no) * BitsPerByte;
+-  srl( bytecode, shift_count, bytecode);
+-  and3(bytecode,        0xFF, bytecode);
++  assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) ||
++         (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift),
++         "correct shift count");
++  srl(bytecode, shift_count, bytecode);
++  assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
++  and3(bytecode, ConstantPoolCacheEntry::bytecode_1_mask, bytecode);
+ }
+ 
+ 
+diff --git a/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp b/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp
+--- a/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp
++++ b/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp
+@@ -32,7 +32,6 @@
+   address generate_normal_entry(bool synchronized);
+   address generate_native_entry(bool synchronized);
+   address generate_abstract_entry(void);
+-  address generate_method_handle_entry(void);
+   address generate_math_entry(AbstractInterpreter::MethodKind kind);
+   address generate_empty_entry(void);
+   address generate_accessor_entry(void);
+diff --git a/src/cpu/sparc/vm/interpreter_sparc.cpp b/src/cpu/sparc/vm/interpreter_sparc.cpp
+--- a/src/cpu/sparc/vm/interpreter_sparc.cpp
++++ b/src/cpu/sparc/vm/interpreter_sparc.cpp
+@@ -255,17 +255,6 @@
+ }
+ 
+ 
+-// Method handle invoker
+-// Dispatch a method of the form java.lang.invoke.MethodHandles::invoke(...)
+-address InterpreterGenerator::generate_method_handle_entry(void) {
+-  if (!EnableInvokeDynamic) {
+-    return generate_abstract_entry();
+-  }
+-
+-  return MethodHandles::generate_method_handle_interpreter_entry(_masm);
+-}
+-
+-
+ //----------------------------------------------------------------------------------------------------
+ // Entry points & stack frame layout
+ //
+@@ -395,7 +384,7 @@
+     case Interpreter::empty                  : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry();        break;
+     case Interpreter::accessor               : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();     break;
+     case Interpreter::abstract               : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();     break;
+-    case Interpreter::method_handle          : entry_point = ((InterpreterGenerator*)this)->generate_method_handle_entry(); break;
++
+     case Interpreter::java_lang_math_sin     :                                                                             break;
+     case Interpreter::java_lang_math_cos     :                                                                             break;
+     case Interpreter::java_lang_math_tan     :                                                                             break;
+@@ -407,7 +396,9 @@
+     case Interpreter::java_lang_math_exp     :                                                                             break;
+     case Interpreter::java_lang_ref_reference_get
+                                              : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
+-    default                                  : ShouldNotReachHere();                                                       break;
++    default:
++      fatal(err_msg("unexpected method kind: %d", kind));
++      break;
+   }
+ 
+   if (entry_point) return entry_point;
+diff --git a/src/cpu/sparc/vm/methodHandles_sparc.cpp b/src/cpu/sparc/vm/methodHandles_sparc.cpp
+--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp
++++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp
+@@ -31,452 +31,37 @@
+ 
+ #ifdef PRODUCT
+ #define BLOCK_COMMENT(str) /* nothing */
++#define STOP(error) stop(error)
+ #else
+ #define BLOCK_COMMENT(str) __ block_comment(str)
++#define STOP(error) block_comment(error); __ stop(error)
+ #endif
+ 
+ #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+ 
+-address MethodHandleEntry::start_compiled_entry(MacroAssembler* _masm,
+-                                                address interpreted_entry) {
+-  // Just before the actual machine code entry point, allocate space
+-  // for a MethodHandleEntry::Data record, so that we can manage everything
+-  // from one base pointer.
+-  __ align(wordSize);
+-  address target = __ pc() + sizeof(Data);
+-  while (__ pc() < target) {
+-    __ nop();
+-    __ align(wordSize);
+-  }
+-
+-  MethodHandleEntry* me = (MethodHandleEntry*) __ pc();
+-  me->set_end_address(__ pc());         // set a temporary end_address
+-  me->set_from_interpreted_entry(interpreted_entry);
+-  me->set_type_checking_entry(NULL);
+-
+-  return (address) me;
++// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant.
++static RegisterOrConstant constant(int value) {
++  return RegisterOrConstant(value);
+ }
+ 
+-MethodHandleEntry* MethodHandleEntry::finish_compiled_entry(MacroAssembler* _masm,
+-                                                address start_addr) {
+-  MethodHandleEntry* me = (MethodHandleEntry*) start_addr;
+-  assert(me->end_address() == start_addr, "valid ME");
+-
+-  // Fill in the real end_address:
+-  __ align(wordSize);
+-  me->set_end_address(__ pc());
+-
+-  return me;
+-}
+-
+-// stack walking support
+-
+-frame MethodHandles::ricochet_frame_sender(const frame& fr, RegisterMap *map) {
+-  //RicochetFrame* f = RicochetFrame::from_frame(fr);
+-  // Cf. is_interpreted_frame path of frame::sender
+-  intptr_t* younger_sp = fr.sp();
+-  intptr_t* sp         = fr.sender_sp();
+-  map->make_integer_regs_unsaved();
+-  map->shift_window(sp, younger_sp);
+-  bool this_frame_adjusted_stack = true;  // I5_savedSP is live in this RF
+-  return frame(sp, younger_sp, this_frame_adjusted_stack);
+-}
+-
+-void MethodHandles::ricochet_frame_oops_do(const frame& fr, OopClosure* blk, const RegisterMap* reg_map) {
+-  ResourceMark rm;
+-  RicochetFrame* f = RicochetFrame::from_frame(fr);
+-
+-  // pick up the argument type descriptor:
+-  Thread* thread = Thread::current();
+-  Handle cookie(thread, f->compute_saved_args_layout(true, true));
+-
+-  // process fixed part
+-  blk->do_oop((oop*)f->saved_target_addr());
+-  blk->do_oop((oop*)f->saved_args_layout_addr());
+-
+-  // process variable arguments:
+-  if (cookie.is_null())  return;  // no arguments to describe
+-
+-  // the cookie is actually the invokeExact method for my target
+-  // his argument signature is what I'm interested in
+-  assert(cookie->is_method(), "");
+-  methodHandle invoker(thread, methodOop(cookie()));
+-  assert(invoker->name() == vmSymbols::invokeExact_name(), "must be this kind of method");
+-  assert(!invoker->is_static(), "must have MH argument");
+-  int slot_count = invoker->size_of_parameters();
+-  assert(slot_count >= 1, "must include 'this'");
+-  intptr_t* base = f->saved_args_base();
+-  intptr_t* retval = NULL;
+-  if (f->has_return_value_slot())
+-    retval = f->return_value_slot_addr();
+-  int slot_num = slot_count - 1;
+-  intptr_t* loc = &base[slot_num];
+-  //blk->do_oop((oop*) loc);   // original target, which is irrelevant
+-  int arg_num = 0;
+-  for (SignatureStream ss(invoker->signature()); !ss.is_done(); ss.next()) {
+-    if (ss.at_return_type())  continue;
+-    BasicType ptype = ss.type();
+-    if (ptype == T_ARRAY)  ptype = T_OBJECT; // fold all refs to T_OBJECT
+-    assert(ptype >= T_BOOLEAN && ptype <= T_OBJECT, "not array or void");
+-    slot_num -= type2size[ptype];
+-    loc = &base[slot_num];
+-    bool is_oop = (ptype == T_OBJECT && loc != retval);
+-    if (is_oop)  blk->do_oop((oop*)loc);
+-    arg_num += 1;
+-  }
+-  assert(slot_num == 0, "must have processed all the arguments");
+-}
+-
+-// Ricochet Frames
+-const Register MethodHandles::RicochetFrame::L1_continuation      = L1;
+-const Register MethodHandles::RicochetFrame::L2_saved_target      = L2;
+-const Register MethodHandles::RicochetFrame::L3_saved_args_layout = L3;
+-const Register MethodHandles::RicochetFrame::L4_saved_args_base   = L4; // cf. Gargs = G4
+-const Register MethodHandles::RicochetFrame::L5_conversion        = L5;
+-#ifdef ASSERT
+-const Register MethodHandles::RicochetFrame::L0_magic_number_1    = L0;
+-#endif //ASSERT
+-
+-oop MethodHandles::RicochetFrame::compute_saved_args_layout(bool read_cache, bool write_cache) {
+-  if (read_cache) {
+-    oop cookie = saved_args_layout();
+-    if (cookie != NULL)  return cookie;
+-  }
+-  oop target = saved_target();
+-  oop mtype  = java_lang_invoke_MethodHandle::type(target);
+-  oop mtform = java_lang_invoke_MethodType::form(mtype);
+-  oop cookie = java_lang_invoke_MethodTypeForm::vmlayout(mtform);
+-  if (write_cache)  {
+-    (*saved_args_layout_addr()) = cookie;
+-  }
+-  return cookie;
+-}
+-
+-void MethodHandles::RicochetFrame::generate_ricochet_blob(MacroAssembler* _masm,
+-                                                          // output params:
+-                                                          int* bounce_offset,
+-                                                          int* exception_offset,
+-                                                          int* frame_size_in_words) {
+-  (*frame_size_in_words) = RicochetFrame::frame_size_in_bytes() / wordSize;
+-
+-  address start = __ pc();
+-
+-#ifdef ASSERT
+-  __ illtrap(0); __ illtrap(0); __ illtrap(0);
+-  // here's a hint of something special:
+-  __ set(MAGIC_NUMBER_1, G0);
+-  __ set(MAGIC_NUMBER_2, G0);
+-#endif //ASSERT
+-  __ illtrap(0);  // not reached
+-
+-  // Return values are in registers.
+-  // L1_continuation contains a cleanup continuation we must return
+-  // to.
+-
+-  (*bounce_offset) = __ pc() - start;
+-  BLOCK_COMMENT("ricochet_blob.bounce");
+-
+-  if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
+-  trace_method_handle(_masm, "return/ricochet_blob.bounce");
+-
+-  __ JMP(L1_continuation, 0);
+-  __ delayed()->nop();
+-  __ illtrap(0);
+-
+-  DEBUG_ONLY(__ set(MAGIC_NUMBER_2, G0));
+-
+-  (*exception_offset) = __ pc() - start;
+-  BLOCK_COMMENT("ricochet_blob.exception");
+-
+-  // compare this to Interpreter::rethrow_exception_entry, which is parallel code
+-  // for example, see TemplateInterpreterGenerator::generate_throw_exception
+-  // Live registers in:
+-  //   Oexception  (O0): exception
+-  //   Oissuing_pc (O1): return address/pc that threw exception (ignored, always equal to bounce addr)
+-  __ verify_oop(Oexception);
+-
+-  // Take down the frame.
+-
+-  // Cf. InterpreterMacroAssembler::remove_activation.
+-  leave_ricochet_frame(_masm, /*recv_reg=*/ noreg, I5_savedSP, I7);
+-
+-  // We are done with this activation frame; find out where to go next.
+-  // The continuation point will be an exception handler, which expects
+-  // the following registers set up:
+-  //
+-  // Oexception: exception
+-  // Oissuing_pc: the local call that threw exception
+-  // Other On: garbage
+-  // In/Ln:  the contents of the caller's register window
+-  //
+-  // We do the required restore at the last possible moment, because we
+-  // need to preserve some state across a runtime call.
+-  // (Remember that the caller activation is unknown--it might not be
+-  // interpreted, so things like Lscratch are useless in the caller.)
+-  __ mov(Oexception,  Oexception ->after_save());  // get exception in I0 so it will be on O0 after restore
+-  __ add(I7, frame::pc_return_offset, Oissuing_pc->after_save());  // likewise set I1 to a value local to the caller
+-  __ call_VM_leaf(L7_thread_cache,
+-                  CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
+-                  G2_thread, Oissuing_pc->after_save());
+-
+-  // The caller's SP was adjusted upon method entry to accomodate
+-  // the callee's non-argument locals. Undo that adjustment.
+-  __ JMP(O0, 0);                         // return exception handler in caller
+-  __ delayed()->restore(I5_savedSP, G0, SP);
+-
+-  // (same old exception object is already in Oexception; see above)
+-  // Note that an "issuing PC" is actually the next PC after the call
+-}
+-
+-void MethodHandles::RicochetFrame::enter_ricochet_frame(MacroAssembler* _masm,
+-                                                        Register recv_reg,
+-                                                        Register argv_reg,
+-                                                        address return_handler) {
+-  // does not include the __ save()
+-  assert(argv_reg == Gargs, "");
+-  Address G3_mh_vmtarget(   recv_reg, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes());
+-  Address G3_amh_conversion(recv_reg, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes());
+-
+-  // Create the RicochetFrame.
+-  // Unlike on x86 we can store all required information in local
+-  // registers.
+-  BLOCK_COMMENT("push RicochetFrame {");
+-  __ set(ExternalAddress(return_handler),          L1_continuation);
+-  __ load_heap_oop(G3_mh_vmtarget,                 L2_saved_target);
+-  __ mov(G0,                                       L3_saved_args_layout);
+-  __ mov(Gargs,                                    L4_saved_args_base);
+-  __ lduw(G3_amh_conversion,                       L5_conversion);  // 32-bit field
+-  // I5, I6, I7 are already set up
+-  DEBUG_ONLY(__ set((int32_t) MAGIC_NUMBER_1,      L0_magic_number_1));
+-  BLOCK_COMMENT("} RicochetFrame");
+-}
+-
+-void MethodHandles::RicochetFrame::leave_ricochet_frame(MacroAssembler* _masm,
+-                                                        Register recv_reg,
+-                                                        Register new_sp_reg,
+-                                                        Register sender_pc_reg) {
+-  assert(new_sp_reg == I5_savedSP, "exact_sender_sp already in place");
+-  assert(sender_pc_reg == I7, "in a fixed place");
+-  // does not include the __ ret() & __ restore()
+-  assert_different_registers(recv_reg, new_sp_reg, sender_pc_reg);
+-  // Take down the frame.
+-  // Cf. InterpreterMacroAssembler::remove_activation.
+-  BLOCK_COMMENT("end_ricochet_frame {");
+-  if (recv_reg->is_valid())
+-    __ mov(L2_saved_target, recv_reg);
+-  BLOCK_COMMENT("} end_ricochet_frame");
+-}
+-
+-// Emit code to verify that FP is pointing at a valid ricochet frame.
+-#ifndef PRODUCT
+-enum {
+-  ARG_LIMIT = 255, SLOP = 45,
+-  // use this parameter for checking for garbage stack movements:
+-  UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)
+-  // the slop defends against false alarms due to fencepost errors
+-};
+-#endif
+-
+-#ifdef ASSERT
+-void MethodHandles::RicochetFrame::verify_clean(MacroAssembler* _masm) {
+-  // The stack should look like this:
+-  //    ... keep1 | dest=42 | keep2 | magic | handler | magic | recursive args | [RF]
+-  // Check various invariants.
+-
+-  Register O7_temp = O7, O5_temp = O5;
+-
+-  Label L_ok_1, L_ok_2, L_ok_3, L_ok_4;
+-  BLOCK_COMMENT("verify_clean {");
+-  // Magic numbers must check out:
+-  __ set((int32_t) MAGIC_NUMBER_1, O7_temp);
+-  __ cmp_and_br_short(O7_temp, L0_magic_number_1, Assembler::equal, Assembler::pt, L_ok_1);
+-  __ stop("damaged ricochet frame: MAGIC_NUMBER_1 not found");
+-
+-  __ BIND(L_ok_1);
+-
+-  // Arguments pointer must look reasonable:
+-#ifdef _LP64
+-  Register FP_temp = O5_temp;
+-  __ add(FP, STACK_BIAS, FP_temp);
+-#else
+-  Register FP_temp = FP;
+-#endif
+-  __ cmp_and_brx_short(L4_saved_args_base, FP_temp, Assembler::greaterEqualUnsigned, Assembler::pt, L_ok_2);
+-  __ stop("damaged ricochet frame: L4 < FP");
+-
+-  __ BIND(L_ok_2);
+-  // Disable until we decide on it's fate
+-  // __ sub(L4_saved_args_base, UNREASONABLE_STACK_MOVE * Interpreter::stackElementSize, O7_temp);
+-  // __ cmp(O7_temp, FP_temp);
+-  // __ br(Assembler::lessEqualUnsigned, false, Assembler::pt, L_ok_3);
+-  // __ delayed()->nop();
+-  // __ stop("damaged ricochet frame: (L4 - UNREASONABLE_STACK_MOVE) > FP");
+-
+-  __ BIND(L_ok_3);
+-  extract_conversion_dest_type(_masm, L5_conversion, O7_temp);
+-  __ cmp_and_br_short(O7_temp, T_VOID, Assembler::equal, Assembler::pt, L_ok_4);
+-  extract_conversion_vminfo(_masm, L5_conversion, O5_temp);
+-  __ ld_ptr(L4_saved_args_base, __ argument_offset(O5_temp, O5_temp), O7_temp);
+-  assert(Assembler::is_simm13(RETURN_VALUE_PLACEHOLDER), "must be simm13");
+-  __ cmp_and_brx_short(O7_temp, (int32_t) RETURN_VALUE_PLACEHOLDER, Assembler::equal, Assembler::pt, L_ok_4);
+-  __ stop("damaged ricochet frame: RETURN_VALUE_PLACEHOLDER not found");
+-  __ BIND(L_ok_4);
+-  BLOCK_COMMENT("} verify_clean");
+-}
+-#endif //ASSERT
+-
+ void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg) {
+   if (VerifyMethodHandles)
+     verify_klass(_masm, klass_reg, SystemDictionaryHandles::Class_klass(), temp_reg, temp2_reg,
+-                 "AMH argument is a Class");
++                 "MH argument is a Class");
+   __ load_heap_oop(Address(klass_reg, java_lang_Class::klass_offset_in_bytes()), klass_reg);
+ }
+ 
+-void MethodHandles::load_conversion_vminfo(MacroAssembler* _masm, Address conversion_field_addr, Register reg) {
+-  assert(CONV_VMINFO_SHIFT == 0, "preshifted");
+-  assert(CONV_VMINFO_MASK == right_n_bits(BitsPerByte), "else change type of following load");
+-  __ ldub(conversion_field_addr.plus_disp(BytesPerInt - 1), reg);
++#ifdef ASSERT
++static int check_nonzero(const char* xname, int x) {
++  assert(x != 0, err_msg("%s should be nonzero", xname));
++  return x;
+ }
+-
+-void MethodHandles::extract_conversion_vminfo(MacroAssembler* _masm, Register conversion_field_reg, Register reg) {
+-  assert(CONV_VMINFO_SHIFT == 0, "preshifted");
+-  __ and3(conversion_field_reg, CONV_VMINFO_MASK, reg);
+-}
+-
+-void MethodHandles::extract_conversion_dest_type(MacroAssembler* _masm, Register conversion_field_reg, Register reg) {
+-  __ srl(conversion_field_reg, CONV_DEST_TYPE_SHIFT, reg);
+-  __ and3(reg, 0x0F, reg);
+-}
+-
+-void MethodHandles::load_stack_move(MacroAssembler* _masm,
+-                                    Address G3_amh_conversion,
+-                                    Register stack_move_reg) {
+-  BLOCK_COMMENT("load_stack_move {");
+-  __ ldsw(G3_amh_conversion, stack_move_reg);
+-  __ sra(stack_move_reg, CONV_STACK_MOVE_SHIFT, stack_move_reg);
+-#ifdef ASSERT
+-  if (VerifyMethodHandles) {
+-    Label L_ok, L_bad;
+-    int32_t stack_move_limit = 0x0800;  // extra-large
+-    __ cmp_and_br_short(stack_move_reg, stack_move_limit, Assembler::greaterEqual, Assembler::pn, L_bad);
+-    __ cmp(stack_move_reg, -stack_move_limit);
+-    __ br(Assembler::greater, false, Assembler::pt, L_ok);
+-    __ delayed()->nop();
+-    __ BIND(L_bad);
+-    __ stop("load_stack_move of garbage value");
+-    __ BIND(L_ok);
+-  }
+-#endif
+-  BLOCK_COMMENT("} load_stack_move");
+-}
++#define NONZERO(x) check_nonzero(#x, x)
++#else //ASSERT
++#define NONZERO(x) (x)
++#endif //ASSERT
+ 
+ #ifdef ASSERT
+-void MethodHandles::RicochetFrame::verify() const {
+-  assert(magic_number_1() == MAGIC_NUMBER_1, "");
+-  if (!Universe::heap()->is_gc_active()) {
+-    if (saved_args_layout() != NULL) {
+-      assert(saved_args_layout()->is_method(), "must be valid oop");
+-    }
+-    if (saved_target() != NULL) {
+-      assert(java_lang_invoke_MethodHandle::is_instance(saved_target()), "checking frame value");
+-    }
+-  }
+-  int conv_op = adapter_conversion_op(conversion());
+-  assert(conv_op == java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS ||
+-         conv_op == java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS ||
+-         conv_op == java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF,
+-         "must be a sane conversion");
+-  if (has_return_value_slot()) {
+-    assert(*return_value_slot_addr() == RETURN_VALUE_PLACEHOLDER, "");
+-  }
+-}
+-
+-void MethodHandles::verify_argslot(MacroAssembler* _masm, Register argslot_reg, Register temp_reg, const char* error_message) {
+-  // Verify that argslot lies within (Gargs, FP].
+-  Label L_ok, L_bad;
+-  BLOCK_COMMENT("verify_argslot {");
+-  __ cmp_and_brx_short(Gargs, argslot_reg, Assembler::greaterUnsigned, Assembler::pn, L_bad);
+-  __ add(FP, STACK_BIAS, temp_reg);  // STACK_BIAS is zero on !_LP64
+-  __ cmp_and_brx_short(argslot_reg, temp_reg, Assembler::lessEqualUnsigned, Assembler::pt, L_ok);
+-  __ BIND(L_bad);
+-  __ stop(error_message);
+-  __ BIND(L_ok);
+-  BLOCK_COMMENT("} verify_argslot");
+-}
+-
+-void MethodHandles::verify_argslots(MacroAssembler* _masm,
+-                                    RegisterOrConstant arg_slots,
+-                                    Register arg_slot_base_reg,
+-                                    Register temp_reg,
+-                                    Register temp2_reg,
+-                                    bool negate_argslots,
+-                                    const char* error_message) {
+-  // Verify that [argslot..argslot+size) lies within (Gargs, FP).
+-  Label L_ok, L_bad;
+-  BLOCK_COMMENT("verify_argslots {");
+-  if (negate_argslots) {
+-    if (arg_slots.is_constant()) {
+-      arg_slots = -1 * arg_slots.as_constant();
+-    } else {
+-      __ neg(arg_slots.as_register(), temp_reg);
+-      arg_slots = temp_reg;
+-    }
+-  }
+-  __ add(arg_slot_base_reg, __ argument_offset(arg_slots, temp_reg), temp_reg);
+-  __ add(FP, STACK_BIAS, temp2_reg);  // STACK_BIAS is zero on !_LP64
+-  __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::greaterUnsigned, Assembler::pn, L_bad);
+-  // Gargs points to the first word so adjust by BytesPerWord
+-  __ add(arg_slot_base_reg, BytesPerWord, temp_reg);
+-  __ cmp_and_brx_short(Gargs, temp_reg, Assembler::lessEqualUnsigned, Assembler::pt, L_ok);
+-  __ BIND(L_bad);
+-  __ stop(error_message);
+-  __ BIND(L_ok);
+-  BLOCK_COMMENT("} verify_argslots");
+-}
+-
+-// Make sure that arg_slots has the same sign as the given direction.
+-// If (and only if) arg_slots is a assembly-time constant, also allow it to be zero.
+-void MethodHandles::verify_stack_move(MacroAssembler* _masm,
+-                                      RegisterOrConstant arg_slots, int direction) {
+-  enum { UNREASONABLE_STACK_MOVE = 256 * 4 };  // limit of 255 arguments
+-  bool allow_zero = arg_slots.is_constant();
+-  if (direction == 0) { direction = +1; allow_zero = true; }
+-  assert(stack_move_unit() == -1, "else add extra checks here");
+-  if (arg_slots.is_register()) {
+-    Label L_ok, L_bad;
+-    BLOCK_COMMENT("verify_stack_move {");
+-    // __ btst(-stack_move_unit() - 1, arg_slots.as_register());  // no need
+-    // __ br(Assembler::notZero, false, Assembler::pn, L_bad);
+-    // __ delayed()->nop();
+-    __ cmp(arg_slots.as_register(), (int32_t) NULL_WORD);
+-    if (direction > 0) {
+-      __ br(allow_zero ? Assembler::less : Assembler::lessEqual, false, Assembler::pn, L_bad);
+-      __ delayed()->nop();
+-      __ cmp(arg_slots.as_register(), (int32_t) UNREASONABLE_STACK_MOVE);
+-      __ br(Assembler::less, false, Assembler::pn, L_ok);
+-      __ delayed()->nop();
+-    } else {
+-      __ br(allow_zero ? Assembler::greater : Assembler::greaterEqual, false, Assembler::pn, L_bad);
+-      __ delayed()->nop();
+-      __ cmp(arg_slots.as_register(), (int32_t) -UNREASONABLE_STACK_MOVE);
+-      __ br(Assembler::greater, false, Assembler::pn, L_ok);
+-      __ delayed()->nop();
+-    }
+-    __ BIND(L_bad);
+-    if (direction > 0)
+-      __ stop("assert arg_slots > 0");
+-    else
+-      __ stop("assert arg_slots < 0");
+-    __ BIND(L_ok);
+-    BLOCK_COMMENT("} verify_stack_move");
+-  } else {
+-    intptr_t size = arg_slots.as_constant();
+-    if (direction < 0)  size = -size;
+-    assert(size >= 0, "correct direction of constant move");
+-    assert(size < UNREASONABLE_STACK_MOVE, "reasonable size of constant move");
+-  }
+-}
+-
+ void MethodHandles::verify_klass(MacroAssembler* _masm,
+                                  Register obj_reg, KlassHandle klass,
+                                  Register temp_reg, Register temp2_reg,
+@@ -485,6 +70,14 @@
+   assert(klass_addr >= SystemDictionaryHandles::Object_klass().raw_value() &&
+          klass_addr <= SystemDictionaryHandles::Long_klass().raw_value(),
+          "must be one of the SystemDictionaryHandles");
++  bool did_save = false;
++  if (temp_reg == noreg || temp2_reg == noreg) {
++    temp_reg = L1;
++    temp2_reg = L2;
++    __ save_frame_and_mov(0, obj_reg, L0);
++    obj_reg = L0;
++    did_save = true;
++  }
+   Label L_ok, L_bad;
+   BLOCK_COMMENT("verify_klass {");
+   __ verify_oop(obj_reg);
+@@ -499,537 +92,415 @@
+   __ ld_ptr(Address(temp2_reg, 0), temp2_reg);
+   __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::equal, Assembler::pt, L_ok);
+   __ BIND(L_bad);
+-  __ stop(error_message);
++  if (did_save)  __ restore();
++  __ STOP(error_message);
+   __ BIND(L_ok);
++  if (did_save)  __ restore();
+   BLOCK_COMMENT("} verify_klass");
+ }
++
++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {
++  Label L;
++  BLOCK_COMMENT("verify_ref_kind {");
++  __ lduw(Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes())), temp);
++  __ srl( temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT, temp);
++  __ and3(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK,  temp);
++  __ cmp_and_br_short(temp, ref_kind, Assembler::equal, Assembler::pt, L);
++  { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);
++    jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
++    if (ref_kind == JVM_REF_invokeVirtual ||
++        ref_kind == JVM_REF_invokeSpecial)
++      // could do this for all ref_kinds, but would explode assembly code size
++      trace_method_handle(_masm, buf);
++    __ STOP(buf);
++  }
++  BLOCK_COMMENT("} verify_ref_kind");
++  __ bind(L);
++}
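
Editorial aside (not part of the patch): the lduw/srl/and3 sequence in verify_ref_kind isolates the reference-kind bit field of MemberName.flags. The same extraction in plain C++; the shift and mask are written here as assumed constants mirroring java.lang.invoke.MethodHandleNatives, whereas the stub reads them from java_lang_invoke_MemberName:

  #include <cstdint>
  #include <cstdio>

  // Assumed values, mirroring MethodHandleNatives.Constants in the JDK sources.
  static const int MN_REFERENCE_KIND_SHIFT = 24;
  static const int MN_REFERENCE_KIND_MASK  = 0x0F;
  static const int JVM_REF_invokeVirtual   = 5;   // JVMS table of reference kinds

  // srl + and3 in the stub: shift the flags word right and mask off the 4-bit field.
  static int ref_kind_of(uint32_t member_name_flags) {
    return (int)((member_name_flags >> MN_REFERENCE_KIND_SHIFT) & MN_REFERENCE_KIND_MASK);
  }

  int main() {
    uint32_t flags = (uint32_t)JVM_REF_invokeVirtual << MN_REFERENCE_KIND_SHIFT;
    std::printf("ref_kind = %d\n", ref_kind_of(flags));  // prints 5
    return 0;
  }
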
++
+ #endif // ASSERT
+ 
+-
+-void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target, Register temp) {
++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target, Register temp,
++                                            bool for_compiler_entry) {
+   assert(method == G5_method, "interpreter calling convention");
+   __ verify_oop(method);
+-  __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_interpreted_offset()), target);
+-  if (JvmtiExport::can_post_interpreter_events()) {
++
++  if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) {
++    Label run_compiled_code;
+     // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+     // compiled code in threads for which the event is enabled.  Check here for
+     // interp_only_mode if these events CAN be enabled.
+     __ verify_thread();
+-    Label skip_compiled_code;
+-
+     const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset());
+     __ ld(interp_only, temp);
+-    __ tst(temp);
+-    __ br(Assembler::notZero, true, Assembler::pn, skip_compiled_code);
+-    __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), target);
+-    __ bind(skip_compiled_code);
++    __ cmp_and_br_short(temp, 0, Assembler::zero, Assembler::pt, run_compiled_code);
++    __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), target);
++    __ jmp(target, 0);
++    __ delayed()->nop();
++    __ BIND(run_compiled_code);
++    // Note: we could fill some delay slots here, but
++    // it doesn't matter, since this is interpreter code.
+   }
++
++  const ByteSize entry_offset = for_compiler_entry ? methodOopDesc::from_compiled_offset() :
++                                                     methodOopDesc::from_interpreted_offset();
++  __ ld_ptr(G5_method, in_bytes(entry_offset), target);
+   __ jmp(target, 0);
+   __ delayed()->nop();
+ }
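
Editorial aside (not part of the patch): jump_from_method_handle now takes a for_compiler_entry flag and picks one of two methodOop entry points, with a JVMTI interp_only_mode override for the interpreted case. The decision it encodes, restated as a compilable C++ sketch over a hypothetical methodOop stand-in:

  // Hypothetical stand-in modeling only the fields the stub reads.
  struct methodOop {
    void* from_interpreted_entry;  // entry used when coming from interpreted code
    void* from_compiled_entry;     // verified entry used when coming from compiled code
    void* interpreter_entry;       // raw interpreter entry, for interp_only_mode
  };

  static void* select_entry(const methodOop* m,
                            bool for_compiler_entry,
                            bool thread_in_interp_only_mode) {
    if (!for_compiler_entry && thread_in_interp_only_mode) {
      // JVMTI events such as single-stepping: avoid compiled code for this thread.
      return m->interpreter_entry;
    }
    return for_compiler_entry ? m->from_compiled_entry
                              : m->from_interpreted_entry;
  }
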
+ 
++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
++                                        Register recv, Register method_temp,
++                                        Register temp2, Register temp3,
++                                        bool for_compiler_entry) {
++  BLOCK_COMMENT("jump_to_lambda_form {");
++  // This is the initial entry point of a lazy method handle.
++  // After type checking, it picks up the invoker from the LambdaForm.
++  assert_different_registers(recv, method_temp, temp2, temp3);
++  assert(method_temp == G5_method, "required register for loading method");
++
++  //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); });
++
++  // Load the invoker, as MH -> MH.form -> LF.vmentry
++  __ verify_oop(recv);
++  __ load_heap_oop(Address(recv,        NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())),       method_temp);
++  __ verify_oop(method_temp);
++  __ load_heap_oop(Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), method_temp);
++  __ verify_oop(method_temp);
++  // the following assumes that a methodOop is normally compressed in the vmtarget field:
++  __ load_heap_oop(Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())),     method_temp);
++  __ verify_oop(method_temp);
++
++  if (VerifyMethodHandles && !for_compiler_entry) {
++    // make sure recv is already on stack
++    __ load_sized_value(Address(method_temp, methodOopDesc::size_of_parameters_offset()),
++                        temp2,
++                        sizeof(u2), /*is_signed*/ false);
++    // assert(sizeof(u2) == sizeof(methodOopDesc::_size_of_parameters), "");
++    Label L;
++    __ ld_ptr(__ argument_address(temp2, temp2, -1), temp2);
++    __ cmp_and_br_short(temp2, recv, Assembler::equal, Assembler::pt, L);
++    __ STOP("receiver not on stack");
++    __ BIND(L);
++  }
++
++  jump_from_method_handle(_masm, method_temp, temp2, temp3, for_compiler_entry);
++  BLOCK_COMMENT("} jump_to_lambda_form");
++}
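
Editorial aside (not part of the patch): jump_to_lambda_form is the whole fast path of a lazy method handle; three dependent oop loads replace the old per-kind adapter stubs. The pointer chase, with hypothetical struct stand-ins for the Java objects involved:

  struct methodOop;                                // the invoker method ultimately jumped to
  struct MemberName   { methodOop* vmtarget; };    // LambdaForm.vmentry is a MemberName
  struct LambdaForm   { MemberName* vmentry; };
  struct MethodHandle { LambdaForm* form; };

  // One load_heap_oop per arrow: MH -> MH.form -> LF.vmentry -> MemberName.vmtarget.
  static methodOop* resolve_lazy_invoker(const MethodHandle* mh) {
    return mh->form->vmentry->vmtarget;
  }
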
++
+ 
+ // Code generation
+-address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm) {
+-  // I5_savedSP/O5_savedSP: sender SP (must preserve)
++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
++                                                                vmIntrinsics::ID iid) {
++  const bool not_for_compiler_entry = false;  // this is the interpreter entry
++  assert(is_signature_polymorphic(iid), "expected invoke iid");
++  if (iid == vmIntrinsics::_invokeGeneric ||
++      iid == vmIntrinsics::_compiledLambdaForm) {
++    // Perhaps surprisingly, the symbolic references visible to Java are not directly used.
++    // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
++    // They all allow an appendix argument.
++    __ should_not_reach_here();           // empty stubs make SG sick
++    return NULL;
++  }
++
++  // I5_savedSP/O5_savedSP: sender SP (must preserve; see prepare_to_jump_from_interpreted)
++  // G5_method:  methodOop
+   // G4 (Gargs): incoming argument list (must preserve)
+-  // G5_method:  invoke methodOop
+-  // G3_method_handle: receiver method handle (must load from sp[MethodTypeForm.vmslots])
+-  // O0, O1, O2, O3, O4: garbage temps, blown away
+-  Register O0_mtype   = O0;
+-  Register O1_scratch = O1;
+-  Register O2_scratch = O2;
+-  Register O3_scratch = O3;
+-  Register O4_argslot = O4;
+-  Register O4_argbase = O4;
++  // O0: used as temp to hold mh or receiver
++  // O1, O4: garbage temps, blown away
++  Register O1_scratch    = O1;
++  Register O4_param_size = O4;   // size of parameters
+ 
+-  // emit WrongMethodType path first, to enable back-branch from main path
+-  Label wrong_method_type;
+-  __ bind(wrong_method_type);
+-  Label invoke_generic_slow_path;
+-  assert(methodOopDesc::intrinsic_id_size_in_bytes() == sizeof(u1), "");;
+-  __ ldub(Address(G5_method, methodOopDesc::intrinsic_id_offset_in_bytes()), O1_scratch);
+-  __ cmp(O1_scratch, (int) vmIntrinsics::_invokeExact);
+-  __ brx(Assembler::notEqual, false, Assembler::pt, invoke_generic_slow_path);
+-  __ delayed()->nop();
+-  __ mov(O0_mtype, G5_method_type);  // required by throw_WrongMethodType
+-  __ mov(G3_method_handle, G3_method_handle);  // already in this register
+-  // O0 will be filled in with JavaThread in stub
+-  __ jump_to(AddressLiteral(StubRoutines::throw_WrongMethodTypeException_entry()), O3_scratch);
+-  __ delayed()->nop();
++  address code_start = __ pc();
+ 
+   // here's where control starts out:
+   __ align(CodeEntryAlignment);
+   address entry_point = __ pc();
+ 
+-  // fetch the MethodType from the method handle
+-  // FIXME: Interpreter should transmit pre-popped stack pointer, to locate base of arg list.
+-  // This would simplify several touchy bits of code.
+-  // See 6984712: JSR 292 method handle calls need a clean argument base pointer
+-  {
+-    Register tem = G5_method;
+-    for (jint* pchase = methodOopDesc::method_type_offsets_chain(); (*pchase) != -1; pchase++) {
+-      __ ld_ptr(Address(tem, *pchase), O0_mtype);
+-      tem = O0_mtype;          // in case there is another indirection
++  if (VerifyMethodHandles) {
++    Label L;
++    BLOCK_COMMENT("verify_intrinsic_id {");
++    __ ldub(Address(G5_method, methodOopDesc::intrinsic_id_offset_in_bytes()), O1_scratch);
++    __ cmp_and_br_short(O1_scratch, (int) iid, Assembler::equal, Assembler::pt, L);
++    if (iid == vmIntrinsics::_linkToVirtual ||
++        iid == vmIntrinsics::_linkToSpecial) {
++      // could do this for all kinds, but would explode assembly code size
++      trace_method_handle(_masm, "bad methodOop::intrinsic_id");
+     }
++    __ STOP("bad methodOop::intrinsic_id");
++    __ bind(L);
++    BLOCK_COMMENT("} verify_intrinsic_id");
+   }
+ 
+-  // given the MethodType, find out where the MH argument is buried
+-  __ load_heap_oop(Address(O0_mtype,   __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes,        O1_scratch)), O4_argslot);
+-  __ ldsw(         Address(O4_argslot, __ delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, O1_scratch)), O4_argslot);
+-  __ add(__ argument_address(O4_argslot, O4_argslot, 1), O4_argbase);
+-  // Note: argument_address uses its input as a scratch register!
+-  Address mh_receiver_slot_addr(O4_argbase, -Interpreter::stackElementSize);
+-  __ ld_ptr(mh_receiver_slot_addr, G3_method_handle);
++  // First task:  Find out how big the argument list is.
++  Address O4_first_arg_addr;
++  int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
++  assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
++  if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
++    __ load_sized_value(Address(G5_method, methodOopDesc::size_of_parameters_offset()),
++                        O4_param_size,
++                        sizeof(u2), /*is_signed*/ false);
++    // assert(sizeof(u2) == sizeof(methodOopDesc::_size_of_parameters), "");
++    O4_first_arg_addr = __ argument_address(O4_param_size, O4_param_size, -1);
++  } else {
++    DEBUG_ONLY(O4_param_size = noreg);
++  }
+ 
+-  trace_method_handle(_masm, "invokeExact");
++  Register O0_mh = noreg;
++  if (!is_signature_polymorphic_static(iid)) {
++    __ ld_ptr(O4_first_arg_addr, O0_mh = O0);
++    DEBUG_ONLY(O4_param_size = noreg);
++  }
+ 
+-  __ check_method_handle_type(O0_mtype, G3_method_handle, O1_scratch, wrong_method_type);
++  // O4_first_arg_addr is live!
+ 
+-  // Nobody uses the MH receiver slot after this.  Make sure.
+-  DEBUG_ONLY(__ set((int32_t) 0x999999, O1_scratch); __ st_ptr(O1_scratch, mh_receiver_slot_addr));
++  if (TraceMethodHandles) {
++    const char* name = vmIntrinsics::name_at(iid);
++    if (*name == '_')  name += 1;
++    const size_t len = strlen(name) + 50;
++    char* qname = NEW_C_HEAP_ARRAY(char, len, mtInternal);
++    const char* suffix = "";
++    if (vmIntrinsics::method_for(iid) == NULL ||
++        !vmIntrinsics::method_for(iid)->access_flags().is_public()) {
++      if (is_signature_polymorphic_static(iid))
++        suffix = "/static";
++      else
++        suffix = "/private";
++    }
++    jio_snprintf(qname, len, "MethodHandle::interpreter_entry::%s%s", name, suffix);
++    if (O0_mh != noreg)
++      __ mov(O0_mh, G3_method_handle);  // make stub happy
++    trace_method_handle(_masm, qname);
++  }
+ 
+-  __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
++  if (iid == vmIntrinsics::_invokeBasic) {
++    generate_method_handle_dispatch(_masm, iid, O0_mh, noreg, not_for_compiler_entry);
+ 
+-  // for invokeGeneric (only), apply argument and result conversions on the fly
+-  __ bind(invoke_generic_slow_path);
+-#ifdef ASSERT
+-  if (VerifyMethodHandles) {
+-    Label L;
+-    __ ldub(Address(G5_method, methodOopDesc::intrinsic_id_offset_in_bytes()), O1_scratch);
+-    __ cmp(O1_scratch, (int) vmIntrinsics::_invokeGeneric);
+-    __ brx(Assembler::equal, false, Assembler::pt, L);
+-    __ delayed()->nop();
+-    __ stop("bad methodOop::intrinsic_id");
+-    __ bind(L);
++  } else {
++    // Adjust argument list by popping the trailing MemberName argument.
++    Register O0_recv = noreg;
++    if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
++      // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack.
++      __ ld_ptr(O4_first_arg_addr, O0_recv = O0);
++      DEBUG_ONLY(O4_param_size = noreg);
++    }
++    Register G5_member = G5_method;  // MemberName ptr; incoming method ptr is dead now
++    __ ld_ptr(__ argument_address(constant(0)), G5_member);
++    __ add(Gargs, Interpreter::stackElementSize, Gargs);
++    generate_method_handle_dispatch(_masm, iid, O0_recv, G5_member, not_for_compiler_entry);
+   }
+-#endif //ASSERT
+ 
+-  // make room on the stack for another pointer:
+-  insert_arg_slots(_masm, 2 * stack_move_unit(), O4_argbase, O1_scratch, O2_scratch, O3_scratch);
+-  // load up an adapter from the calling type (Java weaves this)
+-  Register O2_form    = O2_scratch;
+-  Register O3_adapter = O3_scratch;
+-  __ load_heap_oop(Address(O0_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes,               O1_scratch)), O2_form);
+-  __ load_heap_oop(Address(O2_form,  __ delayed_value(java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes, O1_scratch)), O3_adapter);
+-  __ verify_oop(O3_adapter);
+-  __ st_ptr(O3_adapter, Address(O4_argbase, 1 * Interpreter::stackElementSize));
+-  // As a trusted first argument, pass the type being called, so the adapter knows
+-  // the actual types of the arguments and return values.
+-  // (Generic invokers are shared among form-families of method-type.)
+-  __ st_ptr(O0_mtype,   Address(O4_argbase, 0 * Interpreter::stackElementSize));
+-  // FIXME: assert that O3_adapter is of the right method-type.
+-  __ mov(O3_adapter, G3_method_handle);
+-  trace_method_handle(_masm, "invokeGeneric");
+-  __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
++  if (PrintMethodHandleStubs) {
++    address code_end = __ pc();
++    tty->print_cr("--------");
++    tty->print_cr("method handle interpreter entry for %s", vmIntrinsics::name_at(iid));
++    Disassembler::decode(code_start, code_end);
++    tty->cr();
++  }
+ 
+   return entry_point;
+ }
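
Editorial aside (not part of the patch): for the linkTo* intrinsics the interpreter entry reads the receiver (when the reference kind has one) from the first declared parameter slot, fetches the MemberName appended as the trailing argument, and then shrinks the argument list by one stack element before dispatching. A sketch over a hypothetical argument-array view, where index 0 is the last argument pushed (what Gargs addresses through argument_address):

  #include <cstddef>

  struct InterpreterArgs {
    void** gargs;       // slot 0 = trailing (most recently pushed) argument
    size_t num_slots;   // methodOopDesc::size_of_parameters()
  };

  // The appended MemberName is the trailing argument:
  // __ ld_ptr(__ argument_address(constant(0)), G5_member).
  static void* trailing_member_name(const InterpreterArgs* a) { return a->gargs[0]; }

  // The receiver, when present, is the first declared parameter:
  // __ ld_ptr(argument_address(param_size, param_size, -1), O0_recv).
  static void* first_declared_arg(const InterpreterArgs* a) { return a->gargs[a->num_slots - 1]; }

  // __ add(Gargs, Interpreter::stackElementSize, Gargs): drop the MemberName slot.
  static void pop_trailing_slot(InterpreterArgs* a) { a->gargs += 1; a->num_slots -= 1; }
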
+ 
+-// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant.
+-static RegisterOrConstant constant(int value) {
+-  return RegisterOrConstant(value);
+-}
++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
++                                                    vmIntrinsics::ID iid,
++                                                    Register receiver_reg,
++                                                    Register member_reg,
++                                                    bool for_compiler_entry) {
++  assert(is_signature_polymorphic(iid), "expected invoke iid");
++  // temps used in this code are not used in *either* compiled or interpreted calling sequences
++  Register temp1 = (for_compiler_entry ? G1_scratch : O1);
++  Register temp2 = (for_compiler_entry ? G4_scratch : O4);
++  Register temp3 = G3_scratch;
++  Register temp4 = (for_compiler_entry ? noreg      : O2);
++  if (for_compiler_entry) {
++    assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : O0), "only valid assignment");
++    assert_different_registers(temp1,      O0, O1, O2, O3, O4, O5);
++    assert_different_registers(temp2,      O0, O1, O2, O3, O4, O5);
++    assert_different_registers(temp3,      O0, O1, O2, O3, O4, O5);
++    assert_different_registers(temp4,      O0, O1, O2, O3, O4, O5);
++  }
++  if (receiver_reg != noreg)  assert_different_registers(temp1, temp2, temp3, temp4, receiver_reg);
++  if (member_reg   != noreg)  assert_different_registers(temp1, temp2, temp3, temp4, member_reg);
++  if (!for_compiler_entry)    assert_different_registers(temp1, temp2, temp3, temp4, O5_savedSP);  // don't trash lastSP
+ 
+-static void load_vmargslot(MacroAssembler* _masm, Address vmargslot_addr, Register result) {
+-  __ ldsw(vmargslot_addr, result);
+-}
++  if (iid == vmIntrinsics::_invokeBasic) {
++    // indirect through MH.form.vmentry.vmtarget
++    jump_to_lambda_form(_masm, receiver_reg, G5_method, temp2, temp3, for_compiler_entry);
+ 
+-static RegisterOrConstant adjust_SP_and_Gargs_down_by_slots(MacroAssembler* _masm,
+-                                                            RegisterOrConstant arg_slots,
+-                                                            Register temp_reg, Register temp2_reg) {
+-  // Keep the stack pointer 2*wordSize aligned.
+-  const int TwoWordAlignmentMask = right_n_bits(LogBytesPerWord + 1);
+-  if (arg_slots.is_constant()) {
+-    const int        offset = arg_slots.as_constant() << LogBytesPerWord;
+-    const int masked_offset = round_to(offset, 2 * BytesPerWord);
+-    const int masked_offset2 = (offset + 1*BytesPerWord) & ~TwoWordAlignmentMask;
+-    assert(masked_offset == masked_offset2, "must agree");
+-    __ sub(Gargs,        offset, Gargs);
+-    __ sub(SP,    masked_offset, SP   );
+-    return offset;
+   } else {
+-#ifdef ASSERT
++    // The method is a member invoker used by direct method handles.
++    if (VerifyMethodHandles) {
++      // make sure the trailing argument really is a MemberName (caller responsibility)
++      verify_klass(_masm, member_reg, SystemDictionaryHandles::MemberName_klass(),
++                   temp1, temp2,
++                   "MemberName required for invokeVirtual etc.");
++    }
++
++    Address member_clazz(    member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()));
++    Address member_vmindex(  member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()));
++    Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()));
++
++    Register temp1_recv_klass = temp1;
++    if (iid != vmIntrinsics::_linkToStatic) {
++      __ verify_oop(receiver_reg);
++      if (iid == vmIntrinsics::_linkToSpecial) {
++        // Don't actually load the klass; just null-check the receiver.
++        __ null_check(receiver_reg);
++      } else {
++        // load receiver klass itself
++        __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes());
++        __ load_klass(receiver_reg, temp1_recv_klass);
++        __ verify_oop(temp1_recv_klass);
++      }
++      BLOCK_COMMENT("check_receiver {");
++      // The receiver for the MemberName must be in receiver_reg.
++      // Check the receiver against the MemberName.clazz
++      if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
++        // Did not load it above...
++        __ load_klass(receiver_reg, temp1_recv_klass);
++        __ verify_oop(temp1_recv_klass);
++      }
++      if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
++        Label L_ok;
++        Register temp2_defc = temp2;
++        __ load_heap_oop(member_clazz, temp2_defc);
++        load_klass_from_Class(_masm, temp2_defc, temp3, temp4);
++        __ verify_oop(temp2_defc);
++        __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, L_ok);
++        // If we get here, the type check failed!
++        __ STOP("receiver class disagrees with MemberName.clazz");
++        __ bind(L_ok);
++      }
++      BLOCK_COMMENT("} check_receiver");
++    }
++    if (iid == vmIntrinsics::_linkToSpecial ||
++        iid == vmIntrinsics::_linkToStatic) {
++      DEBUG_ONLY(temp1_recv_klass = noreg);  // these guys didn't load the recv_klass
++    }
++
++    // Live registers at this point:
++    //  member_reg - MemberName that was the trailing argument
++    //  temp1_recv_klass - klass of stacked receiver, if needed
++    //  O5_savedSP - interpreter linkage (if interpreted)
++    //  O0..O7,G1,G4 - compiler arguments (if compiled)
++
++    bool method_is_live = false;
++    switch (iid) {
++    case vmIntrinsics::_linkToSpecial:
++      if (VerifyMethodHandles) {
++        verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3);
++      }
++      __ load_heap_oop(member_vmtarget, G5_method);
++      method_is_live = true;
++      break;
++
++    case vmIntrinsics::_linkToStatic:
++      if (VerifyMethodHandles) {
++        verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3);
++      }
++      __ load_heap_oop(member_vmtarget, G5_method);
++      method_is_live = true;
++      break;
++
++    case vmIntrinsics::_linkToVirtual:
+     {
+-      Label L_ok;
+-      __ cmp_and_br_short(arg_slots.as_register(), 0, Assembler::greaterEqual, Assembler::pt, L_ok);
+-      __ stop("negative arg_slots");
+-      __ bind(L_ok);
++      // same as TemplateTable::invokevirtual,
++      // minus the CP setup and profiling:
++
++      if (VerifyMethodHandles) {
++        verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3);
++      }
++
++      // pick out the vtable index from the MemberName, and then we can discard it:
++      Register temp2_index = temp2;
++      __ ld_ptr(member_vmindex, temp2_index);
++
++      if (VerifyMethodHandles) {
++        Label L_index_ok;
++        __ cmp_and_br_short(temp2_index, (int) 0, Assembler::greaterEqual, Assembler::pn, L_index_ok);
++        __ STOP("no virtual index");
++        __ BIND(L_index_ok);
++      }
++
++      // Note:  The verifier invariants allow us to ignore MemberName.clazz and vmtarget
++      // at this point.  And VerifyMethodHandles has already checked clazz, if needed.
++
++      // get target methodOop & entry point
++      __ lookup_virtual_method(temp1_recv_klass, temp2_index, G5_method);
++      method_is_live = true;
++      break;
+     }
+-#endif
+-    __ sll_ptr(arg_slots.as_register(), LogBytesPerWord, temp_reg);
+-    __ add( temp_reg,  1*BytesPerWord,       temp2_reg);
+-    __ andn(temp2_reg, TwoWordAlignmentMask, temp2_reg);
+-    __ sub(Gargs, temp_reg,  Gargs);
+-    __ sub(SP,    temp2_reg, SP   );
+-    return temp_reg;
++
++    case vmIntrinsics::_linkToInterface:
++    {
++      // same as TemplateTable::invokeinterface
++      // (minus the CP setup and profiling, with different argument motion)
++      if (VerifyMethodHandles) {
++        verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
++      }
++
++      Register temp3_intf = temp3;
++      __ load_heap_oop(member_clazz, temp3_intf);
++      load_klass_from_Class(_masm, temp3_intf, temp2, temp4);
++      __ verify_oop(temp3_intf);
++
++      Register G5_index = G5_method;
++      __ ld_ptr(member_vmindex, G5_index);
++      if (VerifyMethodHandles) {
++        Label L;
++        __ cmp_and_br_short(G5_index, 0, Assembler::greaterEqual, Assembler::pt, L);
++        __ STOP("invalid vtable index for MH.invokeInterface");
++        __ bind(L);
++      }
++
++      // given intf, index, and recv klass, dispatch to the implementation method
++      Label L_no_such_interface;
++      Register no_sethi_temp = noreg;
++      __ lookup_interface_method(temp1_recv_klass, temp3_intf,
++                                 // note: next two args must be the same:
++                                 G5_index, G5_method,
++                                 temp2, no_sethi_temp,
++                                 L_no_such_interface);
++
++      __ verify_oop(G5_method);
++      jump_from_method_handle(_masm, G5_method, temp2, temp3, for_compiler_entry);
++
++      __ bind(L_no_such_interface);
++      AddressLiteral icce(StubRoutines::throw_IncompatibleClassChangeError_entry());
++      __ jump_to(icce, temp3);
++      __ delayed()->nop();
++      break;
++    }
++
++    default:
++      fatal(err_msg("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
++      break;
++    }
++
++    if (method_is_live) {
++      // live at this point:  G5_method, O5_savedSP (if interpreted)
++
++      // After figuring out which concrete method to call, jump into it.
++      // Note that this works in the interpreter with no data motion.
++      // But the compiled version will require that the receiver be shifted out.
++      __ verify_oop(G5_method);
++      jump_from_method_handle(_masm, G5_method, temp1, temp3, for_compiler_entry);
++    }
+   }
+ }
+ 
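
Editorial aside (not part of the patch): generate_method_handle_dispatch obtains the concrete methodOop differently per linker intrinsic: directly from MemberName.vmtarget for linkToSpecial and linkToStatic, via a vtable lookup for linkToVirtual, and via an itable lookup for linkToInterface. The selection policy restated in C++; the lookup helpers and struct layouts are hypothetical stand-ins for code the stub emits inline:

  #include <cstdint>

  struct methodOop {};
  struct Klass     {};
  struct MemberName {
    Klass*     clazz;     // defining class (held as a java.lang.Class in the real object)
    intptr_t   vmindex;   // vtable/itable index for virtual and interface linkage
    methodOop* vmtarget;  // concrete target for special and static linkage
  };

  enum LinkKind { LinkToSpecial, LinkToStatic, LinkToVirtual, LinkToInterface };

  // Placeholders for __ lookup_virtual_method / __ lookup_interface_method.
  static methodOop* vtable_lookup(Klass*, intptr_t)         { return 0; }
  static methodOop* itable_lookup(Klass*, Klass*, intptr_t) { return 0; }

  static methodOop* select_target(LinkKind kind, const MemberName* mn, Klass* recv_klass) {
    switch (kind) {
    case LinkToSpecial:
    case LinkToStatic:    return mn->vmtarget;                          // load_heap_oop(member_vmtarget)
    case LinkToVirtual:   return vtable_lookup(recv_klass, mn->vmindex);
    case LinkToInterface: return itable_lookup(recv_klass, mn->clazz, mn->vmindex);
    }
    return 0;  // unreachable; the stub calls fatal() for unexpected intrinsics
  }
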
+-static RegisterOrConstant adjust_SP_and_Gargs_up_by_slots(MacroAssembler* _masm,
+-                                                          RegisterOrConstant arg_slots,
+-                                                          Register temp_reg, Register temp2_reg) {
+-  // Keep the stack pointer 2*wordSize aligned.
+-  const int TwoWordAlignmentMask = right_n_bits(LogBytesPerWord + 1);
+-  if (arg_slots.is_constant()) {
+-    const int        offset = arg_slots.as_constant() << LogBytesPerWord;
+-    const int masked_offset = offset & ~TwoWordAlignmentMask;
+-    __ add(Gargs,        offset, Gargs);
+-    __ add(SP,    masked_offset, SP   );
+-    return offset;
+-  } else {
+-    __ sll_ptr(arg_slots.as_register(), LogBytesPerWord, temp_reg);
+-    __ andn(temp_reg, TwoWordAlignmentMask, temp2_reg);
+-    __ add(Gargs, temp_reg,  Gargs);
+-    __ add(SP,    temp2_reg, SP   );
+-    return temp_reg;
+-  }
+-}
+-
+-// Helper to insert argument slots into the stack.
+-// arg_slots must be a multiple of stack_move_unit() and < 0
+-// argslot_reg is decremented to point to the new (shifted) location of the argslot
+-// But, temp_reg ends up holding the original value of argslot_reg.
+-void MethodHandles::insert_arg_slots(MacroAssembler* _masm,
+-                                     RegisterOrConstant arg_slots,
+-                                     Register argslot_reg,
+-                                     Register temp_reg, Register temp2_reg, Register temp3_reg) {
+-  // allow constant zero
+-  if (arg_slots.is_constant() && arg_slots.as_constant() == 0)
+-    return;
+-
+-  assert_different_registers(argslot_reg, temp_reg, temp2_reg, temp3_reg,
+-                             (!arg_slots.is_register() ? Gargs : arg_slots.as_register()));
+-
+-  BLOCK_COMMENT("insert_arg_slots {");
+-  if (VerifyMethodHandles)
+-    verify_argslot(_masm, argslot_reg, temp_reg, "insertion point must fall within current frame");
+-  if (VerifyMethodHandles)
+-    verify_stack_move(_masm, arg_slots, -1);
+-
+-  // Make space on the stack for the inserted argument(s).
+-  // Then pull down everything shallower than argslot_reg.
+-  // The stacked return address gets pulled down with everything else.
+-  // That is, copy [sp, argslot) downward by -size words.  In pseudo-code:
+-  //   sp -= size;
+-  //   for (temp = sp + size; temp < argslot; temp++)
+-  //     temp[-size] = temp[0]
+-  //   argslot -= size;
+-
+-  // offset is temp3_reg in case of arg_slots being a register.
+-  RegisterOrConstant offset = adjust_SP_and_Gargs_up_by_slots(_masm, arg_slots, temp3_reg, temp_reg);
+-  __ sub(Gargs, offset, temp_reg);  // source pointer for copy
+-
+-  {
+-    Label loop;
+-    __ BIND(loop);
+-    // pull one word down each time through the loop
+-    __ ld_ptr(           Address(temp_reg, 0     ), temp2_reg);
+-    __ st_ptr(temp2_reg, Address(temp_reg, offset)           );
+-    __ add(temp_reg, wordSize, temp_reg);
+-    __ cmp_and_brx_short(temp_reg, argslot_reg, Assembler::lessUnsigned, Assembler::pt, loop);
+-  }
+-
+-  // Now move the argslot down, to point to the opened-up space.
+-  __ add(argslot_reg, offset, argslot_reg);
+-  BLOCK_COMMENT("} insert_arg_slots");
+-}
+-
+-
+-// Helper to remove argument slots from the stack.
+-// arg_slots must be a multiple of stack_move_unit() and > 0
+-void MethodHandles::remove_arg_slots(MacroAssembler* _masm,
+-                                     RegisterOrConstant arg_slots,
+-                                     Register argslot_reg,
+-                                     Register temp_reg, Register temp2_reg, Register temp3_reg) {
+-  // allow constant zero
+-  if (arg_slots.is_constant() && arg_slots.as_constant() == 0)
+-    return;
+-  assert_different_registers(argslot_reg, temp_reg, temp2_reg, temp3_reg,
+-                             (!arg_slots.is_register() ? Gargs : arg_slots.as_register()));
+-
+-  BLOCK_COMMENT("remove_arg_slots {");
+-  if (VerifyMethodHandles)
+-    verify_argslots(_masm, arg_slots, argslot_reg, temp_reg, temp2_reg, false,
+-                    "deleted argument(s) must fall within current frame");
+-  if (VerifyMethodHandles)
+-    verify_stack_move(_masm, arg_slots, +1);
+-
+-  // Pull up everything shallower than argslot.
+-  // Then remove the excess space on the stack.
+-  // The stacked return address gets pulled up with everything else.
+-  // That is, copy [sp, argslot) upward by size words.  In pseudo-code:
+-  //   for (temp = argslot-1; temp >= sp; --temp)
+-  //     temp[size] = temp[0]
+-  //   argslot += size;
+-  //   sp += size;
+-
+-  RegisterOrConstant offset = __ regcon_sll_ptr(arg_slots, LogBytesPerWord, temp3_reg);
+-  __ sub(argslot_reg, wordSize, temp_reg);  // source pointer for copy
+-
+-  {
+-    Label L_loop;
+-    __ BIND(L_loop);
+-    // pull one word up each time through the loop
+-    __ ld_ptr(           Address(temp_reg, 0     ), temp2_reg);
+-    __ st_ptr(temp2_reg, Address(temp_reg, offset)           );
+-    __ sub(temp_reg, wordSize, temp_reg);
+-    __ cmp_and_brx_short(temp_reg, Gargs, Assembler::greaterEqualUnsigned, Assembler::pt, L_loop);
+-  }
+-
+-  // And adjust the argslot address to point at the deletion point.
+-  __ add(argslot_reg, offset, argslot_reg);
+-
+-  // We don't need the offset at this point anymore, just adjust SP and Gargs.
+-  (void) adjust_SP_and_Gargs_up_by_slots(_masm, arg_slots, temp3_reg, temp_reg);
+-
+-  BLOCK_COMMENT("} remove_arg_slots");
+-}
+-
+-// Helper to copy argument slots to the top of the stack.
+-// The sequence starts with argslot_reg and is counted by slot_count
+-// slot_count must be a multiple of stack_move_unit() and >= 0
+-// This function blows the temps but does not change argslot_reg.
+-void MethodHandles::push_arg_slots(MacroAssembler* _masm,
+-                                   Register argslot_reg,
+-                                   RegisterOrConstant slot_count,
+-                                   Register temp_reg, Register temp2_reg) {
+-  // allow constant zero
+-  if (slot_count.is_constant() && slot_count.as_constant() == 0)
+-    return;
+-  assert_different_registers(argslot_reg, temp_reg, temp2_reg,
+-                             (!slot_count.is_register() ? Gargs : slot_count.as_register()),
+-                             SP);
+-  assert(Interpreter::stackElementSize == wordSize, "else change this code");
+-
+-  BLOCK_COMMENT("push_arg_slots {");
+-  if (VerifyMethodHandles)
+-    verify_stack_move(_masm, slot_count, 0);
+-
+-  RegisterOrConstant offset = adjust_SP_and_Gargs_down_by_slots(_masm, slot_count, temp2_reg, temp_reg);
+-
+-  if (slot_count.is_constant()) {
+-    for (int i = slot_count.as_constant() - 1; i >= 0; i--) {
+-      __ ld_ptr(          Address(argslot_reg, i * wordSize), temp_reg);
+-      __ st_ptr(temp_reg, Address(Gargs,       i * wordSize));
+-    }
+-  } else {
+-    Label L_plural, L_loop, L_break;
+-    // Emit code to dynamically check for the common cases, zero and one slot.
+-    __ cmp(slot_count.as_register(), (int32_t) 1);
+-    __ br(Assembler::greater, false, Assembler::pn, L_plural);
+-    __ delayed()->nop();
+-    __ br(Assembler::less, false, Assembler::pn, L_break);
+-    __ delayed()->nop();
+-    __ ld_ptr(          Address(argslot_reg, 0), temp_reg);
+-    __ st_ptr(temp_reg, Address(Gargs,       0));
+-    __ ba_short(L_break);
+-    __ BIND(L_plural);
+-
+-    // Loop for 2 or more:
+-    //   top = &argslot[slot_count]
+-    //   while (top > argslot)  *(--Gargs) = *(--top)
+-    Register top_reg = temp_reg;
+-    __ add(argslot_reg, offset, top_reg);
+-    __ add(Gargs,       offset, Gargs  );  // move back up again so we can go down
+-    __ BIND(L_loop);
+-    __ sub(top_reg, wordSize, top_reg);
+-    __ sub(Gargs,   wordSize, Gargs  );
+-    __ ld_ptr(           Address(top_reg, 0), temp2_reg);
+-    __ st_ptr(temp2_reg, Address(Gargs,   0));
+-    __ cmp_and_brx_short(top_reg, argslot_reg, Assembler::greaterUnsigned, Assembler::pt, L_loop);
+-    __ BIND(L_break);
+-  }
+-  BLOCK_COMMENT("} push_arg_slots");
+-}
+-
+-// in-place movement; no change to Gargs
+-// blows temp_reg, temp2_reg
+-void MethodHandles::move_arg_slots_up(MacroAssembler* _masm,
+-                                      Register bottom_reg,  // invariant
+-                                      Address  top_addr,    // can use temp_reg
+-                                      RegisterOrConstant positive_distance_in_slots,  // destroyed if register
+-                                      Register temp_reg, Register temp2_reg) {
+-  assert_different_registers(bottom_reg,
+-                             temp_reg, temp2_reg,
+-                             positive_distance_in_slots.register_or_noreg());
+-  BLOCK_COMMENT("move_arg_slots_up {");
+-  Label L_loop, L_break;
+-  Register top_reg = temp_reg;
+-  if (!top_addr.is_same_address(Address(top_reg, 0))) {
+-    __ add(top_addr, top_reg);
+-  }
+-  // Detect empty (or broken) loop:
+-#ifdef ASSERT
+-  if (VerifyMethodHandles) {
+-    // Verify that &bottom < &top (non-empty interval)
+-    Label L_ok, L_bad;
+-    if (positive_distance_in_slots.is_register()) {
+-      __ cmp(positive_distance_in_slots.as_register(), (int32_t) 0);
+-      __ br(Assembler::lessEqual, false, Assembler::pn, L_bad);
+-      __ delayed()->nop();
+-    }
+-    __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
+-    __ BIND(L_bad);
+-    __ stop("valid bounds (copy up)");
+-    __ BIND(L_ok);
+-  }
+-#endif
+-  __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::greaterEqualUnsigned, Assembler::pn, L_break);
+-  // work top down to bottom, copying contiguous data upwards
+-  // In pseudo-code:
+-  //   while (--top >= bottom) *(top + distance) = *(top + 0);
+-  RegisterOrConstant offset = __ argument_offset(positive_distance_in_slots, positive_distance_in_slots.register_or_noreg());
+-  __ BIND(L_loop);
+-  __ sub(top_reg, wordSize, top_reg);
+-  __ ld_ptr(           Address(top_reg, 0     ), temp2_reg);
+-  __ st_ptr(temp2_reg, Address(top_reg, offset)           );
+-  __ cmp_and_brx_short(top_reg, bottom_reg, Assembler::greaterUnsigned, Assembler::pt, L_loop);
+-  assert(Interpreter::stackElementSize == wordSize, "else change loop");
+-  __ BIND(L_break);
+-  BLOCK_COMMENT("} move_arg_slots_up");
+-}
+-
+-// in-place movement; no change to rsp
+-// blows temp_reg, temp2_reg
+-void MethodHandles::move_arg_slots_down(MacroAssembler* _masm,
+-                                        Address  bottom_addr,  // can use temp_reg
+-                                        Register top_reg,      // invariant
+-                                        RegisterOrConstant negative_distance_in_slots,  // destroyed if register
+-                                        Register temp_reg, Register temp2_reg) {
+-  assert_different_registers(top_reg,
+-                             negative_distance_in_slots.register_or_noreg(),
+-                             temp_reg, temp2_reg);
+-  BLOCK_COMMENT("move_arg_slots_down {");
+-  Label L_loop, L_break;
+-  Register bottom_reg = temp_reg;
+-  if (!bottom_addr.is_same_address(Address(bottom_reg, 0))) {
+-    __ add(bottom_addr, bottom_reg);
+-  }
+-  // Detect empty (or broken) loop:
+-#ifdef ASSERT
+-  assert(!negative_distance_in_slots.is_constant() || negative_distance_in_slots.as_constant() < 0, "");
+-  if (VerifyMethodHandles) {
+-    // Verify that &bottom < &top (non-empty interval)
+-    Label L_ok, L_bad;
+-    if (negative_distance_in_slots.is_register()) {
+-      __ cmp(negative_distance_in_slots.as_register(), (int32_t) 0);
+-      __ br(Assembler::greaterEqual, false, Assembler::pn, L_bad);
+-      __ delayed()->nop();
+-    }
+-    __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
+-    __ BIND(L_bad);
+-    __ stop("valid bounds (copy down)");
+-    __ BIND(L_ok);
+-  }
+-#endif
+-  __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::greaterEqualUnsigned, Assembler::pn, L_break);
+-  // work bottom up to top, copying contiguous data downwards
+-  // In pseudo-code:
+-  //   while (bottom < top) *(bottom - distance) = *(bottom + 0), bottom++;
+-  RegisterOrConstant offset = __ argument_offset(negative_distance_in_slots, negative_distance_in_slots.register_or_noreg());
+-  __ BIND(L_loop);
+-  __ ld_ptr(           Address(bottom_reg, 0     ), temp2_reg);
+-  __ st_ptr(temp2_reg, Address(bottom_reg, offset)           );
+-  __ add(bottom_reg, wordSize, bottom_reg);
+-  __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_loop);
+-  assert(Interpreter::stackElementSize == wordSize, "else change loop");
+-  __ BIND(L_break);
+-  BLOCK_COMMENT("} move_arg_slots_down");
+-}
+-
+-// Copy from a field or array element to a stacked argument slot.
+-// is_element (ignored) says whether caller is loading an array element instead of an instance field.
+-void MethodHandles::move_typed_arg(MacroAssembler* _masm,
+-                                   BasicType type, bool is_element,
+-                                   Address value_src, Address slot_dest,
+-                                   Register temp_reg) {
+-  assert(!slot_dest.uses(temp_reg), "must be different register");
+-  BLOCK_COMMENT(!is_element ? "move_typed_arg {" : "move_typed_arg { (array element)");
+-  if (type == T_OBJECT || type == T_ARRAY) {
+-    __ load_heap_oop(value_src, temp_reg);
+-    __ verify_oop(temp_reg);
+-    __ st_ptr(temp_reg, slot_dest);
+-  } else if (type != T_VOID) {
+-    int  arg_size      = type2aelembytes(type);
+-    bool arg_is_signed = is_signed_subword_type(type);
+-    int  slot_size     = is_subword_type(type) ? type2aelembytes(T_INT) : arg_size;  // store int sub-words as int
+-    __ load_sized_value( value_src, temp_reg, arg_size, arg_is_signed);
+-    __ store_sized_value(temp_reg, slot_dest, slot_size              );
+-  }
+-  BLOCK_COMMENT("} move_typed_arg");
+-}
+-
+-// Cf. TemplateInterpreterGenerator::generate_return_entry_for and
+-// InterpreterMacroAssembler::save_return_value
+-void MethodHandles::move_return_value(MacroAssembler* _masm, BasicType type,
+-                                      Address return_slot) {
+-  BLOCK_COMMENT("move_return_value {");
+-  // Look at the type and pull the value out of the corresponding register.
+-  if (type == T_VOID) {
+-    // nothing to do
+-  } else if (type == T_OBJECT) {
+-    __ verify_oop(O0);
+-    __ st_ptr(O0, return_slot);
+-  } else if (type == T_INT || is_subword_type(type)) {
+-    int type_size = type2aelembytes(T_INT);
+-    __ store_sized_value(O0, return_slot, type_size);
+-  } else if (type == T_LONG) {
+-    // store the value by parts
+-    // Note: We assume longs are continguous (if misaligned) on the interpreter stack.
+-#if !defined(_LP64) && defined(COMPILER2)
+-    __ stx(G1, return_slot);
+-#else
+-  #ifdef _LP64
+-    __ stx(O0, return_slot);
+-  #else
+-    if (return_slot.has_disp()) {
+-      // The displacement is a constant
+-      __ st(O0, return_slot);
+-      __ st(O1, return_slot.plus_disp(Interpreter::stackElementSize));
+-    } else {
+-      __ std(O0, return_slot);
+-    }
+-  #endif
+-#endif
+-  } else if (type == T_FLOAT) {
+-    __ stf(FloatRegisterImpl::S, Ftos_f, return_slot);
+-  } else if (type == T_DOUBLE) {
+-    __ stf(FloatRegisterImpl::D, Ftos_f, return_slot);
+-  } else {
+-    ShouldNotReachHere();
+-  }
+-  BLOCK_COMMENT("} move_return_value");
+-}
+-
+ #ifndef PRODUCT
+-void MethodHandles::RicochetFrame::describe(const frame* fr, FrameValues& values, int frame_no)  {
+-    RicochetFrame* rf = new RicochetFrame(*fr);
+-
+-    // ricochet slots (kept in registers for sparc)
+-    values.describe(frame_no, rf->register_addr(I5_savedSP), err_msg("exact_sender_sp reg for #%d", frame_no));
+-    values.describe(frame_no, rf->register_addr(L5_conversion), err_msg("conversion reg for #%d", frame_no));
+-    values.describe(frame_no, rf->register_addr(L4_saved_args_base), err_msg("saved_args_base reg for #%d", frame_no));
+-    values.describe(frame_no, rf->register_addr(L3_saved_args_layout), err_msg("saved_args_layout reg for #%d", frame_no));
+-    values.describe(frame_no, rf->register_addr(L2_saved_target), err_msg("saved_target reg for #%d", frame_no));
+-    values.describe(frame_no, rf->register_addr(L1_continuation), err_msg("continuation reg for #%d", frame_no));
+-
+-    // relevant ricochet targets (in caller frame)
+-    values.describe(-1, rf->saved_args_base(),  err_msg("*saved_args_base for #%d", frame_no));
+-    values.describe(-1, (intptr_t *)(STACK_BIAS+(uintptr_t)rf->exact_sender_sp()),  err_msg("*exact_sender_sp+STACK_BIAS for #%d", frame_no));
+-}
+-#endif // ASSERT
+-
+-#ifndef PRODUCT
+-extern "C" void print_method_handle(oop mh);
+ void trace_method_handle_stub(const char* adaptername,
+                               oopDesc* mh,
+                               intptr_t* saved_sp,
+                               intptr_t* args,
+                               intptr_t* tracing_fp) {
+-  bool has_mh = (strstr(adaptername, "return/") == NULL);  // return adapters don't have mh
+-
+-  tty->print_cr("MH %s mh="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT, adaptername, (intptr_t) mh, saved_sp, args);
++  bool has_mh = (strstr(adaptername, "/static") == NULL &&
++                 strstr(adaptername, "linkTo") == NULL);    // static linkers don't have MH
++  const char* mh_reg_name = has_mh ? "G3_mh" : "G3";
++  tty->print_cr("MH %s %s="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT,
++                adaptername, mh_reg_name,
++                (intptr_t) mh, saved_sp, args);
+ 
+   if (Verbose) {
+     // dumping last frame with frame::describe
+@@ -1090,6 +561,7 @@
+ 
+     // mark saved_sp, if seems valid (may not be valid for some adapters)
+     intptr_t *unbiased_sp = (intptr_t *)(STACK_BIAS+(uintptr_t)saved_sp);
++    const int ARG_LIMIT = 255, SLOP = 45, UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP);
+     if ((unbiased_sp >= dump_sp - UNREASONABLE_STACK_MOVE) && (unbiased_sp < dump_fp)) {
+       values.describe(-1, unbiased_sp, "*saved_sp+STACK_BIAS");
+     }
+@@ -1097,10 +569,13 @@
+     // Note: the unextended_sp may not be correct
+     tty->print_cr("  stack layout:");
+     values.print(p);
+-  }
+-
+-  if (has_mh) {
+-    print_method_handle(mh);
++    if (has_mh && mh->is_oop()) {
++      mh->print();
++      if (java_lang_invoke_MethodHandle::is_instance(mh)) {
++        if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0)
++          java_lang_invoke_MethodHandle::form(mh)->print();
++      }
++    }
+   }
+ }
+ 
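
Editorial aside (not part of the patch): with the JDK 7 return adapters gone, the tracing stub decides whether G3 holds a MethodHandle purely from the adapter name; static linkers and the linkTo* intrinsics carry no MH. The check, in plain C++:

  #include <cstring>

  static bool adapter_has_mh(const char* adaptername) {
    return std::strstr(adaptername, "/static") == nullptr &&
           std::strstr(adaptername, "linkTo")  == nullptr;
  }
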
+@@ -1143,1260 +618,3 @@
+   BLOCK_COMMENT("} trace_method_handle");
+ }
+ #endif // PRODUCT
+-
+-// which conversion op types are implemented here?
+-int MethodHandles::adapter_conversion_ops_supported_mask() {
+-  return ((1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_ONLY)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_RAW)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_CHECK_CAST)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_PRIM)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_REF_TO_PRIM)
+-          // OP_PRIM_TO_REF is below...
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SWAP_ARGS)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_ROT_ARGS)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DUP_ARGS)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS)
+-          // OP_COLLECT_ARGS is below...
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS)
+-         |(
+-           java_lang_invoke_MethodTypeForm::vmlayout_offset_in_bytes() <= 0 ? 0 :
+-           ((1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF)
+-           |(1<<java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS)
+-           |(1<<java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS)
+-           )
+-          )
+-         );
+-}
+-
+-//------------------------------------------------------------------------------
+-// MethodHandles::generate_method_handle_stub
+-//
+-// Generate an "entry" field for a method handle.
+-// This determines how the method handle will respond to calls.
+-void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHandles::EntryKind ek) {
+-  MethodHandles::EntryKind ek_orig = ek_original_kind(ek);
+-
+-  // Here is the register state during an interpreted call,
+-  // as set up by generate_method_handle_interpreter_entry():
+-  // - G5: garbage temp (was MethodHandle.invoke methodOop, unused)
+-  // - G3: receiver method handle
+-  // - O5_savedSP: sender SP (must preserve)
+-
+-  const Register O0_scratch = O0;
+-  const Register O1_scratch = O1;
+-  const Register O2_scratch = O2;
+-  const Register O3_scratch = O3;
+-  const Register O4_scratch = O4;
+-  const Register G5_scratch = G5;
+-
+-  // Often used names:
+-  const Register O0_argslot = O0;
+-
+-  // Argument registers for _raise_exception:
+-  const Register O0_code     = O0;
+-  const Register O1_actual   = O1;
+-  const Register O2_required = O2;
+-
+-  guarantee(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes() != 0, "must have offsets");
+-
+-  // Some handy addresses:
+-  Address G3_mh_vmtarget(   G3_method_handle, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes());
+-
+-  Address G3_dmh_vmindex(   G3_method_handle, java_lang_invoke_DirectMethodHandle::vmindex_offset_in_bytes());
+-
+-  Address G3_bmh_vmargslot( G3_method_handle, java_lang_invoke_BoundMethodHandle::vmargslot_offset_in_bytes());
+-  Address G3_bmh_argument(  G3_method_handle, java_lang_invoke_BoundMethodHandle::argument_offset_in_bytes());
+-
+-  Address G3_amh_vmargslot( G3_method_handle, java_lang_invoke_AdapterMethodHandle::vmargslot_offset_in_bytes());
+-  Address G3_amh_argument ( G3_method_handle, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes());
+-  Address G3_amh_conversion(G3_method_handle, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes());
+-
+-  const int java_mirror_offset = in_bytes(Klass::java_mirror_offset());
+-
+-  if (have_entry(ek)) {
+-    __ nop();  // empty stubs make SG sick
+-    return;
+-  }
+-
+-  address interp_entry = __ pc();
+-
+-  trace_method_handle(_masm, entry_name(ek));
+-
+-  BLOCK_COMMENT(err_msg("Entry %s {", entry_name(ek)));
+-
+-  switch ((int) ek) {
+-  case _raise_exception:
+-    {
+-      // Not a real MH entry, but rather shared code for raising an
+-      // exception.  For sharing purposes the arguments are passed into registers
+-      // and then placed in the intepreter calling convention here.
+-      assert(raise_exception_method(), "must be set");
+-      assert(raise_exception_method()->from_compiled_entry(), "method must be linked");
+-
+-      __ set(AddressLiteral((address) &_raise_exception_method), G5_method);
+-      __ ld_ptr(Address(G5_method, 0), G5_method);
+-
+-      const int jobject_oop_offset = 0;
+-      __ ld_ptr(Address(G5_method, jobject_oop_offset), G5_method);
+-
+-      adjust_SP_and_Gargs_down_by_slots(_masm, 3, noreg, noreg);
+-
+-      __ st    (O0_code,     __ argument_address(constant(2), noreg, 0));
+-      __ st_ptr(O1_actual,   __ argument_address(constant(1), noreg, 0));
+-      __ st_ptr(O2_required, __ argument_address(constant(0), noreg, 0));
+-      jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
+-    }
+-    break;
+-
+-  case _invokestatic_mh:
+-  case _invokespecial_mh:
+-    {
+-      __ load_heap_oop(G3_mh_vmtarget, G5_method);  // target is a methodOop
+-      // Same as TemplateTable::invokestatic or invokespecial,
+-      // minus the CP setup and profiling:
+-      if (ek == _invokespecial_mh) {
+-        // Must load & check the first argument before entering the target method.
+-        __ load_method_handle_vmslots(O0_argslot, G3_method_handle, O1_scratch);
+-        __ ld_ptr(__ argument_address(O0_argslot, O0_argslot, -1), G3_method_handle);
+-        __ null_check(G3_method_handle);
+-        __ verify_oop(G3_method_handle);
+-      }
+-      jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
+-    }
+-    break;
+-
+-  case _invokevirtual_mh:
+-    {
+-      // Same as TemplateTable::invokevirtual,
+-      // minus the CP setup and profiling:
+-
+-      // Pick out the vtable index and receiver offset from the MH,
+-      // and then we can discard it:
+-      Register O2_index = O2_scratch;
+-      __ load_method_handle_vmslots(O0_argslot, G3_method_handle, O1_scratch);
+-      __ ldsw(G3_dmh_vmindex, O2_index);
+-      // Note:  The verifier allows us to ignore G3_mh_vmtarget.
+-      __ ld_ptr(__ argument_address(O0_argslot, O0_argslot, -1), G3_method_handle);
+-      __ null_check(G3_method_handle, oopDesc::klass_offset_in_bytes());
+-
+-      // Get receiver klass:
+-      Register O0_klass = O0_argslot;
+-      __ load_klass(G3_method_handle, O0_klass);
+-      __ verify_oop(O0_klass);
+-
+-      // Get target methodOop & entry point:
+-      const int base = instanceKlass::vtable_start_offset() * wordSize;
+-      assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+-
+-      __ sll_ptr(O2_index, LogBytesPerWord, O2_index);
+-      __ add(O0_klass, O2_index, O0_klass);
+-      Address vtable_entry_addr(O0_klass, base + vtableEntry::method_offset_in_bytes());
+-      __ ld_ptr(vtable_entry_addr, G5_method);
+-
+-      jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
+-    }
+-    break;
+-
+-  case _invokeinterface_mh:
+-    {
+-      // Same as TemplateTable::invokeinterface,
+-      // minus the CP setup and profiling:
+-      __ load_method_handle_vmslots(O0_argslot, G3_method_handle, O1_scratch);
+-      Register O1_intf  = O1_scratch;
+-      Register G5_index = G5_scratch;
+-      __ load_heap_oop(G3_mh_vmtarget, O1_intf);
+-      __ ldsw(G3_dmh_vmindex, G5_index);
+-      __ ld_ptr(__ argument_address(O0_argslot, O0_argslot, -1), G3_method_handle);
+-      __ null_check(G3_method_handle, oopDesc::klass_offset_in_bytes());
+-
+-      // Get receiver klass:
+-      Register O0_klass = O0_argslot;
+-      __ load_klass(G3_method_handle, O0_klass);
+-      __ verify_oop(O0_klass);
+-
+-      // Get interface:
+-      Label no_such_interface;
+-      __ verify_oop(O1_intf);
+-      __ lookup_interface_method(O0_klass, O1_intf,
+-                                 // Note: next two args must be the same:
+-                                 G5_index, G5_method,
+-                                 O2_scratch,
+-                                 O3_scratch,
+-                                 no_such_interface);
+-
+-      jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
+-
+-      __ bind(no_such_interface);
+-      // Throw an exception.
+-      // For historical reasons, it will be IncompatibleClassChangeError.
+-      __ unimplemented("not tested yet");
+-      __ ld_ptr(Address(O1_intf, java_mirror_offset), O2_required);  // required interface
+-      __ mov(   O0_klass,                             O1_actual);    // bad receiver
+-      __ jump_to(AddressLiteral(from_interpreted_entry(_raise_exception)), O3_scratch);
+-      __ delayed()->mov(Bytecodes::_invokeinterface,  O0_code);      // who is complaining?
+-    }
+-    break;
+-
+-  case _bound_ref_mh:
+-  case _bound_int_mh:
+-  case _bound_long_mh:
+-  case _bound_ref_direct_mh:
+-  case _bound_int_direct_mh:
+-  case _bound_long_direct_mh:
+-    {
+-      const bool direct_to_method = (ek >= _bound_ref_direct_mh);
+-      BasicType arg_type  = ek_bound_mh_arg_type(ek);
+-      int       arg_slots = type2size[arg_type];
+-
+-      // Make room for the new argument:
+-      load_vmargslot(_masm, G3_bmh_vmargslot, O0_argslot);
+-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
+-
+-      insert_arg_slots(_masm, arg_slots * stack_move_unit(), O0_argslot, O1_scratch, O2_scratch, O3_scratch);
+-
+-      // Store bound argument into the new stack slot:
+-      __ load_heap_oop(G3_bmh_argument, O1_scratch);
+-      if (arg_type == T_OBJECT) {
+-        __ st_ptr(O1_scratch, Address(O0_argslot, 0));
+-      } else {
+-        Address prim_value_addr(O1_scratch, java_lang_boxing_object::value_offset_in_bytes(arg_type));
+-        move_typed_arg(_masm, arg_type, false,
+-                       prim_value_addr,
+-                       Address(O0_argslot, 0),
+-                       O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
+-      }
+-
+-      if (direct_to_method) {
+-        __ load_heap_oop(G3_mh_vmtarget, G5_method);  // target is a methodOop
+-        jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
+-      } else {
+-        __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);  // target is a MethodHandle
+-        __ verify_oop(G3_method_handle);
+-        __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+-      }
+-    }
+-    break;
+-
+-  case _adapter_opt_profiling:
+-    if (java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes() != 0) {
+-      Address G3_mh_vmcount(G3_method_handle, java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes());
+-      __ ld(G3_mh_vmcount, O1_scratch);
+-      __ add(O1_scratch, 1, O1_scratch);
+-      __ st(O1_scratch, G3_mh_vmcount);
+-    }
+-    // fall through
+-
+-  case _adapter_retype_only:
+-  case _adapter_retype_raw:
+-    // Immediately jump to the next MH layer:
+-    __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
+-    __ verify_oop(G3_method_handle);
+-    __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+-    // This is OK when all parameter types widen.
+-    // It is also OK when a return type narrows.
+-    break;
+-
+-  case _adapter_check_cast:
+-    {
+-      // Check a reference argument before jumping to the next layer of MH:
+-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
+-      Address vmarg = __ argument_address(O0_argslot, O0_argslot);
+-
+-      // What class are we casting to?
+-      Register O1_klass = O1_scratch;  // Interesting AMH data.
+-      __ load_heap_oop(G3_amh_argument, O1_klass);  // This is a Class object!
+-      load_klass_from_Class(_masm, O1_klass, O2_scratch, O3_scratch);
+-
+-      Label L_done;
+-      __ ld_ptr(vmarg, O2_scratch);
+-      __ br_null_short(O2_scratch, Assembler::pn, L_done);  // No cast if null.
+-      __ load_klass(O2_scratch, O2_scratch);
+-
+-      // Live at this point:
+-      // - O0_argslot      :  argslot index in vmarg; may be required in the failing path
+-      // - O1_klass        :  klass required by the target method
+-      // - O2_scratch      :  argument klass to test
+-      // - G3_method_handle:  adapter method handle
+-      __ check_klass_subtype(O2_scratch, O1_klass, O3_scratch, O4_scratch, L_done);
+-
+-      // If we get here, the type check failed!
+-      __ load_heap_oop(G3_amh_argument,        O2_required);  // required class
+-      __ ld_ptr(       vmarg,                  O1_actual);    // bad object
+-      __ jump_to(AddressLiteral(from_interpreted_entry(_raise_exception)), O3_scratch);
+-      __ delayed()->mov(Bytecodes::_checkcast, O0_code);      // who is complaining?
+-
+-      __ BIND(L_done);
+-      // Get the new MH:
+-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
+-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+-    }
+-    break;
+-
+-  case _adapter_prim_to_prim:
+-  case _adapter_ref_to_prim:
+-    // Handled completely by optimized cases.
+-    __ stop("init_AdapterMethodHandle should not issue this");
+-    break;
+-
+-  case _adapter_opt_i2i:        // optimized subcase of adapt_prim_to_prim
+-//case _adapter_opt_f2i:        // optimized subcase of adapt_prim_to_prim
+-  case _adapter_opt_l2i:        // optimized subcase of adapt_prim_to_prim
+-  case _adapter_opt_unboxi:     // optimized subcase of adapt_ref_to_prim
+-    {
+-      // Perform an in-place conversion to int or an int subword.
+-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
+-      Address value;
+-      Address vmarg;
+-      bool value_left_justified = false;
+-
+-      switch (ek) {
+-      case _adapter_opt_i2i:
+-        value = vmarg = __ argument_address(O0_argslot, O0_argslot);
+-        break;
+-      case _adapter_opt_l2i:
+-        {
+-          // just delete the extra slot
+-#ifdef _LP64
+-          // In V9, longs are given 2 64-bit slots in the interpreter, but the
+-          // data is passed in only 1 slot.
+-          // Keep the second slot.
+-          __ add(__ argument_address(O0_argslot, O0_argslot, -1), O0_argslot);
+-          remove_arg_slots(_masm, -stack_move_unit(), O0_argslot, O1_scratch, O2_scratch, O3_scratch);
+-          value = Address(O0_argslot, 4);  // Get least-significant 32-bit of 64-bit value.
+-          vmarg = Address(O0_argslot, Interpreter::stackElementSize);
+-#else
+-          // Keep the first slot.
+-          __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
+-          remove_arg_slots(_masm, -stack_move_unit(), O0_argslot, O1_scratch, O2_scratch, O3_scratch);
+-          value = Address(O0_argslot, 0);
+-          vmarg = value;
+-#endif
+-        }
+-        break;
+-      case _adapter_opt_unboxi:
+-        {
+-          vmarg = __ argument_address(O0_argslot, O0_argslot);
+-          // Load the value up from the heap.
+-          __ ld_ptr(vmarg, O1_scratch);
+-          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_INT);
+-#ifdef ASSERT
+-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
+-            if (is_subword_type(BasicType(bt)))
+-              assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(BasicType(bt)), "");
+-          }
+-#endif
+-          __ null_check(O1_scratch, value_offset);
+-          value = Address(O1_scratch, value_offset);
+-#ifdef _BIG_ENDIAN
+-          // Values stored in objects are packed.
+-          value_left_justified = true;
+-#endif
+-        }
+-        break;
+-      default:
+-        ShouldNotReachHere();
+-      }
+-
+-      // This check is required on _BIG_ENDIAN
+-      Register G5_vminfo = G5_scratch;
+-      __ ldsw(G3_amh_conversion, G5_vminfo);
+-      assert(CONV_VMINFO_SHIFT == 0, "preshifted");
+-
+-      // Original 32-bit vmdata word must be of this form:
+-      // | MBZ:6 | signBitCount:8 | srcDstTypes:8 | conversionOp:8 |
+-      __ lduw(value, O1_scratch);
+-      if (!value_left_justified)
+-        __ sll(O1_scratch, G5_vminfo, O1_scratch);
+-      Label zero_extend, done;
+-      __ btst(CONV_VMINFO_SIGN_FLAG, G5_vminfo);
+-      __ br(Assembler::zero, false, Assembler::pn, zero_extend);
+-      __ delayed()->nop();
+-
+-      // this path is taken for int->byte, int->short
+-      __ sra(O1_scratch, G5_vminfo, O1_scratch);
+-      __ ba_short(done);
+-
+-      __ bind(zero_extend);
+-      // this is taken for int->char
+-      __ srl(O1_scratch, G5_vminfo, O1_scratch);
+-
+-      __ bind(done);
+-      __ st(O1_scratch, vmarg);
+-
+-      // Get the new MH:
+-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
+-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+-    }
+-    break;
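For reference, a minimal standalone C++ sketch (not HotSpot code) of the shift pair the _adapter_opt_i2i/_adapter_opt_unboxi path above emits: the 32-bit value is shifted left by the vminfo amount and then shifted back right, arithmetically for byte/short (sra) or logically for char (srl). Function and parameter names are illustrative, and the sketch assumes the usual arithmetic right shift of negative ints, as the sra instruction provides.

    #include <cstdint>
    #include <cstdio>

    // Narrow a 32-bit value to a subword type in place, as the stub does with sll/sra/srl.
    static int32_t narrow_in_place(int32_t value, int shift, bool sign_extend) {
      uint32_t left = (uint32_t)value << shift;            // sll: discard the high bits
      return sign_extend ? ((int32_t)left >> shift)        // sra: int -> byte/short
                         : (int32_t)(left >> shift);       // srl: int -> char
    }

    int main() {
      printf("%d\n", narrow_in_place(0x1FF, 24, true));    // int -> byte : prints -1
      printf("%d\n", narrow_in_place(0x1FFFF, 16, false)); // int -> char : prints 65535
      return 0;
    }

Here shift plays the role of the vminfo amount loaded from G3_amh_conversion, and sign_extend stands in for the CONV_VMINFO_SIGN_FLAG test.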
+-
+-  case _adapter_opt_i2l:        // optimized subcase of adapt_prim_to_prim
+-  case _adapter_opt_unboxl:     // optimized subcase of adapt_ref_to_prim
+-    {
+-      // Perform an in-place int-to-long or ref-to-long conversion.
+-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
+-
+-      // On a big-endian machine we duplicate the slot and store the MSW
+-      // in the first slot.
+-      __ add(__ argument_address(O0_argslot, O0_argslot, 1), O0_argslot);
+-
+-      insert_arg_slots(_masm, stack_move_unit(), O0_argslot, O1_scratch, O2_scratch, O3_scratch);
+-
+-      Address arg_lsw(O0_argslot, 0);
+-      Address arg_msw(O0_argslot, -Interpreter::stackElementSize);
+-
+-      switch (ek) {
+-      case _adapter_opt_i2l:
+-        {
+-#ifdef _LP64
+-          __ ldsw(arg_lsw, O2_scratch);                 // Load LSW sign-extended
+-#else
+-          __ ldsw(arg_lsw, O3_scratch);                 // Load LSW sign-extended
+-          __ srlx(O3_scratch, BitsPerInt, O2_scratch);  // Move MSW value to lower 32-bits for std
+-#endif
+-          __ st_long(O2_scratch, arg_msw);              // Uses O2/O3 on !_LP64
+-        }
+-        break;
+-      case _adapter_opt_unboxl:
+-        {
+-          // Load the value up from the heap.
+-          __ ld_ptr(arg_lsw, O1_scratch);
+-          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_LONG);
+-          assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(T_DOUBLE), "");
+-          __ null_check(O1_scratch, value_offset);
+-          __ ld_long(Address(O1_scratch, value_offset), O2_scratch);  // Uses O2/O3 on !_LP64
+-          __ st_long(O2_scratch, arg_msw);
+-        }
+-        break;
+-      default:
+-        ShouldNotReachHere();
+-      }
+-
+-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
+-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+-    }
+-    break;
+-
+-  case _adapter_opt_f2d:        // optimized subcase of adapt_prim_to_prim
+-  case _adapter_opt_d2f:        // optimized subcase of adapt_prim_to_prim
+-    {
+-      // perform an in-place floating primitive conversion
+-      __ unimplemented(entry_name(ek));
+-    }
+-    break;
+-
+-  case _adapter_prim_to_ref:
+-    __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
+-    break;
+-
+-  case _adapter_swap_args:
+-  case _adapter_rot_args:
+-    // handled completely by optimized cases
+-    __ stop("init_AdapterMethodHandle should not issue this");
+-    break;
+-
+-  case _adapter_opt_swap_1:
+-  case _adapter_opt_swap_2:
+-  case _adapter_opt_rot_1_up:
+-  case _adapter_opt_rot_1_down:
+-  case _adapter_opt_rot_2_up:
+-  case _adapter_opt_rot_2_down:
+-    {
+-      int swap_slots = ek_adapter_opt_swap_slots(ek);
+-      int rotate     = ek_adapter_opt_swap_mode(ek);
+-
+-      // 'argslot' is the position of the first argument to swap.
+-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
+-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
+-      if (VerifyMethodHandles)
+-        verify_argslot(_masm, O0_argslot, O2_scratch, "swap point must fall within current frame");
+-
+-      // 'vminfo' is the second.
+-      Register O1_destslot = O1_scratch;
+-      load_conversion_vminfo(_masm, G3_amh_conversion, O1_destslot);
+-      __ add(__ argument_address(O1_destslot, O1_destslot), O1_destslot);
+-      if (VerifyMethodHandles)
+-        verify_argslot(_masm, O1_destslot, O2_scratch, "swap point must fall within current frame");
+-
+-      assert(Interpreter::stackElementSize == wordSize, "else rethink use of wordSize here");
+-      if (!rotate) {
+-        // simple swap
+-        for (int i = 0; i < swap_slots; i++) {
+-          __ ld_ptr(            Address(O0_argslot,  i * wordSize), O2_scratch);
+-          __ ld_ptr(            Address(O1_destslot, i * wordSize), O3_scratch);
+-          __ st_ptr(O3_scratch, Address(O0_argslot,  i * wordSize));
+-          __ st_ptr(O2_scratch, Address(O1_destslot, i * wordSize));
+-        }
+-      } else {
+-        // A rotate is actually a pair of moves, with an "odd slot" (or pair)
+-        // changing place with a series of other slots.
+-        // First, push the "odd slot", which is going to get overwritten
+-        switch (swap_slots) {
+-        case 2 :  __ ld_ptr(Address(O0_argslot, 1 * wordSize), O4_scratch); // fall-thru
+-        case 1 :  __ ld_ptr(Address(O0_argslot, 0 * wordSize), O3_scratch); break;
+-        default:  ShouldNotReachHere();
+-        }
+-        if (rotate > 0) {
+-          // Here is rotate > 0:
+-          // (low mem)                                          (high mem)
+-          //     | dest:     more_slots...     | arg: odd_slot :arg+1 |
+-          // =>
+-          //     | dest: odd_slot | dest+1: more_slots...      :arg+1 |
+-          // work argslot down to destslot, copying contiguous data upwards
+-          // pseudo-code:
+-          //   argslot  = src_addr - swap_bytes
+-          //   destslot = dest_addr
+-          //   while (argslot >= destslot) *(argslot + swap_bytes) = *(argslot + 0), argslot--;
+-          move_arg_slots_up(_masm,
+-                            O1_destslot,
+-                            Address(O0_argslot, 0),
+-                            swap_slots,
+-                            O0_argslot, O2_scratch);
+-        } else {
+-          // Here is the other direction, rotate < 0:
+-          // (low mem)                                          (high mem)
+-          //     | arg: odd_slot | arg+1: more_slots...       :dest+1 |
+-          // =>
+-          //     | arg:    more_slots...     | dest: odd_slot :dest+1 |
+-          // work argslot up to destslot, copying contiguous data downwards
+-          // pseudo-code:
+-          //   argslot  = src_addr + swap_bytes
+-          //   destslot = dest_addr
+-          //   while (argslot <= destslot) *(argslot - swap_bytes) = *(argslot + 0), argslot++;
+-          // dest_slot denotes an exclusive upper limit
+-          int limit_bias = OP_ROT_ARGS_DOWN_LIMIT_BIAS;
+-          if (limit_bias != 0)
+-            __ add(O1_destslot, - limit_bias * wordSize, O1_destslot);
+-          move_arg_slots_down(_masm,
+-                              Address(O0_argslot, swap_slots * wordSize),
+-                              O1_destslot,
+-                              -swap_slots,
+-                              O0_argslot, O2_scratch);
+-
+-          __ sub(O1_destslot, swap_slots * wordSize, O1_destslot);
+-        }
+-        // pop the original first chunk into the destination slot, now free
+-        switch (swap_slots) {
+-        case 2 :  __ st_ptr(O4_scratch, Address(O1_destslot, 1 * wordSize)); // fall-thru
+-        case 1 :  __ st_ptr(O3_scratch, Address(O1_destslot, 0 * wordSize)); break;
+-        default:  ShouldNotReachHere();
+-        }
+-      }
+-
+-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
+-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+-    }
+-    break;
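The comments above describe a rotate as a pair of moves: save the odd slot(s), slide the intervening block by swap_slots positions, then drop the saved slot(s) into the freed hole. A toy index-based C++ sketch of that idea (not HotSpot code; it ignores the stack bias and the real address arithmetic, and all names are illustrative):

    #include <algorithm>
    #include <cstdio>

    // Move the 'swap_slots'-wide chunk at index 'arg' to index 'dest',
    // sliding the slots in between over by 'swap_slots' positions.
    static void rotate_slots(long* slots, int arg, int dest, int swap_slots) {
      long odd[2];                                            // at most a 2-slot (long/double) chunk
      std::copy(slots + arg, slots + arg + swap_slots, odd);  // "push the odd slot"
      if (dest > arg)                                         // chunk travels toward higher indices
        std::copy(slots + arg + swap_slots, slots + dest + swap_slots, slots + arg);
      else                                                    // chunk travels toward lower indices
        std::copy_backward(slots + dest, slots + arg, slots + arg + swap_slots);
      std::copy(odd, odd + swap_slots, slots + dest);         // "pop" it into the freed hole
    }

    int main() {
      long s[] = {10, 20, 30, 40, 50};
      rotate_slots(s, 0, 3, 1);
      for (long v : s) printf("%ld ", v);                     // prints: 20 30 40 10 50
      printf("\n");
      return 0;
    }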
+-
+-  case _adapter_dup_args:
+-    {
+-      // 'argslot' is the position of the first argument to duplicate.
+-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
+-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
+-
+-      // 'stack_move' is negative number of words to duplicate.
+-      Register O1_stack_move = O1_scratch;
+-      load_stack_move(_masm, G3_amh_conversion, O1_stack_move);
+-
+-      if (VerifyMethodHandles) {
+-        verify_argslots(_masm, O1_stack_move, O0_argslot, O2_scratch, O3_scratch, true,
+-                        "copied argument(s) must fall within current frame");
+-      }
+-
+-      // insert location is always the bottom of the argument list:
+-      __ neg(O1_stack_move);
+-      push_arg_slots(_masm, O0_argslot, O1_stack_move, O2_scratch, O3_scratch);
+-
+-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
+-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+-    }
+-    break;
+-
+-  case _adapter_drop_args:
+-    {
+-      // 'argslot' is the position of the first argument to nuke.
+-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
+-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
+-
+-      // 'stack_move' is number of words to drop.
+-      Register O1_stack_move = O1_scratch;
+-      load_stack_move(_masm, G3_amh_conversion, O1_stack_move);
+-
+-      remove_arg_slots(_masm, O1_stack_move, O0_argslot, O2_scratch, O3_scratch, O4_scratch);
+-
+-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
+-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+-    }
+-    break;
+-
+-  case _adapter_collect_args:
+-  case _adapter_fold_args:
+-  case _adapter_spread_args:
+-    // Handled completely by optimized cases.
+-    __ stop("init_AdapterMethodHandle should not issue this");
+-    break;
+-
+-  case _adapter_opt_collect_ref:
+-  case _adapter_opt_collect_int:
+-  case _adapter_opt_collect_long:
+-  case _adapter_opt_collect_float:
+-  case _adapter_opt_collect_double:
+-  case _adapter_opt_collect_void:
+-  case _adapter_opt_collect_0_ref:
+-  case _adapter_opt_collect_1_ref:
+-  case _adapter_opt_collect_2_ref:
+-  case _adapter_opt_collect_3_ref:
+-  case _adapter_opt_collect_4_ref:
+-  case _adapter_opt_collect_5_ref:
+-  case _adapter_opt_filter_S0_ref:
+-  case _adapter_opt_filter_S1_ref:
+-  case _adapter_opt_filter_S2_ref:
+-  case _adapter_opt_filter_S3_ref:
+-  case _adapter_opt_filter_S4_ref:
+-  case _adapter_opt_filter_S5_ref:
+-  case _adapter_opt_collect_2_S0_ref:
+-  case _adapter_opt_collect_2_S1_ref:
+-  case _adapter_opt_collect_2_S2_ref:
+-  case _adapter_opt_collect_2_S3_ref:
+-  case _adapter_opt_collect_2_S4_ref:
+-  case _adapter_opt_collect_2_S5_ref:
+-  case _adapter_opt_fold_ref:
+-  case _adapter_opt_fold_int:
+-  case _adapter_opt_fold_long:
+-  case _adapter_opt_fold_float:
+-  case _adapter_opt_fold_double:
+-  case _adapter_opt_fold_void:
+-  case _adapter_opt_fold_1_ref:
+-  case _adapter_opt_fold_2_ref:
+-  case _adapter_opt_fold_3_ref:
+-  case _adapter_opt_fold_4_ref:
+-  case _adapter_opt_fold_5_ref:
+-    {
+-      // Given a fresh incoming stack frame, build a new ricochet frame.
+-      // On entry, TOS points at a return PC, and FP is the caller's frame ptr.
+-      // RSI/R13 has the caller's exact stack pointer, which we must also preserve.
+-      // RCX contains an AdapterMethodHandle of the indicated kind.
+-
+-      // Relevant AMH fields:
+-      // amh.vmargslot:
+-      //   points to the trailing edge of the arguments
+-      //   to filter, collect, or fold.  For a boxing operation,
+-      //   it points just after the single primitive value.
+-      // amh.argument:
+-      //   recursively called MH, on |collect| arguments
+-      // amh.vmtarget:
+-      //   final destination MH, on return value, etc.
+-      // amh.conversion.dest:
+-      //   tells what is the type of the return value
+-      //   (not needed here, since dest is also derived from ek)
+-      // amh.conversion.vminfo:
+-      //   points to the trailing edge of the return value
+-      //   when the vmtarget is to be called; this is
+-      //   equal to vmargslot + (retained ? |collect| : 0)
+-
+-      // Pass 0 or more argument slots to the recursive target.
+-      int collect_count_constant = ek_adapter_opt_collect_count(ek);
+-
+-      // The collected arguments are copied from the saved argument list:
+-      int collect_slot_constant = ek_adapter_opt_collect_slot(ek);
+-
+-      assert(ek_orig == _adapter_collect_args ||
+-             ek_orig == _adapter_fold_args, "");
+-      bool retain_original_args = (ek_orig == _adapter_fold_args);
+-
+-      // The return value is replaced (or inserted) at the 'vminfo' argslot.
+-      // Sometimes we can compute this statically.
+-      int dest_slot_constant = -1;
+-      if (!retain_original_args)
+-        dest_slot_constant = collect_slot_constant;
+-      else if (collect_slot_constant >= 0 && collect_count_constant >= 0)
+-        // We are preserving all the arguments, and the return value is prepended,
+-        // so the return slot is to the left (above) the |collect| sequence.
+-        dest_slot_constant = collect_slot_constant + collect_count_constant;
+-
+-      // Replace all those slots by the result of the recursive call.
+-      // The result type can be one of ref, int, long, float, double, void.
+-      // In the case of void, nothing is pushed on the stack after return.
+-      BasicType dest = ek_adapter_opt_collect_type(ek);
+-      assert(dest == type2wfield[dest], "dest is a stack slot type");
+-      int dest_count = type2size[dest];
+-      assert(dest_count == 1 || dest_count == 2 || (dest_count == 0 && dest == T_VOID), "dest has a size");
+-
+-      // Choose a return continuation.
+-      EntryKind ek_ret = _adapter_opt_return_any;
+-      if (dest != T_CONFLICT && OptimizeMethodHandles) {
+-        switch (dest) {
+-        case T_INT    : ek_ret = _adapter_opt_return_int;     break;
+-        case T_LONG   : ek_ret = _adapter_opt_return_long;    break;
+-        case T_FLOAT  : ek_ret = _adapter_opt_return_float;   break;
+-        case T_DOUBLE : ek_ret = _adapter_opt_return_double;  break;
+-        case T_OBJECT : ek_ret = _adapter_opt_return_ref;     break;
+-        case T_VOID   : ek_ret = _adapter_opt_return_void;    break;
+-        default       : ShouldNotReachHere();
+-        }
+-        if (dest == T_OBJECT && dest_slot_constant >= 0) {
+-          EntryKind ek_try = EntryKind(_adapter_opt_return_S0_ref + dest_slot_constant);
+-          if (ek_try <= _adapter_opt_return_LAST &&
+-              ek_adapter_opt_return_slot(ek_try) == dest_slot_constant) {
+-            ek_ret = ek_try;
+-          }
+-        }
+-        assert(ek_adapter_opt_return_type(ek_ret) == dest, "");
+-      }
+-
+-      // Already pushed:  ... keep1 | collect | keep2 |
+-
+-      // Push a few extra argument words, if we need them to store the return value.
+-      {
+-        int extra_slots = 0;
+-        if (retain_original_args) {
+-          extra_slots = dest_count;
+-        } else if (collect_count_constant == -1) {
+-          extra_slots = dest_count;  // collect_count might be zero; be generous
+-        } else if (dest_count > collect_count_constant) {
+-          extra_slots = (dest_count - collect_count_constant);
+-        } else {
+-          // else we know we have enough dead space in |collect| to repurpose for return values
+-        }
+-        if (extra_slots != 0) {
+-          __ sub(SP, round_to(extra_slots, 2) * Interpreter::stackElementSize, SP);
+-        }
+-      }
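A small arithmetic sketch (not HotSpot code; round_to and the function name here are illustrative) of the extra-slot computation just above, which decides how many stack words must be pushed so the recursive call's return value will fit:

    #include <cstdio>

    static int round_to(int x, int unit) { return (x + unit - 1) / unit * unit; }

    // dest_count: slots taken by the return value; collect_count: slots consumed
    // by the recursive call, or -1 when only known at run time.
    static int extra_return_slots(int dest_count, int collect_count, bool retain_args) {
      int extra = 0;
      if (retain_args)                     extra = dest_count;               // fold: nothing is freed
      else if (collect_count < 0)          extra = dest_count;               // unknown count; be generous
      else if (dest_count > collect_count) extra = dest_count - collect_count;
      // else: the freed |collect| slots already leave room for the return value
      return round_to(extra, 2);           // keep the 2-slot stack alignment the SPARC code uses
    }

    int main() {
      printf("%d\n", extra_return_slots(2, 1, false));  // long result replacing one int arg -> 2
      printf("%d\n", extra_return_slots(1, 3, false));  // enough freed space               -> 0
      printf("%d\n", extra_return_slots(1, 3, true));   // fold retains the args            -> 2
      return 0;
    }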
+-
+-      // Set up Ricochet Frame.
+-      __ mov(SP, O5_savedSP);  // record SP for the callee
+-
+-      // One extra (empty) slot for outgoing target MH (see Gargs computation below).
+-      __ save_frame(2);  // Note: we need to add 2 slots since frame::memory_parameter_word_sp_offset is 23.
+-
+-      // Note: Gargs is live throughout the following, until we make our recursive call.
+-      // And the RF saves a copy in L4_saved_args_base.
+-
+-      RicochetFrame::enter_ricochet_frame(_masm, G3_method_handle, Gargs,
+-                                          entry(ek_ret)->from_interpreted_entry());
+-
+-      // Compute argument base:
+-      // Set up Gargs for current frame, extra (empty) slot is for outgoing target MH (space reserved by save_frame above).
+-      __ add(FP, STACK_BIAS - (1 * Interpreter::stackElementSize), Gargs);
+-
+-      // Now pushed:  ... keep1 | collect | keep2 | extra | [RF]
+-
+-#ifdef ASSERT
+-      if (VerifyMethodHandles && dest != T_CONFLICT) {
+-        BLOCK_COMMENT("verify AMH.conv.dest {");
+-        extract_conversion_dest_type(_masm, RicochetFrame::L5_conversion, O1_scratch);
+-        Label L_dest_ok;
+-        __ cmp(O1_scratch, (int) dest);
+-        __ br(Assembler::equal, false, Assembler::pt, L_dest_ok);
+-        __ delayed()->nop();
+-        if (dest == T_INT) {
+-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
+-            if (is_subword_type(BasicType(bt))) {
+-              __ cmp(O1_scratch, (int) bt);
+-              __ br(Assembler::equal, false, Assembler::pt, L_dest_ok);
+-              __ delayed()->nop();
+-            }
+-          }
+-        }
+-        __ stop("bad dest in AMH.conv");
+-        __ BIND(L_dest_ok);
+-        BLOCK_COMMENT("} verify AMH.conv.dest");
+-      }
+-#endif //ASSERT
+-
+-      // Find out where the original copy of the recursive argument sequence begins.
+-      Register O0_coll = O0_scratch;
+-      {
+-        RegisterOrConstant collect_slot = collect_slot_constant;
+-        if (collect_slot_constant == -1) {
+-          load_vmargslot(_masm, G3_amh_vmargslot, O1_scratch);
+-          collect_slot = O1_scratch;
+-        }
+-        // collect_slot might be 0, but we need the move anyway.
+-        __ add(RicochetFrame::L4_saved_args_base, __ argument_offset(collect_slot, collect_slot.register_or_noreg()), O0_coll);
+-        // O0_coll now points at the trailing edge of |collect| and leading edge of |keep2|
+-      }
+-
+-      // Replace the old AMH with the recursive MH.  (No going back now.)
+-      // In the case of a boxing call, the recursive call is to a 'boxer' method,
+-      // such as Integer.valueOf or Long.valueOf.  In the case of a filter
+-      // or collect call, it will take one or more arguments, transform them,
+-      // and return some result, to store back into argument_base[vminfo].
+-      __ load_heap_oop(G3_amh_argument, G3_method_handle);
+-      if (VerifyMethodHandles)  verify_method_handle(_masm, G3_method_handle, O1_scratch, O2_scratch);
+-
+-      // Calculate |collect|, the number of arguments we are collecting.
+-      Register O1_collect_count = O1_scratch;
+-      RegisterOrConstant collect_count;
+-      if (collect_count_constant < 0) {
+-        __ load_method_handle_vmslots(O1_collect_count, G3_method_handle, O2_scratch);
+-        collect_count = O1_collect_count;
+-      } else {
+-        collect_count = collect_count_constant;
+-#ifdef ASSERT
+-        if (VerifyMethodHandles) {
+-          BLOCK_COMMENT("verify collect_count_constant {");
+-          __ load_method_handle_vmslots(O3_scratch, G3_method_handle, O2_scratch);
+-          Label L_count_ok;
+-          __ cmp_and_br_short(O3_scratch, collect_count_constant, Assembler::equal, Assembler::pt, L_count_ok);
+-          __ stop("bad vminfo in AMH.conv");
+-          __ BIND(L_count_ok);
+-          BLOCK_COMMENT("} verify collect_count_constant");
+-        }
+-#endif //ASSERT
+-      }
+-
+-      // copy |collect| slots directly to TOS:
+-      push_arg_slots(_masm, O0_coll, collect_count, O2_scratch, O3_scratch);
+-      // Now pushed:  ... keep1 | collect | keep2 | RF... | collect |
+-      // O0_coll still points at the trailing edge of |collect| and leading edge of |keep2|
+-
+-      // If necessary, adjust the saved arguments to make room for the eventual return value.
+-      // Normal adjustment:  ... keep1 | +dest+ | -collect- | keep2 | RF... | collect |
+-      // If retaining args:  ... keep1 | +dest+ |  collect  | keep2 | RF... | collect |
+-      // In the non-retaining case, this might move keep2 either up or down.
+-      // We don't have to copy the whole | RF... collect | complex,
+-      // but we must adjust RF.saved_args_base.
+-      // Also, from now on, we will forget about the original copy of |collect|.
+-      // If we are retaining it, we will treat it as part of |keep2|.
+-      // For clarity we will define |keep3| = |collect|keep2| or |keep2|.
+-
+-      BLOCK_COMMENT("adjust trailing arguments {");
+-      // Compare the sizes of |+dest+| and |-collect-|, which are opposed opening and closing movements.
+-      int                open_count  = dest_count;
+-      RegisterOrConstant close_count = collect_count_constant;
+-      Register O1_close_count = O1_collect_count;
+-      if (retain_original_args) {
+-        close_count = constant(0);
+-      } else if (collect_count_constant == -1) {
+-        close_count = O1_collect_count;
+-      }
+-
+-      // How many slots need moving?  This is simply dest_slot (0 => no |keep3|).
+-      RegisterOrConstant keep3_count;
+-      Register O2_keep3_count = O2_scratch;
+-      if (dest_slot_constant < 0) {
+-        extract_conversion_vminfo(_masm, RicochetFrame::L5_conversion, O2_keep3_count);
+-        keep3_count = O2_keep3_count;
+-      } else  {
+-        keep3_count = dest_slot_constant;
+-#ifdef ASSERT
+-        if (VerifyMethodHandles && dest_slot_constant < 0) {
+-          BLOCK_COMMENT("verify dest_slot_constant {");
+-          extract_conversion_vminfo(_masm, RicochetFrame::L5_conversion, O3_scratch);
+-          Label L_vminfo_ok;
+-          __ cmp_and_br_short(O3_scratch, dest_slot_constant, Assembler::equal, Assembler::pt, L_vminfo_ok);
+-          __ stop("bad vminfo in AMH.conv");
+-          __ BIND(L_vminfo_ok);
+-          BLOCK_COMMENT("} verify dest_slot_constant");
+-        }
+-#endif //ASSERT
+-      }
+-
+-      // tasks remaining:
+-      bool move_keep3 = (!keep3_count.is_constant() || keep3_count.as_constant() != 0);
+-      bool stomp_dest = (NOT_DEBUG(dest == T_OBJECT) DEBUG_ONLY(dest_count != 0));
+-      bool fix_arg_base = (!close_count.is_constant() || open_count != close_count.as_constant());
+-
+-      // Old and new argument locations (based at slot 0).
+-      // Net shift (&new_argv - &old_argv) is (close_count - open_count).
+-      bool zero_open_count = (open_count == 0);  // remember this bit of info
+-      if (move_keep3 && fix_arg_base) {
+-        // It will be easier to have everything in one register:
+-        if (close_count.is_register()) {
+-          // Deduct open_count from close_count register to get a clean +/- value.
+-          __ sub(close_count.as_register(), open_count, close_count.as_register());
+-        } else {
+-          close_count = close_count.as_constant() - open_count;
+-        }
+-        open_count = 0;
+-      }
+-      Register L4_old_argv = RicochetFrame::L4_saved_args_base;
+-      Register O3_new_argv = O3_scratch;
+-      if (fix_arg_base) {
+-        __ add(L4_old_argv, __ argument_offset(close_count, O4_scratch), O3_new_argv,
+-               -(open_count * Interpreter::stackElementSize));
+-      }
+-
+-      // First decide if any actual data are to be moved.
+-      // We can skip if (a) |keep3| is empty, or (b) the argument list size didn't change.
+-      // (As it happens, all movements involve an argument list size change.)
+-
+-      // If there are variable parameters, use dynamic checks to skip around the whole mess.
+-      Label L_done;
+-      if (keep3_count.is_register()) {
+-        __ cmp_and_br_short(keep3_count.as_register(), 0, Assembler::equal, Assembler::pn, L_done);
+-      }
+-      if (close_count.is_register()) {
+-        __ cmp_and_br_short(close_count.as_register(), open_count, Assembler::equal, Assembler::pn, L_done);
+-      }
+-
+-      if (move_keep3 && fix_arg_base) {
+-        bool emit_move_down = false, emit_move_up = false, emit_guard = false;
+-        if (!close_count.is_constant()) {
+-          emit_move_down = emit_guard = !zero_open_count;
+-          emit_move_up   = true;
+-        } else if (open_count != close_count.as_constant()) {
+-          emit_move_down = (open_count > close_count.as_constant());
+-          emit_move_up   = !emit_move_down;
+-        }
+-        Label L_move_up;
+-        if (emit_guard) {
+-          __ cmp(close_count.as_register(), open_count);
+-          __ br(Assembler::greater, false, Assembler::pn, L_move_up);
+-          __ delayed()->nop();
+-        }
+-
+-        if (emit_move_down) {
+-          // Move arguments down if |+dest+| > |-collect-|
+-          // (This is rare, except when arguments are retained.)
+-          // This opens space for the return value.
+-          if (keep3_count.is_constant()) {
+-            for (int i = 0; i < keep3_count.as_constant(); i++) {
+-              __ ld_ptr(            Address(L4_old_argv, i * Interpreter::stackElementSize), O4_scratch);
+-              __ st_ptr(O4_scratch, Address(O3_new_argv, i * Interpreter::stackElementSize)            );
+-            }
+-          } else {
+-            // Live: O1_close_count, O2_keep3_count, O3_new_argv
+-            Register argv_top = O0_scratch;
+-            __ add(L4_old_argv, __ argument_offset(keep3_count, O4_scratch), argv_top);
+-            move_arg_slots_down(_masm,
+-                                Address(L4_old_argv, 0),  // beginning of old argv
+-                                argv_top,                 // end of old argv
+-                                close_count,              // distance to move down (must be negative)
+-                                O4_scratch, G5_scratch);
+-          }
+-        }
+-
+-        if (emit_guard) {
+-          __ ba_short(L_done);  // assumes emit_move_up is true also
+-          __ BIND(L_move_up);
+-        }
+-
+-        if (emit_move_up) {
+-          // Move arguments up if |+dest+| < |-collect-|
+-          // (This is usual, except when |keep3| is empty.)
+-          // This closes up the space occupied by the now-deleted collect values.
+-          if (keep3_count.is_constant()) {
+-            for (int i = keep3_count.as_constant() - 1; i >= 0; i--) {
+-              __ ld_ptr(            Address(L4_old_argv, i * Interpreter::stackElementSize), O4_scratch);
+-              __ st_ptr(O4_scratch, Address(O3_new_argv, i * Interpreter::stackElementSize)            );
+-            }
+-          } else {
+-            Address argv_top(L4_old_argv, __ argument_offset(keep3_count, O4_scratch));
+-            // Live: O1_close_count, O2_keep3_count, O3_new_argv
+-            move_arg_slots_up(_masm,
+-                              L4_old_argv,  // beginning of old argv
+-                              argv_top,     // end of old argv
+-                              close_count,  // distance to move up (must be positive)
+-                              O4_scratch, G5_scratch);
+-          }
+-        }
+-      }
+-      __ BIND(L_done);
+-
+-      if (fix_arg_base) {
+-        // adjust RF.saved_args_base
+-        __ mov(O3_new_argv, RicochetFrame::L4_saved_args_base);
+-      }
+-
+-      if (stomp_dest) {
+-        // Stomp the return slot, so it doesn't hold garbage.
+-        // This isn't strictly necessary, but it may help detect bugs.
+-        __ set(RicochetFrame::RETURN_VALUE_PLACEHOLDER, O4_scratch);
+-        __ st_ptr(O4_scratch, Address(RicochetFrame::L4_saved_args_base,
+-                                      __ argument_offset(keep3_count, keep3_count.register_or_noreg())));  // uses O2_keep3_count
+-      }
+-      BLOCK_COMMENT("} adjust trailing arguments");
+-
+-      BLOCK_COMMENT("do_recursive_call");
+-      __ mov(SP, O5_savedSP);  // record SP for the callee
+-      __ set(ExternalAddress(SharedRuntime::ricochet_blob()->bounce_addr() - frame::pc_return_offset), O7);
+-      // The globally unique bounce address has two purposes:
+-      // 1. It helps the JVM recognize this frame (frame::is_ricochet_frame).
+-      // 2. When returned to, it cuts back the stack and redirects control flow
+-      //    to the return handler.
+-      // The return handler will further cut back the stack when it takes
+-      // down the RF.  Perhaps there is a way to streamline this further.
+-
+-      // State during recursive call:
+-      // ... keep1 | dest | dest=42 | keep3 | RF... | collect | bounce_pc |
+-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+-    }
+-    break;
+-
+-  case _adapter_opt_return_ref:
+-  case _adapter_opt_return_int:
+-  case _adapter_opt_return_long:
+-  case _adapter_opt_return_float:
+-  case _adapter_opt_return_double:
+-  case _adapter_opt_return_void:
+-  case _adapter_opt_return_S0_ref:
+-  case _adapter_opt_return_S1_ref:
+-  case _adapter_opt_return_S2_ref:
+-  case _adapter_opt_return_S3_ref:
+-  case _adapter_opt_return_S4_ref:
+-  case _adapter_opt_return_S5_ref:
+-    {
+-      BasicType dest_type_constant = ek_adapter_opt_return_type(ek);
+-      int       dest_slot_constant = ek_adapter_opt_return_slot(ek);
+-
+-      if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
+-
+-      if (dest_slot_constant == -1) {
+-        // The current stub is a general handler for this dest_type.
+-        // It can be called from _adapter_opt_return_any below.
+-        // Stash the address in a little table.
+-        assert((dest_type_constant & CONV_TYPE_MASK) == dest_type_constant, "oob");
+-        address return_handler = __ pc();
+-        _adapter_return_handlers[dest_type_constant] = return_handler;
+-        if (dest_type_constant == T_INT) {
+-          // do the subword types too
+-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
+-            if (is_subword_type(BasicType(bt)) &&
+-                _adapter_return_handlers[bt] == NULL) {
+-              _adapter_return_handlers[bt] = return_handler;
+-            }
+-          }
+-        }
+-      }
+-
+-      // On entry to this continuation handler, make Gargs live again.
+-      __ mov(RicochetFrame::L4_saved_args_base, Gargs);
+-
+-      Register O7_temp   = O7;
+-      Register O5_vminfo = O5;
+-
+-      RegisterOrConstant dest_slot = dest_slot_constant;
+-      if (dest_slot_constant == -1) {
+-        extract_conversion_vminfo(_masm, RicochetFrame::L5_conversion, O5_vminfo);
+-        dest_slot = O5_vminfo;
+-      }
+-      // Store the result back into the argslot.
+-      // This code uses the interpreter calling sequence, in which the return value
+-      // is usually left in the TOS register, as defined by InterpreterMacroAssembler::pop.
+-      // There are certain irregularities with floating point values, which can be seen
+-      // in TemplateInterpreterGenerator::generate_return_entry_for.
+-      move_return_value(_masm, dest_type_constant, __ argument_address(dest_slot, O7_temp));
+-
+-      RicochetFrame::leave_ricochet_frame(_masm, G3_method_handle, I5_savedSP, I7);
+-
+-      // Load the final target and go.
+-      if (VerifyMethodHandles)  verify_method_handle(_masm, G3_method_handle, O0_scratch, O1_scratch);
+-      __ restore(I5_savedSP, G0, SP);
+-      __ jump_to_method_handle_entry(G3_method_handle, O0_scratch);
+-      __ illtrap(0);
+-    }
+-    break;
+-
+-  case _adapter_opt_return_any:
+-    {
+-      Register O7_temp      = O7;
+-      Register O5_dest_type = O5;
+-
+-      if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
+-      extract_conversion_dest_type(_masm, RicochetFrame::L5_conversion, O5_dest_type);
+-      __ set(ExternalAddress((address) &_adapter_return_handlers[0]), O7_temp);
+-      __ sll_ptr(O5_dest_type, LogBytesPerWord, O5_dest_type);
+-      __ ld_ptr(O7_temp, O5_dest_type, O7_temp);
+-
+-#ifdef ASSERT
+-      { Label L_ok;
+-        __ br_notnull_short(O7_temp, Assembler::pt, L_ok);
+-        __ stop("bad method handle return");
+-        __ BIND(L_ok);
+-      }
+-#endif //ASSERT
+-      __ JMP(O7_temp, 0);
+-      __ delayed()->nop();
+-    }
+-    break;
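The _adapter_opt_return_any handler above dispatches through the _adapter_return_handlers table, indexed by the destination type extracted from the conversion word. A toy C++ sketch (not HotSpot code; the enum and handler names are made up) of that dispatch, including the null check behind the "bad method handle return" stop:

    #include <cstdio>

    enum ToyType { TOY_INT, TOY_LONG, TOY_FLOAT, TOY_DOUBLE, TOY_OBJECT, TOY_VOID, TOY_LIMIT };

    static void (*return_handlers[TOY_LIMIT])();     // one continuation stub per result type

    static void return_int_handler() { printf("store int result, pop ricochet frame, jump on\n"); }
    static void return_ref_handler() { printf("store oop result, pop ricochet frame, jump on\n"); }

    int main() {
      return_handlers[TOY_INT]    = return_int_handler;   // filled in as each typed stub is generated
      return_handlers[TOY_OBJECT] = return_ref_handler;

      ToyType dest = TOY_OBJECT;                           // would come from the conversion word
      if (return_handlers[dest] == nullptr) {              // the "bad method handle return" case
        printf("bad method handle return\n");
        return 1;
      }
      return_handlers[dest]();                             // indirect jump through the table
      return 0;
    }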
+-
+-  case _adapter_opt_spread_0:
+-  case _adapter_opt_spread_1_ref:
+-  case _adapter_opt_spread_2_ref:
+-  case _adapter_opt_spread_3_ref:
+-  case _adapter_opt_spread_4_ref:
+-  case _adapter_opt_spread_5_ref:
+-  case _adapter_opt_spread_ref:
+-  case _adapter_opt_spread_byte:
+-  case _adapter_opt_spread_char:
+-  case _adapter_opt_spread_short:
+-  case _adapter_opt_spread_int:
+-  case _adapter_opt_spread_long:
+-  case _adapter_opt_spread_float:
+-  case _adapter_opt_spread_double:
+-    {
+-      // spread an array out into a group of arguments
+-      int  length_constant    = ek_adapter_opt_spread_count(ek);
+-      bool length_can_be_zero = (length_constant == 0);
+-      if (length_constant < 0) {
+-        // some adapters with variable length must handle the zero case
+-        if (!OptimizeMethodHandles ||
+-            ek_adapter_opt_spread_type(ek) != T_OBJECT)
+-          length_can_be_zero = true;
+-      }
+-
+-      // find the address of the array argument
+-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
+-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
+-
+-      // O0_argslot points both to the array and to the first output arg
+-      Address vmarg = Address(O0_argslot, 0);
+-
+-      // Get the array value.
+-      Register  O1_array       = O1_scratch;
+-      Register  O2_array_klass = O2_scratch;
+-      BasicType elem_type      = ek_adapter_opt_spread_type(ek);
+-      int       elem_slots     = type2size[elem_type];  // 1 or 2
+-      int       array_slots    = 1;  // array is always a T_OBJECT
+-      int       length_offset  = arrayOopDesc::length_offset_in_bytes();
+-      int       elem0_offset   = arrayOopDesc::base_offset_in_bytes(elem_type);
+-      __ ld_ptr(vmarg, O1_array);
+-
+-      Label L_array_is_empty, L_insert_arg_space, L_copy_args, L_args_done;
+-      if (length_can_be_zero) {
+-        // handle the null pointer case, if zero is allowed
+-        Label L_skip;
+-        if (length_constant < 0) {
+-          load_conversion_vminfo(_masm, G3_amh_conversion, O3_scratch);
+-          __ cmp_zero_and_br(Assembler::notZero, O3_scratch, L_skip);
+-          __ delayed()->nop(); // to avoid back-to-back cbcond instructions
+-        }
+-        __ br_null_short(O1_array, Assembler::pn, L_array_is_empty);
+-        __ BIND(L_skip);
+-      }
+-      __ null_check(O1_array, oopDesc::klass_offset_in_bytes());
+-      __ load_klass(O1_array, O2_array_klass);
+-
+-      // Check the array type.
+-      Register O3_klass = O3_scratch;
+-      __ load_heap_oop(G3_amh_argument, O3_klass);  // this is a Class object!
+-      load_klass_from_Class(_masm, O3_klass, O4_scratch, G5_scratch);
+-
+-      Label L_ok_array_klass, L_bad_array_klass, L_bad_array_length;
+-      __ check_klass_subtype(O2_array_klass, O3_klass, O4_scratch, G5_scratch, L_ok_array_klass);
+-      // If we get here, the type check failed!
+-      __ ba_short(L_bad_array_klass);
+-      __ BIND(L_ok_array_klass);
+-
+-      // Check length.
+-      if (length_constant >= 0) {
+-        __ ldsw(Address(O1_array, length_offset), O4_scratch);
+-        __ cmp(O4_scratch, length_constant);
+-      } else {
+-        Register O3_vminfo = O3_scratch;
+-        load_conversion_vminfo(_masm, G3_amh_conversion, O3_vminfo);
+-        __ ldsw(Address(O1_array, length_offset), O4_scratch);
+-        __ cmp(O3_vminfo, O4_scratch);
+-      }
+-      __ br(Assembler::notEqual, false, Assembler::pn, L_bad_array_length);
+-      __ delayed()->nop();
+-
+-      Register O2_argslot_limit = O2_scratch;
+-
+-      // Array length checks out.  Now insert any required stack slots.
+-      if (length_constant == -1) {
+-        // Form a pointer to the end of the affected region.
+-        __ add(O0_argslot, Interpreter::stackElementSize, O2_argslot_limit);
+-        // 'stack_move' is negative number of words to insert
+-        // This number already accounts for elem_slots.
+-        Register O3_stack_move = O3_scratch;
+-        load_stack_move(_masm, G3_amh_conversion, O3_stack_move);
+-        __ cmp(O3_stack_move, 0);
+-        assert(stack_move_unit() < 0, "else change this comparison");
+-        __ br(Assembler::less, false, Assembler::pn, L_insert_arg_space);
+-        __ delayed()->nop();
+-        __ br(Assembler::equal, false, Assembler::pn, L_copy_args);
+-        __ delayed()->nop();
+-        // single argument case, with no array movement
+-        __ BIND(L_array_is_empty);
+-        remove_arg_slots(_masm, -stack_move_unit() * array_slots,
+-                         O0_argslot, O1_scratch, O2_scratch, O3_scratch);
+-        __ ba_short(L_args_done);  // no spreading to do
+-        __ BIND(L_insert_arg_space);
+-        // come here in the usual case, stack_move < 0 (2 or more spread arguments)
+-        // Live: O1_array, O2_argslot_limit, O3_stack_move
+-        insert_arg_slots(_masm, O3_stack_move,
+-                         O0_argslot, O4_scratch, G5_scratch, O1_scratch);
+-        // reload from O2_argslot_limit since O0_argslot is now decremented
+-        __ ld_ptr(Address(O2_argslot_limit, -Interpreter::stackElementSize), O1_array);
+-      } else if (length_constant >= 1) {
+-        int new_slots = (length_constant * elem_slots) - array_slots;
+-        insert_arg_slots(_masm, new_slots * stack_move_unit(),
+-                         O0_argslot, O2_scratch, O3_scratch, O4_scratch);
+-      } else if (length_constant == 0) {
+-        __ BIND(L_array_is_empty);
+-        remove_arg_slots(_masm, -stack_move_unit() * array_slots,
+-                         O0_argslot, O1_scratch, O2_scratch, O3_scratch);
+-      } else {
+-        ShouldNotReachHere();
+-      }
+-
+-      // Copy from the array to the new slots.
+-      // Note: Stack change code preserves integrity of O0_argslot pointer.
+-      // So even after slot insertions, O0_argslot still points to first argument.
+-      // Beware:  Arguments that are shallow on the stack are deep in the array,
+-      // and vice versa.  So a downward-growing stack (the usual) has to be copied
+-      // elementwise in reverse order from the source array.
+-      __ BIND(L_copy_args);
+-      if (length_constant == -1) {
+-        // [O0_argslot, O2_argslot_limit) is the area we are inserting into.
+-        // Array element [0] goes at O2_argslot_limit[-wordSize].
+-        Register O1_source = O1_array;
+-        __ add(Address(O1_array, elem0_offset), O1_source);
+-        Register O4_fill_ptr = O4_scratch;
+-        __ mov(O2_argslot_limit, O4_fill_ptr);
+-        Label L_loop;
+-        __ BIND(L_loop);
+-        __ add(O4_fill_ptr, -Interpreter::stackElementSize * elem_slots, O4_fill_ptr);
+-        move_typed_arg(_masm, elem_type, true,
+-                       Address(O1_source, 0), Address(O4_fill_ptr, 0),
+-                       O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
+-        __ add(O1_source, type2aelembytes(elem_type), O1_source);
+-        __ cmp_and_brx_short(O4_fill_ptr, O0_argslot, Assembler::greaterUnsigned, Assembler::pt, L_loop);
+-      } else if (length_constant == 0) {
+-        // nothing to copy
+-      } else {
+-        int elem_offset = elem0_offset;
+-        int slot_offset = length_constant * Interpreter::stackElementSize;
+-        for (int index = 0; index < length_constant; index++) {
+-          slot_offset -= Interpreter::stackElementSize * elem_slots;  // fill backward
+-          move_typed_arg(_masm, elem_type, true,
+-                         Address(O1_array, elem_offset), Address(O0_argslot, slot_offset),
+-                         O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
+-          elem_offset += type2aelembytes(elem_type);
+-        }
+-      }
+-      __ BIND(L_args_done);
+-
+-      // Arguments are spread.  Move to next method handle.
+-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
+-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+-
+-      __ BIND(L_bad_array_klass);
+-      assert(!vmarg.uses(O2_required), "must be different registers");
+-      __ load_heap_oop(Address(O2_array_klass, java_mirror_offset), O2_required);  // required class
+-      __ ld_ptr(       vmarg,                                       O1_actual);    // bad object
+-      __ jump_to(AddressLiteral(from_interpreted_entry(_raise_exception)), O3_scratch);
+-      __ delayed()->mov(Bytecodes::_aaload,                         O0_code);      // who is complaining?
+-
+-      __ bind(L_bad_array_length);
+-      assert(!vmarg.uses(O2_required), "must be different registers");
+-      __ mov(   G3_method_handle,                O2_required);  // required class
+-      __ ld_ptr(vmarg,                           O1_actual);    // bad object
+-      __ jump_to(AddressLiteral(from_interpreted_entry(_raise_exception)), O3_scratch);
+-      __ delayed()->mov(Bytecodes::_arraylength, O0_code);      // who is complaining?
+-    }
+-    break;
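A minimal C++ sketch (not HotSpot code; the arrays and length are illustrative) of the backward copy described above for the constant-length spread case: array element [0] lands at the far end of the freshly inserted slot region, so the slot offset is decremented as the element index advances.

    #include <cstdio>

    int main() {
      const int length = 3;
      int array[length] = {7, 8, 9};   // the array being spread
      int slots[length];               // freshly inserted argument slots

      int slot = length;               // counts down, mirroring slot_offset above
      for (int index = 0; index < length; index++) {
        slot -= 1;                     // fill backward, one slot per element here
        slots[slot] = array[index];
      }
      for (int v : slots) printf("%d ", v);   // prints: 9 8 7
      printf("\n");
      return 0;
    }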
+-
+-  default:
+-    DEBUG_ONLY(tty->print_cr("bad ek=%d (%s)", (int)ek, entry_name(ek)));
+-    ShouldNotReachHere();
+-  }
+-  BLOCK_COMMENT(err_msg("} Entry %s", entry_name(ek)));
+-
+-  address me_cookie = MethodHandleEntry::start_compiled_entry(_masm, interp_entry);
+-  __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
+-
+-  init_entry(ek, MethodHandleEntry::finish_compiled_entry(_masm, me_cookie));
+-}
+diff --git a/src/cpu/sparc/vm/methodHandles_sparc.hpp b/src/cpu/sparc/vm/methodHandles_sparc.hpp
+--- a/src/cpu/sparc/vm/methodHandles_sparc.hpp
++++ b/src/cpu/sparc/vm/methodHandles_sparc.hpp
+@@ -30,186 +30,9 @@
+   adapter_code_size = NOT_LP64(23000 DEBUG_ONLY(+ 40000)) LP64_ONLY(35000 DEBUG_ONLY(+ 50000))
+ };
+ 
+-public:
+-
+-class RicochetFrame : public ResourceObj {
+-  friend class MethodHandles;
+-
+- private:
+-  /*
+-    RF field            x86                 SPARC
+-    sender_pc           *(rsp+0)            I7-0x8
+-    sender_link         rbp                 I6+BIAS
+-    exact_sender_sp     rsi/r13             I5_savedSP
+-    conversion          *(rcx+&amh_conv)    L5_conv
+-    saved_args_base     rax                 L4_sab (cf. Gargs = G4)
+-    saved_args_layout   #NULL               L3_sal
+-    saved_target        *(rcx+&mh_vmtgt)    L2_stgt
+-    continuation        #STUB_CON           L1_cont
+-   */
+-  static const Register L1_continuation     ;  // what to do when control gets back here
+-  static const Register L2_saved_target     ;  // target method handle to invoke on saved_args
+-  static const Register L3_saved_args_layout;  // caching point for MethodTypeForm.vmlayout cookie
+-  static const Register L4_saved_args_base  ;  // base of pushed arguments (slot 0, arg N) (-3)
+-  static const Register L5_conversion       ;  // misc. information from original AdapterMethodHandle (-2)
+-
+-  frame _fr;
+-
+-  RicochetFrame(const frame& fr) : _fr(fr) { }
+-
+-  intptr_t* register_addr(Register reg) const  {
+-    assert((_fr.sp() + reg->sp_offset_in_saved_window()) == _fr.register_addr(reg), "must agree");
+-    return _fr.register_addr(reg);
+-  }
+-  intptr_t  register_value(Register reg) const { return *register_addr(reg); }
+-
+- public:
+-  intptr_t* continuation() const        { return (intptr_t*) register_value(L1_continuation); }
+-  oop       saved_target() const        { return (oop)       register_value(L2_saved_target); }
+-  oop       saved_args_layout() const   { return (oop)       register_value(L3_saved_args_layout); }
+-  intptr_t* saved_args_base() const     { return (intptr_t*) register_value(L4_saved_args_base); }
+-  intptr_t  conversion() const          { return             register_value(L5_conversion); }
+-  intptr_t* exact_sender_sp() const     { return (intptr_t*) register_value(I5_savedSP); }
+-  intptr_t* sender_link() const         { return _fr.sender_sp(); }  // XXX
+-  address   sender_pc() const           { return _fr.sender_pc(); }
+-
+-  // This value is not used for much, but it apparently must be nonzero.
+-  static int frame_size_in_bytes()              { return wordSize * 4; }
+-
+-  intptr_t* extended_sender_sp() const  { return saved_args_base(); }
+-
+-  intptr_t  return_value_slot_number() const {
+-    return adapter_conversion_vminfo(conversion());
+-  }
+-  BasicType return_value_type() const {
+-    return adapter_conversion_dest_type(conversion());
+-  }
+-  bool has_return_value_slot() const {
+-    return return_value_type() != T_VOID;
+-  }
+-  intptr_t* return_value_slot_addr() const {
+-    assert(has_return_value_slot(), "");
+-    return saved_arg_slot_addr(return_value_slot_number());
+-  }
+-  intptr_t* saved_target_slot_addr() const {
+-    return saved_arg_slot_addr(saved_args_length());
+-  }
+-  intptr_t* saved_arg_slot_addr(int slot) const {
+-    assert(slot >= 0, "");
+-    return (intptr_t*)( (address)saved_args_base() + (slot * Interpreter::stackElementSize) );
+-  }
+-
+-  jint      saved_args_length() const;
+-  jint      saved_arg_offset(int arg) const;
+-
+-  // GC interface
+-  oop*  saved_target_addr()                     { return (oop*)register_addr(L2_saved_target); }
+-  oop*  saved_args_layout_addr()                { return (oop*)register_addr(L3_saved_args_layout); }
+-
+-  oop  compute_saved_args_layout(bool read_cache, bool write_cache);
+-
+-#ifdef ASSERT
+-  // The magic number is supposed to help find ricochet frames within the bytes of stack dumps.
+-  enum { MAGIC_NUMBER_1 = 0xFEED03E, MAGIC_NUMBER_2 = 0xBEEF03E };
+-  static const Register L0_magic_number_1   ;  // cookie for debugging, at start of RSA
+-  static Address magic_number_2_addr()  { return Address(L4_saved_args_base, -wordSize); }
+-  intptr_t magic_number_1() const       { return register_value(L0_magic_number_1); }
+-  intptr_t magic_number_2() const       { return saved_args_base()[-1]; }
+-#endif //ASSERT
+-
+- public:
+-  enum { RETURN_VALUE_PLACEHOLDER = (NOT_DEBUG(0) DEBUG_ONLY(42)) };
+-
+-  void verify() const NOT_DEBUG_RETURN; // check for MAGIC_NUMBER, etc.
+-
+-  static void generate_ricochet_blob(MacroAssembler* _masm,
+-                                     // output params:
+-                                     int* bounce_offset,
+-                                     int* exception_offset,
+-                                     int* frame_size_in_words);
+-
+-  static void enter_ricochet_frame(MacroAssembler* _masm,
+-                                   Register recv_reg,
+-                                   Register argv_reg,
+-                                   address return_handler);
+-
+-  static void leave_ricochet_frame(MacroAssembler* _masm,
+-                                   Register recv_reg,
+-                                   Register new_sp_reg,
+-                                   Register sender_pc_reg);
+-
+-  static RicochetFrame* from_frame(const frame& fr) {
+-    RicochetFrame* rf = new RicochetFrame(fr);
+-    rf->verify();
+-    return rf;
+-  }
+-
+-  static void verify_clean(MacroAssembler* _masm) NOT_DEBUG_RETURN;
+-
+-  static void describe(const frame* fr, FrameValues& values, int frame_no) PRODUCT_RETURN;
+-};
+-
+ // Additional helper methods for MethodHandles code generation:
+ public:
+   static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg);
+-  static void load_conversion_vminfo(MacroAssembler* _masm, Address conversion_field_addr, Register reg);
+-  static void extract_conversion_vminfo(MacroAssembler* _masm, Register conversion_field_reg, Register reg);
+-  static void extract_conversion_dest_type(MacroAssembler* _masm, Register conversion_field_reg, Register reg);
+-
+-  static void load_stack_move(MacroAssembler* _masm,
+-                              Address G3_amh_conversion,
+-                              Register G5_stack_move);
+-
+-  static void insert_arg_slots(MacroAssembler* _masm,
+-                               RegisterOrConstant arg_slots,
+-                               Register argslot_reg,
+-                               Register temp_reg, Register temp2_reg, Register temp3_reg);
+-
+-  static void remove_arg_slots(MacroAssembler* _masm,
+-                               RegisterOrConstant arg_slots,
+-                               Register argslot_reg,
+-                               Register temp_reg, Register temp2_reg, Register temp3_reg);
+-
+-  static void push_arg_slots(MacroAssembler* _masm,
+-                             Register argslot_reg,
+-                             RegisterOrConstant slot_count,
+-                             Register temp_reg, Register temp2_reg);
+-
+-  static void move_arg_slots_up(MacroAssembler* _masm,
+-                                Register bottom_reg,  // invariant
+-                                Address  top_addr,    // can use temp_reg
+-                                RegisterOrConstant positive_distance_in_slots,
+-                                Register temp_reg, Register temp2_reg);
+-
+-  static void move_arg_slots_down(MacroAssembler* _masm,
+-                                  Address  bottom_addr,  // can use temp_reg
+-                                  Register top_reg,      // invariant
+-                                  RegisterOrConstant negative_distance_in_slots,
+-                                  Register temp_reg, Register temp2_reg);
+-
+-  static void move_typed_arg(MacroAssembler* _masm,
+-                             BasicType type, bool is_element,
+-                             Address value_src, Address slot_dest,
+-                             Register temp_reg);
+-
+-  static void move_return_value(MacroAssembler* _masm, BasicType type,
+-                                Address return_slot);
+-
+-  static void verify_argslot(MacroAssembler* _masm, Register argslot_reg,
+-                             Register temp_reg,
+-                             const char* error_message) NOT_DEBUG_RETURN;
+-
+-  static void verify_argslots(MacroAssembler* _masm,
+-                              RegisterOrConstant argslot_count,
+-                              Register argslot_reg,
+-                              Register temp_reg,
+-                              Register temp2_reg,
+-                              bool negate_argslot,
+-                              const char* error_message) NOT_DEBUG_RETURN;
+-
+-  static void verify_stack_move(MacroAssembler* _masm,
+-                                RegisterOrConstant arg_slots,
+-                                int direction) NOT_DEBUG_RETURN;
+ 
+   static void verify_klass(MacroAssembler* _masm,
+                            Register obj_reg, KlassHandle klass,
+@@ -223,8 +46,17 @@
+                  "reference is a MH");
+   }
+ 
++  static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
++
+   // Similar to InterpreterMacroAssembler::jump_from_interpreted.
+   // Takes care of special dispatch from single stepping too.
+-  static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, Register temp2);
++  static void jump_from_method_handle(MacroAssembler* _masm, Register method,
++                                      Register temp, Register temp2,
++                                      bool for_compiler_entry);
++
++  static void jump_to_lambda_form(MacroAssembler* _masm,
++                                  Register recv, Register method_temp,
++                                  Register temp2, Register temp3,
++                                  bool for_compiler_entry);
+ 
+   static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN;
+diff --git a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp
+--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp
++++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp
+@@ -400,13 +400,13 @@
+     case T_LONG:                // LP64, longs compete with int args
+       assert(sig_bt[i+1] == T_VOID, "");
+ #ifdef _LP64
+-      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
++      if (int_reg_cnt < int_reg_max)  int_reg_cnt++;
+ #endif
+       break;
+     case T_OBJECT:
+     case T_ARRAY:
+     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
+-      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
++      if (int_reg_cnt < int_reg_max)  int_reg_cnt++;
+ #ifndef _LP64
+       else                            stk_reg_pairs++;
+ #endif
+@@ -416,11 +416,11 @@
+     case T_CHAR:
+     case T_BYTE:
+     case T_BOOLEAN:
+-      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
++      if (int_reg_cnt < int_reg_max)  int_reg_cnt++;
+       else                            stk_reg_pairs++;
+       break;
+     case T_FLOAT:
+-      if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++;
++      if (flt_reg_cnt < flt_reg_max)  flt_reg_cnt++;
+       else                            stk_reg_pairs++;
+       break;
+     case T_DOUBLE:
+@@ -436,7 +436,6 @@
+   // This is where the longs/doubles start on the stack.
+   stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round
+ 
+-  int int_reg_pairs = (int_reg_cnt+1) & ~1; // 32-bit 2-reg longs only
+   int flt_reg_pairs = (flt_reg_cnt+1) & ~1;
+ 
+   // int stk_reg = frame::register_save_words*(wordSize>>2);
+@@ -517,24 +516,15 @@
+           stk_reg_pairs += 2;
+         }
+ #else // COMPILER2
+-        if (int_reg_pairs + 1 < int_reg_max) {
+-          if (is_outgoing) {
+-            regs[i].set_pair(as_oRegister(int_reg_pairs + 1)->as_VMReg(), as_oRegister(int_reg_pairs)->as_VMReg());
+-          } else {
+-            regs[i].set_pair(as_iRegister(int_reg_pairs + 1)->as_VMReg(), as_iRegister(int_reg_pairs)->as_VMReg());
+-          }
+-          int_reg_pairs += 2;
+-        } else {
+           regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
+           stk_reg_pairs += 2;
+-        }
+ #endif // COMPILER2
+ #endif // _LP64
+       break;
+ 
+     case T_FLOAT:
+       if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
+-      else                       regs[i].set1(    VMRegImpl::stack2reg(stk_reg++));
++      else                       regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
+       break;
+     case T_DOUBLE:
+       assert(sig_bt[i+1] == T_VOID, "expecting half");
+@@ -886,6 +876,20 @@
+   __ delayed()->add(SP, G1, Gargs);
+ }
+ 
++static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, Register temp2_reg,
++                        address code_start, address code_end,
++                        Label& L_ok) {
++  Label L_fail;
++  __ set(ExternalAddress(code_start), temp_reg);
++  __ set(pointer_delta(code_end, code_start, 1), temp2_reg);
++  __ cmp(pc_reg, temp_reg);
++  __ brx(Assembler::lessEqualUnsigned, false, Assembler::pn, L_fail);
++  __ delayed()->add(temp_reg, temp2_reg, temp_reg);
++  __ cmp(pc_reg, temp_reg);
++  __ cmp_and_brx_short(pc_reg, temp_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
++  __ bind(L_fail);
++}
++
+ void AdapterGenerator::gen_i2c_adapter(
+                             int total_args_passed,
+                             // VMReg max_arg,
+@@ -907,6 +911,51 @@
+   // This removes all sorts of headaches on the x86 side and also eliminates
+   // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
+ 
++  // More detail:
++  // Adapters can be frameless because they do not require the caller
++  // to perform additional cleanup work, such as correcting the stack pointer.
++  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
++  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
++  // even if a callee has modified the stack pointer.
++  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
++  // routinely repairs its caller's stack pointer (from sender_sp, which is set
++  // up via the senderSP register).
++  // In other words, if *either* the caller or callee is interpreted, we can
++  // get the stack pointer repaired after a call.
++  // This is why c2i and i2c adapters cannot be indefinitely composed.
++  // In particular, if a c2i adapter were to somehow call an i2c adapter,
++  // both caller and callee would be compiled methods, and neither would
++  // clean up the stack pointer changes performed by the two adapters.
++  // If this happens, control eventually transfers back to the compiled
++  // caller, but with an uncorrected stack, causing delayed havoc.
++
++  if (VerifyAdapterCalls &&
++      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
++    // So, let's test for cascading c2i/i2c adapters right now.
++    //  assert(Interpreter::contains($return_addr) ||
++    //         StubRoutines::contains($return_addr),
++    //         "i2c adapter must return to an interpreter frame");
++    __ block_comment("verify_i2c { ");
++    Label L_ok;
++    if (Interpreter::code() != NULL)
++      range_check(masm, O7, O0, O1,
++                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
++                  L_ok);
++    if (StubRoutines::code1() != NULL)
++      range_check(masm, O7, O0, O1,
++                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
++                  L_ok);
++    if (StubRoutines::code2() != NULL)
++      range_check(masm, O7, O0, O1,
++                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
++                  L_ok);
++    const char* msg = "i2c adapter must return to an interpreter frame";
++    __ block_comment(msg);
++    __ stop(msg);
++    __ bind(L_ok);
++    __ block_comment("} verify_i2ce ");
++  }
++
+   // As you can see from the list of inputs & outputs there are not a lot
+   // of temp registers to work with: mostly G1, G3 & G4.
+ 
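For reference, the range_check helper added above reduces to an unsigned interval test on the return PC sitting in O7: fail if the PC is at or below code_start, pass if it is below code_start plus the blob size. A minimal C++ sketch of the same test, using plain byte pointers instead of MacroAssembler output, would be:

    // Illustrative sketch only; mirrors the two unsigned branches emitted by
    // range_check() above (lessEqualUnsigned to L_fail, lessUnsigned to L_ok).
    static bool return_pc_in_blob(const unsigned char* pc,
                                  const unsigned char* code_start,
                                  const unsigned char* code_end) {
      // A return address can never be the first byte of the blob, hence '>'.
      return pc > code_start && pc < code_end;
    }

The VerifyAdapterCalls block then simply ORs this test over the interpreter and the two stub-routine blobs before stopping with the "must return to an interpreter frame" message.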
+@@ -1937,20 +1986,156 @@
+   __ bind(done);
+ }
+ 
++static void verify_oop_args(MacroAssembler* masm,
++                            int total_args_passed,
++                            const BasicType* sig_bt,
++                            const VMRegPair* regs) {
++  Register temp_reg = G5_method;  // not part of any compiled calling seq
++  if (VerifyOops) {
++    for (int i = 0; i < total_args_passed; i++) {
++      if (sig_bt[i] == T_OBJECT ||
++          sig_bt[i] == T_ARRAY) {
++        VMReg r = regs[i].first();
++        assert(r->is_valid(), "bad oop arg");
++        if (r->is_stack()) {
++          RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
++          ld_off = __ ensure_simm13_or_reg(ld_off, temp_reg);
++          __ ld_ptr(SP, ld_off, temp_reg);
++          __ verify_oop(temp_reg);
++        } else {
++          __ verify_oop(r->as_Register());
++        }
++      }
++    }
++  }
++}
++
++static void gen_special_dispatch(MacroAssembler* masm,
++                                 int total_args_passed,
++                                 int comp_args_on_stack,
++                                 vmIntrinsics::ID special_dispatch,
++                                 const BasicType* sig_bt,
++                                 const VMRegPair* regs) {
++  verify_oop_args(masm, total_args_passed, sig_bt, regs);
++
++  // Now write the args into the outgoing interpreter space
++  bool     has_receiver   = false;
++  Register receiver_reg   = noreg;
++  int      member_arg_pos = -1;
++  Register member_reg     = noreg;
++  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
++  if (ref_kind != 0) {
++    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
++    member_reg = G5_method;  // known to be free at this point
++    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
++  } else if (special_dispatch == vmIntrinsics::_invokeBasic) {
++    has_receiver = true;
++  } else {
++    fatal(err_msg("special_dispatch=%d", special_dispatch));
++  }
++
++  if (member_reg != noreg) {
++    // Load the member_arg into register, if necessary.
++    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
++    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
++    VMReg r = regs[member_arg_pos].first();
++    assert(r->is_valid(), "bad member arg");
++    if (r->is_stack()) {
++      RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
++      ld_off = __ ensure_simm13_or_reg(ld_off, member_reg);
++      __ ld_ptr(SP, ld_off, member_reg);
++    } else {
++      // no data motion is needed
++      member_reg = r->as_Register();
++    }
++  }
++
++  if (has_receiver) {
++    // Make sure the receiver is loaded into a register.
++    assert(total_args_passed > 0, "oob");
++    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
++    VMReg r = regs[0].first();
++    assert(r->is_valid(), "bad receiver arg");
++    if (r->is_stack()) {
++      // Porting note:  This assumes that compiled calling conventions always
++      // pass the receiver oop in a register.  If this is not true on some
++      // platform, pick a temp and load the receiver from stack.
++      assert(false, "receiver always in a register");
++      receiver_reg = G3_scratch;  // known to be free at this point
++      RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
++      ld_off = __ ensure_simm13_or_reg(ld_off, member_reg);
++      __ ld_ptr(SP, ld_off, receiver_reg);
++    } else {
++      // no data motion is needed
++      receiver_reg = r->as_Register();
++    }
++  }
++
++  // Figure out which address we are really jumping to:
++  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
++                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
++}
++
+ // ---------------------------------------------------------------------------
+ // Generate a native wrapper for a given method.  The method takes arguments
+ // in the Java compiled code convention, marshals them to the native
+ // convention (handlizes oops, etc), transitions to native, makes the call,
+ // returns to java state (possibly blocking), unhandlizes any result and
+ // returns.
++//
++// Critical native functions are a shorthand for the use of
++// GetPrimitiveArrayCritical and disallow the use of any other JNI
++// functions.  The wrapper is expected to unpack the arguments before
++// passing them to the callee and perform checks before and after the
++// native call to ensure that the GC_locker
++// lock_critical/unlock_critical semantics are followed.  Some other
++// parts of JNI setup are skipped, like the tear-down of the JNI handle
++// block and the check for pending exceptions, since it is impossible for
++// them to be thrown.
++//
++// They are roughly structured like this:
++//    if (GC_locker::needs_gc())
++//      SharedRuntime::block_for_jni_critical();
++//    transition to thread_in_native
++//    unpack array arguments and call native entry point
++//    check for safepoint in progress
++//    check if any thread suspend flags are set
++//      call into the JVM and possibly unlock the JNI critical
++//      if a GC was suppressed while in the critical native.
++//    transition back to thread_in_Java
++//    return to caller
++//
+ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
+                                                 methodHandle method,
+                                                 int compile_id,
+                                                 int total_in_args,
+                                                 int comp_args_on_stack, // in VMRegStackSlots
+-                                                BasicType *in_sig_bt,
+-                                                VMRegPair *in_regs,
++                                                BasicType* in_sig_bt,
++                                                VMRegPair* in_regs,
+                                                 BasicType ret_type) {
++  if (method->is_method_handle_intrinsic()) {
++    vmIntrinsics::ID iid = method->intrinsic_id();
++    intptr_t start = (intptr_t)__ pc();
++    int vep_offset = ((intptr_t)__ pc()) - start;
++    gen_special_dispatch(masm,
++                         total_in_args,
++                         comp_args_on_stack,
++                         method->intrinsic_id(),
++                         in_sig_bt,
++                         in_regs);
++    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
++    __ flush();
++    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
++    return nmethod::new_native_nmethod(method,
++                                       compile_id,
++                                       masm->code(),
++                                       vep_offset,
++                                       frame_complete,
++                                       stack_slots / VMRegImpl::slots_per_word,
++                                       in_ByteSize(-1),
++                                       in_ByteSize(-1),
++                                       (OopMapSet*)NULL);
++  }
+   bool is_critical_native = true;
+   address native_func = method->critical_native_function();
+   if (native_func == NULL) {
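The gen_special_dispatch helper added above picks out at most two incoming arguments before jumping to MethodHandles::generate_method_handle_dispatch: the trailing MemberName for the linkTo* intrinsics, and the receiver for any reference kind that takes one (or for invokeBasic). A compilable sketch of just that selection rule, with illustrative stand-ins rather than HotSpot's own definitions, might look like:

    // Sketch of the argument-selection rule only; the struct and helper are
    // placeholders, not HotSpot declarations.
    struct DispatchArgs {
      bool has_receiver;    // receiver oop expected in sig_bt[0]
      int  member_arg_pos;  // index of the trailing MemberName, or -1
    };

    // JVMS reference kinds are numbered 1..9; the odd-numbered kinds
    // (getField, putField, invokeVirtual, invokeSpecial, invokeInterface)
    // are the ones that carry a receiver.
    static bool ref_kind_takes_receiver(int ref_kind) {
      return (ref_kind & 1) != 0;
    }

    static DispatchArgs select_dispatch_args(int ref_kind,   // 0 for invokeBasic
                                             bool is_invoke_basic,
                                             int total_args_passed) {
      DispatchArgs a = { false, -1 };
      if (ref_kind != 0) {                         // linkToVirtual, linkToStatic, ...
        a.member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
        a.has_receiver   = ref_kind_takes_receiver(ref_kind);
      } else if (is_invoke_basic) {
        a.has_receiver = true;                     // only the MH receiver is needed
      }
      return a;
    }

The real target is recovered later from the MemberName or from the LambdaForm behind the receiver, which is consistent with the wrapper built just above: no stack slots, no oop map, just a dispatch.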
+diff --git a/src/cpu/sparc/vm/stubGenerator_sparc.cpp b/src/cpu/sparc/vm/stubGenerator_sparc.cpp
+--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp
++++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp
+@@ -3404,14 +3404,6 @@
+     StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
+ #endif  // COMPILER2 !=> _LP64
+ 
+-    // Build this early so it's available for the interpreter.  The
+-    // stub expects the required and actual type to already be in O1
+-    // and O2 respectively.
+-    StubRoutines::_throw_WrongMethodTypeException_entry =
+-      generate_throw_exception("WrongMethodTypeException throw_exception",
+-                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
+-                               G5_method_type, G3_method_handle);
+-
+     // Build this early so it's available for the interpreter.
+     StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
+   }
+diff --git a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp
+--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp
++++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp
+@@ -694,9 +694,9 @@
+     // Need to differentiate between igetfield, agetfield, bgetfield etc.
+     // because they are different sizes.
+     // Get the type from the constant pool cache
+-    __ srl(G1_scratch, ConstantPoolCacheEntry::tosBits, G1_scratch);
+-    // Make sure we don't need to mask G1_scratch for tosBits after the above shift
+-    ConstantPoolCacheEntry::verify_tosBits();
++    __ srl(G1_scratch, ConstantPoolCacheEntry::tos_state_shift, G1_scratch);
++    // Make sure we don't need to mask G1_scratch after the above shift
++    ConstantPoolCacheEntry::verify_tos_state_shift();
+     __ cmp(G1_scratch, atos );
+     __ br(Assembler::equal, true, Assembler::pt, xreturn_path);
+     __ delayed()->ld_ptr(Otos_i, G3_scratch, Otos_i);
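The rename from tosBits to tos_state_shift does not change the trick relied on here: the tos state sits in the topmost bits of the flags word, so a logical right shift isolates it with no mask, which is what verify_tos_state_shift() asserts. In plain C++ the assumption looks like this (the shift value is a placeholder, not the VM's constant):

    // Minimal sketch of the no-mask-needed property; 28 stands in for
    // ConstantPoolCacheEntry::tos_state_shift.
    static unsigned tos_state(unsigned flags) {
      const int tos_state_shift = 28;     // field occupies the topmost bits
      return flags >> tos_state_shift;    // nothing above the field to mask off
    }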
+@@ -1662,7 +1662,7 @@
+       int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0;
+       *interpreter_frame->register_addr(I5_savedSP)    = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS;
+     } else {
+-      assert(caller->is_compiled_frame() || caller->is_entry_frame() || caller->is_ricochet_frame(), "only possible cases");
++      assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases");
+       // Don't have Lesp available; lay out locals block in the caller
+       // adjacent to the register window save area.
+       //
+diff --git a/src/cpu/sparc/vm/templateTable_sparc.cpp b/src/cpu/sparc/vm/templateTable_sparc.cpp
+--- a/src/cpu/sparc/vm/templateTable_sparc.cpp
++++ b/src/cpu/sparc/vm/templateTable_sparc.cpp
+@@ -378,7 +378,7 @@
+   Register Rcache = G3_scratch;
+   Register Rscratch = G4_scratch;
+ 
+-  resolve_cache_and_index(f1_oop, Otos_i, Rcache, Rscratch, wide ? sizeof(u2) : sizeof(u1));
++  resolve_cache_and_index(f12_oop, Otos_i, Rcache, Rscratch, wide ? sizeof(u2) : sizeof(u1));
+ 
+   __ verify_oop(Otos_i);
+ 
+@@ -2093,10 +2093,12 @@
+   // Depends on cpCacheOop layout!
+   Label resolved;
+ 
+-  if (byte_no == f1_oop) {
+-    // We are resolved if the f1 field contains a non-null object (CallSite, etc.)
+-    // This kind of CP cache entry does not need to match the flags byte, because
++  if (byte_no == f12_oop) {
++    // We are resolved if the f1 field contains a non-null object (CallSite, MethodType, etc.)
++    // This kind of CP cache entry does not need to match bytecode_1 or bytecode_2, because
+     // there is a 1-1 relation between bytecode type and CP entry type.
++    // The caller will also load a methodOop from f2.
++    assert(result != noreg, "");
+     assert_different_registers(result, Rcache);
+     __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
+     __ ld_ptr(Rcache, constantPoolCacheOopDesc::base_offset() +
+@@ -2123,10 +2125,13 @@
+     case Bytecodes::_invokespecial  : // fall through
+     case Bytecodes::_invokestatic   : // fall through
+     case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);  break;
++    case Bytecodes::_invokehandle   : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);  break;
+     case Bytecodes::_invokedynamic  : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);  break;
+     case Bytecodes::_fast_aldc      : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);     break;
+     case Bytecodes::_fast_aldc_w    : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);     break;
+-    default                         : ShouldNotReachHere();                                 break;
++    default:
++      fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
++      break;
+   }
+   // first time invocation - must resolve first
+   __ call_VM(noreg, entry, O1);
+@@ -2139,48 +2144,54 @@
+ }
+ 
+ void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
+-                                               Register Rmethod,
+-                                               Register Ritable_index,
+-                                               Register Rflags,
++                                               Register method,
++                                               Register itable_index,
++                                               Register flags,
+                                                bool is_invokevirtual,
+                                                bool is_invokevfinal,
+                                                bool is_invokedynamic) {
+   // Uses both G3_scratch and G4_scratch
+-  Register Rcache = G3_scratch;
+-  Register Rscratch = G4_scratch;
+-  assert_different_registers(Rcache, Rmethod, Ritable_index);
+-
+-  ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
++  Register cache = G3_scratch;
++  Register index = G4_scratch;
++  assert_different_registers(cache, method, itable_index);
+ 
+   // determine constant pool cache field offsets
++  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
+   const int method_offset = in_bytes(
+-    cp_base_offset +
+-      (is_invokevirtual
++      constantPoolCacheOopDesc::base_offset() +
++      ((byte_no == f2_byte)
+        ? ConstantPoolCacheEntry::f2_offset()
+        : ConstantPoolCacheEntry::f1_offset()
+       )
+     );
+-  const int flags_offset = in_bytes(cp_base_offset +
++  const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
+                                     ConstantPoolCacheEntry::flags_offset());
+   // access constant pool cache fields
+-  const int index_offset = in_bytes(cp_base_offset +
++  const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
+                                     ConstantPoolCacheEntry::f2_offset());
+ 
+   if (is_invokevfinal) {
+-    __ get_cache_and_index_at_bcp(Rcache, Rscratch, 1);
+-    __ ld_ptr(Rcache, method_offset, Rmethod);
+-  } else if (byte_no == f1_oop) {
+-    // Resolved f1_oop goes directly into 'method' register.
+-    resolve_cache_and_index(byte_no, Rmethod, Rcache, Rscratch, sizeof(u4));
++    __ get_cache_and_index_at_bcp(cache, index, 1);
++    __ ld_ptr(Address(cache, method_offset), method);
++  } else if (byte_no == f12_oop) {
++    // Resolved f1_oop (CallSite, MethodType, etc.) goes into 'itable_index'.
++    // Resolved f2_oop (methodOop invoker) will go into 'method' (at index_offset).
++    // See ConstantPoolCacheEntry::set_dynamic_call and set_method_handle.
++    size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
++    resolve_cache_and_index(byte_no, itable_index, cache, index, index_size);
++    __ ld_ptr(Address(cache, index_offset), method);
++    itable_index = noreg;  // hack to disable load below
+   } else {
+-    resolve_cache_and_index(byte_no, noreg, Rcache, Rscratch, sizeof(u2));
+-    __ ld_ptr(Rcache, method_offset, Rmethod);
++    resolve_cache_and_index(byte_no, noreg, cache, index, sizeof(u2));
++    __ ld_ptr(Address(cache, method_offset), method);
+   }
+ 
+-  if (Ritable_index != noreg) {
+-    __ ld_ptr(Rcache, index_offset, Ritable_index);
++  if (itable_index != noreg) {
++    // pick up itable index from f2 also:
++    assert(byte_no == f1_byte, "already picked up f1");
++    __ ld_ptr(Address(cache, index_offset), itable_index);
+   }
+-  __ ld_ptr(Rcache, flags_offset, Rflags);
++  __ ld_ptr(Address(cache, flags_offset), flags);
+ }
+ 
+ // The Rcache register must be set before call
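To keep the f12_oop case above readable, it helps to recall which cache entry fields feed which register. A rough sketch of the relevant slots (an illustration of the data flow, not the VM's real declaration):

    // Simplified view of the constant pool cache entry fields touched above.
    struct cp_cache_entry_sketch {
      void*    _f1;     // invokedynamic/invokehandle: resolved CallSite or MethodType oop
      void*    _f2;     // invokedynamic/invokehandle: methodOop of the invoker to call
      intptr_t _flags;  // tos state, parameter size, has_appendix / is_vfinal bits, ...
    };
    // load_invoke_cp_cache_entry(f12_oop, ...) therefore ends up with
    //   itable_index <- _f1   (later pushed as the trailing "appendix" argument)
    //   method       <- _f2   (what the interpreter actually calls)
    //   flags        <- _flags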
+@@ -2272,7 +2283,7 @@
+ 
+   if (__ membar_has_effect(membar_bits)) {
+     // Get volatile flag
+-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
++    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
+     __ and3(Rflags, Lscratch, Lscratch);
+   }
+ 
+@@ -2280,9 +2291,9 @@
+ 
+   // compute field type
+   Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj;
+-  __ srl(Rflags, ConstantPoolCacheEntry::tosBits, Rflags);
+-  // Make sure we don't need to mask Rflags for tosBits after the above shift
+-  ConstantPoolCacheEntry::verify_tosBits();
++  __ srl(Rflags, ConstantPoolCacheEntry::tos_state_shift, Rflags);
++  // Make sure we don't need to mask Rflags after the above shift
++  ConstantPoolCacheEntry::verify_tos_state_shift();
+ 
+   // Check atos before itos for getstatic, more likely (in Queens at least)
+   __ cmp(Rflags, atos);
+@@ -2445,7 +2456,7 @@
+   if (__ membar_has_effect(membar_bits)) {
+     // Get volatile flag
+     __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f2_offset(), Rflags);
+-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
++    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
+   }
+ 
+   switch (bytecode()) {
+@@ -2569,9 +2580,9 @@
+       Label two_word, valsizeknown;
+       __ ld_ptr(G1_scratch, cp_base_offset + ConstantPoolCacheEntry::flags_offset(), Rflags);
+       __ mov(Lesp, G4_scratch);
+-      __ srl(Rflags, ConstantPoolCacheEntry::tosBits, Rflags);
+-      // Make sure we don't need to mask Rflags for tosBits after the above shift
+-      ConstantPoolCacheEntry::verify_tosBits();
++      __ srl(Rflags, ConstantPoolCacheEntry::tos_state_shift, Rflags);
++      // Make sure we don't need to mask Rflags after the above shift
++      ConstantPoolCacheEntry::verify_tos_state_shift();
+       __ cmp(Rflags, ltos);
+       __ br(Assembler::equal, false, Assembler::pt, two_word);
+       __ delayed()->cmp(Rflags, dtos);
+@@ -2625,7 +2636,7 @@
+ 
+   Label notVolatile, checkVolatile, exit;
+   if (__ membar_has_effect(read_bits) || __ membar_has_effect(write_bits)) {
+-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
++    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
+     __ and3(Rflags, Lscratch, Lscratch);
+ 
+     if (__ membar_has_effect(read_bits)) {
+@@ -2635,9 +2646,9 @@
+     }
+   }
+ 
+-  __ srl(Rflags, ConstantPoolCacheEntry::tosBits, Rflags);
+-  // Make sure we don't need to mask Rflags for tosBits after the above shift
+-  ConstantPoolCacheEntry::verify_tosBits();
++  __ srl(Rflags, ConstantPoolCacheEntry::tos_state_shift, Rflags);
++  // Make sure we don't need to mask Rflags after the above shift
++  ConstantPoolCacheEntry::verify_tos_state_shift();
+ 
+   // compute field type
+   Label notInt, notShort, notChar, notObj, notByte, notLong, notFloat;
+@@ -2833,7 +2844,7 @@
+   Label notVolatile, checkVolatile, exit;
+   if (__ membar_has_effect(read_bits) || __ membar_has_effect(write_bits)) {
+     __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::flags_offset(), Rflags);
+-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
++    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
+     __ and3(Rflags, Lscratch, Lscratch);
+     if (__ membar_has_effect(read_bits)) {
+       __ cmp_and_br_short(Lscratch, 0, Assembler::equal, Assembler::pt, notVolatile);
+@@ -2916,7 +2927,7 @@
+ 
+     // Test volatile
+     Label notVolatile;
+-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
++    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
+     __ btst(Rflags, Lscratch);
+     __ br(Assembler::zero, false, Assembler::pt, notVolatile);
+     __ delayed()->nop();
+@@ -2936,27 +2947,82 @@
+   ShouldNotReachHere();
+ }
+ 
++
++void TemplateTable::prepare_invoke(int byte_no,
++                                   Register method,  // linked method (or i-klass)
++                                   Register ra,      // return address
++                                   Register index,   // itable index, MethodType, etc.
++                                   Register recv,    // if caller wants to see it
++                                   Register flags    // if caller wants to test it
++                                   ) {
++  // determine flags
++  const Bytecodes::Code code = bytecode();
++  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
++  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
++  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
++  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
++  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
++  const bool load_receiver       = (recv != noreg);
++  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
++  assert(recv  == noreg || recv  == O0, "");
++  assert(flags == noreg || flags == O1, "");
++
++  // setup registers & access constant pool cache
++  if (recv  == noreg)  recv  = O0;
++  if (flags == noreg)  flags = O1;
++  const Register temp = O2;
++  assert_different_registers(method, ra, index, recv, flags, temp);
++
++  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
++
++  __ mov(SP, O5_savedSP);  // record SP that we wanted the callee to restore
++
++  // maybe push appendix to arguments
++  if (is_invokedynamic || is_invokehandle) {
++    Label L_no_push;
++    __ verify_oop(index);
++    __ set((1 << ConstantPoolCacheEntry::has_appendix_shift), temp);
++    __ btst(flags, temp);
++    __ br(Assembler::zero, false, Assembler::pt, L_no_push);
++    __ delayed()->nop();
++    // Push the appendix as a trailing parameter.
++    // This must be done before we get the receiver,
++    // since the parameter_size includes it.
++    __ push_ptr(index);  // push appendix (MethodType, CallSite, etc.)
++    __ bind(L_no_push);
++  }
++
++  // load receiver if needed (after appendix is pushed so parameter size is correct)
++  if (load_receiver) {
++    __ and3(flags, ConstantPoolCacheEntry::parameter_size_mask, temp);  // get parameter size
++    __ load_receiver(temp, recv);  //  __ argument_address uses Gargs but we need Lesp
++    __ verify_oop(recv);
++  }
++
++  // compute return type
++  __ srl(flags, ConstantPoolCacheEntry::tos_state_shift, ra);
++  // Make sure we don't need to mask flags after the above shift
++  ConstantPoolCacheEntry::verify_tos_state_shift();
++  // load return address
++  {
++    const address table_addr = (is_invokeinterface || is_invokedynamic) ?
++        (address)Interpreter::return_5_addrs_by_index_table() :
++        (address)Interpreter::return_3_addrs_by_index_table();
++    AddressLiteral table(table_addr);
++    __ set(table, temp);
++    __ sll(ra, LogBytesPerWord, ra);
++    __ ld_ptr(Address(temp, ra), ra);
++  }
++}
++
++
+ void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) {
+   Register Rtemp = G4_scratch;
+   Register Rcall = Rindex;
+   assert_different_registers(Rcall, G5_method, Gargs, Rret);
+ 
+   // get target methodOop & entry point
+-  const int base = instanceKlass::vtable_start_offset() * wordSize;
+-  if (vtableEntry::size() % 3 == 0) {
+-    // scale the vtable index by 12:
+-    int one_third = vtableEntry::size() / 3;
+-    __ sll(Rindex, exact_log2(one_third * 1 * wordSize), Rtemp);
+-    __ sll(Rindex, exact_log2(one_third * 2 * wordSize), Rindex);
+-    __ add(Rindex, Rtemp, Rindex);
+-  } else {
+-    // scale the vtable index by 8:
+-    __ sll(Rindex, exact_log2(vtableEntry::size() * wordSize), Rindex);
+-  }
+-
+-  __ add(Rrecv, Rindex, Rrecv);
+-  __ ld_ptr(Rrecv, base + vtableEntry::method_offset_in_bytes(), G5_method);
+-
++  __ lookup_virtual_method(Rrecv, Rindex, G5_method);
+   __ call_from_interpreter(Rcall, Gargs, Rret);
+ }
+ 
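The new prepare_invoke above folds the per-bytecode boilerplate into one place; the only genuinely new step is the conditional appendix push, driven by two fields of the flags word. A small sketch of how those fields are read (the shift and mask values are placeholders, not the VM's constants):

    #include <cstdint>

    // Placeholder constants standing in for the ConstantPoolCacheEntry values;
    // only the pattern of use matches prepare_invoke above.
    static const int      has_appendix_shift  = 26;
    static const uint32_t parameter_size_mask = 0xFF;   // low byte = argument slots

    static uint32_t parameter_size(uint32_t flags) { return flags & parameter_size_mask; }
    static bool     has_appendix(uint32_t flags)   { return ((flags >> has_appendix_shift) & 1) != 0; }

prepare_invoke pushes the appendix (the MethodType or CallSite loaded from f1) before computing the receiver slot precisely because, as the comment in the hunk notes, the parameter size already counts the appendix.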
+@@ -2965,16 +3031,16 @@
+   assert(byte_no == f2_byte, "use this argument");
+ 
+   Register Rscratch = G3_scratch;
+-  Register Rtemp = G4_scratch;
+-  Register Rret = Lscratch;
+-  Register Rrecv = G5_method;
++  Register Rtemp    = G4_scratch;
++  Register Rret     = Lscratch;
++  Register O0_recv  = O0;
+   Label notFinal;
+ 
+   load_invoke_cp_cache_entry(byte_no, G5_method, noreg, Rret, true, false, false);
+   __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
+ 
+   // Check for vfinal
+-  __ set((1 << ConstantPoolCacheEntry::vfinalMethod), G4_scratch);
++  __ set((1 << ConstantPoolCacheEntry::is_vfinal_shift), G4_scratch);
+   __ btst(Rret, G4_scratch);
+   __ br(Assembler::zero, false, Assembler::pt, notFinal);
+   __ delayed()->and3(Rret, 0xFF, G4_scratch);      // gets number of parameters
+@@ -2986,27 +3052,27 @@
+   __ bind(notFinal);
+ 
+   __ mov(G5_method, Rscratch);  // better scratch register
+-  __ load_receiver(G4_scratch, O0);  // gets receiverOop
+-  // receiver is in O0
+-  __ verify_oop(O0);
++  __ load_receiver(G4_scratch, O0_recv);  // gets receiverOop
++  // receiver is in O0_recv
++  __ verify_oop(O0_recv);
+ 
+   // get return address
+   AddressLiteral table(Interpreter::return_3_addrs_by_index_table());
+   __ set(table, Rtemp);
+-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
+-  // Make sure we don't need to mask Rret for tosBits after the above shift
+-  ConstantPoolCacheEntry::verify_tosBits();
++  __ srl(Rret, ConstantPoolCacheEntry::tos_state_shift, Rret);          // get return type
++  // Make sure we don't need to mask Rret after the above shift
++  ConstantPoolCacheEntry::verify_tos_state_shift();
+   __ sll(Rret,  LogBytesPerWord, Rret);
+   __ ld_ptr(Rtemp, Rret, Rret);         // get return address
+ 
+   // get receiver klass
+-  __ null_check(O0, oopDesc::klass_offset_in_bytes());
+-  __ load_klass(O0, Rrecv);
+-  __ verify_oop(Rrecv);
+-
+-  __ profile_virtual_call(Rrecv, O4);
+-
+-  generate_vtable_call(Rrecv, Rscratch, Rret);
++  __ null_check(O0_recv, oopDesc::klass_offset_in_bytes());
++  __ load_klass(O0_recv, O0_recv);
++  __ verify_oop(O0_recv);
++
++  __ profile_virtual_call(O0_recv, O4);
++
++  generate_vtable_call(O0_recv, Rscratch, Rret);
+ }
+ 
+ void TemplateTable::fast_invokevfinal(int byte_no) {
+@@ -3036,9 +3102,9 @@
+   // get return address
+   AddressLiteral table(Interpreter::return_3_addrs_by_index_table());
+   __ set(table, Rtemp);
+-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
+-  // Make sure we don't need to mask Rret for tosBits after the above shift
+-  ConstantPoolCacheEntry::verify_tosBits();
++  __ srl(Rret, ConstantPoolCacheEntry::tos_state_shift, Rret);          // get return type
++  // Make sure we don't need to mask Rret after the above shift
++  ConstantPoolCacheEntry::verify_tos_state_shift();
+   __ sll(Rret,  LogBytesPerWord, Rret);
+   __ ld_ptr(Rtemp, Rret, Rret);         // get return address
+ 
+@@ -3047,65 +3113,37 @@
+   __ call_from_interpreter(Rscratch, Gargs, Rret);
+ }
+ 
++
+ void TemplateTable::invokespecial(int byte_no) {
+   transition(vtos, vtos);
+   assert(byte_no == f1_byte, "use this argument");
+ 
+-  Register Rscratch = G3_scratch;
+-  Register Rtemp = G4_scratch;
+-  Register Rret = Lscratch;
+-
+-  load_invoke_cp_cache_entry(byte_no, G5_method, noreg, Rret, /*virtual*/ false, false, false);
+-  __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
+-
++  const Register Rret     = Lscratch;
++  const Register O0_recv  = O0;
++  const Register Rscratch = G3_scratch;
++
++  prepare_invoke(byte_no, G5_method, Rret, noreg, O0_recv);  // get receiver also for null check
++  __ null_check(O0_recv);
++
++  // do the call
+   __ verify_oop(G5_method);
+-
+-  __ lduh(G5_method, in_bytes(methodOopDesc::size_of_parameters_offset()), G4_scratch);
+-  __ load_receiver(G4_scratch, O0);
+-
+-  // receiver NULL check
+-  __ null_check(O0);
+-
+   __ profile_call(O4);
+-
+-  // get return address
+-  AddressLiteral table(Interpreter::return_3_addrs_by_index_table());
+-  __ set(table, Rtemp);
+-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
+-  // Make sure we don't need to mask Rret for tosBits after the above shift
+-  ConstantPoolCacheEntry::verify_tosBits();
+-  __ sll(Rret,  LogBytesPerWord, Rret);
+-  __ ld_ptr(Rtemp, Rret, Rret);         // get return address
+-
+-  // do the call
+   __ call_from_interpreter(Rscratch, Gargs, Rret);
+ }
+ 
++
+ void TemplateTable::invokestatic(int byte_no) {
+   transition(vtos, vtos);
+   assert(byte_no == f1_byte, "use this argument");
+ 
+-  Register Rscratch = G3_scratch;
+-  Register Rtemp = G4_scratch;
+-  Register Rret = Lscratch;
+-
+-  load_invoke_cp_cache_entry(byte_no, G5_method, noreg, Rret, /*virtual*/ false, false, false);
+-  __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
+-
++  const Register Rret     = Lscratch;
++  const Register Rscratch = G3_scratch;
++
++  prepare_invoke(byte_no, G5_method, Rret);  // get f1 methodOop
++
++  // do the call
+   __ verify_oop(G5_method);
+-
+   __ profile_call(O4);
+-
+-  // get return address
+-  AddressLiteral table(Interpreter::return_3_addrs_by_index_table());
+-  __ set(table, Rtemp);
+-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
+-  // Make sure we don't need to mask Rret for tosBits after the above shift
+-  ConstantPoolCacheEntry::verify_tosBits();
+-  __ sll(Rret,  LogBytesPerWord, Rret);
+-  __ ld_ptr(Rtemp, Rret, Rret);         // get return address
+-
+-  // do the call
+   __ call_from_interpreter(Rscratch, Gargs, Rret);
+ }
+ 
+@@ -3122,7 +3160,7 @@
+   Label notFinal;
+ 
+   // Check for vfinal
+-  __ set((1 << ConstantPoolCacheEntry::vfinalMethod), Rscratch);
++  __ set((1 << ConstantPoolCacheEntry::is_vfinal_shift), Rscratch);
+   __ btst(Rflags, Rscratch);
+   __ br(Assembler::zero, false, Assembler::pt, notFinal);
+   __ delayed()->nop();
+@@ -3144,53 +3182,37 @@
+   transition(vtos, vtos);
+   assert(byte_no == f1_byte, "use this argument");
+ 
+-  Register Rscratch = G4_scratch;
+-  Register Rret = G3_scratch;
+-  Register Rindex = Lscratch;
+-  Register Rinterface = G1_scratch;
+-  Register RklassOop = G5_method;
+-  Register Rflags = O1;
++  const Register Rinterface  = G1_scratch;
++  const Register Rret        = G3_scratch;
++  const Register Rindex      = Lscratch;
++  const Register O0_recv     = O0;
++  const Register O1_flags    = O1;
++  const Register O2_klassOop = O2;
++  const Register Rscratch    = G4_scratch;
+   assert_different_registers(Rscratch, G5_method);
+ 
+-  load_invoke_cp_cache_entry(byte_no, Rinterface, Rindex, Rflags, /*virtual*/ false, false, false);
+-  __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
+-
+-  // get receiver
+-  __ and3(Rflags, 0xFF, Rscratch);       // gets number of parameters
+-  __ load_receiver(Rscratch, O0);
+-  __ verify_oop(O0);
+-
+-  __ mov(Rflags, Rret);
+-
+-  // get return address
+-  AddressLiteral table(Interpreter::return_5_addrs_by_index_table());
+-  __ set(table, Rscratch);
+-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
+-  // Make sure we don't need to mask Rret for tosBits after the above shift
+-  ConstantPoolCacheEntry::verify_tosBits();
+-  __ sll(Rret,  LogBytesPerWord, Rret);
+-  __ ld_ptr(Rscratch, Rret, Rret);      // get return address
++  prepare_invoke(byte_no, Rinterface, Rret, Rindex, O0_recv, O1_flags);
+ 
+   // get receiver klass
+-  __ null_check(O0, oopDesc::klass_offset_in_bytes());
+-  __ load_klass(O0, RklassOop);
+-  __ verify_oop(RklassOop);
++  __ null_check(O0_recv, oopDesc::klass_offset_in_bytes());
++  __ load_klass(O0_recv, O2_klassOop);
++  __ verify_oop(O2_klassOop);
+ 
+   // Special case of invokeinterface called for virtual method of
+   // java.lang.Object.  See cpCacheOop.cpp for details.
+   // This code isn't produced by javac, but could be produced by
+   // another compliant java compiler.
+   Label notMethod;
+-  __ set((1 << ConstantPoolCacheEntry::methodInterface), Rscratch);
+-  __ btst(Rflags, Rscratch);
++  __ set((1 << ConstantPoolCacheEntry::is_forced_virtual_shift), Rscratch);
++  __ btst(O1_flags, Rscratch);
+   __ br(Assembler::zero, false, Assembler::pt, notMethod);
+   __ delayed()->nop();
+ 
+-  invokeinterface_object_method(RklassOop, Rinterface, Rret, Rflags);
++  invokeinterface_object_method(O2_klassOop, Rinterface, Rret, O1_flags);
+ 
+   __ bind(notMethod);
+ 
+-  __ profile_virtual_call(RklassOop, O4);
++  __ profile_virtual_call(O2_klassOop, O4);
+ 
+   //
+   // find entry point to call
+@@ -3199,9 +3221,9 @@
+   // compute start of first itableOffsetEntry (which is at end of vtable)
+   const int base = instanceKlass::vtable_start_offset() * wordSize;
+   Label search;
+-  Register Rtemp = Rflags;
+-
+-  __ ld(RklassOop, instanceKlass::vtable_length_offset() * wordSize, Rtemp);
++  Register Rtemp = O1_flags;
++
++  __ ld(O2_klassOop, instanceKlass::vtable_length_offset() * wordSize, Rtemp);
+   if (align_object_offset(1) > 1) {
+     __ round_to(Rtemp, align_object_offset(1));
+   }
+@@ -3212,7 +3234,7 @@
+     __ set(base, Rscratch);
+     __ add(Rscratch, Rtemp, Rtemp);
+   }
+-  __ add(RklassOop, Rtemp, Rscratch);
++  __ add(O2_klassOop, Rtemp, Rscratch);
+ 
+   __ bind(search);
+ 
+@@ -3244,7 +3266,7 @@
+   assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust instruction below");
+   __ sll(Rindex, exact_log2(itableMethodEntry::size() * wordSize), Rindex);       // Rindex *= 8;
+   __ add(Rscratch, Rindex, Rscratch);
+-  __ ld_ptr(RklassOop, Rscratch, G5_method);
++  __ ld_ptr(O2_klassOop, Rscratch, G5_method);
+ 
+   // Check for abstract method error.
+   {
+@@ -3260,13 +3282,42 @@
+ 
+   __ verify_oop(G5_method);
+   __ call_from_interpreter(Rcall, Gargs, Rret);
+-
++}
++
++
++void TemplateTable::invokehandle(int byte_no) {
++  transition(vtos, vtos);
++  assert(byte_no == f12_oop, "use this argument");
++
++  if (!EnableInvokeDynamic) {
++    // rewriter does not generate this bytecode
++    __ should_not_reach_here();
++    return;
++  }
++
++  const Register Rret       = Lscratch;
++  const Register G4_mtype   = G4_scratch;  // f1
++  const Register O0_recv    = O0;
++  const Register Rscratch   = G3_scratch;
++
++  prepare_invoke(byte_no, G5_method, Rret, G4_mtype, O0_recv);
++  __ null_check(O0_recv);
++
++  // G4: MethodType object (from f1)
++  // G5: MH.invokeExact_MT method (from f2)
++
++  // Note:  G4_mtype is already pushed (if necessary) by prepare_invoke
++
++  // do the call
++  __ verify_oop(G5_method);
++  __ profile_final_call(O4);  // FIXME: profile the LambdaForm also
++  __ call_from_interpreter(Rscratch, Gargs, Rret);
+ }
+ 
+ 
+ void TemplateTable::invokedynamic(int byte_no) {
+   transition(vtos, vtos);
+-  assert(byte_no == f1_oop, "use this argument");
++  assert(byte_no == f12_oop, "use this argument");
+ 
+   if (!EnableInvokeDynamic) {
+     // We should not encounter this bytecode if !EnableInvokeDynamic.
+@@ -3279,42 +3330,24 @@
+     return;
+   }
+ 
+-  // G5: CallSite object (f1)
+-  // XX: unused (f2)
+-  // XX: flags (unused)
+-
+-  Register G5_callsite = G5_method;
+-  Register Rscratch    = G3_scratch;
+-  Register Rtemp       = G1_scratch;
+-  Register Rret        = Lscratch;
+-
+-  load_invoke_cp_cache_entry(byte_no, G5_callsite, noreg, Rret,
+-                             /*virtual*/ false, /*vfinal*/ false, /*indy*/ true);
+-  __ mov(SP, O5_savedSP);  // record SP that we wanted the callee to restore
+-
++  const Register Rret        = Lscratch;
++  const Register G4_callsite = G4_scratch;
++  const Register Rscratch    = G3_scratch;
++
++  prepare_invoke(byte_no, G5_method, Rret, G4_callsite);
++
++  // G4: CallSite object (from f1)
++  // G5: MH.linkToCallSite method (from f2)
++
++  // Note:  G4_callsite is already pushed by prepare_invoke
++
++  // %%% should make a type profile for any invokedynamic that takes a ref argument
+   // profile this call
+   __ profile_call(O4);
+ 
+-  // get return address
+-  AddressLiteral table(Interpreter::return_5_addrs_by_index_table());
+-  __ set(table, Rtemp);
+-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);  // get return type
+-  // Make sure we don't need to mask Rret for tosBits after the above shift
+-  ConstantPoolCacheEntry::verify_tosBits();
+-  __ sll(Rret, LogBytesPerWord, Rret);
+-  __ ld_ptr(Rtemp, Rret, Rret);  // get return address
+-
+-  __ verify_oop(G5_callsite);
+-  __ load_heap_oop(G5_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, Rscratch), G3_method_handle);
+-  __ null_check(G3_method_handle);
+-  __ verify_oop(G3_method_handle);
+-
+-  // Adjust Rret first so Llast_SP can be same as Rret
+-  __ add(Rret, -frame::pc_return_offset, O7);
+-  __ add(Lesp, BytesPerWord, Gargs);  // setup parameter pointer
+-  __ jump_to_method_handle_entry(G3_method_handle, Rtemp, /* emit_delayed_nop */ false);
+-  // Record SP so we can remove any stack space allocated by adapter transition
+-  __ delayed()->mov(SP, Llast_SP);
++  // do the call
++  __ verify_oop(G5_method);
++  __ call_from_interpreter(Rscratch, Gargs, Rret);
+ }
+ 
+ 
+diff --git a/src/cpu/sparc/vm/templateTable_sparc.hpp b/src/cpu/sparc/vm/templateTable_sparc.hpp
+--- a/src/cpu/sparc/vm/templateTable_sparc.hpp
++++ b/src/cpu/sparc/vm/templateTable_sparc.hpp
+@@ -25,6 +25,13 @@
+ #ifndef CPU_SPARC_VM_TEMPLATETABLE_SPARC_HPP
+ #define CPU_SPARC_VM_TEMPLATETABLE_SPARC_HPP
+ 
++  static void prepare_invoke(int byte_no,
++                             Register method,         // linked method (or i-klass)
++                             Register ra,             // return address
++                             Register index = noreg,  // itable index, MethodType, etc.
++                             Register recv  = noreg,  // if caller wants to see it
++                             Register flags = noreg   // if caller wants to test it
++                             );
+   // helper function
+   static void invokevfinal_helper(Register Rcache, Register Rret);
+   static void invokeinterface_object_method(Register RklassOop, Register Rcall,
+diff --git a/src/cpu/sparc/vm/vtableStubs_sparc.cpp b/src/cpu/sparc/vm/vtableStubs_sparc.cpp
+--- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp
++++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp
+@@ -70,7 +70,6 @@
+   __ load_klass(O0, G3_scratch);
+ 
+   // set methodOop (in case of interpreted method), and destination address
+-  int entry_offset = instanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
+ #ifndef PRODUCT
+   if (DebugVtables) {
+     Label L;
+@@ -82,13 +81,8 @@
+     __ bind(L);
+   }
+ #endif
+-  int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
+-  if (Assembler::is_simm13(v_off)) {
+-    __ ld_ptr(G3, v_off, G5_method);
+-  } else {
+-    __ set(v_off,G5);
+-    __ ld_ptr(G3, G5, G5_method);
+-  }
++
++  __ lookup_virtual_method(G3_scratch, vtable_index, G5_method);
+ 
+ #ifndef PRODUCT
+   if (DebugVtables) {
+diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
+--- a/src/cpu/x86/vm/assembler_x86.cpp
++++ b/src/cpu/x86/vm/assembler_x86.cpp
+@@ -41,6 +41,15 @@
+ #include "gc_implementation/g1/heapRegion.hpp"
+ #endif
+ 
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) /* nothing */
++#define STOP(error) stop(error)
++#else
++#define BLOCK_COMMENT(str) block_comment(str)
++#define STOP(error) block_comment(error); stop(error)
++#endif
++
++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+ // Implementation of AddressLiteral
+ 
+ AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
+@@ -5508,23 +5517,7 @@
+     // To see where a verify_oop failed, get $ebx+40/X for this frame.
+     // This is the value of eip which points to where verify_oop will return.
+     if (os::message_box(msg, "Execution stopped, print registers?")) {
+-      ttyLocker ttyl;
+-      tty->print_cr("eip = 0x%08x", eip);
+-#ifndef PRODUCT
+-      if ((WizardMode || Verbose) && PrintMiscellaneous) {
+-        tty->cr();
+-        findpc(eip);
+-        tty->cr();
+-      }
+-#endif
+-      tty->print_cr("rax = 0x%08x", rax);
+-      tty->print_cr("rbx = 0x%08x", rbx);
+-      tty->print_cr("rcx = 0x%08x", rcx);
+-      tty->print_cr("rdx = 0x%08x", rdx);
+-      tty->print_cr("rdi = 0x%08x", rdi);
+-      tty->print_cr("rsi = 0x%08x", rsi);
+-      tty->print_cr("rbp = 0x%08x", rbp);
+-      tty->print_cr("rsp = 0x%08x", rsp);
++      print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
+       BREAKPOINT;
+       assert(false, "start up GDB");
+     }
+@@ -5536,12 +5529,53 @@
+   ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
+ }
+ 
++void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
++  ttyLocker ttyl;
++  FlagSetting fs(Debugging, true);
++  tty->print_cr("eip = 0x%08x", eip);
++#ifndef PRODUCT
++  if ((WizardMode || Verbose) && PrintMiscellaneous) {
++    tty->cr();
++    findpc(eip);
++    tty->cr();
++  }
++#endif
++#define PRINT_REG(rax) \
++  { tty->print("%s = ", #rax); os::print_location(tty, rax); }
++  PRINT_REG(rax);
++  PRINT_REG(rbx);
++  PRINT_REG(rcx);
++  PRINT_REG(rdx);
++  PRINT_REG(rdi);
++  PRINT_REG(rsi);
++  PRINT_REG(rbp);
++  PRINT_REG(rsp);
++#undef PRINT_REG
++  // Print some words near the top of the stack.
++  int* dump_sp = (int*) rsp;
++  for (int col1 = 0; col1 < 8; col1++) {
++    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
++    os::print_location(tty, *dump_sp++);
++  }
++  for (int row = 0; row < 16; row++) {
++    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
++    for (int col = 0; col < 8; col++) {
++      tty->print(" 0x%08x", *dump_sp++);
++    }
++    tty->cr();
++  }
++  // Print some instructions around pc:
++  Disassembler::decode((address)eip-64, (address)eip);
++  tty->print_cr("--------");
++  Disassembler::decode((address)eip, (address)eip+32);
++}
++
+ void MacroAssembler::stop(const char* msg) {
+   ExternalAddress message((address)msg);
+   // push address of message
+   pushptr(message.addr());
+   { Label L; call(L, relocInfo::none); bind(L); }     // push eip
+-  pusha();                                           // push registers
++  pusha();                                            // push registers
+   call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
+   hlt();
+ }
+@@ -5558,6 +5592,18 @@
+   pop_CPU_state();
+ }
+ 
++void MacroAssembler::print_state() {
++  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
++  pusha();                                            // push registers
++
++  push_CPU_state();
++  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
++  pop_CPU_state();
++
++  popa();
++  addl(rsp, wordSize);
++}
++
+ #else // _LP64
+ 
+ // 64 bit versions
+@@ -6023,14 +6069,33 @@
+ }
+ 
+ void MacroAssembler::warn(const char* msg) {
+-  push(rsp);
++  push(rbp);
++  movq(rbp, rsp);
+   andq(rsp, -16);     // align stack as required by push_CPU_state and call
+-
+   push_CPU_state();   // keeps alignment at 16 bytes
+   lea(c_rarg0, ExternalAddress((address) msg));
+   call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
+   pop_CPU_state();
+-  pop(rsp);
++  mov(rsp, rbp);
++  pop(rbp);
++}
++
++void MacroAssembler::print_state() {
++  address rip = pc();
++  pusha();            // get regs on stack
++  push(rbp);
++  movq(rbp, rsp);
++  andq(rsp, -16);     // align stack as required by push_CPU_state and call
++  push_CPU_state();   // keeps alignment at 16 bytes
++
++  lea(c_rarg0, InternalAddress(rip));
++  lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
++  call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);
++
++  pop_CPU_state();
++  mov(rsp, rbp);
++  pop(rbp);
++  popa();
+ }
+ 
+ #ifndef PRODUCT
+@@ -6039,7 +6104,7 @@
+ 
+ void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
+   // In order to get locks to work, we need to fake a in_VM state
+-  if (ShowMessageBoxOnError ) {
++  if (ShowMessageBoxOnError) {
+     JavaThread* thread = JavaThread::current();
+     JavaThreadState saved_state = thread->thread_state();
+     thread->set_thread_state(_thread_in_vm);
+@@ -6053,30 +6118,9 @@
+     // XXX correct this offset for amd64
+     // This is the value of eip which points to where verify_oop will return.
+     if (os::message_box(msg, "Execution stopped, print registers?")) {
+-      ttyLocker ttyl;
+-      tty->print_cr("rip = 0x%016lx", pc);
+-#ifndef PRODUCT
+-      tty->cr();
+-      findpc(pc);
+-      tty->cr();
+-#endif
+-      tty->print_cr("rax = 0x%016lx", regs[15]);
+-      tty->print_cr("rbx = 0x%016lx", regs[12]);
+-      tty->print_cr("rcx = 0x%016lx", regs[14]);
+-      tty->print_cr("rdx = 0x%016lx", regs[13]);
+-      tty->print_cr("rdi = 0x%016lx", regs[8]);
+-      tty->print_cr("rsi = 0x%016lx", regs[9]);
+-      tty->print_cr("rbp = 0x%016lx", regs[10]);
+-      tty->print_cr("rsp = 0x%016lx", regs[11]);
+-      tty->print_cr("r8  = 0x%016lx", regs[7]);
+-      tty->print_cr("r9  = 0x%016lx", regs[6]);
+-      tty->print_cr("r10 = 0x%016lx", regs[5]);
+-      tty->print_cr("r11 = 0x%016lx", regs[4]);
+-      tty->print_cr("r12 = 0x%016lx", regs[3]);
+-      tty->print_cr("r13 = 0x%016lx", regs[2]);
+-      tty->print_cr("r14 = 0x%016lx", regs[1]);
+-      tty->print_cr("r15 = 0x%016lx", regs[0]);
++      print_state64(pc, regs);
+       BREAKPOINT;
++      assert(false, "start up GDB");
+     }
+     ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
+   } else {
+@@ -6087,6 +6131,54 @@
+   }
+ }
+ 
++void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
++  ttyLocker ttyl;
++  FlagSetting fs(Debugging, true);
++  tty->print_cr("rip = 0x%016lx", pc);
++#ifndef PRODUCT
++  tty->cr();
++  findpc(pc);
++  tty->cr();
++#endif
++#define PRINT_REG(rax, value) \
++  { tty->print("%s = ", #rax); os::print_location(tty, value); }
++  PRINT_REG(rax, regs[15]);
++  PRINT_REG(rbx, regs[12]);
++  PRINT_REG(rcx, regs[14]);
++  PRINT_REG(rdx, regs[13]);
++  PRINT_REG(rdi, regs[8]);
++  PRINT_REG(rsi, regs[9]);
++  PRINT_REG(rbp, regs[10]);
++  PRINT_REG(rsp, regs[11]);
++  PRINT_REG(r8 , regs[7]);
++  PRINT_REG(r9 , regs[6]);
++  PRINT_REG(r10, regs[5]);
++  PRINT_REG(r11, regs[4]);
++  PRINT_REG(r12, regs[3]);
++  PRINT_REG(r13, regs[2]);
++  PRINT_REG(r14, regs[1]);
++  PRINT_REG(r15, regs[0]);
++#undef PRINT_REG
++  // Print some words near the top of the stack.
++  int64_t* rsp = (int64_t*) regs[11];
++  int64_t* dump_sp = rsp;
++  for (int col1 = 0; col1 < 8; col1++) {
++    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
++    os::print_location(tty, *dump_sp++);
++  }
++  for (int row = 0; row < 25; row++) {
++    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
++    for (int col = 0; col < 4; col++) {
++      tty->print(" 0x%016lx", *dump_sp++);
++    }
++    tty->cr();
++  }
++  // Print some instructions around pc:
++  Disassembler::decode((address)pc-64, (address)pc);
++  tty->print_cr("--------");
++  Disassembler::decode((address)pc, (address)pc+32);
++}
++
+ #endif // _LP64
+ 
+ // Now versions that are common to 32/64 bit
+@@ -6456,7 +6548,7 @@
+       get_thread(rax);
+       cmpptr(java_thread, rax);
+       jcc(Assembler::equal, L);
+-      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
++      STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
+       bind(L);
+     }
+     pop(rax);
+@@ -7196,7 +7288,7 @@
+       jcc(Assembler::notZero, integer);
+       cmpl(tmp3, 0x80000000);
+       jcc(Assembler::notZero, integer);
+-      stop("integer indefinite value shouldn't be seen here");
++      STOP("integer indefinite value shouldn't be seen here");
+       bind(integer);
+     }
+ #else
+@@ -7206,7 +7298,7 @@
+       shlq(tmp3, 1);
+       jcc(Assembler::carryClear, integer);
+       jcc(Assembler::notZero, integer);
+-      stop("integer indefinite value shouldn't be seen here");
++      STOP("integer indefinite value shouldn't be seen here");
+       bind(integer);
+     }
+ #endif
+@@ -8388,7 +8480,7 @@
+     shlptr(tsize, LogHeapWordSize);
+     cmpptr(t1, tsize);
+     jcc(Assembler::equal, ok);
+-    stop("assert(t1 != tlab size)");
++    STOP("assert(t1 != tlab size)");
+     should_not_reach_here();
+ 
+     bind(ok);
+@@ -8727,6 +8819,19 @@
+ }
+ 
+ 
++// virtual method calling
++void MacroAssembler::lookup_virtual_method(Register recv_klass,
++                                           RegisterOrConstant vtable_index,
++                                           Register method_result) {
++  const int base = instanceKlass::vtable_start_offset() * wordSize;
++  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
++  Address vtable_entry_addr(recv_klass,
++                            vtable_index, Address::times_ptr,
++                            base + vtableEntry::method_offset_in_bytes());
++  movptr(method_result, vtable_entry_addr);
++}
++
++
+ void MacroAssembler::check_klass_subtype(Register sub_klass,
+                            Register super_klass,
+                            Register temp_reg,
+@@ -8976,6 +9081,7 @@
+   // Pass register number to verify_oop_subroutine
+   char* b = new char[strlen(s) + 50];
+   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
++  BLOCK_COMMENT("verify_oop {");
+ #ifdef _LP64
+   push(rscratch1);                    // save r10, trashed by movptr()
+ #endif
+@@ -8990,6 +9096,7 @@
+   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
+   call(rax);
+   // Caller pops the arguments (oop, message) and restores rax, r10
++  BLOCK_COMMENT("} verify_oop");
+ }
+ 
+ 
+@@ -9010,7 +9117,7 @@
+       jcc(Assembler::notZero, L);
+       char* buf = new char[40];
+       sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
+-      stop(buf);
++      STOP(buf);
+     } else {
+       jccb(Assembler::notZero, L);
+       hlt();
+@@ -9026,60 +9133,6 @@
+ }
+ 
+ 
+-// registers on entry:
+-//  - rax ('check' register): required MethodType
+-//  - rcx: method handle
+-//  - rdx, rsi, or ?: killable temp
+-void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
+-                                              Register temp_reg,
+-                                              Label& wrong_method_type) {
+-  Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));
+-  // compare method type against that of the receiver
+-  if (UseCompressedOops) {
+-    load_heap_oop(temp_reg, type_addr);
+-    cmpptr(mtype_reg, temp_reg);
+-  } else {
+-    cmpptr(mtype_reg, type_addr);
+-  }
+-  jcc(Assembler::notEqual, wrong_method_type);
+-}
+-
+-
+-// A method handle has a "vmslots" field which gives the size of its
+-// argument list in JVM stack slots.  This field is either located directly
+-// in every method handle, or else is indirectly accessed through the
+-// method handle's MethodType.  This macro hides the distinction.
+-void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
+-                                                Register temp_reg) {
+-  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
+-  // load mh.type.form.vmslots
+-  Register temp2_reg = vmslots_reg;
+-  load_heap_oop(temp2_reg, Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)));
+-  load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)));
+-  movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
+-}
+-
+-
+-// registers on entry:
+-//  - rcx: method handle
+-//  - rdx: killable temp (interpreted only)
+-//  - rax: killable temp (compiled only)
+-void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
+-  assert(mh_reg == rcx, "caller must put MH object in rcx");
+-  assert_different_registers(mh_reg, temp_reg);
+-
+-  // pick out the interpreted side of the handler
+-  // NOTE: vmentry is not an oop!
+-  movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg)));
+-
+-  // off we go...
+-  jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));
+-
+-  // for the various stubs which take control at this point,
+-  // see MethodHandles::generate_method_handle_stub
+-}
+-
+-
+ Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
+                                          int extra_slot_offset) {
+   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
+@@ -9152,14 +9205,14 @@
+     movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
+     cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
+     jcc(Assembler::aboveEqual, next);
+-    stop("assert(top >= start)");
++    STOP("assert(top >= start)");
+     should_not_reach_here();
+ 
+     bind(next);
+     movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
+     cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
+     jcc(Assembler::aboveEqual, ok);
+-    stop("assert(top <= end)");
++    STOP("assert(top <= end)");
+     should_not_reach_here();
+ 
+     bind(ok);
+@@ -9592,6 +9645,25 @@
+     movptr(dst, src);
+ }
+ 
++void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
++  assert_different_registers(src1, tmp);
++#ifdef _LP64
++  if (UseCompressedOops) {
++    bool did_push = false;
++    if (tmp == noreg) {
++      tmp = rax;
++      push(tmp);
++      did_push = true;
++      assert(!src2.uses(rsp), "can't push");
++    }
++    load_heap_oop(tmp, src2);
++    cmpptr(src1, tmp);
++    if (did_push)  pop(tmp);
++  } else
++#endif
++    cmpptr(src1, src2);
++}
++
+ // Used for storing NULLs.
+ void MacroAssembler::store_heap_oop_null(Address dst) {
+ #ifdef _LP64
+@@ -9622,7 +9694,7 @@
+     push(rscratch1); // cmpptr trashes rscratch1
+     cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
+     jcc(Assembler::equal, ok);
+-    stop(msg);
++    STOP(msg);
+     bind(ok);
+     pop(rscratch1);
+   }
+@@ -9655,7 +9727,7 @@
+     Label ok;
+     testq(r, r);
+     jcc(Assembler::notEqual, ok);
+-    stop("null oop passed to encode_heap_oop_not_null");
++    STOP("null oop passed to encode_heap_oop_not_null");
+     bind(ok);
+   }
+ #endif
+@@ -9676,7 +9748,7 @@
+     Label ok;
+     testq(src, src);
+     jcc(Assembler::notEqual, ok);
+-    stop("null oop passed to encode_heap_oop_not_null2");
++    STOP("null oop passed to encode_heap_oop_not_null2");
+     bind(ok);
+   }
+ #endif
+@@ -9867,7 +9939,7 @@
+     cmpptr(rax, StackAlignmentInBytes-wordSize);
+     pop(rax);
+     jcc(Assembler::equal, L);
+-    stop("Stack is not properly aligned!");
++    STOP("Stack is not properly aligned!");
+     bind(L);
+   }
+ #endif
+@@ -10541,13 +10613,6 @@
+   bind(DONE);
+ }
+ 
+-#ifdef PRODUCT
+-#define BLOCK_COMMENT(str) /* nothing */
+-#else
+-#define BLOCK_COMMENT(str) block_comment(str)
+-#endif
+-
+-#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+ void MacroAssembler::generate_fill(BasicType t, bool aligned,
+                                    Register to, Register value, Register count,
+                                    Register rtmp, XMMRegister xtmp) {
+diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp
+--- a/src/cpu/x86/vm/assembler_x86.hpp
++++ b/src/cpu/x86/vm/assembler_x86.hpp
+@@ -1940,6 +1940,7 @@
+   void load_heap_oop(Register dst, Address src);
+   void load_heap_oop_not_null(Register dst, Address src);
+   void store_heap_oop(Address dst, Register src);
++  void cmp_heap_oop(Register src1, Address src2, Register tmp = noreg);
+ 
+   // Used for storing NULL. All other oop constants should be
+   // stored using routines that take a jobject.
+@@ -2117,6 +2118,11 @@
+                                Register scan_temp,
+                                Label& no_such_interface);
+ 
++  // virtual method calling
++  void lookup_virtual_method(Register recv_klass,
++                             RegisterOrConstant vtable_index,
++                             Register method_result);
++
+   // Test sub_klass against super_klass, with fast and slow paths.
+ 
+   // The fast path produces a tri-state answer: yes / no / maybe-slow.
+@@ -2152,15 +2158,8 @@
+                            Label& L_success);
+ 
+   // method handles (JSR 292)
+-  void check_method_handle_type(Register mtype_reg, Register mh_reg,
+-                                Register temp_reg,
+-                                Label& wrong_method_type);
+-  void load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
+-                                  Register temp_reg);
+-  void jump_to_method_handle_entry(Register mh_reg, Register temp_reg);
+   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
+ 
+-
+   //----
+   void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
+ 
+@@ -2179,8 +2178,13 @@
+   // prints msg and continues
+   void warn(const char* msg);
+ 
++  // dumps registers and other state
++  void print_state();
++
+   static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
+   static void debug64(char* msg, int64_t pc, int64_t regs[]);
++  static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip);
++  static void print_state64(int64_t pc, int64_t regs[]);
+ 
+   void os_breakpoint();
+ 
+diff --git a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
+--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
++++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
+@@ -3508,6 +3508,7 @@
+ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
+   ciMethod* method = op->profiled_method();
+   int bci          = op->profiled_bci();
++  ciMethod* callee = op->profiled_callee();
+ 
+   // Update counter for all call types
+   ciMethodData* md = method->method_data_or_null();
+@@ -3519,9 +3520,11 @@
+   __ movoop(mdo, md->constant_encoding());
+   Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+   Bytecodes::Code bc = method->java_code_at_bci(bci);
++  const bool callee_is_static = callee->is_loaded() && callee->is_static();
+   // Perform additional virtual call profiling for invokevirtual and
+   // invokeinterface bytecodes
+   if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
++      !callee_is_static &&  // required for optimized MH invokes
+       C1ProfileVirtualCalls) {
+     assert(op->recv()->is_single_cpu(), "recv must be allocated");
+     Register recv = op->recv()->as_register();
+diff --git a/src/cpu/x86/vm/cppInterpreter_x86.cpp b/src/cpu/x86/vm/cppInterpreter_x86.cpp
+--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp
++++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp
+@@ -871,9 +871,9 @@
+     // Need to differentiate between igetfield, agetfield, bgetfield etc.
+     // because they are different sizes.
+     // Use the type from the constant pool cache
+-    __ shrl(rdx, ConstantPoolCacheEntry::tosBits);
+-    // Make sure we don't need to mask rdx for tosBits after the above shift
+-    ConstantPoolCacheEntry::verify_tosBits();
++    __ shrl(rdx, ConstantPoolCacheEntry::tos_state_shift);
++    // Make sure we don't need to mask rdx after the above shift
++    ConstantPoolCacheEntry::verify_tos_state_shift();
+ #ifdef _LP64
+     Label notObj;
+     __ cmpl(rdx, atos);
+diff --git a/src/cpu/x86/vm/frame_x86.cpp b/src/cpu/x86/vm/frame_x86.cpp
+--- a/src/cpu/x86/vm/frame_x86.cpp
++++ b/src/cpu/x86/vm/frame_x86.cpp
+@@ -439,7 +439,6 @@
+ // frame::sender_for_compiled_frame
+ frame frame::sender_for_compiled_frame(RegisterMap* map) const {
+   assert(map != NULL, "map must be set");
+-  assert(!is_ricochet_frame(), "caller must handle this");
+ 
+   // frame owned by optimizing compiler
+   assert(_cb->frame_size() >= 0, "must have non-zero frame size");
+@@ -483,7 +482,6 @@
+   if (is_entry_frame())       return sender_for_entry_frame(map);
+   if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
+   assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+-  if (is_ricochet_frame())    return sender_for_ricochet_frame(map);
+ 
+   if (_cb != NULL) {
+     return sender_for_compiled_frame(map);
+@@ -658,9 +656,7 @@
+   values.describe(frame_no, fp() + frame::name##_offset, #name)
+ 
+ void frame::describe_pd(FrameValues& values, int frame_no) {
+-  if (is_ricochet_frame()) {
+-    MethodHandles::RicochetFrame::describe(this, values, frame_no);
+-  } else if (is_interpreted_frame()) {
++  if (is_interpreted_frame()) {
+     DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
+     DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
+     DESCRIBE_FP_OFFSET(interpreter_frame_method);
+@@ -682,12 +678,7 @@
+   if (_cb != NULL) {
+     // use the frame size if valid
+     int size = _cb->frame_size();
+-    if ((size > 0) &&
+-        (! is_ricochet_frame())) {
+-      // Work-around: ricochet explicitly excluded because frame size is not
+-      // constant for the ricochet blob but its frame_size could not, for
+-      // some reasons, be declared as <= 0. This potentially confusing
+-      // size declaration should be fixed as another CR.
++    if (size > 0) {
+       return unextended_sp() + size;
+     }
+   }
+diff --git a/src/cpu/x86/vm/interp_masm_x86_32.cpp b/src/cpu/x86/vm/interp_masm_x86_32.cpp
+--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp
++++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp
+@@ -253,8 +253,12 @@
+   get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
+   movptr(bytecode, Address(cache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
+   const int shift_count = (1 + byte_no) * BitsPerByte;
++  assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) ||
++         (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift),
++         "correct shift count");
+   shrptr(bytecode, shift_count);
+-  andptr(bytecode, 0xFF);
++  assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
++  andptr(bytecode, ConstantPoolCacheEntry::bytecode_1_mask);
+ }
+ 
+ 
+diff --git a/src/cpu/x86/vm/interp_masm_x86_64.cpp b/src/cpu/x86/vm/interp_masm_x86_64.cpp
+--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp
++++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp
+@@ -256,8 +256,12 @@
+   // little-endian machines allow us that.
+   movl(bytecode, Address(cache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
+   const int shift_count = (1 + byte_no) * BitsPerByte;
++  assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) ||
++         (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift),
++         "correct shift count");
+   shrl(bytecode, shift_count);
+-  andl(bytecode, 0xFF);
++  assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
++  andl(bytecode, ConstantPoolCacheEntry::bytecode_1_mask);
+ }
+ 
+ 
+diff --git a/src/cpu/x86/vm/interpreterGenerator_x86.hpp b/src/cpu/x86/vm/interpreterGenerator_x86.hpp
+--- a/src/cpu/x86/vm/interpreterGenerator_x86.hpp
++++ b/src/cpu/x86/vm/interpreterGenerator_x86.hpp
+@@ -35,7 +35,6 @@
+   address generate_normal_entry(bool synchronized);
+   address generate_native_entry(bool synchronized);
+   address generate_abstract_entry(void);
+-  address generate_method_handle_entry(void);
+   address generate_math_entry(AbstractInterpreter::MethodKind kind);
+   address generate_empty_entry(void);
+   address generate_accessor_entry(void);
+diff --git a/src/cpu/x86/vm/interpreter_x86_32.cpp b/src/cpu/x86/vm/interpreter_x86_32.cpp
+--- a/src/cpu/x86/vm/interpreter_x86_32.cpp
++++ b/src/cpu/x86/vm/interpreter_x86_32.cpp
+@@ -243,18 +243,6 @@
+ }
+ 
+ 
+-// Method handle invoker
+-// Dispatch a method of the form java.lang.invoke.MethodHandles::invoke(...)
+-address InterpreterGenerator::generate_method_handle_entry(void) {
+-  if (!EnableInvokeDynamic) {
+-    return generate_abstract_entry();
+-  }
+-
+-  address entry_point = MethodHandles::generate_method_handle_interpreter_entry(_masm);
+-
+-  return entry_point;
+-}
+-
+ void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
+ 
+   // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
+diff --git a/src/cpu/x86/vm/interpreter_x86_64.cpp b/src/cpu/x86/vm/interpreter_x86_64.cpp
+--- a/src/cpu/x86/vm/interpreter_x86_64.cpp
++++ b/src/cpu/x86/vm/interpreter_x86_64.cpp
+@@ -325,19 +325,6 @@
+ }
+ 
+ 
+-// Method handle invoker
+-// Dispatch a method of the form java.lang.invoke.MethodHandles::invoke(...)
+-address InterpreterGenerator::generate_method_handle_entry(void) {
+-  if (!EnableInvokeDynamic) {
+-    return generate_abstract_entry();
+-  }
+-
+-  address entry_point = MethodHandles::generate_method_handle_interpreter_entry(_masm);
+-
+-  return entry_point;
+-}
+-
+-
+ // Empty method, generate a very fast return.
+ 
+ address InterpreterGenerator::generate_empty_entry(void) {
+diff --git a/src/cpu/x86/vm/methodHandles_x86.cpp b/src/cpu/x86/vm/methodHandles_x86.cpp
+--- a/src/cpu/x86/vm/methodHandles_x86.cpp
++++ b/src/cpu/x86/vm/methodHandles_x86.cpp
+@@ -32,8 +32,10 @@
+ 
+ #ifdef PRODUCT
+ #define BLOCK_COMMENT(str) /* nothing */
++#define STOP(error) stop(error)
+ #else
+ #define BLOCK_COMMENT(str) __ block_comment(str)
++#define STOP(error) block_comment(error); __ stop(error)
+ #endif
+ 
+ #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+@@ -43,483 +45,24 @@
+   return RegisterOrConstant(value);
+ }
+ 
+-address MethodHandleEntry::start_compiled_entry(MacroAssembler* _masm,
+-                                                address interpreted_entry) {
+-  // Just before the actual machine code entry point, allocate space
+-  // for a MethodHandleEntry::Data record, so that we can manage everything
+-  // from one base pointer.
+-  __ align(wordSize);
+-  address target = __ pc() + sizeof(Data);
+-  while (__ pc() < target) {
+-    __ nop();
+-    __ align(wordSize);
+-  }
+-
+-  MethodHandleEntry* me = (MethodHandleEntry*) __ pc();
+-  me->set_end_address(__ pc());         // set a temporary end_address
+-  me->set_from_interpreted_entry(interpreted_entry);
+-  me->set_type_checking_entry(NULL);
+-
+-  return (address) me;
+-}
+-
+-MethodHandleEntry* MethodHandleEntry::finish_compiled_entry(MacroAssembler* _masm,
+-                                                address start_addr) {
+-  MethodHandleEntry* me = (MethodHandleEntry*) start_addr;
+-  assert(me->end_address() == start_addr, "valid ME");
+-
+-  // Fill in the real end_address:
+-  __ align(wordSize);
+-  me->set_end_address(__ pc());
+-
+-  return me;
+-}
+-
+-// stack walking support
+-
+-frame MethodHandles::ricochet_frame_sender(const frame& fr, RegisterMap *map) {
+-  RicochetFrame* f = RicochetFrame::from_frame(fr);
+-  if (map->update_map())
+-    frame::update_map_with_saved_link(map, &f->_sender_link);
+-  return frame(f->extended_sender_sp(), f->exact_sender_sp(), f->sender_link(), f->sender_pc());
+-}
+-
+-void MethodHandles::ricochet_frame_oops_do(const frame& fr, OopClosure* blk, const RegisterMap* reg_map) {
+-  RicochetFrame* f = RicochetFrame::from_frame(fr);
+-
+-  // pick up the argument type descriptor:
+-  Thread* thread = Thread::current();
+-  Handle cookie(thread, f->compute_saved_args_layout(true, true));
+-
+-  // process fixed part
+-  blk->do_oop((oop*)f->saved_target_addr());
+-  blk->do_oop((oop*)f->saved_args_layout_addr());
+-
+-  // process variable arguments:
+-  if (cookie.is_null())  return;  // no arguments to describe
+-
+-  // the cookie is actually the invokeExact method for my target
+-  // his argument signature is what I'm interested in
+-  assert(cookie->is_method(), "");
+-  methodHandle invoker(thread, methodOop(cookie()));
+-  assert(invoker->name() == vmSymbols::invokeExact_name(), "must be this kind of method");
+-  assert(!invoker->is_static(), "must have MH argument");
+-  int slot_count = invoker->size_of_parameters();
+-  assert(slot_count >= 1, "must include 'this'");
+-  intptr_t* base = f->saved_args_base();
+-  intptr_t* retval = NULL;
+-  if (f->has_return_value_slot())
+-    retval = f->return_value_slot_addr();
+-  int slot_num = slot_count;
+-  intptr_t* loc = &base[slot_num -= 1];
+-  //blk->do_oop((oop*) loc);   // original target, which is irrelevant
+-  int arg_num = 0;
+-  for (SignatureStream ss(invoker->signature()); !ss.is_done(); ss.next()) {
+-    if (ss.at_return_type())  continue;
+-    BasicType ptype = ss.type();
+-    if (ptype == T_ARRAY)  ptype = T_OBJECT; // fold all refs to T_OBJECT
+-    assert(ptype >= T_BOOLEAN && ptype <= T_OBJECT, "not array or void");
+-    loc = &base[slot_num -= type2size[ptype]];
+-    bool is_oop = (ptype == T_OBJECT && loc != retval);
+-    if (is_oop)  blk->do_oop((oop*)loc);
+-    arg_num += 1;
+-  }
+-  assert(slot_num == 0, "must have processed all the arguments");
+-}
+-
+-oop MethodHandles::RicochetFrame::compute_saved_args_layout(bool read_cache, bool write_cache) {
+-  oop cookie = NULL;
+-  if (read_cache) {
+-    cookie = saved_args_layout();
+-    if (cookie != NULL)  return cookie;
+-  }
+-  oop target = saved_target();
+-  oop mtype  = java_lang_invoke_MethodHandle::type(target);
+-  oop mtform = java_lang_invoke_MethodType::form(mtype);
+-  cookie = java_lang_invoke_MethodTypeForm::vmlayout(mtform);
+-  if (write_cache)  {
+-    (*saved_args_layout_addr()) = cookie;
+-  }
+-  return cookie;
+-}
+-
+-void MethodHandles::RicochetFrame::generate_ricochet_blob(MacroAssembler* _masm,
+-                                                          // output params:
+-                                                          int* bounce_offset,
+-                                                          int* exception_offset,
+-                                                          int* frame_size_in_words) {
+-  (*frame_size_in_words) = RicochetFrame::frame_size_in_bytes() / wordSize;
+-
+-  address start = __ pc();
+-
+-#ifdef ASSERT
+-  __ hlt(); __ hlt(); __ hlt();
+-  // here's a hint of something special:
+-  __ push(MAGIC_NUMBER_1);
+-  __ push(MAGIC_NUMBER_2);
+-#endif //ASSERT
+-  __ hlt();  // not reached
+-
+-  // A return PC has just been popped from the stack.
+-  // Return values are in registers.
+-  // The ebp points into the RicochetFrame, which contains
+-  // a cleanup continuation we must return to.
+-
+-  (*bounce_offset) = __ pc() - start;
+-  BLOCK_COMMENT("ricochet_blob.bounce");
+-
+-  if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
+-  trace_method_handle(_masm, "return/ricochet_blob.bounce");
+-
+-  __ jmp(frame_address(continuation_offset_in_bytes()));
+-  __ hlt();
+-  DEBUG_ONLY(__ push(MAGIC_NUMBER_2));
+-
+-  (*exception_offset) = __ pc() - start;
+-  BLOCK_COMMENT("ricochet_blob.exception");
+-
+-  // compare this to Interpreter::rethrow_exception_entry, which is parallel code
+-  // for example, see TemplateInterpreterGenerator::generate_throw_exception
+-  // Live registers in:
+-  //   rax: exception
+-  //   rdx: return address/pc that threw exception (ignored, always equal to bounce addr)
+-  __ verify_oop(rax);
+-
+-  // no need to empty_FPU_stack or reinit_heapbase, since caller frame will do the same if needed
+-
+-  // Take down the frame.
+-
+-  // Cf. InterpreterMacroAssembler::remove_activation.
+-  leave_ricochet_frame(_masm, /*rcx_recv=*/ noreg,
+-                       saved_last_sp_register(),
+-                       /*sender_pc_reg=*/ rdx);
+-
+-  // In between activations - previous activation type unknown yet
+-  // compute continuation point - the continuation point expects the
+-  // following registers set up:
+-  //
+-  // rax: exception
+-  // rdx: return address/pc that threw exception
+-  // rsp: expression stack of caller
+-  // rbp: ebp of caller
+-  __ push(rax);                                  // save exception
+-  __ push(rdx);                                  // save return address
+-  Register thread_reg = LP64_ONLY(r15_thread) NOT_LP64(rdi);
+-  NOT_LP64(__ get_thread(thread_reg));
+-  __ call_VM_leaf(CAST_FROM_FN_PTR(address,
+-                                   SharedRuntime::exception_handler_for_return_address),
+-                  thread_reg, rdx);
+-  __ mov(rbx, rax);                              // save exception handler
+-  __ pop(rdx);                                   // restore return address
+-  __ pop(rax);                                   // restore exception
+-  __ jmp(rbx);                                   // jump to exception
+-                                                 // handler of caller
+-}
+-
+-void MethodHandles::RicochetFrame::enter_ricochet_frame(MacroAssembler* _masm,
+-                                                        Register rcx_recv,
+-                                                        Register rax_argv,
+-                                                        address return_handler,
+-                                                        Register rbx_temp) {
+-  const Register saved_last_sp = saved_last_sp_register();
+-  Address rcx_mh_vmtarget(    rcx_recv, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes() );
+-  Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() );
+-
+-  // Push the RicochetFrame a word at a time.
+-  // This creates something similar to an interpreter frame.
+-  // Cf. TemplateInterpreterGenerator::generate_fixed_frame.
+-  BLOCK_COMMENT("push RicochetFrame {");
+-  DEBUG_ONLY(int rfo = (int) sizeof(RicochetFrame));
+-  assert((rfo -= wordSize) == RicochetFrame::sender_pc_offset_in_bytes(), "");
+-#define RF_FIELD(push_value, name)                                      \
+-  { push_value;                                                         \
+-    assert((rfo -= wordSize) == RicochetFrame::name##_offset_in_bytes(), ""); }
+-  RF_FIELD(__ push(rbp),                   sender_link);
+-  RF_FIELD(__ push(saved_last_sp),         exact_sender_sp);  // rsi/r13
+-  RF_FIELD(__ pushptr(rcx_amh_conversion), conversion);
+-  RF_FIELD(__ push(rax_argv),              saved_args_base);   // can be updated if args are shifted
+-  RF_FIELD(__ push((int32_t) NULL_WORD),   saved_args_layout); // cache for GC layout cookie
+-  if (UseCompressedOops) {
+-    __ load_heap_oop(rbx_temp, rcx_mh_vmtarget);
+-    RF_FIELD(__ push(rbx_temp),            saved_target);
+-  } else {
+-    RF_FIELD(__ pushptr(rcx_mh_vmtarget),  saved_target);
+-  }
+-  __ lea(rbx_temp, ExternalAddress(return_handler));
+-  RF_FIELD(__ push(rbx_temp),              continuation);
+-#undef RF_FIELD
+-  assert(rfo == 0, "fully initialized the RicochetFrame");
+-  // compute new frame pointer:
+-  __ lea(rbp, Address(rsp, RicochetFrame::sender_link_offset_in_bytes()));
+-  // Push guard word #1 in debug mode.
+-  DEBUG_ONLY(__ push((int32_t) RicochetFrame::MAGIC_NUMBER_1));
+-  // For debugging, leave behind an indication of which stub built this frame.
+-  DEBUG_ONLY({ Label L; __ call(L, relocInfo::none); __ bind(L); });
+-  BLOCK_COMMENT("} RicochetFrame");
+-}
+-
+-void MethodHandles::RicochetFrame::leave_ricochet_frame(MacroAssembler* _masm,
+-                                                        Register rcx_recv,
+-                                                        Register new_sp_reg,
+-                                                        Register sender_pc_reg) {
+-  assert_different_registers(rcx_recv, new_sp_reg, sender_pc_reg);
+-  const Register saved_last_sp = saved_last_sp_register();
+-  // Take down the frame.
+-  // Cf. InterpreterMacroAssembler::remove_activation.
+-  BLOCK_COMMENT("end_ricochet_frame {");
+-  // TO DO: If (exact_sender_sp - extended_sender_sp) > THRESH, compact the frame down.
+-  // This will keep stack in bounds even with unlimited tailcalls, each with an adapter.
+-  if (rcx_recv->is_valid())
+-    __ movptr(rcx_recv,    RicochetFrame::frame_address(RicochetFrame::saved_target_offset_in_bytes()));
+-  __ movptr(sender_pc_reg, RicochetFrame::frame_address(RicochetFrame::sender_pc_offset_in_bytes()));
+-  __ movptr(saved_last_sp, RicochetFrame::frame_address(RicochetFrame::exact_sender_sp_offset_in_bytes()));
+-  __ movptr(rbp,           RicochetFrame::frame_address(RicochetFrame::sender_link_offset_in_bytes()));
+-  __ mov(rsp, new_sp_reg);
+-  BLOCK_COMMENT("} end_ricochet_frame");
+-}
+-
+-// Emit code to verify that RBP is pointing at a valid ricochet frame.
+-#ifndef PRODUCT
+-enum {
+-  ARG_LIMIT = 255, SLOP = 4,
+-  // use this parameter for checking for garbage stack movements:
+-  UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)
+-  // the slop defends against false alarms due to fencepost errors
+-};
+-#endif
+-
+-#ifdef ASSERT
+-void MethodHandles::RicochetFrame::verify_clean(MacroAssembler* _masm) {
+-  // The stack should look like this:
+-  //    ... keep1 | dest=42 | keep2 | RF | magic | handler | magic | recursive args |
+-  // Check various invariants.
+-  verify_offsets();
+-
+-  Register rdi_temp = rdi;
+-  Register rcx_temp = rcx;
+-  { __ push(rdi_temp); __ push(rcx_temp); }
+-#define UNPUSH_TEMPS \
+-  { __ pop(rcx_temp);  __ pop(rdi_temp); }
+-
+-  Address magic_number_1_addr  = RicochetFrame::frame_address(RicochetFrame::magic_number_1_offset_in_bytes());
+-  Address magic_number_2_addr  = RicochetFrame::frame_address(RicochetFrame::magic_number_2_offset_in_bytes());
+-  Address continuation_addr    = RicochetFrame::frame_address(RicochetFrame::continuation_offset_in_bytes());
+-  Address conversion_addr      = RicochetFrame::frame_address(RicochetFrame::conversion_offset_in_bytes());
+-  Address saved_args_base_addr = RicochetFrame::frame_address(RicochetFrame::saved_args_base_offset_in_bytes());
+-
+-  Label L_bad, L_ok;
+-  BLOCK_COMMENT("verify_clean {");
+-  // Magic numbers must check out:
+-  __ cmpptr(magic_number_1_addr, (int32_t) MAGIC_NUMBER_1);
+-  __ jcc(Assembler::notEqual, L_bad);
+-  __ cmpptr(magic_number_2_addr, (int32_t) MAGIC_NUMBER_2);
+-  __ jcc(Assembler::notEqual, L_bad);
+-
+-  // Arguments pointer must look reasonable:
+-  __ movptr(rcx_temp, saved_args_base_addr);
+-  __ cmpptr(rcx_temp, rbp);
+-  __ jcc(Assembler::below, L_bad);
+-  __ subptr(rcx_temp, UNREASONABLE_STACK_MOVE * Interpreter::stackElementSize);
+-  __ cmpptr(rcx_temp, rbp);
+-  __ jcc(Assembler::above, L_bad);
+-
+-  load_conversion_dest_type(_masm, rdi_temp, conversion_addr);
+-  __ cmpl(rdi_temp, T_VOID);
+-  __ jcc(Assembler::equal, L_ok);
+-  __ movptr(rcx_temp, saved_args_base_addr);
+-  load_conversion_vminfo(_masm, rdi_temp, conversion_addr);
+-  __ cmpptr(Address(rcx_temp, rdi_temp, Interpreter::stackElementScale()),
+-            (int32_t) RETURN_VALUE_PLACEHOLDER);
+-  __ jcc(Assembler::equal, L_ok);
+-  __ BIND(L_bad);
+-  UNPUSH_TEMPS;
+-  __ stop("damaged ricochet frame");
+-  __ BIND(L_ok);
+-  UNPUSH_TEMPS;
+-  BLOCK_COMMENT("} verify_clean");
+-
+-#undef UNPUSH_TEMPS
+-
+-}
+-#endif //ASSERT
+-
+ void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) {
+   if (VerifyMethodHandles)
+     verify_klass(_masm, klass_reg, SystemDictionaryHandles::Class_klass(),
+-                 "AMH argument is a Class");
++                 "MH argument is a Class");
+   __ load_heap_oop(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes()));
+ }
+ 
+-void MethodHandles::load_conversion_vminfo(MacroAssembler* _masm, Register reg, Address conversion_field_addr) {
+-  int bits   = BitsPerByte;
+-  int offset = (CONV_VMINFO_SHIFT / bits);
+-  int shift  = (CONV_VMINFO_SHIFT % bits);
+-  __ load_unsigned_byte(reg, conversion_field_addr.plus_disp(offset));
+-  assert(CONV_VMINFO_MASK == right_n_bits(bits - shift), "else change type of previous load");
+-  assert(shift == 0, "no shift needed");
++#ifdef ASSERT
++static int check_nonzero(const char* xname, int x) {
++  assert(x != 0, err_msg("%s should be nonzero", xname));
++  return x;
+ }
+-
+-void MethodHandles::load_conversion_dest_type(MacroAssembler* _masm, Register reg, Address conversion_field_addr) {
+-  int bits   = BitsPerByte;
+-  int offset = (CONV_DEST_TYPE_SHIFT / bits);
+-  int shift  = (CONV_DEST_TYPE_SHIFT % bits);
+-  __ load_unsigned_byte(reg, conversion_field_addr.plus_disp(offset));
+-  assert(CONV_TYPE_MASK == right_n_bits(bits - shift), "else change type of previous load");
+-  __ shrl(reg, shift);
+-  DEBUG_ONLY(int conv_type_bits = (int) exact_log2(CONV_TYPE_MASK+1));
+-  assert((shift + conv_type_bits) == bits, "left justified in byte");
+-}
+-
+-void MethodHandles::load_stack_move(MacroAssembler* _masm,
+-                                    Register rdi_stack_move,
+-                                    Register rcx_amh,
+-                                    bool might_be_negative) {
+-  BLOCK_COMMENT("load_stack_move {");
+-  Address rcx_amh_conversion(rcx_amh, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes());
+-  __ movl(rdi_stack_move, rcx_amh_conversion);
+-  __ sarl(rdi_stack_move, CONV_STACK_MOVE_SHIFT);
+-#ifdef _LP64
+-  if (might_be_negative) {
+-    // clean high bits of stack motion register (was loaded as an int)
+-    __ movslq(rdi_stack_move, rdi_stack_move);
+-  }
+-#endif //_LP64
+-#ifdef ASSERT
+-  if (VerifyMethodHandles) {
+-    Label L_ok, L_bad;
+-    int32_t stack_move_limit = 0x4000;  // extra-large
+-    __ cmpptr(rdi_stack_move, stack_move_limit);
+-    __ jcc(Assembler::greaterEqual, L_bad);
+-    __ cmpptr(rdi_stack_move, -stack_move_limit);
+-    __ jcc(Assembler::greater, L_ok);
+-    __ bind(L_bad);
+-    __ stop("load_stack_move of garbage value");
+-    __ BIND(L_ok);
+-  }
+-#endif
+-  BLOCK_COMMENT("} load_stack_move");
+-}
++#define NONZERO(x) check_nonzero(#x, x)
++#else //ASSERT
++#define NONZERO(x) (x)
++#endif //ASSERT
+ 
+ #ifdef ASSERT
+-void MethodHandles::RicochetFrame::verify_offsets() {
+-  // Check compatibility of this struct with the more generally used offsets of class frame:
+-  int ebp_off = sender_link_offset_in_bytes();  // offset from struct base to local rbp value
+-  assert(ebp_off + wordSize*frame::interpreter_frame_method_offset      == saved_args_base_offset_in_bytes(), "");
+-  assert(ebp_off + wordSize*frame::interpreter_frame_last_sp_offset     == conversion_offset_in_bytes(), "");
+-  assert(ebp_off + wordSize*frame::interpreter_frame_sender_sp_offset   == exact_sender_sp_offset_in_bytes(), "");
+-  // These last two have to be exact:
+-  assert(ebp_off + wordSize*frame::link_offset                          == sender_link_offset_in_bytes(), "");
+-  assert(ebp_off + wordSize*frame::return_addr_offset                   == sender_pc_offset_in_bytes(), "");
+-}
+-
+-void MethodHandles::RicochetFrame::verify() const {
+-  verify_offsets();
+-  assert(magic_number_1() == MAGIC_NUMBER_1, err_msg(PTR_FORMAT " == " PTR_FORMAT, magic_number_1(), MAGIC_NUMBER_1));
+-  assert(magic_number_2() == MAGIC_NUMBER_2, err_msg(PTR_FORMAT " == " PTR_FORMAT, magic_number_2(), MAGIC_NUMBER_2));
+-  if (!Universe::heap()->is_gc_active()) {
+-    if (saved_args_layout() != NULL) {
+-      assert(saved_args_layout()->is_method(), "must be valid oop");
+-    }
+-    if (saved_target() != NULL) {
+-      assert(java_lang_invoke_MethodHandle::is_instance(saved_target()), "checking frame value");
+-    }
+-  }
+-  int conv_op = adapter_conversion_op(conversion());
+-  assert(conv_op == java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS ||
+-         conv_op == java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS ||
+-         conv_op == java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF,
+-         "must be a sane conversion");
+-  if (has_return_value_slot()) {
+-    assert(*return_value_slot_addr() == RETURN_VALUE_PLACEHOLDER, "");
+-  }
+-}
+-#endif //PRODUCT
+-
+-#ifdef ASSERT
+-void MethodHandles::verify_argslot(MacroAssembler* _masm,
+-                                   Register argslot_reg,
+-                                   const char* error_message) {
+-  // Verify that argslot lies within (rsp, rbp].
+-  Label L_ok, L_bad;
+-  BLOCK_COMMENT("verify_argslot {");
+-  __ cmpptr(argslot_reg, rbp);
+-  __ jccb(Assembler::above, L_bad);
+-  __ cmpptr(rsp, argslot_reg);
+-  __ jccb(Assembler::below, L_ok);
+-  __ bind(L_bad);
+-  __ stop(error_message);
+-  __ BIND(L_ok);
+-  BLOCK_COMMENT("} verify_argslot");
+-}
+-
+-void MethodHandles::verify_argslots(MacroAssembler* _masm,
+-                                    RegisterOrConstant arg_slots,
+-                                    Register arg_slot_base_reg,
+-                                    bool negate_argslots,
+-                                    const char* error_message) {
+-  // Verify that [argslot..argslot+size) lies within (rsp, rbp).
+-  Label L_ok, L_bad;
+-  Register rdi_temp = rdi;
+-  BLOCK_COMMENT("verify_argslots {");
+-  __ push(rdi_temp);
+-  if (negate_argslots) {
+-    if (arg_slots.is_constant()) {
+-      arg_slots = -1 * arg_slots.as_constant();
+-    } else {
+-      __ movptr(rdi_temp, arg_slots);
+-      __ negptr(rdi_temp);
+-      arg_slots = rdi_temp;
+-    }
+-  }
+-  __ lea(rdi_temp, Address(arg_slot_base_reg, arg_slots, Interpreter::stackElementScale()));
+-  __ cmpptr(rdi_temp, rbp);
+-  __ pop(rdi_temp);
+-  __ jcc(Assembler::above, L_bad);
+-  __ cmpptr(rsp, arg_slot_base_reg);
+-  __ jcc(Assembler::below, L_ok);
+-  __ bind(L_bad);
+-  __ stop(error_message);
+-  __ BIND(L_ok);
+-  BLOCK_COMMENT("} verify_argslots");
+-}
+-
+-// Make sure that arg_slots has the same sign as the given direction.
+-// If (and only if) arg_slots is a assembly-time constant, also allow it to be zero.
+-void MethodHandles::verify_stack_move(MacroAssembler* _masm,
+-                                      RegisterOrConstant arg_slots, int direction) {
+-  bool allow_zero = arg_slots.is_constant();
+-  if (direction == 0) { direction = +1; allow_zero = true; }
+-  assert(stack_move_unit() == -1, "else add extra checks here");
+-  if (arg_slots.is_register()) {
+-    Label L_ok, L_bad;
+-    BLOCK_COMMENT("verify_stack_move {");
+-    // testl(arg_slots.as_register(), -stack_move_unit() - 1);  // no need
+-    // jcc(Assembler::notZero, L_bad);
+-    __ cmpptr(arg_slots.as_register(), (int32_t) NULL_WORD);
+-    if (direction > 0) {
+-      __ jcc(allow_zero ? Assembler::less : Assembler::lessEqual, L_bad);
+-      __ cmpptr(arg_slots.as_register(), (int32_t) UNREASONABLE_STACK_MOVE);
+-      __ jcc(Assembler::less, L_ok);
+-    } else {
+-      __ jcc(allow_zero ? Assembler::greater : Assembler::greaterEqual, L_bad);
+-      __ cmpptr(arg_slots.as_register(), (int32_t) -UNREASONABLE_STACK_MOVE);
+-      __ jcc(Assembler::greater, L_ok);
+-    }
+-    __ bind(L_bad);
+-    if (direction > 0)
+-      __ stop("assert arg_slots > 0");
+-    else
+-      __ stop("assert arg_slots < 0");
+-    __ BIND(L_ok);
+-    BLOCK_COMMENT("} verify_stack_move");
+-  } else {
+-    intptr_t size = arg_slots.as_constant();
+-    if (direction < 0)  size = -size;
+-    assert(size >= 0, "correct direction of constant move");
+-    assert(size < UNREASONABLE_STACK_MOVE, "reasonable size of constant move");
+-  }
+-}
+-
+ void MethodHandles::verify_klass(MacroAssembler* _masm,
+                                  Register obj, KlassHandle klass,
+                                  const char* error_message) {
+@@ -528,12 +71,15 @@
+          klass_addr <= SystemDictionaryHandles::Long_klass().raw_value(),
+          "must be one of the SystemDictionaryHandles");
+   Register temp = rdi;
++  Register temp2 = noreg;
++  LP64_ONLY(temp2 = rscratch1);  // used by MacroAssembler::cmpptr
+   Label L_ok, L_bad;
+   BLOCK_COMMENT("verify_klass {");
+   __ verify_oop(obj);
+   __ testptr(obj, obj);
+   __ jcc(Assembler::zero, L_bad);
+-  __ push(temp);
++  __ push(temp); if (temp2 != noreg)  __ push(temp2);
++#define UNPUSH { if (temp2 != noreg)  __ pop(temp2);  __ pop(temp); }
+   __ load_klass(temp, obj);
+   __ cmpptr(temp, ExternalAddress((address) klass_addr));
+   __ jcc(Assembler::equal, L_ok);
+@@ -541,17 +87,42 @@
+   __ movptr(temp, Address(temp, super_check_offset));
+   __ cmpptr(temp, ExternalAddress((address) klass_addr));
+   __ jcc(Assembler::equal, L_ok);
+-  __ pop(temp);
++  UNPUSH;
+   __ bind(L_bad);
+-  __ stop(error_message);
++  __ STOP(error_message);
+   __ BIND(L_ok);
+-  __ pop(temp);
++  UNPUSH;
+   BLOCK_COMMENT("} verify_klass");
+ }
++
++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {
++  Label L;
++  BLOCK_COMMENT("verify_ref_kind {");
++  __ movl(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes())));
++  __ shrl(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT);
++  __ andl(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK);
++  __ cmpl(temp, ref_kind);
++  __ jcc(Assembler::equal, L);
++  { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);
++    jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
++    if (ref_kind == JVM_REF_invokeVirtual ||
++        ref_kind == JVM_REF_invokeSpecial)
++      // could do this for all ref_kinds, but would explode assembly code size
++      trace_method_handle(_masm, buf);
++    __ STOP(buf);
++  }
++  BLOCK_COMMENT("} verify_ref_kind");
++  __ bind(L);
++}
++
+ #endif //ASSERT
+ 
+-void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp) {
+-  if (JvmtiExport::can_post_interpreter_events()) {
++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp,
++                                            bool for_compiler_entry) {
++  assert(method == rbx, "interpreter calling convention");
++  __ verify_oop(method);
++
++  if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) {
+     Label run_compiled_code;
+     // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+     // compiled code in threads for which the event is enabled.  Check here for
+@@ -567,462 +138,380 @@
+     __ cmpb(Address(rthread, JavaThread::interp_only_mode_offset()), 0);
+     __ jccb(Assembler::zero, run_compiled_code);
+     __ jmp(Address(method, methodOopDesc::interpreter_entry_offset()));
+-    __ bind(run_compiled_code);
++    __ BIND(run_compiled_code);
+   }
+-  __ jmp(Address(method, methodOopDesc::from_interpreted_offset()));
++
++  const ByteSize entry_offset = for_compiler_entry ? methodOopDesc::from_compiled_offset() :
++                                                     methodOopDesc::from_interpreted_offset();
++  __ jmp(Address(method, entry_offset));
+ }
+ 
++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
++                                        Register recv, Register method_temp,
++                                        Register temp2,
++                                        bool for_compiler_entry) {
++  BLOCK_COMMENT("jump_to_lambda_form {");
++  // This is the initial entry point of a lazy method handle.
++  // After type checking, it picks up the invoker from the LambdaForm.
++  assert_different_registers(recv, method_temp, temp2);
++  assert(recv != noreg, "required register");
++  assert(method_temp == rbx, "required register for loading method");
++
++  //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); });
++
++  // Load the invoker, as MH -> MH.form -> LF.vmentry
++  __ verify_oop(recv);
++  __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())));
++  __ verify_oop(method_temp);
++  __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())));
++  __ verify_oop(method_temp);
++  // the following assumes that a methodOop is normally compressed in the vmtarget field:
++  __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())));
++  __ verify_oop(method_temp);
++
++  if (VerifyMethodHandles && !for_compiler_entry) {
++    // make sure recv is already on stack
++    __ load_sized_value(temp2,
++                        Address(method_temp, methodOopDesc::size_of_parameters_offset()),
++                        sizeof(u2), /*is_signed*/ false);
++    // assert(sizeof(u2) == sizeof(methodOopDesc::_size_of_parameters), "");
++    Label L;
++    __ cmpptr(recv, __ argument_address(temp2, -1));
++    __ jcc(Assembler::equal, L);
++    __ movptr(rax, __ argument_address(temp2, -1));
++    __ STOP("receiver not on stack");
++    __ BIND(L);
++  }
++
++  jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry);
++  BLOCK_COMMENT("} jump_to_lambda_form");
++}
++
++
+ // Code generation
+-address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm) {
++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
++                                                                vmIntrinsics::ID iid) {
++  const bool not_for_compiler_entry = false;  // this is the interpreter entry
++  assert(is_signature_polymorphic(iid), "expected invoke iid");
++  if (iid == vmIntrinsics::_invokeGeneric ||
++      iid == vmIntrinsics::_compiledLambdaForm) {
++    // Perhaps surprisingly, the symbolic references visible to Java are not directly used.
++    // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
++    // They all allow an appendix argument.
++    __ hlt();           // empty stubs make SG sick
++    return NULL;
++  }
++
++  // rsi/r13: sender SP (must preserve; see prepare_to_jump_from_interpreted)
+   // rbx: methodOop
+-  // rcx: receiver method handle (must load from sp[MethodTypeForm.vmslots])
+-  // rsi/r13: sender SP (must preserve; see prepare_to_jump_from_interpreted)
+-  // rdx, rdi: garbage temp, blown away
++  // rdx: argument locator (parameter slot count, added to rsp)
++  // rcx: used as temp to hold mh or receiver
++  // rax, rdi: garbage temps, blown away
++  Register rdx_argp   = rdx;   // argument list ptr, live on error paths
++  Register rax_temp   = rax;
++  Register rcx_mh     = rcx;   // MH receiver; dies quickly and is recycled
++  Register rbx_method = rbx;   // eventual target of this invocation
+ 
+-  Register rbx_method = rbx;
+-  Register rcx_recv   = rcx;
+-  Register rax_mtype  = rax;
+-  Register rdx_temp   = rdx;
+-  Register rdi_temp   = rdi;
+-
+-  // emit WrongMethodType path first, to enable jccb back-branch from main path
+-  Label wrong_method_type;
+-  __ bind(wrong_method_type);
+-  Label invoke_generic_slow_path, invoke_exact_error_path;
+-  assert(methodOopDesc::intrinsic_id_size_in_bytes() == sizeof(u1), "");;
+-  __ cmpb(Address(rbx_method, methodOopDesc::intrinsic_id_offset_in_bytes()), (int) vmIntrinsics::_invokeExact);
+-  __ jcc(Assembler::notEqual, invoke_generic_slow_path);
+-  __ jmp(invoke_exact_error_path);
++  address code_start = __ pc();
+ 
+   // here's where control starts out:
+   __ align(CodeEntryAlignment);
+   address entry_point = __ pc();
+ 
+-  // fetch the MethodType from the method handle into rax (the 'check' register)
+-  // FIXME: Interpreter should transmit pre-popped stack pointer, to locate base of arg list.
+-  // This would simplify several touchy bits of code.
+-  // See 6984712: JSR 292 method handle calls need a clean argument base pointer
+-  {
+-    Register tem = rbx_method;
+-    for (jint* pchase = methodOopDesc::method_type_offsets_chain(); (*pchase) != -1; pchase++) {
+-      __ movptr(rax_mtype, Address(tem, *pchase));
+-      tem = rax_mtype;          // in case there is another indirection
++  if (VerifyMethodHandles) {
++    Label L;
++    BLOCK_COMMENT("verify_intrinsic_id {");
++    __ cmpb(Address(rbx_method, methodOopDesc::intrinsic_id_offset_in_bytes()), (int) iid);
++    __ jcc(Assembler::equal, L);
++    if (iid == vmIntrinsics::_linkToVirtual ||
++        iid == vmIntrinsics::_linkToSpecial) {
++      // could do this for all kinds, but would explode assembly code size
++      trace_method_handle(_masm, "bad methodOop::intrinsic_id");
+     }
++    __ STOP("bad methodOop::intrinsic_id");
++    __ bind(L);
++    BLOCK_COMMENT("} verify_intrinsic_id");
+   }
+ 
+-  // given the MethodType, find out where the MH argument is buried
+-  __ load_heap_oop(rdx_temp, Address(rax_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, rdi_temp)));
+-  Register rdx_vmslots = rdx_temp;
+-  __ movl(rdx_vmslots, Address(rdx_temp, __ delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, rdi_temp)));
+-  Address mh_receiver_slot_addr = __ argument_address(rdx_vmslots);
+-  __ movptr(rcx_recv, mh_receiver_slot_addr);
++  // First task:  Find out how big the argument list is.
++  Address rdx_first_arg_addr;
++  int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
++  assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
++  if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
++    __ load_sized_value(rdx_argp,
++                        Address(rbx_method, methodOopDesc::size_of_parameters_offset()),
++                        sizeof(u2), /*is_signed*/ false);
++    // assert(sizeof(u2) == sizeof(methodOopDesc::_size_of_parameters), "");
++    rdx_first_arg_addr = __ argument_address(rdx_argp, -1);
++  } else {
++    DEBUG_ONLY(rdx_argp = noreg);
++  }
+ 
+-  trace_method_handle(_masm, "invokeExact");
++  if (!is_signature_polymorphic_static(iid)) {
++    __ movptr(rcx_mh, rdx_first_arg_addr);
++    DEBUG_ONLY(rdx_argp = noreg);
++  }
+ 
+-  __ check_method_handle_type(rax_mtype, rcx_recv, rdi_temp, wrong_method_type);
++  // rdx_first_arg_addr is live!
+ 
+-  // Nobody uses the MH receiver slot after this.  Make sure.
+-  DEBUG_ONLY(__ movptr(mh_receiver_slot_addr, (int32_t)0x999999));
++  if (TraceMethodHandles) {
++    const char* name = vmIntrinsics::name_at(iid);
++    if (*name == '_')  name += 1;
++    const size_t len = strlen(name) + 50;
++    char* qname = NEW_C_HEAP_ARRAY(char, len, mtInternal);
++    const char* suffix = "";
++    if (vmIntrinsics::method_for(iid) == NULL ||
++        !vmIntrinsics::method_for(iid)->access_flags().is_public()) {
++      if (is_signature_polymorphic_static(iid))
++        suffix = "/static";
++      else
++        suffix = "/private";
++    }
++    jio_snprintf(qname, len, "MethodHandle::interpreter_entry::%s%s", name, suffix);
++    // note: stub looks for mh in rcx
++    trace_method_handle(_masm, qname);
++  }
+ 
+-  __ jump_to_method_handle_entry(rcx_recv, rdi_temp);
++  if (iid == vmIntrinsics::_invokeBasic) {
++    generate_method_handle_dispatch(_masm, iid, rcx_mh, noreg, not_for_compiler_entry);
+ 
+-  // error path for invokeExact (only)
+-  __ bind(invoke_exact_error_path);
+-  // ensure that the top of stack is properly aligned.
+-  __ mov(rdi, rsp);
+-  __ andptr(rsp, -StackAlignmentInBytes); // Align the stack for the ABI
+-  __ pushptr(Address(rdi, 0));  // Pick up the return address
++  } else {
++    // Adjust argument list by popping the trailing MemberName argument.
++    Register rcx_recv = noreg;
++    if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
++      // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack.
++      __ movptr(rcx_recv = rcx, rdx_first_arg_addr);
++    }
++    DEBUG_ONLY(rdx_argp = noreg);
++    Register rbx_member = rbx_method;  // MemberName ptr; incoming method ptr is dead now
++    __ pop(rax_temp);           // return address
++    __ pop(rbx_member);         // extract last argument
++    __ push(rax_temp);          // re-push return address
++    generate_method_handle_dispatch(_masm, iid, rcx_recv, rbx_member, not_for_compiler_entry);
++  }
+ 
+-  // Stub wants expected type in rax and the actual type in rcx
+-  __ jump(ExternalAddress(StubRoutines::throw_WrongMethodTypeException_entry()));
+-
+-  // for invokeGeneric (only), apply argument and result conversions on the fly
+-  __ bind(invoke_generic_slow_path);
+-#ifdef ASSERT
+-  if (VerifyMethodHandles) {
+-    Label L;
+-    __ cmpb(Address(rbx_method, methodOopDesc::intrinsic_id_offset_in_bytes()), (int) vmIntrinsics::_invokeGeneric);
+-    __ jcc(Assembler::equal, L);
+-    __ stop("bad methodOop::intrinsic_id");
+-    __ bind(L);
++  if (PrintMethodHandleStubs) {
++    address code_end = __ pc();
++    tty->print_cr("--------");
++    tty->print_cr("method handle interpreter entry for %s", vmIntrinsics::name_at(iid));
++    Disassembler::decode(code_start, code_end);
++    tty->cr();
+   }
+-#endif //ASSERT
+-  Register rbx_temp = rbx_method;  // don't need it now
+-
+-  // make room on the stack for another pointer:
+-  Register rcx_argslot = rcx_recv;
+-  __ lea(rcx_argslot, __ argument_address(rdx_vmslots, 1));
+-  insert_arg_slots(_masm, 2 * stack_move_unit(),
+-                   rcx_argslot, rbx_temp, rdx_temp);
+-
+-  // load up an adapter from the calling type (Java weaves this)
+-  Register rdx_adapter = rdx_temp;
+-  __ load_heap_oop(rdx_temp,    Address(rax_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes,               rdi_temp)));
+-  __ load_heap_oop(rdx_adapter, Address(rdx_temp,  __ delayed_value(java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes, rdi_temp)));
+-  __ verify_oop(rdx_adapter);
+-  __ movptr(Address(rcx_argslot, 1 * Interpreter::stackElementSize), rdx_adapter);
+-  // As a trusted first argument, pass the type being called, so the adapter knows
+-  // the actual types of the arguments and return values.
+-  // (Generic invokers are shared among form-families of method-type.)
+-  __ movptr(Address(rcx_argslot, 0 * Interpreter::stackElementSize), rax_mtype);
+-  // FIXME: assert that rdx_adapter is of the right method-type.
+-  __ mov(rcx, rdx_adapter);
+-  trace_method_handle(_masm, "invokeGeneric");
+-  __ jump_to_method_handle_entry(rcx, rdi_temp);
+ 
+   return entry_point;
+ }
+ 
+-// Helper to insert argument slots into the stack.
+-// arg_slots must be a multiple of stack_move_unit() and < 0
+-// rax_argslot is decremented to point to the new (shifted) location of the argslot
+-// But, rdx_temp ends up holding the original value of rax_argslot.
+-void MethodHandles::insert_arg_slots(MacroAssembler* _masm,
+-                                     RegisterOrConstant arg_slots,
+-                                     Register rax_argslot,
+-                                     Register rbx_temp, Register rdx_temp) {
+-  // allow constant zero
+-  if (arg_slots.is_constant() && arg_slots.as_constant() == 0)
+-    return;
+-  assert_different_registers(rax_argslot, rbx_temp, rdx_temp,
+-                             (!arg_slots.is_register() ? rsp : arg_slots.as_register()));
+-  if (VerifyMethodHandles)
+-    verify_argslot(_masm, rax_argslot, "insertion point must fall within current frame");
+-  if (VerifyMethodHandles)
+-    verify_stack_move(_masm, arg_slots, -1);
+-
+-  // Make space on the stack for the inserted argument(s).
+-  // Then pull down everything shallower than rax_argslot.
+-  // The stacked return address gets pulled down with everything else.
+-  // That is, copy [rsp, argslot) downward by -size words.  In pseudo-code:
+-  //   rsp -= size;
+-  //   for (rdx = rsp + size; rdx < argslot; rdx++)
+-  //     rdx[-size] = rdx[0]
+-  //   argslot -= size;
+-  BLOCK_COMMENT("insert_arg_slots {");
+-  __ mov(rdx_temp, rsp);                        // source pointer for copy
+-  __ lea(rsp, Address(rsp, arg_slots, Interpreter::stackElementScale()));
+-  {
+-    Label loop;
+-    __ BIND(loop);
+-    // pull one word down each time through the loop
+-    __ movptr(rbx_temp, Address(rdx_temp, 0));
+-    __ movptr(Address(rdx_temp, arg_slots, Interpreter::stackElementScale()), rbx_temp);
+-    __ addptr(rdx_temp, wordSize);
+-    __ cmpptr(rdx_temp, rax_argslot);
+-    __ jcc(Assembler::below, loop);
++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
++                                                    vmIntrinsics::ID iid,
++                                                    Register receiver_reg,
++                                                    Register member_reg,
++                                                    bool for_compiler_entry) {
++  assert(is_signature_polymorphic(iid), "expected invoke iid");
++  Register rbx_method = rbx;   // eventual target of this invocation
++  // temps used in this code are not used in *either* compiled or interpreted calling sequences
++#ifdef _LP64
++  Register temp1 = rscratch1;
++  Register temp2 = rscratch2;
++  Register temp3 = rax;
++  if (for_compiler_entry) {
++    assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment");
++    assert_different_registers(temp1,        j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5);
++    assert_different_registers(temp2,        j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5);
++    assert_different_registers(temp3,        j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5);
+   }
+-
+-  // Now move the argslot down, to point to the opened-up space.
+-  __ lea(rax_argslot, Address(rax_argslot, arg_slots, Interpreter::stackElementScale()));
+-  BLOCK_COMMENT("} insert_arg_slots");
+-}
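For reference, the pseudo-code in the deleted insert_arg_slots helper boils down to a downward overlapping copy over the interpreter's expression stack. A minimal C++ sketch of the same operation, using a plain array in place of the real stack (illustrative names only, not HotSpot code):

  #include <cstddef>
  #include <cstdint>

  // Toy model: lower index == lower address == shallower slot; 'rsp' is the
  // index of the current top-of-stack word, 'argslot' the insertion point.
  // Opening 'size' slots just below 'argslot' slides every shallower word
  // (including the stacked return PC) down by 'size' positions.
  static void insert_arg_slots_sketch(intptr_t* stack, std::size_t& rsp,
                                      std::size_t& argslot, std::size_t size) {
    rsp -= size;                                  // rsp -= size
    for (std::size_t i = rsp + size; i < argslot; ++i)
      stack[i - size] = stack[i];                 // rdx[-size] = rdx[0]
    argslot -= size;                              // argslot -= size
  }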
+-
+-// Helper to remove argument slots from the stack.
+-// arg_slots must be a multiple of stack_move_unit() and > 0
+-void MethodHandles::remove_arg_slots(MacroAssembler* _masm,
+-                                     RegisterOrConstant arg_slots,
+-                                     Register rax_argslot,
+-                                     Register rbx_temp, Register rdx_temp) {
+-  // allow constant zero
+-  if (arg_slots.is_constant() && arg_slots.as_constant() == 0)
+-    return;
+-  assert_different_registers(rax_argslot, rbx_temp, rdx_temp,
+-                             (!arg_slots.is_register() ? rsp : arg_slots.as_register()));
+-  if (VerifyMethodHandles)
+-    verify_argslots(_masm, arg_slots, rax_argslot, false,
+-                    "deleted argument(s) must fall within current frame");
+-  if (VerifyMethodHandles)
+-    verify_stack_move(_masm, arg_slots, +1);
+-
+-  BLOCK_COMMENT("remove_arg_slots {");
+-  // Pull up everything shallower than rax_argslot.
+-  // Then remove the excess space on the stack.
+-  // The stacked return address gets pulled up with everything else.
+-  // That is, copy [rsp, argslot) upward by size words.  In pseudo-code:
+-  //   for (rdx = argslot-1; rdx >= rsp; --rdx)
+-  //     rdx[size] = rdx[0]
+-  //   argslot += size;
+-  //   rsp += size;
+-  __ lea(rdx_temp, Address(rax_argslot, -wordSize)); // source pointer for copy
+-  {
+-    Label loop;
+-    __ BIND(loop);
+-    // pull one word up each time through the loop
+-    __ movptr(rbx_temp, Address(rdx_temp, 0));
+-    __ movptr(Address(rdx_temp, arg_slots, Interpreter::stackElementScale()), rbx_temp);
+-    __ addptr(rdx_temp, -wordSize);
+-    __ cmpptr(rdx_temp, rsp);
+-    __ jcc(Assembler::aboveEqual, loop);
+-  }
+-
+-  // Now move the argslot up, to point to the just-copied block.
+-  __ lea(rsp, Address(rsp, arg_slots, Interpreter::stackElementScale()));
+-  // And adjust the argslot address to point at the deletion point.
+-  __ lea(rax_argslot, Address(rax_argslot, arg_slots, Interpreter::stackElementScale()));
+-  BLOCK_COMMENT("} remove_arg_slots");
+-}
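The matching removal helper is the inverse operation, sketched the same way (plain array, illustrative names, not HotSpot code):

  #include <cstddef>
  #include <cstdint>

  // Inverse of the insertion sketch: deleting 'size' slots just below
  // 'argslot' slides every shallower word up by 'size' positions, then pops
  // the vacated words.  Walking from deep to shallow keeps the overlapping
  // copy safe.
  static void remove_arg_slots_sketch(intptr_t* stack, std::size_t& rsp,
                                      std::size_t& argslot, std::size_t size) {
    for (std::size_t i = argslot; i-- > rsp; )    // rdx = argslot-1 .. rsp
      stack[i + size] = stack[i];                 // rdx[size] = rdx[0]
    rsp     += size;                              // rsp += size
    argslot += size;                              // argslot += size
  }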
+-
+-// Helper to copy argument slots to the top of the stack.
+-// The sequence starts with rax_argslot and is counted by slot_count
+-// slot_count must be a multiple of stack_move_unit() and >= 0
+-// This function blows the temps but does not change rax_argslot.
+-void MethodHandles::push_arg_slots(MacroAssembler* _masm,
+-                                   Register rax_argslot,
+-                                   RegisterOrConstant slot_count,
+-                                   int skip_words_count,
+-                                   Register rbx_temp, Register rdx_temp) {
+-  assert_different_registers(rax_argslot, rbx_temp, rdx_temp,
+-                             (!slot_count.is_register() ? rbp : slot_count.as_register()),
+-                             rsp);
+-  assert(Interpreter::stackElementSize == wordSize, "else change this code");
+-
+-  if (VerifyMethodHandles)
+-    verify_stack_move(_masm, slot_count, 0);
+-
+-  // allow constant zero
+-  if (slot_count.is_constant() && slot_count.as_constant() == 0)
+-    return;
+-
+-  BLOCK_COMMENT("push_arg_slots {");
+-
+-  Register rbx_top = rbx_temp;
+-
+-  // There is at most 1 word to carry down with the TOS.
+-  switch (skip_words_count) {
+-  case 1: __ pop(rdx_temp); break;
+-  case 0:                   break;
+-  default: ShouldNotReachHere();
+-  }
+-
+-  if (slot_count.is_constant()) {
+-    for (int i = slot_count.as_constant() - 1; i >= 0; i--) {
+-      __ pushptr(Address(rax_argslot, i * wordSize));
+-    }
+-  } else {
+-    Label L_plural, L_loop, L_break;
+-    // Emit code to dynamically check for the common cases, zero and one slot.
+-    __ cmpl(slot_count.as_register(), (int32_t) 1);
+-    __ jccb(Assembler::greater, L_plural);
+-    __ jccb(Assembler::less, L_break);
+-    __ pushptr(Address(rax_argslot, 0));
+-    __ jmpb(L_break);
+-    __ BIND(L_plural);
+-
+-    // Loop for 2 or more:
+-    //   rbx = &rax[slot_count]
+-    //   while (rbx > rax)  *(--rsp) = *(--rbx)
+-    __ lea(rbx_top, Address(rax_argslot, slot_count, Address::times_ptr));
+-    __ BIND(L_loop);
+-    __ subptr(rbx_top, wordSize);
+-    __ pushptr(Address(rbx_top, 0));
+-    __ cmpptr(rbx_top, rax_argslot);
+-    __ jcc(Assembler::above, L_loop);
+-    __ bind(L_break);
+-  }
+-  switch (skip_words_count) {
+-  case 1: __ push(rdx_temp); break;
+-  case 0:                    break;
+-  default: ShouldNotReachHere();
+-  }
+-  BLOCK_COMMENT("} push_arg_slots");
+-}
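push_arg_slots, by contrast, copies an existing run of slots onto the top of the stack. A sketch of the non-constant loop ("while (rbx > rax) *(--rsp) = *(--rbx)"), again on a plain array with illustrative names; the optional return-PC carry (skip_words_count) is omitted:

  #include <cstddef>
  #include <cstdint>

  // Copies 'count' words starting at 'argslot' (and extending toward deeper,
  // higher-indexed slots) onto the top of the stack, deepest word first, so
  // the copy ends up in the original order.  'argslot' itself is unchanged.
  static void push_arg_slots_sketch(intptr_t* stack, std::size_t& rsp,
                                    std::size_t argslot, std::size_t count) {
    std::size_t src = argslot + count;            // rbx = &rax[slot_count]
    while (src > argslot)
      stack[--rsp] = stack[--src];                // *(--rsp) = *(--rbx)
  }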
+-
+-// in-place movement; no change to rsp
+-// blows rax_temp, rdx_temp
+-void MethodHandles::move_arg_slots_up(MacroAssembler* _masm,
+-                                      Register rbx_bottom,  // invariant
+-                                      Address  top_addr,     // can use rax_temp
+-                                      RegisterOrConstant positive_distance_in_slots,
+-                                      Register rax_temp, Register rdx_temp) {
+-  BLOCK_COMMENT("move_arg_slots_up {");
+-  assert_different_registers(rbx_bottom,
+-                             rax_temp, rdx_temp,
+-                             positive_distance_in_slots.register_or_noreg());
+-  Label L_loop, L_break;
+-  Register rax_top = rax_temp;
+-  if (!top_addr.is_same_address(Address(rax_top, 0)))
+-    __ lea(rax_top, top_addr);
+-  // Detect empty (or broken) loop:
+-#ifdef ASSERT
+-  if (VerifyMethodHandles) {
+-    // Verify that &bottom < &top (non-empty interval)
+-    Label L_ok, L_bad;
+-    if (positive_distance_in_slots.is_register()) {
+-      __ cmpptr(positive_distance_in_slots.as_register(), (int32_t) 0);
+-      __ jcc(Assembler::lessEqual, L_bad);
+-    }
+-    __ cmpptr(rbx_bottom, rax_top);
+-    __ jcc(Assembler::below, L_ok);
+-    __ bind(L_bad);
+-    __ stop("valid bounds (copy up)");
+-    __ BIND(L_ok);
++#else
++  Register temp1 = (for_compiler_entry ? rsi : rdx);
++  Register temp2 = rdi;
++  Register temp3 = rax;
++  if (for_compiler_entry) {
++    assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : rcx), "only valid assignment");
++    assert_different_registers(temp1,        rcx, rdx);
++    assert_different_registers(temp2,        rcx, rdx);
++    assert_different_registers(temp3,        rcx, rdx);
+   }
+ #endif
+-  __ cmpptr(rbx_bottom, rax_top);
+-  __ jccb(Assembler::aboveEqual, L_break);
+-  // work rax down to rbx, copying contiguous data upwards
+-  // In pseudo-code:
+-  //   [rbx, rax) = &[bottom, top)
+-  //   while (--rax >= rbx) *(rax + distance) = *(rax + 0), rax--;
+-  __ BIND(L_loop);
+-  __ subptr(rax_top, wordSize);
+-  __ movptr(rdx_temp, Address(rax_top, 0));
+-  __ movptr(          Address(rax_top, positive_distance_in_slots, Address::times_ptr), rdx_temp);
+-  __ cmpptr(rax_top, rbx_bottom);
+-  __ jcc(Assembler::above, L_loop);
+-  assert(Interpreter::stackElementSize == wordSize, "else change loop");
+-  __ bind(L_break);
+-  BLOCK_COMMENT("} move_arg_slots_up");
+-}
++  assert_different_registers(temp1, temp2, temp3, receiver_reg);
++  assert_different_registers(temp1, temp2, temp3, member_reg);
++  if (!for_compiler_entry)
++    assert_different_registers(temp1, temp2, temp3, saved_last_sp_register());  // don't trash lastSP
+ 
+-// in-place movement; no change to rsp
+-// blows rax_temp, rdx_temp
+-void MethodHandles::move_arg_slots_down(MacroAssembler* _masm,
+-                                        Address  bottom_addr,  // can use rax_temp
+-                                        Register rbx_top,      // invariant
+-                                        RegisterOrConstant negative_distance_in_slots,
+-                                        Register rax_temp, Register rdx_temp) {
+-  BLOCK_COMMENT("move_arg_slots_down {");
+-  assert_different_registers(rbx_top,
+-                             negative_distance_in_slots.register_or_noreg(),
+-                             rax_temp, rdx_temp);
+-  Label L_loop, L_break;
+-  Register rax_bottom = rax_temp;
+-  if (!bottom_addr.is_same_address(Address(rax_bottom, 0)))
+-    __ lea(rax_bottom, bottom_addr);
+-  // Detect empty (or broken) loop:
+-#ifdef ASSERT
+-  assert(!negative_distance_in_slots.is_constant() || negative_distance_in_slots.as_constant() < 0, "");
+-  if (VerifyMethodHandles) {
+-    // Verify that &bottom < &top (non-empty interval)
+-    Label L_ok, L_bad;
+-    if (negative_distance_in_slots.is_register()) {
+-      __ cmpptr(negative_distance_in_slots.as_register(), (int32_t) 0);
+-      __ jcc(Assembler::greaterEqual, L_bad);
++  if (iid == vmIntrinsics::_invokeBasic) {
++    // indirect through MH.form.vmentry.vmtarget
++    jump_to_lambda_form(_masm, receiver_reg, rbx_method, temp1, for_compiler_entry);
++
++  } else {
++    // The method is a member invoker used by direct method handles.
++    if (VerifyMethodHandles) {
++      // make sure the trailing argument really is a MemberName (caller responsibility)
++      verify_klass(_masm, member_reg, SystemDictionaryHandles::MemberName_klass(),
++                   "MemberName required for invokeVirtual etc.");
+     }
+-    __ cmpptr(rax_bottom, rbx_top);
+-    __ jcc(Assembler::below, L_ok);
+-    __ bind(L_bad);
+-    __ stop("valid bounds (copy down)");
+-    __ BIND(L_ok);
++
++    Address member_clazz(    member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()));
++    Address member_vmindex(  member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()));
++    Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()));
++
++    Register temp1_recv_klass = temp1;
++    if (iid != vmIntrinsics::_linkToStatic) {
++      __ verify_oop(receiver_reg);
++      if (iid == vmIntrinsics::_linkToSpecial) {
++        // Don't actually load the klass; just null-check the receiver.
++        __ null_check(receiver_reg);
++      } else {
++        // load receiver klass itself
++        __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes());
++        __ load_klass(temp1_recv_klass, receiver_reg);
++        __ verify_oop(temp1_recv_klass);
++      }
++      BLOCK_COMMENT("check_receiver {");
++      // The receiver for the MemberName must be in receiver_reg.
++      // Check the receiver against the MemberName.clazz
++      if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
++        // Did not load it above...
++        __ load_klass(temp1_recv_klass, receiver_reg);
++        __ verify_oop(temp1_recv_klass);
++      }
++      if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
++        Label L_ok;
++        Register temp2_defc = temp2;
++        __ load_heap_oop(temp2_defc, member_clazz);
++        load_klass_from_Class(_masm, temp2_defc);
++        __ verify_oop(temp2_defc);
++        __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok);
++        // If we get here, the type check failed!
++        __ STOP("receiver class disagrees with MemberName.clazz");
++        __ bind(L_ok);
++      }
++      BLOCK_COMMENT("} check_receiver");
++    }
++    if (iid == vmIntrinsics::_linkToSpecial ||
++        iid == vmIntrinsics::_linkToStatic) {
++      DEBUG_ONLY(temp1_recv_klass = noreg);  // these guys didn't load the recv_klass
++    }
++
++    // Live registers at this point:
++    //  member_reg - MemberName that was the trailing argument
++    //  temp1_recv_klass - klass of stacked receiver, if needed
++    //  rsi/r13 - interpreter linkage (if interpreted)
++    //  rcx, rdx, rsi, rdi, r8, r9 - compiler arguments (if compiled)
++
++    bool method_is_live = false;
++    switch (iid) {
++    case vmIntrinsics::_linkToSpecial:
++      if (VerifyMethodHandles) {
++        verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3);
++      }
++      __ load_heap_oop(rbx_method, member_vmtarget);
++      method_is_live = true;
++      break;
++
++    case vmIntrinsics::_linkToStatic:
++      if (VerifyMethodHandles) {
++        verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3);
++      }
++      __ load_heap_oop(rbx_method, member_vmtarget);
++      method_is_live = true;
++      break;
++
++    case vmIntrinsics::_linkToVirtual:
++    {
++      // same as TemplateTable::invokevirtual,
++      // minus the CP setup and profiling:
++
++      if (VerifyMethodHandles) {
++        verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3);
++      }
++
++      // pick out the vtable index from the MemberName, and then we can discard it:
++      Register temp2_index = temp2;
++      __ movptr(temp2_index, member_vmindex);
++
++      if (VerifyMethodHandles) {
++        Label L_index_ok;
++        __ cmpl(temp2_index, 0);
++        __ jcc(Assembler::greaterEqual, L_index_ok);
++        __ STOP("no virtual index");
++        __ BIND(L_index_ok);
++      }
++
++      // Note:  The verifier invariants allow us to ignore MemberName.clazz and vmtarget
++      // at this point.  And VerifyMethodHandles has already checked clazz, if needed.
++
++      // get target methodOop & entry point
++      __ lookup_virtual_method(temp1_recv_klass, temp2_index, rbx_method);
++      method_is_live = true;
++      break;
++    }
++
++    case vmIntrinsics::_linkToInterface:
++    {
++      // same as TemplateTable::invokeinterface
++      // (minus the CP setup and profiling, with different argument motion)
++      if (VerifyMethodHandles) {
++        verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
++      }
++
++      Register temp3_intf = temp3;
++      __ load_heap_oop(temp3_intf, member_clazz);
++      load_klass_from_Class(_masm, temp3_intf);
++      __ verify_oop(temp3_intf);
++
++      Register rbx_index = rbx_method;
++      __ movptr(rbx_index, member_vmindex);
++      if (VerifyMethodHandles) {
++        Label L;
++        __ cmpl(rbx_index, 0);
++        __ jcc(Assembler::greaterEqual, L);
++        __ STOP("invalid vtable index for MH.invokeInterface");
++        __ bind(L);
++      }
++
++      // given intf, index, and recv klass, dispatch to the implementation method
++      Label L_no_such_interface;
++      __ lookup_interface_method(temp1_recv_klass, temp3_intf,
++                                 // note: next two args must be the same:
++                                 rbx_index, rbx_method,
++                                 temp2,
++                                 L_no_such_interface);
++
++      __ verify_oop(rbx_method);
++      jump_from_method_handle(_masm, rbx_method, temp2, for_compiler_entry);
++      __ hlt();
++
++      __ bind(L_no_such_interface);
++      __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
++      break;
++    }
++
++    default:
++      fatal(err_msg("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
++      break;
++    }
++
++    if (method_is_live) {
++      // live at this point:  rbx_method, rsi/r13 (if interpreted)
++
++      // After figuring out which concrete method to call, jump into it.
++      // Note that this works in the interpreter with no data motion.
++      // But the compiled version will require that rcx_recv be shifted out.
++      __ verify_oop(rbx_method);
++      jump_from_method_handle(_masm, rbx_method, temp1, for_compiler_entry);
++    }
+   }
+-#endif
+-  __ cmpptr(rax_bottom, rbx_top);
+-  __ jccb(Assembler::aboveEqual, L_break);
+-  // work rax up to rbx, copying contiguous data downwards
+-  // In pseudo-code:
+-  //   [rax, rbx) = &[bottom, top)
+-  //   while (rax < rbx) *(rax - distance) = *(rax + 0), rax++;
+-  __ BIND(L_loop);
+-  __ movptr(rdx_temp, Address(rax_bottom, 0));
+-  __ movptr(          Address(rax_bottom, negative_distance_in_slots, Address::times_ptr), rdx_temp);
+-  __ addptr(rax_bottom, wordSize);
+-  __ cmpptr(rax_bottom, rbx_top);
+-  __ jcc(Assembler::below, L_loop);
+-  assert(Interpreter::stackElementSize == wordSize, "else change loop");
+-  __ bind(L_break);
+-  BLOCK_COMMENT("} move_arg_slots_down");
+-}
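Both move_arg_slots_up and move_arg_slots_down amount to an in-place overlapping copy of a run of stack words by a fixed slot distance; the hand-written loops exist only because the bounds and the distance live in registers at stub-generation time. Conceptually (illustrative sketch, not HotSpot code):

  #include <cstring>
  #include <cstddef>
  #include <cstdint>

  // 'bottom' points at the lowest-addressed word of the run and 'count' is
  // its length in words.  A positive distance moves the run toward higher
  // addresses (the "up" helper), a negative one toward lower addresses (the
  // "down" helper) -- exactly the overlap-safe copy std::memmove performs.
  static void move_arg_slots_sketch(intptr_t* bottom, std::size_t count,
                                    std::ptrdiff_t distance_in_slots) {
    std::memmove(bottom + distance_in_slots, bottom, count * sizeof(intptr_t));
  }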
+-
+-// Copy from a field or array element to a stacked argument slot.
+-// is_element (ignored) says whether caller is loading an array element instead of an instance field.
+-void MethodHandles::move_typed_arg(MacroAssembler* _masm,
+-                                   BasicType type, bool is_element,
+-                                   Address slot_dest, Address value_src,
+-                                   Register rbx_temp, Register rdx_temp) {
+-  BLOCK_COMMENT(!is_element ? "move_typed_arg {" : "move_typed_arg { (array element)");
+-  if (type == T_OBJECT || type == T_ARRAY) {
+-    __ load_heap_oop(rbx_temp, value_src);
+-    __ movptr(slot_dest, rbx_temp);
+-  } else if (type != T_VOID) {
+-    int  arg_size      = type2aelembytes(type);
+-    bool arg_is_signed = is_signed_subword_type(type);
+-    int  slot_size     = (arg_size > wordSize) ? arg_size : wordSize;
+-    __ load_sized_value(  rdx_temp,  value_src, arg_size, arg_is_signed, rbx_temp);
+-    __ store_sized_value( slot_dest, rdx_temp,  slot_size,               rbx_temp);
+-  }
+-  BLOCK_COMMENT("} move_typed_arg");
+-}
+-
+-void MethodHandles::move_return_value(MacroAssembler* _masm, BasicType type,
+-                                      Address return_slot) {
+-  BLOCK_COMMENT("move_return_value {");
+-  // Old versions of the JVM must clean the FPU stack after every return.
+-#ifndef _LP64
+-#ifdef COMPILER2
+-  // The FPU stack is clean if UseSSE >= 2 but must be cleaned in other cases
+-  if ((type == T_FLOAT && UseSSE < 1) || (type == T_DOUBLE && UseSSE < 2)) {
+-    for (int i = 1; i < 8; i++) {
+-        __ ffree(i);
+-    }
+-  } else if (UseSSE < 2) {
+-    __ empty_FPU_stack();
+-  }
+-#endif //COMPILER2
+-#endif //!_LP64
+-
+-  // Look at the type and pull the value out of the corresponding register.
+-  if (type == T_VOID) {
+-    // nothing to do
+-  } else if (type == T_OBJECT) {
+-    __ movptr(return_slot, rax);
+-  } else if (type == T_INT || is_subword_type(type)) {
+-    // write the whole word, even if only 32 bits is significant
+-    __ movptr(return_slot, rax);
+-  } else if (type == T_LONG) {
+-    // store the value by parts
+-    // Note: We assume longs are contiguous (if misaligned) on the interpreter stack.
+-    __ store_sized_value(return_slot, rax, BytesPerLong, rdx);
+-  } else if (NOT_LP64((type == T_FLOAT  && UseSSE < 1) ||
+-                      (type == T_DOUBLE && UseSSE < 2) ||)
+-             false) {
+-    // Use old x86 FPU registers:
+-    if (type == T_FLOAT)
+-      __ fstp_s(return_slot);
+-    else
+-      __ fstp_d(return_slot);
+-  } else if (type == T_FLOAT) {
+-    __ movflt(return_slot, xmm0);
+-  } else if (type == T_DOUBLE) {
+-    __ movdbl(return_slot, xmm0);
+-  } else {
+-    ShouldNotReachHere();
+-  }
+-  BLOCK_COMMENT("} move_return_value");
+ }
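To summarize what the new generate_method_handle_dispatch stub decides at runtime: linkToSpecial and linkToStatic jump straight to MemberName.vmtarget, linkToVirtual indexes the receiver's vtable with MemberName.vmindex, and linkToInterface searches the receiver's itable for MemberName.clazz. A compact C++ model of that selection (toy types, not the HotSpot API; the register moves, null checks and VerifyMethodHandles checks from the assembly are omitted):

  #include <cstddef>

  struct Method { };                                // stand-in for methodOop
  struct ItableEntry { const void* interface_id; Method** methods; };
  struct Klass {
    Method**     vtable;      // virtual dispatch table
    ItableEntry* itable;      // interface tables, terminated by a null id
  };
  struct MemberName {
    const void* clazz;        // defining class or interface (identity only)
    int         vmindex;      // vtable/itable index, if any
    Method*     vmtarget;     // directly resolved target, if any
  };
  enum LinkerKind { linkToSpecial, linkToStatic, linkToVirtual, linkToInterface };

  static Method* select_target_sketch(LinkerKind kind, const MemberName& mn,
                                      const Klass* recv_klass) {
    switch (kind) {
    case linkToSpecial:
    case linkToStatic:
      return mn.vmtarget;                           // already fully resolved
    case linkToVirtual:
      return recv_klass->vtable[mn.vmindex];        // index the receiver's vtable
    case linkToInterface:                           // linear itable walk, as in
      for (ItableEntry* e = recv_klass->itable;     // lookup_interface_method
           e->interface_id != NULL; ++e) {
        if (e->interface_id == mn.clazz)
          return e->methods[mn.vmindex];
      }
      return NULL;            // falls through to IncompatibleClassChangeError
    }
    return NULL;
  }

The method_is_live flag in the generated code merely distinguishes the first three cases, which share the common jump_from_method_handle tail, from linkToInterface, which either jumps or throws on its own.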
+ 
+ #ifndef PRODUCT
+-#define DESCRIBE_RICOCHET_OFFSET(rf, name) \
+-  values.describe(frame_no, (intptr_t *) (((uintptr_t)rf) + MethodHandles::RicochetFrame::name##_offset_in_bytes()), #name)
+-
+-void MethodHandles::RicochetFrame::describe(const frame* fr, FrameValues& values, int frame_no)  {
+-    address bp = (address) fr->fp();
+-    RicochetFrame* rf = (RicochetFrame*)(bp - sender_link_offset_in_bytes());
+-
+-    // ricochet slots
+-    DESCRIBE_RICOCHET_OFFSET(rf, exact_sender_sp);
+-    DESCRIBE_RICOCHET_OFFSET(rf, conversion);
+-    DESCRIBE_RICOCHET_OFFSET(rf, saved_args_base);
+-    DESCRIBE_RICOCHET_OFFSET(rf, saved_args_layout);
+-    DESCRIBE_RICOCHET_OFFSET(rf, saved_target);
+-    DESCRIBE_RICOCHET_OFFSET(rf, continuation);
+-
+-    // relevant ricochet targets (in caller frame)
+-    values.describe(-1, rf->saved_args_base(),  err_msg("*saved_args_base for #%d", frame_no));
+-}
+-#endif // ASSERT
+-
+-#ifndef PRODUCT
+-extern "C" void print_method_handle(oop mh);
+ void trace_method_handle_stub(const char* adaptername,
+                               oop mh,
+                               intptr_t* saved_regs,
+                               intptr_t* entry_sp) {
+   // called as a leaf from native code: do not block the JVM!
+-  bool has_mh = (strstr(adaptername, "return/") == NULL);  // return adapters don't have rcx_mh
++  bool has_mh = (strstr(adaptername, "/static") == NULL &&
++                 strstr(adaptername, "linkTo") == NULL);    // static linkers don't have MH
+   const char* mh_reg_name = has_mh ? "rcx_mh" : "rcx";
+-  tty->print_cr("MH %s %s="PTR_FORMAT" sp="PTR_FORMAT, adaptername, mh_reg_name, mh, entry_sp);
++  tty->print_cr("MH %s %s="PTR_FORMAT" sp="PTR_FORMAT,
++                adaptername, mh_reg_name,
++                mh, entry_sp);
+ 
+   if (Verbose) {
+     tty->print_cr("Registers:");
+@@ -1086,12 +575,18 @@
+         values.describe(-1, dump_fp, "fp for #1 <not parsed, cannot trust pc>");
+         values.describe(-1, dump_sp, "sp for #1");
+       }
++      values.describe(-1, entry_sp, "raw top of stack");
+ 
+       tty->print_cr("Stack layout:");
+       values.print(p);
+     }
+-    if (has_mh)
+-      print_method_handle(mh);
++    if (has_mh && mh->is_oop()) {
++      mh->print();
++      if (java_lang_invoke_MethodHandle::is_instance(mh)) {
++        if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0)
++          java_lang_invoke_MethodHandle::form(mh)->print();
++      }
++    }
+   }
+ }
+ 
+@@ -1159,1363 +654,3 @@
+ }
+ #endif //PRODUCT
+ 
+-// which conversion op types are implemented here?
+-int MethodHandles::adapter_conversion_ops_supported_mask() {
+-  return ((1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_ONLY)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_RAW)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_CHECK_CAST)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_PRIM)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_REF_TO_PRIM)
+-          //OP_PRIM_TO_REF is below...
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SWAP_ARGS)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_ROT_ARGS)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DUP_ARGS)
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS)
+-          //OP_COLLECT_ARGS is below...
+-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS)
+-         |(
+-           java_lang_invoke_MethodTypeForm::vmlayout_offset_in_bytes() <= 0 ? 0 :
+-           ((1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF)
+-           |(1<<java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS)
+-           |(1<<java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS)
+-            ))
+-         );
+-}
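The value returned by this deleted function was consumed as a per-op bitmask, one bit per OP_* constant, to report which AdapterMethodHandle conversions had hand-written stubs; with the lazy LambdaForm design the question no longer arises. The intended test was simply (hypothetical helper, shown only for illustration):

  // 'mask' is the value returned above; 'op' is one of the
  // java_lang_invoke_AdapterMethodHandle::OP_* constants.
  static bool conversion_op_supported(int mask, int op) {
    return ((mask >> op) & 1) != 0;   // bit set => that conversion had a stub
  }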
+-
+-//------------------------------------------------------------------------------
+-// MethodHandles::generate_method_handle_stub
+-//
+-// Generate an "entry" field for a method handle.
+-// This determines how the method handle will respond to calls.
+-void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHandles::EntryKind ek) {
+-  MethodHandles::EntryKind ek_orig = ek_original_kind(ek);
+-
+-  // Here is the register state during an interpreted call,
+-  // as set up by generate_method_handle_interpreter_entry():
+-  // - rbx: garbage temp (was MethodHandle.invoke methodOop, unused)
+-  // - rcx: receiver method handle
+-  // - rax: method handle type (only used by the check_mtype entry point)
+-  // - rsi/r13: sender SP (must preserve; see prepare_to_jump_from_interpreted)
+-  // - rdx: garbage temp, can blow away
+-
+-  const Register rcx_recv    = rcx;
+-  const Register rax_argslot = rax;
+-  const Register rbx_temp    = rbx;
+-  const Register rdx_temp    = rdx;
+-  const Register rdi_temp    = rdi;
+-
+-  // This guy is set up by prepare_to_jump_from_interpreted (from interpreted calls)
+-  // and gen_c2i_adapter (from compiled calls):
+-  const Register saved_last_sp = saved_last_sp_register();
+-
+-  // Argument registers for _raise_exception.
+-  // 32-bit: Pass first two oop/int args in registers ECX and EDX.
+-  const Register rarg0_code     = LP64_ONLY(j_rarg0) NOT_LP64(rcx);
+-  const Register rarg1_actual   = LP64_ONLY(j_rarg1) NOT_LP64(rdx);
+-  const Register rarg2_required = LP64_ONLY(j_rarg2) NOT_LP64(rdi);
+-  assert_different_registers(rarg0_code, rarg1_actual, rarg2_required, saved_last_sp);
+-
+-  guarantee(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes() != 0, "must have offsets");
+-
+-  // some handy addresses
+-  Address rcx_mh_vmtarget(    rcx_recv, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes() );
+-  Address rcx_dmh_vmindex(    rcx_recv, java_lang_invoke_DirectMethodHandle::vmindex_offset_in_bytes() );
+-
+-  Address rcx_bmh_vmargslot(  rcx_recv, java_lang_invoke_BoundMethodHandle::vmargslot_offset_in_bytes() );
+-  Address rcx_bmh_argument(   rcx_recv, java_lang_invoke_BoundMethodHandle::argument_offset_in_bytes() );
+-
+-  Address rcx_amh_vmargslot(  rcx_recv, java_lang_invoke_AdapterMethodHandle::vmargslot_offset_in_bytes() );
+-  Address rcx_amh_argument(   rcx_recv, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes() );
+-  Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() );
+-  Address vmarg;                // __ argument_address(vmargslot)
+-
+-  const int java_mirror_offset = in_bytes(Klass::java_mirror_offset());
+-
+-  if (have_entry(ek)) {
+-    __ nop();                   // empty stubs make SG sick
+-    return;
+-  }
+-
+-#ifdef ASSERT
+-  __ push((int32_t) 0xEEEEEEEE);
+-  __ push((int32_t) (intptr_t) entry_name(ek));
+-  LP64_ONLY(__ push((int32_t) high((intptr_t) entry_name(ek))));
+-  __ push((int32_t) 0x33333333);
+-#endif //ASSERT
+-
+-  address interp_entry = __ pc();
+-
+-  trace_method_handle(_masm, entry_name(ek));
+-
+-  BLOCK_COMMENT(err_msg("Entry %s {", entry_name(ek)));
+-
+-  switch ((int) ek) {
+-  case _raise_exception:
+-    {
+-      // Not a real MH entry, but rather shared code for raising an
+-      // exception.  Since we use the compiled entry, arguments are
+-      // expected in compiler argument registers.
+-      assert(raise_exception_method(), "must be set");
+-      assert(raise_exception_method()->from_compiled_entry(), "method must be linked");
+-
+-      const Register rax_pc = rax;
+-      __ pop(rax_pc);  // caller PC
+-      __ mov(rsp, saved_last_sp);  // cut the stack back to where the caller started
+-
+-      Register rbx_method = rbx_temp;
+-      __ movptr(rbx_method, ExternalAddress((address) &_raise_exception_method));
+-
+-      const int jobject_oop_offset = 0;
+-      __ movptr(rbx_method, Address(rbx_method, jobject_oop_offset));  // dereference the jobject
+-
+-      __ movptr(saved_last_sp, rsp);
+-      __ subptr(rsp, 3 * wordSize);
+-      __ push(rax_pc);         // restore caller PC
+-
+-      __ movl  (__ argument_address(constant(2)), rarg0_code);
+-      __ movptr(__ argument_address(constant(1)), rarg1_actual);
+-      __ movptr(__ argument_address(constant(0)), rarg2_required);
+-      jump_from_method_handle(_masm, rbx_method, rax);
+-    }
+-    break;
+-
+-  case _invokestatic_mh:
+-  case _invokespecial_mh:
+-    {
+-      Register rbx_method = rbx_temp;
+-      __ load_heap_oop(rbx_method, rcx_mh_vmtarget); // target is a methodOop
+-      __ verify_oop(rbx_method);
+-      // same as TemplateTable::invokestatic or invokespecial,
+-      // minus the CP setup and profiling:
+-      if (ek == _invokespecial_mh) {
+-        // Must load & check the first argument before entering the target method.
+-        __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp);
+-        __ movptr(rcx_recv, __ argument_address(rax_argslot, -1));
+-        __ null_check(rcx_recv);
+-        __ verify_oop(rcx_recv);
+-      }
+-      jump_from_method_handle(_masm, rbx_method, rax);
+-    }
+-    break;
+-
+-  case _invokevirtual_mh:
+-    {
+-      // same as TemplateTable::invokevirtual,
+-      // minus the CP setup and profiling:
+-
+-      // pick out the vtable index and receiver offset from the MH,
+-      // and then we can discard it:
+-      __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp);
+-      Register rbx_index = rbx_temp;
+-      __ movl(rbx_index, rcx_dmh_vmindex);
+-      // Note:  The verifier allows us to ignore rcx_mh_vmtarget.
+-      __ movptr(rcx_recv, __ argument_address(rax_argslot, -1));
+-      __ null_check(rcx_recv, oopDesc::klass_offset_in_bytes());
+-
+-      // get receiver klass
+-      Register rax_klass = rax_argslot;
+-      __ load_klass(rax_klass, rcx_recv);
+-      __ verify_oop(rax_klass);
+-
+-      // get target methodOop & entry point
+-      const int base = instanceKlass::vtable_start_offset() * wordSize;
+-      assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+-      Address vtable_entry_addr(rax_klass,
+-                                rbx_index, Address::times_ptr,
+-                                base + vtableEntry::method_offset_in_bytes());
+-      Register rbx_method = rbx_temp;
+-      __ movptr(rbx_method, vtable_entry_addr);
+-
+-      __ verify_oop(rbx_method);
+-      jump_from_method_handle(_masm, rbx_method, rax);
+-    }
+-    break;
+-
+-  case _invokeinterface_mh:
+-    {
+-      // same as TemplateTable::invokeinterface,
+-      // minus the CP setup and profiling:
+-
+-      // pick out the interface and itable index from the MH.
+-      __ load_method_handle_vmslots(rax_argslot, rcx_recv, rdx_temp);
+-      Register rdx_intf  = rdx_temp;
+-      Register rbx_index = rbx_temp;
+-      __ load_heap_oop(rdx_intf, rcx_mh_vmtarget);
+-      __ movl(rbx_index, rcx_dmh_vmindex);
+-      __ movptr(rcx_recv, __ argument_address(rax_argslot, -1));
+-      __ null_check(rcx_recv, oopDesc::klass_offset_in_bytes());
+-
+-      // get receiver klass
+-      Register rax_klass = rax_argslot;
+-      __ load_klass(rax_klass, rcx_recv);
+-      __ verify_oop(rax_klass);
+-
+-      Register rbx_method = rbx_index;
+-
+-      // get interface klass
+-      Label no_such_interface;
+-      __ verify_oop(rdx_intf);
+-      __ lookup_interface_method(rax_klass, rdx_intf,
+-                                 // note: next two args must be the same:
+-                                 rbx_index, rbx_method,
+-                                 rdi_temp,
+-                                 no_such_interface);
+-
+-      __ verify_oop(rbx_method);
+-      jump_from_method_handle(_masm, rbx_method, rax);
+-      __ hlt();
+-
+-      __ bind(no_such_interface);
+-      // Throw an exception.
+-      // For historical reasons, it will be IncompatibleClassChangeError.
+-      __ mov(rbx_temp, rcx_recv);  // rarg2_required might be RCX
+-      assert_different_registers(rarg2_required, rbx_temp);
+-      __ movptr(rarg2_required, Address(rdx_intf, java_mirror_offset));  // required interface
+-      __ mov(   rarg1_actual,   rbx_temp);                               // bad receiver
+-      __ movl(  rarg0_code,     (int) Bytecodes::_invokeinterface);      // who is complaining?
+-      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));
+-    }
+-    break;
+-
+-  case _bound_ref_mh:
+-  case _bound_int_mh:
+-  case _bound_long_mh:
+-  case _bound_ref_direct_mh:
+-  case _bound_int_direct_mh:
+-  case _bound_long_direct_mh:
+-    {
+-      const bool direct_to_method = (ek >= _bound_ref_direct_mh);
+-      BasicType arg_type  = ek_bound_mh_arg_type(ek);
+-      int       arg_slots = type2size[arg_type];
+-
+-      // make room for the new argument:
+-      __ movl(rax_argslot, rcx_bmh_vmargslot);
+-      __ lea(rax_argslot, __ argument_address(rax_argslot));
+-
+-      insert_arg_slots(_masm, arg_slots * stack_move_unit(), rax_argslot, rbx_temp, rdx_temp);
+-
+-      // store bound argument into the new stack slot:
+-      __ load_heap_oop(rbx_temp, rcx_bmh_argument);
+-      if (arg_type == T_OBJECT) {
+-        __ movptr(Address(rax_argslot, 0), rbx_temp);
+-      } else {
+-        Address prim_value_addr(rbx_temp, java_lang_boxing_object::value_offset_in_bytes(arg_type));
+-        move_typed_arg(_masm, arg_type, false,
+-                       Address(rax_argslot, 0),
+-                       prim_value_addr,
+-                       rbx_temp, rdx_temp);
+-      }
+-
+-      if (direct_to_method) {
+-        Register rbx_method = rbx_temp;
+-        __ load_heap_oop(rbx_method, rcx_mh_vmtarget);
+-        __ verify_oop(rbx_method);
+-        jump_from_method_handle(_masm, rbx_method, rax);
+-      } else {
+-        __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
+-        __ verify_oop(rcx_recv);
+-        __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+-      }
+-    }
+-    break;
+-
+-  case _adapter_opt_profiling:
+-    if (java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes() != 0) {
+-      Address rcx_mh_vmcount(rcx_recv, java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes());
+-      __ incrementl(rcx_mh_vmcount);
+-    }
+-    // fall through
+-
+-  case _adapter_retype_only:
+-  case _adapter_retype_raw:
+-    // immediately jump to the next MH layer:
+-    __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
+-    __ verify_oop(rcx_recv);
+-    __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+-    // This is OK when all parameter types widen.
+-    // It is also OK when a return type narrows.
+-    break;
+-
+-  case _adapter_check_cast:
+-    {
+-      // temps:
+-      Register rbx_klass = rbx_temp; // interesting AMH data
+-
+-      // check a reference argument before jumping to the next layer of MH:
+-      __ movl(rax_argslot, rcx_amh_vmargslot);
+-      vmarg = __ argument_address(rax_argslot);
+-
+-      // What class are we casting to?
+-      __ load_heap_oop(rbx_klass, rcx_amh_argument); // this is a Class object!
+-      load_klass_from_Class(_masm, rbx_klass);
+-
+-      Label done;
+-      __ movptr(rdx_temp, vmarg);
+-      __ testptr(rdx_temp, rdx_temp);
+-      __ jcc(Assembler::zero, done);         // no cast if null
+-      __ load_klass(rdx_temp, rdx_temp);
+-
+-      // live at this point:
+-      // - rbx_klass:  klass required by the target method
+-      // - rdx_temp:   argument klass to test
+-      // - rcx_recv:   adapter method handle
+-      __ check_klass_subtype(rdx_temp, rbx_klass, rax_argslot, done);
+-
+-      // If we get here, the type check failed!
+-      // Call the wrong_method_type stub, passing the failing argument type in rax.
+-      Register rax_mtype = rax_argslot;
+-      __ movl(rax_argslot, rcx_amh_vmargslot);  // reload argslot field
+-      __ movptr(rdx_temp, vmarg);
+-
+-      assert_different_registers(rarg2_required, rdx_temp);
+-      __ load_heap_oop(rarg2_required, rcx_amh_argument);             // required class
+-      __ mov(          rarg1_actual,   rdx_temp);                     // bad object
+-      __ movl(         rarg0_code,     (int) Bytecodes::_checkcast);  // who is complaining?
+-      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));
+-
+-      __ bind(done);
+-      // get the new MH:
+-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
+-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+-    }
+-    break;
+-
+-  case _adapter_prim_to_prim:
+-  case _adapter_ref_to_prim:
+-  case _adapter_prim_to_ref:
+-    // handled completely by optimized cases
+-    __ stop("init_AdapterMethodHandle should not issue this");
+-    break;
+-
+-  case _adapter_opt_i2i:        // optimized subcase of adapt_prim_to_prim
+-//case _adapter_opt_f2i:        // optimized subcase of adapt_prim_to_prim
+-  case _adapter_opt_l2i:        // optimized subcase of adapt_prim_to_prim
+-  case _adapter_opt_unboxi:     // optimized subcase of adapt_ref_to_prim
+-    {
+-      // perform an in-place conversion to int or an int subword
+-      __ movl(rax_argslot, rcx_amh_vmargslot);
+-      vmarg = __ argument_address(rax_argslot);
+-
+-      switch (ek) {
+-      case _adapter_opt_i2i:
+-        __ movl(rdx_temp, vmarg);
+-        break;
+-      case _adapter_opt_l2i:
+-        {
+-          // just delete the extra slot; on a little-endian machine we keep the first
+-          __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
+-          remove_arg_slots(_masm, -stack_move_unit(),
+-                           rax_argslot, rbx_temp, rdx_temp);
+-          vmarg = Address(rax_argslot, -Interpreter::stackElementSize);
+-          __ movl(rdx_temp, vmarg);
+-        }
+-        break;
+-      case _adapter_opt_unboxi:
+-        {
+-          // Load the value up from the heap.
+-          __ movptr(rdx_temp, vmarg);
+-          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_INT);
+-#ifdef ASSERT
+-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
+-            if (is_subword_type(BasicType(bt)))
+-              assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(BasicType(bt)), "");
+-          }
+-#endif
+-          __ null_check(rdx_temp, value_offset);
+-          __ movl(rdx_temp, Address(rdx_temp, value_offset));
+-          // We load this as a word.  Because we are little-endian,
+-          // the low bits will be correct, but the high bits may need cleaning.
+-          // The vminfo will guide us to clean those bits.
+-        }
+-        break;
+-      default:
+-        ShouldNotReachHere();
+-      }
+-
+-      // Do the requested conversion and store the value.
+-      Register rbx_vminfo = rbx_temp;
+-      load_conversion_vminfo(_masm, rbx_vminfo, rcx_amh_conversion);
+-
+-      // get the new MH:
+-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
+-      // (now we are done with the old MH)
+-
+-      // original 32-bit vmdata word must be of this form:
+-      //    | MBZ:6 | signBitCount:8 | srcDstTypes:8 | conversionOp:8 |
+-      __ xchgptr(rcx, rbx_vminfo);                // free rcx for shifts
+-      __ shll(rdx_temp /*, rcx*/);
+-      Label zero_extend, done;
+-      __ testl(rcx, CONV_VMINFO_SIGN_FLAG);
+-      __ jccb(Assembler::zero, zero_extend);
+-
+-      // this path is taken for int->byte, int->short
+-      __ sarl(rdx_temp /*, rcx*/);
+-      __ jmpb(done);
+-
+-      __ bind(zero_extend);
+-      // this is taken for int->char
+-      __ shrl(rdx_temp /*, rcx*/);
+-
+-      __ bind(done);
+-      __ movl(vmarg, rdx_temp);  // Store the value.
+-      __ xchgptr(rcx, rbx_vminfo);                // restore rcx_recv
+-
+-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+-    }
+-    break;
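The shll/sarl/shrl dance above is the in-place narrowing that the vminfo byte encodes: shift the unwanted high bits out to the left, then shift back arithmetically (byte/short) or logically (char). In plain C++ terms (illustrative sketch; 'shift' plays the role of the vminfo count, e.g. 24 for byte, 16 for short and char):

  #include <cstdint>

  // Narrow a 32-bit value to a smaller integral type in place.  An arithmetic
  // right shift on a negative int is assumed, as on the targeted compilers.
  static int32_t narrow_in_place_sketch(int32_t value, int shift, bool is_signed) {
    uint32_t u = static_cast<uint32_t>(value) << shift;       // shll
    return is_signed ? (static_cast<int32_t>(u) >> shift)     // sarl: sign-extend
                     : static_cast<int32_t>(u >> shift);      // shrl: zero-extend
  }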
+-
+-  case _adapter_opt_i2l:        // optimized subcase of adapt_prim_to_prim
+-  case _adapter_opt_unboxl:     // optimized subcase of adapt_ref_to_prim
+-    {
+-      // perform an in-place int-to-long or ref-to-long conversion
+-      __ movl(rax_argslot, rcx_amh_vmargslot);
+-
+-      // on a little-endian machine we keep the first slot and add another after
+-      __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
+-      insert_arg_slots(_masm, stack_move_unit(),
+-                       rax_argslot, rbx_temp, rdx_temp);
+-      Address vmarg1(rax_argslot, -Interpreter::stackElementSize);
+-      Address vmarg2 = vmarg1.plus_disp(Interpreter::stackElementSize);
+-
+-      switch (ek) {
+-      case _adapter_opt_i2l:
+-        {
+-#ifdef _LP64
+-          __ movslq(rdx_temp, vmarg1);  // Load sign-extended
+-          __ movq(vmarg1, rdx_temp);    // Store into first slot
+-#else
+-          __ movl(rdx_temp, vmarg1);
+-          __ sarl(rdx_temp, BitsPerInt - 1);  // __ extend_sign()
+-          __ movl(vmarg2, rdx_temp); // store second word
+-#endif
+-        }
+-        break;
+-      case _adapter_opt_unboxl:
+-        {
+-          // Load the value up from the heap.
+-          __ movptr(rdx_temp, vmarg1);
+-          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_LONG);
+-          assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(T_DOUBLE), "");
+-          __ null_check(rdx_temp, value_offset);
+-#ifdef _LP64
+-          __ movq(rbx_temp, Address(rdx_temp, value_offset));
+-          __ movq(vmarg1, rbx_temp);
+-#else
+-          __ movl(rbx_temp, Address(rdx_temp, value_offset + 0*BytesPerInt));
+-          __ movl(rdx_temp, Address(rdx_temp, value_offset + 1*BytesPerInt));
+-          __ movl(vmarg1, rbx_temp);
+-          __ movl(vmarg2, rdx_temp);
+-#endif
+-        }
+-        break;
+-      default:
+-        ShouldNotReachHere();
+-      }
+-
+-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
+-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+-    }
+-    break;
+-
+-  case _adapter_opt_f2d:        // optimized subcase of adapt_prim_to_prim
+-  case _adapter_opt_d2f:        // optimized subcase of adapt_prim_to_prim
+-    {
+-      // perform an in-place floating primitive conversion
+-      __ movl(rax_argslot, rcx_amh_vmargslot);
+-      __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
+-      if (ek == _adapter_opt_f2d) {
+-        insert_arg_slots(_masm, stack_move_unit(),
+-                         rax_argslot, rbx_temp, rdx_temp);
+-      }
+-      Address vmarg(rax_argslot, -Interpreter::stackElementSize);
+-
+-#ifdef _LP64
+-      if (ek == _adapter_opt_f2d) {
+-        __ movflt(xmm0, vmarg);
+-        __ cvtss2sd(xmm0, xmm0);
+-        __ movdbl(vmarg, xmm0);
+-      } else {
+-        __ movdbl(xmm0, vmarg);
+-        __ cvtsd2ss(xmm0, xmm0);
+-        __ movflt(vmarg, xmm0);
+-      }
+-#else //_LP64
+-      if (ek == _adapter_opt_f2d) {
+-        __ fld_s(vmarg);        // load float to ST0
+-        __ fstp_d(vmarg);       // store double
+-      } else {
+-        __ fld_d(vmarg);        // load double to ST0
+-        __ fstp_s(vmarg);       // store single
+-      }
+-#endif //_LP64
+-
+-      if (ek == _adapter_opt_d2f) {
+-        remove_arg_slots(_masm, -stack_move_unit(),
+-                         rax_argslot, rbx_temp, rdx_temp);
+-      }
+-
+-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
+-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+-    }
+-    break;
+-
+-  case _adapter_swap_args:
+-  case _adapter_rot_args:
+-    // handled completely by optimized cases
+-    __ stop("init_AdapterMethodHandle should not issue this");
+-    break;
+-
+-  case _adapter_opt_swap_1:
+-  case _adapter_opt_swap_2:
+-  case _adapter_opt_rot_1_up:
+-  case _adapter_opt_rot_1_down:
+-  case _adapter_opt_rot_2_up:
+-  case _adapter_opt_rot_2_down:
+-    {
+-      int swap_slots = ek_adapter_opt_swap_slots(ek);
+-      int rotate     = ek_adapter_opt_swap_mode(ek);
+-
+-      // 'argslot' is the position of the first argument to swap
+-      __ movl(rax_argslot, rcx_amh_vmargslot);
+-      __ lea(rax_argslot, __ argument_address(rax_argslot));
+-
+-      // 'vminfo' is the second
+-      Register rbx_destslot = rbx_temp;
+-      load_conversion_vminfo(_masm, rbx_destslot, rcx_amh_conversion);
+-      __ lea(rbx_destslot, __ argument_address(rbx_destslot));
+-      if (VerifyMethodHandles)
+-        verify_argslot(_masm, rbx_destslot, "swap point must fall within current frame");
+-
+-      assert(Interpreter::stackElementSize == wordSize, "else rethink use of wordSize here");
+-      if (!rotate) {
+-        // simple swap
+-        for (int i = 0; i < swap_slots; i++) {
+-          __ movptr(rdi_temp, Address(rax_argslot,  i * wordSize));
+-          __ movptr(rdx_temp, Address(rbx_destslot, i * wordSize));
+-          __ movptr(Address(rax_argslot,  i * wordSize), rdx_temp);
+-          __ movptr(Address(rbx_destslot, i * wordSize), rdi_temp);
+-        }
+-      } else {
+-        // A rotate is actually pair of moves, with an "odd slot" (or pair)
+-        // changing place with a series of other slots.
+-        // First, push the "odd slot", which is going to get overwritten
+-        for (int i = swap_slots - 1; i >= 0; i--) {
+-          // handle one with rdi_temp instead of a push:
+-          if (i == 0)  __ movptr(rdi_temp, Address(rax_argslot, i * wordSize));
+-          else         __ pushptr(         Address(rax_argslot, i * wordSize));
+-        }
+-        if (rotate > 0) {
+-          // Here is rotate > 0:
+-          // (low mem)                                          (high mem)
+-          //     | dest:     more_slots...     | arg: odd_slot :arg+1 |
+-          // =>
+-          //     | dest: odd_slot | dest+1: more_slots...      :arg+1 |
+-          // work argslot down to destslot, copying contiguous data upwards
+-          // pseudo-code:
+-          //   rax = src_addr - swap_bytes
+-          //   rbx = dest_addr
+-          //   while (rax >= rbx) *(rax + swap_bytes) = *(rax + 0), rax--;
+-          move_arg_slots_up(_masm,
+-                            rbx_destslot,
+-                            Address(rax_argslot, 0),
+-                            swap_slots,
+-                            rax_argslot, rdx_temp);
+-        } else {
+-          // Here is the other direction, rotate < 0:
+-          // (low mem)                                          (high mem)
+-          //     | arg: odd_slot | arg+1: more_slots...       :dest+1 |
+-          // =>
+-          //     | arg:    more_slots...     | dest: odd_slot :dest+1 |
+-          // work argslot up to destslot, copying contiguous data downwards
+-          // pseudo-code:
+-          //   rax = src_addr + swap_bytes
+-          //   rbx = dest_addr
+-          //   while (rax <= rbx) *(rax - swap_bytes) = *(rax + 0), rax++;
+-          // dest_slot denotes an exclusive upper limit
+-          int limit_bias = OP_ROT_ARGS_DOWN_LIMIT_BIAS;
+-          if (limit_bias != 0)
+-            __ addptr(rbx_destslot, - limit_bias * wordSize);
+-          move_arg_slots_down(_masm,
+-                              Address(rax_argslot, swap_slots * wordSize),
+-                              rbx_destslot,
+-                              -swap_slots,
+-                              rax_argslot, rdx_temp);
+-          __ subptr(rbx_destslot, swap_slots * wordSize);
+-        }
+-        // pop the original first chunk into the destination slot, now free
+-        for (int i = 0; i < swap_slots; i++) {
+-          if (i == 0)  __ movptr(Address(rbx_destslot, i * wordSize), rdi_temp);
+-          else         __ popptr(Address(rbx_destslot, i * wordSize));
+-        }
+-      }
+-
+-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
+-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+-    }
+-    break;
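Net effect of the rotate cases above, stated once without the stack bookkeeping (the simple swap is just a pairwise exchange): the "odd" one- or two-word group moves from one end of the affected run of slots to the other, with everything else shifting over by that amount. On an ordinary sequence this is std::rotate (sketch with illustrative names, not HotSpot code):

  #include <algorithm>
  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // [lo, hi) is the affected run of stack words; the odd group of 'width'
  // words sits at one end of the run and ends up at the other end.
  static void rotate_args_sketch(std::vector<intptr_t>& slots,
                                 std::size_t lo, std::size_t hi,
                                 std::size_t width, bool odd_group_is_first) {
    if (odd_group_is_first)       // odd group moves toward the far end
      std::rotate(slots.begin() + lo, slots.begin() + lo + width,
                  slots.begin() + hi);
    else                          // odd group moves toward the near end
      std::rotate(slots.begin() + lo, slots.begin() + hi - width,
                  slots.begin() + hi);
  }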
+-
+-  case _adapter_dup_args:
+-    {
+-      // 'argslot' is the position of the first argument to duplicate
+-      __ movl(rax_argslot, rcx_amh_vmargslot);
+-      __ lea(rax_argslot, __ argument_address(rax_argslot));
+-
+-      // 'stack_move' is negative number of words to duplicate
+-      Register rdi_stack_move = rdi_temp;
+-      load_stack_move(_masm, rdi_stack_move, rcx_recv, true);
+-
+-      if (VerifyMethodHandles) {
+-        verify_argslots(_masm, rdi_stack_move, rax_argslot, true,
+-                        "copied argument(s) must fall within current frame");
+-      }
+-
+-      // insert location is always the bottom of the argument list:
+-      Address insert_location = __ argument_address(constant(0));
+-      int pre_arg_words = insert_location.disp() / wordSize;   // return PC is pushed
+-      assert(insert_location.base() == rsp, "");
+-
+-      __ negl(rdi_stack_move);
+-      push_arg_slots(_masm, rax_argslot, rdi_stack_move,
+-                     pre_arg_words, rbx_temp, rdx_temp);
+-
+-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
+-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+-    }
+-    break;
+-
+-  case _adapter_drop_args:
+-    {
+-      // 'argslot' is the position of the first argument to nuke
+-      __ movl(rax_argslot, rcx_amh_vmargslot);
+-      __ lea(rax_argslot, __ argument_address(rax_argslot));
+-
+-      // (must do previous push after argslot address is taken)
+-
+-      // 'stack_move' is number of words to drop
+-      Register rdi_stack_move = rdi_temp;
+-      load_stack_move(_masm, rdi_stack_move, rcx_recv, false);
+-      remove_arg_slots(_masm, rdi_stack_move,
+-                       rax_argslot, rbx_temp, rdx_temp);
+-
+-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
+-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+-    }
+-    break;
+-
+-  case _adapter_collect_args:
+-  case _adapter_fold_args:
+-  case _adapter_spread_args:
+-    // handled completely by optimized cases
+-    __ stop("init_AdapterMethodHandle should not issue this");
+-    break;
+-
+-  case _adapter_opt_collect_ref:
+-  case _adapter_opt_collect_int:
+-  case _adapter_opt_collect_long:
+-  case _adapter_opt_collect_float:
+-  case _adapter_opt_collect_double:
+-  case _adapter_opt_collect_void:
+-  case _adapter_opt_collect_0_ref:
+-  case _adapter_opt_collect_1_ref:
+-  case _adapter_opt_collect_2_ref:
+-  case _adapter_opt_collect_3_ref:
+-  case _adapter_opt_collect_4_ref:
+-  case _adapter_opt_collect_5_ref:
+-  case _adapter_opt_filter_S0_ref:
+-  case _adapter_opt_filter_S1_ref:
+-  case _adapter_opt_filter_S2_ref:
+-  case _adapter_opt_filter_S3_ref:
+-  case _adapter_opt_filter_S4_ref:
+-  case _adapter_opt_filter_S5_ref:
+-  case _adapter_opt_collect_2_S0_ref:
+-  case _adapter_opt_collect_2_S1_ref:
+-  case _adapter_opt_collect_2_S2_ref:
+-  case _adapter_opt_collect_2_S3_ref:
+-  case _adapter_opt_collect_2_S4_ref:
+-  case _adapter_opt_collect_2_S5_ref:
+-  case _adapter_opt_fold_ref:
+-  case _adapter_opt_fold_int:
+-  case _adapter_opt_fold_long:
+-  case _adapter_opt_fold_float:
+-  case _adapter_opt_fold_double:
+-  case _adapter_opt_fold_void:
+-  case _adapter_opt_fold_1_ref:
+-  case _adapter_opt_fold_2_ref:
+-  case _adapter_opt_fold_3_ref:
+-  case _adapter_opt_fold_4_ref:
+-  case _adapter_opt_fold_5_ref:
+-    {
+-      // Given a fresh incoming stack frame, build a new ricochet frame.
+-      // On entry, TOS points at a return PC, and RBP is the callers frame ptr.
+-      // RSI/R13 has the caller's exact stack pointer, which we must also preserve.
+-      // RCX contains an AdapterMethodHandle of the indicated kind.
+-
+-      // Relevant AMH fields:
+-      // amh.vmargslot:
+-      //   points to the trailing edge of the arguments
+-      //   to filter, collect, or fold.  For a boxing operation,
+-      //   it points just after the single primitive value.
+-      // amh.argument:
+-      //   recursively called MH, on |collect| arguments
+-      // amh.vmtarget:
+-      //   final destination MH, on return value, etc.
+-      // amh.conversion.dest:
+-      //   tells what is the type of the return value
+-      //   (not needed here, since dest is also derived from ek)
+-      // amh.conversion.vminfo:
+-      //   points to the trailing edge of the return value
+-      //   when the vmtarget is to be called; this is
+-      //   equal to vmargslot + (retained ? |collect| : 0)
+-
+-      // Pass 0 or more argument slots to the recursive target.
+-      int collect_count_constant = ek_adapter_opt_collect_count(ek);
+-
+-      // The collected arguments are copied from the saved argument list:
+-      int collect_slot_constant = ek_adapter_opt_collect_slot(ek);
+-
+-      assert(ek_orig == _adapter_collect_args ||
+-             ek_orig == _adapter_fold_args, "");
+-      bool retain_original_args = (ek_orig == _adapter_fold_args);
+-
+-      // The return value is replaced (or inserted) at the 'vminfo' argslot.
+-      // Sometimes we can compute this statically.
+-      int dest_slot_constant = -1;
+-      if (!retain_original_args)
+-        dest_slot_constant = collect_slot_constant;
+-      else if (collect_slot_constant >= 0 && collect_count_constant >= 0)
+-        // We are preserving all the arguments, and the return value is prepended,
+-        // so the return slot is to the left (above) the |collect| sequence.
+-        dest_slot_constant = collect_slot_constant + collect_count_constant;
+-
+-      // Replace all those slots by the result of the recursive call.
+-      // The result type can be one of ref, int, long, float, double, void.
+-      // In the case of void, nothing is pushed on the stack after return.
+-      BasicType dest = ek_adapter_opt_collect_type(ek);
+-      assert(dest == type2wfield[dest], "dest is a stack slot type");
+-      int dest_count = type2size[dest];
+-      assert(dest_count == 1 || dest_count == 2 || (dest_count == 0 && dest == T_VOID), "dest has a size");
+-
+-      // Choose a return continuation.
+-      EntryKind ek_ret = _adapter_opt_return_any;
+-      if (dest != T_CONFLICT && OptimizeMethodHandles) {
+-        switch (dest) {
+-        case T_INT    : ek_ret = _adapter_opt_return_int;     break;
+-        case T_LONG   : ek_ret = _adapter_opt_return_long;    break;
+-        case T_FLOAT  : ek_ret = _adapter_opt_return_float;   break;
+-        case T_DOUBLE : ek_ret = _adapter_opt_return_double;  break;
+-        case T_OBJECT : ek_ret = _adapter_opt_return_ref;     break;
+-        case T_VOID   : ek_ret = _adapter_opt_return_void;    break;
+-        default       : ShouldNotReachHere();
+-        }
+-        if (dest == T_OBJECT && dest_slot_constant >= 0) {
+-          EntryKind ek_try = EntryKind(_adapter_opt_return_S0_ref + dest_slot_constant);
+-          if (ek_try <= _adapter_opt_return_LAST &&
+-              ek_adapter_opt_return_slot(ek_try) == dest_slot_constant) {
+-            ek_ret = ek_try;
+-          }
+-        }
+-        assert(ek_adapter_opt_return_type(ek_ret) == dest, "");
+-      }
+-
+-      // Already pushed:  ... keep1 | collect | keep2 | sender_pc |
+-      // push(sender_pc);
+-
+-      // Compute argument base:
+-      Register rax_argv = rax_argslot;
+-      __ lea(rax_argv, __ argument_address(constant(0)));
+-
+-      // Push a few extra argument words, if we need them to store the return value.
+-      {
+-        int extra_slots = 0;
+-        if (retain_original_args) {
+-          extra_slots = dest_count;
+-        } else if (collect_count_constant == -1) {
+-          extra_slots = dest_count;  // collect_count might be zero; be generous
+-        } else if (dest_count > collect_count_constant) {
+-          extra_slots = (dest_count - collect_count_constant);
+-        } else {
+-          // else we know we have enough dead space in |collect| to repurpose for return values
+-        }
+-        DEBUG_ONLY(extra_slots += 1);
+-        if (extra_slots > 0) {
+-          __ pop(rbx_temp);   // return value
+-          __ subptr(rsp, (extra_slots * Interpreter::stackElementSize));
+-          // Push guard word #2 in debug mode.
+-          DEBUG_ONLY(__ movptr(Address(rsp, 0), (int32_t) RicochetFrame::MAGIC_NUMBER_2));
+-          __ push(rbx_temp);
+-        }
+-      }
+-
+-      RicochetFrame::enter_ricochet_frame(_masm, rcx_recv, rax_argv,
+-                                          entry(ek_ret)->from_interpreted_entry(), rbx_temp);
+-
+-      // Now pushed:  ... keep1 | collect | keep2 | RF |
+-      // some handy frame slots:
+-      Address exact_sender_sp_addr = RicochetFrame::frame_address(RicochetFrame::exact_sender_sp_offset_in_bytes());
+-      Address conversion_addr      = RicochetFrame::frame_address(RicochetFrame::conversion_offset_in_bytes());
+-      Address saved_args_base_addr = RicochetFrame::frame_address(RicochetFrame::saved_args_base_offset_in_bytes());
+-
+-#ifdef ASSERT
+-      if (VerifyMethodHandles && dest != T_CONFLICT) {
+-        BLOCK_COMMENT("verify AMH.conv.dest");
+-        load_conversion_dest_type(_masm, rbx_temp, conversion_addr);
+-        Label L_dest_ok;
+-        __ cmpl(rbx_temp, (int) dest);
+-        __ jcc(Assembler::equal, L_dest_ok);
+-        if (dest == T_INT) {
+-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
+-            if (is_subword_type(BasicType(bt))) {
+-              __ cmpl(rbx_temp, (int) bt);
+-              __ jcc(Assembler::equal, L_dest_ok);
+-            }
+-          }
+-        }
+-        __ stop("bad dest in AMH.conv");
+-        __ BIND(L_dest_ok);
+-      }
+-#endif //ASSERT
+-
+-      // Find out where the original copy of the recursive argument sequence begins.
+-      Register rax_coll = rax_argv;
+-      {
+-        RegisterOrConstant collect_slot = collect_slot_constant;
+-        if (collect_slot_constant == -1) {
+-          __ movl(rdi_temp, rcx_amh_vmargslot);
+-          collect_slot = rdi_temp;
+-        }
+-        if (collect_slot_constant != 0)
+-          __ lea(rax_coll, Address(rax_argv, collect_slot, Interpreter::stackElementScale()));
+-        // rax_coll now points at the trailing edge of |collect| and leading edge of |keep2|
+-      }
+-
+-      // Replace the old AMH with the recursive MH.  (No going back now.)
+-      // In the case of a boxing call, the recursive call is to a 'boxer' method,
+-      // such as Integer.valueOf or Long.valueOf.  In the case of a filter
+-      // or collect call, it will take one or more arguments, transform them,
+-      // and return some result, to store back into argument_base[vminfo].
+-      __ load_heap_oop(rcx_recv, rcx_amh_argument);
+-      if (VerifyMethodHandles)  verify_method_handle(_masm, rcx_recv);
+-
+-      // Push a space for the recursively called MH first:
+-      __ push((int32_t)NULL_WORD);
+-
+-      // Calculate |collect|, the number of arguments we are collecting.
+-      Register rdi_collect_count = rdi_temp;
+-      RegisterOrConstant collect_count;
+-      if (collect_count_constant >= 0) {
+-        collect_count = collect_count_constant;
+-      } else {
+-        __ load_method_handle_vmslots(rdi_collect_count, rcx_recv, rdx_temp);
+-        collect_count = rdi_collect_count;
+-      }
+-#ifdef ASSERT
+-      if (VerifyMethodHandles && collect_count_constant >= 0) {
+-        __ load_method_handle_vmslots(rbx_temp, rcx_recv, rdx_temp);
+-        Label L_count_ok;
+-        __ cmpl(rbx_temp, collect_count_constant);
+-        __ jcc(Assembler::equal, L_count_ok);
+-        __ stop("bad vminfo in AMH.conv");
+-        __ BIND(L_count_ok);
+-      }
+-#endif //ASSERT
+-
+-      // copy |collect| slots directly to TOS:
+-      push_arg_slots(_masm, rax_coll, collect_count, 0, rbx_temp, rdx_temp);
+-      // Now pushed:  ... keep1 | collect | keep2 | RF... | collect |
+-      // rax_coll still points at the trailing edge of |collect| and leading edge of |keep2|
+-
+-      // If necessary, adjust the saved arguments to make room for the eventual return value.
+-      // Normal adjustment:  ... keep1 | +dest+ | -collect- | keep2 | RF... | collect |
+-      // If retaining args:  ... keep1 | +dest+ |  collect  | keep2 | RF... | collect |
+-      // In the non-retaining case, this might move keep2 either up or down.
+-      // We don't have to copy the whole | RF... collect | complex,
+-      // but we must adjust RF.saved_args_base.
+-      // Also, from now on, we will forget about the original copy of |collect|.
+-      // If we are retaining it, we will treat it as part of |keep2|.
+-      // For clarity we will define |keep3| = |collect|keep2| or |keep2|.
+-
+-      BLOCK_COMMENT("adjust trailing arguments {");
+-      // Compare the sizes of |+dest+| and |-collect-|, which are opposed opening and closing movements.
+-      int                open_count  = dest_count;
+-      RegisterOrConstant close_count = collect_count_constant;
+-      Register rdi_close_count = rdi_collect_count;
+-      if (retain_original_args) {
+-        close_count = constant(0);
+-      } else if (collect_count_constant == -1) {
+-        close_count = rdi_collect_count;
+-      }
+-
+-      // How many slots need moving?  This is simply dest_slot (0 => no |keep3|).
+-      RegisterOrConstant keep3_count;
+-      Register rsi_keep3_count = rsi;  // can repair from RF.exact_sender_sp
+-      if (dest_slot_constant >= 0) {
+-        keep3_count = dest_slot_constant;
+-      } else  {
+-        load_conversion_vminfo(_masm, rsi_keep3_count, conversion_addr);
+-        keep3_count = rsi_keep3_count;
+-      }
+-#ifdef ASSERT
+-      if (VerifyMethodHandles && dest_slot_constant >= 0) {
+-        load_conversion_vminfo(_masm, rbx_temp, conversion_addr);
+-        Label L_vminfo_ok;
+-        __ cmpl(rbx_temp, dest_slot_constant);
+-        __ jcc(Assembler::equal, L_vminfo_ok);
+-        __ stop("bad vminfo in AMH.conv");
+-        __ BIND(L_vminfo_ok);
+-      }
+-#endif //ASSERT
+-
+-      // tasks remaining:
+-      bool move_keep3 = (!keep3_count.is_constant() || keep3_count.as_constant() != 0);
+-      bool stomp_dest = (NOT_DEBUG(dest == T_OBJECT) DEBUG_ONLY(dest_count != 0));
+-      bool fix_arg_base = (!close_count.is_constant() || open_count != close_count.as_constant());
+-
+-      if (stomp_dest | fix_arg_base) {
+-        // we will probably need an updated rax_argv value
+-        if (collect_slot_constant >= 0) {
+-          // rax_coll already holds the leading edge of |keep2|, so tweak it
+-          assert(rax_coll == rax_argv, "elided a move");
+-          if (collect_slot_constant != 0)
+-            __ subptr(rax_argv, collect_slot_constant * Interpreter::stackElementSize);
+-        } else {
+-          // Just reload from RF.saved_args_base.
+-          __ movptr(rax_argv, saved_args_base_addr);
+-        }
+-      }
+-
+-      // Old and new argument locations (based at slot 0).
+-      // Net shift (&new_argv - &old_argv) is (close_count - open_count).
+-      bool zero_open_count = (open_count == 0);  // remember this bit of info
+-      if (move_keep3 && fix_arg_base) {
+-        // It will be easier to have everything in one register:
+-        if (close_count.is_register()) {
+-          // Deduct open_count from close_count register to get a clean +/- value.
+-          __ subptr(close_count.as_register(), open_count);
+-        } else {
+-          close_count = close_count.as_constant() - open_count;
+-        }
+-        open_count = 0;
+-      }
+-      Address old_argv(rax_argv, 0);
+-      Address new_argv(rax_argv, close_count,  Interpreter::stackElementScale(),
+-                                - open_count * Interpreter::stackElementSize);
+-
+-      // First decide if any actual data are to be moved.
+-      // We can skip if (a) |keep3| is empty, or (b) the argument list size didn't change.
+-      // (As it happens, all movements involve an argument list size change.)
+-
+-      // If there are variable parameters, use dynamic checks to skip around the whole mess.
+-      Label L_done;
+-      if (!keep3_count.is_constant()) {
+-        __ testl(keep3_count.as_register(), keep3_count.as_register());
+-        __ jcc(Assembler::zero, L_done);
+-      }
+-      if (!close_count.is_constant()) {
+-        __ cmpl(close_count.as_register(), open_count);
+-        __ jcc(Assembler::equal, L_done);
+-      }
+-
+-      if (move_keep3 && fix_arg_base) {
+-        bool emit_move_down = false, emit_move_up = false, emit_guard = false;
+-        if (!close_count.is_constant()) {
+-          emit_move_down = emit_guard = !zero_open_count;
+-          emit_move_up   = true;
+-        } else if (open_count != close_count.as_constant()) {
+-          emit_move_down = (open_count > close_count.as_constant());
+-          emit_move_up   = !emit_move_down;
+-        }
+-        Label L_move_up;
+-        if (emit_guard) {
+-          __ cmpl(close_count.as_register(), open_count);
+-          __ jcc(Assembler::greater, L_move_up);
+-        }
+-
+-        if (emit_move_down) {
+-          // Move arguments down if |+dest+| > |-collect-|
+-          // (This is rare, except when arguments are retained.)
+-          // This opens space for the return value.
+-          if (keep3_count.is_constant()) {
+-            for (int i = 0; i < keep3_count.as_constant(); i++) {
+-              __ movptr(rdx_temp, old_argv.plus_disp(i * Interpreter::stackElementSize));
+-              __ movptr(          new_argv.plus_disp(i * Interpreter::stackElementSize), rdx_temp);
+-            }
+-          } else {
+-            Register rbx_argv_top = rbx_temp;
+-            __ lea(rbx_argv_top, old_argv.plus_disp(keep3_count, Interpreter::stackElementScale()));
+-            move_arg_slots_down(_masm,
+-                                old_argv,     // beginning of old argv
+-                                rbx_argv_top, // end of old argv
+-                                close_count,  // distance to move down (must be negative)
+-                                rax_argv, rdx_temp);
+-            // Used argv as an iteration variable; reload from RF.saved_args_base.
+-            __ movptr(rax_argv, saved_args_base_addr);
+-          }
+-        }
+-
+-        if (emit_guard) {
+-          __ jmp(L_done);  // assumes emit_move_up is true also
+-          __ BIND(L_move_up);
+-        }
+-
+-        if (emit_move_up) {
+-
+-          // Move arguments up if |+dest+| < |-collect-|
+-          // (This is usual, except when |keep3| is empty.)
+-          // This closes up the space occupied by the now-deleted collect values.
+-          if (keep3_count.is_constant()) {
+-            for (int i = keep3_count.as_constant() - 1; i >= 0; i--) {
+-              __ movptr(rdx_temp, old_argv.plus_disp(i * Interpreter::stackElementSize));
+-              __ movptr(          new_argv.plus_disp(i * Interpreter::stackElementSize), rdx_temp);
+-            }
+-          } else {
+-            Address argv_top = old_argv.plus_disp(keep3_count, Interpreter::stackElementScale());
+-            move_arg_slots_up(_masm,
+-                              rax_argv,     // beginning of old argv
+-                              argv_top,     // end of old argv
+-                              close_count,  // distance to move up (must be positive)
+-                              rbx_temp, rdx_temp);
+-          }
+-        }
+-      }
+-      __ BIND(L_done);
+-
+-      if (fix_arg_base) {
+-        // adjust RF.saved_args_base by adding (close_count - open_count)
+-        if (!new_argv.is_same_address(Address(rax_argv, 0)))
+-          __ lea(rax_argv, new_argv);
+-        __ movptr(saved_args_base_addr, rax_argv);
+-      }
+-
+-      if (stomp_dest) {
+-        // Stomp the return slot, so it doesn't hold garbage.
+-        // This isn't strictly necessary, but it may help detect bugs.
+-        int forty_two = RicochetFrame::RETURN_VALUE_PLACEHOLDER;
+-        __ movptr(Address(rax_argv, keep3_count, Address::times_ptr),
+-                  (int32_t) forty_two);
+-        // uses rsi_keep3_count
+-      }
+-      BLOCK_COMMENT("} adjust trailing arguments");
+-
+-      BLOCK_COMMENT("do_recursive_call");
+-      __ mov(saved_last_sp, rsp);    // set rsi/r13 for callee
+-      __ pushptr(ExternalAddress(SharedRuntime::ricochet_blob()->bounce_addr()).addr());
+-      // The globally unique bounce address has two purposes:
+-      // 1. It helps the JVM recognize this frame (frame::is_ricochet_frame).
+-      // 2. When returned to, it cuts back the stack and redirects control flow
+-      //    to the return handler.
+-      // The return handler will further cut back the stack when it takes
+-      // down the RF.  Perhaps there is a way to streamline this further.
+-
+-      // State during recursive call:
+-      // ... keep1 | dest | dest=42 | keep3 | RF... | collect | bounce_pc |
+-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+-
+-      break;
+-    }
+-
+-  case _adapter_opt_return_ref:
+-  case _adapter_opt_return_int:
+-  case _adapter_opt_return_long:
+-  case _adapter_opt_return_float:
+-  case _adapter_opt_return_double:
+-  case _adapter_opt_return_void:
+-  case _adapter_opt_return_S0_ref:
+-  case _adapter_opt_return_S1_ref:
+-  case _adapter_opt_return_S2_ref:
+-  case _adapter_opt_return_S3_ref:
+-  case _adapter_opt_return_S4_ref:
+-  case _adapter_opt_return_S5_ref:
+-    {
+-      BasicType dest_type_constant = ek_adapter_opt_return_type(ek);
+-      int       dest_slot_constant = ek_adapter_opt_return_slot(ek);
+-
+-      if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
+-
+-      if (dest_slot_constant == -1) {
+-        // The current stub is a general handler for this dest_type.
+-        // It can be called from _adapter_opt_return_any below.
+-        // Stash the address in a little table.
+-        assert((dest_type_constant & CONV_TYPE_MASK) == dest_type_constant, "oob");
+-        address return_handler = __ pc();
+-        _adapter_return_handlers[dest_type_constant] = return_handler;
+-        if (dest_type_constant == T_INT) {
+-          // do the subword types too
+-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
+-            if (is_subword_type(BasicType(bt)) &&
+-                _adapter_return_handlers[bt] == NULL) {
+-              _adapter_return_handlers[bt] = return_handler;
+-            }
+-          }
+-        }
+-      }
+-
+-      Register rbx_arg_base = rbx_temp;
+-      assert_different_registers(rax, rdx,  // possibly live return value registers
+-                                 rdi_temp, rbx_arg_base);
+-
+-      Address conversion_addr      = RicochetFrame::frame_address(RicochetFrame::conversion_offset_in_bytes());
+-      Address saved_args_base_addr = RicochetFrame::frame_address(RicochetFrame::saved_args_base_offset_in_bytes());
+-
+-      __ movptr(rbx_arg_base, saved_args_base_addr);
+-      RegisterOrConstant dest_slot = dest_slot_constant;
+-      if (dest_slot_constant == -1) {
+-        load_conversion_vminfo(_masm, rdi_temp, conversion_addr);
+-        dest_slot = rdi_temp;
+-      }
+-      // Store the result back into the argslot.
+-      // This code uses the interpreter calling sequence, in which the return value
+-      // is usually left in the TOS register, as defined by InterpreterMacroAssembler::pop.
+-      // There are certain irregularities with floating point values, which can be seen
+-      // in TemplateInterpreterGenerator::generate_return_entry_for.
+-      move_return_value(_masm, dest_type_constant, Address(rbx_arg_base, dest_slot, Interpreter::stackElementScale()));
+-
+-      RicochetFrame::leave_ricochet_frame(_masm, rcx_recv, rbx_arg_base, rdx_temp);
+-      __ push(rdx_temp);  // repush the return PC
+-
+-      // Load the final target and go.
+-      if (VerifyMethodHandles)  verify_method_handle(_masm, rcx_recv);
+-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+-      __ hlt(); // --------------------
+-      break;
+-    }
+-
+-  case _adapter_opt_return_any:
+-    {
+-      if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
+-      Register rdi_conv = rdi_temp;
+-      assert_different_registers(rax, rdx,  // possibly live return value registers
+-                                 rdi_conv, rbx_temp);
+-
+-      Address conversion_addr = RicochetFrame::frame_address(RicochetFrame::conversion_offset_in_bytes());
+-      load_conversion_dest_type(_masm, rdi_conv, conversion_addr);
+-      __ lea(rbx_temp, ExternalAddress((address) &_adapter_return_handlers[0]));
+-      __ movptr(rbx_temp, Address(rbx_temp, rdi_conv, Address::times_ptr));
+-
+-#ifdef ASSERT
+-      { Label L_badconv;
+-        __ testptr(rbx_temp, rbx_temp);
+-        __ jccb(Assembler::zero, L_badconv);
+-        __ jmp(rbx_temp);
+-        __ bind(L_badconv);
+-        __ stop("bad method handle return");
+-      }
+-#else //ASSERT
+-      __ jmp(rbx_temp);
+-#endif //ASSERT
+-      break;
+-    }
+-
+-  case _adapter_opt_spread_0:
+-  case _adapter_opt_spread_1_ref:
+-  case _adapter_opt_spread_2_ref:
+-  case _adapter_opt_spread_3_ref:
+-  case _adapter_opt_spread_4_ref:
+-  case _adapter_opt_spread_5_ref:
+-  case _adapter_opt_spread_ref:
+-  case _adapter_opt_spread_byte:
+-  case _adapter_opt_spread_char:
+-  case _adapter_opt_spread_short:
+-  case _adapter_opt_spread_int:
+-  case _adapter_opt_spread_long:
+-  case _adapter_opt_spread_float:
+-  case _adapter_opt_spread_double:
+-    {
+-      // spread an array out into a group of arguments
+-      int length_constant = ek_adapter_opt_spread_count(ek);
+-      bool length_can_be_zero = (length_constant == 0);
+-      if (length_constant < 0) {
+-        // some adapters with variable length must handle the zero case
+-        if (!OptimizeMethodHandles ||
+-            ek_adapter_opt_spread_type(ek) != T_OBJECT)
+-          length_can_be_zero = true;
+-      }
+-
+-      // find the address of the array argument
+-      __ movl(rax_argslot, rcx_amh_vmargslot);
+-      __ lea(rax_argslot, __ argument_address(rax_argslot));
+-
+-      // grab another temp
+-      Register rsi_temp = rsi;
+-
+-      // arx_argslot points both to the array and to the first output arg
+-      vmarg = Address(rax_argslot, 0);
+-
+-      // Get the array value.
+-      Register  rdi_array       = rdi_temp;
+-      Register  rdx_array_klass = rdx_temp;
+-      BasicType elem_type = ek_adapter_opt_spread_type(ek);
+-      int       elem_slots = type2size[elem_type];  // 1 or 2
+-      int       array_slots = 1;  // array is always a T_OBJECT
+-      int       length_offset   = arrayOopDesc::length_offset_in_bytes();
+-      int       elem0_offset    = arrayOopDesc::base_offset_in_bytes(elem_type);
+-      __ movptr(rdi_array, vmarg);
+-
+-      Label L_array_is_empty, L_insert_arg_space, L_copy_args, L_args_done;
+-      if (length_can_be_zero) {
+-        // handle the null pointer case, if zero is allowed
+-        Label L_skip;
+-        if (length_constant < 0) {
+-          load_conversion_vminfo(_masm, rbx_temp, rcx_amh_conversion);
+-          __ testl(rbx_temp, rbx_temp);
+-          __ jcc(Assembler::notZero, L_skip);
+-        }
+-        __ testptr(rdi_array, rdi_array);
+-        __ jcc(Assembler::notZero, L_skip);
+-
+-        // If 'rsi' contains the 'saved_last_sp' (this is only the
+-        // case in a 32-bit version of the VM) we have to save 'rsi'
+-        // on the stack because later on (at 'L_array_is_empty') 'rsi'
+-        // will be overwritten.
+-        { if (rsi_temp == saved_last_sp)  __ push(saved_last_sp); }
+-        // Also prepare a handy macro which restores 'rsi' if required.
+-#define UNPUSH_RSI                                                      \
+-        { if (rsi_temp == saved_last_sp)  __ pop(saved_last_sp); }
+-
+-        __ jmp(L_array_is_empty);
+-        __ bind(L_skip);
+-      }
+-      __ null_check(rdi_array, oopDesc::klass_offset_in_bytes());
+-      __ load_klass(rdx_array_klass, rdi_array);
+-
+-      // Save 'rsi' if required (see comment above).  Do this only
+-      // after the null check such that the exception handler which is
+-      // called in the case of a null pointer exception will not be
+-      // confused by the extra value on the stack (it expects the
+-      // return pointer on top of the stack)
+-      { if (rsi_temp == saved_last_sp)  __ push(saved_last_sp); }
+-
+-      // Check the array type.
+-      Register rbx_klass = rbx_temp;
+-      __ load_heap_oop(rbx_klass, rcx_amh_argument); // this is a Class object!
+-      load_klass_from_Class(_masm, rbx_klass);
+-
+-      Label ok_array_klass, bad_array_klass, bad_array_length;
+-      __ check_klass_subtype(rdx_array_klass, rbx_klass, rsi_temp, ok_array_klass);
+-      // If we get here, the type check failed!
+-      __ jmp(bad_array_klass);
+-      __ BIND(ok_array_klass);
+-
+-      // Check length.
+-      if (length_constant >= 0) {
+-        __ cmpl(Address(rdi_array, length_offset), length_constant);
+-      } else {
+-        Register rbx_vminfo = rbx_temp;
+-        load_conversion_vminfo(_masm, rbx_vminfo, rcx_amh_conversion);
+-        __ cmpl(rbx_vminfo, Address(rdi_array, length_offset));
+-      }
+-      __ jcc(Assembler::notEqual, bad_array_length);
+-
+-      Register rdx_argslot_limit = rdx_temp;
+-
+-      // Array length checks out.  Now insert any required stack slots.
+-      if (length_constant == -1) {
+-        // Form a pointer to the end of the affected region.
+-        __ lea(rdx_argslot_limit, Address(rax_argslot, Interpreter::stackElementSize));
+-        // 'stack_move' is negative number of words to insert
+-        // This number already accounts for elem_slots.
+-        Register rsi_stack_move = rsi_temp;
+-        load_stack_move(_masm, rsi_stack_move, rcx_recv, true);
+-        __ cmpptr(rsi_stack_move, 0);
+-        assert(stack_move_unit() < 0, "else change this comparison");
+-        __ jcc(Assembler::less, L_insert_arg_space);
+-        __ jcc(Assembler::equal, L_copy_args);
+-        // single argument case, with no array movement
+-        __ BIND(L_array_is_empty);
+-        remove_arg_slots(_masm, -stack_move_unit() * array_slots,
+-                         rax_argslot, rbx_temp, rdx_temp);
+-        __ jmp(L_args_done);  // no spreading to do
+-        __ BIND(L_insert_arg_space);
+-        // come here in the usual case, stack_move < 0 (2 or more spread arguments)
+-        Register rdi_temp = rdi_array;  // spill this
+-        insert_arg_slots(_masm, rsi_stack_move,
+-                         rax_argslot, rbx_temp, rdi_temp);
+-        // reload the array since rsi was killed
+-        // reload from rdx_argslot_limit since rax_argslot is now decremented
+-        __ movptr(rdi_array, Address(rdx_argslot_limit, -Interpreter::stackElementSize));
+-      } else if (length_constant >= 1) {
+-        int new_slots = (length_constant * elem_slots) - array_slots;
+-        insert_arg_slots(_masm, new_slots * stack_move_unit(),
+-                         rax_argslot, rbx_temp, rdx_temp);
+-      } else if (length_constant == 0) {
+-        __ BIND(L_array_is_empty);
+-        remove_arg_slots(_masm, -stack_move_unit() * array_slots,
+-                         rax_argslot, rbx_temp, rdx_temp);
+-      } else {
+-        ShouldNotReachHere();
+-      }
+-
+-      // Copy from the array to the new slots.
+-      // Note: Stack change code preserves integrity of rax_argslot pointer.
+-      // So even after slot insertions, rax_argslot still points to first argument.
+-      // Beware:  Arguments that are shallow on the stack are deep in the array,
+-      // and vice versa.  So a downward-growing stack (the usual) has to be copied
+-      // elementwise in reverse order from the source array.
+-      __ BIND(L_copy_args);
+-      if (length_constant == -1) {
+-        // [rax_argslot, rdx_argslot_limit) is the area we are inserting into.
+-        // Array element [0] goes at rdx_argslot_limit[-wordSize].
+-        Register rdi_source = rdi_array;
+-        __ lea(rdi_source, Address(rdi_array, elem0_offset));
+-        Register rdx_fill_ptr = rdx_argslot_limit;
+-        Label loop;
+-        __ BIND(loop);
+-        __ addptr(rdx_fill_ptr, -Interpreter::stackElementSize * elem_slots);
+-        move_typed_arg(_masm, elem_type, true,
+-                       Address(rdx_fill_ptr, 0), Address(rdi_source, 0),
+-                       rbx_temp, rsi_temp);
+-        __ addptr(rdi_source, type2aelembytes(elem_type));
+-        __ cmpptr(rdx_fill_ptr, rax_argslot);
+-        __ jcc(Assembler::above, loop);
+-      } else if (length_constant == 0) {
+-        // nothing to copy
+-      } else {
+-        int elem_offset = elem0_offset;
+-        int slot_offset = length_constant * Interpreter::stackElementSize;
+-        for (int index = 0; index < length_constant; index++) {
+-          slot_offset -= Interpreter::stackElementSize * elem_slots;  // fill backward
+-          move_typed_arg(_masm, elem_type, true,
+-                         Address(rax_argslot, slot_offset), Address(rdi_array, elem_offset),
+-                         rbx_temp, rsi_temp);
+-          elem_offset += type2aelembytes(elem_type);
+-        }
+-      }
+-      __ BIND(L_args_done);
+-
+-      // Arguments are spread.  Move to next method handle.
+-      UNPUSH_RSI;
+-      __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
+-      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+-
+-      __ bind(bad_array_klass);
+-      UNPUSH_RSI;
+-      assert(!vmarg.uses(rarg2_required), "must be different registers");
+-      __ load_heap_oop( rarg2_required, Address(rdx_array_klass, java_mirror_offset));  // required type
+-      __ movptr(        rarg1_actual,   vmarg);                                         // bad array
+-      __ movl(          rarg0_code,     (int) Bytecodes::_aaload);                      // who is complaining?
+-      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));
+-
+-      __ bind(bad_array_length);
+-      UNPUSH_RSI;
+-      assert(!vmarg.uses(rarg2_required), "must be different registers");
+-      __ mov(    rarg2_required, rcx_recv);                       // AMH requiring a certain length
+-      __ movptr( rarg1_actual,   vmarg);                          // bad array
+-      __ movl(   rarg0_code,     (int) Bytecodes::_arraylength);  // who is complaining?
+-      __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));
+-#undef UNPUSH_RSI
+-
+-      break;
+-    }
+-
+-  default:
+-    // do not require all platforms to recognize all adapter types
+-    __ nop();
+-    return;
+-  }
+-  BLOCK_COMMENT(err_msg("} Entry %s", entry_name(ek)));
+-  __ hlt();
+-
+-  address me_cookie = MethodHandleEntry::start_compiled_entry(_masm, interp_entry);
+-  __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
+-
+-  init_entry(ek, MethodHandleEntry::finish_compiled_entry(_masm, me_cookie));
+-}
+diff --git a/src/cpu/x86/vm/methodHandles_x86.hpp b/src/cpu/x86/vm/methodHandles_x86.hpp
+--- a/src/cpu/x86/vm/methodHandles_x86.hpp
++++ b/src/cpu/x86/vm/methodHandles_x86.hpp
+@@ -27,266 +27,12 @@
+ 
+ // Adapters
+ enum /* platform_dependent_constants */ {
+-  adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 15000)) LP64_ONLY(32000 DEBUG_ONLY(+ 120000))
+-};
+-
+-public:
+-
+-// The stack just after the recursive call from a ricochet frame
+-// looks something like this.  Offsets are marked in words, not bytes.
+-// rsi (r13 on LP64) is part of the interpreter calling sequence
+-// which tells the callee where is my real rsp (for frame walking).
+-// (...lower memory addresses)
+-// rsp:     [ return pc                 ]   always the global RicochetBlob::bounce_addr
+-// rsp+1:   [ recursive arg N           ]
+-// rsp+2:   [ recursive arg N-1         ]
+-// ...
+-// rsp+N:   [ recursive arg 1           ]
+-// rsp+N+1: [ recursive method handle   ]
+-// ...
+-// rbp-6:   [ cleanup continuation pc   ]   <-- (struct RicochetFrame)
+-// rbp-5:   [ saved target MH           ]   the MH we will call on the saved args
+-// rbp-4:   [ saved args layout oop     ]   an int[] array which describes argument layout
+-// rbp-3:   [ saved args pointer        ]   address of transformed adapter arg M (slot 0)
+-// rbp-2:   [ conversion                ]   information about how the return value is used
+-// rbp-1:   [ exact sender sp           ]   exact TOS (rsi/r13) of original sender frame
+-// rbp+0:   [ saved sender fp           ]   (for original sender of AMH)
+-// rbp+1:   [ saved sender pc           ]   (back to original sender of AMH)
+-// rbp+2:   [ transformed adapter arg M ]   <-- (extended TOS of original sender)
+-// rbp+3:   [ transformed adapter arg M-1]
+-// ...
+-// rbp+M+1: [ transformed adapter arg 1 ]
+-// rbp+M+2: [ padding                   ] <-- (rbp + saved args base offset)
+-// ...      [ optional padding]
+-// (higher memory addresses...)
+-//
+-// The arguments originally passed by the original sender
+-// are lost, and arbitrary amounts of stack motion might have
+-// happened due to argument transformation.
+-// (This is done by C2I/I2C adapters and non-direct method handles.)
+-// This is why there is an unpredictable amount of memory between
+-// the extended and exact TOS of the sender.
+-// The ricochet adapter itself will also (in general) perform
+-// transformations before the recursive call.
+-//
+-// The transformed and saved arguments, immediately above the saved
+-// return PC, are a well-formed method handle invocation ready to execute.
+-// When the GC needs to walk the stack, these arguments are described
+-// via the saved arg types oop, an int[] array with a private format.
+-// This array is derived from the type of the transformed adapter
+-// method handle, which also sits at the base of the saved argument
+-// bundle.  Since the GC may not be able to fish out the int[]
+-// array, so it is pushed explicitly on the stack.  This may be
+-// an unnecessary expense.
+-//
+-// The following register conventions are significant at this point:
+-// rsp       the thread stack, as always; preserved by caller
+-// rsi/r13   exact TOS of recursive frame (contents of [rbp-2])
+-// rcx       recursive method handle (contents of [rsp+N+1])
+-// rbp       preserved by caller (not used by caller)
+-// Unless otherwise specified, all registers can be blown by the call.
+-//
+-// If this frame must be walked, the transformed adapter arguments
+-// will be found with the help of the saved arguments descriptor.
+-//
+-// Therefore, the descriptor must match the referenced arguments.
+-// The arguments must be followed by at least one word of padding,
+-// which will be necessary to complete the final method handle call.
+-// That word is not treated as holding an oop.  Neither is the word
+-//
+-// The word pointed to by the return argument pointer is not
+-// treated as an oop, even if points to a saved argument.
+-// This allows the saved argument list to have a "hole" in it
+-// to receive an oop from the recursive call.
+-// (The hole might temporarily contain RETURN_VALUE_PLACEHOLDER.)
+-//
+-// When the recursive callee returns, RicochetBlob::bounce_addr will
+-// immediately jump to the continuation stored in the RF.
+-// This continuation will merge the recursive return value
+-// into the saved argument list.  At that point, the original
+-// rsi, rbp, and rsp will be reloaded, the ricochet frame will
+-// disappear, and the final target of the adapter method handle
+-// will be invoked on the transformed argument list.
+-
+-class RicochetFrame {
+-  friend class MethodHandles;
+-  friend class VMStructs;
+-
+- private:
+-  intptr_t* _continuation;          // what to do when control gets back here
+-  oopDesc*  _saved_target;          // target method handle to invoke on saved_args
+-  oopDesc*  _saved_args_layout;     // caching point for MethodTypeForm.vmlayout cookie
+-  intptr_t* _saved_args_base;       // base of pushed arguments (slot 0, arg N) (-3)
+-  intptr_t  _conversion;            // misc. information from original AdapterMethodHandle (-2)
+-  intptr_t* _exact_sender_sp;       // parallel to interpreter_frame_sender_sp (-1)
+-  intptr_t* _sender_link;           // *must* coincide with frame::link_offset (0)
+-  address   _sender_pc;             // *must* coincide with frame::return_addr_offset (1)
+-
+- public:
+-  intptr_t* continuation() const        { return _continuation; }
+-  oop       saved_target() const        { return _saved_target; }
+-  oop       saved_args_layout() const   { return _saved_args_layout; }
+-  intptr_t* saved_args_base() const     { return _saved_args_base; }
+-  intptr_t  conversion() const          { return _conversion; }
+-  intptr_t* exact_sender_sp() const     { return _exact_sender_sp; }
+-  intptr_t* sender_link() const         { return _sender_link; }
+-  address   sender_pc() const           { return _sender_pc; }
+-
+-  intptr_t* extended_sender_sp() const {
+-    // The extended sender SP is above the current RicochetFrame.
+-    return (intptr_t*) (((address) this) + sizeof(RicochetFrame));
+-  }
+-
+-  intptr_t  return_value_slot_number() const {
+-    return adapter_conversion_vminfo(conversion());
+-  }
+-  BasicType return_value_type() const {
+-    return adapter_conversion_dest_type(conversion());
+-  }
+-  bool has_return_value_slot() const {
+-    return return_value_type() != T_VOID;
+-  }
+-  intptr_t* return_value_slot_addr() const {
+-    assert(has_return_value_slot(), "");
+-    return saved_arg_slot_addr(return_value_slot_number());
+-  }
+-  intptr_t* saved_target_slot_addr() const {
+-    return saved_arg_slot_addr(saved_args_length());
+-  }
+-  intptr_t* saved_arg_slot_addr(int slot) const {
+-    assert(slot >= 0, "");
+-    return (intptr_t*)( (address)saved_args_base() + (slot * Interpreter::stackElementSize) );
+-  }
+-
+-  jint      saved_args_length() const;
+-  jint      saved_arg_offset(int arg) const;
+-
+-  // GC interface
+-  oop*  saved_target_addr()                     { return (oop*)&_saved_target; }
+-  oop*  saved_args_layout_addr()                { return (oop*)&_saved_args_layout; }
+-
+-  oop  compute_saved_args_layout(bool read_cache, bool write_cache);
+-
+-  // Compiler/assembler interface.
+-  static int continuation_offset_in_bytes()     { return offset_of(RicochetFrame, _continuation); }
+-  static int saved_target_offset_in_bytes()     { return offset_of(RicochetFrame, _saved_target); }
+-  static int saved_args_layout_offset_in_bytes(){ return offset_of(RicochetFrame, _saved_args_layout); }
+-  static int saved_args_base_offset_in_bytes()  { return offset_of(RicochetFrame, _saved_args_base); }
+-  static int conversion_offset_in_bytes()       { return offset_of(RicochetFrame, _conversion); }
+-  static int exact_sender_sp_offset_in_bytes()  { return offset_of(RicochetFrame, _exact_sender_sp); }
+-  static int sender_link_offset_in_bytes()      { return offset_of(RicochetFrame, _sender_link); }
+-  static int sender_pc_offset_in_bytes()        { return offset_of(RicochetFrame, _sender_pc); }
+-
+-  // This value is not used for much, but it apparently must be nonzero.
+-  static int frame_size_in_bytes()              { return sender_link_offset_in_bytes(); }
+-
+-#ifdef ASSERT
+-  // The magic number is supposed to help find ricochet frames within the bytes of stack dumps.
+-  enum { MAGIC_NUMBER_1 = 0xFEED03E, MAGIC_NUMBER_2 = 0xBEEF03E };
+-  static int magic_number_1_offset_in_bytes()   { return -wordSize; }
+-  static int magic_number_2_offset_in_bytes()   { return sizeof(RicochetFrame); }
+-  intptr_t magic_number_1() const               { return *(intptr_t*)((address)this + magic_number_1_offset_in_bytes()); };
+-  intptr_t magic_number_2() const               { return *(intptr_t*)((address)this + magic_number_2_offset_in_bytes()); };
+-#endif //ASSERT
+-
+-  enum { RETURN_VALUE_PLACEHOLDER = (NOT_DEBUG(0) DEBUG_ONLY(42)) };
+-
+-  static void verify_offsets() NOT_DEBUG_RETURN;
+-  void verify() const NOT_DEBUG_RETURN; // check for MAGIC_NUMBER, etc.
+-  void zap_arguments() NOT_DEBUG_RETURN;
+-
+-  static void generate_ricochet_blob(MacroAssembler* _masm,
+-                                     // output params:
+-                                     int* bounce_offset,
+-                                     int* exception_offset,
+-                                     int* frame_size_in_words);
+-
+-  static void enter_ricochet_frame(MacroAssembler* _masm,
+-                                   Register rcx_recv,
+-                                   Register rax_argv,
+-                                   address return_handler,
+-                                   Register rbx_temp);
+-  static void leave_ricochet_frame(MacroAssembler* _masm,
+-                                   Register rcx_recv,
+-                                   Register new_sp_reg,
+-                                   Register sender_pc_reg);
+-
+-  static Address frame_address(int offset = 0) {
+-    // The RicochetFrame is found by subtracting a constant offset from rbp.
+-    return Address(rbp, - sender_link_offset_in_bytes() + offset);
+-  }
+-
+-  static RicochetFrame* from_frame(const frame& fr) {
+-    address bp = (address) fr.fp();
+-    RicochetFrame* rf = (RicochetFrame*)(bp - sender_link_offset_in_bytes());
+-    rf->verify();
+-    return rf;
+-  }
+-
+-  static void verify_clean(MacroAssembler* _masm) NOT_DEBUG_RETURN;
+-
+-  static void describe(const frame* fr, FrameValues& values, int frame_no) PRODUCT_RETURN;
++  adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000))
+ };
+ 
+ // Additional helper methods for MethodHandles code generation:
+ public:
+   static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg);
+-  static void load_conversion_vminfo(MacroAssembler* _masm, Register reg, Address conversion_field_addr);
+-  static void load_conversion_dest_type(MacroAssembler* _masm, Register reg, Address conversion_field_addr);
+-
+-  static void load_stack_move(MacroAssembler* _masm,
+-                              Register rdi_stack_move,
+-                              Register rcx_amh,
+-                              bool might_be_negative);
+-
+-  static void insert_arg_slots(MacroAssembler* _masm,
+-                               RegisterOrConstant arg_slots,
+-                               Register rax_argslot,
+-                               Register rbx_temp, Register rdx_temp);
+-
+-  static void remove_arg_slots(MacroAssembler* _masm,
+-                               RegisterOrConstant arg_slots,
+-                               Register rax_argslot,
+-                               Register rbx_temp, Register rdx_temp);
+-
+-  static void push_arg_slots(MacroAssembler* _masm,
+-                                   Register rax_argslot,
+-                                   RegisterOrConstant slot_count,
+-                                   int skip_words_count,
+-                                   Register rbx_temp, Register rdx_temp);
+-
+-  static void move_arg_slots_up(MacroAssembler* _masm,
+-                                Register rbx_bottom,  // invariant
+-                                Address  top_addr,    // can use rax_temp
+-                                RegisterOrConstant positive_distance_in_slots,
+-                                Register rax_temp, Register rdx_temp);
+-
+-  static void move_arg_slots_down(MacroAssembler* _masm,
+-                                  Address  bottom_addr,  // can use rax_temp
+-                                  Register rbx_top,      // invariant
+-                                  RegisterOrConstant negative_distance_in_slots,
+-                                  Register rax_temp, Register rdx_temp);
+-
+-  static void move_typed_arg(MacroAssembler* _masm,
+-                             BasicType type, bool is_element,
+-                             Address slot_dest, Address value_src,
+-                             Register rbx_temp, Register rdx_temp);
+-
+-  static void move_return_value(MacroAssembler* _masm, BasicType type,
+-                                Address return_slot);
+-
+-  static void verify_argslot(MacroAssembler* _masm, Register argslot_reg,
+-                             const char* error_message) NOT_DEBUG_RETURN;
+-
+-  static void verify_argslots(MacroAssembler* _masm,
+-                              RegisterOrConstant argslot_count,
+-                              Register argslot_reg,
+-                              bool negate_argslot,
+-                              const char* error_message) NOT_DEBUG_RETURN;
+-
+-  static void verify_stack_move(MacroAssembler* _masm,
+-                                RegisterOrConstant arg_slots,
+-                                int direction) NOT_DEBUG_RETURN;
+ 
+   static void verify_klass(MacroAssembler* _masm,
+                            Register obj, KlassHandle klass,
+@@ -297,9 +43,17 @@
+                  "reference is a MH");
+   }
+ 
++  static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
++
+   // Similar to InterpreterMacroAssembler::jump_from_interpreted.
+   // Takes care of special dispatch from single stepping too.
+-  static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp);
++  static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp,
++                                      bool for_compiler_entry);
++
++  static void jump_to_lambda_form(MacroAssembler* _masm,
++                                  Register recv, Register method_temp,
++                                  Register temp2,
++                                  bool for_compiler_entry);
+ 
+   static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN;
+ 
+diff --git a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
+--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
++++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
+@@ -643,6 +643,19 @@
+   __ movdbl(r, Address(saved_sp, next_val_off));
+ }
+ 
++static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
++                        address code_start, address code_end,
++                        Label& L_ok) {
++  Label L_fail;
++  __ lea(temp_reg, ExternalAddress(code_start));
++  __ cmpptr(pc_reg, temp_reg);
++  __ jcc(Assembler::belowEqual, L_fail);
++  __ lea(temp_reg, ExternalAddress(code_end));
++  __ cmpptr(pc_reg, temp_reg);
++  __ jcc(Assembler::below, L_ok);
++  __ bind(L_fail);
++}
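
For reference, the test that range_check encodes on the return PC is: fall
through toward L_fail when pc <= code_start (unsigned), branch to L_ok when
pc < code_end.  The VerifyAdapterCalls blocks below chain one such check per
code region and only reach the __ stop() if none of them matched.  A minimal
stand-alone C++ sketch of the same predicate (illustration only, not part of
the patch; the helper name is invented):

    #include <cstdint>

    // Mirrors the two branches above:
    //   cmpptr(pc, code_start); jcc(belowEqual, L_fail)  -> reject when pc <= start
    //   cmpptr(pc, code_end);   jcc(below,      L_ok)    -> accept when pc <  end
    static bool return_pc_in_blob(std::uintptr_t pc,
                                  std::uintptr_t code_start,
                                  std::uintptr_t code_end) {
      return pc > code_start && pc < code_end;  // unsigned, like belowEqual/below
    }
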
++
+ static void gen_i2c_adapter(MacroAssembler *masm,
+                             int total_args_passed,
+                             int comp_args_on_stack,
+@@ -653,9 +666,53 @@
+   // we may do a i2c -> c2i transition if we lose a race where compiled
+   // code goes non-entrant while we get args ready.
+ 
++  // Adapters can be frameless because they do not require the caller
++  // to perform additional cleanup work, such as correcting the stack pointer.
++  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
++  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
++  // even if a callee has modified the stack pointer.
++  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
++  // routinely repairs its caller's stack pointer (from sender_sp, which is set
++  // up via the senderSP register).
++  // In other words, if *either* the caller or callee is interpreted, we can
++  // get the stack pointer repaired after a call.
++  // This is why c2i and i2c adapters cannot be indefinitely composed.
++  // In particular, if a c2i adapter were to somehow call an i2c adapter,
++  // both caller and callee would be compiled methods, and neither would
++  // clean up the stack pointer changes performed by the two adapters.
++  // If this happens, control eventually transfers back to the compiled
++  // caller, but with an uncorrected stack, causing delayed havoc.
++
+   // Pick up the return address
+   __ movptr(rax, Address(rsp, 0));
+ 
++  if (VerifyAdapterCalls &&
++      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
++    // So, let's test for cascading c2i/i2c adapters right now.
++    //  assert(Interpreter::contains($return_addr) ||
++    //         StubRoutines::contains($return_addr),
++    //         "i2c adapter must return to an interpreter frame");
++    __ block_comment("verify_i2c { ");
++    Label L_ok;
++    if (Interpreter::code() != NULL)
++      range_check(masm, rax, rdi,
++                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
++                  L_ok);
++    if (StubRoutines::code1() != NULL)
++      range_check(masm, rax, rdi,
++                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
++                  L_ok);
++    if (StubRoutines::code2() != NULL)
++      range_check(masm, rax, rdi,
++                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
++                  L_ok);
++    const char* msg = "i2c adapter must return to an interpreter frame";
++    __ block_comment(msg);
++    __ stop(msg);
++    __ bind(L_ok);
++    __ block_comment("} verify_i2c ");
++  }
++
+   // Must preserve original SP for loading incoming arguments because
+   // we need to align the outgoing SP for compiled code.
+   __ movptr(rdi, rsp);
+@@ -1293,6 +1350,89 @@
+   __ bind(done);
+ }
+ 
++static void verify_oop_args(MacroAssembler* masm,
++                            int total_args_passed,
++                            const BasicType* sig_bt,
++                            const VMRegPair* regs) {
++  Register temp_reg = rbx;  // not part of any compiled calling seq
++  if (VerifyOops) {
++    for (int i = 0; i < total_args_passed; i++) {
++      if (sig_bt[i] == T_OBJECT ||
++          sig_bt[i] == T_ARRAY) {
++        VMReg r = regs[i].first();
++        assert(r->is_valid(), "bad oop arg");
++        if (r->is_stack()) {
++          __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
++          __ verify_oop(temp_reg);
++        } else {
++          __ verify_oop(r->as_Register());
++        }
++      }
++    }
++  }
++}
++
++static void gen_special_dispatch(MacroAssembler* masm,
++                                 int total_args_passed,
++                                 int comp_args_on_stack,
++                                 vmIntrinsics::ID special_dispatch,
++                                 const BasicType* sig_bt,
++                                 const VMRegPair* regs) {
++  verify_oop_args(masm, total_args_passed, sig_bt, regs);
++
++  // Now write the args into the outgoing interpreter space
++  bool     has_receiver   = false;
++  Register receiver_reg   = noreg;
++  int      member_arg_pos = -1;
++  Register member_reg     = noreg;
++  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
++  if (ref_kind != 0) {
++    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
++    member_reg = rbx;  // known to be free at this point
++    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
++  } else if (special_dispatch == vmIntrinsics::_invokeBasic) {
++    has_receiver = true;
++  } else {
++    guarantee(false, err_msg("special_dispatch=%d", special_dispatch));
++  }
++
++  if (member_reg != noreg) {
++    // Load the member_arg into register, if necessary.
++    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
++    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
++    VMReg r = regs[member_arg_pos].first();
++    assert(r->is_valid(), "bad member arg");
++    if (r->is_stack()) {
++      __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
++    } else {
++      // no data motion is needed
++      member_reg = r->as_Register();
++    }
++  }
++
++  if (has_receiver) {
++    // Make sure the receiver is loaded into a register.
++    assert(total_args_passed > 0, "oob");
++    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
++    VMReg r = regs[0].first();
++    assert(r->is_valid(), "bad receiver arg");
++    if (r->is_stack()) {
++      // Porting note:  This assumes that compiled calling conventions always
++      // pass the receiver oop in a register.  If this is not true on some
++      // platform, pick a temp and load the receiver from stack.
++      assert(false, "receiver always in a register");
++      receiver_reg = rcx;  // known to be free at this point
++      __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
++    } else {
++      // no data motion is needed
++      receiver_reg = r->as_Register();
++    }
++  }
++
++  // Figure out which address we are really jumping to:
++  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
++                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
++}
+ 
+ // ---------------------------------------------------------------------------
+ // Generate a native wrapper for a given method.  The method takes arguments
+@@ -1323,14 +1463,37 @@
+ //    transition back to thread_in_Java
+ //    return to caller
+ //
+-nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
++nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
+                                                 methodHandle method,
+                                                 int compile_id,
+                                                 int total_in_args,
+                                                 int comp_args_on_stack,
+-                                                BasicType *in_sig_bt,
+-                                                VMRegPair *in_regs,
++                                                BasicType* in_sig_bt,
++                                                VMRegPair* in_regs,
+                                                 BasicType ret_type) {
++  if (method->is_method_handle_intrinsic()) {
++    vmIntrinsics::ID iid = method->intrinsic_id();
++    intptr_t start = (intptr_t)__ pc();
++    int vep_offset = ((intptr_t)__ pc()) - start;
++    gen_special_dispatch(masm,
++                         total_in_args,
++                         comp_args_on_stack,
++                         method->intrinsic_id(),
++                         in_sig_bt,
++                         in_regs);
++    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
++    __ flush();
++    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
++    return nmethod::new_native_nmethod(method,
++                                       compile_id,
++                                       masm->code(),
++                                       vep_offset,
++                                       frame_complete,
++                                       stack_slots / VMRegImpl::slots_per_word,
++                                       in_ByteSize(-1),
++                                       in_ByteSize(-1),
++                                       (OopMapSet*)NULL);
++  }
+   bool is_critical_native = true;
+   address native_func = method->critical_native_function();
+   if (native_func == NULL) {
+@@ -1436,7 +1599,7 @@
+       if (in_regs[i].first()->is_Register()) {
+         const Register reg = in_regs[i].first()->as_Register();
+         switch (in_sig_bt[i]) {
+-          case T_ARRAY:
++          case T_ARRAY:  // critical array (uses 2 slots on LP64)
+           case T_BOOLEAN:
+           case T_BYTE:
+           case T_SHORT:
+diff --git a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
++++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+@@ -590,6 +590,19 @@
+   __ jmp(rcx);
+ }
+ 
++static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
++                        address code_start, address code_end,
++                        Label& L_ok) {
++  Label L_fail;
++  __ lea(temp_reg, ExternalAddress(code_start));
++  __ cmpptr(pc_reg, temp_reg);
++  __ jcc(Assembler::belowEqual, L_fail);
++  __ lea(temp_reg, ExternalAddress(code_end));
++  __ cmpptr(pc_reg, temp_reg);
++  __ jcc(Assembler::below, L_ok);
++  __ bind(L_fail);
++}
++
+ static void gen_i2c_adapter(MacroAssembler *masm,
+                             int total_args_passed,
+                             int comp_args_on_stack,
+@@ -605,9 +618,53 @@
+   // save code can segv when fxsave instructions find improperly
+   // aligned stack pointer.
+ 
++  // Adapters can be frameless because they do not require the caller
++  // to perform additional cleanup work, such as correcting the stack pointer.
++  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
++  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
++  // even if a callee has modified the stack pointer.
++  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
++  // routinely repairs its caller's stack pointer (from sender_sp, which is set
++  // up via the senderSP register).
++  // In other words, if *either* the caller or callee is interpreted, we can
++  // get the stack pointer repaired after a call.
++  // This is why c2i and i2c adapters cannot be indefinitely composed.
++  // In particular, if a c2i adapter were to somehow call an i2c adapter,
++  // both caller and callee would be compiled methods, and neither would
++  // clean up the stack pointer changes performed by the two adapters.
++  // If this happens, control eventually transfers back to the compiled
++  // caller, but with an uncorrected stack, causing delayed havoc.
++
+   // Pick up the return address
+   __ movptr(rax, Address(rsp, 0));
+ 
++  if (VerifyAdapterCalls &&
++      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
++    // So, let's test for cascading c2i/i2c adapters right now.
++    //  assert(Interpreter::contains($return_addr) ||
++    //         StubRoutines::contains($return_addr),
++    //         "i2c adapter must return to an interpreter frame");
++    __ block_comment("verify_i2c { ");
++    Label L_ok;
++    if (Interpreter::code() != NULL)
++      range_check(masm, rax, r11,
++                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
++                  L_ok);
++    if (StubRoutines::code1() != NULL)
++      range_check(masm, rax, r11,
++                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
++                  L_ok);
++    if (StubRoutines::code2() != NULL)
++      range_check(masm, rax, r11,
++                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
++                  L_ok);
++    const char* msg = "i2c adapter must return to an interpreter frame";
++    __ block_comment(msg);
++    __ stop(msg);
++    __ bind(L_ok);
++    __ block_comment("} verify_i2c ");
++  }
++
+   // Must preserve original SP for loading incoming arguments because
+   // we need to align the outgoing SP for compiled code.
+   __ movptr(r11, rsp);
+@@ -1366,6 +1423,14 @@
+ }
+ 
+ 
++// Different signatures may require very different orders for the move
++// to avoid clobbering other arguments.  There's no simple way to
++// order them safely.  Compute a safe order for issuing stores and
++// break any cycles in those stores.  This code is fairly general but
++// it's not necessary on the other platforms so we keep it in the
++// platform dependent code instead of moving it into a shared file.
++// (See bugs 7013347 & 7145024.)
++// Note that this code is specific to LP64.
+ class ComputeMoveOrder: public StackObj {
+   class MoveOperation: public ResourceObj {
+     friend class ComputeMoveOrder;
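
The comment above states the problem ComputeMoveOrder solves: the outgoing
argument stores may read locations that other stores are about to overwrite,
so the stores must be issued in a dependency-respecting order, and genuine
cycles (for example a register swap) must be broken with a scratch location.
The following self-contained C++ toy shows the general technique (illustration
only, not HotSpot's ComputeMoveOrder; the names and the integer "location"
model are invented, and it assumes each destination is written by exactly one
move):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Move { int src, dst; };   // "locations" are just small ints here

    static void emit(int src, int dst) { std::printf("store loc%d -> loc%d\n", src, dst); }

    // Issue moves so no location is overwritten while another pending move
    // still needs its old value; break cycles by parking one value in 'scratch'.
    static void schedule_moves(std::vector<Move> moves, int scratch) {
      bool progress = true;
      while (!moves.empty()) {
        if (!progress) {
          // Every remaining store is blocked: a cycle.  Park one source value.
          emit(moves.back().src, scratch);
          moves.back().src = scratch;
        }
        progress = false;
        for (std::size_t i = 0; i < moves.size(); ) {
          bool blocked = false;        // is moves[i].dst still read by someone else?
          for (std::size_t j = 0; j < moves.size(); ++j)
            if (j != i && moves[j].src == moves[i].dst) { blocked = true; break; }
          if (!blocked) {
            emit(moves[i].src, moves[i].dst);
            moves.erase(moves.begin() + i);
            progress = true;
          } else {
            ++i;
          }
        }
      }
    }

    int main() {
      // Swap loc1/loc2 plus an independent move loc3 -> loc4; loc0 is scratch.
      std::vector<Move> moves = { {1, 2}, {2, 1}, {3, 4} };
      schedule_moves(moves, /*scratch=*/0);
      return 0;
    }

The printed stores are then safe to execute one after another; the patch's
class does the analogous bookkeeping over the actual argument registers and
stack slots rather than toy integers.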
+@@ -1532,6 +1597,89 @@
+   }
+ };
+ 
++static void verify_oop_args(MacroAssembler* masm,
++                            int total_args_passed,
++                            const BasicType* sig_bt,
++                            const VMRegPair* regs) {
++  Register temp_reg = rbx;  // not part of any compiled calling seq
++  if (VerifyOops) {
++    for (int i = 0; i < total_args_passed; i++) {
++      if (sig_bt[i] == T_OBJECT ||
++          sig_bt[i] == T_ARRAY) {
++        VMReg r = regs[i].first();
++        assert(r->is_valid(), "bad oop arg");
++        if (r->is_stack()) {
++          __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
++          __ verify_oop(temp_reg);
++        } else {
++          __ verify_oop(r->as_Register());
++        }
++      }
++    }
++  }
++}
++
++static void gen_special_dispatch(MacroAssembler* masm,
++                                 int total_args_passed,
++                                 int comp_args_on_stack,
++                                 vmIntrinsics::ID special_dispatch,
++                                 const BasicType* sig_bt,
++                                 const VMRegPair* regs) {
++  verify_oop_args(masm, total_args_passed, sig_bt, regs);
++
++  // Now write the args into the outgoing interpreter space
++  bool     has_receiver   = false;
++  Register receiver_reg   = noreg;
++  int      member_arg_pos = -1;
++  Register member_reg     = noreg;
++  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
++  if (ref_kind != 0) {
++    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
++    member_reg = rbx;  // known to be free at this point
++    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
++  } else if (special_dispatch == vmIntrinsics::_invokeBasic) {
++    has_receiver = true;
++  } else {
++    guarantee(false, err_msg("special_dispatch=%d", special_dispatch));
++  }
++
++  if (member_reg != noreg) {
++    // Load the member_arg into register, if necessary.
++    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
++    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
++    VMReg r = regs[member_arg_pos].first();
++    assert(r->is_valid(), "bad member arg");
++    if (r->is_stack()) {
++      __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
++    } else {
++      // no data motion is needed
++      member_reg = r->as_Register();
++    }
++  }
++
++  if (has_receiver) {
++    // Make sure the receiver is loaded into a register.
++    assert(total_args_passed > 0, "oob");
++    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
++    VMReg r = regs[0].first();
++    assert(r->is_valid(), "bad receiver arg");
++    if (r->is_stack()) {
++      // Porting note:  This assumes that compiled calling conventions always
++      // pass the receiver oop in a register.  If this is not true on some
++      // platform, pick a temp and load the receiver from stack.
++      assert(false, "receiver always in a register");
++      receiver_reg = j_rarg0;  // known to be free at this point
++      __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
++    } else {
++      // no data motion is needed
++      receiver_reg = r->as_Register();
++    }
++  }
++
++  // Figure out which address we are really jumping to:
++  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
++                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
++}
+ 
+ // ---------------------------------------------------------------------------
+ // Generate a native wrapper for a given method.  The method takes arguments
+@@ -1539,14 +1687,60 @@
+ // convention (handlizes oops, etc), transitions to native, makes the call,
+ // returns to java state (possibly blocking), unhandlizes any result and
+ // returns.
+-nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
++//
++// Critical native functions are a shorthand for the use of
++// GetPrimitiveArrayCritical and disallow the use of any other JNI
++// functions.  The wrapper is expected to unpack the arguments before
++// passing them to the callee and perform checks before and after the
++// native call to ensure that the GC_locker
++// lock_critical/unlock_critical semantics are followed.  Some other
++// parts of JNI setup are skipped, like the tear down of the JNI handle
++// block and the check for pending exceptions, because it's impossible
++// for them to be thrown.
++//
++// They are roughly structured like this:
++//    if (GC_locker::needs_gc())
++//      SharedRuntime::block_for_jni_critical();
++//    transition to thread_in_native
++//    unpack array arguments and call native entry point
++//    check for safepoint in progress
++//    check if any thread suspend flags are set
++//      call into the JVM and possibly unlock the JNI critical
++//      if a GC was suppressed while in the critical native.
++//    transition back to thread_in_Java
++//    return to caller
++//
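++// Critical entry points are looked up under the JavaCritical_ prefix; as
++// an illustration only (hypothetical class and method names), a method
++//   static native int sum(int[] a);
++// in pkg.Arrays may additionally provide
++//   jint JavaCritical_pkg_Arrays_sum(jint length, jint* body);
++// i.e. no JNIEnv*/jclass, with each array argument flattened into an
++// explicit (length, body pointer) pair.
++//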
++nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
+                                                 methodHandle method,
+                                                 int compile_id,
+                                                 int total_in_args,
+                                                 int comp_args_on_stack,
+-                                                BasicType *in_sig_bt,
+-                                                VMRegPair *in_regs,
++                                                BasicType* in_sig_bt,
++                                                VMRegPair* in_regs,
+                                                 BasicType ret_type) {
++  if (method->is_method_handle_intrinsic()) {
++    vmIntrinsics::ID iid = method->intrinsic_id();
++    intptr_t start = (intptr_t)__ pc();
++    int vep_offset = ((intptr_t)__ pc()) - start;
++    gen_special_dispatch(masm,
++                         total_in_args,
++                         comp_args_on_stack,
++                         method->intrinsic_id(),
++                         in_sig_bt,
++                         in_regs);
++    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
++    __ flush();
++    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
++    return nmethod::new_native_nmethod(method,
++                                       compile_id,
++                                       masm->code(),
++                                       vep_offset,
++                                       frame_complete,
++                                       stack_slots / VMRegImpl::slots_per_word,
++                                       in_ByteSize(-1),
++                                       in_ByteSize(-1),
++                                       (OopMapSet*)NULL);
++  }
+   bool is_critical_native = true;
+   address native_func = method->critical_native_function();
+   if (native_func == NULL) {
+@@ -1658,7 +1852,7 @@
+           case T_SHORT:
+           case T_CHAR:
+           case T_INT:  single_slots++; break;
+-          case T_ARRAY:
++          case T_ARRAY:  // specific to LP64 (7145024)
+           case T_LONG: double_slots++; break;
+           default:  ShouldNotReachHere();
+         }
+diff --git a/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp
++++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+@@ -2327,12 +2327,6 @@
+                                                                                    CAST_FROM_FN_PTR(address, SharedRuntime::d2l));
+ 
+     // Build this early so it's available for the interpreter
+-    StubRoutines::_throw_WrongMethodTypeException_entry =
+-      generate_throw_exception("WrongMethodTypeException throw_exception",
+-                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
+-                               rax, rcx);
+-
+-    // Build this early so it's available for the interpreter
+     StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
+   }
+ 
+diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp
++++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+@@ -3102,14 +3102,6 @@
+ 
+     StubRoutines::x86::_verify_mxcsr_entry    = generate_verify_mxcsr();
+ 
+-    // Build this early so it's available for the interpreter.  Stub
+-    // expects the required and actual types as register arguments in
+-    // j_rarg0 and j_rarg1 respectively.
+-    StubRoutines::_throw_WrongMethodTypeException_entry =
+-      generate_throw_exception("WrongMethodTypeException throw_exception",
+-                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
+-                               rax, rcx);
+-
+     // Build this early so it's available for the interpreter.
+     StubRoutines::_throw_StackOverflowError_entry =
+       generate_throw_exception("StackOverflowError throw_exception",
+diff --git a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
+--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
++++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
+@@ -710,9 +710,9 @@
+     // Need to differentiate between igetfield, agetfield, bgetfield etc.
+     // because they are different sizes.
+     // Use the type from the constant pool cache
+-    __ shrl(rdx, ConstantPoolCacheEntry::tosBits);
+-    // Make sure we don't need to mask rdx for tosBits after the above shift
+-    ConstantPoolCacheEntry::verify_tosBits();
++    __ shrl(rdx, ConstantPoolCacheEntry::tos_state_shift);
++    // Make sure we don't need to mask rdx after the above shift
++    ConstantPoolCacheEntry::verify_tos_state_shift();
+     __ cmpl(rdx, btos);
+     __ jcc(Assembler::notEqual, notByte);
+     __ load_signed_byte(rax, field_address);
+@@ -1513,7 +1513,6 @@
+     case Interpreter::empty                  : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry();        break;
+     case Interpreter::accessor               : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();     break;
+     case Interpreter::abstract               : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();     break;
+-    case Interpreter::method_handle          : entry_point = ((InterpreterGenerator*)this)->generate_method_handle_entry(); break;
+ 
+     case Interpreter::java_lang_math_sin     : // fall thru
+     case Interpreter::java_lang_math_cos     : // fall thru
+@@ -1526,7 +1525,9 @@
+     case Interpreter::java_lang_math_exp     : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);     break;
+     case Interpreter::java_lang_ref_reference_get
+                                              : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
+-    default                                  : ShouldNotReachHere();                                                       break;
++    default:
++      fatal(err_msg("unexpected method kind: %d", kind));
++      break;
+   }
+ 
+   if (entry_point) return entry_point;
+diff --git a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
+--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
++++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
+@@ -683,9 +683,9 @@
+     // Need to differentiate between igetfield, agetfield, bgetfield etc.
+     // because they are different sizes.
+     // Use the type from the constant pool cache
+-    __ shrl(rdx, ConstantPoolCacheEntry::tosBits);
+-    // Make sure we don't need to mask edx for tosBits after the above shift
+-    ConstantPoolCacheEntry::verify_tosBits();
++    __ shrl(rdx, ConstantPoolCacheEntry::tos_state_shift);
++    // Make sure we don't need to mask edx after the above shift
++    ConstantPoolCacheEntry::verify_tos_state_shift();
+ 
+     __ cmpl(rdx, atos);
+     __ jcc(Assembler::notEqual, notObj);
+@@ -1524,12 +1524,11 @@
+   switch (kind) {
+   case Interpreter::zerolocals             :                                                                             break;
+   case Interpreter::zerolocals_synchronized: synchronized = true;                                                        break;
+-  case Interpreter::native                 : entry_point = ((InterpreterGenerator*) this)->generate_native_entry(false); break;
+-  case Interpreter::native_synchronized    : entry_point = ((InterpreterGenerator*) this)->generate_native_entry(true);  break;
+-  case Interpreter::empty                  : entry_point = ((InterpreterGenerator*) this)->generate_empty_entry();       break;
+-  case Interpreter::accessor               : entry_point = ((InterpreterGenerator*) this)->generate_accessor_entry();    break;
+-  case Interpreter::abstract               : entry_point = ((InterpreterGenerator*) this)->generate_abstract_entry();    break;
+-  case Interpreter::method_handle          : entry_point = ((InterpreterGenerator*) this)->generate_method_handle_entry();break;
++  case Interpreter::native                 : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); break;
++  case Interpreter::native_synchronized    : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true);  break;
++  case Interpreter::empty                  : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry();       break;
++  case Interpreter::accessor               : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();    break;
++  case Interpreter::abstract               : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();    break;
+ 
+   case Interpreter::java_lang_math_sin     : // fall thru
+   case Interpreter::java_lang_math_cos     : // fall thru
+@@ -1539,10 +1538,12 @@
+   case Interpreter::java_lang_math_log10   : // fall thru
+   case Interpreter::java_lang_math_sqrt    : // fall thru
+   case Interpreter::java_lang_math_pow     : // fall thru
+-  case Interpreter::java_lang_math_exp     : entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind);    break;
++  case Interpreter::java_lang_math_exp     : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);    break;
+   case Interpreter::java_lang_ref_reference_get
+                                            : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
+-  default                                  : ShouldNotReachHere();                                                       break;
++  default:
++    fatal(err_msg("unexpected method kind: %d", kind));
++    break;
+   }
+ 
+   if (entry_point) {
+diff --git a/src/cpu/x86/vm/templateTable_x86_32.cpp b/src/cpu/x86/vm/templateTable_x86_32.cpp
+--- a/src/cpu/x86/vm/templateTable_x86_32.cpp
++++ b/src/cpu/x86/vm/templateTable_x86_32.cpp
+@@ -446,13 +446,13 @@
+   const Register cache = rcx;
+   const Register index = rdx;
+ 
+-  resolve_cache_and_index(f1_oop, rax, cache, index, wide ? sizeof(u2) : sizeof(u1));
++  resolve_cache_and_index(f12_oop, rax, cache, index, wide ? sizeof(u2) : sizeof(u1));
+   if (VerifyOops) {
+     __ verify_oop(rax);
+   }
+ 
+   Label L_done, L_throw_exception;
+-  const Register con_klass_temp = rcx;  // same as Rcache
++  const Register con_klass_temp = rcx;  // same as cache
+   __ load_klass(con_klass_temp, rax);
+   __ cmpptr(con_klass_temp, ExternalAddress((address)Universe::systemObjArrayKlassObj_addr()));
+   __ jcc(Assembler::notEqual, L_done);
+@@ -2084,15 +2084,15 @@
+                                             Register Rcache,
+                                             Register index,
+                                             size_t index_size) {
+-  Register temp = rbx;
+-
++  const Register temp = rbx;
+   assert_different_registers(result, Rcache, index, temp);
+ 
+   Label resolved;
+-  if (byte_no == f1_oop) {
+-    // We are resolved if the f1 field contains a non-null object (CallSite, etc.)
+-    // This kind of CP cache entry does not need to match the flags byte, because
++  if (byte_no == f12_oop) {
++    // We are resolved if the f1 field contains a non-null object (CallSite, MethodType, etc.)
++    // This kind of CP cache entry does not need to match bytecode_1 or bytecode_2, because
+     // there is a 1-1 relation between bytecode type and CP entry type.
++    // The caller will also load a methodOop from f2.
+     assert(result != noreg, ""); //else do cmpptr(Address(...), (int32_t) NULL_WORD)
+     __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
+     __ movptr(result, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()));
+@@ -2112,15 +2112,18 @@
+     case Bytecodes::_getstatic      : // fall through
+     case Bytecodes::_putstatic      : // fall through
+     case Bytecodes::_getfield       : // fall through
+-    case Bytecodes::_putfield       : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); break;
++    case Bytecodes::_putfield       : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);        break;
+     case Bytecodes::_invokevirtual  : // fall through
+     case Bytecodes::_invokespecial  : // fall through
+     case Bytecodes::_invokestatic   : // fall through
+-    case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);  break;
+-    case Bytecodes::_invokedynamic  : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); break;
+-    case Bytecodes::_fast_aldc      : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);     break;
+-    case Bytecodes::_fast_aldc_w    : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);     break;
+-    default                         : ShouldNotReachHere();                                 break;
++    case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);         break;
++    case Bytecodes::_invokehandle   : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);   break;
++    case Bytecodes::_invokedynamic  : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);  break;
++    case Bytecodes::_fast_aldc      : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);            break;
++    case Bytecodes::_fast_aldc_w    : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);            break;
++    default:
++      fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
++      break;
+   }
+   __ movl(temp, (int)bytecode());
+   __ call_VM(noreg, entry, temp);
+@@ -2149,7 +2152,7 @@
+   __ movl(flags, Address(cache, index, Address::times_ptr,
+            in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())));
+ 
+-  // klass     overwrite register
++  // klass overwrite register
+   if (is_static) {
+     __ movptr(obj, Address(cache, index, Address::times_ptr,
+                            in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())));
+@@ -2161,7 +2164,7 @@
+                                                Register itable_index,
+                                                Register flags,
+                                                bool is_invokevirtual,
+-                                               bool is_invokevfinal /*unused*/,
++                                               bool is_invokevfinal, /*unused*/
+                                                bool is_invokedynamic) {
+   // setup registers
+   const Register cache = rcx;
+@@ -2171,28 +2174,33 @@
+   assert_different_registers(itable_index, flags);
+   assert_different_registers(itable_index, cache, index);
+   // determine constant pool cache field offsets
++  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
+   const int method_offset = in_bytes(
+     constantPoolCacheOopDesc::base_offset() +
+-      (is_invokevirtual
++      ((byte_no == f2_byte)
+        ? ConstantPoolCacheEntry::f2_offset()
+-       : ConstantPoolCacheEntry::f1_offset()
+-      )
+-    );
++       : ConstantPoolCacheEntry::f1_offset()));
+   const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
+                                     ConstantPoolCacheEntry::flags_offset());
+   // access constant pool cache fields
+   const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
+                                     ConstantPoolCacheEntry::f2_offset());
+ 
+-  if (byte_no == f1_oop) {
+-    // Resolved f1_oop goes directly into 'method' register.
+-    assert(is_invokedynamic, "");
+-    resolve_cache_and_index(byte_no, method, cache, index, sizeof(u4));
++  if (byte_no == f12_oop) {
++    // Resolved f1_oop (CallSite, MethodType, etc.) goes into 'itable_index'.
++    // Resolved f2_oop (methodOop invoker) will go into 'method' (at index_offset).
++    // See ConstantPoolCacheEntry::set_dynamic_call and set_method_handle.
++    size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
++    resolve_cache_and_index(byte_no, itable_index, cache, index, index_size);
++    __ movptr(method, Address(cache, index, Address::times_ptr, index_offset));
++    itable_index = noreg;  // hack to disable load below
+   } else {
+     resolve_cache_and_index(byte_no, noreg, cache, index, sizeof(u2));
+     __ movptr(method, Address(cache, index, Address::times_ptr, method_offset));
+   }
+   if (itable_index != noreg) {
++    // pick up itable index from f2 also:
++    assert(byte_no == f1_byte, "already picked up f1");
+     __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
+   }
+   __ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
+@@ -2260,10 +2268,10 @@
+ 
+   Label Done, notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
+ 
+-  __ shrl(flags, ConstantPoolCacheEntry::tosBits);
++  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
+   assert(btos == 0, "change code, btos != 0");
+   // btos
+-  __ andptr(flags, 0x0f);
++  __ andptr(flags, ConstantPoolCacheEntry::tos_state_mask);
+   __ jcc(Assembler::notZero, notByte);
+ 
+   __ load_signed_byte(rax, lo );
+@@ -2415,9 +2423,9 @@
+       __ movl(rcx, Address(rax, rdx, Address::times_ptr, in_bytes(cp_base_offset +
+                                    ConstantPoolCacheEntry::flags_offset())));
+       __ mov(rbx, rsp);
+-      __ shrl(rcx, ConstantPoolCacheEntry::tosBits);
+-      // Make sure we don't need to mask rcx for tosBits after the above shift
+-      ConstantPoolCacheEntry::verify_tosBits();
++      __ shrl(rcx, ConstantPoolCacheEntry::tos_state_shift);
++      // Make sure we don't need to mask rcx after the above shift
++      ConstantPoolCacheEntry::verify_tos_state_shift();
+       __ cmpl(rcx, ltos);
+       __ jccb(Assembler::equal, two_word);
+       __ cmpl(rcx, dtos);
+@@ -2467,7 +2475,7 @@
+