mirror of
https://github.com/opnsense/src.git
synced 2026-02-27 11:50:47 -05:00
Its latest version merged from: ^/vendor/processor-trace/892e12c5a27bda5806d1e63269986bb4171b5a8b Sponsored by: DARPA, AFRL
3473 lines
88 KiB
C
3473 lines
88 KiB
C
/*
|
|
* Copyright (c) 2016-2019, Intel Corporation
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "pt_block_decoder.h"
|
|
#include "pt_block_cache.h"
|
|
#include "pt_section.h"
|
|
#include "pt_image.h"
|
|
#include "pt_insn.h"
|
|
#include "pt_config.h"
|
|
#include "pt_asid.h"
|
|
#include "pt_compiler.h"
|
|
|
|
#include "intel-pt.h"
|
|
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
|
|
|
|
static int pt_blk_proceed_trailing_event(struct pt_block_decoder *,
|
|
struct pt_block *);
|
|
|
|
|
|
static int pt_blk_status(const struct pt_block_decoder *decoder, int flags)
|
|
{
|
|
int status;
|
|
|
|
if (!decoder)
|
|
return -pte_internal;
|
|
|
|
status = decoder->status;
|
|
|
|
/* Indicate whether tracing is disabled or enabled.
|
|
*
|
|
* This duplicates the indication in struct pt_insn and covers the case
|
|
* where we indicate the status after synchronizing.
|
|
*/
|
|
if (!decoder->enabled)
|
|
flags |= pts_ip_suppressed;
|
|
|
|
/* Forward end-of-trace indications.
|
|
*
|
|
* Postpone it as long as we're still processing events, though.
|
|
*/
|
|
if ((status & pts_eos) && !decoder->process_event)
|
|
flags |= pts_eos;
|
|
|
|
return flags;
|
|
}
|
|
|
|
static void pt_blk_reset(struct pt_block_decoder *decoder)
|
|
{
|
|
if (!decoder)
|
|
return;
|
|
|
|
decoder->mode = ptem_unknown;
|
|
decoder->ip = 0ull;
|
|
decoder->status = 0;
|
|
decoder->enabled = 0;
|
|
decoder->process_event = 0;
|
|
decoder->speculative = 0;
|
|
decoder->process_insn = 0;
|
|
decoder->bound_paging = 0;
|
|
decoder->bound_vmcs = 0;
|
|
decoder->bound_ptwrite = 0;
|
|
|
|
memset(&decoder->event, 0, sizeof(decoder->event));
|
|
pt_retstack_init(&decoder->retstack);
|
|
pt_asid_init(&decoder->asid);
|
|
}
|
|
|
|
/* Initialize the query decoder flags based on our flags. */
|
|
|
|
static int pt_blk_init_qry_flags(struct pt_conf_flags *qflags,
|
|
const struct pt_conf_flags *flags)
|
|
{
|
|
if (!qflags || !flags)
|
|
return -pte_internal;
|
|
|
|
memset(qflags, 0, sizeof(*qflags));
|
|
qflags->variant.query.keep_tcal_on_ovf =
|
|
flags->variant.block.keep_tcal_on_ovf;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int pt_blk_decoder_init(struct pt_block_decoder *decoder,
|
|
const struct pt_config *uconfig)
|
|
{
|
|
struct pt_config config;
|
|
int errcode;
|
|
|
|
if (!decoder)
|
|
return -pte_internal;
|
|
|
|
errcode = pt_config_from_user(&config, uconfig);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
|
|
/* The user supplied decoder flags. */
|
|
decoder->flags = config.flags;
|
|
|
|
/* Set the flags we need for the query decoder we use. */
|
|
errcode = pt_blk_init_qry_flags(&config.flags, &decoder->flags);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
|
|
errcode = pt_qry_decoder_init(&decoder->query, &config);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
|
|
pt_image_init(&decoder->default_image, NULL);
|
|
decoder->image = &decoder->default_image;
|
|
|
|
errcode = pt_msec_cache_init(&decoder->scache);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
|
|
pt_blk_reset(decoder);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Finalize @decoder, releasing all resources it owns.
 *
 * Tears down sub-objects in reverse order of their initialization in
 * pt_blk_decoder_init().  Does not free @decoder itself.  Safe to call
 * with NULL.
 */
void pt_blk_decoder_fini(struct pt_block_decoder *decoder)
{
	if (!decoder)
		return;

	pt_msec_cache_fini(&decoder->scache);
	pt_image_fini(&decoder->default_image);
	pt_qry_decoder_fini(&decoder->query);
}
|
|
|
|
struct pt_block_decoder *
|
|
pt_blk_alloc_decoder(const struct pt_config *config)
|
|
{
|
|
struct pt_block_decoder *decoder;
|
|
int errcode;
|
|
|
|
decoder = malloc(sizeof(*decoder));
|
|
if (!decoder)
|
|
return NULL;
|
|
|
|
errcode = pt_blk_decoder_init(decoder, config);
|
|
if (errcode < 0) {
|
|
free(decoder);
|
|
return NULL;
|
|
}
|
|
|
|
return decoder;
|
|
}
|
|
|
|
/* Finalize and free a decoder allocated with pt_blk_alloc_decoder().
 *
 * Both pt_blk_decoder_fini() and free() tolerate NULL, so no explicit
 * guard is needed here.
 */
void pt_blk_free_decoder(struct pt_block_decoder *decoder)
{
	pt_blk_decoder_fini(decoder);
	free(decoder);
}
|
|
|
|
/* Maybe synthesize a tick event.
 *
 * If we're not already processing events, check the current time against the
 * last event's time.  If it changed, synthesize a tick event with the new time
 * at @ip and mark it as pending in @decoder.
 *
 * Returns zero if no tick event has been created.
 * Returns a positive integer if a tick event has been created.
 * Returns a negative error code otherwise.
 */
static int pt_blk_tick(struct pt_block_decoder *decoder, uint64_t ip)
{
	struct pt_event *ev;
	uint64_t tsc;
	uint32_t lost_mtc, lost_cyc;
	int errcode;

	if (!decoder)
		return -pte_internal;

	/* We're not generating tick events if tracing is disabled. */
	if (!decoder->enabled)
		return -pte_internal;

	/* Events already provide a timestamp so there is no need to synthesize
	 * an artificial tick event.  There's no room, either, since this would
	 * overwrite the in-progress event.
	 *
	 * In rare cases where we need to proceed to an event location using
	 * trace this may cause us to miss a timing update if the event is not
	 * forwarded to the user.
	 *
	 * The only case I can come up with at the moment is a MODE.EXEC binding
	 * to the TIP IP of a far branch.
	 */
	if (decoder->process_event)
		return 0;

	errcode = pt_qry_time(&decoder->query, &tsc, &lost_mtc, &lost_cyc);
	if (errcode < 0) {
		/* If we don't have wall-clock time, we use relative time. */
		if (errcode != -pte_no_time)
			return errcode;
	}

	/* NOTE(review): this relies on pt_qry_time() filling in @tsc even when
	 * it returns -pte_no_time — confirm against the query decoder.
	 */
	ev = &decoder->event;

	/* We're done if time has not changed since the last event. */
	if (tsc == ev->tsc)
		return 0;

	/* Time has changed so we create a new tick event. */
	memset(ev, 0, sizeof(*ev));
	ev->type = ptev_tick;
	ev->variant.tick.ip = ip;

	/* Indicate if we have wall-clock time or only relative time. */
	if (errcode != -pte_no_time)
		ev->has_tsc = 1;
	ev->tsc = tsc;
	ev->lost_mtc = lost_mtc;
	ev->lost_cyc = lost_cyc;

	/* We now have an event to process. */
	decoder->process_event = 1;

	return 1;
}
|
|
|
|
/* Query an indirect branch.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_indirect_branch(struct pt_block_decoder *decoder,
|
|
uint64_t *ip)
|
|
{
|
|
uint64_t evip;
|
|
int status, errcode;
|
|
|
|
if (!decoder)
|
|
return -pte_internal;
|
|
|
|
evip = decoder->ip;
|
|
|
|
status = pt_qry_indirect_branch(&decoder->query, ip);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
if (decoder->flags.variant.block.enable_tick_events) {
|
|
errcode = pt_blk_tick(decoder, evip);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
/* Query a conditional branch.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_cond_branch(struct pt_block_decoder *decoder, int *taken)
|
|
{
|
|
int status, errcode;
|
|
|
|
if (!decoder)
|
|
return -pte_internal;
|
|
|
|
status = pt_qry_cond_branch(&decoder->query, taken);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
if (decoder->flags.variant.block.enable_tick_events) {
|
|
errcode = pt_blk_tick(decoder, decoder->ip);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
/* Finish a synchronization operation.
 *
 * Takes the query decoder's synchronization @status, records it, and marks
 * tracing enabled if the sync point provided a valid IP.  Then processes
 * the events that accompany the synchronization point.
 *
 * Returns a non-negative pt_status_flag bit-vector on success, a negative
 * error code otherwise (including a negative @status passed through).
 */
static int pt_blk_start(struct pt_block_decoder *decoder, int status)
{
	if (!decoder)
		return -pte_internal;

	if (status < 0)
		return status;

	decoder->status = status;
	if (!(status & pts_ip_suppressed))
		decoder->enabled = 1;

	/* We will always have an event.
	 *
	 * If we synchronized onto an empty PSB+, tracing is disabled and we'll
	 * process events until the enabled event.
	 *
	 * If tracing is enabled, PSB+ must at least provide the execution mode,
	 * which we're going to forward to the user.
	 */
	return pt_blk_proceed_trailing_event(decoder, NULL);
}
|
|
|
|
static int pt_blk_sync_reset(struct pt_block_decoder *decoder)
|
|
{
|
|
if (!decoder)
|
|
return -pte_internal;
|
|
|
|
pt_blk_reset(decoder);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int pt_blk_sync_forward(struct pt_block_decoder *decoder)
|
|
{
|
|
int errcode, status;
|
|
|
|
if (!decoder)
|
|
return -pte_invalid;
|
|
|
|
errcode = pt_blk_sync_reset(decoder);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
|
|
status = pt_qry_sync_forward(&decoder->query, &decoder->ip);
|
|
|
|
return pt_blk_start(decoder, status);
|
|
}
|
|
|
|
int pt_blk_sync_backward(struct pt_block_decoder *decoder)
|
|
{
|
|
int errcode, status;
|
|
|
|
if (!decoder)
|
|
return -pte_invalid;
|
|
|
|
errcode = pt_blk_sync_reset(decoder);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
|
|
status = pt_qry_sync_backward(&decoder->query, &decoder->ip);
|
|
|
|
return pt_blk_start(decoder, status);
|
|
}
|
|
|
|
/* Synchronize at the trace buffer offset @offset and prepare for block
 * decode.
 *
 * Returns a non-negative pt_status_flag bit-vector on success, a negative
 * error code otherwise.
 */
int pt_blk_sync_set(struct pt_block_decoder *decoder, uint64_t offset)
{
	int errcode;

	if (!decoder)
		return -pte_invalid;

	errcode = pt_blk_sync_reset(decoder);
	if (errcode < 0)
		return errcode;

	return pt_blk_start(decoder,
			    pt_qry_sync_set(&decoder->query, &decoder->ip,
					    offset));
}
|
|
|
|
/* Provide the decoder's current trace buffer offset in @offset.
 *
 * Returns zero on success, a negative error code otherwise.
 */
int pt_blk_get_offset(const struct pt_block_decoder *decoder, uint64_t *offset)
{
	return decoder ? pt_qry_get_offset(&decoder->query, offset)
		       : -pte_invalid;
}
|
|
|
|
int pt_blk_get_sync_offset(const struct pt_block_decoder *decoder,
|
|
uint64_t *offset)
|
|
{
|
|
if (!decoder)
|
|
return -pte_invalid;
|
|
|
|
return pt_qry_get_sync_offset(&decoder->query, offset);
|
|
}
|
|
|
|
struct pt_image *pt_blk_get_image(struct pt_block_decoder *decoder)
|
|
{
|
|
if (!decoder)
|
|
return NULL;
|
|
|
|
return decoder->image;
|
|
}
|
|
|
|
int pt_blk_set_image(struct pt_block_decoder *decoder, struct pt_image *image)
|
|
{
|
|
if (!decoder)
|
|
return -pte_invalid;
|
|
|
|
if (!image)
|
|
image = &decoder->default_image;
|
|
|
|
decoder->image = image;
|
|
return 0;
|
|
}
|
|
|
|
const struct pt_config *
|
|
pt_blk_get_config(const struct pt_block_decoder *decoder)
|
|
{
|
|
if (!decoder)
|
|
return NULL;
|
|
|
|
return pt_qry_get_config(&decoder->query);
|
|
}
|
|
|
|
/* Provide the current estimated timestamp in @time along with the number
 * of lost MTC and CYC packets.
 *
 * Returns zero on success, a negative error code otherwise.
 */
int pt_blk_time(struct pt_block_decoder *decoder, uint64_t *time,
		uint32_t *lost_mtc, uint32_t *lost_cyc)
{
	return (decoder && time)
		? pt_qry_time(&decoder->query, time, lost_mtc, lost_cyc)
		: -pte_invalid;
}
|
|
|
|
/* Provide the last core:bus frequency ratio in @cbr.
 *
 * Returns zero on success, a negative error code otherwise.
 */
int pt_blk_core_bus_ratio(struct pt_block_decoder *decoder, uint32_t *cbr)
{
	return (decoder && cbr)
		? pt_qry_core_bus_ratio(&decoder->query, cbr)
		: -pte_invalid;
}
|
|
|
|
int pt_blk_asid(const struct pt_block_decoder *decoder, struct pt_asid *asid,
|
|
size_t size)
|
|
{
|
|
if (!decoder || !asid)
|
|
return -pte_invalid;
|
|
|
|
return pt_asid_to_user(asid, &decoder->asid, size);
|
|
}
|
|
|
|
/* Fetch the next pending event.
 *
 * Checks for pending events.  If an event is pending, fetches it (if not
 * already in process) into @decoder->event and records the query decoder's
 * new status.
 *
 * Returns zero if no event is pending.
 * Returns a positive integer if an event is pending or in process.
 * Returns a negative error code otherwise.
 */
static inline int pt_blk_fetch_event(struct pt_block_decoder *decoder)
{
	int status;

	if (!decoder)
		return -pte_internal;

	/* An event we already fetched is still being processed. */
	if (decoder->process_event)
		return 1;

	/* Nothing to fetch if the query decoder has no event pending. */
	if (!(decoder->status & pts_event_pending))
		return 0;

	status = pt_qry_event(&decoder->query, &decoder->event,
			      sizeof(decoder->event));
	if (status < 0)
		return status;

	decoder->process_event = 1;
	decoder->status = status;

	return 1;
}
|
|
|
|
static inline int pt_blk_block_is_empty(const struct pt_block *block)
|
|
{
|
|
if (!block)
|
|
return 1;
|
|
|
|
return !block->ninsn;
|
|
}
|
|
|
|
static inline int block_to_user(struct pt_block *ublock, size_t size,
|
|
const struct pt_block *block)
|
|
{
|
|
if (!ublock || !block)
|
|
return -pte_internal;
|
|
|
|
if (ublock == block)
|
|
return 0;
|
|
|
|
/* Zero out any unknown bytes. */
|
|
if (sizeof(*block) < size) {
|
|
memset(ublock + sizeof(*block), 0, size - sizeof(*block));
|
|
|
|
size = sizeof(*block);
|
|
}
|
|
|
|
memcpy(ublock, block, size);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* An always-false instruction predicate.
 *
 * Used with pt_blk_proceed_to_insn() when no instruction should match.
 * Both arguments are deliberately ignored.
 */
static int pt_insn_false(const struct pt_insn *insn,
			 const struct pt_insn_ext *iext)
{
	(void) iext;
	(void) insn;

	return 0;
}
|
|
|
|
/* Determine the next IP using trace.
 *
 * Tries to determine the IP of the next instruction using trace and provides it
 * in @pip.
 *
 * Not requiring trace to determine the IP is treated as an internal error.
 *
 * Does not update the return compression stack for indirect calls.  This is
 * expected to have been done, already, when trying to determine the next IP
 * without using trace.
 *
 * Does not update @decoder->status.  The caller is expected to do that.
 *
 * Returns a non-negative pt_status_flag bit-vector on success, a negative error
 * code otherwise.
 * Returns -pte_internal if @pip, @decoder, @insn, or @iext are NULL.
 * Returns -pte_internal if no trace is required.
 */
static int pt_blk_next_ip(uint64_t *pip, struct pt_block_decoder *decoder,
			  const struct pt_insn *insn,
			  const struct pt_insn_ext *iext)
{
	int status, errcode;

	if (!pip || !decoder || !insn || !iext)
		return -pte_internal;

	/* We handle non-taken conditional branches, and compressed returns
	 * directly in the switch.
	 *
	 * All kinds of branches are handled below the switch.
	 */
	switch (insn->iclass) {
	case ptic_cond_jump: {
		uint64_t ip;
		int taken;

		status = pt_blk_cond_branch(decoder, &taken);
		if (status < 0)
			return status;

		/* Fall-through IP; add the displacement if taken. */
		ip = insn->ip + insn->size;
		if (taken)
			ip += (uint64_t) (int64_t)
				iext->variant.branch.displacement;

		*pip = ip;
		return status;
	}

	case ptic_return: {
		int taken;

		/* Check for a compressed return.
		 *
		 * A -pte_bad_query means the return is not compressed; fall
		 * through to the indirect-branch handling below the switch.
		 */
		status = pt_blk_cond_branch(decoder, &taken);
		if (status < 0) {
			if (status != -pte_bad_query)
				return status;

			break;
		}

		/* A compressed return is indicated by a taken conditional
		 * branch.
		 */
		if (!taken)
			return -pte_bad_retcomp;

		errcode = pt_retstack_pop(&decoder->retstack, pip);
		if (errcode < 0)
			return errcode;

		return status;
	}

	case ptic_jump:
	case ptic_call:
		/* A direct jump or call wouldn't require trace. */
		if (iext->variant.branch.is_direct)
			return -pte_internal;

		break;

	case ptic_far_call:
	case ptic_far_return:
	case ptic_far_jump:
		break;

	case ptic_ptwrite:
	case ptic_other:
		/* Non-branch instructions never require trace. */
		return -pte_internal;

	case ptic_error:
		return -pte_bad_insn;
	}

	/* Process an indirect branch.
	 *
	 * This covers indirect jumps and calls, non-compressed returns, and all
	 * flavors of far transfers.
	 */
	return pt_blk_indirect_branch(decoder, pip);
}
|
|
|
|
/* Proceed to the next IP using trace.
 *
 * We failed to proceed without trace.  This ends the current block.  Now use
 * trace to do one final step to determine the start IP of the next block.
 *
 * Returns zero on success, a negative error code otherwise.
 */
static int pt_blk_proceed_with_trace(struct pt_block_decoder *decoder,
				     const struct pt_insn *insn,
				     const struct pt_insn_ext *iext)
{
	int status;

	if (!decoder)
		return -pte_internal;

	status = pt_blk_next_ip(&decoder->ip, decoder, insn, iext);
	if (status < 0)
		return status;

	/* Preserve the query decoder's response which indicates upcoming
	 * events.
	 *
	 * Note this must happen before the pts_ip_suppressed check below so
	 * the status is recorded even on the error path.
	 */
	decoder->status = status;

	/* We do need an IP in order to proceed. */
	if (status & pts_ip_suppressed)
		return -pte_noip;

	return 0;
}
|
|
|
|
/* Decode one instruction in a known section.
 *
 * Decode the instruction at @insn->ip in @msec assuming execution mode
 * @insn->mode.  On success, @insn and @iext describe the decoded
 * instruction.
 *
 * Returns zero on success, a negative error code otherwise.
 */
static int pt_blk_decode_in_section(struct pt_insn *insn,
				    struct pt_insn_ext *iext,
				    const struct pt_mapped_section *msec)
{
	int status;

	if (!insn || !iext)
		return -pte_internal;

	/* We know that @ip is contained in @section.
	 *
	 * Note that we need to translate @ip into a section offset.
	 */
	status = pt_msec_read(msec, insn->raw, sizeof(insn->raw), insn->ip);
	if (status < 0)
		return status;

	/* We initialize @insn->size to the maximal possible size.  It will be
	 * set to the actual size during instruction decode.
	 *
	 * A non-negative @status is the number of raw bytes that were read.
	 */
	insn->size = (uint8_t) status;

	return pt_ild_decode(insn, iext);
}
|
|
|
|
/* Update the return-address stack if @insn is a near call.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static inline int pt_blk_log_call(struct pt_block_decoder *decoder,
|
|
const struct pt_insn *insn,
|
|
const struct pt_insn_ext *iext)
|
|
{
|
|
if (!decoder || !insn || !iext)
|
|
return -pte_internal;
|
|
|
|
if (insn->iclass != ptic_call)
|
|
return 0;
|
|
|
|
/* Ignore direct calls to the next instruction that are used for
|
|
* position independent code.
|
|
*/
|
|
if (iext->variant.branch.is_direct &&
|
|
!iext->variant.branch.displacement)
|
|
return 0;
|
|
|
|
return pt_retstack_push(&decoder->retstack, insn->ip + insn->size);
|
|
}
|
|
|
|
/* Proceed by one instruction.
 *
 * Tries to decode the instruction at @decoder->ip and, on success, adds it to
 * @block and provides it in @pinsn and @piext.
 *
 * The instruction will not be added if:
 *
 *   - the memory could not be read:  return error
 *   - it could not be decoded:       return error
 *   - @block is already full:        return zero
 *   - @block would switch sections:  return zero
 *
 * Returns a positive integer if the instruction was added.
 * Returns zero if the instruction didn't fit into @block.
 * Returns a negative error code otherwise.
 */
static int pt_blk_proceed_one_insn(struct pt_block_decoder *decoder,
				   struct pt_block *block,
				   struct pt_insn *pinsn,
				   struct pt_insn_ext *piext)
{
	struct pt_insn_ext iext;
	struct pt_insn insn;
	uint16_t ninsn;
	int status;

	if (!decoder || !block || !pinsn || !piext)
		return -pte_internal;

	/* There's nothing to do if there is no room in @block.
	 *
	 * @ninsn wrapping to zero means block->ninsn was at the uint16_t
	 * maximum, i.e. @block is full.
	 */
	ninsn = block->ninsn + 1;
	if (!ninsn)
		return 0;

	/* The truncated instruction must be last. */
	if (block->truncated)
		return 0;

	memset(&insn, 0, sizeof(insn));
	memset(&iext, 0, sizeof(iext));

	insn.mode = decoder->mode;
	insn.ip = decoder->ip;

	status = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
	if (status < 0)
		return status;

	/* We do not switch sections inside a block.
	 *
	 * An empty block simply adopts the new section.
	 */
	if (insn.isid != block->isid) {
		if (!pt_blk_block_is_empty(block))
			return 0;

		block->isid = insn.isid;
	}

	/* If we couldn't read @insn's memory in one chunk from @insn.isid, we
	 * provide the memory in @block.
	 */
	if (insn.truncated) {
		memcpy(block->raw, insn.raw, insn.size);
		block->size = insn.size;
		block->truncated = 1;
	}

	/* Log calls' return addresses for return compression. */
	status = pt_blk_log_call(decoder, &insn, &iext);
	if (status < 0)
		return status;

	/* We have a new instruction. */
	block->iclass = insn.iclass;
	block->end_ip = insn.ip;
	block->ninsn = ninsn;

	*pinsn = insn;
	*piext = iext;

	return 1;
}
|
|
|
|
|
|
/* Proceed to a particular type of instruction without using trace.
 *
 * Proceed until we reach an instruction for which @predicate returns a positive
 * integer or until:
 *
 *   - @predicate returns an error:  return error
 *   - @block is full:               return zero
 *   - @block would switch sections: return zero
 *   - we would need trace:          return -pte_bad_query
 *
 * Provide the last instruction that was reached in @insn and @iext.
 *
 * Update @decoder->ip to point to the last IP that was reached.  If we fail due
 * to lack of trace or if we reach a desired instruction, this is @insn->ip;
 * otherwise this is the next instruction's IP.
 *
 * Returns a positive integer if a suitable instruction was reached.
 * Returns zero if no such instruction was reached.
 * Returns a negative error code otherwise.
 */
static int pt_blk_proceed_to_insn(struct pt_block_decoder *decoder,
				  struct pt_block *block,
				  struct pt_insn *insn,
				  struct pt_insn_ext *iext,
				  int (*predicate)(const struct pt_insn *,
						   const struct pt_insn_ext *))
{
	int status;

	if (!decoder || !insn || !predicate)
		return -pte_internal;

	for (;;) {
		status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
		if (status <= 0)
			return status;

		/* We're done if this instruction matches the spec (positive
		 * status) or we run into an error (negative status).
		 */
		status = predicate(insn, iext);
		if (status != 0)
			return status;

		/* Let's see if we can proceed to the next IP without trace. */
		status = pt_insn_next_ip(&decoder->ip, insn, iext);
		if (status < 0)
			return status;

		/* End the block if the user asked us to.
		 *
		 * We only need to take care about direct near branches.
		 * Indirect and far branches require trace and will naturally
		 * end a block.
		 */
		if ((decoder->flags.variant.block.end_on_call &&
		     (insn->iclass == ptic_call)) ||
		    (decoder->flags.variant.block.end_on_jump &&
		     (insn->iclass == ptic_jump)))
			return 0;
	}
}
|
|
|
|
/* Proceed to a particular IP without using trace.
 *
 * Proceed until we reach @ip or until:
 *
 *   - @block is full:               return zero
 *   - @block would switch sections: return zero
 *   - we would need trace:          return -pte_bad_query
 *
 * Provide the last instruction that was reached in @insn and @iext.  If we
 * reached @ip, this is the instruction preceding it.
 *
 * Update @decoder->ip to point to the last IP that was reached.  If we fail due
 * to lack of trace, this is @insn->ip; otherwise this is the next instruction's
 * IP.
 *
 * Returns a positive integer if @ip was reached.
 * Returns zero if no such instruction was reached.
 * Returns a negative error code otherwise.
 */
static int pt_blk_proceed_to_ip(struct pt_block_decoder *decoder,
				struct pt_block *block, struct pt_insn *insn,
				struct pt_insn_ext *iext, uint64_t ip)
{
	int status;

	if (!decoder || !insn)
		return -pte_internal;

	for (;;) {
		/* We're done when we reach @ip.  We may not even have to decode
		 * a single instruction in some cases.
		 */
		if (decoder->ip == ip)
			return 1;

		status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
		if (status <= 0)
			return status;

		/* Let's see if we can proceed to the next IP without trace. */
		status = pt_insn_next_ip(&decoder->ip, insn, iext);
		if (status < 0)
			return status;

		/* End the block if the user asked us to.
		 *
		 * We only need to take care about direct near branches.
		 * Indirect and far branches require trace and will naturally
		 * end a block.
		 *
		 * The call at the end of the block may have reached @ip; make
		 * sure to indicate that.
		 */
		if ((decoder->flags.variant.block.end_on_call &&
		     (insn->iclass == ptic_call)) ||
		    (decoder->flags.variant.block.end_on_jump &&
		     (insn->iclass == ptic_jump))) {
			return (decoder->ip == ip ? 1 : 0);
		}
	}
}
|
|
|
|
/* Proceed to a particular IP with trace, if necessary.
 *
 * Proceed until we reach @ip or until:
 *
 *   - @block is full:               return zero
 *   - @block would switch sections: return zero
 *   - we need trace:                return zero
 *
 * Update @decoder->ip to point to the last IP that was reached.
 *
 * A return of zero ends @block.
 *
 * Returns a positive integer if @ip was reached.
 * Returns zero if no such instruction was reached.
 * Returns a negative error code otherwise.
 */
static int pt_blk_proceed_to_ip_with_trace(struct pt_block_decoder *decoder,
					   struct pt_block *block,
					   uint64_t ip)
{
	struct pt_insn_ext iext;
	struct pt_insn insn;
	int status;

	/* Try to reach @ip without trace.
	 *
	 * We're also OK if @block overflowed or we switched sections and we
	 * have to try again in the next iteration.
	 *
	 * @insn/@iext are consumed only on the -pte_bad_query path below,
	 * where pt_blk_proceed_to_ip() has filled them in with the last
	 * decoded instruction.
	 */
	status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext, ip);
	if (status != -pte_bad_query)
		return status;

	/* Needing trace is not an error.  We use trace to determine the next
	 * start IP and end the block.
	 */
	return pt_blk_proceed_with_trace(decoder, &insn, &iext);
}
|
|
|
|
static int pt_insn_skl014(const struct pt_insn *insn,
|
|
const struct pt_insn_ext *iext)
|
|
{
|
|
if (!insn || !iext)
|
|
return 0;
|
|
|
|
switch (insn->iclass) {
|
|
default:
|
|
return 0;
|
|
|
|
case ptic_call:
|
|
case ptic_jump:
|
|
return iext->variant.branch.is_direct;
|
|
|
|
case ptic_other:
|
|
return pt_insn_changes_cr3(insn, iext);
|
|
}
|
|
}
|
|
|
|
/* Proceed to the location of a synchronous disabled event with suppressed IP
 * considering SKL014.
 *
 * We have a (synchronous) disabled event pending.  Proceed to the event
 * location and indicate whether we were able to reach it.
 *
 * With SKL014 a TIP.PGD with suppressed IP may also be generated by a direct
 * unconditional branch that clears FilterEn by jumping out of a filter region
 * or into a TraceStop region.  Use the filter configuration to determine the
 * exact branch the event binds to.
 *
 * The last instruction that was reached is stored in @insn/@iext.
 *
 * Returns a positive integer if the event location was reached.
 * Returns zero if the event location was not reached.
 * Returns a negative error code otherwise.
 */
static int pt_blk_proceed_skl014(struct pt_block_decoder *decoder,
				 struct pt_block *block, struct pt_insn *insn,
				 struct pt_insn_ext *iext)
{
	const struct pt_conf_addr_filter *addr_filter;
	int status;

	if (!decoder || !block || !insn || !iext)
		return -pte_internal;

	addr_filter = &decoder->query.config.addr_filter;
	for (;;) {
		uint64_t ip;

		/* Stop at the next candidate instruction (direct branch or
		 * CR3 change) per the pt_insn_skl014 predicate.
		 */
		status = pt_blk_proceed_to_insn(decoder, block, insn, iext,
						pt_insn_skl014);
		if (status <= 0)
			break;

		/* The erratum doesn't apply if we can bind the event to a
		 * CR3-changing instruction.
		 */
		if (pt_insn_changes_cr3(insn, iext))
			break;

		/* Check the filter against the branch target. */
		status = pt_insn_next_ip(&ip, insn, iext);
		if (status < 0)
			break;

		status = pt_filter_addr_check(addr_filter, ip);
		if (status <= 0) {
			/* We need to flip the indication.
			 *
			 * We reached the event location when @ip lies inside a
			 * tracing-disabled region.
			 */
			if (!status)
				status = 1;

			break;
		}

		/* This is not the correct instruction.  Proceed past it and try
		 * again.
		 */
		decoder->ip = ip;

		/* End the block if the user asked us to.
		 *
		 * We only need to take care about direct near branches.
		 * Indirect and far branches require trace and will naturally
		 * end a block.
		 */
		if ((decoder->flags.variant.block.end_on_call &&
		     (insn->iclass == ptic_call)) ||
		    (decoder->flags.variant.block.end_on_jump &&
		     (insn->iclass == ptic_jump)))
			break;
	}

	return status;
}
|
|
|
|
/* Proceed to the event location for a disabled event.
 *
 * We have a (synchronous) disabled event pending.  Proceed to the event
 * location and indicate whether we were able to reach it.
 *
 * The last instruction that was reached is stored in @insn/@iext.
 *
 * Returns a positive integer if the event location was reached.
 * Returns zero if the event location was not reached.
 * Returns a negative error code otherwise.
 */
static int pt_blk_proceed_to_disabled(struct pt_block_decoder *decoder,
				      struct pt_block *block,
				      struct pt_insn *insn,
				      struct pt_insn_ext *iext,
				      const struct pt_event *ev)
{
	if (!decoder || !block || !ev)
		return -pte_internal;

	if (ev->ip_suppressed) {
		/* Due to SKL014 the TIP.PGD payload may be suppressed also for
		 * direct branches.
		 *
		 * If we don't have a filter configuration we assume that no
		 * address filters were used and the erratum does not apply.
		 *
		 * We might otherwise disable tracing too early.
		 */
		if (decoder->query.config.addr_filter.config.addr_cfg &&
		    decoder->query.config.errata.skl014)
			return pt_blk_proceed_skl014(decoder, block, insn,
						     iext);

		/* A synchronous disabled event also binds to far branches and
		 * CPL-changing instructions.  Both would require trace,
		 * however, and are thus implicitly handled by erroring out.
		 *
		 * The would-require-trace error is handled by our caller.
		 */
		return pt_blk_proceed_to_insn(decoder, block, insn, iext,
					      pt_insn_changes_cr3);
	} else
		return pt_blk_proceed_to_ip(decoder, block, insn, iext,
					    ev->variant.disabled.ip);
}
|
|
|
|
/* Set the expected resume address for a synchronous disable.
|
|
*
|
|
* On a synchronous disable, @decoder->ip still points to the instruction to
|
|
* which the event bound. That's not where we expect tracing to resume.
|
|
*
|
|
* For calls, a fair assumption is that tracing resumes after returning from the
|
|
* called function. For other types of instructions, we simply don't know.
|
|
*
|
|
* Returns zero on success, a negative pt_error_code otherwise.
|
|
*/
|
|
static int pt_blk_set_disable_resume_ip(struct pt_block_decoder *decoder,
|
|
const struct pt_insn *insn)
|
|
{
|
|
if (!decoder || !insn)
|
|
return -pte_internal;
|
|
|
|
switch (insn->iclass) {
|
|
case ptic_call:
|
|
case ptic_far_call:
|
|
decoder->ip = insn->ip + insn->size;
|
|
break;
|
|
|
|
default:
|
|
decoder->ip = 0ull;
|
|
break;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Proceed to the event location for an async paging event.
|
|
*
|
|
* We have an async paging event pending. Proceed to the event location and
|
|
* indicate whether we were able to reach it. Needing trace in order to proceed
|
|
* is not an error in this case but ends the block.
|
|
*
|
|
* Returns a positive integer if the event location was reached.
|
|
* Returns zero if the event location was not reached.
|
|
* Returns a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_proceed_to_async_paging(struct pt_block_decoder *decoder,
|
|
struct pt_block *block,
|
|
const struct pt_event *ev)
|
|
{
|
|
int status;
|
|
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
/* Apply the event immediately if we don't have an IP. */
|
|
if (ev->ip_suppressed)
|
|
return 1;
|
|
|
|
status = pt_blk_proceed_to_ip_with_trace(decoder, block,
|
|
ev->variant.async_paging.ip);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
/* We may have reached the IP. */
|
|
return (decoder->ip == ev->variant.async_paging.ip ? 1 : 0);
|
|
}
|
|
|
|
/* Proceed to the event location for an async vmcs event.
|
|
*
|
|
* We have an async vmcs event pending. Proceed to the event location and
|
|
* indicate whether we were able to reach it. Needing trace in order to proceed
|
|
* is not an error in this case but ends the block.
|
|
*
|
|
* Returns a positive integer if the event location was reached.
|
|
* Returns zero if the event location was not reached.
|
|
* Returns a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_proceed_to_async_vmcs(struct pt_block_decoder *decoder,
|
|
struct pt_block *block,
|
|
const struct pt_event *ev)
|
|
{
|
|
int status;
|
|
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
/* Apply the event immediately if we don't have an IP. */
|
|
if (ev->ip_suppressed)
|
|
return 1;
|
|
|
|
status = pt_blk_proceed_to_ip_with_trace(decoder, block,
|
|
ev->variant.async_vmcs.ip);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
/* We may have reached the IP. */
|
|
return (decoder->ip == ev->variant.async_vmcs.ip ? 1 : 0);
|
|
}
|
|
|
|
/* Proceed to the event location for an exec mode event.
|
|
*
|
|
* We have an exec mode event pending. Proceed to the event location and
|
|
* indicate whether we were able to reach it. Needing trace in order to proceed
|
|
* is not an error in this case but ends the block.
|
|
*
|
|
* Returns a positive integer if the event location was reached.
|
|
* Returns zero if the event location was not reached.
|
|
* Returns a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_proceed_to_exec_mode(struct pt_block_decoder *decoder,
|
|
struct pt_block *block,
|
|
const struct pt_event *ev)
|
|
{
|
|
int status;
|
|
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
/* Apply the event immediately if we don't have an IP. */
|
|
if (ev->ip_suppressed)
|
|
return 1;
|
|
|
|
status = pt_blk_proceed_to_ip_with_trace(decoder, block,
|
|
ev->variant.exec_mode.ip);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
/* We may have reached the IP. */
|
|
return (decoder->ip == ev->variant.exec_mode.ip ? 1 : 0);
|
|
}
|
|
|
|
/* Proceed to the event location for a ptwrite event.
|
|
*
|
|
* We have a ptwrite event pending. Proceed to the event location and indicate
|
|
* whether we were able to reach it.
|
|
*
|
|
* In case of the event binding to a ptwrite instruction, we pass beyond that
|
|
* instruction and update the event to provide the instruction's IP.
|
|
*
|
|
* In the case of the event binding to an IP provided in the event, we move
|
|
* beyond the instruction at that IP.
|
|
*
|
|
* Returns a positive integer if the event location was reached.
|
|
* Returns zero if the event location was not reached.
|
|
* Returns a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_proceed_to_ptwrite(struct pt_block_decoder *decoder,
|
|
struct pt_block *block,
|
|
struct pt_insn *insn,
|
|
struct pt_insn_ext *iext,
|
|
struct pt_event *ev)
|
|
{
|
|
int status;
|
|
|
|
if (!insn || !ev)
|
|
return -pte_internal;
|
|
|
|
/* If we don't have an IP, the event binds to the next PTWRITE
|
|
* instruction.
|
|
*
|
|
* If we have an IP it still binds to the next PTWRITE instruction but
|
|
* now the IP tells us where that instruction is. This makes most sense
|
|
* when tracing is disabled and we don't have any other means of finding
|
|
* the PTWRITE instruction. We nevertheless distinguish the two cases,
|
|
* here.
|
|
*
|
|
* In both cases, we move beyond the PTWRITE instruction, so it will be
|
|
* the last instruction in the current block and @decoder->ip will point
|
|
* to the instruction following it.
|
|
*/
|
|
if (ev->ip_suppressed) {
|
|
status = pt_blk_proceed_to_insn(decoder, block, insn, iext,
|
|
pt_insn_is_ptwrite);
|
|
if (status <= 0)
|
|
return status;
|
|
|
|
/* We now know the IP of the PTWRITE instruction corresponding
|
|
* to this event. Fill it in to make it more convenient for the
|
|
* user to process the event.
|
|
*/
|
|
ev->variant.ptwrite.ip = insn->ip;
|
|
ev->ip_suppressed = 0;
|
|
} else {
|
|
status = pt_blk_proceed_to_ip(decoder, block, insn, iext,
|
|
ev->variant.ptwrite.ip);
|
|
if (status <= 0)
|
|
return status;
|
|
|
|
/* We reached the PTWRITE instruction and @decoder->ip points to
|
|
* it; @insn/@iext still contain the preceding instruction.
|
|
*
|
|
* Proceed beyond the PTWRITE to account for it. Note that we
|
|
* may still overflow the block, which would cause us to
|
|
* postpone both instruction and event to the next block.
|
|
*/
|
|
status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
|
|
if (status <= 0)
|
|
return status;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* Try to work around erratum SKD022.
|
|
*
|
|
* If we get an asynchronous disable on VMLAUNCH or VMRESUME, the FUP that
|
|
* caused the disable to be asynchronous might have been bogous.
|
|
*
|
|
* Returns a positive integer if the erratum has been handled.
|
|
* Returns zero if the erratum does not apply.
|
|
* Returns a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_handle_erratum_skd022(struct pt_block_decoder *decoder,
|
|
struct pt_event *ev)
|
|
{
|
|
struct pt_insn_ext iext;
|
|
struct pt_insn insn;
|
|
int errcode;
|
|
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
insn.mode = decoder->mode;
|
|
insn.ip = ev->variant.async_disabled.at;
|
|
|
|
errcode = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
|
|
if (errcode < 0)
|
|
return 0;
|
|
|
|
switch (iext.iclass) {
|
|
default:
|
|
/* The erratum does not apply. */
|
|
return 0;
|
|
|
|
case PTI_INST_VMLAUNCH:
|
|
case PTI_INST_VMRESUME:
|
|
/* The erratum may apply. We can't be sure without a lot more
|
|
* analysis. Let's assume it does.
|
|
*
|
|
* We turn the async disable into a sync disable. Our caller
|
|
* will restart event processing.
|
|
*/
|
|
ev->type = ptev_disabled;
|
|
ev->variant.disabled.ip = ev->variant.async_disabled.ip;
|
|
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
/* Postpone proceeding past @insn/@iext and indicate a pending event.
|
|
*
|
|
* There may be further events pending on @insn/@iext. Postpone proceeding past
|
|
* @insn/@iext until we processed all events that bind to it.
|
|
*
|
|
* Returns a non-negative pt_status_flag bit-vector indicating a pending event
|
|
* on success, a negative pt_error_code otherwise.
|
|
*/
|
|
static int pt_blk_postpone_insn(struct pt_block_decoder *decoder,
|
|
const struct pt_insn *insn,
|
|
const struct pt_insn_ext *iext)
|
|
{
|
|
if (!decoder || !insn || !iext)
|
|
return -pte_internal;
|
|
|
|
/* Only one can be active. */
|
|
if (decoder->process_insn)
|
|
return -pte_internal;
|
|
|
|
decoder->process_insn = 1;
|
|
decoder->insn = *insn;
|
|
decoder->iext = *iext;
|
|
|
|
return pt_blk_status(decoder, pts_event_pending);
|
|
}
|
|
|
|
/* Remove any postponed instruction from @decoder.
|
|
*
|
|
* Returns zero on success, a negative pt_error_code otherwise.
|
|
*/
|
|
static int pt_blk_clear_postponed_insn(struct pt_block_decoder *decoder)
|
|
{
|
|
if (!decoder)
|
|
return -pte_internal;
|
|
|
|
decoder->process_insn = 0;
|
|
decoder->bound_paging = 0;
|
|
decoder->bound_vmcs = 0;
|
|
decoder->bound_ptwrite = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Proceed past a postponed instruction.
|
|
*
|
|
* If an instruction has been postponed in @decoder, proceed past it.
|
|
*
|
|
* Returns zero on success, a negative pt_error_code otherwise.
|
|
*/
|
|
static int pt_blk_proceed_postponed_insn(struct pt_block_decoder *decoder)
|
|
{
|
|
int status;
|
|
|
|
if (!decoder)
|
|
return -pte_internal;
|
|
|
|
/* There's nothing to do if we have no postponed instruction. */
|
|
if (!decoder->process_insn)
|
|
return 0;
|
|
|
|
/* There's nothing to do if tracing got disabled. */
|
|
if (!decoder->enabled)
|
|
return pt_blk_clear_postponed_insn(decoder);
|
|
|
|
status = pt_insn_next_ip(&decoder->ip, &decoder->insn, &decoder->iext);
|
|
if (status < 0) {
|
|
if (status != -pte_bad_query)
|
|
return status;
|
|
|
|
status = pt_blk_proceed_with_trace(decoder, &decoder->insn,
|
|
&decoder->iext);
|
|
if (status < 0)
|
|
return status;
|
|
}
|
|
|
|
return pt_blk_clear_postponed_insn(decoder);
|
|
}
|
|
|
|
/* Proceed to the next event.
 *
 * We have an event pending.  Proceed to the event location and indicate the
 * event to the user.
 *
 * On our way to the event location we may also be forced to postpone the event
 * to the next block, e.g. if we overflow the number of instructions in the
 * block or if we need trace in order to reach the event location.
 *
 * If we're not able to reach the event location, we return zero.  This is what
 * pt_blk_status() would return since:
 *
 *   - we suppress pts_eos as long as we're processing events
 *   - we do not set pts_ip_suppressed since tracing must be enabled
 *
 * Returns a non-negative pt_status_flag bit-vector on success, a negative
 * error code otherwise.
 */
static int pt_blk_proceed_event(struct pt_block_decoder *decoder,
				struct pt_block *block)
{
	struct pt_insn_ext iext;
	struct pt_insn insn;
	struct pt_event *ev;
	int status;

	if (!decoder || !decoder->process_event || !block)
		return -pte_internal;

	ev = &decoder->event;
	switch (ev->type) {
	case ptev_enabled:
		/* An enable event applies right away; no need to proceed. */
		break;

	case ptev_disabled:
		status = pt_blk_proceed_to_disabled(decoder, block, &insn,
						    &iext, ev);
		if (status <= 0) {
			/* A synchronous disable event also binds to the next
			 * indirect or conditional branch, i.e. to any branch
			 * that would have required trace.
			 */
			if (status != -pte_bad_query)
				return status;

			/* We bound the event to a branch that would have
			 * required trace; guess the resume IP.
			 */
			status = pt_blk_set_disable_resume_ip(decoder, &insn);
			if (status < 0)
				return status;
		}

		break;

	case ptev_async_disabled:
		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
					      ev->variant.async_disabled.at);
		if (status <= 0)
			return status;

		if (decoder->query.config.errata.skd022) {
			status = pt_blk_handle_erratum_skd022(decoder, ev);
			if (status != 0) {
				if (status < 0)
					return status;

				/* If the erratum hits, we modify the event.
				 * Try again.
				 */
				return pt_blk_proceed_event(decoder, block);
			}
		}

		break;

	case ptev_async_branch:
		/* The event binds to the branch source location. */
		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
					      ev->variant.async_branch.from);
		if (status <= 0)
			return status;

		break;

	case ptev_paging:
		/* While tracing is disabled, the event applies right away. */
		if (!decoder->enabled)
			break;

		status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
						pt_insn_binds_to_pip);
		if (status <= 0)
			return status;

		/* We bound a paging event.  Make sure we do not bind further
		 * paging events to this instruction.
		 */
		decoder->bound_paging = 1;

		return pt_blk_postpone_insn(decoder, &insn, &iext);

	case ptev_async_paging:
		status = pt_blk_proceed_to_async_paging(decoder, block, ev);
		if (status <= 0)
			return status;

		break;

	case ptev_vmcs:
		/* While tracing is disabled, the event applies right away. */
		if (!decoder->enabled)
			break;

		status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
						pt_insn_binds_to_vmcs);
		if (status <= 0)
			return status;

		/* We bound a vmcs event.  Make sure we do not bind further vmcs
		 * events to this instruction.
		 */
		decoder->bound_vmcs = 1;

		return pt_blk_postpone_insn(decoder, &insn, &iext);

	case ptev_async_vmcs:
		status = pt_blk_proceed_to_async_vmcs(decoder, block, ev);
		if (status <= 0)
			return status;

		break;

	case ptev_overflow:
		/* An overflow event applies right away. */
		break;

	case ptev_exec_mode:
		status = pt_blk_proceed_to_exec_mode(decoder, block, ev);
		if (status <= 0)
			return status;

		break;

	case ptev_tsx:
		/* Without an IP, the event applies right away. */
		if (ev->ip_suppressed)
			break;

		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
					      ev->variant.tsx.ip);
		if (status <= 0)
			return status;

		break;

	case ptev_stop:
		/* A stop event applies right away. */
		break;

	case ptev_exstop:
		/* Without enabled tracing or an IP, the event applies right
		 * away.
		 */
		if (!decoder->enabled || ev->ip_suppressed)
			break;

		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
					      ev->variant.exstop.ip);
		if (status <= 0)
			return status;

		break;

	case ptev_mwait:
		/* Without enabled tracing or an IP, the event applies right
		 * away.
		 */
		if (!decoder->enabled || ev->ip_suppressed)
			break;

		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
					      ev->variant.mwait.ip);
		if (status <= 0)
			return status;

		break;

	case ptev_pwre:
	case ptev_pwrx:
		/* Power events apply right away. */
		break;

	case ptev_ptwrite:
		/* While tracing is disabled, the event applies right away. */
		if (!decoder->enabled)
			break;

		status = pt_blk_proceed_to_ptwrite(decoder, block, &insn,
						   &iext, ev);
		if (status <= 0)
			return status;

		/* We bound a ptwrite event.  Make sure we do not bind further
		 * ptwrite events to this instruction.
		 */
		decoder->bound_ptwrite = 1;

		return pt_blk_postpone_insn(decoder, &insn, &iext);

	case ptev_tick:
	case ptev_cbr:
	case ptev_mnt:
		/* These events apply right away. */
		break;
	}

	return pt_blk_status(decoder, pts_event_pending);
}
|
|
|
|
/* Proceed to the next decision point without using the block cache.
|
|
*
|
|
* Tracing is enabled and we don't have an event pending. Proceed as far as
|
|
* we get without trace. Stop when we either:
|
|
*
|
|
* - need trace in order to continue
|
|
* - overflow the max number of instructions in a block
|
|
*
|
|
* We actually proceed one instruction further to get the start IP for the next
|
|
* block. This only updates @decoder's internal state, though.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_proceed_no_event_uncached(struct pt_block_decoder *decoder,
|
|
struct pt_block *block)
|
|
{
|
|
struct pt_insn_ext iext;
|
|
struct pt_insn insn;
|
|
int status;
|
|
|
|
if (!decoder || !block)
|
|
return -pte_internal;
|
|
|
|
/* This is overly conservative, really. We shouldn't get a bad-query
|
|
* status unless we decoded at least one instruction successfully.
|
|
*/
|
|
memset(&insn, 0, sizeof(insn));
|
|
memset(&iext, 0, sizeof(iext));
|
|
|
|
/* Proceed as far as we get without trace. */
|
|
status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
|
|
pt_insn_false);
|
|
if (status < 0) {
|
|
if (status != -pte_bad_query)
|
|
return status;
|
|
|
|
return pt_blk_proceed_with_trace(decoder, &insn, &iext);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Check if @ip is contained in the mapped section @msec.
 *
 * Returns non-zero if it is.
 * Returns zero if it isn't.
 */
static inline int pt_blk_is_in_section(const struct pt_mapped_section *msec,
				       uint64_t ip)
{
	/* The section covers [begin, end). */
	return (pt_msec_begin(msec) <= ip) && (ip < pt_msec_end(msec));
}
|
|
|
|
/* Insert a trampoline block cache entry.
|
|
*
|
|
* Add a trampoline block cache entry at @ip to continue at @nip, where @nip
|
|
* must be the next instruction after @ip.
|
|
*
|
|
* Both @ip and @nip must be section-relative
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static inline int pt_blk_add_trampoline(struct pt_block_cache *bcache,
|
|
uint64_t ip, uint64_t nip,
|
|
enum pt_exec_mode mode)
|
|
{
|
|
struct pt_bcache_entry bce;
|
|
int64_t disp;
|
|
|
|
/* The displacement from @ip to @nip for the trampoline. */
|
|
disp = (int64_t) (nip - ip);
|
|
|
|
memset(&bce, 0, sizeof(bce));
|
|
bce.displacement = (int32_t) disp;
|
|
bce.ninsn = 1;
|
|
bce.mode = mode;
|
|
bce.qualifier = ptbq_again;
|
|
|
|
/* If we can't reach @nip without overflowing the displacement field, we
|
|
* have to stop and re-decode the instruction at @ip.
|
|
*/
|
|
if ((int64_t) bce.displacement != disp) {
|
|
|
|
memset(&bce, 0, sizeof(bce));
|
|
bce.ninsn = 1;
|
|
bce.mode = mode;
|
|
bce.qualifier = ptbq_decode;
|
|
}
|
|
|
|
return pt_bcache_add(bcache, ip, bce);
|
|
}
|
|
|
|
/* Insert a decode block cache entry.
|
|
*
|
|
* Add a decode block cache entry at @ioff.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static inline int pt_blk_add_decode(struct pt_block_cache *bcache,
|
|
uint64_t ioff, enum pt_exec_mode mode)
|
|
{
|
|
struct pt_bcache_entry bce;
|
|
|
|
memset(&bce, 0, sizeof(bce));
|
|
bce.ninsn = 1;
|
|
bce.mode = mode;
|
|
bce.qualifier = ptbq_decode;
|
|
|
|
return pt_bcache_add(bcache, ioff, bce);
|
|
}
|
|
|
|
/* Bounds the recursion depth of pt_blk_proceed_no_event_fill_cache(). */
enum {
	/* The maximum number of steps when filling the block cache. */
	bcache_fill_steps = 0x400
};
|
|
|
|
/* Proceed to the next instruction and fill the block cache for @decoder->ip.
 *
 * Tracing is enabled and we don't have an event pending.  The current IP is
 * not yet cached.
 *
 * Proceed one instruction without using the block cache, then try to proceed
 * further using the block cache.
 *
 * On our way back, add a block cache entry for the IP before proceeding.  Note
 * that the recursion is bounded by @steps and ultimately by the maximum number
 * of instructions in a block.
 *
 * Returns zero on success, a negative error code otherwise.
 */
static int
pt_blk_proceed_no_event_fill_cache(struct pt_block_decoder *decoder,
				   struct pt_block *block,
				   struct pt_block_cache *bcache,
				   const struct pt_mapped_section *msec,
				   size_t steps)
{
	struct pt_bcache_entry bce;
	struct pt_insn_ext iext;
	struct pt_insn insn;
	uint64_t nip, dip, ioff, noff;
	int64_t disp;
	int status;

	if (!decoder || !steps)
		return -pte_internal;

	/* Proceed one instruction by decoding and examining it.
	 *
	 * Note that we also return on a status of zero that indicates that the
	 * instruction didn't fit into @block.
	 */
	status = pt_blk_proceed_one_insn(decoder, block, &insn, &iext);
	if (status <= 0)
		return status;

	/* Cache entries are indexed by section-relative offsets. */
	ioff = pt_msec_unmap(msec, insn.ip);

	/* Let's see if we can proceed to the next IP without trace.
	 *
	 * If we can't, this is certainly a decision point.
	 */
	status = pt_insn_next_ip(&decoder->ip, &insn, &iext);
	if (status < 0) {
		if (status != -pte_bad_query)
			return status;

		memset(&bce, 0, sizeof(bce));
		bce.ninsn = 1;
		bce.mode = insn.mode;
		bce.isize = insn.size;

		/* Clear the instruction size in case of overflows. */
		if ((uint8_t) bce.isize != insn.size)
			bce.isize = 0;

		/* Classify the decision point so the cached-path can handle
		 * it without re-decoding.
		 */
		switch (insn.iclass) {
		case ptic_ptwrite:
		case ptic_error:
		case ptic_other:
			/* None of these requires trace to find the next IP. */
			return -pte_internal;

		case ptic_jump:
			/* A direct jump doesn't require trace. */
			if (iext.variant.branch.is_direct)
				return -pte_internal;

			bce.qualifier = ptbq_indirect;
			break;

		case ptic_call:
			/* A direct call doesn't require trace. */
			if (iext.variant.branch.is_direct)
				return -pte_internal;

			bce.qualifier = ptbq_ind_call;
			break;

		case ptic_return:
			bce.qualifier = ptbq_return;
			break;

		case ptic_cond_jump:
			bce.qualifier = ptbq_cond;
			break;

		case ptic_far_call:
		case ptic_far_return:
		case ptic_far_jump:
			bce.qualifier = ptbq_indirect;
			break;
		}

		/* If the block was truncated, we have to decode its last
		 * instruction each time.
		 *
		 * We could have skipped the above switch and size assignment in
		 * this case but this is already a slow and hopefully infrequent
		 * path.
		 */
		if (block->truncated)
			bce.qualifier = ptbq_decode;

		status = pt_bcache_add(bcache, ioff, bce);
		if (status < 0)
			return status;

		return pt_blk_proceed_with_trace(decoder, &insn, &iext);
	}

	/* The next instruction's IP. */
	nip = decoder->ip;
	noff = pt_msec_unmap(msec, nip);

	/* Even if we were able to proceed without trace, we might have to stop
	 * here for various reasons:
	 *
	 *   - at near direct calls to update the return-address stack
	 *
	 *     We are forced to re-decode @insn to get the branch displacement.
	 *
	 *     Even though it is constant, we don't cache it to avoid increasing
	 *     the size of a cache entry.  Note that the displacement field is
	 *     zero for this entry and we might be tempted to use it - but other
	 *     entries that point to this decision point will have non-zero
	 *     displacement.
	 *
	 *     We could proceed after a near direct call but we might as well
	 *     postpone it to the next iteration.  Make sure to end the block if
	 *     @decoder->flags.variant.block.end_on_call is set, though.
	 *
	 *   - at near direct backwards jumps to detect section splits
	 *
	 *     In case the current section is split underneath us, we must take
	 *     care to detect that split.
	 *
	 *     There is one corner case where the split is in the middle of a
	 *     linear sequence of instructions that branches back into the
	 *     originating section.
	 *
	 *     Calls, indirect branches, and far branches are already covered
	 *     since they either require trace or already require us to stop
	 *     (i.e. near direct calls) for other reasons.  That leaves near
	 *     direct backward jumps.
	 *
	 *     Instead of the decode stop at the jump instruction we're using we
	 *     could have made sure that other block cache entries that extend
	 *     this one insert a trampoline to the jump's entry.  This would
	 *     have been a bit more complicated.
	 *
	 *   - if we switched sections
	 *
	 *     This ends a block just like a branch that requires trace.
	 *
	 *     We need to re-decode @insn in order to determine the start IP of
	 *     the next block.
	 *
	 *   - if the block is truncated
	 *
	 *     We need to read the last instruction's memory from multiple
	 *     sections and provide it to the user.
	 *
	 *     We could still use the block cache but then we'd have to handle
	 *     this case for each qualifier.  Truncation is hopefully rare and
	 *     having to read the memory for the instruction from multiple
	 *     sections is already slow.  Let's rather keep things simple and
	 *     route it through the decode flow, where we already have
	 *     everything in place.
	 */
	switch (insn.iclass) {
	case ptic_call:
		return pt_blk_add_decode(bcache, ioff, insn.mode);

	case ptic_jump:
		/* An indirect branch requires trace and should have been
		 * handled above.
		 */
		if (!iext.variant.branch.is_direct)
			return -pte_internal;

		if (iext.variant.branch.displacement < 0 ||
		    decoder->flags.variant.block.end_on_jump)
			return pt_blk_add_decode(bcache, ioff, insn.mode);

		fallthrough;
	default:
		if (!pt_blk_is_in_section(msec, nip) || block->truncated)
			return pt_blk_add_decode(bcache, ioff, insn.mode);

		break;
	}

	/* We proceeded one instruction.  Let's see if we have a cache entry for
	 * the next instruction.
	 */
	status = pt_bcache_lookup(&bce, bcache, noff);
	if (status < 0)
		return status;

	/* If we don't have a valid cache entry, yet, fill the cache some more.
	 *
	 * On our way back, we add a cache entry for this instruction based on
	 * the cache entry of the succeeding instruction.
	 */
	if (!pt_bce_is_valid(bce)) {
		/* If we exceeded the maximum number of allowed steps, we insert
		 * a trampoline to the next instruction.
		 *
		 * The next time we encounter the same code, we will use the
		 * trampoline to jump directly to where we left off this time
		 * and continue from there.
		 */
		steps -= 1;
		if (!steps)
			return pt_blk_add_trampoline(bcache, ioff, noff,
						     insn.mode);

		status = pt_blk_proceed_no_event_fill_cache(decoder, block,
							    bcache, msec,
							    steps);
		if (status < 0)
			return status;

		/* Let's see if we have more luck this time. */
		status = pt_bcache_lookup(&bce, bcache, noff);
		if (status < 0)
			return status;

		/* If we still don't have a valid cache entry, we're done.  Most
		 * likely, @block overflowed and we couldn't proceed past the
		 * next instruction.
		 */
		if (!pt_bce_is_valid(bce))
			return 0;
	}

	/* We must not have switched execution modes.
	 *
	 * This would require an event and we're on the no-event flow.
	 */
	if (pt_bce_exec_mode(bce) != insn.mode)
		return -pte_internal;

	/* The decision point IP and the displacement from @insn.ip. */
	dip = nip + (uint64_t) (int64_t) bce.displacement;
	disp = (int64_t) (dip - insn.ip);

	/* We may have switched sections if the section was split.  See
	 * pt_blk_proceed_no_event_cached() for a more elaborate comment.
	 *
	 * We're not adding a block cache entry since this won't apply to the
	 * original section which may be shared with other decoders.
	 *
	 * We will instead take the slow path until the end of the section.
	 */
	if (!pt_blk_is_in_section(msec, dip))
		return 0;

	/* Let's try to reach @nip's decision point from @insn.ip.
	 *
	 * There are two fields that may overflow: @bce.ninsn and
	 * @bce.displacement.
	 */
	bce.ninsn += 1;
	bce.displacement = (int32_t) disp;

	/* If none of them overflowed, we're done.
	 *
	 * If one or both overflowed, let's try to insert a trampoline, i.e. we
	 * try to reach @dip via a ptbq_again entry to @nip.
	 */
	if (!bce.ninsn || ((int64_t) bce.displacement != disp))
		return pt_blk_add_trampoline(bcache, ioff, noff, insn.mode);

	/* We're done.  Add the cache entry.
	 *
	 * There's a chance that other decoders updated the cache entry in the
	 * meantime.  They should have come to the same conclusion as we,
	 * though, and the cache entries should be identical.
	 *
	 * Cache updates are atomic so even if the two versions were not
	 * identical, we wouldn't care because they are both correct.
	 */
	return pt_bcache_add(bcache, ioff, bce);
}
|
|
|
|
/* Proceed at a potentially truncated instruction.
|
|
*
|
|
* We were not able to decode the instruction at @decoder->ip in @decoder's
|
|
* cached section. This is typically caused by not having enough bytes.
|
|
*
|
|
* Try to decode the instruction again using the entire image. If this succeeds
|
|
* we expect to end up with an instruction that was truncated in the section it
|
|
* started. We provide the full instruction in this case and end the block.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_proceed_truncated(struct pt_block_decoder *decoder,
|
|
struct pt_block *block)
|
|
{
|
|
struct pt_insn_ext iext;
|
|
struct pt_insn insn;
|
|
int errcode;
|
|
|
|
if (!decoder || !block)
|
|
return -pte_internal;
|
|
|
|
memset(&iext, 0, sizeof(iext));
|
|
memset(&insn, 0, sizeof(insn));
|
|
|
|
insn.mode = decoder->mode;
|
|
insn.ip = decoder->ip;
|
|
|
|
errcode = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
|
|
/* We shouldn't use this function if the instruction isn't truncated. */
|
|
if (!insn.truncated)
|
|
return -pte_internal;
|
|
|
|
/* Provide the instruction in the block. This ends the block. */
|
|
memcpy(block->raw, insn.raw, insn.size);
|
|
block->iclass = insn.iclass;
|
|
block->size = insn.size;
|
|
block->truncated = 1;
|
|
|
|
/* Log calls' return addresses for return compression. */
|
|
errcode = pt_blk_log_call(decoder, &insn, &iext);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
|
|
/* Let's see if we can proceed to the next IP without trace.
|
|
*
|
|
* The truncated instruction ends the block but we still need to get the
|
|
* next block's start IP.
|
|
*/
|
|
errcode = pt_insn_next_ip(&decoder->ip, &insn, &iext);
|
|
if (errcode < 0) {
|
|
if (errcode != -pte_bad_query)
|
|
return errcode;
|
|
|
|
return pt_blk_proceed_with_trace(decoder, &insn, &iext);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Proceed to the next decision point using the block cache.
|
|
*
|
|
* Tracing is enabled and we don't have an event pending. We already set
|
|
* @block's isid. All reads are done within @msec as we're not switching
|
|
* sections between blocks.
|
|
*
|
|
* Proceed as far as we get without trace. Stop when we either:
|
|
*
|
|
* - need trace in order to continue
|
|
* - overflow the max number of instructions in a block
|
|
*
|
|
* We actually proceed one instruction further to get the start IP for the next
|
|
* block. This only updates @decoder's internal state, though.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_proceed_no_event_cached(struct pt_block_decoder *decoder,
|
|
struct pt_block *block,
|
|
struct pt_block_cache *bcache,
|
|
const struct pt_mapped_section *msec)
|
|
{
|
|
struct pt_bcache_entry bce;
|
|
uint16_t binsn, ninsn;
|
|
uint64_t offset, nip;
|
|
int status;
|
|
|
|
if (!decoder || !block)
|
|
return -pte_internal;
|
|
|
|
offset = pt_msec_unmap(msec, decoder->ip);
|
|
status = pt_bcache_lookup(&bce, bcache, offset);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
/* If we don't find a valid cache entry, fill the cache. */
|
|
if (!pt_bce_is_valid(bce))
|
|
return pt_blk_proceed_no_event_fill_cache(decoder, block,
|
|
bcache, msec,
|
|
bcache_fill_steps);
|
|
|
|
/* If we switched sections, the origianl section must have been split
|
|
* underneath us. A split preserves the block cache of the original
|
|
* section.
|
|
*
|
|
* Crossing sections requires ending the block so we can indicate the
|
|
* proper isid for the entire block.
|
|
*
|
|
* Plus there's the chance that the new section that caused the original
|
|
* section to split changed instructions.
|
|
*
|
|
* This check will also cover changes to a linear sequence of code we
|
|
* would otherwise have jumped over as long as the start and end are in
|
|
* different sub-sections.
|
|
*
|
|
* Since we stop on every (backwards) branch (through an artificial stop
|
|
* in the case of a near direct backward branch) we will detect all
|
|
* section splits.
|
|
*
|
|
* Switch to the slow path until we reach the end of this section.
|
|
*/
|
|
nip = decoder->ip + (uint64_t) (int64_t) bce.displacement;
|
|
if (!pt_blk_is_in_section(msec, nip))
|
|
return pt_blk_proceed_no_event_uncached(decoder, block);
|
|
|
|
/* We have a valid cache entry. Let's first check if the way to the
|
|
* decision point still fits into @block.
|
|
*
|
|
* If it doesn't, we end the block without filling it as much as we
|
|
* could since this would require us to switch to the slow path.
|
|
*
|
|
* On the next iteration, we will start with an empty block, which is
|
|
* guaranteed to have enough room for at least one block cache entry.
|
|
*/
|
|
binsn = block->ninsn;
|
|
ninsn = binsn + (uint16_t) bce.ninsn;
|
|
if (ninsn < binsn)
|
|
return 0;
|
|
|
|
/* Jump ahead to the decision point and proceed from there.
|
|
*
|
|
* We're not switching execution modes so even if @block already has an
|
|
* execution mode, it will be the one we're going to set.
|
|
*/
|
|
decoder->ip = nip;
|
|
|
|
/* We don't know the instruction class so we should be setting it to
|
|
* ptic_error. Since we will be able to fill it back in later in most
|
|
* cases, we move the clearing to the switch cases that don't.
|
|
*/
|
|
block->end_ip = nip;
|
|
block->ninsn = ninsn;
|
|
block->mode = pt_bce_exec_mode(bce);
|
|
|
|
|
|
switch (pt_bce_qualifier(bce)) {
|
|
case ptbq_again:
|
|
/* We're not able to reach the actual decision point due to
|
|
* overflows so we inserted a trampoline.
|
|
*
|
|
* We don't know the instruction and it is not guaranteed that
|
|
* we will proceed further (e.g. if @block overflowed). Let's
|
|
* clear any previously stored instruction class which has
|
|
* become invalid when we updated @block->ninsn.
|
|
*/
|
|
block->iclass = ptic_error;
|
|
|
|
return pt_blk_proceed_no_event_cached(decoder, block, bcache,
|
|
msec);
|
|
|
|
case ptbq_cond:
|
|
/* We're at a conditional branch. */
|
|
block->iclass = ptic_cond_jump;
|
|
|
|
/* Let's first check whether we know the size of the
|
|
* instruction. If we do, we might get away without decoding
|
|
* the instruction.
|
|
*
|
|
* If we don't know the size we might as well do the full decode
|
|
* and proceed-with-trace flow we do for ptbq_decode.
|
|
*/
|
|
if (bce.isize) {
|
|
uint64_t ip;
|
|
int taken;
|
|
|
|
/* If the branch is not taken, we don't need to decode
|
|
* the instruction at @decoder->ip.
|
|
*
|
|
* If it is taken, we have to implement everything here.
|
|
* We can't use the normal decode and proceed-with-trace
|
|
* flow since we already consumed the TNT bit.
|
|
*/
|
|
status = pt_blk_cond_branch(decoder, &taken);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
/* Preserve the query decoder's response which indicates
|
|
* upcoming events.
|
|
*/
|
|
decoder->status = status;
|
|
|
|
ip = decoder->ip;
|
|
if (taken) {
|
|
struct pt_insn_ext iext;
|
|
struct pt_insn insn;
|
|
|
|
memset(&iext, 0, sizeof(iext));
|
|
memset(&insn, 0, sizeof(insn));
|
|
|
|
insn.mode = pt_bce_exec_mode(bce);
|
|
insn.ip = ip;
|
|
|
|
status = pt_blk_decode_in_section(&insn, &iext,
|
|
msec);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
ip += (uint64_t) (int64_t)
|
|
iext.variant.branch.displacement;
|
|
}
|
|
|
|
decoder->ip = ip + bce.isize;
|
|
break;
|
|
}
|
|
|
|
fallthrough;
|
|
case ptbq_decode: {
|
|
struct pt_insn_ext iext;
|
|
struct pt_insn insn;
|
|
|
|
/* We need to decode the instruction at @decoder->ip and decide
|
|
* what to do based on that.
|
|
*
|
|
* We already accounted for the instruction so we can't just
|
|
* call pt_blk_proceed_one_insn().
|
|
*/
|
|
|
|
memset(&iext, 0, sizeof(iext));
|
|
memset(&insn, 0, sizeof(insn));
|
|
|
|
insn.mode = pt_bce_exec_mode(bce);
|
|
insn.ip = decoder->ip;
|
|
|
|
status = pt_blk_decode_in_section(&insn, &iext, msec);
|
|
if (status < 0) {
|
|
if (status != -pte_bad_insn)
|
|
return status;
|
|
|
|
return pt_blk_proceed_truncated(decoder, block);
|
|
}
|
|
|
|
/* We just decoded @insn so we know the instruction class. */
|
|
block->iclass = insn.iclass;
|
|
|
|
/* Log calls' return addresses for return compression. */
|
|
status = pt_blk_log_call(decoder, &insn, &iext);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
/* Let's see if we can proceed to the next IP without trace.
|
|
*
|
|
* Note that we also stop due to displacement overflows or to
|
|
* maintain the return-address stack for near direct calls.
|
|
*/
|
|
status = pt_insn_next_ip(&decoder->ip, &insn, &iext);
|
|
if (status < 0) {
|
|
if (status != -pte_bad_query)
|
|
return status;
|
|
|
|
/* We can't, so let's proceed with trace, which
|
|
* completes the block.
|
|
*/
|
|
return pt_blk_proceed_with_trace(decoder, &insn, &iext);
|
|
}
|
|
|
|
/* End the block if the user asked us to.
|
|
*
|
|
* We only need to take care about direct near branches.
|
|
* Indirect and far branches require trace and will naturally
|
|
* end a block.
|
|
*/
|
|
if ((decoder->flags.variant.block.end_on_call &&
|
|
(insn.iclass == ptic_call)) ||
|
|
(decoder->flags.variant.block.end_on_jump &&
|
|
(insn.iclass == ptic_jump)))
|
|
break;
|
|
|
|
/* If we can proceed without trace and we stay in @msec we may
|
|
* proceed further.
|
|
*
|
|
* We're done if we switch sections, though.
|
|
*/
|
|
if (!pt_blk_is_in_section(msec, decoder->ip))
|
|
break;
|
|
|
|
return pt_blk_proceed_no_event_cached(decoder, block, bcache,
|
|
msec);
|
|
}
|
|
|
|
case ptbq_ind_call: {
|
|
uint64_t ip;
|
|
|
|
/* We're at a near indirect call. */
|
|
block->iclass = ptic_call;
|
|
|
|
/* We need to update the return-address stack and query the
|
|
* destination IP.
|
|
*/
|
|
ip = decoder->ip;
|
|
|
|
/* If we already know the size of the instruction, we don't need
|
|
* to re-decode it.
|
|
*/
|
|
if (bce.isize)
|
|
ip += bce.isize;
|
|
else {
|
|
struct pt_insn_ext iext;
|
|
struct pt_insn insn;
|
|
|
|
memset(&iext, 0, sizeof(iext));
|
|
memset(&insn, 0, sizeof(insn));
|
|
|
|
insn.mode = pt_bce_exec_mode(bce);
|
|
insn.ip = ip;
|
|
|
|
status = pt_blk_decode_in_section(&insn, &iext, msec);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
ip += insn.size;
|
|
}
|
|
|
|
status = pt_retstack_push(&decoder->retstack, ip);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
status = pt_blk_indirect_branch(decoder, &decoder->ip);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
/* Preserve the query decoder's response which indicates
|
|
* upcoming events.
|
|
*/
|
|
decoder->status = status;
|
|
break;
|
|
}
|
|
|
|
case ptbq_return: {
|
|
int taken;
|
|
|
|
/* We're at a near return. */
|
|
block->iclass = ptic_return;
|
|
|
|
/* Check for a compressed return. */
|
|
status = pt_blk_cond_branch(decoder, &taken);
|
|
if (status < 0) {
|
|
if (status != -pte_bad_query)
|
|
return status;
|
|
|
|
/* The return is not compressed. We need another query
|
|
* to determine the destination IP.
|
|
*/
|
|
status = pt_blk_indirect_branch(decoder, &decoder->ip);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
/* Preserve the query decoder's response which indicates
|
|
* upcoming events.
|
|
*/
|
|
decoder->status = status;
|
|
break;
|
|
}
|
|
|
|
/* Preserve the query decoder's response which indicates
|
|
* upcoming events.
|
|
*/
|
|
decoder->status = status;
|
|
|
|
/* A compressed return is indicated by a taken conditional
|
|
* branch.
|
|
*/
|
|
if (!taken)
|
|
return -pte_bad_retcomp;
|
|
|
|
return pt_retstack_pop(&decoder->retstack, &decoder->ip);
|
|
}
|
|
|
|
case ptbq_indirect:
|
|
/* We're at an indirect jump or far transfer.
|
|
*
|
|
* We don't know the exact instruction class and there's no
|
|
* reason to decode the instruction for any other purpose.
|
|
*
|
|
* Indicate that we don't know the instruction class and leave
|
|
* it to our caller to decode the instruction if needed.
|
|
*/
|
|
block->iclass = ptic_error;
|
|
|
|
/* This is neither a near call nor return so we don't need to
|
|
* touch the return-address stack.
|
|
*
|
|
* Just query the destination IP.
|
|
*/
|
|
status = pt_blk_indirect_branch(decoder, &decoder->ip);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
/* Preserve the query decoder's response which indicates
|
|
* upcoming events.
|
|
*/
|
|
decoder->status = status;
|
|
break;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int pt_blk_msec_fill(struct pt_block_decoder *decoder,
|
|
const struct pt_mapped_section **pmsec)
|
|
{
|
|
const struct pt_mapped_section *msec;
|
|
struct pt_section *section;
|
|
int isid, errcode;
|
|
|
|
if (!decoder || !pmsec)
|
|
return -pte_internal;
|
|
|
|
isid = pt_msec_cache_fill(&decoder->scache, &msec, decoder->image,
|
|
&decoder->asid, decoder->ip);
|
|
if (isid < 0)
|
|
return isid;
|
|
|
|
section = pt_msec_section(msec);
|
|
if (!section)
|
|
return -pte_internal;
|
|
|
|
*pmsec = msec;
|
|
|
|
errcode = pt_section_request_bcache(section);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
|
|
return isid;
|
|
}
|
|
|
|
static inline int pt_blk_msec_lookup(struct pt_block_decoder *decoder,
|
|
const struct pt_mapped_section **pmsec)
|
|
{
|
|
int isid;
|
|
|
|
if (!decoder)
|
|
return -pte_internal;
|
|
|
|
isid = pt_msec_cache_read(&decoder->scache, pmsec, decoder->image,
|
|
decoder->ip);
|
|
if (isid < 0) {
|
|
if (isid != -pte_nomap)
|
|
return isid;
|
|
|
|
return pt_blk_msec_fill(decoder, pmsec);
|
|
}
|
|
|
|
return isid;
|
|
}
|
|
|
|
/* Proceed to the next decision point - try using the cache.
|
|
*
|
|
* Tracing is enabled and we don't have an event pending. Proceed as far as
|
|
* we get without trace. Stop when we either:
|
|
*
|
|
* - need trace in order to continue
|
|
* - overflow the max number of instructions in a block
|
|
*
|
|
* We actually proceed one instruction further to get the start IP for the next
|
|
* block. This only updates @decoder's internal state, though.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_proceed_no_event(struct pt_block_decoder *decoder,
|
|
struct pt_block *block)
|
|
{
|
|
const struct pt_mapped_section *msec;
|
|
struct pt_block_cache *bcache;
|
|
struct pt_section *section;
|
|
int isid;
|
|
|
|
if (!decoder || !block)
|
|
return -pte_internal;
|
|
|
|
isid = pt_blk_msec_lookup(decoder, &msec);
|
|
if (isid < 0) {
|
|
if (isid != -pte_nomap)
|
|
return isid;
|
|
|
|
/* Even if there is no such section in the image, we may still
|
|
* read the memory via the callback function.
|
|
*/
|
|
return pt_blk_proceed_no_event_uncached(decoder, block);
|
|
}
|
|
|
|
/* We do not switch sections inside a block. */
|
|
if (isid != block->isid) {
|
|
if (!pt_blk_block_is_empty(block))
|
|
return 0;
|
|
|
|
block->isid = isid;
|
|
}
|
|
|
|
section = pt_msec_section(msec);
|
|
if (!section)
|
|
return -pte_internal;
|
|
|
|
bcache = pt_section_bcache(section);
|
|
if (!bcache)
|
|
return pt_blk_proceed_no_event_uncached(decoder, block);
|
|
|
|
return pt_blk_proceed_no_event_cached(decoder, block, bcache, msec);
|
|
}
|
|
|
|
/* Proceed to the next event or decision point.
|
|
*
|
|
* Returns a non-negative pt_status_flag bit-vector on success, a negative error
|
|
* code otherwise.
|
|
*/
|
|
static int pt_blk_proceed(struct pt_block_decoder *decoder,
|
|
struct pt_block *block)
|
|
{
|
|
int status;
|
|
|
|
status = pt_blk_fetch_event(decoder);
|
|
if (status != 0) {
|
|
if (status < 0)
|
|
return status;
|
|
|
|
return pt_blk_proceed_event(decoder, block);
|
|
}
|
|
|
|
/* If tracing is disabled we should either be out of trace or we should
|
|
* have taken the event flow above.
|
|
*/
|
|
if (!decoder->enabled) {
|
|
if (decoder->status & pts_eos)
|
|
return -pte_eos;
|
|
|
|
return -pte_no_enable;
|
|
}
|
|
|
|
status = pt_blk_proceed_no_event(decoder, block);
|
|
if (status < 0)
|
|
return status;
|
|
|
|
return pt_blk_proceed_trailing_event(decoder, block);
|
|
}
|
|
|
|
enum {
	/* The maximum number of steps to take when determining whether the
	 * event location can be reached (see pt_blk_handle_erratum_bdm64()).
	 */
	bdm64_max_steps = 0x100
};
|
|
|
|
/* Try to work around erratum BDM64.
 *
 * If we got a transaction abort immediately following a branch that produced
 * trace, the trace for that branch might have been corrupted.
 *
 * Returns a positive integer if the erratum was handled.
 * Returns zero if the erratum does not seem to apply.
 * Returns a negative error code otherwise.
 */
static int pt_blk_handle_erratum_bdm64(struct pt_block_decoder *decoder,
				       const struct pt_block *block,
				       const struct pt_event *ev)
{
	struct pt_insn_ext iext;
	struct pt_insn insn;
	int status;

	if (!decoder || !block || !ev)
		return -pte_internal;

	/* This only affects aborts. */
	if (!ev->variant.tsx.aborted)
		return 0;

	/* This only affects branches that require trace.
	 *
	 * If the erratum hits, that branch ended the current block and brought
	 * us to the trailing event flow.
	 */
	if (pt_blk_block_is_empty(block))
		return 0;

	/* Decode the last instruction of @block.
	 *
	 * NOTE(review): @insn and @iext are not zero-initialized here;
	 * presumably pt_insn_decode() fills in every field read below --
	 * confirm against its contract.
	 */
	insn.mode = block->mode;
	insn.ip = block->end_ip;

	status = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
	if (status < 0)
		/* A decode failure is not fatal here; assume the erratum does
		 * not apply.
		 */
		return 0;

	if (!pt_insn_is_branch(&insn, &iext))
		return 0;

	/* Let's check if we can reach the event location from here.
	 *
	 * If we can, let's assume the erratum did not hit.  We might still be
	 * wrong but we're not able to tell.
	 */
	status = pt_insn_range_is_contiguous(decoder->ip, ev->variant.tsx.ip,
					     decoder->mode, decoder->image,
					     &decoder->asid, bdm64_max_steps);
	if (status > 0)
		return status;

	/* We can't reach the event location.  This could either mean that we
	 * stopped too early (and status is zero) or that the erratum hit.
	 *
	 * We assume the latter and pretend that the previous branch brought us
	 * to the event location, instead.
	 */
	decoder->ip = ev->variant.tsx.ip;

	return 1;
}
|
|
|
|
/* Check whether a trailing TSX event should be postponed.
 *
 * This involves handling erratum BDM64.
 *
 * Returns a positive integer if the event is to be postponed.
 * Returns zero if the event should be processed.
 * Returns a negative error code otherwise.
 */
static inline int pt_blk_postpone_trailing_tsx(struct pt_block_decoder *decoder,
					       struct pt_block *block,
					       const struct pt_event *ev)
{
	int status;

	if (!decoder || !ev)
		return -pte_internal;

	/* An event without an IP applies immediately; nothing to postpone. */
	if (ev->ip_suppressed)
		return 0;

	/* @block may be NULL; the erratum workaround needs a block. */
	if (block && decoder->query.config.errata.bdm64) {
		status = pt_blk_handle_erratum_bdm64(decoder, block, ev);
		if (status < 0)
			/* Errors are deliberately treated as 'postpone'
			 * rather than applying the event at a potentially
			 * wrong IP.
			 */
			return 1;
	}

	/* Postpone the event until we reach its IP. */
	if (decoder->ip != ev->variant.tsx.ip)
		return 1;

	return 0;
}
|
|
|
|
/* Proceed with events that bind to the current decoder IP.
 *
 * This function is used in the following scenarios:
 *
 *   - we just synchronized onto the trace stream
 *   - we ended a block and proceeded to the next IP
 *   - we processed an event that was indicated by this function
 *
 * Check if there is an event at the current IP that needs to be indicated to
 * the user.
 *
 * Returns a non-negative pt_status_flag bit-vector on success, a negative error
 * code otherwise.
 */
static int pt_blk_proceed_trailing_event(struct pt_block_decoder *decoder,
					 struct pt_block *block)
{
	struct pt_event *ev;
	int status;

	if (!decoder)
		return -pte_internal;

	status = pt_blk_fetch_event(decoder);
	if (status <= 0) {
		/* No event is pending (or fetching failed).  Complete any
		 * postponed instruction and report the decoder status.
		 */
		if (status < 0)
			return status;

		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		return pt_blk_status(decoder, 0);
	}

	ev = &decoder->event;
	switch (ev->type) {
	case ptev_disabled:
		/* Synchronous disable events are normally indicated on the
		 * event flow.
		 */
		if (!decoder->process_insn)
			break;

		/* A sync disable may bind to a CR3 changing instruction. */
		if (ev->ip_suppressed &&
		    pt_insn_changes_cr3(&decoder->insn, &decoder->iext))
			return pt_blk_status(decoder, pts_event_pending);

		/* Or it binds to the next branch that would require trace.
		 *
		 * Try to complete processing the current instruction by
		 * proceeding past it.  If that fails because it would require
		 * trace, we can apply the disabled event.
		 */
		status = pt_insn_next_ip(&decoder->ip, &decoder->insn,
					 &decoder->iext);
		if (status < 0) {
			if (status != -pte_bad_query)
				return status;

			status = pt_blk_set_disable_resume_ip(decoder,
							      &decoder->insn);
			if (status < 0)
				return status;

			return pt_blk_status(decoder, pts_event_pending);
		}

		/* We proceeded past the current instruction. */
		status = pt_blk_clear_postponed_insn(decoder);
		if (status < 0)
			return status;

		/* This might have brought us to the disable IP. */
		if (!ev->ip_suppressed &&
		    decoder->ip == ev->variant.disabled.ip)
			return pt_blk_status(decoder, pts_event_pending);

		break;

	case ptev_enabled:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_async_disabled:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		if (decoder->ip != ev->variant.async_disabled.at)
			break;

		if (decoder->query.config.errata.skd022) {
			status = pt_blk_handle_erratum_skd022(decoder, ev);
			if (status != 0) {
				if (status < 0)
					return status;

				/* If the erratum applies, the event is modified
				 * to a synchronous disable event that will be
				 * processed on the next pt_blk_proceed_event()
				 * call.  We're done.
				 */
				break;
			}
		}

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_async_branch:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		if (decoder->ip != ev->variant.async_branch.from)
			break;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_paging:
		/* We apply the event immediately if we're not tracing. */
		if (!decoder->enabled)
			return pt_blk_status(decoder, pts_event_pending);

		/* Synchronous paging events are normally indicated on the event
		 * flow, unless they bind to the same instruction as a previous
		 * event.
		 *
		 * We bind at most one paging event to an instruction, though.
		 */
		if (!decoder->process_insn || decoder->bound_paging)
			break;

		/* We're done if we're not binding to the currently postponed
		 * instruction.  We will process the event on the normal event
		 * flow in the next iteration.
		 */
		if (!pt_insn_binds_to_pip(&decoder->insn, &decoder->iext))
			break;

		/* We bound a paging event.  Make sure we do not bind further
		 * paging events to this instruction.
		 */
		decoder->bound_paging = 1;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_async_paging:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		if (!ev->ip_suppressed &&
		    decoder->ip != ev->variant.async_paging.ip)
			break;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_vmcs:
		/* We apply the event immediately if we're not tracing. */
		if (!decoder->enabled)
			return pt_blk_status(decoder, pts_event_pending);

		/* Synchronous vmcs events are normally indicated on the event
		 * flow, unless they bind to the same instruction as a previous
		 * event.
		 *
		 * We bind at most one vmcs event to an instruction, though.
		 */
		if (!decoder->process_insn || decoder->bound_vmcs)
			break;

		/* We're done if we're not binding to the currently postponed
		 * instruction.  We will process the event on the normal event
		 * flow in the next iteration.
		 */
		if (!pt_insn_binds_to_vmcs(&decoder->insn, &decoder->iext))
			break;

		/* We bound a vmcs event.  Make sure we do not bind further vmcs
		 * events to this instruction.
		 */
		decoder->bound_vmcs = 1;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_async_vmcs:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		if (!ev->ip_suppressed &&
		    decoder->ip != ev->variant.async_vmcs.ip)
			break;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_overflow:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_exec_mode:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		if (!ev->ip_suppressed &&
		    decoder->ip != ev->variant.exec_mode.ip)
			break;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_tsx:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		status = pt_blk_postpone_trailing_tsx(decoder, block, ev);
		if (status != 0) {
			if (status < 0)
				return status;

			break;
		}

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_stop:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_exstop:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		if (!ev->ip_suppressed && decoder->enabled &&
		    decoder->ip != ev->variant.exstop.ip)
			break;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_mwait:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		if (!ev->ip_suppressed && decoder->enabled &&
		    decoder->ip != ev->variant.mwait.ip)
			break;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_pwre:
	case ptev_pwrx:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_ptwrite:
		/* We apply the event immediately if we're not tracing. */
		if (!decoder->enabled)
			return pt_blk_status(decoder, pts_event_pending);

		/* Ptwrite events are normally indicated on the event flow,
		 * unless they bind to the same instruction as a previous event.
		 *
		 * We bind at most one ptwrite event to an instruction, though.
		 */
		if (!decoder->process_insn || decoder->bound_ptwrite)
			break;

		/* We're done if we're not binding to the currently postponed
		 * instruction.  We will process the event on the normal event
		 * flow in the next iteration.
		 */
		if (!ev->ip_suppressed ||
		    !pt_insn_is_ptwrite(&decoder->insn, &decoder->iext))
			break;

		/* We bound a ptwrite event.  Make sure we do not bind further
		 * ptwrite events to this instruction.
		 */
		decoder->bound_ptwrite = 1;

		return pt_blk_status(decoder, pts_event_pending);

	case ptev_tick:
	case ptev_cbr:
	case ptev_mnt:
		/* This event does not bind to an instruction. */
		status = pt_blk_proceed_postponed_insn(decoder);
		if (status < 0)
			return status;

		return pt_blk_status(decoder, pts_event_pending);
	}

	/* No further events.  Proceed past any postponed instruction. */
	status = pt_blk_proceed_postponed_insn(decoder);
	if (status < 0)
		return status;

	return pt_blk_status(decoder, 0);
}
|
|
|
|
int pt_blk_next(struct pt_block_decoder *decoder, struct pt_block *ublock,
|
|
size_t size)
|
|
{
|
|
struct pt_block block, *pblock;
|
|
int errcode, status;
|
|
|
|
if (!decoder || !ublock)
|
|
return -pte_invalid;
|
|
|
|
pblock = size == sizeof(block) ? ublock : █
|
|
|
|
/* Zero-initialize the block in case of error returns. */
|
|
memset(pblock, 0, sizeof(*pblock));
|
|
|
|
/* Fill in a few things from the current decode state.
|
|
*
|
|
* This reflects the state of the last pt_blk_next() or pt_blk_start()
|
|
* call. Note that, unless we stop with tracing disabled, we proceed
|
|
* already to the start IP of the next block.
|
|
*
|
|
* Some of the state may later be overwritten as we process events.
|
|
*/
|
|
pblock->ip = decoder->ip;
|
|
pblock->mode = decoder->mode;
|
|
if (decoder->speculative)
|
|
pblock->speculative = 1;
|
|
|
|
/* Proceed one block. */
|
|
status = pt_blk_proceed(decoder, pblock);
|
|
|
|
errcode = block_to_user(ublock, size, pblock);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
|
|
return status;
|
|
}
|
|
|
|
/* Process an enabled event.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_process_enabled(struct pt_block_decoder *decoder,
|
|
const struct pt_event *ev)
|
|
{
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
/* This event can't be a status update. */
|
|
if (ev->status_update)
|
|
return -pte_bad_context;
|
|
|
|
/* We must have an IP in order to start decoding. */
|
|
if (ev->ip_suppressed)
|
|
return -pte_noip;
|
|
|
|
/* We must currently be disabled. */
|
|
if (decoder->enabled)
|
|
return -pte_bad_context;
|
|
|
|
decoder->ip = ev->variant.enabled.ip;
|
|
decoder->enabled = 1;
|
|
decoder->process_event = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Process a disabled event.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_process_disabled(struct pt_block_decoder *decoder,
|
|
const struct pt_event *ev)
|
|
{
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
/* This event can't be a status update. */
|
|
if (ev->status_update)
|
|
return -pte_bad_context;
|
|
|
|
/* We must currently be enabled. */
|
|
if (!decoder->enabled)
|
|
return -pte_bad_context;
|
|
|
|
/* We preserve @decoder->ip. This is where we expect tracing to resume
|
|
* and we'll indicate that on the subsequent enabled event if tracing
|
|
* actually does resume from there.
|
|
*/
|
|
decoder->enabled = 0;
|
|
decoder->process_event = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Process an asynchronous branch event.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_process_async_branch(struct pt_block_decoder *decoder,
|
|
const struct pt_event *ev)
|
|
{
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
/* This event can't be a status update. */
|
|
if (ev->status_update)
|
|
return -pte_bad_context;
|
|
|
|
/* We must currently be enabled. */
|
|
if (!decoder->enabled)
|
|
return -pte_bad_context;
|
|
|
|
/* Jump to the branch destination. We will continue from there in the
|
|
* next iteration.
|
|
*/
|
|
decoder->ip = ev->variant.async_branch.to;
|
|
decoder->process_event = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Process a paging event.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_process_paging(struct pt_block_decoder *decoder,
|
|
const struct pt_event *ev)
|
|
{
|
|
uint64_t cr3;
|
|
int errcode;
|
|
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
cr3 = ev->variant.paging.cr3;
|
|
if (decoder->asid.cr3 != cr3) {
|
|
errcode = pt_msec_cache_invalidate(&decoder->scache);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
|
|
decoder->asid.cr3 = cr3;
|
|
}
|
|
|
|
decoder->process_event = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Process a vmcs event.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_process_vmcs(struct pt_block_decoder *decoder,
|
|
const struct pt_event *ev)
|
|
{
|
|
uint64_t vmcs;
|
|
int errcode;
|
|
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
vmcs = ev->variant.vmcs.base;
|
|
if (decoder->asid.vmcs != vmcs) {
|
|
errcode = pt_msec_cache_invalidate(&decoder->scache);
|
|
if (errcode < 0)
|
|
return errcode;
|
|
|
|
decoder->asid.vmcs = vmcs;
|
|
}
|
|
|
|
decoder->process_event = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Process an overflow event.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_process_overflow(struct pt_block_decoder *decoder,
|
|
const struct pt_event *ev)
|
|
{
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
/* This event can't be a status update. */
|
|
if (ev->status_update)
|
|
return -pte_bad_context;
|
|
|
|
/* If the IP is suppressed, the overflow resolved while tracing was
|
|
* disabled. Otherwise it resolved while tracing was enabled.
|
|
*/
|
|
if (ev->ip_suppressed) {
|
|
/* Tracing is disabled. It doesn't make sense to preserve the
|
|
* previous IP. This will just be misleading. Even if tracing
|
|
* had been disabled before, as well, we might have missed the
|
|
* re-enable in the overflow.
|
|
*/
|
|
decoder->enabled = 0;
|
|
decoder->ip = 0ull;
|
|
} else {
|
|
/* Tracing is enabled and we're at the IP at which the overflow
|
|
* resolved.
|
|
*/
|
|
decoder->enabled = 1;
|
|
decoder->ip = ev->variant.overflow.ip;
|
|
}
|
|
|
|
/* We don't know the TSX state. Let's assume we execute normally.
|
|
*
|
|
* We also don't know the execution mode. Let's keep what we have
|
|
* in case we don't get an update before we have to decode the next
|
|
* instruction.
|
|
*/
|
|
decoder->speculative = 0;
|
|
decoder->process_event = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Process an exec mode event.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_process_exec_mode(struct pt_block_decoder *decoder,
|
|
const struct pt_event *ev)
|
|
{
|
|
enum pt_exec_mode mode;
|
|
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
/* Use status update events to diagnose inconsistencies. */
|
|
mode = ev->variant.exec_mode.mode;
|
|
if (ev->status_update && decoder->enabled &&
|
|
decoder->mode != ptem_unknown && decoder->mode != mode)
|
|
return -pte_bad_status_update;
|
|
|
|
decoder->mode = mode;
|
|
decoder->process_event = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Process a tsx event.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_process_tsx(struct pt_block_decoder *decoder,
|
|
const struct pt_event *ev)
|
|
{
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
decoder->speculative = ev->variant.tsx.speculative;
|
|
decoder->process_event = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Process a stop event.
|
|
*
|
|
* Returns zero on success, a negative error code otherwise.
|
|
*/
|
|
static int pt_blk_process_stop(struct pt_block_decoder *decoder,
|
|
const struct pt_event *ev)
|
|
{
|
|
if (!decoder || !ev)
|
|
return -pte_internal;
|
|
|
|
/* This event can't be a status update. */
|
|
if (ev->status_update)
|
|
return -pte_bad_context;
|
|
|
|
/* Tracing is always disabled before it is stopped. */
|
|
if (decoder->enabled)
|
|
return -pte_bad_context;
|
|
|
|
decoder->process_event = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Report the pending event to the user.
 *
 * Validates the pending event against the decoder's current state (most
 * event types require that the event's IP matches the decoder's IP unless
 * the IP was suppressed), applies the event's effect to the decoder state
 * via the per-type pt_blk_process_* helpers, and copies the event into the
 * user-supplied buffer @uevent of @size bytes.
 *
 * Returns a non-negative pt_status_flag bit-vector on success (indicating
 * e.g. further pending events), a negative error code otherwise:
 *
 *   -pte_invalid    if @decoder or @uevent is NULL.
 *   -pte_bad_query  if there is no pending event or the event does not
 *                   match the decoder's current IP.
 */
int pt_blk_event(struct pt_block_decoder *decoder, struct pt_event *uevent,
		 size_t size)
{
	struct pt_event *ev;
	int status;

	if (!decoder || !uevent)
		return -pte_invalid;

	/* We must currently process an event. */
	if (!decoder->process_event)
		return -pte_bad_query;

	ev = &decoder->event;
	switch (ev->type) {
	case ptev_enabled:
		/* Indicate that tracing resumes from the IP at which tracing
		 * had been disabled before (with some special treatment for
		 * calls).
		 */
		if (ev->variant.enabled.ip == decoder->ip)
			ev->variant.enabled.resumed = 1;

		status = pt_blk_process_enabled(decoder, ev);
		if (status < 0)
			return status;

		break;

	case ptev_async_disabled:
		/* An asynchronous disable must occur at the decoder's
		 * current IP.
		 */
		if (decoder->ip != ev->variant.async_disabled.at)
			return -pte_bad_query;

		fallthrough;
	case ptev_disabled:

		status = pt_blk_process_disabled(decoder, ev);
		if (status < 0)
			return status;

		break;

	case ptev_async_branch:
		/* An asynchronous branch must originate at the decoder's
		 * current IP.
		 */
		if (decoder->ip != ev->variant.async_branch.from)
			return -pte_bad_query;

		status = pt_blk_process_async_branch(decoder, ev);
		if (status < 0)
			return status;

		break;

	case ptev_async_paging:
		/* Unless the IP was suppressed, the event must be bound to
		 * the decoder's current IP.
		 */
		if (!ev->ip_suppressed &&
		    decoder->ip != ev->variant.async_paging.ip)
			return -pte_bad_query;

		fallthrough;
	case ptev_paging:
		status = pt_blk_process_paging(decoder, ev);
		if (status < 0)
			return status;

		break;

	case ptev_async_vmcs:
		/* Unless the IP was suppressed, the event must be bound to
		 * the decoder's current IP.
		 */
		if (!ev->ip_suppressed &&
		    decoder->ip != ev->variant.async_vmcs.ip)
			return -pte_bad_query;

		fallthrough;
	case ptev_vmcs:
		status = pt_blk_process_vmcs(decoder, ev);
		if (status < 0)
			return status;

		break;

	case ptev_overflow:
		status = pt_blk_process_overflow(decoder, ev);
		if (status < 0)
			return status;

		break;

	case ptev_exec_mode:
		/* Unless the IP was suppressed, the event must be bound to
		 * the decoder's current IP.
		 */
		if (!ev->ip_suppressed &&
		    decoder->ip != ev->variant.exec_mode.ip)
			return -pte_bad_query;

		status = pt_blk_process_exec_mode(decoder, ev);
		if (status < 0)
			return status;

		break;

	case ptev_tsx:
		/* Unless the IP was suppressed, the event must be bound to
		 * the decoder's current IP.
		 */
		if (!ev->ip_suppressed && decoder->ip != ev->variant.tsx.ip)
			return -pte_bad_query;

		status = pt_blk_process_tsx(decoder, ev);
		if (status < 0)
			return status;

		break;

	case ptev_stop:
		status = pt_blk_process_stop(decoder, ev);
		if (status < 0)
			return status;

		break;

	case ptev_exstop:
		/* The IP check only applies while tracing is enabled. */
		if (!ev->ip_suppressed && decoder->enabled &&
		    decoder->ip != ev->variant.exstop.ip)
			return -pte_bad_query;

		decoder->process_event = 0;
		break;

	case ptev_mwait:
		/* The IP check only applies while tracing is enabled. */
		if (!ev->ip_suppressed && decoder->enabled &&
		    decoder->ip != ev->variant.mwait.ip)
			return -pte_bad_query;

		decoder->process_event = 0;
		break;

	case ptev_pwre:
	case ptev_pwrx:
	case ptev_ptwrite:
	case ptev_tick:
	case ptev_cbr:
	case ptev_mnt:
		/* These events do not affect decoder state; just mark them
		 * as consumed.
		 */
		decoder->process_event = 0;
		break;
	}

	/* Copy the event to the user.  Make sure we're not writing beyond the
	 * memory provided by the user.
	 *
	 * We might truncate details of an event but only for those events the
	 * user can't know about, anyway.
	 */
	if (sizeof(*ev) < size)
		size = sizeof(*ev);

	memcpy(uevent, ev, size);

	/* Indicate further events. */
	return pt_blk_proceed_trailing_event(decoder, NULL);
}