Specialize blit library for ARM (fix #147).

Use load/store-multiple instructions for 32-byte chunks in the ARM-specific
blit function, analogous to the x86 variant. Turn the x86 blit function into
a generic one, and provide the needed utility functions for ARM and generic
code. Please refer to issue #147 for the discussion.
This commit is contained in:
Stefan Kalkowski 2012-03-09 23:49:41 +01:00 committed by Norman Feske
parent 724a6e8120
commit 890a3ee868
8 changed files with 192 additions and 59 deletions

5
os/lib/mk/arm/blit.mk Normal file
View File

@ -0,0 +1,5 @@
# Build description of the blit library, restricted to 'arm' and '32bit'
SRC_CC = blit.cc
REQUIRES = arm 32bit
# headers included by blit.cc are searched in the ARM-specific directory
INC_DIR += $(REP_DIR)/src/lib/blit/arm
# blit.cc itself is looked up in the common blit source directory
vpath blit.cc $(REP_DIR)/src/lib/blit

View File

@ -1,3 +1,4 @@
SRC_CC = blit.cc
SRC_CC = blit.cc
INC_DIR += $(REP_DIR)/src/lib/blit
vpath blit.cc $(REP_DIR)/src/lib/blit

View File

@ -1,5 +1,6 @@
SRC_CC = blit.cc
REQUIRES = x86 32bit
INC_DIR += $(REP_DIR)/src/lib/blit/x86/x86_32
INC_DIR += $(REP_DIR)/src/lib/blit/x86/x86_32 \
$(REP_DIR)/src/lib/blit/x86
vpath blit.cc $(REP_DIR)/src/lib/blit/x86
vpath blit.cc $(REP_DIR)/src/lib/blit

View File

@ -1,5 +1,6 @@
SRC_CC = blit.cc
REQUIRES = x86 64bit
INC_DIR += $(REP_DIR)/src/lib/blit/x86/x86_64
INC_DIR += $(REP_DIR)/src/lib/blit/x86/x86_64 \
$(REP_DIR)/src/lib/blit/x86
vpath blit.cc $(REP_DIR)/src/lib/blit/x86
vpath blit.cc $(REP_DIR)/src/lib/blit

View File

@ -0,0 +1,77 @@
/*
* \brief Blitting utilities for ARM
* \author Stefan Kalkowski
* \date 2012-03-08
*/
/*
* Copyright (C) 2012 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU General Public License version 2.
*/
#ifndef _LIB__BLIT__BLIT_HELPER_H_
#define _LIB__BLIT__BLIT_HELPER_H_
#include <blit/blit.h>
/**
 * Copy a column that is a single 16bit value wide
 *
 * \param src    address of the first source value
 * \param src_w  number of bytes to advance 'src' after each line
 * \param dst    address of the first destination value
 * \param dst_w  number of bytes to advance 'dst' after each line
 * \param h      number of lines to copy, nothing is copied if not positive
 */
static inline void copy_16bit_column(char *src, int src_w,
                                     char *dst, int dst_w, int h)
{
	while (h-- > 0) {
		short *from = (short *)src;
		short *to   = (short *)dst;

		*to = *from;

		src += src_w;
		dst += dst_w;
	}
}
/**
 * Copy pixel block 32bit-wise
 *
 * \param src    source address
 * \param src_w  width of source buffer in bytes
 * \param dst    32bit-aligned destination address
 * \param dst_w  width of destination buffer in bytes
 * \param w      number of 32bit words to copy per line
 * \param h      number of lines to copy
 *
 * Nothing is copied if 'w' or 'h' is not positive.
 *
 * NOTE(review): 'src' is read with 32bit loads too, and the 32-byte variant
 * falls back to this function for addresses that are not 32bit-aligned.
 * This presumably relies on the CPU tolerating unaligned word accesses -
 * confirm for the targeted ARM cores.
 */
static inline void copy_block_32bit(char *src, int src_w,
                                    char *dst, int dst_w,
                                    int w, int h)
{
	/* a negative count would let the count-down loops below run away */
	if (w <= 0 || h <= 0)
		return;

	/*
	 * The inner loop already advances both pointers by the copied bytes,
	 * so only the remainder of each line has to be skipped afterwards.
	 */
	src_w -= w*4;
	dst_w -= w*4;

	for (; h-- > 0; src += src_w, dst += dst_w)
		for (int i = w; i-- > 0; src += 4, dst += 4)
			*((int *)dst) = *((int *)src);
}
/**
 * Copy block with a size of multiple of 32 bytes
 *
 * \param src    source address
 * \param src_w  width of source buffer in bytes
 * \param dst    destination address
 * \param dst_w  width of destination buffer in bytes
 * \param w      width in 32 byte chunks to copy per line
 * \param h      number of lines to copy
 *
 * Nothing is copied if 'w' or 'h' is not positive.
 */
static inline void copy_block_32byte(char *src, int src_w,
                                     char *dst, int dst_w,
                                     int w, int h)
{
	/* a negative count would let the count-down loops below run away */
	if (w <= 0 || h <= 0)
		return;

	/*
	 * The load/store-multiple path is taken for 32bit-aligned addresses
	 * only. The line widths are part of the check because a width that is
	 * not a multiple of 4 moves every following line off the alignment
	 * established for the first one.
	 */
	if (((long)src | (long)dst | src_w | dst_w) & 3) {

		/* one 32 byte chunk corresponds to eight 32bit words */
		copy_block_32bit(src, src_w, dst, dst_w, w*8, h);
		return;
	}

	/* the asm advances both pointers, skip only the rest of each line */
	src_w -= w*32;
	dst_w -= w*32;

	for (; h-- > 0; src += src_w, dst += dst_w)
		for (int i = w; i-- > 0;)

			/*
			 * Load eight registers from 'src' and store them to 'dst',
			 * both pointers are written back incremented. The "memory"
			 * clobber tells the compiler that memory is read and written
			 * behind its back.
			 */
			asm volatile ("ldmia %0!, {r3 - r10} \n\t"
			              "stmia %1!, {r3 - r10} \n\t"
			              : "+r" (src), "+r" (dst)
			              :: "r3","r4","r5","r6","r7","r8","r9","r10",
			                 "memory");
}
#endif /* _LIB__BLIT__BLIT_HELPER_H_ */

View File

@ -12,7 +12,7 @@
*/
#include <blit/blit.h>
#include <util/string.h>
#include <blit_helper.h>
extern "C" void blit(void *s, unsigned src_w,
@ -21,6 +21,35 @@ extern "C" void blit(void *s, unsigned src_w,
{
char *src = (char *)s, *dst = (char *)d;
for (int i = h; i--; src += src_w, dst += dst_w)
Genode::memcpy(dst, src, w);
if (w <= 0 || h <= 0) return;
/* we support blitting only at a granularity of 16bit */
w &= ~1;
/* copy unaligned column */
if (w && ((long)dst & 2)) {
copy_16bit_column(src, src_w, dst, dst_w, h);
w -= 2; src += 2; dst += 2;
}
/* now, we are on a 32bit aligned destination address */
/* copy 32byte chunks */
if (w >> 5) {
copy_block_32byte(src, src_w, dst, dst_w, w >> 5, h);
src += w & ~31;
dst += w & ~31;
w = w & 31;
}
/* copy 32bit chunks */
if (w >> 2) {
copy_block_32bit(src, src_w, dst, dst_w, w >> 2, h);
src += w & ~3;
dst += w & ~3;
w = w & 3;
}
/* handle trailing column */
if (w >> 1) copy_16bit_column(src, src_w, dst, dst_w, h);
}

View File

@ -0,0 +1,65 @@
/*
* \brief Generic blitting utilities.
* \author Norman Feske
* \date 2007-10-10
*/
/*
* Copyright (C) 2007-2012 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU General Public License version 2.
*/
#ifndef _LIB__BLIT__BLIT_HELPER_H_
#define _LIB__BLIT__BLIT_HELPER_H_
#include <util/string.h>
/**
 * Copy a column that is a single 16bit value wide
 *
 * \param src    address of the first source value
 * \param src_w  number of bytes to advance 'src' after each line
 * \param dst    address of the first destination value
 * \param dst_w  number of bytes to advance 'dst' after each line
 * \param h      number of lines to copy, nothing is copied if not positive
 */
static inline void copy_16bit_column(char *src, int src_w,
                                     char *dst, int dst_w, int h)
{
	while (h-- > 0) {
		Genode::memcpy(dst, src, 2);
		src += src_w;
		dst += dst_w;
	}
}
/**
 * Copy pixel block in units of 32bit words
 *
 * \param src    source address
 * \param src_w  width of source buffer in bytes
 * \param dst    32bit-aligned destination address
 * \param dst_w  width of destination buffer in bytes
 * \param w      number of 32bit words to copy per line
 * \param h      number of lines to copy
 */
static void copy_block_32bit(char *src, int src_w,
                             char *dst, int dst_w,
                             int w, int h)
{
	while (h-- > 0) {

		/* one line consists of 'w' words of four bytes each */
		Genode::memcpy(dst, src, 4*w);

		src += src_w;
		dst += dst_w;
	}
}
/**
 * Copy block whose line size is a multiple of 32 bytes
 *
 * \param src    source address
 * \param src_w  width of source buffer in bytes
 * \param dst    destination address
 * \param dst_w  width of destination buffer in bytes
 * \param w      width in 32 byte chunks to copy per line
 * \param h      number of lines to copy
 */
static inline void copy_block_32byte(char *src, int src_w,
                                     char *dst, int dst_w,
                                     int w, int h)
{
	while (h-- > 0) {

		/* one line consists of 'w' chunks of 32 bytes each */
		Genode::memcpy(dst, src, 32*w);

		src += src_w;
		dst += dst_w;
	}
}
#endif /* _LIB__BLIT__BLIT_HELPER_H_ */

View File

@ -1,5 +1,5 @@
/*
* \brief Blitting function for x86
* \brief Blitting utilities for x86
* \author Norman Feske
* \date 2007-10-09
*/
@ -11,14 +11,12 @@
* under the terms of the GNU General Public License version 2.
*/
#include <blit/blit.h>
#ifndef _LIB__BLIT__BLIT_HELPER_H_
#define _LIB__BLIT__BLIT_HELPER_H_
#include <mmx.h>
/***************
** Utilities **
***************/
/**
* Copy single 16bit column
*/
@ -54,8 +52,6 @@ static inline void copy_block_32bit(char *src, int src_w,
}
/**
* Copy block with a size of multiple of 32 bytes
*
@ -71,46 +67,4 @@ static inline void copy_block_32byte(char *src, int src_w,
copy_32byte_chunks(src, dst, w);
}
/***********************
** Library interface **
***********************/
/**
 * Copy a rectangular area from one buffer to another
 *
 * \param s      address of the first source line
 * \param src_w  width of source buffer in bytes
 * \param d      address of the first destination line
 * \param dst_w  width of destination buffer in bytes
 * \param w      number of bytes to copy per line
 * \param h      number of lines to copy
 *
 * Each line is split into an optional leading 16bit column, a run of
 * 32-byte chunks, a run of 32bit words, and an optional trailing 16bit
 * column. Nothing is copied if 'w' or 'h' is not positive.
 */
extern "C" void blit(void *s, unsigned src_w,
                     void *d, unsigned dst_w,
                     int w, int h)
{
	if (w <= 0 || h <= 0)
		return;

	char *src = (char *)s;
	char *dst = (char *)d;

	/* only whole 16bit units are copied, an odd trailing byte is dropped */
	w &= ~1;

	/* a leading 16bit column moves the destination to a 32bit boundary */
	if (w && ((long)dst & 2)) {
		copy_16bit_column(src, src_w, dst, dst_w, h);
		src += 2;
		dst += 2;
		w   -= 2;
	}

	/* bulk of each line, copied in chunks of 32 bytes */
	const int num_32byte_chunks = w >> 5;
	if (num_32byte_chunks) {
		const int copied = w & ~31;
		copy_block_32byte(src, src_w, dst, dst_w, num_32byte_chunks, h);
		src += copied;
		dst += copied;
		w   &= 31;
	}

	/* remainder of each line, copied in 32bit words */
	const int num_32bit_words = w >> 2;
	if (num_32bit_words) {
		const int copied = w & ~3;
		copy_block_32bit(src, src_w, dst, dst_w, num_32bit_words, h);
		src += copied;
		dst += copied;
		w   &= 3;
	}

	/* at most one 16bit column is left at the end of each line */
	if (w >> 1)
		copy_16bit_column(src, src_w, dst, dst_w, h);
}
#endif /* _LIB__BLIT__BLIT_HELPER_H_ */