Skip to content

Commit

Permalink
0.2.1 FastShiftInOut
Browse files Browse the repository at this point in the history
  • Loading branch information
RobTillaart committed Nov 1, 2024
1 parent c10a901 commit e668c02
Show file tree
Hide file tree
Showing 7 changed files with 114 additions and 55 deletions.
3 changes: 3 additions & 0 deletions libraries/FastShiftInOut/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).


## [0.2.1] - 2024-10-31
- fix #9, more optimizations

## [0.2.0] - 2024-09-10
- fix #7, loop unroll option, improving performance, kudos to nt314p
- added flag to select LOOP UNROLL (is optional as it gives larger code size)
Expand Down
101 changes: 58 additions & 43 deletions libraries/FastShiftInOut/FastShiftInOut.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//
// FILE: FastShiftInOut.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.2.0
// VERSION: 0.2.1
// PURPOSE: Arduino library for (AVR) optimized shiftInOut (simultaneously)
// URL: https://github.com/RobTillaart/FastShiftInOut

Expand Down Expand Up @@ -109,8 +109,12 @@ uint8_t FastShiftInOut::writeLSBFIRST(uint8_t data)
uint8_t oldSREG = SREG;
noInterrupts();

if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
// See discussion #17 FastShiftOut
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
uint8_t d1 = d0 | outmask1; // cache 1

if ((value & 0x01) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
// *localClockRegister |= cbmask1;
// if ((*localDataInRegister & inmask1) > 0) rv |= 0x01;
// *localClockRegister &= cbmask2; // ~_clockBit;
Expand All @@ -121,50 +125,50 @@ uint8_t FastShiftInOut::writeLSBFIRST(uint8_t data)
if ((*localDataInRegister & inmask1) > 0) rv |= 0x01;
*localClockRegister = r; // reset it

if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x02) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x02;
*localClockRegister = r; // reset it

if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x04) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x04;
*localClockRegister = r; // reset it

if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x08) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x08;
*localClockRegister = r; // reset it

if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x10) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x10;
*localClockRegister = r; // reset it

if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x20) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x20;
*localClockRegister = r; // reset it

if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x40) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x40;
*localClockRegister = r; // reset it

if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x80) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x80;
Expand All @@ -185,16 +189,19 @@ uint8_t FastShiftInOut::writeLSBFIRST(uint8_t data)

uint8_t oldSREG = SREG;
noInterrupts();

uint8_t r = *localClockRegister;


uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
uint8_t d1 = d0 | outmask1; // cache 1

for (uint8_t m = 1; m > 0; m <<= 1)
{
// write one bit
if ((value & m) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & m) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
uint8_t r = *localClockRegister;

// clock pulse HIGH
*localClockRegister |= cbmask1;
*localClockRegister = r | cbmask1;
// read one bit
if ((*localDataInRegister & inmask1) > 0) rv |= m;
// clock pulse LOW
Expand Down Expand Up @@ -249,8 +256,12 @@ uint8_t FastShiftInOut::writeMSBFIRST(uint8_t data)
uint8_t oldSREG = SREG;
noInterrupts();

if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
// See discussion #17 FastShiftOut
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
uint8_t d1 = d0 | outmask1; // cache 1

if ((value & 0x80) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
// *localClockRegister |= cbmask1;
// if ((*localDataInRegister & inmask1) > 0) rv |= 0x80;
// *localClockRegister &= cbmask2; // ~_clockBit;
Expand All @@ -261,50 +272,50 @@ uint8_t FastShiftInOut::writeMSBFIRST(uint8_t data)
if ((*localDataInRegister & inmask1) > 0) rv |= 0x80;
*localClockRegister = r; // reset it

if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x40) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x40;
*localClockRegister = r; // reset it

if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x20) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x20;
*localClockRegister = r; // reset it

if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x10) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x10;
*localClockRegister = r; // reset it

if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x08) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x08;
*localClockRegister = r; // reset it

if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x04) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x04;
*localClockRegister = r; // reset it

if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x02) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x02;
*localClockRegister = r; // reset it

if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x01) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x01;
Expand All @@ -326,14 +337,18 @@ uint8_t FastShiftInOut::writeMSBFIRST(uint8_t data)
uint8_t oldSREG = SREG;
noInterrupts();

uint8_t r = *localClockRegister;
// See discussion #17 FastShiftOut
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
uint8_t d1 = d0 | outmask1; // cache 1

for (uint8_t m = 0x80; m > 0; m >>= 1)
{
// write one bit
if ((value & m) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & m) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
uint8_t r = *localClockRegister;
// clock pulse HIGH
*localClockRegister |= cbmask1;
*localClockRegister = r | cbmask1;
// read one bit
if ((*localDataInRegister & inmask1) > 0) rv |= m;
// clock pulse LOW
Expand Down
6 changes: 3 additions & 3 deletions libraries/FastShiftInOut/FastShiftInOut.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@
//
// FILE: FastShiftInOut.h
// AUTHOR: Rob Tillaart
// VERSION: 0.2.0
// VERSION: 0.2.1
// PURPOSE: Arduino library for (AVR) optimized shiftInOut (simultaneously)
// URL: https://github.com/RobTillaart/FastShiftInOut


#include "Arduino.h"


#define FASTSHIFTINOUT_LIB_VERSION (F("0.2.0"))
#define FASTSHIFTINOUT_LIB_VERSION (F("0.2.1"))

// uncomment next line to get SPEED OPTIMIZED CODE (loop unrolled, larger code size)
#define FASTSHIFTINOUT_AVR_LOOP_UNROLLED 1
// #define FASTSHIFTINOUT_AVR_LOOP_UNROLLED 1


class FastShiftInOut
Expand Down
16 changes: 9 additions & 7 deletions libraries/FastShiftInOut/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,22 +58,23 @@ Performance of **write()**

#### Measurements

(0.2.0)
(0.2.1)
Indicative time in microseconds, Arduino UNO, IDE 1.8.19, measured over 1000 calls.
(delta between 2 calls and 1 call to eliminate overhead)

| function | 0.1.3 | 0.2.0 | 0.2.0L |
|:-------------------------|---------:|---------:|----------:|
| write() (reference) | no data | 158.24 | no data |
| write() | 25.52 | 17.61 | 12.26 |
| writeLSBFIRST() | 25.52 | 17.61 | 12.26 |
| writeMSBFIRST() | 25.52 | 17.60 | 12.20 |
| function | 0.1.3 | 0.2.0 | 0.2.0L | 0.2.1 | 0.2.1L |
|:-------------------------|---------:|---------:|----------:|---------:|----------:|
| write() (reference) | no data | 158.24 | no data | 158.24 | no data |
| write() | 25.52 | 17.61 | 12.26 | 16.72 | 11.00 |
| writeLSBFIRST() | 25.52 | 17.61 | 12.26 | 16.72 | 11.00 |
| writeMSBFIRST() | 25.52 | 17.60 | 12.20 | 16.72 | 10.94 |


- Note: 0.1.3 added from old table.
- Note: reference run on AVR by commenting all optimizations.
- Note: 0.2.0 measured with loop unroll flag disabled.
- Note: 0.2.0L measured with loop unroll flag enabled.
- Note: 0.2.1 / 0.2.1L measured likewise (loop unroll flag disabled / enabled respectively).


### Related
Expand All @@ -83,6 +84,7 @@ Indicative time in microseconds, Arduino UNO, IDE 1.8.19, measured over 1000 cal
- https://github.com/RobTillaart/FastShiftOut
- https://github.com/RobTillaart/ShiftInSlow
- https://github.com/RobTillaart/ShiftOutSlow
- https://github.com/RobTillaart/SWSPI (experimental)


## Interface
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
IDE: 1.8.19
Board: UNO

FASTSHIFTINOUT_LIB_VERSION: 0.2.1

Performance - time in us
write: 17.86
write: 34.58
Delta: 16.72

writeLSBFIRST: 16.98
writeLSBFIRST: 33.70
Delta: 16.72

writeMSBFIRST: 16.98
writeMSBFIRST: 33.70
Delta: 16.72


# loop unrolled.

FASTSHIFTINOUT_LIB_VERSION: 0.2.1

Performance - time in us
write: 12.14
write: 23.14
Delta: 11.00

writeLSBFIRST: 11.26
writeLSBFIRST: 22.26
Delta: 11.00

writeMSBFIRST: 11.19
writeMSBFIRST: 22.13
Delta: 10.94


done ...

2 changes: 1 addition & 1 deletion libraries/FastShiftInOut/library.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"type": "git",
"url": "https://github.com/RobTillaart/FastShiftInOut.git"
},
"version": "0.2.0",
"version": "0.2.1",
"license": "MIT",
"frameworks": "*",
"platforms": "*",
Expand Down
2 changes: 1 addition & 1 deletion libraries/FastShiftInOut/library.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name=FastShiftInOut
version=0.2.0
version=0.2.1
author=Rob Tillaart <[email protected]>
maintainer=Rob Tillaart <[email protected]>
sentence=Arduino library for (AVR) optimized shiftInOut (simultaneously)
Expand Down

0 comments on commit e668c02

Please sign in to comment.