forked from synopse/mORMot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SynScaleMM.pas
1777 lines (1614 loc) · 54.7 KB
/
SynScaleMM.pas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/// fast scaling memory manager for Delphi
// - this unit is a part of the freeware Synopse framework,
// licensed under a MPL/GPL/LGPL tri-license; version 0.4
unit SynScaleMM;
{
Original code is ScaleMM - Fast scaling memory manager for Delphi
by André Mussche - Released under Mozilla Public License 1.1
http://code.google.com/p/scalemm
Simple, small and compact MM, built on top of the main Memory Manager
(FastMM4 is a good candidate, standard since Delphi 2007), architectured
in order to scale on multi core CPU's (which is what FastMM4 is lacking).
Usage:
- Delphi 6 up to Delphi 2005 with FastMM4:
Place FastMM4 as the very first unit under the "uses" clause of your
project's .dpr file THEN add SynScaleMM to the "uses" clause
- Delphi 6 up to Delphi 2005 with no FastMM4 or Delphi 2006 up to Delphi XE:
Place SynScaleMM as the very first unit under the "uses" clause of your
project's .dpr file.
SynScaleMM - fast scaling memory manager for Delphi
-----------------------------------------------------
Modifications/fork to SynScaleMM by A.Bouchez - https://synopse.info:
- Synchronized with r19 revision, from Dec 6, 2010;
- Compiles from Delphi 6 up to Delphi XE;
- Some pascal code converted to faster asm;
- Some code refactoring, a lot of comments added;
- Added medium block handling from 2048 bytes up to 16384;
- Released under MPL 1.1/GPL 2.0/LGPL 2.1 tri-license.
*** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the License.
The Original Code is ScaleMM - Fast scaling memory manager for Delphi.
The Initial Developer of the Original Code is André Mussche.
Portions created by the Initial Developer are Copyright (c)
the Initial Developer. All Rights Reserved.
Contributor(s):
- Arnaud Bouchez https://synopse.info
Portions created by each contributor are Copyright (c)
each contributor. All Rights Reserved.
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
Version 0.4
- Reallocation made a lot faster, in case of a growing size by some bytes
}
interface
{.$DEFINE DEBUG_SCALEMM} // slower but better debugging (no inline functions etc)
/// internal GetSmallMemManager function is 2% faster with an injected offset
{$define SCALE_INJECT_OFFSET}
// inlined TLS access
// - injected offset + GetSmallMemManager call can be slower than offset loading
{$define INLINEGOWN}
{$ifdef INLINEGOWN}
{$ifndef HASINLINE} // inlined Getmem/Freemem will call GetSmallMemManager
{$undef SCALE_INJECT_OFFSET}
{$endif}
{$endif}
// enable Backing Off Locks with Spin-Wait Loops
// - see http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
{$define SPINWAITBACKOFF}
// other posible defines:
{.$define ALLOCBY64} // allocated by 64 memory items (if undefined, by 32)
{.$define PURE_PASCAL} // no assembly, pure delphi code
{.$define Align16Bytes} // 16 byte aligned header, so some more overhead
{$define USEMEDIUM} // handling of 2048..16384 bytes blocks
{.$define USEBITMAP} // freed blocks per bit storage (experimental)
{.$define BACKOFFSLEEP1} // could avoid race condition in some (rare) cases
{$ifdef DEBUG_SCALEMM}
{$OPTIMIZATION OFF}
{$STACKFRAMES ON}
{$ASSERTIONS ON}
{$DEBUGINFO ON}
{$OVERFLOWCHECKS ON}
{$RANGECHECKS ON}
{$else} // default "release" mode, much faster!
{$OPTIMIZATION ON} // 235% faster!
{$STACKFRAMES OFF} // 12% faster
{$ASSERTIONS OFF}
{$OVERFLOWCHECKS OFF}
{$RANGECHECKS OFF}
{$if CompilerVersion >= 17}
{$define HASINLINE} // Delphi 2005 or newer
{$ifend}
{$D-}
{$L-}
{$endif}
{$ifdef USEBITMAP} // bitmap size must match NativeUInt bit count
{$ifdef CPUX64}
{$define ALLOCBY64}
{$else}
{$undef ALLOCBY64}
{$endif}
{$endif}
const
/// alloc memory blocks with 64 or 32 memory items each time
// - 64 = 1 shl 6, 32 = 1 shl 5, therefore any multiplication compiles into
// nice and fast shl opcode
// - on a heavily multi-threaded application, with USEMEDIUM defined below,
// a lower value (i.e. 32) could be used instead (maybe dedicated value for
// medium blocks would be even better)
// - if USEBITMAP is defined, this size will match the NativeUInt bit count
C_ARRAYSIZE = {$ifdef ALLOCBY64}64{$else}32{$endif};
/// keep 10 free blocks in cache
C_GLOBAL_BLOCK_CACHE = 10;
{$if CompilerVersion < 19}
type // from Delphi 6 up to Delphi 2007
NativeUInt = Cardinal;
NativeInt = Integer;
{$ifend}
{$if CompilerVersion >= 17}
{$define USEMEMMANAGEREX}
{$ifend}
const
{$ifdef USEMEDIUM}
/// Maximum index of 2048 bytes granularity Medium blocks
// - 63488 could have been the upper limit because 65536=63488+2048 won't fit in
// a FItemSize: word, but it will allocate 63488*C_ARRAYSIZE=4 MB per thread!
// - so we allocate here up to 16384 bytes, i.e. 1 MB, which sounds
// reasonable
// - a global VirtualAlloc() bigger block, splitted into several medium blocks,
// via a double-linked list (see FastMM4 algorithm) could be implemented instead
MAX_MEDIUMMEMBLOCK = 7;
/// Maximum index of 256 bytes granularity Small blocks
MAX_SMALLMEMBLOCK = 6;
{$else}
/// Maximum index of 256 bytes granularity Small blocks
// - Small blocks will include 2048 if Medium Blocks not handled
MAX_SMALLMEMBLOCK = 7;
{$endif}
type
PMemBlock = ^TMemBlock;
PMemBlockList = ^TMemBlockList;
PThreadMemManager = ^TThreadMemManager;
PMemHeader = ^TMemHeader;
{$A-} { all object/record must be packed }
/// Header appended to the beginning of every allocated memory block
TMemHeader = object
/// the memory block handler which owns this memory block
Owner: PMemBlock;
{$ifdef USEBITMAP}
/// the index in the array[0..C_ARRAYSIZE-1] of Owner memory items
FIndexInMemBlockArray: NativeUInt;
{$else}
/// linked to next single memory item (other thread freem mem)
NextMem: PMemHeader;
{$endif}
{$ifdef Align16Bytes}
todo
{$endif}
end;
/// memory block handler
// - internal storage of the memory blocks will follow this structure, and
// will contain array[0..C_ARRAYSIZE-1] of memory items,
// i.e. (FItemSize + SizeOf(TMemHeader)) * C_ARRAYSIZE bytes
TMemBlock = object
/// the memory block list which owns this memory block handler
Owner: PMemBlockList;
/// link to the next list with free memory
FNextMemBlock: PMemBlock;
/// link to the previous list with free memory
// - double linked to be able for fast removal of one block
FPreviousMemBlock: PMemBlock;
/// link to the next list with freed memory, in case this list has no more freed mem
FNextFreedMemBlock: PMemBlock;
/// link to the previous list with freed memory
FPreviousFreedMemBlock: PMemBlock;
{$ifdef USEBITMAP}
/// individual bit is set for any block which is to be freed from other thread
FToBeFreedFromOtherThread: NativeUInt;
/// link to the next TMemBlock containing blocks to be freed from other thread
NextMem: PMemBlock;
/// individual bit is set for any available block in [0..C_ARRAYSIZE-1]
FAvailable: NativeUInt;
{$else}
/// how much free mem is used, max is C_ARRAYSIZE
FUsageCount: NativeUInt;
/// current index in FFreedArray
FFreedIndex: NativeUInt;
/// points to all freed PMemHeader
FFreedArray: array[0..C_ARRAYSIZE-1] of Pointer;
{$endif}
function GetUsedMemoryItem: PMemHeader; {$ifdef HASINLINE}inline;{$endif}
procedure FreeMem(aMemoryItem: PMemHeader); {$ifdef HASINLINE}inline;{$endif}
procedure FreeBlockMemoryToGlobal;
end;
/// memory block list
// - current size if 16 bytes (this is a packed object)
TMemBlockList = object
/// the per-thread memory manager which created this block
Owner: PThreadMemManager;
/// list containing freed memory (which this block owns)
// - used to implement a fast caching of memory blocks
FFirstFreedMemBlock: PMemBlock;
/// list containing all memory this block owns
FFirstMemBlock: PMemBlock;
/// size of memory items (32, 64 etc bytes)
FItemSize : word;
/// number of blocks inside FFirstFreedMemBlock
FFreeMemCount: byte;
/// recursive check when we alloc memory for this blocksize (new memory list)
FRecursive: boolean;
{$ifdef CPUX64}
// for faster "array[0..7] of TMemBlockList" calc
// (for 32 bits, the TMemBlockList instance size if 16 bytes)
FFiller: array[1..sizeof(NativeInt)-sizeof(word)-sizeof(byte)-sizeof(boolean)] of byte;
{$endif}
procedure AddNewMemoryBlock;
function GetMemFromNewBlock : Pointer;
end;
POtherThreadFreedMemory = {$ifdef USEBITMAP}PMemBlock{$else}PMemHeader{$endif};
/// handles per-thread memory managment
TThreadMemManager = object
private
/// link to the list of mem freed in other thread
FOtherThreadFreedMemory: POtherThreadFreedMemory;
/// array with memory per block size of 32 bytes (mini blocks)
// - i.e. 32, 64, 96, 128, 160, 192, 224 bytes
FMiniMemoryBlocks: array[0..6] of TMemBlockList;
/// array with memory per block size of 256 bytes (small blocks)
// - i.e. 256,512,768,1024,1280,1536,1792[,2048] bytes
FSmallMemoryBlocks: array[0..MAX_SMALLMEMBLOCK] of TMemBlockList;
{$ifdef USEMEDIUM}
/// array with memory per block size of 2048 bytes (medium blocks)
// - i.e. 2048,4096,6144,8192,10240,12288,14336,16384 bytes
FMediumMemoryBlocks: array[0..MAX_MEDIUMMEMBLOCK] of TMemBlockList;
{$endif}
// link to list of items to reuse after thread terminated
FNextThreadManager: PThreadMemManager;
procedure ProcessFreedMemFromOtherThreads;
procedure AddFreedMemFromOtherThread(aMemory: PMemHeader);
public
FThreadId: LongWord;
/// is this thread memory available to new thread?
FThreadTerminated: Boolean;
procedure Init;
procedure Reset;
function GetMem(aSize: NativeUInt): Pointer; {$ifdef HASINLINE}inline;{$endif}
function FreeMem(aMemory: Pointer): NativeInt; {$ifdef HASINLINE}inline;{$endif}
end;
/// Global memory manager
// - a single instance is created for the whole process
// - caches some memory (blocks + threadmem) for fast reuse
// - also keeps allocated memory in case an old thread allocated some memory
// for another thread
TGlobalMemManager = object
private
/// all thread memory managers
FFirstThreadMemory: PThreadMemManager;
/// freed/used thread memory managers
// - used to cache the per-thread managers in case of multiple threads creation
FFirstFreedThreadMemory: PThreadMemManager;
/// main thread manager (owner of all global mem)
FMainThreadMemory: PThreadMemManager;
/// Freed/used memory: array with memory per 32 bytes block size
// - i.e. 32, 64, 96, 128, 160, 192, 224 bytes
FFreedMiniMemoryBlocks : array[0..6] of TMemBlockList;
/// Freed/used memory: array with memory per 256 bytes block size
// - i.e. 256,512,768,1024,1280,1536,1792[,2048] bytes
FFreedSmallMemoryBlocks : array[0..MAX_SMALLMEMBLOCK] of TMemBlockList;
{$ifdef USEMEDIUM}
/// Freed/used memory: array with memory per block size of 2048 bytes
// - i.e. 2048,4096,6144,8192,10240,12288,14336,16384 bytes
FFreedMediumMemoryBlocks: array[0..MAX_MEDIUMMEMBLOCK] of TMemBlockList;
{$endif}
procedure Init;
procedure FreeBlocksFromThreadMemory(aThreadMem: PThreadMemManager);
public
procedure AddNewThreadManagerToList(aThreadMem: PThreadMemManager);
procedure FreeThreadManager(aThreadMem: PThreadMemManager);
function GetNewThreadManager: PThreadMemManager;
procedure FreeAllMemory;
procedure FreeBlockMemory(aBlockMem: PMemBlock);
function GetBlockMemory(aItemSize: NativeUInt): PMemBlock;
end;
{$A+}
function Scale_GetMem(aSize: Integer): Pointer;
function Scale_AllocMem(aSize: Cardinal): Pointer;
function Scale_FreeMem(aMemory: Pointer): Integer;
function Scale_ReallocMem(aMemory: Pointer; aSize: Integer): Pointer;
var
GlobalManager: TGlobalMemManager;
/// Points to the Memory Manager on which ScaleMM is based
// - ScaleMM works on top of a main MM, which is FastMM4 since Delphi 2007
// - ScaleMM will handle blocks up to 2048 bytes (or 16384 is medium blocks
// are enabled)
// - but larger blocks are delegated to OldMM
// - you can explicitely use OldMM on purpose (but it doesn't seem to be a good idea)
// - note that also "root" block memory is allocated by OldMM if ScaleMM needs
// memory itself (to populate its internal buffers): there is not direct call
// to the VirtualAlloc() API, for instance
var
{$ifdef USEMEMMANAGEREX}
OldMM: TMemoryManagerEx;
{$else}
OldMM: TMemoryManager;
{$endif}
implementation
// Windows.pas unit dependency should be not used -> code inlined here
type
DWORD = LongWord;
BOOL = LongBool;
const
PAGE_EXECUTE_READWRITE = $40;
kernel32 = 'kernel32.dll';
function TlsAlloc: DWORD; stdcall; external kernel32 name 'TlsAlloc';
function TlsGetValue(dwTlsIndex: DWORD): Pointer; stdcall; external kernel32 name 'TlsGetValue';
function TlsSetValue(dwTlsIndex: DWORD; lpTlsValue: Pointer): BOOL; stdcall; external kernel32 name 'TlsSetValue';
function TlsFree(dwTlsIndex: DWORD): BOOL; stdcall; external kernel32 name 'TlsFree';
procedure Sleep(dwMilliseconds: DWORD); stdcall; external kernel32 name 'Sleep';
{$ifdef SPINWAITBACKOFF}
function SwitchToThread: BOOL; stdcall; external kernel32 name 'SwitchToThread';
{$else}
{$undef BACKOFFSLEEP1} // this additional Sleep(1) is for spin wait backoff
{$endif}
function FlushInstructionCache(hProcess: THandle; const lpBaseAddress: Pointer; dwSize: DWORD): BOOL; stdcall; external kernel32 name 'FlushInstructionCache';
function GetCurrentProcess: THandle; stdcall; external kernel32 name 'GetCurrentProcess';
function GetCurrentThreadId: DWORD; stdcall; external kernel32 name 'GetCurrentThreadId';
function Scale_VirtualProtect(lpAddress: Pointer; dwSize, flNewProtect: DWORD;
var OldProtect: DWORD): BOOL; stdcall; overload; external kernel32 name 'VirtualProtect';
procedure ExitThread(dwExitCode: DWORD); stdcall; external kernel32 name 'ExitThread';
function SetPermission(Code: Pointer; Size, Permission: Cardinal): Cardinal;
begin
Assert(Assigned(Code) and (Size > 0));
{ Flush the instruction cache so changes to the code page are effective immediately }
if Permission <> 0 then
if FlushInstructionCache(GetCurrentProcess, Code, Size) then
Scale_VirtualProtect(Code, Size, Permission, Longword(Result));
end;
function CreateSmallMemManager: PThreadMemManager; forward;
{$ifdef PURE_PASCAL}
threadvar
GCurrentThreadManager: PThreadMemManager;
function GetSmallMemManager: PThreadMemManager; {$ifdef HASINLINE}inline;{$endif}
begin
Result := GCurrentThreadManager;
if Result = nil then
Result := CreateSmallMemManager;
end;
{$else}
var
GOwnTlsIndex,
GOwnTlsOffset: NativeUInt;
function GetSmallMemManager: PThreadMemManager;
asm
{$ifdef SCALE_INJECT_OFFSET}
mov eax,123456789 // dummy value: calc once and inject at runtime
{$else}
mov eax,GOwnTlsOffset // 2% slower, so we default use injected offset
{$endif}
mov ecx,fs:[$00000018]
mov eax,[ecx+eax] // fixed offset, calculated only once
or eax,eax
jz CreateSmallMemManager
end;
procedure _FixedOffset;
{$ifdef SCALE_INJECT_OFFSET}
var p: PAnsiChar;
{$endif}
begin
GOwnTlsOffset := GOwnTlsIndex * 4 + $0e10;
{$ifdef SCALE_INJECT_OFFSET}
p := @GetSmallMemManager;
SetPermission(p, 5, PAGE_EXECUTE_READWRITE);
PCardinal(p+1)^ := GOwnTlsOffset; // write fixed offset
{$endif}
end;
{$endif PURE_PASCAL}
function CreateSmallMemManager: PThreadMemManager;
begin
Result := GlobalManager.GetNewThreadManager;
if Result = nil then
begin
Result := OldMM.GetMem( SizeOf(TThreadMemManager) );
Result.Init;
end
else
begin
Result.FThreadId := GetCurrentThreadId;
Result.FThreadTerminated := False;
end;
{$ifdef PURE_PASCAL}
GCurrentThreadManager := Result;
{$else}
TlsSetValue(GOwnTLSIndex, Result);
{$endif}
end;
// compare oldvalue with destination: if equal then newvalue is set
function CAS0(const oldValue: pointer; newValue: pointer; var destination): boolean;
// - if failed, try to Switch to next OS thread, or Sleep 0 ms if it no next thread
asm // eax=oldValue, edx=newValue, ecx=Destination
lock cmpxchg dword ptr [Destination],newValue
// will compile as "lock cmpxchg dword ptr [ecx],edx" under Win32 e.g.
setz al
{$ifdef SPINWAITBACKOFF}
jz @ok
call SwitchToThread
test oldValue,oldValue // oldValue=eax under Win32 e.g.
jnz @ok
push 0
call Sleep
xor oldValue,oldValue // return false
{$else}
jz @ok
pause // let the CPU know this thread is in a Spin Wait loop
{$endif}
@ok:
end;
{$ifdef BACKOFFSLEEP1}
function CAS1(const oldValue: pointer; newValue: pointer; var destination): boolean;
// - if failed, try to Switch to next OS thread, or Sleep 1 ms if it no next thread
// (this 1 ms sleep is necessary to avoid race condition - see
// https://synopse.info/forum/viewtopic.php?pid=914#p914 )
asm // eax=oldValue, edx=newValue, ecx=Destination
lock cmpxchg dword ptr [Destination],newValue
// will compile as "lock cmpxchg dword ptr [ecx],edx" under Win32 e.g.
setz al
jz @ok
call SwitchToThread
test oldValue,oldValue
jnz @ok
push 1
call Sleep
xor oldValue,oldValue
@ok:
end;
{$endif}
procedure InterlockedIncrement(var Value: Byte);
asm
lock inc byte [Value] // will compile as "lock inc byte [eax]" under Win32 e.g.
end;
procedure InterlockedDecrement(var Value: Byte);
asm
lock dec byte [Value] // will compile as "lock dec byte [eax]" under Win32 e.g.
end;
/// gets the first set bit and resets it, returning the bit index
function FindFirstSetBit(Value: NativeUInt): NativeUInt;
asm
bsf Value,Value // will compile as "bsf eax,eax" under Win32 e.g.
end;
/// sets a specified bit
function SetBit(var Value: NativeUInt; BitIndex: NativeUInt): NativeUInt;
asm
bts [Value],BitIndex // will compile as "bts [eax],edx" under Win32 e.g.
end;
{$ifdef DEBUG_SCALEMM}
procedure Assert(aCondition: boolean);
begin
if not aCondition then
begin
asm
int 3;
end;
Sleep(0); // no exception, just dummy for breakpoint
end;
end;
{$endif}
function GetOldMem(aSize: NativeUInt): Pointer; {$ifdef HASINLINE}inline;{$endif}
begin
Result := OldMM.GetMem(aSize + SizeOf(TMemHeader));
if Result<>nil then begin
PMemHeader(Result)^.Owner := nil; // not our memlist, so mark as such
Result := Pointer(NativeUInt(Result) + SizeOf(TMemHeader) );
end;
end;
{ TThreadMemManager }
procedure TThreadMemManager.Init;
var i, j: NativeUInt;
begin
fillchar(self,sizeof(self),0);
FThreadId := GetCurrentThreadId;
j := 32;
for i := Low(FMiniMemoryBlocks) to High(FMiniMemoryBlocks) do
begin // 32, 64, 96, 128, 160, 192, 224 bytes
FMiniMemoryBlocks[i].Owner := @Self;
FMiniMemoryBlocks[i].FItemSize := j;
inc(j,32);
end;
assert(j=256);
for i := Low(FSmallMemoryBlocks) to High(FSmallMemoryBlocks) do
begin // 256,512,768,1024,1280,1536,1792 bytes
FSmallMemoryBlocks[i].Owner := @Self;
FSmallMemoryBlocks[i].FItemSize := j;
inc(j,256);
end;
{$ifdef USEMEDIUM}
assert(j=2048);
for i := Low(FMediumMemoryBlocks) to High(FMediumMemoryBlocks) do
begin // 2048, 4096...16384 bytes
FMediumMemoryBlocks[i].Owner := @Self;
FMediumMemoryBlocks[i].FItemSize := j;
inc(j,2048);
end;
assert(j=(MAX_MEDIUMMEMBLOCK+2)*2048);
{$else}
assert(j=2304);
{$endif}
end;
procedure TThreadMemManager.ProcessFreedMemFromOtherThreads;
var
pcurrentmem, ptempmem: POtherThreadFreedMemory;
begin
// reset first item (to get all mem in linked list)
repeat
pcurrentmem := FOtherThreadFreedMemory;
if CAS0(pcurrentmem, nil, FOtherThreadFreedMemory) then
break;
{$ifdef BACKOFFSLEEP1}
pcurrentmem := FOtherThreadFreedMemory;
if CAS1(pcurrentmem, nil, FOtherThreadFreedMemory) then
break;
{$endif}
until false;
// free all mem in linked list
while pcurrentmem <> nil do
begin
ptempmem := pcurrentmem;
pcurrentmem := pcurrentmem.NextMem;
{$ifdef USEBITMAP}
with ptempmem^ do
while FToBeFreedFromOtherThread<>0 do
FreeMem(Pointer( NativeUInt(ptempmem) + sizeof(ptempmem^) +
FindFirstSetBit(FToBeFreedFromOtherThread) * (Owner^.FItemSize + SizeOf(TMemHeader)) ));
{$else}
ptempmem.Owner.FreeMem(ptempmem);
{$endif}
end;
end;
procedure TThreadMemManager.Reset;
var
i: NativeUInt;
procedure __ResetBlocklist(aBlocklist: PMemBlockList);
begin
aBlocklist.FFirstFreedMemBlock := nil;
aBlocklist.FFirstMemBlock := nil;
aBlocklist.FRecursive := False;
end;
begin
FThreadId := 0;
FThreadTerminated := True;
FOtherThreadFreedMemory := nil;
FNextThreadManager := nil;
for i := Low(FMiniMemoryBlocks) to High(FMiniMemoryBlocks) do
__ResetBlocklist(@FMiniMemoryBlocks[i]);
for i := Low(FSmallMemoryBlocks) to High(FSmallMemoryBlocks) do
__ResetBlocklist(@FSmallMemoryBlocks[i]);
{$ifdef USEMEDIUM}
for i := Low(FMediumMemoryBlocks) to High(FMediumMemoryBlocks) do
__ResetBlocklist(@FMediumMemoryBlocks[i]);
{$endif}
end;
procedure TThreadMemManager.AddFreedMemFromOtherThread(aMemory: PMemHeader);
var
poldmem, currentmem: POtherThreadFreedMemory;
begin
{$ifdef USEBITMAP}
currentmem := aMemory^.Owner;
SetBit(currentmem^.FToBeFreedFromOtherThread,aMemory^.FIndexInMemBlockArray);
{$else}
currentmem := aMemory;
{$endif}
repeat
poldmem := FOtherThreadFreedMemory;
currentmem.NextMem := poldmem; // link to current next BEFORE the swap!
// set new item as first (to created linked list)
if CAS0(poldmem, currentmem, FOtherThreadFreedMemory) then
break;
{$ifdef BACKOFFSLEEP1}
poldmem := FOtherThreadFreedMemory;
currentmem.NextMem := poldmem;
if CAS1(poldmem, currentmem, FOtherThreadFreedMemory) then
break;
{$endif}
until false;
end;
function TThreadMemManager.FreeMem(aMemory: Pointer): NativeInt;
var
pm: PMemBlock;
p: Pointer;
begin
p := Pointer(NativeUInt(aMemory) - SizeOf(TMemHeader));
pm := PMemHeader(p).Owner;
if FOtherThreadFreedMemory <> nil then
ProcessFreedMemFromOtherThreads;
if pm <> nil then
with pm^ do
begin
// block obtained via Scale_GetMem()
Assert(Owner <> nil);
Assert(Owner.Owner <> nil);
if Owner.Owner = @Self then
// mem of own thread
FreeMem(PMemHeader(p)) else
// put mem in lockfree queue of owner thread
Owner.Owner.AddFreedMemFromOtherThread(PMemHeader(p));
Result := 0;
end
else
Result := OldMM.FreeMem(p);
end;
function TThreadMemManager.GetMem(aSize: NativeUInt): Pointer;
var
bm: PMemBlockList;
begin
if aSize <= (length(FMiniMemoryBlocks)*32) then
if aSize > 0 then
// blocks of 32: 32, 64, 96, 128, 160, 192, 224
bm := @FMiniMemoryBlocks[(aSize-1) shr 5] else
begin
Result := nil;
Exit;
end
else if aSize <= (length(FSmallMemoryBlocks)*256) then
// blocks of 256: 256,512,768,1024,1280,1536,1792 bytes
bm := @FSmallMemoryBlocks[(aSize-1) shr 8]
{$ifdef USEMEDIUM}
else if aSize <= (length(FMediumMemoryBlocks)*2048) then
// blocks of 2048: 2048, 4096... bytes
bm := @FMediumMemoryBlocks[(aSize-1) shr 11]
{$endif}
else
begin
// larger blocks are allocated via the old Memory Manager
Result := GetOldMem(aSize);
Exit;
end;
if FOtherThreadFreedMemory <> nil then
ProcessFreedMemFromOtherThreads;
with bm^ do
begin
{$ifndef USEBITMAP}
if FFirstFreedMemBlock <> nil then
// first get from freed mem (fastest because most chance?)
Result := FFirstFreedMemBlock.GetUsedMemoryItem else
{$endif}
// from normal list
Result := GetMemFromNewBlock;
end;
Assert(NativeUInt(Result) > $10000);
Result := Pointer(NativeUInt(Result) + SizeOf(TMemHeader));
end;
{ TMemBlock }
procedure TMemBlock.FreeBlockMemoryToGlobal;
begin
if Owner.FFirstMemBlock = @Self then
Exit; //keep one block
// remove ourselves from linked list
if FPreviousMemBlock <> nil then
FPreviousMemBlock.FNextMemBlock := Self.FNextMemBlock;
if FPreviousFreedMemBlock <> nil then
FPreviousFreedMemBlock.FNextFreedMemBlock := Self.FNextFreedMemBlock;
if FNextMemBlock <> nil then
FNextMemBlock.FPreviousMemBlock := Self.FPreviousMemBlock;
if FNextFreedMemBlock <> nil then
FNextFreedMemBlock.FPreviousFreedMemBlock := Self.FPreviousFreedMemBlock;
if Owner.FFirstFreedMemBlock = @Self then
Owner.FFirstFreedMemBlock := nil;
if Owner.FFirstMemBlock = @Self then
Owner.FFirstMemBlock := nil;
GlobalManager.FreeBlockMemory(@Self);
end;
procedure TMemBlock.FreeMem(aMemoryItem: PMemHeader);
begin
// first free item of block?
// then we add this block to (linked) list with available mem
{$ifdef USEBITMAP}
if FAvailable=NativeUInt(-1) then
{$else}
if FFreedIndex = 0 then
{$endif}
with Owner^ do //faster
begin
{Self.}FNextFreedMemBlock := {Owner}FFirstFreedMemBlock; //link to first list
{Self.}FPreviousFreedMemBlock := nil;
if {Self}FNextFreedMemBlock <> nil then
{Self}FNextFreedMemBlock.FPreviousFreedMemBlock := @Self; //back link
{Owner}FFirstFreedMemBlock := @Self; //replace first list
end;
{$ifdef USEBITMAP}
SetBit(FAvailable,aMemoryItem^.FIndexInMemBlockArray);
if FAvailable=NativeUInt(-1) then
{$else}
// free mem block
FFreedArray[FFreedIndex] := aMemoryItem;
inc(FFreedIndex);
if FFreedIndex = C_ARRAYSIZE then
{$endif}
// all memory available
with Owner^ do
if (FFreeMemCount >= C_GLOBAL_BLOCK_CACHE) and
({Owner.}FFirstMemBlock <> @Self) then // keep one block
Self.FreeBlockMemoryToGlobal else
inc(FFreeMemCount);
end;
function TMemBlock.GetUsedMemoryItem: PMemHeader;
begin
Assert(Self.Owner <> nil);
{$ifdef USEBITMAP}
Assert(FAvailable<>0);
Result := Pointer( NativeUInt(@Self)+ sizeof(Self) +
FindFirstSetBit(FAvailable) * (Owner.FItemSize + SizeOf(TMemHeader)) );
if FAvailable=0 then
{$else}
Assert(FFreedIndex > 0);
dec(FFreedIndex);
Result := FFreedArray[FFreedIndex];
if FFreedIndex = 0 then
{$endif}
begin // no free items left:
// set next free memlist
Owner.FFirstFreedMemBlock := FNextFreedMemBlock;
// first one has no previous
if FNextFreedMemBlock <> nil then
FNextFreedMemBlock.FPreviousFreedMemBlock := nil;
// remove from free list
FPreviousFreedMemBlock := nil;
FNextFreedMemBlock := nil;
end
else
{$ifdef USEBITMAP}
if FAvailable=NativeUInt(-1) then
{$else}
if FFreedIndex = C_ARRAYSIZE-1 then
{$endif}
// all memory is now available
dec(Owner.FFreeMemCount);
end;
{ TMemBlockList }
procedure TMemBlockList.AddNewMemoryBlock;
var
pm: PMemBlock;
begin
FRecursive := True;
// get block from cache
pm := GlobalManager.GetBlockMemory(FItemSize);
if pm = nil then
begin
// create own one
pm :=
{$ifdef USEMEDIUM}
Owner.GetMem {$else}
GetOldMem // (32+8)*64=2560 > 2048 -> use OldMM
{$endif}
( SizeOf(pm^) + (FItemSize + SizeOf(TMemHeader)) * C_ARRAYSIZE );
with pm^ do begin // put zero only to needed properties
{$ifdef USEBITMAP}
fillchar(FNextFreedMemBlock,SizeOf(FNextFreedMemBlock)+
SizeOf(FPreviousFreedMemBlock)+
SizeOf(FToBeFreedFromOtherThread)+SizeOf(NextMem),0);
FAvailable := NativeUInt(-1); // set all bits = mark all available
{$else}
fillchar(FNextFreedMemBlock,SizeOf(FNextFreedMemBlock)+
SizeOf(FPreviousFreedMemBlock)+SizeOf(FUsageCount)+SizeOf(FFreedIndex),0);
{$endif}
end;
end;
// init
with pm^ do
begin
{pm.}Owner := @Self;
// set new memlist as first, add link to current item
{pm.}FNextMemBlock := {self.}FFirstMemBlock;
// back link to new first item
if {self.}FFirstMemBlock <> nil then
{self.}FFirstMemBlock.FPreviousMemBlock := pm;
{self.}FFirstMemBlock := pm;
{pm.}FPreviousMemBlock := nil;
{$ifdef USEBITMAP}
if FAvailable<>NativeUInt(-1) then
{$else}
if {pm.}FFreedIndex > 0 then
{$endif}
begin
// if block has already some freed memory (previous used block from cache)
// then add to used list
{pm.}FNextFreedMemBlock := {Self}FFirstFreedMemBlock; // link to first list
{pm.}FPreviousFreedMemBlock := nil;
if {pm.}FNextFreedMemBlock <> nil then
{pm.}FNextFreedMemBlock.FPreviousFreedMemBlock := pm; // back link
{Self.}FFirstFreedMemBlock := pm; // replace first list
{$ifndef USEBITMAP}
if {pm.}FFreedIndex = C_ARRAYSIZE then
inc({pm.}Owner.FFreeMemCount);
{$endif}
end;
end;
FRecursive := False;
end;
function TMemBlockList.GetMemFromNewBlock: Pointer;
var
pm: PMemBlock;
begin
// store: first time init?
if FFirstMemBlock = nil then
begin
if FRecursive then
begin
Result := GetOldMem(Self.FItemSize);
Exit;
end;
AddNewMemoryBlock;
end;
pm := FFirstMemBlock;
with pm^ do
{$ifdef USEBITMAP}
if FAvailable=0 then
{$else}
if FUsageCount >= C_ARRAYSIZE then
{$endif}
begin
// memlist full? make new memlist
if FRecursive then
begin
Result := GetOldMem(Self.FItemSize);
Exit;
end;
AddNewMemoryBlock;
pm := FFirstMemBlock;
end;
// get mem from list
with pm^ do
// space left?
{$ifndef USEBITMAP}
if FUsageCount < C_ARRAYSIZE then
begin
// calc next item
Result := Pointer( NativeUInt(pm) + sizeof(pm^) +
FUsageCount * (FItemSize + SizeOf(TMemHeader)) );
inc(FUsageCount);
// startheader = link to memlist
TMemHeader(Result^).Owner := pm;
end
else
{$endif}
Result := GetUsedMemoryItem;
Assert(NativeUInt(Result) > $10000);
end;
{ TGlobalManager }
procedure TGlobalMemManager.AddNewThreadManagerToList(aThreadMem: PThreadMemManager);
var
pprevthreadmem: PThreadMemManager;
begin
repeat
pprevthreadmem := FFirstThreadMemory;
// try to set "result" in global var
if CAS0(pprevthreadmem, aThreadMem, FFirstThreadMemory) then
break;
{$ifdef BACKOFFSLEEP1}
pprevthreadmem := FFirstThreadMemory;
if CAS1(pprevthreadmem, aThreadMem, FFirstThreadMemory) then
break;
{$endif}
until false;
// make linked list: new one is first item (global var), next item is previous item
aThreadMem.FNextThreadManager := pprevthreadmem;
end;
procedure TGlobalMemManager.FreeAllMemory;
procedure __ProcessBlockMem(aOldBlock: PMemBlockList);
var
allmem, oldmem: PMemBlock;
begin
if aOldBlock = nil then
Exit;
allmem := aOldBlock.FFirstFreedMemBlock;
while allmem <> nil do
begin
// not in use
{$ifdef USEBITMAP}
{$else}
if allmem.FUsageCount = allmem.FFreedIndex then
begin
oldmem := allmem;
allmem := allmem.FNextFreedMemBlock;
FMainThreadMemory.FreeMem(oldmem);
end
else