Skip to content

Commit

Permalink
Merge pull request #49 from IvanMary69/main
Browse files Browse the repository at this point in the history
FastC: suppression display_cpu_efficiency en doublon avec fastS
  • Loading branch information
IvanMary69 authored Nov 29, 2024
2 parents bc23671 + abcdfbf commit 63d96c5
Showing 1 changed file with 0 additions and 146 deletions.
146 changes: 0 additions & 146 deletions Fast/FastC/FastC/PyTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -4187,152 +4187,6 @@ def calc_post_stats(t, iskeeporig=False, mode=None, cartesian=True):
C._rmVars(z, ['centers:Velocityr_RMS', 'centers:Velocityt_RMS' ,'centers:Velocityr', 'centers:Velocityt' ,"centers:rho_U_r'w'","centers:rho_U_t'w'","centers:rho_U_t'U_r'",'centers:co','centers:si','centers:Radius'])
return t

#==============================================================================
# display CPU efficiency diagnostic
#==============================================================================
def display_cpu_efficiency(t, mask_cpu=0.08, mask_cell=0.01, diag='compact', FILEOUT='listZonesSlow.dat', FILEOUT1='diagCPU.dat', RECORD=None):
    """Report the per-zone CPU cost read from the OpenMP timers of the solver.

    The timings are read from ``HOOK["TIMER_OMP"]``.  For every zone the
    function reads one slot ``timer_omp[ADR]`` (used as the divisor ``echant``)
    followed by ``OMP_NUM_THREADS`` pairs ``(timer_omp[ADR+1+2*i],
    timer_omp[ADR+2+2*i])``.
    NOTE(review): the pairs look like (time, number of cells) per thread and
    ``echant`` like a sample count -- confirm against the code filling the
    timer array.

    Parameters
    ----------
    t : pyTree holding the '.Solver#ownData' and '.Solver#define' nodes and
        the zones (each with a 'Parameter_int' node).
    mask_cpu : a zone is reported as slow only if its relative extra cost
        (zone mean cost / global mean cost - 1) exceeds this value.
    mask_cell : a zone is reported as slow only if its share of the total
        cost exceeds this value.
    diag : 'compact' accumulates statistics per zone, writes FILEOUT1,
        stores a 'Cups' node under the zone's '.Solver#define' and lists the
        slow zones in FILEOUT; any other value only prints the per-thread
        timings of each zone.
    FILEOUT : name of the file receiving the list of slow zones.
    FILEOUT1 : name of the file receiving the per-zone diagnostic.
    RECORD : when None the diagnostic is printed; otherwise the per-zone
        prints are skipped and a value is returned.

    Returns
    -------
    None when RECORD is None, when the timers hold no sample, or when no zone
    was accumulated; otherwise the mean cost per cell of the LAST zone
    processed in 'compact' mode.
    """
    own = Internal.getNodeFromName1(t, '.Solver#ownData')      # node
    dtloc = Internal.getNodeFromName1(own, '.Solver#dtloc')     # node

    # The tree-level 'omp_mode' overrides the module default when present.
    node = Internal.getNodeFromName1(t, '.Solver#define')
    node = Internal.getNodeFromName1(node, 'omp_mode')
    ompmode = OMP_MODE
    if node is not None: ompmode = Internal.getValue(node)

    dtloc = Internal.getValue(dtloc)                            # numpy array
    ss_iteration = int(dtloc[0])

    timer_omp = HOOK["TIMER_OMP"]
    ADR = OMP_NUM_THREADS*2*(ss_iteration)
    echant = timer_omp[ADR]
    if echant == 0.:
        print('nombre iterations insuffisant pour diagnostic: nitrun * ss_iteration > 15')
        return None

    verbose = RECORD is None
    # Two counters per thread: [2*i] when param_int[34] == 0, [2*i+1] otherwise.
    cellCount = numpy.zeros(2*OMP_NUM_THREADS, dtype=Internal.E_NpyInt)

    zones = Internal.getZones(t)
    tps_percell = 0.
    tps_percell_max = 0.
    tps_percell_min = 0.
    cout = 0.
    cells_tot = 0
    data = {}
    # Initialised so that the final return never hits an unbound name when no
    # zone goes through the 'compact' branch.
    tape = None

    # 'with' guarantees the diagnostic file is closed even if a zone raises.
    with open(FILEOUT1, 'w') as f1:
        for z in zones:
            echant = timer_omp[ADR]
            param_int = Internal.getNodeFromName2(z, 'Parameter_int')[1]
            solver_def = Internal.getNodeFromName2(z, '.Solver#define')
            if ompmode == 1:
                # Chain of offsets stored inside param_int itself.
                Ptomp = param_int[69]
                PtrIterOmp = param_int[Ptomp]
                PtZoneomp = param_int[PtrIterOmp]
                NbreThreads = param_int[PtZoneomp + OMP_NUM_THREADS]
            else:
                NbreThreads = OMP_NUM_THREADS

            if diag == 'compact':
                tps_zone_percell = 0.
                tps_zone_percell_max = 0.
                tps_zone_percell_min = 100000.
                ithread_max = 0
                # Defined up front: the original left it unbound when no
                # thread timing was below the initial minimum.
                ithread_min = 0
                ijkv = param_int[20]*param_int[21]*param_int[22]
                for i in range(OMP_NUM_THREADS):
                    t_thread = timer_omp[ADR + 1 + i*2]
                    n_thread = timer_omp[ADR + 2 + i*2]
                    if param_int[34] == 0:  # non-ALE zone
                        cellCount[2*i] += n_thread
                    else:
                        cellCount[2*i+1] += n_thread

                    if ompmode == 1:
                        ithread = param_int[PtZoneomp + i]
                        # -2 flags a slot that does not take part in the
                        # statistics of this zone.
                        if ithread == -2: continue
                    else:
                        ithread = i

                    # time * number of cells, normalised by the zone size
                    tps_zone_percell += t_thread*n_thread/float(ijkv)*NbreThreads
                    if t_thread > tps_zone_percell_max:
                        tps_zone_percell_max = t_thread
                        ithread_max = ithread
                    if t_thread < tps_zone_percell_min:
                        tps_zone_percell_min = t_thread
                        ithread_min = ithread

                cpu_moy = tps_zone_percell/echant/NbreThreads
                cpu_max = tps_zone_percell_max/echant
                cpu_min = tps_zone_percell_min/echant
                cout_zone = cpu_moy*ijkv

                tps_percell += cout_zone
                tps_percell_max += cpu_max*ijkv
                tps_percell_min += cpu_min*ijkv
                cout += cout_zone
                data[z[0]] = [cpu_moy, cout_zone]

                # A blank now separates max/min and slow/fast thread; they
                # were previously glued together into one unreadable token.
                # 'dim tot' is written before adding the current zone.
                f1.write('cpumoy/cell= '+str(cpu_moy)+" cpumaxmin= "+str(cpu_max)+' '+str(cpu_min)+" th lent/rap= "+str(ithread_max)+' '+str(ithread_min)+" Nthtread actif="+str(NbreThreads)+" dim zone="+str(ijkv)+" dim tot="+str(cells_tot)+" "+z[0]+'\n')

                cells_tot += ijkv

                if verbose:
                    print('cpu/cell zone=', z[0], 'moy=', cpu_moy, 'maxmin=', cpu_max, cpu_min, 'th maxmin=', ithread_max, ithread_min, 'Nbtread actif=', NbreThreads, ' dim zone=', ijkv, 'dim tot=', cells_tot)
                else:
                    tape = cpu_moy

                # Clamp to avoid a division by zero in the CUPS estimate.
                tps_zone_percell = max(tps_zone_percell, 1.e-11)
                tps_zone_percell_max = max(tps_zone_percell_max, 1.e-11)

                ## CUPS : [0] MEAN VALUE [1] MIN VALUE
                ## 1/CUPS=mus/iter(time * sub-iter)/cellspercore
                perfo = numpy.empty(2, dtype=numpy.float64)
                perfo[0] = int(echant*NbreThreads/tps_zone_percell)
                perfo[1] = int(echant/tps_zone_percell_max)

                Internal.createUniqueChild(solver_def, 'Cups', 'DataArray_t', value=perfo)

            else:
                for i in range(OMP_NUM_THREADS):
                    if ompmode == 1:
                        ithread = param_int[PtZoneomp + i]
                        if ithread == -2: continue
                    else:
                        ithread = i
                    if verbose: print('zone= ', z[0], 'cpu= ', timer_omp[ADR + 1 + ithread*2]/echant, ' th= ', ithread, 'echant= ', echant)

            # Next zone: one slot for echant plus two per thread.
            ADR += OMP_NUM_THREADS*2+1

        for i in range(OMP_NUM_THREADS):
            print('Nbr cellule Fixe/ALE: ', cellCount[2*i], cellCount[2*i+1], "pour th=", i)

        # Nothing accumulated (no zone, or diag != 'compact'): the averages
        # below would divide by zero and 'data' is empty, so stop here.
        if cells_tot == 0:
            return tape

        tps_percell /= cells_tot
        if verbose: print('cpu moyen %cell en microsec: ', tps_percell, tps_percell_max/cells_tot, tps_percell_min/cells_tot)

        f1.write('cpu moyen %cell en microsec: '+str(tps_percell)+' '+str(tps_percell_max/cells_tot)+' '+str(tps_percell_min/cells_tot)+'\n')

    with open(FILEOUT, 'w') as f:
        sizeZones = []
        for z in zones:
            param_int = Internal.getNodeFromName2(z, 'Parameter_int')[1]
            cout_relatif = data[z[0]][0]/tps_percell-1.
            effort_relatif = data[z[0]][1]/cout
            if cout_relatif > mask_cpu and effort_relatif > mask_cell:
                sizeZone = [param_int[20], param_int[21], param_int[22]]
                # NOTE(review): one entry per distinct zone size; the nesting
                # of the write below was rebuilt from a listing without
                # indentation -- confirm against the repository.
                if sizeZone not in sizeZones:
                    sizeZones.append(sizeZone)
                    if verbose: print('zone ', z[0], '(', param_int[20], param_int[21], param_int[22], '): surcout cpu= ', cout_relatif, ' , temps necessaire a cette zone (%)=', effort_relatif)

                    f.write(z[0]+','+str(param_int[20])+","+str(param_int[21])+","+str(param_int[22])+","+str(param_int[25])+","+str(param_int[27])+","+str(param_int[29])+","+str(param_int[33])+'\n')

    return tape

#====================================================================
# Useful functions for Chimera + motion
#====================================================================
Expand Down

0 comments on commit 63d96c5

Please sign in to comment.