-
Notifications
You must be signed in to change notification settings - Fork 1
/
Impact_Defs.py
9870 lines (9007 loc) · 468 KB
/
Impact_Defs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# metric definitions for impact factor calculator
import Impact_Funcs
import datetime
import math
from typing import Union
# --- Internal Constants ---
# codes describing how a metric's value should be formatted for display
INT = 0
FLOAT = 1
INTLIST = 2
FLOAT_NA = 3
# LISTLIST = 4
FLOATLIST = 5

# codes describing which chart style is used to plot a metric over time
LINE_CHART = 1
MULTILINE_CHART_LEFT = 2
MULTILINE_CHART_CENTER = 3
LINE_CHART_COMBINE = 4
TWO_LINE_CHART = 5

FSTR = "1.4f"  # constant formatting string

# the categories used to classify metrics, and the valid properties within each category
PROPERTY_TYPES = (
    "Metric Type",
    "Metric Property",
    "Considerations and Adjustments",
    "Publication Focus",
    "Citation Focus",
)
PROPERTY_DICT = {
    "Metric Type": ["Basic Statistic",
                    "Core Metric",
                    "Core Property",
                    "Alternative Metric"],
    "Metric Property": ["Compound Metric",
                        "Multidimensional Metric"],
    "Considerations and Adjustments": ["Time",
                                       "Coauthorship",
                                       "Self-Citation"],
    "Publication Focus": ["All Publications",
                          "Core Publications",
                          "Tail Publications",
                          "Uncited Publications"],
    "Citation Focus": ["All Citations",
                       "Core Citations",
                       "Tail Citations"],
}
# --- General Class Definitions ---
class Metric:
    """
    A single impact metric: its identifying information, descriptive text,
    classification properties, and a lazily calculated value.
    """

    def __init__(self):
        self.name = ""
        self.full_name = ""
        self.citation = ""
        self.symbol = ""
        self.__html_name = None  # optional HTML-formatted name; falls back to full_name
        self.is_self = False
        self.is_coauthor = False
        self.metric_type = FLOAT
        self.description = ""
        self.synonyms = []
        self.references = []
        self.calculate = None  # function taking a MetricSet and returning this metric's value
        self.__value = None  # cached value; computed on first access
        self.parent_set = None  # the MetricSet that owns this metric
        self.graph_type = None
        self.description_graphs = []
        self.example = None
        # every known property starts False; factory functions flip on the relevant ones
        self.properties = {prop: False
                           for category in PROPERTY_TYPES
                           for prop in PROPERTY_DICT[category]}

    @property
    def html_name(self):
        # use the plain full name when no HTML variant has been provided
        return self.full_name if self.__html_name is None else self.__html_name

    @html_name.setter
    def html_name(self, value):
        self.__html_name = value

    @property
    def value(self):
        # lazy evaluation: compute once from the parent set, then cache
        if self.__value is None:
            self.__value = self.calculate(self.parent_set)
        return self.__value

    def __str__(self):
        mtype = self.metric_type
        if mtype == INT:
            return str(self.value)
        if mtype == FLOAT:
            return format(self.value, FSTR)
        if mtype == INTLIST:
            return str(self.value)
        if mtype == FLOAT_NA:
            v = self.value
            return v if v == "n/a" else format(v, FSTR)
        if mtype == FLOATLIST:
            return "[" + ", ".join(format(v, FSTR) for v in self.value) + "]"
class MetricSet:
    """
    This class contains all of the metric output for a single year,
    as well as data used to calculate these metrics
    """
    def __init__(self):
        self.date = datetime.date(1970, 1, 1)
        self.citations = []  # number of citations for each pub, ordered by input
        self.rank_order = []  # rank of each pub, from most citations to fewest
        self.cumulative_citations = []  # cumulative number of citations per top i pubs, in order by rank
        self.is_core = []  # boolean indicator of whether a pub is part of the h-core
        self.self_citations = None  # number of self-citations of each publication, in same order as citations
        self.coauthor_citations = None  # number of coauthor citations of each publication, in same order as citations
        self.first_pub_year = None  # year of the author's first publication (None until known)
        self.publications = []
        self.parent_list = None
        # add all defined metrics
        tmp_list = load_all_metrics()
        self.metrics = {m.name: m for m in tmp_list}
        for m in self.metrics:
            self.metrics[m].parent_set = self  # cross-point this set as parent of each child metric object
        self.metric_names = [m.name for m in tmp_list]

    def calculate_ranks(self) -> None:
        """
        given a list of citation totals for each pub, fill in various other lists of ranks and counts and
        flags used to calculate metrics
        """
        n = len(self.citations)
        self.is_core = [False for _ in range(n)]
        self.rank_order, self.cumulative_citations = Impact_Funcs.calculate_ranks(self.citations)

    def academic_age(self) -> int:
        """
        number of years since author began publishing
        """
        if self.first_pub_year is None:
            return 0
        else:
            return self.date.year - self.first_pub_year + 1

    def sorted_citations(self) -> list:
        """
        returns the citation counts sorted from highest to lowest (rather than by pub order)
        """
        return sorted(self.citations, reverse=True)

    def self_coauthor_citations(self) -> list:
        """
        returns a list containing the sum of both self and coauthor citations for all pubs
        """
        return [self.self_citations[i] + self.coauthor_citations[i] for i in range(len(self.self_citations))]

    def author_counts(self) -> list:
        """
        returns a list with the count of authors for each pub
        """
        return [p.authors for p in self.publications]

    def author_position(self) -> list:
        """
        returns a list with the position of the author within the author list for each pub
        """
        return [p.author_rank for p in self.publications]

    def publication_years(self) -> list:
        """
        returns a list containing the publication year of each publication, in the same order as citations
        """
        return [p.year for p in self.publications]

    def year(self) -> int:
        """
        returns the year this metric set represents
        """
        return self.date.year

    def references(self) -> list:
        """
        returns a sorted, de-duplicated list of the literature references of every metric in this set
        """
        tmp_set = set()
        for m in self.metrics:
            tmp_set |= set(self.metrics[m].references)
        tmp_list = list(tmp_set)
        tmp_list.sort()
        return tmp_list
class DescriptionGraph:
    """
    Holds plotting information for a specialty graph used as part of the
    description of a particular metric, rather than just a recording of the
    metric values over time.

    (Cleanup: removed the commented-out dead attributes that were never used.)
    """

    def __init__(self):
        self.name = ""  # a label which will be used to identify specific plots
        self.data = None  # callable producing the javascript data/option lines for the plot
# --- Definitions and Calculations for Individual Metrics---
"""
The calculation functions in this section are designed to extract the key data from the MetricSet(s) and
send it to an identically named function in the Impact_Funcs module, which is designed with more generic
data input in mind.
Although slightly redundant in design, this allows the functions in the Impact_Funcs module to be used more
generally outside this specific code, if necessary.
"""
# total publications
def calculate_total_pubs(metric_set: MetricSet) -> int:
    """Total number of publications (P) for the target metric set."""
    cite_list = metric_set.citations
    return Impact_Funcs.calculate_total_pubs(cite_list)
def metric_total_pubs() -> Metric:
    """Create the Metric definition for total publications (P)."""
    m = Metric()
    m.name = "total pubs"
    m.full_name = "total publications"
    m.symbol = "<em>P</em>"
    m.metric_type = INT
    m.description = "<p>This metric is simply the total number of publications by an author. Many works might be " \
                    "considered a publication, including journal articles, books, book chapters, published " \
                    "conference abstracts, software, reports, dissertations, and theses.</p>"
    m.synonyms = ["<em>P</em>"]
    m.graph_type = LINE_CHART
    m.calculate = calculate_total_pubs
    m.properties["Basic Statistic"] = True
    m.properties["All Publications"] = True
    return m
# total citations
def calculate_total_cites(metric_set: MetricSet) -> int:
    """Total number of citations (C^P) across all publications in the set."""
    cite_list = metric_set.citations
    return Impact_Funcs.calculate_total_cites(cite_list)
def metric_total_cites() -> Metric:
    """Create the Metric definition for total citations (C^P)."""
    m = Metric()
    m.name = "total cites"
    m.full_name = "total citations"
    m.symbol = "<em>C<sup>P</sup></em>"
    m.metric_type = INT
    equation = r"$$C^P=\sum\limits_{i=1}^{P}{C_i}.$$"
    m.description = "<p>This metric (sometimes called <span class=\"metric_name\"><em>C<sub>T</sub></em></span>) is " \
                    "the total number of citations to all publications by an author, or</p>" + equation
    m.synonyms = ["<em>C<sub>T</sub></em>",
                  "citation count",
                  "<em>C<sup>P</sup></em>"]
    m.graph_type = LINE_CHART
    m.calculate = calculate_total_cites
    m.properties["Basic Statistic"] = True
    m.properties["All Citations"] = True
    m.properties["All Publications"] = True
    return m
# maximum citations
def calculate_max_cites(metric_set: MetricSet) -> int:
    """Largest citation count (C_max) of any single publication in the set."""
    cite_list = metric_set.citations
    return Impact_Funcs.calculate_max_cites(cite_list)
def metric_max_cites() -> Metric:
    """Create the Metric definition for maximum citations (C_max)."""
    m = Metric()
    m.name = "max cites"
    m.full_name = "maximum citations"
    m.metric_type = INT
    m.description = "<p>This metric is the largest number of citations found for a single publication by an " \
                    "author. When publications are in rank order by citations, <em>C</em><sub>max</sub> = " \
                    "<em>C</em><sub>1</sub>.</p>"
    m.symbol = "<em>C</em><sub>max</sub>"
    m.synonyms = ["<em>C</em><sub>max</sub>"]
    m.graph_type = LINE_CHART
    m.calculate = calculate_max_cites
    m.properties["Basic Statistic"] = True
    m.properties["All Publications"] = True
    return m
# mean citations
def calculate_mean_cites(metric_set: MetricSet) -> float:
    """Mean citations per publication (C/P), built from the already-computed totals."""
    metrics = metric_set.metrics
    return Impact_Funcs.calculate_mean_cites(metrics["total cites"].value,
                                             metrics["total pubs"].value)
def metric_mean_cites() -> Metric:
    """Create the Metric definition for mean citations per publication (C/P)."""
    m = Metric()
    m.name = "c/p"
    m.full_name = "mean citations per publication"
    m.metric_type = FLOAT
    equation = r"$$C/P\text{ index}=\frac{C^P}{P}$$"
    m.description = "<p>This metric is the mean number of citations per publication. It has been described under " \
                    "many names, including the <span class=\"metric_name\"><em>C/P</em> index</span>, the " \
                    "<span class=\"metric_name\">mean citation rate (<em>MCR</em>)</span>, the " \
                    "<span class=\"metric_name\">mean observed citation rate (<em>MOCR</em>)</span>, " \
                    "<span class=\"metric_name\">citations per publication (<em>CPP</em>)</span>, the " \
                    "<span class=\"metric_name\">observed citation rate (<em>OCR</em>)</span>, " \
                    "the <span class=\"metric_name\">generalized impact factor (<em>I<sub>f</sub></em>)</span>, and " \
                    "the <span class=\"metric_name\">journal paper citedness (<em>JPC</em>)</span>.</p>" + equation
    m.graph_type = LINE_CHART
    m.synonyms = ["<em>C/P</em> index",
                  "mean citation rate",
                  "mean observed citation rate",
                  "citations per publication",
                  "observed citation rate",
                  "generalized impact factor",
                  "journal paper citedness",
                  "<em>MCR</em>",
                  "<em>MOCR</em>",
                  "<em>CPP</em>",
                  "<em>OCR</em>",
                  "<em>I<sub>f</sub></em>",
                  "<em>JPC</em>"]
    m.symbol = "<em>C/P</em>"
    m.calculate = calculate_mean_cites
    m.properties["Basic Statistic"] = True
    m.properties["All Citations"] = True
    m.properties["All Publications"] = True
    return m
# median citations
def calculate_median_cites(metric_set: MetricSet) -> float:
    """Median citations per publication for the target metric set."""
    return Impact_Funcs.calculate_median_cites(metric_set.citations)
def metric_median_cites() -> Metric:
    """Create the Metric definition for median citations per publication."""
    m = Metric()
    m.name = "median cites per pub"
    m.full_name = "median citations per publication"
    m.metric_type = FLOAT
    m.symbol = r"\(\tilde{C}\)"
    m.description = "<p>This metric is the median number of citations per publication. It may be a better " \
                    "indicator of the average impact of an author\'s publications than the __c/p__ since it is less " \
                    "prone to bias under a heavily skewed citation distribution.</p>"
    m.graph_type = LINE_CHART
    m.calculate = calculate_median_cites
    m.properties["Basic Statistic"] = True
    m.properties["All Citations"] = True
    m.properties["All Publications"] = True
    return m
# pubs per year
def calculate_pubs_per_year(metric_set: MetricSet) -> float:
    """Mean publications per year (P^TS) over the author's academic age."""
    pub_count = metric_set.metrics["total pubs"].value
    return Impact_Funcs.calculate_pubs_per_year(pub_count, metric_set.academic_age())
def metric_pubs_per_year() -> Metric:
    """Create the Metric definition for publications per year (P^TS)."""
    m = Metric()
    m.name = "pubs per year"
    m.full_name = "publications per year"
    m.symbol = "<em>P<sup>TS</sup></em>"
    m.metric_type = FLOAT
    equation = r"$$P^{TS}=\frac{P}{\text{academic age}}=\frac{P}{Y-Y_0+1},$$"
    ystr = r"\(Y\)"
    y0str = r"\(Y_{0}\)"
    m.description = "<p>This metric, also called the " \
                    "<span class=\"metric_name\">time-scaled number of publications</span> is just the mean " \
                    "number of publications per year, calculated as the total number of publications of an author " \
                    "divided by their academic age (number of years since their first publication),</p>" + equation + \
                    "<p>where " + ystr + " is the current year and " + y0str + " is the year of their first " \
                    "publication.</p>"
    m.synonyms = ["time-scaled number of publications",
                  "<em>P<sup>TS</sup></em>"]
    m.graph_type = LINE_CHART
    m.calculate = calculate_pubs_per_year
    m.properties["Basic Statistic"] = True
    m.properties["Time"] = True
    m.properties["All Publications"] = True
    return m
# citations per year
def calculate_cites_per_year(metric_set: MetricSet) -> float:
    """Mean citations per year (C^TS) over the author's academic age."""
    cite_count = metric_set.metrics["total cites"].value
    return Impact_Funcs.calculate_cites_per_year(cite_count, metric_set.academic_age())
def metric_cites_per_year() -> Metric:
    """Create the Metric definition for citations per year (C^TS)."""
    m = Metric()
    m.name = "citations per year"
    m.full_name = "citations per year"
    m.symbol = "<em>C<sup>TS</sup></em>"
    m.metric_type = FLOAT
    equation = r"$$C^{TS}=\frac{C^P}{\text{academic age}}=\frac{C^P}{Y-Y_0+1},$$"
    ystr = r"\(Y\)"
    y0str = r"\(Y_{0}\)"
    m.description = "<p>This metric, also called the <span class=\"metric_name\">time-scaled citation index</span>, " \
                    "is just the mean number of citations per year, calculated as the total number of citations of " \
                    "an author divided by their academic age (number of years since their first publication),</p>" + \
                    equation + "<p>where " + ystr + " is the current year and " + y0str + \
                    " is the year of their first publication.</p>"
    m.synonyms = ["time-scaled citation index",
                  "<em>C<sup>TS</sup></em>"]
    m.graph_type = LINE_CHART
    m.calculate = calculate_cites_per_year
    m.properties["Basic Statistic"] = True
    m.properties["Time"] = True
    m.properties["All Citations"] = True
    m.properties["All Publications"] = True
    return m
# h-index (Hirsch )
def calculate_h_index(metric_set: MetricSet) -> int:
    """h-index (Hirsch 2005); also records h-core membership back onto the metric set."""
    h, core_flags = Impact_Funcs.calculate_h_index(metric_set.citations,
                                                   metric_set.rank_order)
    # side effect: other metrics read these core-membership flags
    metric_set.is_core = core_flags
    return h
def write_h_index_desc_data(metric_set: MetricSet) -> list:
    """Generate the Google Charts javascript (data table and chart options) for the
    h-index description graph: the citation curve, the y=x threshold line, and the
    h-by-h square fitting under the curve."""
    metric = metric_set.metrics["h-index"]
    graph = metric.description_graphs[0]
    output = list()
    output.append(" var data_{} = google.visualization.arrayToDataTable([\n".format(graph.name))
    output.append(" ['Rank', 'Citations', 'y=x', 'h-square', {'type': 'string', 'role': 'annotation'}],\n")
    tmp_cites = [c for c in metric_set.citations]
    tmp_cites.sort(reverse=True)
    h = metric_set.metrics["h-index"].value
    maxx = metric_set.metrics["total pubs"].value
    maxv = 50  # fixed upper bound for both chart axes
    # write citation count for ranked publication x
    for x in range(maxx + 1):
        outstr = " [{}".format(x)  # write rank
        if x == 0:
            v = "null"
        else:
            v = tmp_cites[x - 1]
        outstr += ", {}, null, null, null],\n".format(v)
        output.append(outstr)
    # write y for x=y
    output.append(" [{}, null, {}, null, null],\n".format(0, 0))
    output.append(" [{}, null, {}, null, null],\n".format(maxv, maxv))
    output.append(" [null, null, null, null, null],\n")
    # write h-square
    output.append(" [{}, null, null, {}, null],\n".format(0, h))
    output.append(" [{}, null, null, {}, \'h\'],\n".format(h, h))
    output.append(" [{}, null, null, {}, null],\n".format(h, 0))
    output.append(" ]);\n")
    output.append("\n")
    output.append(" var options_{} = {{\n".format(graph.name))
    output.append(" legend: {position: 'top'},\n")
    # output.append(" interpolateNulls: true,\n")
    output.append(" hAxis: {slantedText: true,\n")
    output.append(" title: \'Rank\',\n")
    output.append(" gridlines: {color: \'transparent\'},\n")
    output.append(" ticks: [0, 10, 20, 30, 40, 50],\n")
    output.append(" viewWindow: {max:" + str(maxv) + "}},\n")
    output.append(" vAxis: {viewWindow: {max:" + str(maxv) + "},\n")
    output.append(" title: \'Citation Count\',\n")
    output.append(" ticks: [0, 10, 20, 30, 40, 50],\n")
    output.append(" gridlines: {color: \'transparent\'}},\n")
    output.append(" series: { 0: {},\n")
    output.append(" 1: {lineDashStyle: [4, 4]},\n")
    output.append(" 2: {lineDashStyle: [2, 2],\n")
    output.append(" annotations:{textStyle:{color: \'black\', italic: true, bold: true}}}}\n")
    output.append(" };\n")
    output.append("\n")
    output.append(" var chart_{} = new google.visualization."
                  "LineChart(document.getElementById('chart_{}_div'));\n".format(graph.name, graph.name))
    output.append(" chart_{}.draw(data_{}, options_{});\n".format(graph.name, graph.name, graph.name))
    output.append("\n")
    return output
def write_h_index_example(metric_set: MetricSet) -> str:
    """Create an HTML table illustrating the h-index calculation for the example data."""
    h = metric_set.metrics["h-index"].value
    ranked_cites = sorted(metric_set.citations, reverse=True)
    cite_row = "<tr class=\"top_row\"><th>Citations (<em>C<sub>i</sub></em>)</th>"
    rank_row = "<tr><th>Rank (<em>i</em>)</th>"
    label_row = "<tr><th></th>"
    for rank, cites in enumerate(ranked_cites, start=1):
        # the cell at rank h is boxed and labeled
        if rank == h:
            label = f"<em>h</em> = {h}"
            cell_class = " class=\"box\""
        else:
            label = ""
            cell_class = ""
        cite_row += f"<td{cell_class}>{cites}</td>"
        rank_row += f"<td{cell_class}>{rank}</td>"
        label_row += f"<td>{label}</td>"
    pieces = [
        "<p>Publications are ordered by number of citations, from highest to lowest.</p>",
        "<table class=\"example_table\">",
        cite_row + "</tr>",
        rank_row + "</tr>",
        label_row + "</tr>",
        "</table>",
        f"<p>The largest rank where <em>i</em> ≤ <em>C<sub>i</sub></em> is {h}.</p>",
    ]
    return "".join(pieces)
def metric_h_index() -> Metric:
    """Create the Metric definition for the h-index (Hirsch 2005), including its
    description graph and worked example."""
    m = Metric()
    m.name = "h-index"
    m.full_name = "h-index"
    m.html_name = "<em>h-</em>index"
    m.symbol = "<em>h</em>"
    m.metric_type = INT
    m.example = write_h_index_example
    graph = DescriptionGraph()
    m.description_graphs.append(graph)
    graph.name = "h_index_desc"
    graph.data = write_h_index_desc_data
    equation = r"$$h=\underset{i}{\max}\left(i\leq C_i\right).$$"
    m.description = "<p>The <span class=\"metric_name\"><em>h-</em>index</span> (Hirsch 2005) is the most " \
                    "important personal impact factor one needs " \
                    "to be familiar with, not because it is necessarily the best, but because (1) it was the first " \
                    "major index of its type and most of the other indices are based on it in some way, and (2) it " \
                    "is the single factor with which most other people (<em>e.g.</em>, " \
                    "administrators) are likely to be somewhat familiar. You may find another index which you " \
                    "prefer, but everything starts with <em>h.</em></p><p>The <em>h-</em>index is defined as the " \
                    "largest value for which <em>h</em> publications have at least <em>h</em> citations. Put another " \
                    "way, a scientist has an impact factor of <em>h</em> if <em>h</em> of their publications have at " \
                    "least <em>h</em> citations and the other <em>P - h</em> publications have ≤ <em>h</em> " \
                    "citations. Note that <em>h</em> is measured in publications. In formal notation, one might " \
                    "write</p>" + equation + "<p>These top <em>h</em> publications are often referred " \
                    "to as the “Hirsch core.”</p><div id=\"chart_" + graph.name + \
                    "_div\" class=\"proportional_chart\"></div>" \
                    "<p>One way to graphically visualize <em>h</em> is to imagine a " \
                    "plot of citation count versus rank for all publications (often called the citation curve). An " \
                    "alternative way of thinking of this is a plot of minimum number of citations for a publication " \
                    "in the top <em>i</em> publications vs. <em>i</em>. By " \
                    "definition, this plot will generally trend from upper left (highest ranked publications with " \
                    "most citations, to lower right (lowest ranked publications with fewest citations), depending on " \
                    "the precise citation distribution of the author. If one were to add a (threshold) line with a " \
                    "slope of one to this plot, the point where the threshold line crosses the citation curve " \
                    "(truncated to an integer) is <em>h.</em> Alternatively, one can visualize <em>h</em> as the " \
                    "size (length of sides) of the largest (integer) square that one can fit under the citation " \
                    "curve.</p>"
    m.graph_type = LINE_CHART
    m.references = ["Hirsch, J.E. (2005) An index to quantify an individual\'s scientific research output. "
                    "<em>Proceedings of the National Academy of Sciences USA</em> 102(46):16569–16572."]
    m.calculate = calculate_h_index
    m.properties["Core Metric"] = True
    m.properties["Core Publications"] = True
    m.properties["Core Citations"] = True
    return m
# Hirsch core citations (Hirsch )
def calculate_h_core(metric_set: MetricSet) -> int:
    """Sum of the citations of all publications within the Hirsch core (C^H)."""
    return Impact_Funcs.calculate_h_core(metric_set.citations, metric_set.is_core)
def write_h_core_example(metric_set: MetricSet) -> str:
    """Create an HTML table illustrating the Hirsch-core citation sum for the example data."""
    outstr = "<p>Publications are ordered by number of citations, from highest to lowest.</p>"
    outstr += "<table class=\"example_table\">"
    citations = sorted(metric_set.citations, reverse=True)
    row1 = "<tr class=\"top_row\"><th>Citations (<em>C<sub>i</sub></em>)</th>"
    row2 = "<tr><th>Rank (<em>i</em>)</th>"
    row3 = "<tr><th></th>"
    h = metric_set.metrics["h-index"].value
    hc = metric_set.metrics["h-core cites"].value
    for i, c in enumerate(citations):
        # publications within the core (rank <= h) get a box drawn around their citation cell
        if i + 1 <= h:
            ec = " class=\"box\""
        else:
            ec = ""
        if i + 1 == h:
            v = "<em>h</em> = {}".format(h)
        else:
            v = ""
        row1 += "<td" + ec + ">{}</td>".format(c)
        row2 += "<td>{}</td>".format(i+1)
        row3 += "<td>{}</td>".format(v)
    row1 += "</tr>"
    row2 += "</tr>"
    row3 += "</tr>"
    outstr += row1 + row2 + row3 + "</table>"
    outstr += "<p>The sum of the citations for the top <em>h</em> publications is {}.</p>".format(hc)
    return outstr
def metric_h_core() -> Metric:
    """Create the Metric definition for Hirsch core citations (C^H)."""
    m = Metric()
    m.name = "h-core cites"
    m.full_name = "Hirsch core citations"
    m.symbol = "<em>C<sup>H</sup></em>"
    m.example = write_h_core_example
    m.metric_type = INT
    equation = r"$$C^H=\sum\limits_{i=1}^{h}{C_i}.$$"
    m.description = "<p>This is the sum of the citations for all publications that contribute to the __h-index__, " \
                    "<em>i.e.</em> the Hirsch core,</p>" + equation
    m.synonyms = ["<em>C<sup>H</sup></em>"]
    m.references = ["Hirsch, J.E. (2005) An index to quantify an individual\'s scientific research output. "
                    "<em>Proceedings of the National Academy of Sciences USA</em> 102(46):16569–16572."]
    m.graph_type = LINE_CHART
    m.calculate = calculate_h_core
    m.properties["Core Citations"] = True
    m.properties["Core Publications"] = True
    m.properties["Core Property"] = True
    return m
# Hirsch minimum constant (Hirsch )
def calculate_hirsch_min_const(metric_set: MetricSet) -> float:
    """Hirsch proportionality constant a = C^P / h^2."""
    metrics = metric_set.metrics
    return Impact_Funcs.calculate_hirsch_min_const(metrics["total cites"].value,
                                                   metrics["h-index"].value)
def metric_hirsch_min_const() -> Metric:
    """Create the Metric definition for the Hirsch proportionality constant (a)."""
    m = Metric()
    m.name = "Hirsch min const"
    m.full_name = "Hirsch proportionality constant"
    m.symbol = "<em>a</em>"
    m.metric_type = FLOAT
    equation = r"$$a=\frac{C^P}{h^2}.$$"
    m.description = "<p>This metric (Hirsch 2005) describes a relationship between the __h-index__ and the " \
                    "__total cites__ and is defined as</p>" + equation
    m.references = ["Hirsch, J.E. (2005) An index to quantify an individual\'s scientific research output. "
                    "<em>Proceedings of the National Academy of Sciences USA</em> 102(46):16569–16572."]
    m.graph_type = LINE_CHART
    m.calculate = calculate_hirsch_min_const
    m.properties["All Citations"] = True
    m.properties["Core Citations"] = True
    m.properties["Core Property"] = True
    m.properties["All Publications"] = True
    m.properties["Core Publications"] = True
    return m
# g-index (Egghe 2006)
def calculate_g_index(metric_set: MetricSet) -> int:
    """g-index (Egghe 2006), computed from cumulative citation counts by rank."""
    return Impact_Funcs.calculate_g_index(metric_set.cumulative_citations,
                                          metric_set.rank_order)
def write_g_index_desc_data1(metric_set: MetricSet) -> list:
    """Generate the Google Charts javascript for the first g-index description graph
    (cumulative citations vs. rank against the y=x^2 curve)."""
    metric = metric_set.metrics["g-index"]
    graph = metric.description_graphs[0]
    output = list()
    output.append(" var data_{} = google.visualization.arrayToDataTable([\n".format(graph.name))
    output.append(" ['Rank', 'Cumulative Citations', 'y=x^2', {'type': 'string', 'role': 'annotation'}],\n")
    tmp_cites = [c for c in metric_set.citations]
    tmp_cites.sort(reverse=True)
    # running cumulative totals of the ranked citation counts
    cum_cites = [tmp_cites[0]]
    for i, c in enumerate(tmp_cites[1:]):
        cum_cites.append(cum_cites[i] + c)
    g = metric_set.metrics["g-index"].value
    maxx = metric_set.metrics["total pubs"].value
    maxv = 150  # fixed upper bound for both chart axes
    # write cumulative citation count for ranked publication x
    for x in range(maxx + 1):
        outstr = " [{}".format(x)  # write rank
        # write cumulative citation count for ranked publication x
        if x == 0:
            v = "null"
        else:
            v = cum_cites[x - 1]
        outstr += ", {}, null, null],\n".format(v)
        output.append(outstr)
    # write y for y=x^2
    for x in range(maxx + 1):
        outstr = " [{}, null".format(x)  # write rank
        v = x**2
        if v > maxv:
            v = "null"
        if x == g:
            a = "\'g\'"
        else:
            a = "null"
        outstr += ", {}, {}],\n".format(v, a)
        output.append(outstr)
    output.append(" ]);\n")
    output.append("\n")
    output.append(" var options_{} = {{\n".format(graph.name))
    output.append(" legend: {position: 'top'},\n")
    # output.append(" chartArea: {width:\'75%\', height:\'75%\'},\n")
    output.append(" interpolateNulls: true,\n")
    output.append(" hAxis: {slantedText: true,\n")
    output.append(" title: \'Rank\',\n")
    output.append(" gridlines: {color: \'transparent\'},\n")
    output.append(" ticks: [0, 20, 40, 60, 80, 100, 120, 140],\n")
    output.append(" viewWindow: {max:" + str(maxv) + "}},\n")
    output.append(" vAxis: {viewWindow: {max:" + str(maxv) + "},\n")
    output.append(" title: \'Cumulative Citation Count\',\n")
    output.append(" ticks: [0, 20, 40, 60, 80, 100, 120, 140],\n")
    output.append(" gridlines: {color: \'transparent\'}},\n")
    output.append(" series: { 0: {},\n")
    output.append(" 1: {lineDashStyle: [4, 4],\n")
    output.append(" annotations:{textStyle:{color: \'black\', italic: true, bold: true}}}}\n")
    output.append(" };\n")
    output.append("\n")
    output.append(" var chart_{} = new google.visualization."
                  "LineChart(document.getElementById('chart_{}_div'));\n".format(graph.name, graph.name))
    output.append(" chart_{}.draw(data_{}, options_{});\n".format(graph.name, graph.name, graph.name))
    output.append("\n")
    return output
def write_g_index_desc_data2(metric_set: MetricSet) -> list:
    """Generate the Google Charts javascript for the second g-index description graph
    (mean citations vs. rank against the y=x line and the g-by-g square)."""
    metric = metric_set.metrics["g-index"]
    graph = metric.description_graphs[1]
    output = list()
    output.append(" var data_{} = google.visualization.arrayToDataTable([\n".format(graph.name))
    output.append(" ['Rank', 'Mean Citations', 'y=x', 'g-square', "
                  "{'type': 'string', 'role': 'annotation'}],\n")
    tmp_cites = [c for c in metric_set.citations]
    tmp_cites.sort(reverse=True)
    # mean citations over the top i ranked publications
    avg_cites = []
    for i in range(len(tmp_cites)):
        avg_cites.append(sum(tmp_cites[:i+1])/(i+1))
    g = metric_set.metrics["g-index"].value
    maxx = metric_set.metrics["total pubs"].value
    maxv = 45  # fixed upper bound for both chart axes
    # write avg citation count for top x ranked publications
    for x in range(maxx + 1):
        outstr = " [{}".format(x)  # write rank
        if x == 0:
            v = "null"
        else:
            v = avg_cites[x - 1]
        outstr += ", {}, null, null, null],\n".format(v)
        output.append(outstr)
    # write y for y=x
    output.append(" [{}, null, {}, null, null],\n".format(0, 0))
    output.append(" [{}, null, {}, null, null],\n".format(maxv, maxv))
    # write g-square
    output.append(" [{}, null, null, {}, null],\n".format(0, g))
    output.append(" [{}, null, null, {}, \'g\'],\n".format(g, g))
    output.append(" [{}, null, null, {}, null],\n".format(g, 0))
    output.append(" ]);\n")
    output.append("\n")
    output.append(" var options_{} = {{\n".format(graph.name))
    output.append(" legend: {position: 'top'},\n")
    output.append(" hAxis: {slantedText: true,\n")
    output.append(" title: \'Rank\',\n")
    output.append(" gridlines: {color: \'transparent\'},\n")
    output.append(" ticks: [0, 10, 20, 30, 40, 50],\n")
    output.append(" viewWindow: {max:" + str(maxv) + "}},\n")
    output.append(" vAxis: {viewWindow: {max:" + str(maxv) + "},\n")
    output.append(" title: \'Mean Citation Count\',\n")
    output.append(" ticks: [0, 10, 20, 30, 40, 50],\n")
    output.append(" gridlines: {color: \'transparent\'}},\n")
    output.append(" series: { 0: {},\n")
    output.append(" 1: {lineDashStyle: [4, 4]},\n")
    output.append(" 2: {lineDashStyle: [2, 2],\n")
    output.append(" annotations:{textStyle:{color: \'black\', italic: true, bold: true}}}}\n")
    output.append(" };\n")
    output.append("\n")
    output.append(" var chart_{} = new google.visualization."
                  "LineChart(document.getElementById('chart_{}_div'));\n".format(graph.name, graph.name))
    output.append(" chart_{}.draw(data_{}, options_{});\n".format(graph.name, graph.name, graph.name))
    output.append("\n")
    return output
def write_g_index_example(metric_set: MetricSet) -> str:
    """Produce the HTML worked-example table for the g-index.

    Publications are ranked by citation count; the column whose rank equals g
    (where rank-squared still does not exceed the cumulative citation total,
    equivalently where rank does not exceed the mean citation count) is boxed
    and labeled.
    """
    g = metric_set.metrics["g-index"].value
    ranked_cites = sorted(metric_set.citations, reverse=True)
    # one accumulator string per table row, seeded with its header cell
    rows = [
        "<tr><th>Citations (<em>C<sub>i</sub></em>)</th>",
        "<tr class=\"top_row\"><th>Cumulative Citations (Σ<em>C<sub>i</sub></em>)</th>",
        "<tr><th>Rank Squared (<em>i</em><sup>2</sup>)</th>",
        "<tr><th></th>",
        "<tr class=\"top_row\"><th>Rank (<em>i</em>)</th>",
        "<tr><th>Mean Citations (Σ<em>C<sub>i</sub></em> / <em>i</em>)</th>",
    ]
    running_total = 0
    for rank, cites in enumerate(ranked_cites, start=1):
        running_total += cites
        if rank == g:
            label = "<em>g</em> = {}".format(g)
            cell_attr = " class=\"box\""
        else:
            label = ""
            cell_attr = ""
        rows[0] += "<td>{}</td>".format(cites)
        rows[1] += "<td{}>{}</td>".format(cell_attr, running_total)
        rows[2] += "<td{}>{}</td>".format(cell_attr, rank ** 2)
        rows[3] += "<td>{}</td>".format(label)
        rows[4] += "<td{}>{}</td>".format(cell_attr, rank)
        rows[5] += "<td{}>{:1.1f}</td>".format(cell_attr, running_total / rank)
    outstr = "<p>Publications are ordered by number of citations, from highest to lowest.</p>"
    outstr += "<table class=\"example_table\">"
    outstr += "".join(r + "</tr>" for r in rows) + "</table>"
    outstr += "<p>The largest rank where <em>i</em><sup>2</sup> ≤ Σ<em>C<sub>i</sub></em> (or " \
              "<em>i</em> ≤ mean <em>C<sub>i</sub></em>) is {}.</p>".format(g)
    return outstr
def metric_g_index() -> Metric:
    """Build the Metric descriptor for the g-index (Egghe 2006).

    Attaches the two description graphs, the worked-example writer, the HTML
    description (with embedded LaTeX equation), the references, and the
    calculation callback. Fixes two typos in the user-facing description
    ("it's" -> "its", "Stricly" -> "Strictly").
    """
    m = Metric()
    m.name = "g-index"
    m.full_name = "g-index"
    m.html_name = "<em>g-</em>index"
    m.symbol = "<em>g</em>"
    m.metric_type = INT
    # first description graph: citations vs. rank with the g^2 threshold
    graph1 = DescriptionGraph()
    m.description_graphs.append(graph1)
    graph1.name = "g_index_desc1"
    graph1.data = write_g_index_desc_data1
    # second description graph: mean citations vs. rank interpretation
    graph2 = DescriptionGraph()
    m.description_graphs.append(graph2)
    graph2.name = "g_index_desc2"
    graph2.data = write_g_index_desc_data2
    m.example = write_g_index_example
    equation = r"$$g=\underset{i}{\max}\left(i^2\leq \sum\limits_{j=1}^{i}{C_j}\right)=" \
               r"\underset{i}{\max}\left(i\leq\frac{\sum\limits_{j=1}^{i}{C_j}}{i} \right)$$"
    m.description = "<p>The best known and most widely studied alternative to the __h-index__ is known as the " \
                    "<em>g-</em>index (Egghe 2006a, b, c). The <em>g-</em>index is designed to give more credit for " \
                    "publications cited in excess of the <em>h</em> threshold. The primary difference between the " \
                    "formal definitions of the <em>h-</em> and <em>g-</em>indices is that <em>g</em> is based on " \
                    "cumulative citation counts rather than individual citation counts. Formally, the " \
                    "<em>g-</em>index is the largest value for which <em>g</em> publications have jointly received " \
                    "at least <em>g</em><sup>2</sup> citations.</p>" + equation + \
                    "<div class=\"chart2container\">" \
                    "<div id=\"chart_" + graph1.name + "_div\" class=\"proportional_chart2\"></div>" \
                    "<div id=\"chart_" + graph2.name + "_div\" class=\"proportional_chart2\"></div>" \
                    "</div><div class=\"clear_float\">" \
                    "<p>Although not usually " \
                    "formulated this way, the above also shows an alternative interpretation of the <em>g-</em>" \
                    "index, which makes its meaning and relationship to <em>h</em> much clearer: the <em>g-</em>" \
                    "index is the largest value for which the top <em>g</em> publications average <em>g</em> " \
                    "citations, while <em>h</em> is the largest value for which the top <em>h</em> publications " \
                    "have a minimum of <em>h</em> citations.</p><p>Strictly speaking, it is possible for the number " \
                    "of citations in the <em>g-</em>core to exceed the square of the total number of publications " \
                    "(<em>C<sup>P</sup></em> > <em>P</em><sup>2</sup>), or using the alternate definition, for " \
                    "the average number of citations per publication to exceed the number of publications. Under " \
                    "this scenario, the threshold curve and the citation curve do not actually cross. Some authors " \
                    "have suggested adding phantom publications with zero citations until the curves cross " \
                    "(essentially, making <em>g</em> equal to the square-root of <em>C<sup>P</sup></em>); " \
                    "a more conservative approach, illustrated here, is to set the maximum possible value of " \
                    "<em>g</em> equal to the number of publications.</p>"
    m.references = ["Egghe, L. (2006) How to improve the <em>h-</em>index: Letter. <em>The Scientist</em> 20(3):14.",
                    "Egghe, L. (2006) An improvement of the <em>h-</em>index: The <em>g-</em>index. <em>ISSI "
                    "Newsletter</em> 2(1):8–9.",
                    "Egghe, L. (2006) Theory and practice of the <em>g-</em>index. <em>Scientometrics</em> "
                    "69(1):131–152."]
    m.graph_type = LINE_CHART
    m.calculate = calculate_g_index
    m.properties["Core Metric"] = True
    m.properties["Core Citations"] = True
    m.properties["Core Publications"] = True
    return m
# h2-index (Kosmulski 2006)
def calculate_h2_index(metric_set: MetricSet) -> int:
    """Delegate the h(2)-index (Kosmulski 2006) calculation to Impact_Funcs."""
    return Impact_Funcs.calculate_h2_index(metric_set.citations, metric_set.rank_order)
def write_h2_index_desc_data(metric_set: MetricSet) -> list:
    """Generate the Google-Charts JavaScript lines for the h(2)-index description graph.

    The chart plots the ranked citation counts against the y = x^2 threshold
    curve, with an annotation at the h(2) crossing point. Returns a list of
    strings, one per output line of JavaScript.
    """
    metric = metric_set.metrics["h(2)-index"]
    graph = metric.description_graphs[0]
    h = metric.value  # reuse the metric already fetched rather than a second lookup
    tmp_cites = sorted(metric_set.citations, reverse=True)  # citations in rank order
    maxx = metric_set.metrics["total pubs"].value
    maxv = 50  # upper bound of both chart axes
    output = list()
    output.append("        var data_{} = google.visualization.arrayToDataTable([\n".format(graph.name))
    output.append("          ['Rank', 'Citations', 'y=x^2', {'type': 'string', 'role': 'annotation'}],\n")
    for x in range(maxx + 1):
        outstr = "          [{}".format(x)  # write rank
        # write citation count for ranked publication x (x == 0 is the origin placeholder)
        if x == 0:
            v = "null"
        else:
            v = tmp_cites[x - 1]
        outstr += ", {}".format(v)
        # write y for y=x^2, clipped once it leaves the plot window
        v = x**2
        if v > maxv:
            v = "null"
        if x == h:
            a = "\'h(2)\'"
        else:
            a = "null"
        outstr += ", {}, {}".format(v, a)
        outstr += "],\n"
        output.append(outstr)
    output.append("        ]);\n")
    output.append("\n")
    output.append("        var options_{} = {{\n".format(graph.name))
    output.append("          legend: {position: 'top'},\n")
    # output.append("          interpolateNulls: true,\n")
    output.append("          hAxis: {slantedText: true,\n")
    output.append("                  title: \'Rank\',\n")
    output.append("                  gridlines: {color: \'transparent\'},\n")
    output.append("                  ticks: [0, 10, 20, 30, 40, 50],\n")
    output.append("                  viewWindow: {max:" + str(maxv) + "}},\n")
    output.append("          vAxis: {viewWindow: {max:" + str(maxv) + "},\n")
    output.append("                  title: \'Citation Count\',\n")
    output.append("                  ticks: [0, 10, 20, 30, 40, 50],\n")
    output.append("                  gridlines: {color: \'transparent\'}},\n")
    output.append("          series: { 0: {},\n")
    output.append("                    1: {lineDashStyle: [4, 4],\n")
    output.append("                        annotations:{textStyle:{color: \'black\', italic: true, bold: true}}}}\n")
    output.append("        };\n")
    output.append("\n")
    output.append("        var chart_{} = new google.visualization."
                  "LineChart(document.getElementById('chart_{}_div'));\n".format(graph.name, graph.name))
    output.append("        chart_{}.draw(data_{}, options_{});\n".format(graph.name, graph.name, graph.name))
    output.append("\n")
    return output
def write_h2_index_example(metric_set: MetricSet) -> str:
    """Create the HTML worked-example table for the h(2)-index.

    Publications are ranked by citation count; the column at rank h(2) —
    the largest rank whose square does not exceed its citation count —
    is boxed and labeled.
    """
    outstr = "<p>Publications are ordered by number of citations, from highest to lowest.</p>"
    outstr += "<table class=\"example_table\">"
    citations = sorted(metric_set.citations, reverse=True)
    row1 = "<tr class=\"top_row\"><th>Citations (<em>C<sub>i</sub></em>)</th>"
    # bug fix: this header cell was missing its closing </th> tag
    row2 = "<tr><th>Rank-squared (<em>i</em><sup>2</sup>)</th>"
    row3 = "<tr><th>Rank (<em>i</em>)</th>"
    row4 = "<tr><th></th>"
    h = metric_set.metrics["h(2)-index"].value
    for i, c in enumerate(citations):
        if i + 1 == h:
            # mark the h(2) column with a box and a label
            v = "<em>h</em>(2) = {}".format(h)
            ec = " class=\"box\""
        else:
            v = ""
            ec = ""
        row1 += "<td" + ec + ">{}</td>".format(c)
        row2 += "<td" + ec + ">{}</td>".format((i + 1)**2)
        row3 += "<td>{}</td>".format(i + 1)
        row4 += "<td>{}</td>".format(v)
    row1 += "</tr>"
    row2 += "</tr>"
    row3 += "</tr>"
    row4 += "</tr>"
    outstr += row1 + row2 + row3 + row4 + "</table>"
    outstr += "<p>The largest rank where <em>i</em><sup>2</sup> ≤ <em>C<sub>i</sub></em> is {}.</p>".format(h)
    return outstr
def metric_h2_index() -> Metric:
    """Build the Metric descriptor for the h(2)-index (Kosmulski 2006)."""
    m = Metric()
    m.name = "h(2)-index"
    m.full_name = "h(2)-index"
    m.html_name = "<em>h</em>(2)-index"
    m.symbol = "<em>h</em>(2)"
    m.metric_type = INT
    m.graph_type = LINE_CHART
    m.calculate = calculate_h2_index
    m.example = write_h2_index_example
    # single description graph showing the citation curve against y = x^2
    desc_graph = DescriptionGraph()
    desc_graph.name = "h2_index_desc"
    desc_graph.data = write_h2_index_desc_data
    m.description_graphs.append(desc_graph)
    equation = r"$$h\left(2\right)=\underset{i}{\max}\left(i^2 \leq C_i\right).$$"
    chart_div = '<div id="chart_' + desc_graph.name + '_div" class="proportional_chart"></div>'
    m.description = ('<p>The <em>h</em>(2)-index (Kosmulski 2006) is similar to the __h-index__, but rather than '
                     'define the core based on <em>h</em> publications having <em>h</em> citations, this index '
                     'requires the <em>h</em> publications to each have <em>h</em><sup>2</sup> citations:</p>'
                     + equation + chart_div +
                     '<p>This leads to <em>h</em>(2) having a stricter definition of the core publications than '
                     'many other metrics.</p>')
    m.references = ['Kosmulski, M. (2006) A new Hirsch-type index saves time and works equally well as the original '
                    '<em>h-</em>index. <em>ISSI Newsletter</em> 2(3):4–6.']
    m.properties["Core Metric"] = True
    m.properties["Core Citations"] = True
    m.properties["Core Publications"] = True
    return m
# mu-index (Glanzel and Schubert 2010)
def calculate_mu_index(metric_set: MetricSet) -> int:
    """Delegate the mu-index (Glanzel and Schubert 2010) calculation to Impact_Funcs."""
    return Impact_Funcs.calculate_mu_index(metric_set.citations)
def write_mu_index_example(metric_set: MetricSet) -> str:
outstr = "<p>Publications are ordered by number of citations, from highest to lowest.</p>"
outstr += "<table class=\"example_table\">"
citations = sorted(metric_set.citations, reverse=True)
row1 = "<tr><th>Citations (<em>C<sub>i</sub></em>)</th>"
row2 = "<tr class=\"top_row\"><th>Rank (<em>i</em>)</th>"