@comment{Removed: GitHub page chrome and scraped line-number gutter (not part of the .bib file). Bibliography content begins below.}
Automatically generated by Mendeley Desktop 1.16.1
Any changes to this file will be lost if it is regenerated by Mendeley.
BibTeX export options can be customized via Options -> BibTeX in Mendeley Desktop
@article{Gupta2013,
abstract = {In the statistics community, outlier detection for time series data has been studied for decades. Recently, with advances in hardware and software technology, there has been a large body of work on temporal outlier detection from a computational perspective within the computer science community. In particular, advances in hardware technology have enabled the availability of various forms of temporal data collection mechanisms, and advances in software technology have enabled a variety of data management mechanisms. This has fueled the growth of different kinds of data sets such as data streams, spatio- temporal data, distributed streams, temporal networks, and time series data, generated by a multitude of applications. There arises a need for an organized and detailed study of the work done in the area of outlier detection with respect to such temporal datasets. In this survey, we provide a comprehensive and structured overview of a large set of interesting outlier definitions for various forms of temporal data, novel techniques, and application scenarios in which specific definitions and techniques have been widely used.},
author = {Gupta, Manish and Gao, Jing and Aggarwal, Charu C. and Han, Jiawei},
doi = {10.1109/TKDE.2013.184},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Outlier Detection for Temporal Data A Survey.pdf:pdf},
issn = {1041-4347},
journal = {{IEEE} Transactions on Knowledge and Data Engineering},
keywords = {Computational modeling,Data mining,Distributed databases,Hidden Markov models,Mining methods and algorithms,Pattern matching,Predictive models,Temporal outlier detection,Time series analysis,applications of temporal outlier detection,computer science community,data handling,data management mechanisms,data streams,distributed data streams,distributed streams,hardware technology,network outliers,software technology,spatio-temporal data,spatio-temporal outliers,statistics community,temporal data collection mechanisms,temporal datasets,temporal networks,temporal outlier detection,time series,time series data},
number = {1},
pages = {1--20},
title = {Outlier Detection for Temporal Data: A Survey},
volume = {25},
year = {2013}
}
@incollection{Aggarwal2013,
author = {Aggarwal, Charu C.},
booktitle = {Outlier Analysis},
doi = {10.1007/978-1-4614-6396-2_5},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/High-dimensional Outlier Detection Survey.pdf:pdf},
publisher = {Springer},
title = {High-Dimensional Outlier Detection: The Subspace Method},
url = {http://link.springer.com/chapter/10.1007/978-1-4614-6396-2{\_}5},
year = {2013}
}
@article{Bengio2012,
abstract = {After a more than decade-long period of relatively little research activity in the area of recurrent neural networks, several new developments will be reviewed here that have allowed substantial progress both in understanding and in technical solutions towards more efficient training of recurrent networks. These advances have been motivated by and related to the optimization issues surrounding deep learning. Although recurrent networks are extremely powerful in what they can in principle represent in terms of modelling sequences,their training is plagued by two aspects of the same issue regarding the learning of long-term dependencies. Experiments reported here evaluate the use of clipping gradients, spanning longer time ranges with leaky integration, advanced momentum techniques, using more powerful output probability models, and encouraging sparser gradients to help symmetry breaking and credit assignment. The experiments are performed on text and music data and show off the combined effects of these techniques in generally improving both training and test error.},
archivePrefix = {arXiv},
arxivId = {1212.0901},
author = {Bengio, Yoshua and Boulanger-Lewandowski, Nicolas and Pascanu, Razvan},
eprint = {1212.0901},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/ADVANCES IN OPTIMIZING RECURRENT NETWORKS.pdf:pdf},
internal-note = {Duplicate of entry Bengio2012a; consider merging and keeping a single key.},
title = {Advances in Optimizing Recurrent Networks},
url = {http://arxiv.org/abs/1212.0901},
year = {2012}
}
@article{mahoney2005trajectory,
author = {Mahoney, Matthew V. and Chan, Philip K.},
internal-note = {Incomplete entry: no journal/venue given -- verify whether this is a technical report and add the missing fields.},
title = {Trajectory Boundary Modeling of Time Series for Anomaly Detection},
year = {2005}
}
@article{Ngkvist2014a,
author = {L{\"a}ngkvist, Martin and Karlsson, Lars and Loutfi, Amy},
doi = {10.1016/j.patrec.2014.01.008},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/A review of unsupervised feature learning and deep learning for time-series modeling.pdf:pdf},
internal-note = {Author surname was mojibake in the Mendeley export ("Ngkvist, Martin L {\~{A}}"); reconstructed as L{\"a}ngkvist -- verify against the publisher record.},
issn = {01678655},
journal = {Pattern Recognition Letters},
pages = {11--24},
title = {A Review of Unsupervised Feature Learning and Deep Learning for Time-Series Modeling},
volume = {42},
year = {2014}
}
@article{Otey2006,
abstract = {Efficiently detecting outliers or anomalies is an important problem in many areas of science, medicine and information technology. Applications range from data cleaning to clinical diagnosis, from detecting anomalous defects in materials to fraud and intrusion detection. Over the past decade, researchers in data mining and statistics have addressed the problem of outlier detection using both parametric and non-parametric approaches in a centralized setting. However, there are still several challenges that must be addressed. First, most approaches to date have focused on detecting outliers in a continuous attribute space. However, almost all real-world data sets contain a mixture of categorical and continuous attributes. Categorical attributes are typically ignored or incorrectly modeled by existing approaches, resulting in a significant loss of information. Second, there have not been any general-purpose distributed outlier detection algorithms. Most distributed detection algorithms are designed with a specific domain (e.g. sensor networks) in mind. Third, the data sets being analyzed may be streaming or otherwise dynamic in nature. Such data sets are prone to concept drift, and models of the data must be dynamic as well. To address these challenges, we present a tunable algorithm for distributed outlier detection in dynamic mixed-attribute data sets.},
author = {Otey, Matthew Eric and Ghoting, Amol and Parthasarathy, Srinivasan},
doi = {10.1007/s10618-005-0014-6},
issn = {1384-5810},
journal = {Data Mining and Knowledge Discovery},
number = {2-3},
pages = {203--228},
title = {Fast Distributed Outlier Detection in Mixed-Attribute Data Sets},
volume = {12},
year = {2006}
}
@inproceedings{Chan2005,
abstract = {Our goal is to generate comprehensible and accurate models from multiple time series for anomaly detection. The models need to produce anomaly scores in an online man- ner for real-life monitoring tasks. We introduce three algo- rithms that work in a constructed feature space and evaluate them with a real data set from the NASA shuttle program. Our offline and online evaluations indicate that our algo- rithms can be more accurate than two existing algorithms. 1.},
author = {Chan, Philip K. and Mahoney, Matthew V.},
booktitle = {{IEEE} International Conference on Data Mining ({ICDM})},
doi = {10.1109/ICDM.2005.101},
isbn = {0-7695-2278-5},
issn = {15504786},
pages = {90--97},
title = {Modeling Multiple Time Series for Anomaly Detection},
year = {2005}
}
@inproceedings{Ye2000,
abstract = {This paper presents an anomaly detection technique to detect intrusions$\backslash$ninto computer and network systems. In this technique, a Markov chain$\backslash$nmodel is used to represent a temporal profile of normal behavior$\backslash$nin a computer and network system. The Markov chain model of the norm$\backslash$nprofile is learned from historic data of the system's normal behavior.$\backslash$nThe observed behavior of the system is analyzed to infer the probability$\backslash$nthat the Markov chain model of the norm profile supports the observed$\backslash$nbehavior. A low probability of support indicates an anomalous behavior$\backslash$nthat may result from intrusive activities. The technique was implemented$\backslash$nand tested on the audit data of a Sun Solaris system. The testing$\backslash$nresults showed that the technique clearly distinguished intrusive$\backslash$nactivities from normal activities in the testing data.},
author = {Ye, Nong},
booktitle = {Proceedings of the 2000 {IEEE} Systems, Man, and Cybernetics Information Assurance and Security Workshop},
internal-note = {Given name expanded from "N" -- verify against the original publication.},
keywords = {anomaly,anomaly detection,anomaly{\_}detection,detection,intrusion detection,iros,markov chain,temporal behaviour},
pages = {171--174},
title = {A {Markov} Chain Model of Temporal Behavior for Anomaly Detection},
year = {2000}
}
@inproceedings{Vinyals2015,
archivePrefix = {arXiv},
arxivId = {1411.4555},
author = {Vinyals, Oriol and Toshev, Alexander},
booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
eprint = {1411.4555},
internal-note = {Author list looks incomplete in the Mendeley export -- verify the full author list against the published paper.},
isbn = {9781467369640},
title = {Show and Tell: A Neural Image Caption Generator},
year = {2015}
}
@inproceedings{Sutskever2013a,
abstract = {Deep and recurrent neural networks (DNNs and RNNs respectively) are powerful models that were considered to be almost impossible to train using stochastic gradient descent with momentum. In this paper, we show that when stochastic gradient descent with momentum uses a well-designed random initialization and a particular type of slowly increasing schedule for the momentum parameter, it can train both DNNs and RNNs (on datasets with long-term dependencies) to levels of performance that were previously achievable only with Hessian-Free optimization. We find that both the initialization and the momentum are crucial since poorly initialized networks cannot be trained with momentum and well-initialized networks perform markedly worse when the momentum is absent or poorly tuned. Our success training these models suggests that previous attempts to train deep and recurrent neural networks from random initializations have likely failed due to poor initialization schemes. Furthermore, carefully tuned momentum methods suffice for dealing with the curvature issues in deep and recurrent network training objectives without the need for sophisticated second-order methods.},
author = {Sutskever, Ilya and Martens, James and Dahl, George and Hinton, Geoffrey},
booktitle = {Proceedings of the 30th International Conference on Machine Learning},
doi = {10.1109/ICASSP.2013.6639346},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/On the importance of initialization and momentum in deep learning.pdf:pdf},
isbn = {978-1-4799-0356-6},
issn = {15206149},
pages = {1139--1147},
series = {{JMLR} W{\&}CP},
title = {On the Importance of Initialization and Momentum in Deep Learning},
volume = {28},
year = {2013}
}
@inproceedings{Wei2006,
abstract = {Over the past three decades, there has been a great deal of research on shape analysis, focusing mostly on shape indexing, clustering, and classification. In this work, we introduce the new problem of finding shape discords, the most unusual shapes in a collection. We motivate the problem by considering the utility of shape discords in diverse domains including zoology, anthropology, and medicine. While the brute force search algorithm has quadratic time complexity, we avoid this by using locality-sensitive hashing to estimate similarity between shapes which enables us to reorder the search more efficiently. An extensive experimental evaluation demonstrates that our approach can speed up computation by three to four orders of magnitude.},
author = {Wei, Li and Keogh, Eamonn and Xi, Xiaopeng},
booktitle = {Proceedings of the {IEEE} International Conference on Data Mining ({ICDM})},
doi = {10.1109/ICDM.2006.138},
internal-note = {Third author was exported as "Xi, Aopeng"; corrected to Xiaopeng -- verify against the published paper.},
isbn = {0769527019},
issn = {15504786},
keywords = {anomaly detection,shape},
pages = {711--720},
title = {{SAXually} Explicit Images: Finding Unusual Shapes},
year = {2006}
}
@article{Erfani2014,
author = {Erfani, Sarah M. and Law, Yee Wei and Karunasekera, Shanika and Leckie, Christopher A. and Palaniswami, Marimuthu},
doi = {10.1007/978-3-319-06608-0_48},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Privacy-Preserving Collaborative Anomaly Detection for Participatory Sensing.pdf:pdf},
issn = {16113349},
journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
keywords = {Anomaly detection,Collaborative learning,Horizontally partitioned data,Participatory sensing,Privacy-preserving data mining},
number = {PART 1},
pages = {581--593},
title = {Privacy-Preserving Collaborative Anomaly Detection for Participatory Sensing},
volume = {8443 LNAI},
year = {2014}
}
@inproceedings{Laptev2015,
author = {Laptev, Nikolay and Flint, Ian},
booktitle = {Proceedings of the 21st {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining ({KDD})},
doi = {10.1145/2783258.2788611},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Generic and Scalable Framework for Automated Time-Series Anomaly Detection.pdf:pdf},
isbn = {9781450336642},
title = {Generic and Scalable Framework for Automated Time-Series Anomaly Detection},
year = {2015}
}
@article{Bengio2012a,
abstract = {After a more than decade-long period of relatively little research activity in the area of recurrent neural networks, several new developments will be reviewed here that have allowed substantial progress both in understanding and in technical solutions towards more efficient training of recurrent networks. These advances have been motivated by and related to the optimization issues surrounding deep learning. Although recurrent networks are extremely powerful in what they can in principle represent in terms of modelling sequences,their training is plagued by two aspects of the same issue regarding the learning of long-term dependencies. Experiments reported here evaluate the use of clipping gradients, spanning longer time ranges with leaky integration, advanced momentum techniques, using more powerful output probability models, and encouraging sparser gradients to help symmetry breaking and credit assignment. The experiments are performed on text and music data and show off the combined effects of these techniques in generally improving both training and test error.},
archivePrefix = {arXiv},
arxivId = {1212.0901},
author = {Bengio, Yoshua and Boulanger-Lewandowski, Nicolas and Pascanu, Razvan},
eprint = {1212.0901},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/ADVANCES IN OPTIMIZING RECURRENT NETWORKS.pdf:pdf},
internal-note = {Duplicate of entry Bengio2012; consider merging and keeping a single key.},
title = {Advances in Optimizing Recurrent Networks},
url = {http://arxiv.org/abs/1212.0901},
year = {2012}
}
@inproceedings{Ge2010,
abstract = {The increasing availability of large-scale location traces creates unprecedent opportunities to change the paradigm for identifying abnormal moving activities. Indeed, various aspects of abnormality of moving patterns have recently been exploited, such as wrong direction and wandering. However, there is no recognized way of combining different aspects into an unified evolving abnormality score which has the ability to capture the evolving nature of abnormal moving trajectories. To that end, in this paper, we provide an evolving trajectory outlier detection method, named TOP-EYE, which continuously computes the outlying score for each trajectory in an accumulating way. Specifically, in TOP-EYE, we introduce a decay function to mitigate the influence of the past trajectories on the evolving outlying score, which is defined based on the evolving moving direction and density of trajectories. This decay function enables the evolving computation of accumulated outlying scores along the trajectories. An advantage of TOP-EYE is to identify evolving outliers at very early stage with relatively low false alarm rate. Finally, experimental results on real-world location traces show that TOP-EYE can effectively capture evolving abnormal trajectories.},
author = {Ge, Yong and Xiong, Hui and Zhou, Zhi-hua and Ozdemir, Hasan and Yu, Jannite and Lee, K. C.},
booktitle = {Proceedings of the 19th ACM international conference on Information and knowledge management},
doi = {10.1145/1871437.1871716},
isbn = {9781450300995},
keywords = {outlier},
pages = {1733--1736},
title = {{TOP-EYE}: Top-{$k$} Evolving Trajectory Outlier Detection},
year = {2010}
}
@inproceedings{Ester1996,
author = {Ester, Martin and Kriegel, Hans-Peter and Sander, J{\"o}rg and Xu, Xiaowei},
booktitle = {Proceedings of the Second International Conference on Knowledge Discovery and Data Mining ({KDD})},
internal-note = {Removed an abstract that described a different paper (k-medoids sampling schemes) and doi/eprint fields that held a CiteSeer identifier (10.1.1.71.1980), which is not a DOI.},
isbn = {1577350049},
keywords = {arbitrary shape of clusters,clustering algorithms,databases,efficiency on large spatial databases,handling noise},
pages = {226--231},
title = {A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.20.2930},
year = {1996}
}
@article{Bayer2014a,
abstract = {Leveraging advances in variational inference, we propose to enhance recurrent neural networks with latent variables, resulting in Stochastic Recurrent Networks (STORNs). The model i) can be trained with stochastic gradient methods, ii) allows structured and multi-modal conditionals at each time step, iii) features a reliable estimator of the marginal likelihood and iv) is a generalisation of deterministic recurrent neural networks. We evaluate the method on four polyphonic musical data sets and motion capture data.},
archivePrefix = {arXiv},
arxivId = {1411.7610},
author = {Bayer, Justin and Osendorfer, Christian},
eprint = {1411.7610},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/LEARNING STOCHASTIC RECURRENT NETWORKS.pdf:pdf},
pages = {1--9},
title = {Learning Stochastic Recurrent Networks},
url = {http://arxiv.org/abs/1411.7610},
year = {2014}
}
@article{Venugopalan2014,
abstract = {Solving the visual symbol grounding problem has long been a goal of artificial intelligence. The field appears to be advancing closer to this goal with recent breakthroughs in deep learning for natural language grounding in static images. In this paper, we propose to translate videos directly to sentences using a unified deep neural network with both convolutional and recurrent structure. Described video datasets are scarce, and most existing methods have been applied to toy domains with a small vocabulary of possible words. By transferring knowledge from 1.2M+ images with category labels and 100,000+ images with captions, our method is able to create sentence descriptions of open-domain videos with large vocabularies. We compare our approach with recent work using language generation metrics, subject, verb, and object prediction accuracy, and a human evaluation.},
archivePrefix = {arXiv},
arxivId = {1412.4729},
author = {Venugopalan, Subhashini and Xu, Huijuan and Donahue, Jeff and Rohrbach, Marcus and Mooney, Raymond and Saenko, Kate},
eprint = {1412.4729},
title = {Translating Videos to Natural Language Using Deep Recurrent Neural Networks},
url = {http://arxiv.org/abs/1412.4729},
year = {2014}
}
@inproceedings{Zhang2003,
abstract = {The state transition, which is hidden in the hidden Markov model (HMM), can be used to characterize the intrinsic difference between normal action and intrusion behavior. So HMM is an efficient way to detect anomalies. A new anomaly detection method based on a hierarchical HMM is proposed based on the concept of normal database and abnormal database. It is shown by analysis and simulation results that the proposed method is effective to increase the accuracy of anomaly detection.},
author = {Zhang, Xiaoqiang and Fan, Pingzhi and Zhu, Zhongliang},
booktitle = {Proceedings of the Fourth International Conference on Parallel and Distributed Computing, Applications and Technologies ({PDCAT})},
doi = {10.1109/PDCAT.2003.1236299},
internal-note = {Original booktitle named an unrelated 2002 conference (MTT'2002), contradicting the PDCAT doi/url; corrected per the DOI -- verify against IEEE Xplore.},
isbn = {0-7803-7840-7},
keywords = {Analytical models,Data mining,Databases,Hidden Markov models,IDS,Intrusion detection,Neural networks,Pattern recognition,Power system modeling,Support vector machines,Viterbi algorithm,abnormal database,alarm systems,anomaly detection method,authorisation,database management systems,hidden Markov model,hidden Markov models,hierarchical HMM,intrusion behavior,intrusion detection system,normal database,safety systems,state transition},
pages = {249--252},
title = {A New Anomaly Detection Method Based on Hierarchical {HMM}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1236299},
year = {2003}
}
@inproceedings{Jozefowicz2015,
abstract = {The Recurrent Neural Network (RNN) is an extremely powerful sequence model that is often difficult to train. The Long Short-Term Memory (LSTM) is a specific RNN architecture whose design makes it much easier to train. While wildly successful in practice, the LSTM's architecture appears to be ad-hoc so it is not clear if it is optimal, and the significance of its individual components is unclear. In this work, we aim to determine whether the LSTM architecture is optimal or whether much better architectures exist. We conducted a thorough architecture search where we evaluated over ten thousand different RNN architectures, and identified an architecture that outperforms both the LSTM and the recently-introduced Gated Recurrent Unit (GRU) on some but not all tasks. We found that adding a bias of 1 to the LSTM's forget gate closes the gap between the LSTM and the GRU.},
author = {Jozefowicz, Rafal and Zaremba, Wojciech and Sutskever, Ilya},
booktitle = {Proceedings of the 32nd International Conference on Machine Learning},
pages = {2342--2350},
title = {An Empirical Exploration of Recurrent Network Architectures},
url = {http://jmlr.org/proceedings/papers/v37/jozefowicz15.html},
year = {2015}
}
@article{Schubert2014a,
abstract = {Outlier detection research has been seeing many new algorithms every year that often appear to be only slightly different from existing methods along with some experiments that show them to "clearly outperform" the others. However, few approaches come along with a clear analysis of existing methods and a solid theoretical differentiation. Here, we provide a formalized method of analysis to allow for a theoretical comparison and generalization of many existing methods. Our unified view improves understanding of the shared properties and of the differences of outlier detection models. By abstracting the notion of locality from the classic distance-based notion, our framework facilitates the construction of abstract methods for many special data types that are usually handled with specialized algorithms. In particular, spatial neighborhood can be seen as a special case of locality. Here we therefore compare and generalize approaches to spatial outlier detection in a detailed manner. We also discuss temporal data like video streams, or graph data such as community networks. Since we reproduce results of specialized approaches with our general framework, and even improve upon them, our framework provides reasonable baselines to evaluate the true merits of specialized approaches. At the same time, seeing spatial outlier detection as a special case of local outlier detection, opens up new potentials for analysis and advancement of methods. {\textcopyright} 2012 The Author(s).},
author = {Schubert, Erich and Zimek, Arthur and Kriegel, Hans-Peter},
doi = {10.1007/s10618-012-0300-z},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Local outlier detection reconsidered a generalized view on locality with applications to spatial, video, and network outlier detection.pdf:pdf},
issn = {13845810},
journal = {Data Mining and Knowledge Discovery},
keywords = {Local outlier,Network outlier,Spatial outlier,Video outlier},
number = {1},
pages = {190--237},
title = {Local Outlier Detection Reconsidered: A Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection},
volume = {28},
year = {2014}
}
@article{Hochreiter1997,
abstract = {Learning to store information over extended time intervals by recurrent backpropagation takes a very long time, mostly because of insufficient, decaying error backflow. We briefly review Hochreiter's (1991) analysis of this problem, then address it by introducing a novel, efficient, gradient based method called long short-term memory (LSTM). Truncating the gradient where this does not do harm, LSTM can learn to bridge minimal time lags in excess of 1000 discrete-time steps by enforcing constant error flow through constant error carousels within special units. Multiplicative gate units learn to open and close access to the constant error flow. LSTM is local in space and time; its computational complexity per time step and weight is O. 1. Our experiments with artificial data involve local, distributed, real-valued, and noisy pattern representations. In comparisons with real-time recurrent learning, back propagation through time, recurrent cascade correlation, Elman nets, and neural sequence chunking, LSTM leads to many more successful runs, and learns much faster. LSTM also solves complex, artificial long-time-lag tasks that have never been solved by previous recurrent network algorithms.},
author = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
doi = {10.1162/neco.1997.9.8.1735},
issn = {0899-7667},
journal = {Neural Computation},
number = {8},
pages = {1735--1780},
pmid = {9377276},
title = {Long {Short-Term} Memory},
volume = {9},
year = {1997}
}
@inproceedings{Malhotra2015,
abstract = {Long Short Term Memory (LSTM) networks have been demonstrated to be particularly useful for learning sequences containing longer term patterns of unknown length, due to their ability to maintain long term memory. Stacking recurrent hidden layers in such networks also enables the learning of higher level temporal features, for faster learning with sparser representations. In this paper, we use stacked LSTM net- works for anomaly/fault detection in time series. A network is trained on non-anomalous data and used as a predictor over a number of time steps. The resulting prediction errors are modeled as a multivariate Gaussian distribution, which is used to assess the likelihood of anomalous behav- ior. The efficacy of this approach is demonstrated on four datasets: ECG, space shuttle, power demand, and multi-sensor engine dataset.},
author = {Malhotra, Pankaj and Vig, Lovekesh and Shroff, Gautam and Agarwal, Puneet},
booktitle = {European Symposium on Artificial Neural Networks},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Long Short Term Memory Networks for Anomaly Detection in Time Series.pdf:pdf},
internal-note = {pages 22--24 match the conference dates (22--24 April), not page numbers --- verify against the ESANN 2015 proceedings},
isbn = {9782875870148},
month = apr,
pages = {22--24},
title = {{Long Short Term Memory Networks for Anomaly Detection in Time Series}},
year = {2015}
}
@article{Chandola2009,
author = {Chandola, Varun and Banerjee, Arindam and Kumar, Vipin},
doi = {10.1145/1541880.1541882},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Anomaly Detection A Survey acm.pdf:pdf},
issn = {03600300},
journal = {ACM Computing Surveys},
keywords = {Anomaly detection,outlier detection},
month = jul,
number = {3},
pages = {1--58},
title = {{Anomaly Detection: A Survey}},
url = {http://portal.acm.org/citation.cfm?doid=1541880.1541882},
volume = {41},
year = {2009}
}
@article{Jones2014,
author = {Jones, Michael and Nikovski, Daniel and Imamura, Makoto and Hirata, Takahisa},
title = {{Anomaly Detection in Real-Valued Multidimensional Time Series}},
journal = {ASEBSC},
pages = {1--9},
year = {2014},
isbn = {9781625610003},
url = {http://ase360.org/handle/123456789/56},
abstract = {We present a new algorithm for detecting anomalies in real- valued multidimensional time series. Our algorithm uses an exemplar-based model that is used to detect anomalies in single dimensions of the time series and a function that pre- dicts one dimension from a related one to detect anomalies in multiple dimensions. The algorithm is shown to work on a variety of different types of time series as well as to de- tect a variety of different types of anomalies. We compare our algorithm to other algorithms for both one-dimensional and multidimensional time series and demonstrate that it improves over the state-of-the-art.}
}
@inproceedings{Bergstra2010,
abstract = {Theano is a compiler for mathematical expressions in Python that combines the convenience of NumPy's syntax with the speed of optimized native machine language. The user composes mathematical expressions in a high-level description that mimics NumPy's syntax and semantics, while being statically typed and functional (as opposed to imperative). These expressions allow Theano to provide symbolic differentiation. Before performing computation, Theano optimizes the choice of expressions, translates them into C++ (or CUDA for GPU), compiles them into dynamically loaded Python modules, all automatically. Common machine learning algorithms implemented with Theano are from 1:6 to 7:5 faster than competitive alternatives (including those implemented with C/C++, NumPy/SciPy and MATLAB) when compiled for the CPU and between 6:5 and 44 faster when compiled for the GPU. This paper illustrates how to use Theano, outlines the scope of the compiler, provides benchmarks on both CPU and GPU processors, and explains its overall design},
author = {Bergstra, James and Breuleux, Olivier and Bastien, Frederic and Lamblin, Pascal and Pascanu, Razvan and Desjardins, Guillaume and Turian, Joseph and Warde-Farley, David and Bengio, Yoshua},
booktitle = {Proceedings of the 9th Python in Science Conference},
file = {:C$\backslash$:/Users/Majid/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Bergstra et al. - 2010 - Theano a CPU and GPU math compiler in Python.pdf:pdf},
pages = {1--7},
title = {{Theano: a CPU and GPU math compiler in Python}},
url = {http://www-etud.iro.umontreal.ca/{~}wardefar/publications/theano{\_}scipy2010.pdf},
year = {2010}
}
@techreport{Ng2006,
author = {Ng, Brenda},
doi = {10.2172/900157},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Survey of Anomaly Detection Methods.pdf:pdf},
institution = {Lawrence Livermore National Laboratory},
internal-note = {OSTI report (doi 10.2172/900157); entry type changed from article to techreport --- verify report number},
title = {{Survey of Anomaly Detection Methods}},
url = {http://www.osti.gov/scitech/biblio/900157-VDshbd/},
year = {2006}
}
@article{Bengio1994,
abstract = {Recurrent neural networks can be used to map input sequences to output sequences, such as for recognition, production or prediction problems. However, practical difficulties have been reported in training recurrent neural networks to perform tasks in which the temporal contingencies present in the input/output sequences span long intervals. We show why gradient based learning algorithms face an increasingly difficult problem as the duration of the dependencies to be captured increases. These results expose a trade-off between efficient learning by gradient descent and latching on information for long periods. Based on an understanding of this problem, alternatives to standard gradient descent are considered.},
author = {Bengio, Yoshua and Simard, Patrice and Frasconi, Paolo},
doi = {10.1109/72.279181},
issn = {10459227},
journal = {IEEE Transactions on Neural Networks},
number = {2},
pages = {157--166},
pmid = {18267787},
title = {{Learning long-term dependencies with gradient descent is difficult}},
volume = {5},
year = {1994}
}
@inproceedings{Szymanski2004,
abstract = { In this paper, a novel recursive data mining method based on the simple but powerful model of cognition called a conceptor is introduced and applied to computer security. The method recursively mines a string of symbols by finding frequent patterns, encoding them with unique symbols and rewriting the string using this new coding. We apply this technique to two related but important problems in computer security: (i) masquerade detection to prevent a security attack in which an intruder impersonates a legitimate user to gain access to the resources, and (ii) author identification, in which anonymous or disputed computer session needs to be attributed to one of a set of potential authors. Many methods based on automata theory, hidden Markov models, Bayesian models or even matching algorithms from bioinformatics have been proposed to solve the masquerading detection problem but less work has been done on the author identification. We used recursive data mining to characterize the structure and high-level symbols in user signatures and the monitored sessions. We used one-class SVM to measure the similarity of these two characterizations. We applied weighting prediction scheme to author identification. On the SEA dataset that we used in our experiments, the results were very promising.},
author = {Szymanski, B. K. and Zhang, Y.},
booktitle = {Proceedings from the Fifth Annual IEEE SMC Information Assurance Workshop},
doi = {10.1109/IAW.2004.1437848},
isbn = {0-7803-8572-1},
keywords = {- masquerade detection,author identification,intrusion detection,one-class svm,recursive data mining},
pages = {424--431},
title = {{Recursive data mining for masquerade detection and author identification}},
year = {2004}
}
@inproceedings{Marchi2015,
author = {Marchi, Erik and Vesperini, Fabio and Eyben, Florian and Squartini, Stefano and Schuller, Bj{\"{o}}rn},
booktitle = {Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on},
doi = {10.1109/ICASSP.2015.7178320},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/A NOVEL APPROACH FOR AUTOMATIC ACOUSTIC NOVELTY DETECTION USING A denoising autoencoder with bidirectional neural networks.pdf:pdf},
isbn = {9781467369978},
keywords = {Acoustic Novelty Detection,Bidirectional LSTM,Denoising Autoencorder,Feature extraction,Hidden Markov models,Noise reduction,Recurrent Neural Networks,Recurrent neural networks,Training,abnormal-novel acoustic signals,acoustic signal processing,auditory spectral features,automatic acoustic novelty detection,bidirectional LSTM neural networks,denoising autoencoder,long short term memory recurrent neural networks,novel unsupervised approach,recurrent neural nets,reference-normal data},
month = apr,
pages = {1996--2000},
title = {{A novel approach for automatic acoustic novelty detection using a denoising autoencoder with bidirectional LSTM neural networks}},
year = {2015}
}
@inproceedings{Sequeira2002,
abstract = {Security of computer systems is essential to their acceptance and utility. Computer security analysts use intrusion detection systems to assist them in maintaining computer system security. This paper deals with the problem of differentiating between masqueraders and the true user of a computer terminal. Prior efficient solutions are less suited to real time application, often requiring all training data to be labeled, and do not inherently provide an intuitive idea of what the data model means. Our system, called ADMIT, relaxes these constraints, by creating user profiles using semi-incremental techniques. It is a real-time intrusion detection system with host-based data collection and processing. Our method also suggests ideas for dealing with concept drift and affords a detection rate as high as 80.3{\%} and a false positive rate as low as 15.3{\%}.},
author = {Sequeira, Karlton and Zaki, Mohammed},
booktitle = {Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
pages = {386--395},
title = {{ADMIT: anomaly-based data mining for intrusions}},
url = {http://dl.acm.org/citation.cfm?id=775103},
year = {2002}
}
@article{Keogh2004a,
abstract = {Given the recent explosion of interest in streaming data and online algorithms, clustering of time seriessubsequences, extracted via a sliding window, has received much attention. In this work we make asurprising claim. Clustering of time series subsequences is meaningless. More concretely, clusters extractedfrom these time series are forced to obey a certain constraint that is pathologically unlikely to be satisfied byany dataset, and because of this, the clusters extracted by any clustering algorithm are essentially random.},
author = {Keogh, Eamonn and Lin, Jessica},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Clustering of Time Series Subsequences is Meaningless.pdf:pdf},
title = {{Clustering of Time Series Subsequences is Meaningless}},
url = {http://citeseer.ist.psu.edu/670978},
year = {2004}
}
@inproceedings{Ferdousi2006,
author = {Ferdousi, Z. and Maeda, A.},
booktitle = {22nd International Conference on Data Engineering Workshops (ICDEW'06)},
doi = {10.1109/ICDEW.2006.157},
file = {:C$\backslash$:/Users/Majid/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Ferdousi, Maeda - 2006 - Unsupervised Outlier Detection in Time Series Data.pdf:pdf},
isbn = {0-7695-2571-7},
keywords = {data mining,fraud detection,outlier detection,peer group analysis,series data,time},
pages = {x121--x121},
publisher = {IEEE},
title = {{Unsupervised Outlier Detection in Time Series Data}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1623916},
year = {2006}
}
@inproceedings{Zhen2006,
author = {Zhen, Guo and Jiang, Guofei and Chen, Haifeng and Yoshihira, Kenji},
title = {{Tracking probabilistic correlation of monitoring data for fault detection in complex systems}},
booktitle = {International Conference on Dependable Systems and Networks},
pages = {259--268},
publisher = {IEEE},
year = {2006},
doi = {10.1109/DSN.2006.70},
isbn = {0-7695-2607-1},
keywords = {Gaussian distribution,Gaussian mixture model,anomaly detection,complex system,distributed system,expectation-maximisation algorithm,expectation-maximization algorithm,fault analysis,fault detection,fault diagnosis,fault tolerant computing,probabilistic correlation tracking,recursive algorithm,system monitoring},
abstract = {Due to their growing complexity, it becomes extremely difficult to detect and isolate faults in complex systems. While large amount of monitoring data can be collected from such systems for fault analysis, one challenge is how to correlate the data effectively across distributed systems and observation time. Much of the internal monitoring data reacts to the volume of user requests accordingly when user requests flow through distributed systems. In this paper, we use Gaussian mixture models to characterize probabilistic correlation between flow-intensities measured at multiple points. A novel algorithm derived from expectation-maximization (EM) algorithm is proposed to learn the "likely" boundary of normal data relationship, which is further used as an oracle in anomaly detection. Our recursive algorithm can adaptively estimate the boundary of dynamic data relationship and detect faults in real time. Our approach is tested in a real system with injected faults and the results demonstrate its feasibility}
}
@inproceedings{Cho2014,
abstract = {In this paper, we propose a novel neural network model called RNN Encoder--Decoder that consists of two recurrent neural networks (RNN). One RNN encodes a sequence of symbols into a fixed-length vector representation, and the other decodes the representation into another sequence of symbols. The encoder and decoder of the proposed model are jointly trained to maximize the conditional probability of a target sequence given a source sequence. The performance of a statistical machine translation system is empirically found to improve by using the conditional probabilities of phrase pairs computed by the RNN Encoder--Decoder as an additional feature in the existing linear model. Qualitatively, we show that the proposed model learns a semantically and syntactically meaningful representation of linguistic phrases.},
archivePrefix = {arXiv},
arxivId = {1406.1078},
author = {Cho, Kyunghyun and van Merrienboer, Bart and Gulcehre, Caglar and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
doi = {10.3115/v1/D14-1179},
eprint = {1406.1078},
pages = {1724--1734},
title = {{Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation}},
url = {http://arxiv.org/abs/1406.1078},
year = {2014}
}
@incollection{Thottan2010,
address = {London},
author = {Thottan, Marina and Liu, Guanglei and Ji, Chuanyi},
booktitle = {Algorithms for Next Generation Networks},
doi = {10.1007/978-1-84882-765-3},
editor = {Cormode, Graham and Thottan, Marina},
file = {:C$\backslash$:/Users/Majid/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Thottan, Liu, Ji - 2010 - Algorithms for Next Generation Networks(2).pdf:pdf},
internal-note = {chapter title reconstructed from the book's table of contents (pp. 239--261) --- verify},
isbn = {978-1-84882-764-6},
pages = {239--261},
publisher = {Springer London},
series = {Computer Communications and Networks},
title = {{Anomaly Detection Approaches for Communication Networks}},
url = {http://link.springer.com/10.1007/978-1-84882-765-3},
year = {2010}
}
@inproceedings{knorr1997unified,
author = {Knorr, Edwin M and Ng, Raymond T},
title = {{A unified approach for mining outliers}},
booktitle = {Proceedings of the 1997 conference of the Centre for Advanced Studies on Collaborative research},
pages = {11},
organization = {IBM Press},
year = {1997}
}
@inproceedings{Mikolov2012,
abstract = {Recurrent neural network language models (RNNLMs) have recently demonstrated state-of-the-art performance across a variety of tasks. In this paper, we improve their performance by providing a contextual real-valued input vector in association with each word. This vector is used to convey contextual information about the sentence being modeled. By performing Latent Dirichlet Allocation using a block of preceding text, we achieve a topic-conditioned RNNLM. This approach has the key advantage of avoiding the data fragmentation associated with building multiple topic models on different data subsets. We report perplexity results on the Penn Treebank data, where we achieve a new state-of-the-art.We further apply the model to the Wall Street Journal speech recognition task, where we observe improvements in word-error-rate.},
author = {Mikolov, Tomas and Zweig, Geoffrey},
booktitle = {IEEE Workshop on Spoken Language Technology (SLT)},
doi = {10.1109/SLT.2012.6424228},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/CONTEXT DEPENDENT RECURRENT NEURAL NETWORK LANGUAGE MODEL.pdf:pdf},
isbn = {978-1-4673-5126-3},
keywords = {Latent Dirichlet Allocation,language modelling,recurrent neural networks,topic models},
pages = {234--239},
title = {{Context Dependent Recurrent Neural Network Language Model}},
year = {2012}
}
@mastersthesis{Hochreiter,
author = {Hochreiter, Sepp},
school = {Institut f{\"{u}}r Informatik, Technische Universit{\"{a}}t M{\"{u}}nchen},
title = {{Untersuchungen zu dynamischen neuronalen Netzen}},
url = {http://scholar.google.com/scholar?hl=en{\&}btnG=Search{\&}q=intitle:Untersuchungen+zu+dynamischen+neuronalen+Netzen{\#}0},
year = {1991}
}
@article{Keogh2001,
abstract = {The problem of similarity search in large time series databases has attracted much attention recently. It is a non-trivial problem because of the inherent high dimensionality of the data. The most promising solutions involve first performing dimensionality reduction on the data, and then indexing the reduced data with a spatial access method. Three major dimensionality reduction techniques have been proposed: Singular Value Decomposition (SVD), the Discrete Fourier transform (DFT), and more recently the Discrete Wavelet Transform (DWT). In this work we introduce a new dimensionality reduction technique which we call Piecewise Aggregate Approximation (PAA). We theoretically and empirically compare it to the other techniques and demonstrate its superiority. In addition to being competitive with or faster than the other methods, our approach has numerous other advantages. It is simple to understand and to implement, it allows more flexible distance measures, including weighted Euclidean queries, and the index can be built in linear time.},
author = {Keogh, Eamonn and Chakrabarti, Kaushik and Pazzani, Michael and Mehrotra, Sharad},
doi = {10.1007/PL00011669},
issn = {0219-1377},
journal = {Knowledge and Information Systems},
keywords = {data mining,dimensionality reduction,indexing and retrieval,time series},
number = {3},
pages = {263--286},
title = {{Dimensionality Reduction for Fast Similarity Search in Large Time Series Databases}},
url = {http://research.microsoft.com/pubs/79074/time{\_}series{\_}indexing.pdf},
volume = {3},
year = {2001}
}
@inproceedings{Gonzalez-Dominguez2014,
author = {Gonzalez-Dominguez, Javier and Lopez-Moreno, Ignacio and Sak, Hasim and Gonzalez-Rodriguez, Joaquin and Moreno, Pedro J.},
title = {{Automatic Language Identification using Long Short-Term Memory Recurrent Neural Networks}},
booktitle = {Interspeech-2014},
pages = {2155--2159},
year = {2014},
issn = {2308457X},
abstract = {This work explores the use of Long Short-Term Memory (LSTM) recurrent neural networks (RNNs) for automatic lan-guage identification (LID). The use of RNNs is motivated by their better ability in modeling sequences with respect to feed forward networks used in previous works. We show that LSTM RNNs can effectively exploit temporal dependencies in acoustic data, learning relevant features for language discrimination pur-poses. The proposed approach is compared to baseline i-vector and feed forward Deep Neural Network (DNN) systems in the NIST Language Recognition Evaluation 2009 dataset. We show LSTM RNNs achieve better performance than our best DNN system with an order of magnitude fewer parameters. Further, the combination of the different systems leads to significant per-formance improvements (up to 28{\%}).}
}
@article{Lane1999,
abstract = {The anomal-detection problem can be forrmulated as one of learning to characterize the behaviors of an individual, system, or network in terms of temporal sequences of discrete data. We present an approach on the basis of instrance-based learning (IBL) techniques. To cast the anomaly-detection task in an IBL framework, we employ an approach that transforms temporal sequences of discrete, unordered observations into a metric space via a similarity measure that encodes intra-attribue dependencies. Classification boundaries are selected from an a posteriori characterization of valid user bhaviours, coupled with a domain heurisitc. An empercial evaluation of the approach on user command data demonstrates that we can accurately differentiate the profiled user from alternative users when the avaiable features encode sufficient information. Furthermore, we demonstrate that the system detects anomalous conditions quickly - an important quality for reducing data storage requirements of the user profile, invluding instance-selection methods and clustering. An empirical evaluation shows that a new greedy clustering algorithm reduces the size of the user model by 70{\%}, with only a small loss in accuracy.},
author = {Lane, Terran and Brodley, Carla E.},
doi = {10.1145/322510.322526},
issn = {10949224},
journal = {ACM Transactions on Information and System Security},
number = {3},
pages = {295--331},
title = {{Temporal sequence learning and data reduction for anomaly detection}},
volume = {2},
year = {1999}
}
@mastersthesis{Cheboli2010a,
author = {Cheboli, Deepthi},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Anomaly Detection of Time Series.pdf:pdf},
internal-note = {UMN Digital Conservancy record; entry type changed from article to mastersthesis --- verify},
school = {University of Minnesota},
title = {{Anomaly detection of time series}},
url = {http://udc.umn.edu/handle/11299/92985},
year = {2010}
}
@article{Lasaponara2006,
abstract = {In this work, we discuss the use of principal component analysis (PCA) for evaluating the vegetation interannual anomalies. The analysis was preformed on a temporal series (1999-2002) of the yearly Maximum Value Composit of SPOT/VEGETATION NDVI acquired for Sicily Island. The PCA was used as a data transform to enhance regions of localized change in multi-temporal data sets. This is a direct result of the high correlation that exists among images for regions that do not change significantly and the relatively low correlation associated with regions that change substantially. Both naturally vegetated areas (forest, shrub-land, herbaceous cover) and agricultural lands have been investigated in order to extract the most prominent natural and/or man-induced alterations affecting vegetation behavior. Our findings suggest that PCA can provide valuable information for environmental management policies involving biodiversity preservation and rational exploitation of natural and agricultural resources. ?? 2005 Elsevier B.V. All rights reserved.},
author = {Lasaponara, R.},
doi = {10.1016/j.ecolmodel.2005.10.035},
internal-note = {pmid looks like auto-export junk for this Ecological Modelling article --- verify before relying on it},
issn = {03043800},
journal = {Ecological Modelling},
keywords = {Change detection,Desertification,PCA,Satellite temporal series},
number = {4},
pages = {429--434},
pmid = {17952086},
title = {{On the use of principal component analysis (PCA) for evaluating interannual vegetation anomalies from Spot/Vegetation NDVI temporal series}},
volume = {194},
year = {2006}
}
@article{Zolhavarieh2014a,
abstract = {Clustering of subsequence time series remains an open issue in time series clustering. Subsequence time series clustering is used in different fields, such as e-commerce, outlier detection, speech recognition, biological systems, DNA recognition, and text mining. One of the useful fields in the domain of subsequence time series clustering is pattern recognition. To improve this field, a sequence of time series data is used. This paper reviews some definitions and backgrounds related to subsequence time series clustering. The categorization of the literature reviews is divided into three groups: preproof, interproof, and postproof period. Moreover, various state-of-the-art approaches in performing subsequence time series clustering are discussed under each of the following categories. The strengths and weaknesses of the employed methods are evaluated as potential issues for future studies.},
author = {Zolhavarieh, Seyedjamal and Aghabozorgi, Saeed and Teh, Ying Wah},
doi = {10.1155/2014/312521},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/A Review of Subsequence Time Series Clustering.pdf:pdf},
issn = {1537-744X},
journal = {The Scientific World Journal},
pages = {312521},
pmid = {25140332},
title = {{A Review of Subsequence Time Series Clustering}},
url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4130317{\&}tool=pmcentrez{\&}rendertype=abstract},
volume = {2014},
year = {2014}
}
@article{Borne,
author = {Borne, Kirk D},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/kborne-ML.pdf:pdf},
internal-note = {year and venue missing --- locate the publication record and complete this entry},
keywords = {data mining,data streams,outlier detection,space science,unsupervised learning},
pages = {1--26},
title = {{Effective Outlier Detection using K-Nearest Neighbor Data Distributions: Unsupervised Exploratory Mining of Non-Stationarity in Data Streams}}
}
@article{Graves2009,
abstract = {Recognizing lines of unconstrained handwritten text is a challenging task. The difficulty of segmenting cursive or overlapping characters, combined with the need to exploit surrounding context, has led to low recognition rates for even the best current recognizers. Most recent progress in the field has been made either through improved preprocessing or through advances in language modeling. Relatively little work has been done on the basic recognition algorithms. Indeed, most systems rely on the same hidden Markov models that have been used for decades in speech and handwriting recognition, despite their well-known shortcomings. This paper proposes an alternative approach based on a novel type of recurrent neural network, specifically designed for sequence labeling tasks where the data is hard to segment and contains long-range bidirectional interdependencies. In experiments on two large unconstrained handwriting databases, our approach achieves word recognition accuracies of 79.7 percent on online data and 74.1 percent on offline data, significantly outperforming a state-of-the-art HMM-based system. In addition, we demonstrate the network's robustness to lexicon size, measure the individual influence of its hidden layers, and analyze its use of context. Last, we provide an in-depth discussion of the differences between the network and HMMs, suggesting reasons for the network's superior performance.},
author = {Graves, Alex and Liwicki, Marcus and Fern{\'{a}}ndez, Santiago and Bertolami, Roman and Bunke, Horst and Schmidhuber, J{\"{u}}rgen},
doi = {10.1109/TPAMI.2008.137},
issn = {01628828},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Bidirectional long short-term memory,Connectionist temporal classification,Handwriting recognition,Hidden Markov model,Offline handwriting,Online handwriting,Recurrent neural networks},
number = {5},
pages = {855--868},
pmid = {19299860},
title = {{A novel connectionist system for unconstrained handwriting recognition}},
volume = {31},
year = {2009}
}
@inproceedings{Bluche,
author = {Bluche, T and Louradour, J and Knibbe, M and Moysset, B and Benzeghiba, M F and Kermorvant, C},
booktitle = {Document Analysis Systems (DAS), 2014 11th IAPR International Workshop on},
doi = {10.1109/DAS.2014.40},
keywords = {A2iA Arabic handwritten text recognition system,Accuracy,Arabic handwriting recognition systems,Handwriting recognition,Hidden Markov models,LSTM recurrent neural networks,Large vocabulary Handwriting Recognition,OpenHaRT,OpenHaRT2013 evaluation,ROVER,ROVER combination algorithm,Recurrent Neural Networks,Recurrent neural networks,Text recognition,Training,Vocabulary,full paragraph recognition,handwriting recognition,long short-term memory,n-gram language modeling,natural language processing,recurrent neural nets,text detection,vocabulary selection techniques},
month = apr,
pages = {161--165},
title = {{The A2iA Arabic Handwritten Text Recognition System at the Open HaRT2013 Evaluation}},
year = {2014}
}
@inproceedings{Snoek2012a,
abstract = {Machine learning algorithms frequently require careful tuning of model hyperparameters, regularization terms, and optimization parameters. Unfortunately, this tuning is often a "black art" that requires expert experience, unwritten rules of thumb, or sometimes brute-force search. Much more appealing is the idea of developing automatic approaches which can optimize the performance of a given learning algorithm to the task at hand. In this work, we consider the automatic tuning problem within the framework of Bayesian optimization, in which a learning algorithm's generalization performance is modeled as a sample from a Gaussian process (GP). The tractable posterior distribution induced by the GP leads to efficient use of the information gathered by previous experiments, enabling optimal choices about what parameters to try next. Here we show how the effects of the Gaussian process prior and the associated inference procedure can have a large impact on the success or failure of Bayesian optimization. We show that thoughtful choices can lead to results that exceed expert-level performance in tuning machine learning algorithms. We also describe new algorithms that take into account the variable cost (duration) of learning experiments and that can leverage the presence of multiple cores for parallel experimentation. We show that these proposed algorithms improve on previous automatic procedures and can reach or surpass human expert-level optimization on a diverse set of contemporary algorithms including latent Dirichlet allocation, structured SVMs and convolutional neural networks.},
archivePrefix = {arXiv},
arxivId = {1206.2944},
author = {Snoek, Jasper and Larochelle, Hugo and Adams, Ryan P.},
booktitle = {Advances in Neural Information Processing Systems},
eprint = {1206.2944},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Practical Bayesian Optimization of Machine Learning Algorithms.pdf:pdf},
internal-note = {pages 1--12 were the arXiv PDF page count and were dropped; confirm the NIPS 2012 pagination},
title = {{Practical Bayesian Optimization of Machine Learning Algorithms}},
url = {http://arxiv.org/abs/1206.2944},
volume = {25},
year = {2012}
}
@misc{Tieleman2012,
author = {Tieleman, Tijmen and Hinton, Geoffrey},
howpublished = {COURSERA: Neural Networks for Machine Learning},
title = {{Neural Networks for Machine Learning (lecture 6.5)}},
year = {2012}
}
@inproceedings{kitaguchi2004extracting,
author = {Kitaguchi, S},
title = {{Extracting feature based on motif from a chronic hepatitis dataset}},
booktitle = {Proceedings of 18th Annual Conference of the Japanese Society for Artificial Intelligence (JSAI'04)},
year = {2004}
}
@misc{Kriegel2009,
author = {Kriegel, Hans-Peter and Kr{\"{o}}ger, Peer and Zimek, Arthur},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/kdd10-outlier-tutorial.pdf:pdf},
howpublished = {Tutorial at the 13th Pacific-Asia Conference on Knowledge Discovery and Data Mining},
title = {{Outlier detection techniques}},
year = {2009}
}
@inproceedings{Keogh2005,
  abstract  = {In this work, we introduce the new problem of finding time series discords. Time series discords are subsequences of a longer time series that are maximally different to all the rest of the time series subsequences. They thus capture the sense of the most unusual subsequence within a time series. Time series discords have many uses for data mining, including improving the quality of clustering, data cleaning, summarization, and anomaly detection. Discords are particularly attractive as anomaly detectors because they only require one intuitive parameter (the length of the subsequence) unlike most anomaly detection algorithms that typically require many parameters. We evaluate our work with a comprehensive set of experiments. In particular, we demonstrate the utility of discords with objective experiments on domains as diverse as Space Shuttle telemetry monitoring, medicine, surveillance, and industry, and we demonstrate the effectiveness of our discord discovery algorithm with more than one million experiments, on 82 different datasets from diverse domains.},
  author    = {Keogh, Eamonn and Lin, Jessica and Fu, Ada},
  title     = {{HOT SAX}: Efficiently Finding the Most Unusual Time Series Subsequence},
  booktitle = {Proceedings of the IEEE International Conference on Data Mining (ICDM)},
  pages     = {226--233},
  year      = {2005},
  doi       = {10.1109/ICDM.2005.79},
  isbn      = {0769522785},
  issn      = {15504786},
  keywords  = {Anomaly detection,Clustering,Time series data mining},
  file      = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/HOT SAX Efficiently Finding the Most Unusual Time Series Subsequence.pdf:pdf}
}
@article{Kandhari2009,
  author   = {Kandhari, Rupali},
  title    = {Anomaly Detection},
  journal  = {ACM Computing Surveys},
  volume   = {41},
  number   = {3},
  pages    = {1--6},
  year     = {2009},
  doi      = {10.1145/1541880.1541882},
  issn     = {03600300},
  keywords = {Anomaly detection, outlier detection},
  file     = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Anomaly Detection A Survey acm.pdf:pdf}
}
@inproceedings{Dasgupta1996,
  abstract  = {Detecting anomalies in time series data is a problem of great practical interest in many manufacturing and signal processing applications. This paper presents a novelty detection algorithm inspired by the negative-selection mechanism of the immune system, which discriminates between self and other. Here self is defined to be normal data patterns and non-self is any deviation exceeding an allowable variation. An example application, simulated cutting dynamics in a milling operation, is presented, and the performance of the algorithm in detecting the tool breakage is reported.},
  author    = {Dasgupta, Dipankar and Forrest, Stephanie},
  title     = {Novelty Detection in Time Series Data Using Ideas from Immunology},
  booktitle = {Proceedings of the International Conference on Intelligent Systems},
  pages     = {82--87},
  year      = {1996},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.57.3894{\&}rep=rep1{\&}type=pdf}
}
@article{Seeger2004,
  abstract = {Gaussian processes (GPs) are natural generalisations of multivariate Gaussian random variables to infinite (countably or continuous) index sets. GPs have been applied in a large number of fields to a diverse range of ends, and very many deep theoretical analyses of various properties are available. This paper gives an introduction to Gaussian processes on a fairly elementary level with special emphasis on characteristics relevant in machine learning. It draws explicit connections to branches such as spline smoothing models and support vector machines in which similar ideas have been investigated. Gaussian process models are routinely used to solve hard machine learning problems. They are attractive because of their flexible non-parametric nature and computational simplicity. Treated within a Bayesian framework, very powerful statistical methods can be implemented which offer valid estimates of uncertainties in our predictions and generic model selection procedures cast as nonlinear optimization problems. Their main drawback of heavy computational scaling has recently been alleviated by the introduction of generic sparse approximations. The mathematical literature on GPs is large and often uses deep concepts which are not required to fully understand most machine learning applications. In this tutorial paper, we aim to present characteristics of GPs relevant to machine learning and to show up precise connections to other "kernel machines" popular in the community. Our focus is on a simple presentation, but references to more detailed sources are provided.},
  author   = {Seeger, Matthias},
  title    = {{Gaussian} Processes for Machine Learning},
  journal  = {International Journal of Neural Systems},
  volume   = {14},
  number   = {2},
  pages    = {69--106},
  year     = {2004},
  doi      = {10.1142/S0129065704001899},
  issn     = {0129-0657},
  pmid     = {15112367},
  file     = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Gaussian Processes for Machine Learning.pdf:pdf}
}
@book{Minsky1967,
  author    = {Minsky, Marvin L.},
  title     = {Computation: Finite and Infinite Machines},
  publisher = {Prentice-Hall, Inc.},
  address   = {Upper Saddle River, NJ, USA},
  year      = {1967},
  isbn      = {0-13-165563-9}
}
@inproceedings{Munz2007,
  abstract  = {Data mining techniques make it possible to search large amounts of data for characteristic rules and patterns. If applied to network monitoring data recorded on a host or in a network, they can be used to detect intrusions, attacks and/or anomalies. This paper gives an introduction to Network Data Mining, i.e. the application of data mining methods to packet and flow data captured in a network, including a comparative overview of existing approaches. Furthermore, we present a novel flow-based anomaly detection scheme based on the K-means clustering algorithm. Training data containing unlabeled flow records are separated into clusters of normal and anomalous traffic. The corresponding cluster centroids are used as patterns for computationally efficient distance-based detection of anomalies in new monitoring data. We provide a detailed description of the data mining and the anomaly detection processes, and present first experimental results.},
  author    = {M{\"u}nz, Gerhard and Li, Sa and Carle, Georg},
  title     = {Traffic Anomaly Detection Using {K-means} Clustering},
  booktitle = {GI/ITG Workshop MMBnet},
  year      = {2007},
  url       = {http://www.decom.ufop.br/menotti/rp122/sem/sem3-luciano-art.pdf}
}
@article{He2003,
  abstract = {In this paper, we present a new definition for outlier: Cluster-based local outlier, which is meaningful and provides importance to the local data behavior. A measure for identifying the physical significance of an outlier is designed, which is called cluster-based local outlier factor (CBLOF). We also propose the FindCBLOF algorithm for discovering outliers. The experimental results show that our approach outperformed the existing methods on identifying meaningful and interesting outliers. {\copyright} 2003 Elsevier Science B.V. All rights reserved.},
  author   = {He, Zengyou and Xu, Xiaofei and Deng, Shengchun},
  title    = {Discovering Cluster-Based Local Outliers},
  journal  = {Pattern Recognition Letters},
  volume   = {24},
  number   = {9--10},
  pages    = {1641--1650},
  year     = {2003},
  doi      = {10.1016/S0167-8655(03)00003-5},
  issn     = {01678655},
  keywords = {Clustering,Data mining,Outlier detection}
}
@inproceedings{Doya1992,
  abstract  = {Gradient descent algorithms in recurrent neural networks can have problems when the network dynamics experience bifurcations in the course of learning. The possible hazards caused by the bifurcations of the network dynamics and the learning equations are investigated. The roles of teacher forcing, preprogramming of network structures, and the approximate learning algorithms are discussed.},
  author    = {Doya, Kenji},
  title     = {Bifurcations in the Learning of Recurrent Neural Networks},
  booktitle = {Proceedings of the 1992 IEEE International Symposium on Circuits and Systems (ISCAS)},
  volume    = {6},
  pages     = {1--4},
  year      = {1992},
  doi       = {10.1109/ISCAS.1992.230622},
  isbn      = {0-7803-0593-0},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.40.5278{\&}rep=rep1{\&}type=pdf}
}
@inproceedings{Angiulli2007,
  abstract  = {In this work a method for detecting distance-based outliers in data streams is presented. We deal with the sliding window model, where outlier queries are performed in order to detect anomalies in the current window. Two algorithms are presented. The first one exactly answers outlier queries, but has larger space requirements. The second algorithm is directly derived from the exact one, has limited memory requirements and returns an approximate answer based on accurate estimations with a statistical guarantee. Several experiments have been accomplished, confirming the effectiveness of the proposed approach and the high quality of approximate solutions.},
  author    = {Angiulli, Fabrizio and Fassetti, Fabio},
  title     = {Detecting Distance-Based Outliers in Streams of Data},
  booktitle = {Proceedings of the Sixteenth ACM Conference on Information and Knowledge Management (CIKM'07)},
  pages     = {811--820},
  year      = {2007},
  doi       = {10.1145/1321440.1321552},
  isbn      = {9781595938039}
}
@incollection{Buckheit1995,
  abstract  = {Wavelab is a library of wavelet-packet analysis, cosine-packet analysis and matching pursuit. The library is available free of charge over the Internet. Versions are provided for Macintosh, UNIX and Windows machines. Wavelab makes available, in one package, all the code to reproduce all the figures in our published wavelet articles. The interested reader can inspect the source code to see exactly what algorithms were used, how parameters were set in producing our figures, and can then modify the source to produce variations on our results. WAVELAB has been developed, in part, because of exhortations by Jon Claerbout of Stanford that computational scientists should engage in ``really reproducible'' research.},
  author    = {Buckheit, Jonathan B. and Donoho, David L.},
  title     = {{WaveLab} and Reproducible Research},
  booktitle = {Wavelets and Statistics},
  series    = {Lecture Notes in Statistics},
  volume    = {103},
  pages     = {55--81},
  publisher = {Springer},
  year      = {1995},
  doi       = {10.1007/978-1-4612-2544-7_5},
  isbn      = {978-0-387-94564-4}
}
@inproceedings{Bu2007,
  abstract  = {Finding discords in time series database is an important problem in a great variety of applications, such as space shuttle telemetry, mechanical industry, biomedicine, and financial data analysis. However, most previous methods for this problem suffer from too many parameter settings which are difficult for users. The best known approach to our knowledge that has comparatively fewer parameters still requires users to choose a word size for the compression of subsequences. In this paper, we propose a Haar wavelet and augmented trie based algorithm to mine the top-K discords from a time series database, which can dynamically determine the word size for compression. Due to the characteristics of Haar wavelet transform, our algorithm has greater pruning power than previous approaches. Through experiments with some annotated datasets, the effectiveness and efficiency of our algorithm are both attested.},
  author    = {Bu, Yingyi and Leung, Tat-Wing and Fu, Ada Wai-Chee and Keogh, Eamonn and Pei, Jian and Meshkin, Sam},
  title     = {{WAT}: Finding Top-K Discords in Time Series Database},
  booktitle = {Proceedings of the 2007 SIAM International Conference on Data Mining (SDM'07)},
  pages     = {449--454},
  year      = {2007},
  isbn      = {9780898716306},
  url       = {http://www.cse.cuhk.edu.hk/{~}adafu/Pub/sdm07.pdf}
}
@inproceedings{Romeu2013,
  abstract  = {Artificial neural networks have proved to be good at time-series forecasting problems, being widely studied at literature. Traditionally, shallow architectures were used due to convergence problems when dealing with deep models. Recent research findings enable deep architectures training, opening a new interesting research area called deep learning. This paper presents a study of deep learning techniques applied to time-series forecasting in a real indoor temperature forecasting task, studying performance due to different hyper-parameter configurations. When using deep models, better generalization performance at test set and an over-fitting reduction has been observed.},
  author    = {Romeu, Pablo and Zamora-Mart{\'i}nez, Francisco and Botella-Rocamora, Paloma and Pardo, Juan},
  title     = {Time-Series Forecasting of Indoor Temperature Using Pre-trained Deep Neural Networks},
  booktitle = {Artificial Neural Networks and Machine Learning -- {ICANN} 2013},
  series    = {Lecture Notes in Computer Science},
  volume    = {8131},
  pages     = {451--458},
  year      = {2013},
  doi       = {10.1007/978-3-642-40728-4_57},
  keywords  = {artificial neural networks,autoencoders,deep learning,energy efficiency,temperature forecasting,time series},
  file      = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Time-Series Forecasting of Indoor Temperature Using Pre-trained Deep Neural Networks.pdf:pdf},
  url       = {http://link.springer.com/chapter/10.1007/978-3-642-40728-4{\_}57}
}
@inproceedings{Martens2011,
  abstract  = {In this work we resolve the long-outstanding problem of how to effectively train recurrent neural networks (RNNs) on complex and difficult sequence modeling problems which may contain long-term data dependencies. Utilizing recent advances in the Hessian-free optimization approach (Martens, 2010), together with a novel damping scheme, we successfully train RNNs on two sets of challenging problems. First, a collection of pathological synthetic datasets which are known to be impossible for standard optimization approaches (due to their extremely long-term dependencies), and second, on three natural and highly complex real-world sequence datasets where we find that our method significantly outperforms the previous state-of-the-art method for training neural sequence models: the Long Short-term Memory approach of Hochreiter and Schmidhuber (1997). Additionally, we offer a new interpretation of the generalized Gauss-Newton matrix of Schraudolph (2002) which is used within the HF approach of Martens. Copyright 2011 by the author(s)/owner(s).},
  author    = {Martens, James and Sutskever, Ilya},
  title     = {Learning Recurrent Neural Networks with {Hessian}-Free Optimization},
  booktitle = {Proceedings of the 28th International Conference on Machine Learning (ICML)},
  pages     = {1033--1040},
  year      = {2011},
  isbn      = {978-1-4503-0619-5},
  keywords  = {Learning systems,Optimization},
  file      = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Learning Recurrent Neural Networks with Hessian-Free Optimization.pdf:pdf}
}
@inproceedings{Lewandowski2010,
  abstract  = {A novel non-linear dimensionality reduction method, called Temporal Laplacian Eigenmaps, is introduced to process efficiently time series data. In this embedded-based approach, temporal information is intrinsic to the objective function, which produces description of low dimensional spaces with time coherence between data points. Since the proposed scheme also includes bidirectional mapping between data and embedded spaces and automatic tuning of key parameters, it offers the same benefits as mapping-based approaches. Experiments on a couple of computer vision applications demonstrate the superiority of the new approach to other dimensionality reduction method in term of accuracy. Moreover, its lower computational cost and generalisation abilities suggest it is scalable to larger datasets.},
  author    = {Lewandowski, Michal and Mart{\'i}nez-del-Rinc{\'o}n, Jes{\'u}s and Makris, Dimitrios and Nebel, Jean-Christophe},
  title     = {Temporal Extension of {Laplacian} Eigenmaps for Unsupervised Dimensionality Reduction of Time Series},
  booktitle = {Proceedings of the 20th International Conference on Pattern Recognition (ICPR)},
  pages     = {161--164},
  publisher = {IEEE},
  year      = {2010},
  doi       = {10.1109/ICPR.2010.48},
  isbn      = {9780769541099},
  issn      = {10514651},
  keywords  = {Dimensionality reduction,Human motion,Manifold learning,Temporal Laplacian Eigenmap,Time-series}
}
@article{Basu2007,
  abstract   = {In this article we consider the problem of detecting unusual values or outliers from time series data where the process by which the data are created is difficult to model. The main consideration is the fact that data closer in time are more correlated to each other than those farther apart. We propose two variations of a method that uses the median from a neighborhood of a data point and a threshold value to compare the difference between the median and the observed data value. Both variations of the method are fast and can be used for data streams that occur in quick succession such as sensor data on an airplane.},
  author     = {Basu, Sabyasachi and Meckesheimer, Martin},
  title      = {Automatic Outlier Detection for Time Series: An Application to Sensor Data},
  shorttitle = {Automatic outlier detection for time series},
  journal    = {Knowledge and Information Systems},
  volume     = {11},
  number     = {2},
  pages      = {137--154},
  year       = {2007},
  doi        = {10.1007/s10115-006-0026-6},
  issn       = {02191377},
  keywords   = {Jaccard coefficient,Outliers,Sensor data,Simulation,Time series},
  url        = {http://link.springer.com/article/10.1007/s10115-006-0026-6}
}
@article{Yankov2008,
  abstract = {The problem of finding unusual time series has recently attracted much attention, and several promising methods are now in the literature. However, virtually all proposed methods assume that the data reside in main memory. For many real-world problems this is not the case. For example, in astronomy, multi-terabyte time series datasets are the norm. Most current algorithms faced with data which cannot fit in main memory resort to multiple scans of the disk/tape and are thus intractable. In this work we show how one particular definition of unusual time series, the time series discord, can be discovered with a disk aware algorithm. The proposed algorithm is exact and requires only two linear scans of the disk with a tiny buffer of main memory. Furthermore, it is very simple to implement. We use the algorithm to provide further evidence of the effectiveness of the discord definition in areas as diverse as astronomy, Web query mining, video surveillance, etc., and show the efficiency of our method on datasets which are many orders of magnitude larger than anything else attempted in the literature.},
  author   = {Yankov, Dragomir and Keogh, Eamonn and Rebbapragada, Umaa},
  title    = {Disk Aware Discord Discovery: Finding Unusual Time Series in Terabyte Sized Datasets},
  journal  = {Knowledge and Information Systems},
  volume   = {17},
  number   = {2},
  pages    = {241--262},
  year     = {2008},
  doi      = {10.1007/s10115-008-0131-9},
  issn     = {02191377},
  keywords = {Discords,Disk aware algorithms,Distance outliers,Time series},
  file     = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Disk aware discord discovery finding unusual time series in terabyte sized datasets.pdf:pdf}
}
@inproceedings{Breunig1999,
  abstract  = {For many KDD applications finding the outliers, i.e. the rare events, is more interesting and useful than finding the common cases, e.g. detecting criminal activities in E-commerce. Being an outlier, however, is not just a binary property. Instead, it is a property that applies to a certain degree to each object in a data set, depending on how `isolated' this object is, with respect to the surrounding clustering structure. In this paper, we formally introduce a new notion of outliers which bases outlier detection on the same theoretical foundation as density-based cluster analysis. Our notion of an outlier is `local' in the sense that the outlier-degree of an object is determined by taking into account the clustering structure in a bounded neighborhood of the object. We demonstrate that this notion of an outlier is more appropriate for detecting different types of outliers than previous approaches, and we also present an algorithm for finding them. Furthermore, we show that by combining the outlier detection with a density-based method to analyze the clustering structure, we can get the outliers almost for free if we already want to perform a cluster analysis on a data set.},
  author    = {Breunig, Markus M. and Kriegel, Hans-Peter and Ng, Raymond T. and Sander, J{\"o}rg},
  title     = {{OPTICS-OF}: Identifying Local Outliers},
  booktitle = {Principles of Data Mining and Knowledge Discovery (PKDD'99)},
  pages     = {262--270},
  year      = {1999},
  isbn      = {3-540-66490-4},
  file      = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/OPTICS-OF Identifying Local Outliers.pdf:pdf},
  url       = {http://link.springer.com/chapter/10.1007/978-3-540-48247-5{\_}28}
}
@inproceedings{Graves2005,
  abstract  = {In this paper, we present bidirectional Long Short Term Memory (LSTM) networks, and a modified, full gradient version of the LSTM learning algorithm. We evaluate Bidirectional LSTM (BLSTM) and several other network architectures on the benchmark task of framewise phoneme classification, using the TIMIT database. Our main findings are that bidirectional networks outperform unidirectional ones, and Long Short Term Memory (LSTM) is much faster and also more accurate than both standard Recurrent Neural Nets (RNNs) and time-windowed Multilayer Perceptrons (MLPs). Our results support the view that contextual information is crucial to speech processing, and suggest that BLSTM is an effective architecture with which to exploit it. {\textcopyright} 2005 Elsevier Ltd. All rights reserved.},
  author    = {Graves, Alex and Schmidhuber, J{\"u}rgen},
  title     = {Framewise Phoneme Classification with Bidirectional {LSTM} Networks},
  booktitle = {Proceedings of the International Joint Conference on Neural Networks (IJCNN)},
  volume    = {4},
  pages     = {2047--2052},
  year      = {2005},
  doi       = {10.1109/IJCNN.2005.1556215},
  isbn      = {0780390482}
}
@phdthesis{IlyaSutskever2013,
  author = {Sutskever, Ilya},
  title  = {Training Recurrent Neural Networks},
  school = {University of Toronto},
  year   = {2013},
  file   = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/TRAINING RECURRENT NEURAL NETWORKS.pdf:pdf}
}
@inproceedings{Beyer1999,
  abstract  = {We explore the effect of dimensionality on the ``nearest neighbor'' problem. We show that under a broad set of conditions (much broader than independent and identically distributed dimensions), as dimensionality increases, the distance to the nearest data point approaches the distance to the farthest data point. To provide a practical perspective, we present empirical results on both real and synthetic data sets that demonstrate that this effect can occur for as few as 10--15 dimensions. These results should not be interpreted to mean that high-dimensional indexing is never meaningful; we illustrate this point by identifying some high-dimensional workloads for which this effect does not occur. However, our results do emphasize that the methodology used almost universally in the database literature to evaluate high-dimensional indexing techniques is flawed, and should be modified. In particular, most such techniques proposed in the literature are not evaluated versus simple linear scan, and are evaluated over workloads for which nearest neighbor is not meaningful. Often, even the reported experiments, when analyzed carefully, show that linear scan would outperform the techniques being proposed on the workloads studied in high (10--15) dimensionality!},
  author    = {Beyer, Kevin and Goldstein, Jonathan and Ramakrishnan, Raghu and Shaft, Uri},
  title     = {When Is ``Nearest Neighbor'' Meaningful?},
  booktitle = {Database Theory -- {ICDT}'99},
  series    = {Lecture Notes in Computer Science},
  pages     = {217--235},
  year      = {1999},
  doi       = {10.1007/3-540-49257-7_15},
  isbn      = {978-3-540-65452-0},
  url       = {http://link.springer.com/chapter/10.1007/3-540-49257-7{\_}15}
}
@article{Wang2013,
  author        = {Wang, Xiaoyue and Mueen, Abdullah and Ding, Hui and Trajcevski, Goce and Scheuermann, Peter and Keogh, Eamonn},
  title         = {{Experimental comparison of representation methods and distance measures for time series data}},
  journal       = {Data Mining and Knowledge Discovery},
  volume        = {26},
  number        = {2},
  pages         = {275--309},
  year          = {2013},
  doi           = {10.1007/s10618-012-0250-5},
  issn          = {13845810},
  keywords      = {Distance measure,Experimental comparison,Representation,Time series},
  archivePrefix = {arXiv},
  arxivId       = {1012.2789},
  eprint        = {1012.2789},
  abstract      = {The previous decade has brought a remarkable increase of the interest in applications that deal with querying and mining of time series data. Many of the research efforts in this context have focused on introducing new representation methods for dimensionality reduction or novel similarity measures for the underlying data. In the vast majority of cases, each individual work introducing a particular method has made specific claims and, aside from the occasional theoretical justifications, provided quantitative experimental observations. However, for the most part, the comparative aspects of these experiments were too narrowly focused on demonstrating the benefits of the proposed methods over some of the previously introduced ones. In order to provide a comprehensive validation, we conducted an extensive experimental study re-implementing eight different time series representations and nine similarity measures and their variants, and testing their effectiveness on thirty-eight time series data sets from a wide variety of application domains. In this paper, we give an overview of these different techniques and present our comparative experimental findings regarding their effectiveness. In addition to providing a unified validation of some of the existing achievements, our experiments also indicate that, in some cases, certain claims in the literature may be unduly optimistic.},
  file          = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Experimental comparison of representation methods and distance measures for time series data.pdf:pdf}
}
@inproceedings{Li2007,
  abstract  = {Market analysis is a representative data analysis process with many applications. In such an analysis, critical numerical measures, such as profit and sales, fluctuate over time and form time-series data. Moreover, the time series data correspond to market segments, which are described by a set of attributes, such as age, gender, education, income level, and product-category, that form a multi-dimensional structure. To better understand market dynamics and predict future trends, it is crucial to study the dynamics of time-series in multi-dimensional market segments. This is a topic that has been largely ignored in time series and data cube research. In this study, we examine the issues of anomaly detection in multi-dimensional time-series data. We propose time-series data cube to capture the multi-dimensional space formed by the attribute structure. This facilitates the detection of anomalies based on expected values derived from higher level, ``more general'' time-series. Anomaly detection in a time-series data cube poses computational challenges, especially for high-dimensional, large data sets. To this end, we also propose an efficient search algorithm to iteratively select subspaces in the original high-dimensional space and detect anomalies within each one. Our experiments with both synthetic and real-world data demonstrate the effectiveness and efficiency of the proposed solution.},
  author    = {Li, Xiaolei and Han, Jiawei},
  title     = {Mining Approximate Top-K Subspace Anomalies in Multi-Dimensional Time-Series Data},
  booktitle = {Proceedings of the 33rd International Conference on Very Large Data Bases (VLDB)},
  pages     = {447--458},
  year      = {2007},
  isbn      = {9781595936493},
  file      = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Mining Approximate Top-K Subspace Anomalies in multidimensional time-series data.pdf:pdf},
  url       = {http://dl.acm.org/citation.cfm?id=1325904}
}
@article{Hofmeyr1998,
  abstract = {A method is introduced for detecting intrusions at the level of privileged processes. Evidence is given that short sequences of system calls executed by running processes are a good discriminator between normal and abnormal operating characteristics of several common UNIX programs. Normal behavior is collected in two ways: Synthetically, by exercising as many normal modes of usage of a program as possible, and in a live user environment by tracing the actual execution of the program. In the former case several types of intrusive behavior were studied; in the latter case, results were analyzed for false positives.},
  author   = {Hofmeyr, Steven A. and Forrest, Stephanie and Somayaji, Anil},
  title    = {Intrusion Detection Using Sequences of System Calls},
  journal  = {Journal of Computer Security},
  volume   = {6},
  number   = {3},
  pages    = {151--180},
  year     = {1998},
  doi      = {10.3233/JCS-980109},
  issn     = {0926227X}
}
@article{Overturf2000a,
abstract = {ABSTRACT. Including the Use of Pneumococcal Conjugate and Polysaccharide Vaccines and Antibiotic Prophylaxis Pneumococcal infections are the most common invasive bacterial infections in children in the United States. The incidence of invasive pneumococcal infections peaks in children younger than 2 years, reaching rates of 228/100 000 in children 6 to 12 months old. Children with functional or anatomic asplenia (including sickle cell disease [SCD]) and children with human immunodeficiency virus infection have pneumococcal infection rates 20- to 100-fold higher than those of healthy children during the first 5 years of life. Others at high risk of pneumococcal infections include children with congenital immunodeficiency; chronic cardiopulmonary disease; children receiving immunosuppressive chemotherapy; children with immunosuppressive neoplastic diseases; children with chronic renal insufficiency, including nephrotic syndrome; children with diabetes; and children with cerebrospinal fluid leaks. Children of Native American (American Indian and Alaska Native) or African American descent also have higher rates of invasive pneumococcal disease. Outbreaks of pneumococcal infection have occurred with increased frequency in children attending out-of-home care. Among these children, nasopharyngeal colonization rates of 60{\%} have been observed, along with pneumococci resistant to multiple antibiotics. The administration of antibiotics to children involved in outbreaks of pneumococcal disease has had an inconsistent effect on nasopharyngeal carriage. In contrast, continuous penicillin prophylaxis in children younger than 5 years with SCD has been successful in reducing rates of pneumococcal disease by 84{\%}. 
Pneumococcal polysaccharide vaccines have been recommended since 1985 for children older than 2 years who are at high risk of invasive disease, but these vaccines were not recommended for younger children and infants because of poor antibody response before 2 years of age... [ABSTRACT FROM AUTHOR]},
author = {Overturf, Gary D},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Detecting Anomalies in a Time Series Database.pdf:pdf},
issn = {00314005},
journal = {Pediatrics},
keywords = {BACTERIAL vaccines,PNEUMOCOCCAL vaccine,STREPTOCOCCAL diseases,VACCINATION,VACCINATION of children},
number = {2},
pages = {367},
title = {{Technical Report.}},
url = {http://search.ebscohost.com/login.aspx?direct=true{\&}db=aph{\&}AN=3449077{\&}site=ehost-live},
volume = {106},
year = {2000}
}
@article{Keogh2004,
abstract = {Given the recent explosion of interest in streaming data and online algorithms, clustering of time seriessubsequences, extracted via a sliding window, has received much attention. In this work we make asurprising claim. Clustering of time series subsequences is meaningless. More concretely, clusters extractedfrom these time series are forced to obey a certain constraint that is pathologically unlikely to be satisfied byany dataset, and because of this, the clusters extracted by any clustering algorithm are essentially random.},
author = {Keogh, Eamonn and Lin, Jessica},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Clustering of Time Series Subsequences is Meaningless.pdf:pdf},
pmid = {670978},
title = {{Clustering of Time Series Subsequences is Meaningless}},
url = {http://citeseer.ist.psu.edu/670978},
year = {2004}
}
@article{Qiao2002,
abstract = {An anomaly intrusion detection method based on HMM is presented. The system call trace of a UNIX privileged process is passed to a HMM to obtain state transition sequences. Preliminary experiments prove the state transition sequences can express the different mode between normal action and intrusion behaviour in a more stable and simple manner},
author = {Qiao, Y. and Xin, X.W. and Bin, Y. and Ge, S.},
doi = {10.1049/el:20020467},
issn = {00135194},
journal = {Electronics Letters},
keywords = {HMM,UNIX,Unix,anomaly intrusion detection method,experiments,hidden Markov model,hidden Markov models,normal action,privileged process,security of data,state transition sequences,system call trace},
number = {13},
pages = {663--664},
title = {{Anomaly intrusion detection method based on HMM}},
url = {http://digital-library.theiet.org/content/journals/10.1049/el{\_}20020467},
volume = {38},
year = {2002}
}
@article{Zaremba2014,
abstract = {We present a simple regularization technique for Recurrent Neural Networks (RNNs) with Long Short-Term Memory (LSTM) units. Dropout, the most successful technique for regularizing neural networks, does not work well with RNNs and LSTMs. In this paper, we show how to correctly apply dropout to LSTMs, and show that it substantially reduces overfitting on a variety of tasks. These tasks include language modeling, speech recognition, image caption generation, and machine translation.},
author = {Zaremba, Wojciech and Sutskever, Ilya and Vinyals, Oriol},
archivePrefix = {arXiv},
arxivId = {1409.2329},
eprint = {1409.2329},
title = {{Recurrent Neural Network Regularization}},
url = {http://arxiv.org/abs/1409.2329},
year = {2014}
}
@article{Ratanamahatana2004,
abstract = {The Dynamic Time Warping (DTW) distance measure is a technique that has long been known in speech recognition community. It allows a non-linear mapping of one signal to another by minimizing the distance between the two. A decade ago, DTW was introduced into Data Mining community as a utility for various tasks for time series problems including classification, clustering, and anomaly detection. The technique has flourished, particularly in the last three years, and has been applied to a variety of problems in various disciplines. In spite of DTW's great success, there are still several persistent “myths” about it. These myths have caused confusion and led to much wasted research effort. In this work, we will dispel these myths with the most comprehensive set of time series experiments ever conducted},
author = {Ratanamahatana, Chotirat Ann and Keogh, Eamonn},
internal-note = {removed bogus doi 10.1097/01.CCM.0000279204.24648.44 -- it resolves to an unrelated Critical Care Medicine article, not this paper},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/Everything you know about Dynamic Time Warping is Wrong.pdf:pdf},
issn = {00903493},
journal = {Third Workshop on Mining Temporal and Sequential Data},
keywords = {data mining,dynamic time warping,experimentation},
pages = {22--25},
pmid = {15513920},
title = {{Everything you know about dynamic time warping is wrong}},
url = {http://spoken-number-recognition.googlecode.com/svn/trunk/docs/Dynamic time warping/DTW{\_}myths.pdf},
year = {2004}
}
@article{Bayer2014,
abstract = {Leveraging advances in variational inference, we propose to enhance recurrent neural networks with latent variables, resulting in Stochastic Recurrent Networks (STORNs). The model i) can be trained with stochastic gradient methods, ii) allows structured and multi-modal conditionals at each time step, iii) features a reliable estimator of the marginal likelihood and iv) is a generalisation of deterministic recurrent neural networks. We evaluate the method on four polyphonic musical data sets and motion capture data.},
archivePrefix = {arXiv},
arxivId = {1411.7610},
author = {Bayer, Justin and Osendorfer, Christian},
eprint = {1411.7610},
file = {:C$\backslash$:/Users/Majid/Dropbox/ad/rsrc/LEARNING STOCHASTIC RECURRENT NETWORKS.pdf:pdf},
pages = {1--9},
title = {{Learning Stochastic Recurrent Networks}},
url = {http://arxiv.org/abs/1411.7610},
year = {2014}
}
@inproceedings{snoek2012practical,
author = {Snoek, Jasper and Larochelle, Hugo and Adams, Ryan P},
booktitle = {Advances in Neural Information Processing Systems},
pages = {2951--2959},
title = {{Practical Bayesian optimization of machine learning algorithms}},
year = {2012}
}
@article{PhysioNet,
annote = {Circulation Electronic Pages:
http://circ.ahajournals.org/cgi/content/full/101/23/e215
PMID:1085218; doi: 10.1161/01.CIR.101.23.e215},
author = {Goldberger, A L and Amaral, L A N and Glass, L and Hausdorff, J M and Ivanov, P Ch. and Mark, R G and Mietus, J E and Moody, G B and Peng, C.-K. and Stanley, H E},
journal = {Circulation},
number = {23},
pages = {e215--e220},
title = {{PhysioBank, PhysioToolkit, and PhysioNet: Components of a New Research Resource for Complex Physiologic Signals}},
volume = {101},
year = {2000}
}
@article{Keogh2007,
abstract = {In this work we introduce the new problem of finding time series dis- cords. Time series discords are subsequences of longer time series that are max- imally different to all the rest of the time series subsequences. They thus capture the sense of the most unusual subsequence within a time series. While discords have many uses for data mining, they are particularly attractive as anomaly de- tectors because they only require one intuitive parameter (the length of the sub- sequence) unlike most anomaly detection algorithms that typically require many parameters. While the brute force algorithm to discover time series discords is quadratic in the length of the time series, we show a simple algorithm that is three to four orders of magnitude faster than brute force, while guaranteed to produce identical results. We evaluate our work with a comprehensive set of experiments on diverse data sources including electrocardiograms, space telemetry, respiration physiology, anthropological and video datasets},
author = {Keogh, Eamonn and Lin, Jessica and Lee, Sang-Hee Hee and {Van Herle}, Helga},
doi = {10.1007/s10115-006-0034-6},
isbn = {0219-1377},
issn = {02191377},
journal = {Knowledge and Information Systems},
keywords = {Anomaly detection,Clustering,Time series data mining},
number = {1},
pages = {1--27},
title = {{Finding the most unusual time series subsequence: Algorithms and applications}},
volume = {11},
year = {2007}
}
@article{Fomel2013,
abstract = {The Madagascar software package is designed for analysis of large-scale multidimensional data, such as those occurring in exploration geophysics. Madagascar provides a framework for reproducible research. By “reproducible research” we refer to the discipline of attaching software codes and data to computational results reported in publications. The package contains a collection of (a) computational modules, (b) data-processing scripts, and (c) research papers. Madagascar is distributed on SourceForge under a GPL v2 license https://sourceforge.net/projects/rsf/ . By October 2013, more than 70 people from different organizations around the world have contributed to the project, with increasing year-to-year activity. The Madagascar website is http://www.ahay.org/ .},
author = {Fomel, Sergey and Sava, Paul and Vlad, Ioan and Liu, Yang and Bashkardin, Vladimir},
doi = {10.5334/jors.ag},
issn = {2049-9647},
journal = {Journal of Open Research Software},
keywords = {data analysis,geophysics,python,reproducibility,seismology},
number = {1},
pages = {e8},
title = {{Madagascar: open-source software project for multidimensional data analysis and reproducible computational experiments}},
url = {http://openresearchsoftware.metajnl.com/article/view/jors.ag/20},
volume = {1},