-
Notifications
You must be signed in to change notification settings - Fork 0
/
filefinder.py
1527 lines (1270 loc) · 68.6 KB
/
filefinder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python2.7
"""
filefinder
(c) Antonio Tejada 2022
Simplistic but cross-platform version of Everything https://www.voidtools.com/
XXX Filtering is slow when it forces full db scan on Raspberry Pi, needs sqlite3
Full Text Search?
XXX Use trees and transitive closure?
See https://charlesleifer.com/blog/querying-tree-structures-in-sqlite-using-python-and-the-transitive-closure-extension/
XXX Use tree and recursive queries?
https://www.sqlite.org/lang_with.html#rcex1
https://stackoverflow.com/questions/38465186/sqlite-recursive-query-to-return-file-path
XXX This could even open Everything files?
XXX Handle filesystem listing errors
XXX Python 2.7 sqlite3 doesn't have FTS compiled in, but accessing through
QT5 does have FTS3
XXX QtSql needs
apt-get install python-pyqt5.qtsql
on Raspberry Pi and has up to FTS5
XXX See https://blog.kapeli.com/sqlite-fts-contains-and-suffix-matches
XXX See https://github.com/mayflower/sqlite-reverse-string (search for reverse token so no need to insert all prefixes)
XXX See FTS5 trigram (note needs 2020 sqlite 3.34.0)
XXX See https://github.com/simonw/sqlite-fts5-trigram
XXX See https://pypi.org/project/sqlitefts/
XXX See https://github.com/hideaki-t/sqlite-fts-python
XXX See https://stackoverflow.com/questions/16872700/sqlite-data-change-notification-callbacks-in-python-or-bash-or-cli
but https://stackoverflow.com/questions/677028/how-do-i-notify-a-process-of-an-sqlite-database-change-done-in-a-different-proce
"""
import collections
import csv
import datetime
import errno
import logging
import os
import sqlite3
import stat
import string
import struct
import sys
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
class LineHandler(logging.StreamHandler):
    """
    Stream handler that emits multi-line messages one line at a time

    Every line of the expanded message is emitted as its own entry so each
    line gets the formatter's prefix. Lines after the first one are indented
    by one space.
    """
    def __init__(self):
        super(LineHandler, self).__init__()

    def emit(self, record):
        """
        Emit one entry per line of the record's message

        The lines are emitted through a shallow copy of the record so the
        caller's record (which is shared with any other handler attached to
        the same logger) keeps its original msg and args.
        """
        # Only needed here, keep the import local to the method
        import copy

        # Expand the %-style arguments once, before splitting into lines
        text = record.getMessage()
        line_record = copy.copy(record)
        # The text is already expanded, don't let the formatter apply the
        # arguments again on each line
        line_record.args = None
        indent = ""
        for message in text.split('\n'):
            line_record.msg = "%s%s" % (indent, message)
            super(LineHandler, self).emit(line_record)
            # Continuation lines are indented
            indent = " "
def setup_logger(logger):
    """
    Attach a LineHandler with the application log format to the given logger

    @return the same logger that was passed in
    """
    formatter = logging.Formatter(
        "%(asctime).23s %(levelname)s:%(filename)s(%(lineno)d):[%(thread)d] %(message)s"
    )
    handler = LineHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    return logger
def dbg(*log_args, **log_kwargs):
    """
    Log through the global logger at DEBUG level, arguments are forwarded
    untouched to logger.debug
    """
    logger.debug(*log_args, **log_kwargs)
def info(*log_args, **log_kwargs):
    """
    Log through the global logger at INFO level, arguments are forwarded
    untouched to logger.info
    """
    logger.info(*log_args, **log_kwargs)
def warn(*log_args, **log_kwargs):
    """
    Log through the global logger at WARNING level, arguments are forwarded
    untouched to logger.warning
    """
    logger.warning(*log_args, **log_kwargs)
def error(*log_args, **log_kwargs):
    """
    Log through the global logger at ERROR level, arguments are forwarded
    untouched to logger.error
    """
    logger.error(*log_args, **log_kwargs)
def exc(*log_args, **log_kwargs):
    """
    Log through the global logger with logger.exception, arguments are
    forwarded untouched
    """
    logger.exception(*log_args, **log_kwargs)
def which(exepath):
    """
    For an executable in the environment's path, return
    - the absolute path to exepath
    - None if the exepath cannot be found in the path or if it's not executable

    An absolute exepath is only checked as is. A non-absolute exepath is
    looked up relative to the empty path, the current directory and every
    entry of the PATH environment variable, in that order, and the first
    executable match is returned (so the result can be a relative path when
    the match comes from the empty path or the current directory).
    """
    info("which %r", exepath)

    def is_exe(fpath):
        return (os.path.isfile(fpath) and os.access(fpath, os.X_OK))

    if (os.path.isabs(exepath)):
        if (is_exe(exepath)):
            return exepath

    else:
        # Check empty path, current directory, and PATH
        # A missing PATH variable is treated as an empty one instead of
        # raising KeyError
        # XXX This doesn't handle pathsep escaping
        search_paths = ["", "."] + os.environ.get("PATH", "").split(os.pathsep)
        for path in search_paths:
            exe_filepath = os.path.join(path, exepath)
            info("Searching for executable %r in path %r", exe_filepath, path)
            if is_exe(exe_filepath):
                info("Found executable %r in path %r", exe_filepath, path)
                return exe_filepath

    return None
def launch_with_preferred_app(filepath):
    """
    Open filepath with the application the desktop associates to it

    On the "xcb" (X11) Qt platform xdg-open is started detached, anywhere
    else the file is opened through QDesktopServices.
    """
    if (QApplication.platformName() == "xcb"):
        # pyqt5 on lxde raspbian fails to invoke xdg-open for unknown reasons
        # and falls back to invoking the web browser instead, use xdg-open
        # explicitly on "xcb" platforms (X11)
        # See https://github.com/qt/qtbase/blob/067b53864112c084587fa9a507eb4bde3d50a6e1/src/gui/platform/unix/qgenericunixservices.cpp#L129
        # Note there's no splitCommand in this version of Qt5, build the
        # argument list manually
        QProcess.startDetached("xdg-open", [filepath])
        return

    QDesktopServices.openUrl(QUrl.fromLocalFile(filepath))
def size_to_human_friendly_units(u):
    """
    @param u {number} size in bytes

    @return {string} u as a human friendly power of 1024 unit (TB, GB, MB, KB,
            B) with two decimals, eg "1.50 KB". Sizes of 1024 TB and above
            are still reported in TB.
    """
    units = ["B", "KB", "MB", "GB", "TB"]
    d = 1
    # Only the units below the largest one need the "fits in this unit" test,
    # the largest unit takes whatever is left. Testing the largest unit too
    # would advance the divisor past it while still labeling the value as TB
    for unit in units[:-1]:
        new_d = d * (2 ** 10)
        if (u < new_d):
            break
        d = new_d

    else:
        unit = units[-1]

    return "%0.2f %s" % (u * 1.0 / d, unit)
# Path of the sqlite database file, relative to the current working directory.
# TableModel opens its connection on this path and MainWindow shows its
# basename in the window title.
database_filepath = os.path.join("_out", "files.db")
# Alternate database, enable for testing
##database_filepath = os.path.join("_out", "test.db")
class TableModel(QAbstractTableModel):
    """
    TableModel with filtering, sorting and on-demand display

    The on-demand display is done by
    - returning only loaded_row_count in rowCount()
    - returning True in canFetchMore until the query cursor is exhausted
    - updating loaded_row_count in fetchMore (through filterMoreRows)

    Sorting is done by regenerating the query with the sort clauses and
    resetting the model, this causes UI to be disturbed (ie row
    selection/focusing is lost).

    Filtering is done by regenerating the query with the filter clauses and
    resetting the model. This also causes the UI to be disturbed.

    State kept by the model
    - self.data is the list of rows fetched so far from the current query
    - loaded_row_count == len(self.data), 0 <= loaded_row_count
    - total_row_count is the unfiltered number of rows of the files table,
      read when the query is rebuilt in reset()
    - end_of_cursor is True once a fetch returned less rows than requested

    XXX Ideally for virtual/infinite data, would like a different mechanism
        where:
        - rowCount() returns total_row_count. This makes the scroll bar size
          stable
        - a viewport height worth of rows is fetched as the table is scrolled
          up or down, forgetting the rows before or after those (modulo
          guardband)
    """
    def __init__(self, data, headers):
        """
        @param data initial row list, note reset() below replaces it with an
               empty list before any row is displayed
        @param headers list of column titles, one per column
        """
        super(TableModel, self).__init__()

        # NOTE(review): this instance attribute has the same name as the
        # data() method below, so Python code cannot call model.data(ix,
        # role) on an instance. Presumably Qt's virtual dispatch still
        # reaches the method; verify before renaming either one.
        self.data = data
        self.headers = headers
        self.filter_words = []
        # Maps column index to sort order, stored lowest priority first (the
        # last entry is the primary sort key), see sort()
        self.sort_orders = collections.OrderedDict(
            reversed(((2, Qt.DescendingOrder), (1, Qt.AscendingOrder), (0, Qt.AscendingOrder), (3, Qt.DescendingOrder)))
        )

        self.conn = sqlite3.connect(database_filepath)
        self.cursor = None

        self.reset()

    def createIndex(self, *args, **kwargs):
        """
        Logging passthrough to the base class createIndex
        """
        dbg("createIndex %s", [a for a in args])
        return super(TableModel, self).createIndex(*args, **kwargs)

    def index(self, row, col, parent):
        """
        Logging passthrough to the base class index
        """
        dbg("index %d %d", row, col)
        return super(TableModel, self).index(row, col, parent)

    def data(self, ix, role):
        """
        @return the display value for the cell at ix, None for any role other
                than Qt.DisplayRole
        """
        # Data gets called by columns: all visible rows for column 1, all
        # visible rows for column 2, etc
        if (role == Qt.DisplayRole):
            # XXX Use a data "window" to minimize memory footprint? instead of
            #     storing all the elements from the beginning to this point
            #     (but requires to redo the query when moving the window to
            #     previous rows)
            row = self.data[ix.row()]
            # The last element of the row is the rowid appended by the query
            # built in reset()
            dbg("data %d %d %d", ix.row(), ix.column(), row[-1])
            value = row[ix.column()]
            if (ix.column() == 2):
                if (value == -1):
                    # Directory
                    value = "-"
                else:
                    # File, convert size into human friendly units
                    value = size_to_human_friendly_units(value)

            elif (ix.column() == 3):
                # Truncate to seconds for display
                # presumably the stored time is in milliseconds, verify
                # against the code that fills the database
                value = str(datetime.datetime.fromtimestamp(value/1000))

            return value

    def loadedRowCount(self):
        """
        Number of rows on-demand loaded <= totalRowCount
        """
        return self.loaded_row_count

    def totalRowCount(self):
        """
        Total number of rows in the files table as of the last reset()
        """
        return self.total_row_count

    def canFetchMore(self, parent):
        """
        @return True if the current query may still have rows to fetch
        """
        dbg("canFetchMore %d, %d", self.loaded_row_count, len(self.data))
        if (parent.isValid()):
            dbg("parent is valid")
            return False

        return not self.end_of_cursor

    def internalGetFilepath(self, row):
        """
        @return the filepath built from the path (column 1) and the name
                (column 0) of the given loaded row number
        """
        filename = self.data[row][0]
        dirpath = self.data[row][1]
        filepath = os.path.join(dirpath, filename)

        return filepath

    def filterMoreRows(self, count):
        """
        Load more rows using the current cursor (with the filter and sort SQL
        query) and update data, loaded_row_count and end_of_cursor
        """
        dbg("filterMoreRows %d", count)
        if (not self.end_of_cursor):
            # XXX This next iterator can take some time (eg when the word is
            #     not found and a full table scan is needed), should send a
            #     message to the view so it can update the status bar
            # XXX Will probably also need to do it in a different thread?
            # XXX Filtering can take some time, ideally would like to
            #       QApplication.processEvents()
            #     here, but it causes different errors because processEvents
            #     may cause reentrant calls to fetchMore which were not easy
            #     to fix. (looks like processEvents causes more calls to
            #     fetchMore while still inside fetchMore).
            #     Another option is to move the filtering to a thread and send
            #     a signal when the filtering is done but would probably cause
            #     UX problems (eg user scrolls down, filtering is deferred so
            #     it can't scroll, rows are added, but cursor is not scrolled
            #     down)
            #     Would need some kind of async prefetch and block if ever
            #     scrolls ahead of the prefetch
            new_data = self.cursor.fetchmany(count)
            self.loaded_row_count += len(new_data)
            self.data.extend(new_data)
            # A short batch means the query has no more rows
            self.end_of_cursor = (len(new_data) < count)

    def fetchMore(self, parent):
        """
        Fetch the next small batch of rows and notify the views
        """
        dbg("fetchMore %d %d %d %d", self.loaded_row_count, self.totalRowCount(), parent.row(), parent.column())
        if (parent.isValid()):
            warn("index is valid")
            # fetchMore has no return value in Qt, don't return one
            return

        loaded_row_count = self.loaded_row_count
        # Don't use too big of a number since Qt will call fetchMore as many
        # times as necessary to fill the viewport, and using a big number
        # causes costly skipping for rows that could end up outside of the
        # viewport anyway
        fetch_batch_size = 5
        self.filterMoreRows(fetch_batch_size)
        # NOTE(review): the rows are already appended at this point and the
        # range is empty (last < first) when the fetch returned no rows, Qt
        # documents beginInsertRows as being called before inserting.
        # MainWindow refreshes its status bar from rowsInserted, so this
        # notification is kept as is.
        self.beginInsertRows(parent, loaded_row_count, self.loaded_row_count - 1)
        self.endInsertRows()

    def rowCount(self, index):
        """
        @return the number of rows loaded so far, 0 for a valid parent index
        """
        if (index.isValid()):
            dbg("index is valid")
            return 0

        # Returning the currently loaded count causes the vertical scroll bar
        # position to change as new entries are loaded. This is not ideal,
        # would like to return the total count here in order to have a stable
        # vertical scroll bar, but that's not possible with on-demand row
        # loading via canFetchMore/fetchMore. An option would be to move on
        # demand loading to the data() method, but if
        # resizeColumnsToContents() is used, QT will traverse the full
        # rowCount to find the content to resize to
        return self.loaded_row_count

    def columnCount(self, ix):
        """
        @return the number of columns, one per header
        """
        return len(self.headers)

    def headerData(self, section, orientation, role):
        """
        @return the column title for horizontal display headers, None
                otherwise
        """
        if (role == Qt.DisplayRole):
            if (orientation == Qt.Horizontal):
                return str(self.headers[section])

        return None

    def getFilepath(self, ix):
        """
        @return the filepath of the row the model index ix belongs to
        """
        return self.internalGetFilepath(ix.row())

    def reset(self):
        """
        Rebuild and execute the SQL query from the current filter words and
        sort orders, and forget all the rows loaded so far.

        Callers are responsible for wrapping this in beginResetModel /
        endResetModel when views are attached.
        """
        if (self.cursor is not None):
            self.cursor.close()

        # XXX total_row_count should be updated in other places without
        #     needing to refresh the filter to update it (eg when receiving
        #     the signal that rows have been inserted/directories traversed?)
        self.total_row_count = self.conn.execute("SELECT count(*) FROM files").fetchone()[0]

        # Build the filter clause
        filter_params = []
        filter_clause = ""
        if (len(self.filter_words) > 0):
            filter_clauses = []
            for filter_word in self.filter_words:
                filter_params.append(filter_word)
                # XXX Allow verbatim here when surrounded by quotation marks
                #     This means not using "%" prefix and/or suffix in the LIKE
                #     clause or not using a LIKE clause if there are start and
                #     end quotation marks, also not concatenating path and name
                #     and also case-sensitive?
                # XXX Note LIKE is case-insensitive, may need changing if
                #     case-sensitivity can be set
                # XXX Note "%" and "_" in the filter word are not escaped, so
                #     they act as LIKE wildcards
                # XXX Note an index for path || name can be created but it's
                #     only used for strict equality, not for LIKE, even for
                #     prefix searches
                # XXX Create an index for each sorting combination (or the
                #     first time a new combination is found), makes the worst
                #     case where all files are filtered out quite faster (from
                #     a second on 600K files to ~instantaneous)
                #     See
                #       select * from sqlite_master where type = 'index';
                #     or
                #       PRAGMA index_list(table_name);
                #     to check which indices or use create index if not exists
                #     or create and ignore the error
                # XXX Note the number of column combinations to create indices
                #     for can be reduced by half because inverted sorts use the
                #     same index, eg both
                #     - size DESC, path ASC, name ASC, mtime DESC
                #     - size ASC, path DESC, name DESC, mtime ASC
                #     will use the size_desc_path_asc_name_asc_mtime_desc
                #     index
                # XXX To further reduce another option is to remove the n-last
                #     columns in the index and let it sort manually for those,
                #     expecting that the first m columns will have discarded
                #     most of the data

                # Use single quotes for the SQL string literals, sqlite only
                # accepts double quoted strings as a fallback when they don't
                # match an identifier. Note os.sep is never a quote
                filter_clauses.append("((path || '" + os.sep + "' || name) LIKE ('%' || ? || '%'))")

            filter_clause = " WHERE %s" % " AND ".join(filter_clauses)

        # Build the order clause
        order_clause = ""
        sort_sections = ["name", "path", "size", "mtime"]
        order_clauses = []
        # XXX Pull the cleanup from the stash?
        # sort_orders stores the lowest priority first, visit in reverse so
        # the highest priority column comes first in the clause
        for sort_section in reversed(self.sort_orders):
            sort_order = self.sort_orders[sort_section]
            order_clauses.append(" %s%s" % (sort_sections[sort_section],
                "" if (sort_order == Qt.AscendingOrder) else " DESC"))

        # sort_orders is empty after sorting has been disabled with
        # sort(-1, ...), a bare ORDER BY would be a SQL syntax error
        if (len(order_clauses) > 0):
            order_clause = " ORDER BY%s" % ",".join(order_clauses)

        sql_query_string = "SELECT *, rowid FROM files%s%s" % (filter_clause, order_clause)
        info("Filter query %r params %s", sql_query_string, filter_params)
        self.cursor = self.conn.execute(sql_query_string, filter_params)

        self.end_of_cursor = False
        self.data = []
        self.loaded_row_count = 0

    def setFilter(self, filter):
        """
        Set the whitespace separated filter words and reset the model, a row
        is kept if its filepath contains every word
        """
        # XXX This rebuilds the query from scratch, in the normal case
        #     the filter is typed sequentially and the old filter is a superset
        #     of the new filter (less or longer words), could store the result
        #     in a table and fetch from that table if the new filter is a
        #     subset?
        self.filter_words = filter.split()
        self.beginResetModel()
        # XXX This should try to preserve the focused and selected rows
        self.reset()
        self.endResetModel()

    def sort(self, section, sort_order, ignore_redundant = True):
        """
        Make section the highest priority sort column with the given order
        and reset the model.

        @param section column index, -1 clears all the sort orders (without
               resetting the model)
        @param sort_order Qt.AscendingOrder or Qt.DescendingOrder
        @param ignore_redundant if True, a call that repeats the current
               highest priority section and order does nothing
        """
        info("sort %d %d", section, sort_order)

        if (section == -1):
            # section can be -1 if called to disable sorting
            # XXX Disabling sorting was used for the case rows were appended
            #     unsorted in order to not disturb the UI, probably doesn't
            #     make sense anymore with an indexed database.
            self.sort_orders.clear()

        else:
            # When a header is clicked, QT ends up calling sort twice in a row
            # with the same section and sort order, ignore redundant calls
            # (although this is not that important because DB sorting is
            # relatively fast)

            # XXX With DB, sorting also rebuilds the filter, and if the filter
            #     causes a full table scan then sorting won't be fast, does it
            #     make sense to store into a temp table and then re-sort the
            #     temp table instead. This will turn any filter change into a
            #     full table scan (bad), but if the new filter string is a
            #     superset of the previous filter string (usual) then only the
            #     temporary table would need to be re-filtered, which not only
            #     would be fast but would also benefit the current (common)
            #     worst case which is when a filter word is entered with no
            #     matches, which causes a full table scan.

            # Store the sort_orders lower priority first, sorting by different
            # columns can be done by consecutively calling sort() for each
            # column to be sorted, in priority order (highest priority sorting
            # last)
            if ((not ignore_redundant) or
                # There's no sorting
                (len(self.sort_orders) == 0) or
                # This section is not already the highest priority or, if it
                # is, it had a different order
                (next(reversed(self.sort_orders)) != section) or
                (self.sort_orders[section] != sort_order)):

                # Remove and add the sort order so it becomes last in the
                # ordered dict, the sort query constructor will visit them in
                # reverse
                if (section in self.sort_orders):
                    del self.sort_orders[section]
                self.sort_orders[section] = sort_order

                dbg("sorted rows %s", self.totalRowCount())

                dbg("resetting model")
                self.beginResetModel()
                # XXX This should try to preserve the focused and selected rows
                self.reset()
                self.endResetModel()
                dbg("resetted model")

            else:
                warn("ignoring redundant sort call")
class TableView(QTableView):
    """
    Table view over the files model with actions to open the selected files
    with their default application and to copy/cut them (or their filepaths)
    to the clipboard.

    Signals
    - filepathCopied(str) is emitted once per filepath collected by
      getSelectedFilepaths
    - defaultAppLaunched(str) is emitted after a filepath has been launched
    """
    filepathCopied = pyqtSignal(str)
    defaultAppLaunched = pyqtSignal(str)

    def __init__(self, *args, **kwargs):
        super(TableView, self).__init__(*args, **kwargs)

        # XXX This interacts in a weird way with the lineedit, when return is
        #     pressed on the lineedit, the default application is launched,
        #     which is weird UX-wise, use a keypressEvent instead or consume
        #     that one in the lineEdit?
        self.openAct = QAction('Open', self, shortcut="return", triggered=self.launchSelectedFilepaths)
        # Override the default tableview copy action which only copies the
        # filename, with one that copies the full filepath
        self.copyFilepathsAct = QAction('Copy Filepaths', self, shortcut="ctrl+shift+c", triggered=self.copySelectedFilepaths)
        self.copyFilesAct = QAction('Copy', self, shortcut="ctrl+c", triggered=self.copySelectedFiles)
        self.cutFilesAct = QAction('Cut', self, shortcut="ctrl+x", triggered=self.cutSelectedFiles)
        # XXX Allow paste into the current path? what if multiple selections,
        #     copy to the focused line or do multiple copies?

        # XXX Adding the action to the TableView here won't be necessary if
        #     added to QMainWindow menubar
        for action in (self.openAct, self.copyFilepathsAct, self.copyFilesAct, self.cutFilesAct):
            self.addAction(action)

    def getSelectedFilepaths(self):
        """
        @return list with the filepath of every selected row, filepathCopied
                is emitted for each of them
        """
        collected = []
        # XXX Could also do selectedRows(column)? (but that method doesn't seem
        #     to be available?)
        for index in self.selectedIndexes():
            # Note for each row there's a selection per column, only take
            # one filepath per row
            if (index.column() != 0):
                continue

            selected_filepath = self.model().getFilepath(index)
            collected.append(selected_filepath)
            self.filepathCopied.emit(selected_filepath)

        return collected

    def launchWithPreferredApp(self, ix):
        """
        Launch the file of the row ix belongs to with its default application
        and emit defaultAppLaunched
        """
        target = self.model().getFilepath(ix)
        info("launchWithPreferredApp %r", target)
        launch_with_preferred_app(target)
        self.defaultAppLaunched.emit(target)

    def launchSelectedFilepaths(self):
        """
        Launch every selected row with its default application
        """
        # XXX Could also do selectedRows(column)?
        # Note for each row there's a selection per column, only launch
        # one filepath per row
        first_column_indexes = [index for index in self.selectedIndexes() if (index.column() == 0)]
        for index in first_column_indexes:
            self.launchWithPreferredApp(index)

    def copySelectedFilepaths(self):
        """
        Put the selected filepaths in the clipboard as text, one per line
        """
        selected_filepaths = self.getSelectedFilepaths()
        logger.info("Copying filepaths %r", selected_filepaths)
        qApp.clipboard().setText("\n".join(selected_filepaths))

    def cutCopySelectedFiles(self, cut = False):
        """
        Put the selected files in the clipboard as urls

        @param cut if True, additionally flag the clipboard data as a cut
               (move) operation for Windows, KDE and Gnome style desktops
        """
        # XXX Do something to gray out if cutting? (note the file doesn't really
        #     get cut until copied elsewhere so there's no point in refreshing
        #     the database here)
        selected_filepaths = self.getSelectedFilepaths()
        logger.info("%s files %r", "Cutting" if cut else "Copying", selected_filepaths)

        file_urls = [QUrl.fromLocalFile(selected_filepath) for selected_filepath in selected_filepaths]
        payload = QMimeData()
        payload.setUrls(file_urls)

        if (cut):
            # Copy is supported transparently by setUrls, but cut needs
            # different support on Windows, KDE and Gnome
            # Eg see https://github.com/lxqt/libfm-qt/blob/master/src/utilities.cpp#L128

            # Windows
            # 2 is WinForms.DragDropEffects.Move
            payload.setData("Preferred DropEffect", struct.pack("<I", 2))

            # KDE
            # XXX Untested
            payload.setData("application/x-kde-cutselection", struct.pack("<I", 1))

            # Gnome, LXDE, and XFCE
            # Note url.toString() returns unicode but QByteArray won't take
            # unicode, convert to utf-8
            url_lines = "\n".join([file_url.toString() for file_url in file_urls])
            gnome_text = u"cut\n" + url_lines + "\n"
            payload.setData("x-special/gnome-copied-files", QByteArray(gnome_text.encode("utf-8")))

        qApp.clipboard().setMimeData(payload)

    def copySelectedFiles(self):
        """
        Copy the selected files to the clipboard
        """
        self.cutCopySelectedFiles()

    def cutSelectedFiles(self):
        """
        Cut the selected files to the clipboard
        """
        self.cutCopySelectedFiles(True)

    def contextMenuEvent(self, event):
        """
        Pop up the context menu with the open, copy and cut actions at the
        mouse cursor position
        """
        self.menu = QMenu(self)
        # XXX Provide a way of copying all the filtered elements straight from
        #     the database without having to scroll to the end of the table and
        #     without having to populate the table with them (needs access to
        #     the db from here?)
        for action in (self.openAct, self.copyFilepathsAct, self.copyFilesAct, self.cutFilesAct):
            self.menu.addAction(action)

        # XXX Add more actions like copying the selected files, cutting the
        #     selected files, and pasting into the destination row dirpath or
        #     into some directory chosen by dialog box, export selected to csv

        self.menu.popup(QCursor.pos())
class MainWindow(QMainWindow):
    """
    Main application window: an editable combo box to type the filter on top,
    the table of files below, and a status bar with a transient message, the
    worker status and the loaded/total row counts.

    The constructor also creates a Worker, moves it to a QThread and starts
    that thread.
    """
    # XXX Add option to create new window/instance? Allow multiple instances of
    #     the app and move the db update to a different process?
    #     Moving the db update to a different process will also remove the UI
    #     stalls due to the GIL when the db is updating.
    #     see https://stackoverflow.com/questions/26746379/how-to-signal-slots-in-a-gui-from-a-different-process
    # XXX Add server mode
    # XXX Add client mode (for launching apps, a mapping from server local dir
    #     to client remote share will be needed, or the server can serve the
    #     file to a temporary local file, but that won't be good for big files)
    # XXX Add QSettings storage
    # XXX Add configuration dialog box (paths, servers, clients, window sizes)
    def __init__(self, parent = None):
        super(MainWindow, self).__init__(parent)

        self.resize(1000, 500)

        self.setWindowTitle("FileFinder - %s" % os.path.basename(database_filepath))

        # Central widget with a vertical layout: filter row on top, table
        # below
        wid = QWidget(self)
        self.setCentralWidget(wid)

        l = QVBoxLayout()
        wid.setLayout(l)

        # Horizontal row holding the filter combo box
        widd = QWidget(self)
        h = QHBoxLayout()
        widd.setLayout(h)
        h.setContentsMargins(0, 0, 0, 0)
        l.addWidget(widd)

        combo = QComboBox()
        combo.setEditable(True)
        # Hardcoded switch: filter on every edit (False) or only when return
        # is pressed (True)
        search_on_enter = False
        if (search_on_enter):
            combo.lineEdit().returnPressed.connect(self.updateFilter)
        else:
            combo.lineEdit().textEdited.connect(self.updateFilter)
        self.combo = combo

        h.addWidget(combo, 1)

        # XXX Have a scan/stop scan button?
        # Disabled code, kept for the note above
        if (False):
            button = QPushButton("Scan")
            h.addStretch()
            h.addWidget(button, 0)
            self.scan_button = button

        entries = []
        model = TableModel(entries, ["Name", "Path", "Size", "Date"])
        self.model = model

        table = TableView()
        # Table font is a bit larger than regular, use the same as in the
        # combobox
        font = QFont(table.font().family(), combo.font().pointSize())
        table.setFont(font)
        table.setModel(model)
        table.setWordWrap(False)
        # Set the sort indicator first before enabling sorting, so sorting only
        # happens once, at enable time
        table.horizontalHeader().setSortIndicator(2, Qt.DescendingOrder)
        table.horizontalHeader().sortIndicatorChanged.connect(self.sortModel)
        table.setSortingEnabled(True)
        table.setSelectionBehavior(QTableView.SelectRows)
        table.setTabKeyNavigation(False)
        # Flag the table to be resized to content when the first rows are
        # inserted. Resize only at startup, don't mess with the size set by the
        # user after startup
        self.resize_table_to_contents = True
        # Set the name column to stretch if the window is wider than the table
        # Note this prevents resizing the name column, but other columns can be
        # resized and the name column will pick up the slack
        table.horizontalHeader().setSectionResizeMode(0, QHeaderView.Stretch)

        table.doubleClicked.connect(table.launchWithPreferredApp)
        # Report the table actions in the status bar for 2 seconds
        table.filepathCopied.connect(lambda s: self.showMessage("Copied path %s" % s, 2000))
        table.defaultAppLaunched.connect(lambda s: self.showMessage("Launched %s" % s, 2000))

        model.rowsInserted.connect(self.onRowsInserted)

        self.table = table

        l.addWidget(table)

        frame_style = QFrame.WinPanel | QFrame.Sunken

        # Can't set sunken style on QStatusBar.showMessage, use a widget and
        # reimplement showMessage and clearMessage
        # Single shot timer that clears the transient message when it expires
        timer = QTimer()
        timer.setSingleShot(True)
        timer.timeout.connect(self.clearMessage)
        self.status_message_timer = timer

        # Transient message, takes the stretch of the status bar
        self.status_message_widget = QLabel()
        self.status_message_widget.setFrameStyle(frame_style)
        self.statusBar().addWidget(self.status_message_widget, 1)

        # Worker status, set from the worker started/finished signals below
        self.status_widget = QLabel()
        self.status_widget.setFrameStyle(frame_style)
        self.statusBar().addPermanentWidget(self.status_widget)

        # Loaded/total row counts, see updateStatusBar
        self.status_count_widget = QLabel()
        self.status_count_widget.setFrameStyle(frame_style)
        self.statusBar().addPermanentWidget(self.status_count_widget)

        self.updateStatusBar()

        # XXX Looks like there are two ways of doing Qt threads, investigate more:
        #     a) Derive Qthread, reimplement run
        #     b) Create a worker, move (reparent) to the thread and tie many
        #        cleanup signals
        #     c) Use a Python thread
        #
        #     There's some discussion on whether a) is the wrong approach because
        #     the QThread belongs to the current thread, not the started thread.
        #     Also whether a) queues connections or not by default (it's also
        #     said that PyQt5 doesn't queue by default even with the worker
        #     approach anyway).
        #
        #     Some discussions also say that signal and slots or even Qt functions
        #     cannot be used on c)
        #     See https://doc.qt.io/qt-5/qthread.html#details
        #     See https://realpython.com/python-pyqt-qthread/#multithreading-in-pyqt-with-qthread
        #
        #     The worker approach doesn't allow debugging in vscode (but the QThread
        #     does?), one workaround is to call the run method serially or to add
        #       import debugpy; debug_this_thread()
        #     https://stackoverflow.com/questions/71834240/how-to-debug-pyqt5-threads-in-visual-studio-code
        #     https://code.visualstudio.com/docs/python/debugging#_troubleshooting

        # Worker approach, option b) above
        # NOTE(review): the self.thread attribute has the same name as the
        # QObject.thread() method, which it hides on this instance
        self.thread = QThread()
        self.worker = Worker()
        # Step 4: Move worker to the thread
        self.worker.moveToThread(self.thread)
        # Step 5: Connect signals and slots
        self.thread.started.connect(self.worker.run)
        connection_type = Qt.AutoConnection
        self.worker.started.connect(lambda s: self.status_widget.setText("%s" % s), connection_type)
        # When the worker finishes: stop the thread, show Idle, clear the
        # transient message and schedule the worker and thread for deletion
        self.worker.finished.connect(self.thread.quit)
        self.worker.finished.connect(lambda : self.status_widget.setText("Idle"), connection_type)
        self.worker.finished.connect(self.clearMessage, connection_type)
        self.worker.finished.connect(self.worker.deleteLater)
        self.thread.finished.connect(self.thread.deleteLater)
        # Show the worker traversing notifications as the transient message
        self.worker.traversing.connect(self.showMessage, connection_type)

        # XXX Setting Idle priority doesn't seem to make any difference to the
        #     UI freezes, docs say in Linux priority is not supported?
        self.thread.start(QThread.IdlePriority)

        info("done initialization")

    def showMessage(self, msg, timeout_ms=0):
        """
        Show msg in the status bar message widget

        @param msg text to show
        @param timeout_ms if greater than zero, the message is cleared after
               that many milliseconds, otherwise it stays until replaced or
               cleared
        """
        # Cancel the pending clear of any previous message
        self.status_message_timer.stop()
        self.status_message_widget.setText(msg)
        if (timeout_ms > 0):
            self.status_message_timer.start(timeout_ms)

    def clearMessage(self):
        """
        Empty the status bar message widget
        """
        self.status_message_widget.setText("")

    def onRowsInserted(self, index, start, end):
        """
        Slot for the model rowsInserted signal: resize the table to its
        contents the first time it is called and refresh the row counts in
        the status bar
        """
        dbg("onRowsInserted %d %d %d %d", index.row(), index.column(), start, end)

        if (self.resize_table_to_contents):
            info("resizing using %d loaded rows", self.table.model().loadedRowCount())
            self.table.resizeColumnsToContents()
            self.table.resizeRowsToContents()
            # Now that there's a minimum row height, set that one as default for
            # future rows
            self.table.verticalHeader().setDefaultSectionSize(self.table.rowHeight(0))
            self.resize_table_to_contents = False

        self.updateStatusBar()

    def sortModel(self, section, sort_order):
        """
        Slot for the header sortIndicatorChanged signal: sort the model by
        the given column and order
        """
        # XXX This could preserve the selection and focus by saving before sort
        #     and restoring after sort?
        self.showMessage("Sorting...")
        self.model.sort(section, sort_order)
        self.clearMessage()

    def updateStatusBar(self):
        """
        Show the loaded and total row counts in the status bar as
        loaded/total
        """
        # Display a "?" indicator if there are still rows to load
        c = "" if self.model.end_of_cursor else "?"
        self.status_count_widget.setText("%d%s/%d" % (
            self.model.loadedRowCount(),
            c,
            self.model.totalRowCount()
        ))

    def updateFilter(self):
        """
        Slot for the filter line edit: apply the current text as the model
        filter and refresh the status bar
        """
        filter = self.combo.lineEdit().text()
        self.showMessage("Filtering...")
        self.model.setFilter(filter)
        self.clearMessage()
        self.updateStatusBar()
def is_enoent(e):
    """
    @param e exception with an errno attribute (eg OSError, IOError)

    @return True if e is a "no such file or directory" error. On Windows a
            "network path not found" error (winerror 53) also comes with
            errno ENOENT, that one returns False.
    """
    # XXX Missing protecting against:
    #     Linux:
    #       host is down errno 112 (Linux) EHOSTDOWN maybe also EHOSTUNREACH?
    #     Windows:
    #       network path not found WindowsError.winerror 53. Note that the
    #       errno for this one is ENOENT
    return (
        (e.errno == errno.ENOENT) and
        # winerror only exists on Windows errors, check the attribute instead
        # of the class name so subclasses and exceptions that carry winerror
        # under a different class name are recognized too
        (getattr(e, "winerror", None) != 53)
    )
class Worker(QObject):
traversing = pyqtSignal(str)
finished = pyqtSignal()
started = pyqtSignal(str)
def update_db_subdir(self, conn, read_cursor, subdirpath, row):
"""
SQLite Important isolation behavior:
"Changes made in one database connection are invisible to all other
database connections prior to commit."
"A query sees all changes that are completed on the same database
connection prior to the start of the query, regardless of whether or not
those changes have been committed."
"If changes occur on the same database connection after a query starts
running but before the query completes, then the query might return a
changed row more than once, or it might return a row that was previously
deleted."
"If an application issues a SELECT statement on a single table like
"SELECT rowid, * FROM table WHERE ..." ... "it is safe for the
application to delete the current row or any prior row using "DELETE
FROM table WHERE rowid=?""
"Within a single database connection X, a SELECT statement always sees
all changes to the database that are completed prior to the start of the
SELECT statement, whether committed or uncommitted"
"WAL mode permits simultaneous readers and writers. It can do this
because changes do not overwrite the original database file, but rather
go into the separate write-ahead log file. That means that readers can
continue to read the old, original, unaltered content from the original
database file at the same time that the writer is appending to the
write-ahead log." (this is for diferent connections, the on isolation on
same connection rule still holds)
See https://www.sqlite.org/isolation.html
The current option is for the outer query to use a readonly cursor with
a different connection (which preserves the original cursor until commit
happens) and to use WAL mode.
Using a different connection also requires to defer the commit until the
whole dirpath (not just the subdirpath) has been updated
Alternatively, insertions and deletions could be stored in Python lists
and deferred until the end of the dirpath or some batch size.
"""
dbg("update_db_subdir %r %s", subdirpath, row)
# XXX This is missing notifying the view of updates, for now the user
# will have to update the filter so the view is refreshed with the
# new DB contents
# XXX The filesystem access (listdir, getsize, getmtime) should happen
# on multiple threads and a single thread should collect those and
# update the database, or collect the database updates and send them
# to the database writer thread
# XXX db updates should be moved to a different process, they cause UI
# stalls when ingesting remote directories with lots of files.
# This is a hotpath when no updates are found but at least on laptop
# this emit makes no difference at all even with lots of subdirs
self.traversing.emit(subdirpath)
# Get the mtime for this specific path, we could use the mtime for the
# global dirpath, but that one is not updated in the database until the
# whole dirpath has been updated, so using this specific path allows fine
# grain committing and avoiding doing the work again if it's aborted for
# some reason.
subdirpath_max_mtime = conn.execute("SELECT mtime FROM files WHERE ((path == ?) AND (name = ?))",
[os.path.dirname(subdirpath), os.path.basename(subdirpath)]).fetchone()
if (subdirpath_max_mtime is None):
# This is None when a directory was deleted from the filesystem,
# detected and deleted from the database when traversing the parent,
# but children are still around in the database so the directory
# is picked up again in the outer loop.
# XXX The outer loop should only pick directories and not try to
# work them out from files? (leftover from when only files were
# stored)
info("None SELECT mtime, deleting children for subdirpath %r", subdirpath)
subdirpath_max_mtime = 0
# Fall through, this will hit two exceptions below, one to getmtime,
# the other to listdir and proceed to delete all children one by
# one
else:
# Note this could be 0 if this directory was never traversed so the
# db entry has zero to force the traversal
subdirpath_max_mtime = subdirpath_max_mtime[0]
# This triggers an exception if the subdirpath has been removed, in that
# case the subdirpath was already deleted from the database, but don't
# early exit since the children still need to be deleted
# XXX Delete all children by using a single prefix query? (but needs to
# redo the outer query or will still be found here again)
# XXX This time was already recovered somewhere, find where and don't
# fetch it?
try:
dbg("getmtiming sd %r", subdirpath)
subdirpath_mtime = int(os.path.getmtime(subdirpath) * 1000.0)
dbg("getmtimed sd %r", subdirpath)
except OSError as e:
exc("Error %r calling getmtime for subdirpath %r, raising if not ENOENT %d vs %d",
e, subdirpath, e.errno, errno.ENOENT)
if (not is_enoent(e)):
# XXX Raising here when not ENOENT (eg temporary network
# error) prevents deleting valid entries from the
# database which is good, but will abort the program.
# Trap, backoff, and retry instead
raise
info("ENOENT for getmtime, deleting children for subdirpath %r", subdirpath)
subdirpath_mtime = subdirpath_max_mtime + 1
# XXX Note testing the subdirpath mtime is not robust enough, will fail
# to update when only file sizes or attributes have been modified
# XXX Verify that the following updates cause a newer parent dir
# date
# - file/subdir created/deleted
# - file/subdir modified (name, size or attributes)
# XXX Looks like only creation/deletion modifies the parent
# directory date, renames only modify the parent
# See https://stackoverflow.com/questions/1025187/rules-for-date-modified-of-folders-in-windows-explorer
# See https://web.archive.org/web/20080219020154/http://support.microsoft.com/kb/299648
if (subdirpath_mtime > subdirpath_max_mtime):
try:
# XXX This fails with long paths on Windows, need to use long
# path prefix, see
# https://stackoverflow.com/questions/18390341/unable-to-locate-files-with-long-names-on-windows-with-python
dbg("listdiring %r", subdirpath)
filenames = os.listdir(subdirpath)
dbg("listdired %r", subdirpath)
filenames.sort()
except OSError as e:
exc("Error %r calling listdir for subdirpath %r, raising if not ENOENT %d vs %d",
e, subdirpath, e.errno, errno.ENOENT)
if (not is_enoent(e)):
# XXX Raising here when not ENOENT (eg temporary network
# error) prevents deleting valid entries from the
# database which is good, but will abort the program.
# Trap, backoff, and retry instead
raise
info("ENOENT for listdir, deleting children for subdirpath %r", subdirpath)
filenames = []
i_filename = 0
refresh_read_cursor = False
while (True):
done_with_filenames = (i_filename >= len(filenames))
done_with_rows = ((row is None) or (row[1] != subdirpath))
if (done_with_filenames and done_with_rows):
break
if (done_with_filenames):
# done with filenames, the remaining rows with the same subdirpath
# have been deleted
comp = 1
elif (done_with_rows):
# Done with the rows, the rest of the filenames need to be
# inserted
comp = -1
else:
dbg("comp %r vs %r", filenames[i_filename], row[0])
comp = cmp(filenames[i_filename], row[0])
if (comp == 0):
# Common case, no update, just increment row and filename Note