diff --git a/Bandage.pro b/Bandage.pro index 8dd0217d..95b5d563 100644 --- a/Bandage.pro +++ b/Bandage.pro @@ -29,6 +29,7 @@ INCLUDEPATH += ui SOURCES += \ program/main.cpp\ + program/dotplot.cpp \ program/settings.cpp \ program/globals.cpp \ program/graphlayoutworker.cpp \ @@ -119,6 +120,7 @@ SOURCES += \ HEADERS += \ program/settings.h \ + program/dotplot.h \ program/globals.h \ program/graphlayoutworker.h \ graph/debruijnnode.h \ diff --git a/BandageTests.pro b/BandageTests.pro index dcfb5d9a..adbc84e0 100644 --- a/BandageTests.pro +++ b/BandageTests.pro @@ -33,6 +33,7 @@ SOURCES += \ program/settings.cpp \ program/globals.cpp \ program/graphlayoutworker.cpp \ + program/dotplot.cpp \ graph/debruijnnode.cpp \ graph/debruijnedge.cpp \ graph/graphicsitemnode.cpp \ @@ -123,6 +124,7 @@ HEADERS += \ program/settings.h \ program/globals.h \ program/graphlayoutworker.h \ + program/dotplot.h \ graph/debruijnnode.h \ graph/debruijnedge.h \ graph/graphicsitemnode.h \ diff --git a/command_line/image.cpp b/command_line/image.cpp index 13442d0c..42956c3a 100644 --- a/command_line/image.cpp +++ b/command_line/image.cpp @@ -128,7 +128,7 @@ int bandageImage(QStringList arguments) std::vector startingNodes = g_assemblyGraph->getStartingNodes(&errorTitle, &errorMessage, g_settings->doubleMode, g_settings->startingNodes, - "all"); + "all", ""); QString errormsg; QStringList columns; diff --git a/command_line/reduce.cpp b/command_line/reduce.cpp index 5be3c87c..735a5edb 100644 --- a/command_line/reduce.cpp +++ b/command_line/reduce.cpp @@ -105,7 +105,7 @@ int bandageReduce(QStringList arguments) std::vector startingNodes = g_assemblyGraph->getStartingNodes(&errorTitle, &errorMessage, g_settings->doubleMode, g_settings->startingNodes, - "all"); + "all", ""); if (errorMessage != "") { diff --git a/graph/assemblygraph.cpp b/graph/assemblygraph.cpp index 3da3e7b7..d16541a6 100644 --- a/graph/assemblygraph.cpp +++ b/graph/assemblygraph.cpp @@ -66,13 +66,26 @@ AssemblyGraph::~AssemblyGraph() void AssemblyGraph::cleanUp() { - QMapIterator i(m_deBruijnGraphNodes); - while (i.hasNext()) { - i.next(); - delete i.value(); + QMapIterator i(m_deBruijnGraphPaths); + while (i.hasNext()) + { + i.next(); + delete i.value(); + } + m_deBruijnGraphPaths.clear(); + } + + + { + QMapIterator i(m_deBruijnGraphNodes); + while (i.hasNext()) + { + i.next(); + delete i.value(); + } + m_deBruijnGraphNodes.clear(); } - m_deBruijnGraphNodes.clear(); QMapIterator, DeBruijnEdge*> j(m_deBruijnGraphEdges); while (j.hasNext()) @@ -396,6 +409,7 @@ void AssemblyGraph::determineGraphInfo() m_edgeCount = edgeCount; m_totalLength = totalLength; m_meanDepth = getMeanDepth(); + m_pathCount = m_deBruijnGraphPaths.size(); std::sort(nodeDepths.begin(), nodeDepths.end()); @@ -579,6 +593,7 @@ void AssemblyGraph::buildDeBruijnGraphFromGfa(QString fullFileName, bool *unsupp QMap colours; QMap labels; + QMap paths; QTextStream in(&inputFile); while (!in.atEnd()) { @@ -770,6 +785,14 @@ void AssemblyGraph::buildDeBruijnGraphFromGfa(QString fullFileName, bool *unsupp *unsupportedCigar = true; } } + + // Load paths + else if (lineParts.at(0) == "P") { + if (lineParts.size() < 4) + continue; + + paths.insert(lineParts.at(1), lineParts.at(2)); + } } //Pair up reverse complements, creating them if necessary. @@ -804,6 +827,22 @@ void AssemblyGraph::buildDeBruijnGraphFromGfa(QString fullFileName, bool *unsupp int overlap = edgeOverlaps[i]; createDeBruijnEdge(node1Name, node2Name, overlap, EXACT_OVERLAP); } + + // Create all the paths. + QMapIterator p(paths); + while (p.hasNext()) { + p.next(); + QString pathName = p.key(); + + QString pathStringFailure; + Path pp = Path::makeFromString(p.value(), false, &pathStringFailure); + + if (pp.isEmpty()) { + std::cout << pathStringFailure.toUtf8().constData() << std::endl; + } else { + m_deBruijnGraphPaths.insert(pathName, new Path(pp)); + } + } } if (m_deBruijnGraphNodes.size() == 0) @@ -2002,7 +2041,7 @@ void AssemblyGraph::addGraphicsItemsToScene(MyGraphicsScene * scene) std::vector AssemblyGraph::getStartingNodes(QString * errorTitle, QString * errorMessage, bool doubleMode, - QString nodesList, QString blastQueryName) + QString nodesList, QString blastQueryName, QString pathName) { std::vector startingNodes; @@ -2062,6 +2101,16 @@ std::vector AssemblyGraph::getStartingNodes(QString * errorTitle } } + else if (g_settings->graphScope == AROUND_PATHS) + { + if (m_deBruijnGraphPaths.count(pathName) == 0) + { + *errorTitle = "Invalid path"; + *errorMessage = "No path with such name is loaded"; + return startingNodes; + } + } + g_settings->doubleMode = doubleMode; clearOgdfGraphAndResetNodes(); @@ -2072,6 +2121,12 @@ std::vector AssemblyGraph::getStartingNodes(QString * errorTitle else if (g_settings->graphScope == DEPTH_RANGE) startingNodes = getNodesInDepthRange(g_settings->minDepthRange, g_settings->maxDepthRange); + else if (g_settings->graphScope == AROUND_PATHS) { + QList nodes = m_deBruijnGraphPaths[pathName]->getNodes(); + + for (QList::iterator i = nodes.begin(); i != nodes.end(); ++i) + startingNodes.push_back(*i); + } return startingNodes; } @@ -3539,7 +3594,7 @@ void AssemblyGraph::getGraphComponentCountAndLargestComponentSize(int * componen QSet visitedNodes; QList< QList > connectedComponents; - + //Loop through all positive nodes. QMapIterator i(m_deBruijnGraphNodes); while (i.hasNext()) @@ -3548,12 +3603,12 @@ void AssemblyGraph::getGraphComponentCountAndLargestComponentSize(int * componen DeBruijnNode * v = i.value(); if (v->isNegativeNode()) continue; - + //If the node has not yet been visited, then it must be the start of a new connected component. if (!visitedNodes.contains(v)) { QList connectedComponent; - + QQueue q; q.enqueue(v); visitedNodes.insert(v); @@ -3576,9 +3631,9 @@ void AssemblyGraph::getGraphComponentCountAndLargestComponentSize(int * componen } connectedComponents.push_back(connectedComponent); - } + } } - + //Now that the list of connected components is built, we look for the //largest one (as measured by total node length). *componentCount = connectedComponents.size(); diff --git a/graph/assemblygraph.h b/graph/assemblygraph.h index 426e7532..fe598c0e 100644 --- a/graph/assemblygraph.h +++ b/graph/assemblygraph.h @@ -50,6 +50,8 @@ class AssemblyGraph : public QObject //pointers. QMap, DeBruijnEdge*> m_deBruijnGraphEdges; + QMap m_deBruijnGraphPaths; + ogdf::Graph * m_ogdfGraph; ogdf::EdgeArray * m_edgeArray; ogdf::GraphAttributes * m_graphAttributes; @@ -57,6 +59,7 @@ class AssemblyGraph : public QObject int m_kmer; int m_nodeCount; int m_edgeCount; + int m_pathCount; long long m_totalLength; long long m_shortestContig; long long m_longestContig; @@ -115,7 +118,8 @@ class AssemblyGraph : public QObject QString * errorMessage, bool doubleMode, QString nodesList, - QString blastQueryName); + QString blastQueryName, + QString pathName); bool checkIfStringHasNodes(QString nodesString); QString generateNodesNotFoundErrorMessage(std::vector nodesNotInGraph, diff --git a/program/dotplot.cpp b/program/dotplot.cpp new file mode 100644 index 00000000..44911b7e --- /dev/null +++ b/program/dotplot.cpp @@ -0,0 +1,175 @@ +/* + * dotplot.cpp + * + * Created on: Oct 16, 2017 + * Author: Ivan Sovic + * GitHub: @isovic + * Copyright: Ivan Sovic, 2017 + * Licence: MIT + * + * Simple tool that collects all kmer hits between + * two sequences. If drawn, this represents a dotplot + * between two sequences. Can be used for very simple + * mapping as well. + */ + +#include "dotplot.h" + +#include +#include +#include +#include + +const int8_t nuc_to_2bit[256] = { + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 0 - 15 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 16 - 31 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 32 - 47 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 48 - 63 + 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, // 64 - 79 (A, C, G) + 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 80 - 95 (T) + 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, // 96 - 111 + 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 112 - 127 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 128 - 143 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 144 - 159 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 160 - 176 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 176 - 191 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 192 - 208 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 208 - 223 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 224 - 239 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 // 240 - 256 +}; + +const int8_t nuc_to_complement[256] = { + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 0 - 15 + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 16 - 31 + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 32 - 47 + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 48 - 63 + 78, 84, 78, 71, 78, 78, 78, 67, 78, 78, 78, 78, 78, 78, 78, 78, // 64 - 79 (A, C, G) + 78, 78, 78, 78, 65, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 80 - 95 (T) + 78, 84, 78, 71, 78, 78, 78, 67, 78, 78, 78, 78, 78, 78, 78, 78, // 96 - 111 + 78, 78, 78, 78, 65, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 112 - 127 + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 128 - 143 + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 144 - 159 + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 160 - 176 + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 176 - 191 + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 192 - 208 + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 208 - 223 + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, // 224 - 239 + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78 // 240 - 256 +}; + + +std::string reverseComplement(const std::string& seq) { + std::stringstream ss; + for (int32_t i = ((int32_t) seq.size()) - 1; i >= 0; i--) { + ss << nuc_to_complement[(int32_t) seq[i]]; + } + return ss.str(); +} + +std::vector hashKmers(const std::string& seq, int32_t k, bool seq_is_rev) { + std::vector ret; + + if (((int32_t) seq.size()) < k) { + return ret; + } + + if (k <= 0 || k >= 31) { + return ret; + } + + // Pre-scan the sequence and check whether it contains + // only [ACTG] bases. We do not allow other bases, because + // they will be packed as 2-bit values in a hash key. + for (size_t i = 0; i < seq.size(); i++) { + if (nuc_to_2bit[(int32_t) seq[i]] > 3) { + return ret; + } + } + + int64_t buff = 0x0; + int64_t buff_mask = (((int64_t) 1) << (2 * k)) - 1; // Clear the upper bits. + + ret.reserve(seq.size() - k + 1); + + // Initialize the buffer. + for (int32_t i = 0; i < (k - 1); i++) { + int64_t conv_val = nuc_to_2bit[(int32_t) seq[i]]; + buff = (((int64_t) buff) << 2) | (conv_val & 0x03); + } + + for (int32_t i = (k - 1); i < (int32_t) seq.size(); i++) { + // Update the buffer + int64_t conv_val = nuc_to_2bit[(int32_t) seq[i]]; + buff = (((int64_t) buff) << 2) | (conv_val & 0x03); + buff &= buff_mask; + + int32_t pos = (seq_is_rev == false) ? (i - k + 1) : (seq.size() - (i - k + 1) - 1); + ret.emplace_back(KmerPos(buff, pos)); + } + + return ret; +} + +std::vector findHits(const std::vector& sorted_kmers_seq1, const std::vector& sorted_kmers_seq2) { + int32_t k1 = 0, k2 = 0; + + int32_t n_kmers1 = sorted_kmers_seq1.size(); + int32_t n_kmers2 = sorted_kmers_seq2.size(); + + std::vector hits; + + // Check the sortedness of input. + for (size_t i = 1; i < sorted_kmers_seq1.size(); i++) { + if(sorted_kmers_seq1[i].kmer < sorted_kmers_seq1[i - 1].kmer) { + return hits; + } + } + for (size_t i = 1; i < sorted_kmers_seq2.size(); i++) { + if(sorted_kmers_seq2[i].kmer < sorted_kmers_seq2[i - 1].kmer) { + return hits; + } + } + + while (k1 < n_kmers1 && k2 < n_kmers2) { + while (k1 < n_kmers1 && sorted_kmers_seq1[k1].kmer < sorted_kmers_seq2[k2].kmer) { + k1 += 1; + } + if (k1 >= n_kmers1) { break; } + + while (k2 < n_kmers2 && sorted_kmers_seq2[k2].kmer < sorted_kmers_seq1[k1].kmer) { + k2 += 1; + } + if (k2 >= n_kmers2) { break; } + + // If the values are not the same, just keep on gliding. + if (sorted_kmers_seq1[k1].kmer != sorted_kmers_seq2[k2].kmer) { + continue; + } + + // Find n^2 exact hits. + for (int32_t i = k2; i < n_kmers2 && sorted_kmers_seq2[i].kmer == sorted_kmers_seq1[k1].kmer; i++) { + hits.emplace_back(KmerHit(sorted_kmers_seq1[k1].pos, sorted_kmers_seq2[i].pos)); + } + k1 += 1; + } + + return hits; +} + +std::vector findKmerMatches(const std::string& seq1, const std::string& seq2, int32_t k) { + auto kmers_seq1 = hashKmers(seq1, k, false); + auto kmers_seq2 = hashKmers(seq2, k, false); + auto kmers_seq2_rev = hashKmers(reverseComplement(seq2), k, true); + + std::sort(kmers_seq1.begin(), kmers_seq1.end(), [](const KmerPos& a, const KmerPos& b) { return (a.kmer < b.kmer || (a.kmer == b.kmer && a.pos < b.pos)); } ); + std::sort(kmers_seq2.begin(), kmers_seq2.end(), [](const KmerPos& a, const KmerPos& b) { return (a.kmer < b.kmer || (a.kmer == b.kmer && a.pos < b.pos)); } ); + std::sort(kmers_seq2_rev.begin(), kmers_seq2_rev.end(), [](const KmerPos& a, const KmerPos& b) { return (a.kmer < b.kmer || (a.kmer == b.kmer && a.pos < b.pos)); } ); + + auto hits = findHits(kmers_seq1, kmers_seq2); + auto hits_rev = findHits(kmers_seq1, kmers_seq2_rev); + + hits.insert(hits.end(), hits_rev.begin(), hits_rev.end()); + + return hits; +} diff --git a/program/dotplot.h b/program/dotplot.h new file mode 100644 index 00000000..2d33188e --- /dev/null +++ b/program/dotplot.h @@ -0,0 +1,58 @@ +/* + * dotplot.h + * + * Created on: Oct 16, 2017 + * Author: Ivan Sovic + * GitHub: @isovic + * Copyright: Ivan Sovic, 2017 + * Licence: MIT + * + * Simple tool that collects all kmer hits between + * two sequences. If drawn, this represents a dotplot + * between two sequences. Can be used for very simple + * mapping as well. + */ + +#ifndef SRC_PROGRAM_DOTPLOT_H_ +#define SRC_PROGRAM_DOTPLOT_H_ + +#include +#include +#include + +class KmerPos { + public: + KmerPos() : kmer(0), pos(0) {} + KmerPos(int64_t _kmer, int32_t _pos) : kmer(_kmer), pos(_pos) { } + bool operator==(const KmerPos& b) const { + return (this->kmer == b.kmer && this->pos == b.pos); + } + bool operator<(const KmerPos& b) const { + return (this->kmer < b.kmer || (this->kmer == b.kmer && this->pos < b.pos)); + } + + int64_t kmer; + int32_t pos; +}; + +struct KmerHit { + KmerHit() : x(0), y(0) { } + KmerHit(int32_t _x, int32_t _y) : x(_x), y(_y) { } + bool operator==(const KmerHit& b) const { + return (this->x == b.x && this->y == b.y); + } + bool operator<(const KmerHit& b) const { + return (this->x < b.x || (this->x == b.x && this->y < b.y)); + } + + int32_t x; + int32_t y; +}; + +std::string reverseComplement(const std::string& seq); +std::vector hashKmers(const std::string& seq, int32_t k, bool seq_is_rev); +std::vector findHits(const std::vector& sorted_kmers_seq1, const std::vector& sorted_kmers_seq2); +std::vector findKmerMatches(const std::string& seq1, const std::string& seq2, int32_t k); +std::vector findKmerMatches(const std::string& seq1, const std::string& seq2, int32_t k); + +#endif diff --git a/program/globals.h b/program/globals.h index 10b1bbbd..45f70025 100644 --- a/program/globals.h +++ b/program/globals.h @@ -34,7 +34,7 @@ class AssemblyGraph; enum NodeColourScheme {UNIFORM_COLOURS, RANDOM_COLOURS, DEPTH_COLOUR, BLAST_HITS_RAINBOW_COLOUR, BLAST_HITS_SOLID_COLOUR, CONTIGUITY_COLOUR, CUSTOM_COLOURS}; -enum GraphScope {WHOLE_GRAPH, AROUND_NODE, AROUND_BLAST_HITS, DEPTH_RANGE}; +enum GraphScope {WHOLE_GRAPH, AROUND_NODE, AROUND_PATHS, AROUND_BLAST_HITS, DEPTH_RANGE}; enum ContiguityStatus {STARTING, CONTIGUOUS_STRAND_SPECIFIC, CONTIGUOUS_EITHER_STRAND, MAYBE_CONTIGUOUS, NOT_CONTIGUOUS}; diff --git a/tests/bandagetests.cpp b/tests/bandagetests.cpp index 388cd427..92d05875 100644 --- a/tests/bandagetests.cpp +++ b/tests/bandagetests.cpp @@ -29,6 +29,7 @@ #include "../graph/debruijnedge.h" #include "../program/globals.h" #include "../command_line/commoncommandlinefunctions.h" +#include "../program/dotplot.h" class BandageTests : public QObject { @@ -58,7 +59,10 @@ private slots: void changeNodeDepths(); void blastQueryPaths(); void bandageInfo(); - + void testReverseComplement(); + void testHashKmers(); + void testFindHits(); + void testFindKmerMatches(); private: void createGlobals(); @@ -1430,7 +1434,6 @@ void BandageTests::blastQueryPaths() QCOMPARE(query7Paths.size(), 1); } - void BandageTests::bandageInfo() { int n50 = 0; @@ -1480,6 +1483,236 @@ void BandageTests::bandageInfo() QCOMPARE(9398, largestComponentLength); } +void BandageTests::testReverseComplement() +{ + QCOMPARE( reverseComplement("A"), std::string("T")); + QCOMPARE( reverseComplement("C"), std::string("G")); + QCOMPARE( reverseComplement("T"), std::string("A")); + QCOMPARE( reverseComplement("G"), std::string("C")); + QCOMPARE( reverseComplement(""), std::string("")); + QCOMPARE( reverseComplement("ACTG"), std::string("CAGT")); +} + +void BandageTests::testHashKmers() +{ + { // Test for an empty string. + std::string seq = ""; + int32_t k = 10; + bool seq_is_rev = false; + std::vector expected_kmers = { }; + std::vector kmers = hashKmers(seq, k, seq_is_rev); + QCOMPARE((int64_t) kmers.size(), (int64_t) 0); + } + + { // Test the behaviour when a non [ACTG] character is given. + std::string seq = "AAAAANAAAAA"; + int32_t k = 10; + bool seq_is_rev = false; + std::vector expected_kmers = { }; + std::vector kmers = hashKmers(seq, k, seq_is_rev); + QCOMPARE((int64_t) kmers.size(), (int64_t) 0); + } + + { // A simple normal test case. Entire seq should be only one kmer. + std::string seq = "AAAAAAAAAA"; + int32_t k = 10; + bool seq_is_rev = false; + std::vector expected_kmers = { KmerPos(0x0, 0) }; + std::vector kmers = hashKmers(seq, k, seq_is_rev); + QCOMPARE(kmers, expected_kmers); + } + + { // Similar to before, but 6 kmers. + std::string seq = "AAAAAAAAAAAAAAA"; + int32_t k = 10; + bool seq_is_rev = false; + std::vector expected_kmers = { KmerPos(0x0, 0), KmerPos(0x0, 1), + KmerPos(0x0, 2), KmerPos(0x0, 3), + KmerPos(0x0, 4), KmerPos(0x0, 5) }; + std::vector kmers = hashKmers(seq, k, seq_is_rev); + QCOMPARE(kmers, expected_kmers); + } + + { // Test the reverse complement. + std::string seq = "AAAAAAAAAA"; + int32_t k = 10; + bool seq_is_rev = true; + std::vector expected_kmers = { KmerPos(0x0, 9) }; + std::vector kmers = hashKmers(seq, k, seq_is_rev); + QCOMPARE(kmers, expected_kmers); + } + + { // Test what happens if an invalid kmer size is given. + std::string seq = "AAAAAAAAAA"; + int32_t k = 0; + bool seq_is_rev = false; + std::vector kmers = hashKmers(seq, k, seq_is_rev); + QCOMPARE((int64_t) kmers.size(), (int64_t) 0); + } + + { // Test for a normal sequence, but a more complex variation. + std::string seq = "ACTGAAAGACT"; + int32_t k = 10; + bool seq_is_rev = false; + std::vector expected_kmers = { KmerPos(0x1E021, 0), KmerPos(0x78087, 1) }; + std::vector kmers = hashKmers(seq, k, seq_is_rev); + QCOMPARE(kmers, expected_kmers); + } + + { // Similar as before, but in reverse. Hash keys should be same, but positions different. + std::string seq = "ACTGAAAGACT"; + int32_t k = 10; + bool seq_is_rev = true; + std::vector expected_kmers = { KmerPos(0x1E021, 10), KmerPos(0x78087, 9) }; + std::vector kmers = hashKmers(seq, k, seq_is_rev); + QCOMPARE(kmers, expected_kmers); + } + + { // Test a normal sequence and a normal (smaller) kmer size. + std::string seq = "ACTGAAAGACT"; + int32_t k = 4; + bool seq_is_rev = false; + std::vector expected_kmers = { KmerPos(0x1E, 0), KmerPos(0x78, 1), KmerPos(0xE0, 2), + KmerPos(0x80, 3), KmerPos(0x2, 4), KmerPos(0x8, 5), + KmerPos(0x21, 6), KmerPos(0x87, 7) }; + std::vector kmers = hashKmers(seq, k, seq_is_rev); + QCOMPARE(kmers, expected_kmers); + } +} + +void BandageTests::testFindHits() +{ + { // Test on empty inputs. + std::vector sorted_kmers_seq1 = { }; + std::vector sorted_kmers_seq2 = { }; + std::vector result = findHits(sorted_kmers_seq1, sorted_kmers_seq2); + QCOMPARE((int64_t) result.size(), (int64_t) 0); + } + + { // No hits are output if any of the input arrays are not sorted. + std::vector sorted_kmers_seq1 = {KmerPos(1, 0), KmerPos(0, 1)}; + std::vector sorted_kmers_seq2 = {KmerPos(0, 0), KmerPos(1, 1)}; + std::vector result = findHits(sorted_kmers_seq1, sorted_kmers_seq2); + QCOMPARE((int64_t) result.size(), (int64_t) 0); + } + + { /// Sorted but no hits. + std::vector sorted_kmers_seq1 = {KmerPos(2, 0), KmerPos(3, 1)}; + std::vector sorted_kmers_seq2 = {KmerPos(0, 0), KmerPos(1, 1)}; + std::vector result = findHits(sorted_kmers_seq1, sorted_kmers_seq2); + QCOMPARE((int64_t) result.size(), (int64_t) 0); + } + + { // There should be two hits. + std::vector sorted_kmers_seq1 = {KmerPos(0, 0), KmerPos(1, 1)}; + std::vector sorted_kmers_seq2 = {KmerPos(0, 0), KmerPos(1, 1)}; + std::vector result = findHits(sorted_kmers_seq1, sorted_kmers_seq2); + std::vector expected = {KmerHit(0, 0), KmerHit(1, 1)}; + QCOMPARE(result, expected); + } + + { // One is empty, the other is not. + std::vector sorted_kmers_seq1 = { }; + std::vector sorted_kmers_seq2 = {KmerPos(0, 0), KmerPos(1, 1)}; + std::vector result = findHits(sorted_kmers_seq1, sorted_kmers_seq2); + QCOMPARE((int64_t) result.size(), (int64_t) 0); + } + + { // One is empty, the other is not. + std::vector sorted_kmers_seq1 = {KmerPos(0, 0), KmerPos(1, 1)}; + std::vector sorted_kmers_seq2 = { }; + std::vector result = findHits(sorted_kmers_seq1, sorted_kmers_seq2); + QCOMPARE((int64_t) result.size(), (int64_t) 0); + } + + { // Only a subset matches. + std::vector sorted_kmers_seq1 = {KmerPos(3, 1), KmerPos(4, 2)}; + std::vector sorted_kmers_seq2 = {KmerPos(0, 0), KmerPos(1, 1), + KmerPos(3, 3), KmerPos(4, 7), + KmerPos(5, 8) }; + std::vector result = findHits(sorted_kmers_seq1, sorted_kmers_seq2); + std::vector expected = {KmerHit(1, 3), KmerHit(2, 7)}; + QCOMPARE(result, expected); + } + + { // Test multiple successive hits. + std::vector sorted_kmers_seq1 = { KmerPos(2, 0), KmerPos(3, 1), KmerPos(4, 2) }; + std::vector sorted_kmers_seq2 = { KmerPos(0, 0), KmerPos(1, 1), + KmerPos(3, 3), KmerPos(3, 7), + KmerPos(3, 8), KmerPos(5, 9) }; + std::vector result = findHits(sorted_kmers_seq1, sorted_kmers_seq2); + std::vector expected = { KmerHit(1, 3), KmerHit(1, 7), KmerHit(1, 8)}; + QCOMPARE(result, expected); + } +} + +void BandageTests::testFindKmerMatches() +{ + { // Test a simple basic match case. + std::string seq1 = "CT"; + std::string seq2 = "CT"; + int32_t k = 2; + std::vector result = findKmerMatches(seq1, seq2, k); + std::vector expected = {KmerHit(0, 0)}; + QCOMPARE(result, expected); + } + + { // Test a case with no hits. + std::string seq1 = "AAAAA"; + std::string seq2 = "CT"; + int32_t k = 2; + std::vector result = findKmerMatches(seq1, seq2, k); + std::vector expected = { }; + QCOMPARE((int64_t) result.size(), (int64_t) expected.size()); + } + + { // Test a normal one-hit case. + std::string seq1 = "ACTTGGGA"; + std::string seq2 = "CT"; + int32_t k = 2; + std::vector result = findKmerMatches(seq1, seq2, k); + std::vector expected = {KmerHit(1, 0)}; + QCOMPARE((int64_t) result.size(), (int64_t) expected.size()); + QCOMPARE(result, expected); + } + + { // Test matching empty sequences. + std::string seq1 = ""; + std::string seq2 = ""; + int32_t k = 10; + std::vector result = findKmerMatches(seq1, seq2, k); + std::vector expected = { }; + QCOMPARE((int64_t) result.size(), (int64_t) expected.size()); + QCOMPARE(result, expected); + } + + { // Test finding of hits in a larger sequence. + std::string seq1 = "CTCGCACTTGGGGAATCGCGCAGACCTCACCCGGTTTGCAGGCTTGCGCCGGGCGGTAGATGCGCCGCCAGGCGAAAAACAGCGCGACCAGCGCTGCGCC"; + std::string seq2 = "CTCGCACTTG" "AGACCTCACC" "TAGATGCGCCG"; + // Reverse complement: + // CGGCGCATCTA GGTGAGGTCT CAAGTGCGAG + int32_t k = 10; + std::vector result = findKmerMatches(seq1, seq2, k); + std::vector expected = {KmerHit(0, 0), KmerHit(21, 10), + KmerHit(56, 20), KmerHit(57, 21) }; + std::sort(result.begin(), result.end()); + std::sort(expected.begin(), expected.end()); + QCOMPARE((int64_t) result.size(), (int64_t) expected.size()); + QCOMPARE(result, expected); + } + + { // Test the reverse complement matching. + std::string seq1 = "CTCGCACTTG"; + std::string seq2 = "CAAGTGCGAG"; + int32_t k = 10; + std::vector result = findKmerMatches(seq1, seq2, k); + std::vector expected = {KmerHit(0, 9) }; + std::sort(result.begin(), result.end()); + std::sort(expected.begin(), expected.end()); + QCOMPARE((int64_t) result.size(), (int64_t) expected.size()); + QCOMPARE(result, expected); + } +} diff --git a/ui/mainwindow.cpp b/ui/mainwindow.cpp index 67aec68e..c7f08d6d 100644 --- a/ui/mainwindow.cpp +++ b/ui/mainwindow.cpp @@ -67,6 +67,7 @@ #include "changenodedepthdialog.h" #include #include "graphinfodialog.h" +#include "program/dotplot.h" MainWindow::MainWindow(QString fileToLoadOnStartup, bool drawGraphAfterLoad) : QMainWindow(0), @@ -156,6 +157,7 @@ MainWindow::MainWindow(QString fileToLoadOnStartup, bool drawGraphAfterLoad) : connect(ui->setNodeCustomLabelButton, SIGNAL(clicked()), this, SLOT(setNodeCustomLabel())); connect(ui->actionSettings, SIGNAL(triggered()), this, SLOT(openSettingsDialog())); connect(ui->selectNodesButton, SIGNAL(clicked()), this, SLOT(selectUserSpecifiedNodes())); + connect(ui->pathSelectButton, SIGNAL(clicked()), this, SLOT(selectPathNodes())); connect(ui->selectionSearchNodesLineEdit, SIGNAL(returnPressed()), this, SLOT(selectUserSpecifiedNodes())); connect(ui->actionAbout, SIGNAL(triggered()), this, SLOT(openAboutDialog())); connect(ui->blastSearchButton, SIGNAL(clicked()), this, SLOT(openBlastSearchDialog())); @@ -195,6 +197,7 @@ MainWindow::MainWindow(QString fileToLoadOnStartup, bool drawGraphAfterLoad) : connect(ui->actionChange_node_name, SIGNAL(triggered(bool)), this, SLOT(changeNodeName())); connect(ui->actionChange_node_depth, SIGNAL(triggered(bool)), this, SLOT(changeNodeDepth())); connect(ui->moreInfoButton, SIGNAL(clicked(bool)), this, SLOT(openGraphInfoDialog())); + connect(ui->drawDotplotButton, SIGNAL(clicked()), this, SLOT(drawDotplot())); connect(this, SIGNAL(windowLoaded()), this, SLOT(afterMainWindowShow()), Qt::ConnectionType(Qt::QueuedConnection | Qt::UniqueConnection)); } @@ -448,6 +451,7 @@ void MainWindow::loadGraph2(GraphFileType graphFileType, QString fullFileName) // to the default of 'Random colours'. if (!customColours && ui->coloursComboBox->currentIndex() == 6) ui->coloursComboBox->setCurrentIndex(0); + setupPathSelectionComboBox(); } catch (...) @@ -470,12 +474,14 @@ void MainWindow::displayGraphDetails() { ui->nodeCountLabel->setText(formatIntForDisplay(g_assemblyGraph->m_nodeCount)); ui->edgeCountLabel->setText(formatIntForDisplay(g_assemblyGraph->m_edgeCount)); + ui->pathCountLabel->setText(formatIntForDisplay(g_assemblyGraph->m_pathCount)); ui->totalLengthLabel->setText(formatIntForDisplay(g_assemblyGraph->m_totalLength)); } void MainWindow::clearGraphDetails() { ui->nodeCountLabel->setText("0"); ui->edgeCountLabel->setText("0"); + ui->pathCountLabel->setText("0"); ui->totalLengthLabel->setText("0"); } @@ -605,6 +611,7 @@ void MainWindow::graphScopeChanged() setStartingNodesWidgetVisibility(false); setNodeDistanceWidgetVisibility(false); setDepthRangeWidgetVisibility(false); + setPathSelectionWidgetVisibility(false); ui->graphDrawingGridLayout->addWidget(ui->nodeStyleInfoText, 1, 0, 1, 1); ui->graphDrawingGridLayout->addWidget(ui->nodeStyleLabel, 1, 1, 1, 1); @@ -620,6 +627,7 @@ void MainWindow::graphScopeChanged() setStartingNodesWidgetVisibility(true); setNodeDistanceWidgetVisibility(true); setDepthRangeWidgetVisibility(false); + setPathSelectionWidgetVisibility(false); ui->nodeDistanceInfoText->setInfoText("Nodes will be drawn if they are specified in the above list or are " "within this many steps of those nodes.

" @@ -645,11 +653,40 @@ void MainWindow::graphScopeChanged() break; case 2: + g_settings->graphScope = AROUND_PATHS; + + setStartingNodesWidgetVisibility(false); + setNodeDistanceWidgetVisibility(true); + setDepthRangeWidgetVisibility(false); + setPathSelectionWidgetVisibility(true); + + ui->nodeDistanceInfoText->setInfoText("Nodes will be drawn if they are specified in the above list or are " + "within this many steps of those nodes.

" + "A value of 0 will result in only the specified nodes being drawn. " + "A large value will result in large sections of the graph around " + "the specified nodes being drawn."); + + ui->graphDrawingGridLayout->addWidget(ui->pathSelectionInfoText, 1, 0, 1, 1); + ui->graphDrawingGridLayout->addWidget(ui->pathSelectionLabel, 1, 1, 1, 1); + ui->graphDrawingGridLayout->addWidget(ui->pathSelectionComboBox, 1, 2, 1, 1); + ui->graphDrawingGridLayout->addWidget(ui->nodeDistanceInfoText, 2, 0, 1, 1); + ui->graphDrawingGridLayout->addWidget(ui->nodeDistanceLabel, 2, 1, 1, 1); + ui->graphDrawingGridLayout->addWidget(ui->nodeDistanceSpinBox, 2, 2, 1, 1); + ui->graphDrawingGridLayout->addWidget(ui->nodeStyleInfoText, 3, 0, 1, 1); + ui->graphDrawingGridLayout->addWidget(ui->nodeStyleLabel, 3, 1, 1, 1); + ui->graphDrawingGridLayout->addWidget(ui->nodeStyleWidget, 3, 2, 1, 1); + ui->graphDrawingGridLayout->addWidget(ui->drawGraphInfoText, 4, 0, 1, 1); + ui->graphDrawingGridLayout->addWidget(ui->drawGraphButton, 4, 1, 1, 2); + + break; + + case 3: g_settings->graphScope = AROUND_BLAST_HITS; setStartingNodesWidgetVisibility(false); setNodeDistanceWidgetVisibility(true); setDepthRangeWidgetVisibility(false); + setPathSelectionWidgetVisibility(false); ui->nodeDistanceInfoText->setInfoText("Nodes will be drawn if they contain a BLAST hit or are within this " "many steps of nodes with a BLAST hit.

" @@ -668,12 +705,13 @@ void MainWindow::graphScopeChanged() break; - case 3: + case 4: g_settings->graphScope = DEPTH_RANGE; setStartingNodesWidgetVisibility(false); setNodeDistanceWidgetVisibility(false); setDepthRangeWidgetVisibility(true); + setPathSelectionWidgetVisibility(false); ui->graphDrawingGridLayout->addWidget(ui->minDepthInfoText, 1, 0, 1, 1); ui->graphDrawingGridLayout->addWidget(ui->minDepthLabel, 1, 1, 1, 1); @@ -717,7 +755,88 @@ void MainWindow::setDepthRangeWidgetVisibility(bool visible) ui->maxDepthLabel->setVisible(visible); ui->maxDepthSpinBox->setVisible(visible); } +void MainWindow::setPathSelectionWidgetVisibility(bool visible) +{ + ui->pathSelectionInfoText->setVisible(visible); + ui->pathSelectionLabel->setVisible(visible); + ui->pathSelectionComboBox->setVisible(visible); +} +void MainWindow::setupPathSelectionComboBox() { + ui->pathSelectionComboBox->clear(); + ui->pathSelectionComboBox2->clear(); + + QStringList comboBoxItems; + for (QMap::key_iterator it = g_assemblyGraph->m_deBruijnGraphPaths.keyBegin(); + it != g_assemblyGraph->m_deBruijnGraphPaths.keyEnd(); ++it) + { + comboBoxItems.push_back(*it); + } + + if (comboBoxItems.size() > 0) + { + ui->pathSelectionComboBox->addItems(comboBoxItems); + ui->pathSelectionComboBox->setEnabled(true); + ui->pathSelectionComboBox2->addItems(comboBoxItems); + ui->pathSelectionComboBox2->setEnabled(true); + } +} + + +void MainWindow::drawDotplotPoweredByLogo(double x, double y, double w) { + QPen pen; + pen.setWidth(0); + pen.setColor(QColor("#BBBBBB")); + QBrush brush(QColor("#BBBBBB")); + + QPen outline; + outline.setWidth(1); + outline.setColor(QColor("#888888")); + + m_dotplotScene->addRect(x + w, y, w, w, pen, brush); + m_dotplotScene->addRect(x + 3*w, y, w, w, pen, brush); + m_dotplotScene->addRect(x, y + w, 5 * w, w, pen, brush); + m_dotplotScene->addRect(x + w, y + 2 * w, w, w, pen, brush); + m_dotplotScene->addRect(x + 3*w, y + 2 * w, w, w, pen, brush); + m_dotplotScene->addRect(x, y + 3 * w, 5 * w, w, pen, brush); + m_dotplotScene->addRect(x, y + 4 * w, w, w, pen, brush); + m_dotplotScene->addRect(x + 2*w, y + 4 * w, w, w, pen, brush); + m_dotplotScene->addRect(x + 4*w, y + 4 * w, w, w, pen, brush); + + m_dotplotScene->addLine(x + 1 * w, y + 0 * w, x + 2 * w, y + 0 * w, outline); + m_dotplotScene->addLine(x + 2 * w, y + 0 * w, x + 2 * w, y + 1 * w, outline); + m_dotplotScene->addLine(x + 2 * w, y + 1 * w, x + 3 * w, y + 1 * w, outline); + m_dotplotScene->addLine(x + 3 * w, y + 1 * w, x + 3 * w, y + 0 * w, outline); + m_dotplotScene->addLine(x + 3 * w, y + 0 * w, x + 4 * w, y + 0 * w, outline); + m_dotplotScene->addLine(x + 4 * w, y + 0 * w, x + 4 * w, y + 1 * w, outline); + m_dotplotScene->addLine(x + 4 * w, y + 1 * w, x + 5 * w, y + 1 * w, outline); + m_dotplotScene->addLine(x + 5 * w, y + 1 * w, x + 5 * w, y + 2 * w, outline); + m_dotplotScene->addLine(x + 5 * w, y + 2 * w, x + 4 * w, y + 2 * w, outline); + m_dotplotScene->addLine(x + 4 * w, y + 2 * w, x + 4 * w, y + 3 * w, outline); + m_dotplotScene->addLine(x + 4 * w, y + 3 * w, x + 5 * w, y + 3 * w, outline); + m_dotplotScene->addLine(x + 5 * w, y + 3 * w, x + 5 * w, y + 5 * w, outline); + m_dotplotScene->addLine(x + 5 * w, y + 5 * w, x + 4 * w, y + 5 * w, outline); + m_dotplotScene->addLine(x + 4 * w, y + 5 * w, x + 4 * w, y + 4 * w, outline); + m_dotplotScene->addLine(x + 4 * w, y + 4 * w, x + 3 * w, y + 4 * w, outline); + m_dotplotScene->addLine(x + 3 * w, y + 4 * w, x + 3 * w, y + 5 * w, outline); + m_dotplotScene->addLine(x + 3 * w, y + 5 * w, x + 2 * w, y + 5 * w, outline); + m_dotplotScene->addLine(x + 2 * w, y + 5 * w, x + 2 * w, y + 4 * w, outline); + m_dotplotScene->addLine(x + 2 * w, y + 4 * w, x + 1 * w, y + 4 * w, outline); + m_dotplotScene->addLine(x + 1 * w, y + 4 * w, x + 1 * w, y + 5 * w, outline); + m_dotplotScene->addLine(x + 1 * w, y + 5 * w, x + 0 * w, y + 5 * w, outline); + m_dotplotScene->addLine(x + 0 * w, y + 5 * w, x + 0 * w, y + 3 * w, outline); + m_dotplotScene->addLine(x + 0 * w, y + 3 * w, x + 1 * w, y + 3 * w, outline); + m_dotplotScene->addLine(x + 1 * w, y + 3 * w, x + 1 * w, y + 2 * w, outline); + m_dotplotScene->addLine(x + 1 * w, y + 2 * w, x + 0 * w, y + 2 * w, outline); + m_dotplotScene->addLine(x + 0 * w, y + 2 * w, x + 0 * w, y + 1 * w, outline); + m_dotplotScene->addLine(x + 0 * w, y + 1 * w, x + 1 * w, y + 1 * w, outline); + m_dotplotScene->addLine(x + 1 * w, y + 1 * w, x + 1 * w, y + 0 * w, outline); + m_dotplotScene->addLine(x + 2 * w, y + 2 * w, x + 3 * w, y + 2 * w, outline); + m_dotplotScene->addLine(x + 3 * w, y + 2 * w, x + 3 * w, y + 3 * w, outline); + m_dotplotScene->addLine(x + 3 * w, y + 3 * w, x + 2 * w, y + 3 * w, outline); + m_dotplotScene->addLine(x + 2 * w, y + 3 * w, x + 2 * w, y + 2 * w, outline); + +} void MainWindow::drawGraph() { @@ -726,7 +845,8 @@ void MainWindow::drawGraph() std::vector startingNodes = g_assemblyGraph->getStartingNodes(&errorTitle, &errorMessage, ui->doubleNodesRadioButton->isChecked(), ui->startingNodesLineEdit->text(), - ui->blastQueryComboBox->currentText()); + ui->blastQueryComboBox->currentText(), + ui->pathSelectionComboBox->currentText()); if (errorMessage != "") { @@ -739,6 +859,158 @@ void MainWindow::drawGraph() layoutGraph(); } +void MainWindow::drawDotplot() +{ + // Fetch the selected nodes. + std::vector selectedNodes = m_scene->getSelectedNodes(); + + // Limit the number of nodes that need to be selected, and notify the user. + if (selectedNodes.size() < 1 || selectedNodes.size() > 2) { + QString infoTitle = "Draw dotplot"; + QString infoMessage = "Select either one (for self-dotplot) or two nodes to dotplot."; + QMessageBox::information(this, infoTitle, infoMessage); + return; + } + + // Placeholder for the sequences which will be dotplotted. + std::vector headers; + std::vector seqs; + + // Enable self-dotplots. + std::vector nodes_to_process = selectedNodes; + if (selectedNodes.size() == 1) { + nodes_to_process.push_back(selectedNodes[0]); + } + + // Get the sequences and the headers of the nodes to draw. + for (size_t i=0; isequenceIsMissing()) { + QString infoTitle = "Draw dotplot"; + QString infoMessage = "Error: The GFA node does not contain a valid sequence!"; + QMessageBox::information(this, infoTitle, infoMessage); + return; + } + + QByteArray nodeSequence = node->getSequence(); + QString nodeHeader = node->getName(); + std::string seq(nodeSequence.constData(), nodeSequence.length()); + std::string header = nodeHeader.toLocal8Bit().constData(); + + seqs.push_back(seq); + headers.push_back(header); + } + + int32_t k = ui->kmerSizeInput->value(); + + // Sanity check for the k-mer size. + if (k <= 0 || k > 30) { + QString infoTitle = "Draw dotplot"; + QString infoMessage = "Error: k-mer size should be > 0 and <= 30."; + QMessageBox::information(this, infoTitle, infoMessage); + return; + } + + // Calculate the dotplot. + auto hits = findKmerMatches(seqs[0], seqs[1], k); + + // Prepare the scene and plot. + m_dotplotScene = std::shared_ptr(new QGraphicsScene()); + ui->dotplotGraphicsView->setScene(m_dotplotScene.get()); + + // Calculate the starts and ends of the dotplot coordinate system. + int32_t max_len = std::max(seqs[0].size(), seqs[1].size()); + double begin_offset = 40; + double end_offset = 10; + double x_begin = begin_offset; + double y_begin = begin_offset; + double max_size = (float) std::min(ui->dotplotGraphicsView->maximumWidth(), ui->dotplotGraphicsView->maximumHeight()) - end_offset - begin_offset; + double scale = max_size / ((double) max_len); + double x_end = x_begin + seqs[0].size() * scale; + double y_end = y_begin + seqs[1].size() * scale; + + // Make the scene not move. + ui->dotplotGraphicsView->setVerticalScrollBarPolicy(Qt::ScrollBarAlwaysOff); + ui->dotplotGraphicsView->setHorizontalScrollBarPolicy(Qt::ScrollBarAlwaysOff); + m_dotplotScene->setSceneRect(0, 0, 290, 290); + + // Add bounds to the dotplot graph. + double overhang = 5; + m_dotplotScene->addLine(x_begin - overhang, y_begin, x_end, y_begin); + m_dotplotScene->addLine(x_end, y_begin - overhang, x_end, y_end); + m_dotplotScene->addLine(x_begin - overhang, y_end, x_end, y_end); + m_dotplotScene->addLine(x_begin, y_begin - overhang, x_begin, y_end); + + double logo_w = 2; + double logo_x = x_begin - 0 - 6 * logo_w; + double logo_y = x_begin - 0 - 6 * logo_w; + drawDotplotPoweredByLogo(logo_x, logo_y, logo_w); + + // Annotate the graph. + QFont font; + font.setPixelSize(8); + font.setFamily("Monospace"); + + { + QGraphicsTextItem *text = m_dotplotScene->addText(QString("%1").arg(seqs[0].size())); + text->setFont(font); + text->setPos(x_end - text->boundingRect().width(), y_begin - text->boundingRect().height()); + } + + { + QGraphicsTextItem *text = m_dotplotScene->addText(QString("%1").arg(0)); + text->setFont(font); + text->setPos(x_begin + 1, y_begin - text->boundingRect().height()); + } + + { + QGraphicsTextItem *text = m_dotplotScene->addText(QString("%1").arg(0)); + text->setFont(font); + text->setPos(x_begin - text->boundingRect().width(), y_begin); + } + + { + QGraphicsTextItem *text = m_dotplotScene->addText(QString("%1").arg(seqs[1].size())); + text->setFont(font); + text->setPos(x_begin - text->boundingRect().width(), y_end - text->boundingRect().height()); + } + + { + std::string trimmed_header = (headers[0].size() > 20) ? headers[0].substr(0, 20) : headers[0]; + QGraphicsTextItem *text = m_dotplotScene->addText(QString(trimmed_header.c_str())); + text->setFont(font); + text->setPos((x_end + x_begin - text->boundingRect().width()) / 2.0, y_begin - text->boundingRect().height()); + } + + { + std::string trimmed_header = (headers[1].size() > 20) ? (headers[1].substr(0, 20)) : (headers[1]); + QGraphicsTextItem *text = m_dotplotScene->addText(QString(trimmed_header.c_str())); + text->setFont(font); + QTransform t; + t.rotate(270); + text->setTransform(t); + text->setPos(x_begin - text->boundingRect().height(), (y_begin + y_end + text->boundingRect().width()) / 2.0); + } + + // Generate the actual dotplot. + QPen pen(Qt::black); + QBrush brush(Qt::black); + for (auto& hit: hits) { + m_dotplotScene->addEllipse(hit.x * scale + x_begin, hit.y * scale + y_begin, 2.0 * scale, 2.0 * scale, + pen, brush); + } + + // Scale the dotplot so it fits in the view with just a bit of margin. + QRectF sceneRectangle = m_dotplotScene->sceneRect(); + sceneRectangle.setWidth(sceneRectangle.width() * 1.05); + sceneRectangle.setHeight(sceneRectangle.height() * 1.05); + ui->dotplotGraphicsView->fitInView(sceneRectangle); + + ui->dotplotGraphicsView->show(); +} + + void MainWindow::graphLayoutFinished() { @@ -1446,44 +1718,44 @@ void MainWindow::openSettingsDialog() } } -void MainWindow::selectUserSpecifiedNodes() -{ - if (g_assemblyGraph->checkIfStringHasNodes(ui->selectionSearchNodesLineEdit->text())) - { - QMessageBox::information(this, "No starting nodes", - "Please enter at least one node when drawing the graph using the 'Around node(s)' scope. " - "Separate multiple nodes with commas."); - return; - } - - if (ui->selectionSearchNodesLineEdit->text().length() == 0) - { - QMessageBox::information(this, "No nodes given", "Please enter the numbers of the nodes to find, separated by commas."); - return; - } - +void MainWindow::doSelectNodes(const std::vector &nodesToSelect, + const std::vector &nodesNotInGraph, + bool recolor) { m_scene->blockSignals(true); m_scene->clearSelection(); - std::vector nodesNotInGraph; - std::vector nodesToSelect = getNodesFromLineEdit(ui->selectionSearchNodesLineEdit, - ui->selectionSearchNodesExactMatchRadioButton->isChecked(), - &nodesNotInGraph); //Select each node that actually has a GraphicsItemNode, and build a bounding //rectangle so the viewport can focus on the selected node. std::vector nodesNotFound; int foundNodes = 0; + QColor color1, color2; for (size_t i = 0; i < nodesToSelect.size(); ++i) { GraphicsItemNode * graphicsItemNode = nodesToSelect[i]->getGraphicsItemNode(); + GraphicsItemNode * rcgraphicsItemNode = nodesToSelect[i]->getReverseComplement()->getGraphicsItemNode(); //If the GraphicsItemNode isn't found, try the reverse complement. This //is only done for single node mode. if (graphicsItemNode == 0 && !g_settings->doubleMode) - graphicsItemNode = nodesToSelect[i]->getReverseComplement()->getGraphicsItemNode(); + graphicsItemNode = rcgraphicsItemNode; if (graphicsItemNode != 0) { + if (recolor) + { + if (i == 0) + { + color1 = graphicsItemNode->m_colour; + if (g_settings->doubleMode) + color2 = rcgraphicsItemNode->m_colour; + } else { + graphicsItemNode->m_colour = color1; + if (g_settings->doubleMode) + rcgraphicsItemNode->m_colour = color2; + } + + } + graphicsItemNode->setSelected(true); ++foundNodes; } @@ -1523,6 +1795,42 @@ void MainWindow::selectUserSpecifiedNodes() selectionChanged(); } +void MainWindow::selectUserSpecifiedNodes() +{ + if (g_assemblyGraph->checkIfStringHasNodes(ui->selectionSearchNodesLineEdit->text())) + { + QMessageBox::information(this, "No starting nodes", + "Please enter at least one node when drawing the graph using the 'Around node(s)' scope. " + "Separate multiple nodes with commas."); + return; + } + + if (ui->selectionSearchNodesLineEdit->text().length() == 0) + { + QMessageBox::information(this, "No nodes given", "Please enter the numbers of the nodes to find, separated by commas."); + return; + } + + std::vector nodesNotInGraph; + std::vector nodesToSelect = getNodesFromLineEdit(ui->selectionSearchNodesLineEdit, + ui->selectionSearchNodesExactMatchRadioButton->isChecked(), + &nodesNotInGraph); + + doSelectNodes(nodesToSelect, nodesNotInGraph); +} + +void MainWindow::selectPathNodes() +{ + std::vector nodesNotInGraph; + std::vector nodesToSelect; + + QList nodes = g_assemblyGraph->m_deBruijnGraphPaths[ui->pathSelectionComboBox2->currentText()]->getNodes(); + for (QList::iterator i = nodes.begin(); i != nodes.end(); ++i) + nodesToSelect.push_back(*i); + + doSelectNodes(nodesToSelect, nodesNotInGraph, ui->pathSelectionRecolorRadioButton->isChecked()); +} + void MainWindow::openAboutDialog() { @@ -2142,8 +2450,9 @@ void MainWindow::setGraphScopeComboBox(GraphScope graphScope) { case WHOLE_GRAPH: ui->graphScopeComboBox->setCurrentIndex(0); break; case AROUND_NODE: ui->graphScopeComboBox->setCurrentIndex(1); break; - case AROUND_BLAST_HITS: ui->graphScopeComboBox->setCurrentIndex(2); break; - case DEPTH_RANGE: ui->graphScopeComboBox->setCurrentIndex(3); break; + case AROUND_PATHS: ui->graphScopeComboBox->setCurrentIndex(2); break; + case AROUND_BLAST_HITS: ui->graphScopeComboBox->setCurrentIndex(3); break; + case DEPTH_RANGE: ui->graphScopeComboBox->setCurrentIndex(4); break; } } @@ -2301,7 +2610,7 @@ void MainWindow::webBlastSelectedNodes() QByteArray urlSafeFasta = makeStringUrlSafe(selectedNodesFasta); QByteArray url = "http://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastn&PAGE_TYPE=BlastSearch&LINK_LOC=blasthome&QUERY=" + urlSafeFasta; - + if (url.length() < 8190) QDesktopServices::openUrl(QUrl(url)); diff --git a/ui/mainwindow.h b/ui/mainwindow.h index 803fb91b..580b8f8a 100644 --- a/ui/mainwindow.h +++ b/ui/mainwindow.h @@ -62,6 +62,7 @@ class MainWindow : public QMainWindow UiState m_uiState; BlastSearchDialog * m_blastSearchDialog; bool m_alreadyShown; + std::shared_ptr m_dotplotScene; void cleanUp(); void displayGraphDetails(); @@ -85,6 +86,7 @@ class MainWindow : public QMainWindow void setNodeColourSchemeComboBox(NodeColourScheme nodeColourScheme); void setGraphScopeComboBox(GraphScope graphScope); void setupBlastQueryComboBox(); + void setupPathSelectionComboBox(); bool checkForImageSave(); QString convertGraphFileTypeToString(GraphFileType graphFileType); void setSelectedNodesWidgetsVisibility(bool visible); @@ -92,6 +94,7 @@ class MainWindow : public QMainWindow void setStartingNodesWidgetVisibility(bool visible); void setNodeDistanceWidgetVisibility(bool visible); void setDepthRangeWidgetVisibility(bool visible); + void setPathSelectionWidgetVisibility(bool visible); static QByteArray makeStringUrlSafe(QByteArray s); void removeGraphicsItemNodes(const std::vector * nodes, bool reverseComplement); void removeGraphicsItemEdges(const std::vector * edges, bool reverseComplement); @@ -123,6 +126,10 @@ private slots: void hideNodes(); void openSettingsDialog(); void openAboutDialog(); + void doSelectNodes(const std::vector &nodesToSelect, + const std::vector &nodesNotInGraph, + bool recolor = false); + void selectPathNodes(); void selectUserSpecifiedNodes(); void graphLayoutFinished(); void openBlastSearchDialog(); @@ -160,6 +167,8 @@ private slots: void changeNodeName(); void changeNodeDepth(); void openGraphInfoDialog(); + void drawDotplotPoweredByLogo(double x, double y, double w); + void drawDotplot(); protected: void showEvent(QShowEvent *ev); diff --git a/ui/mainwindow.ui b/ui/mainwindow.ui index b20e0cf7..81d63e3e 100644 --- a/ui/mainwindow.ui +++ b/ui/mainwindow.ui @@ -121,6 +121,23 @@ + + + Paths: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + + + + + 0 + + + + Total length: @@ -130,7 +147,7 @@ - + 0 @@ -383,6 +400,51 @@ + + + true + + + + 0 + 0 + + + + + 16 + 16 + + + + + + + + true + + + Path: + + + + + + + true + + + + 0 + 0 + + + + Qt::StrongFocus + + + + true @@ -401,7 +463,39 @@ - + + + + true + + + Distance: + + + + + + + true + + + + 0 + 0 + + + + Qt::StrongFocus + + + Qt::AlignCenter + + + 10000 + + + + @@ -417,7 +511,7 @@ - + @@ -483,7 +577,7 @@ - + @@ -496,28 +590,6 @@ - - - - true - - - - 0 - 0 - - - - Qt::StrongFocus - - - Qt::AlignCenter - - - 10000 - - - @@ -571,6 +643,11 @@ Around nodes + + + Around paths + + Around BLAST hits @@ -596,16 +673,6 @@ - - - - true - - - Distance: - - - @@ -622,7 +689,7 @@ - + Qt::StrongFocus @@ -638,7 +705,7 @@ - + @@ -670,7 +737,7 @@ - + @@ -683,7 +750,7 @@ - + @@ -699,7 +766,7 @@ - + @@ -712,7 +779,7 @@ - + Qt::StrongFocus @@ -1433,8 +1500,8 @@ 0 0 - 249 - 899 + 326 + 1204 @@ -1601,9 +1668,193 @@ + - - + + + + 0 + 0 + + + + + 75 + true + + + + Find paths + + + + + + + Qt::Horizontal + + + + + + + true + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + true + + + + 0 + 0 + + + + + 16 + 16 + + + + + + + + Qt::StrongFocus + + + Select + + + true + + + + + + + true + + + + 0 + 0 + + + + Path: + + + + + + + + 0 + 0 + + + + + 16 + 16 + + + + + + + + true + + + Action: + + + + + + + Qt::StrongFocus + + + Recolor + + + + + + + true + + + + 0 + 0 + + + + Qt::StrongFocus + + + + + + + + + + Qt::StrongFocus + + + Find path + + + + + + + + 0 + 0 + + + + + 75 + true + + + + Dotplot + + + + + + + Qt::Horizontal + + + + + + 0 @@ -1617,17 +1868,50 @@ 0 - + - Qt::Vertical + Qt::Horizontal + + + + 40 + 20 + + + + + + + + k-mer size: + + + + + + + Qt::StrongFocus + + + 1 + + + 30 - - QSizePolicy::Fixed + + 15 + + + + + + + Qt::Horizontal - 20 - 60 + 0 + 20 @@ -1635,6 +1919,86 @@ + + + + Qt::StrongFocus + + + Draw dotplot + + + + + + + 0 + + + + + Qt::Horizontal + + + + 0 + 20 + + + + + + + + + 300 + 300 + + + + + 300 + 300 + + + + QPainter::Antialiasing|QPainter::TextAntialiasing + + + + + + + Qt::Horizontal + + + + 0 + 20 + + + + + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + @@ -1788,22 +2152,6 @@ 0 - - - - Qt::Vertical - - - QSizePolicy::Fixed - - - - 20 - 60 - - - -