Skip to content

Commit

Permalink
refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
kosloot committed Oct 1, 2024
1 parent 3d61c83 commit 1d2ef7f
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 41 deletions.
30 changes: 15 additions & 15 deletions src/TICCL-chainclean.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -302,13 +302,13 @@ int main( int argc, char **argv ){
multimap<int,UnicodeString,std::greater<int>> desc_parts_freq;
// sort on highest frequency first.
// DOES IT REALLY MATTER???
for ( const auto& cc : parts_freq ){
desc_parts_freq.insert( make_pair(cc.second,cc.first) );
for ( const auto& [val,freq] : parts_freq ){
desc_parts_freq.insert( make_pair(freq,val) );
}
if ( verbosity > 0 ){
cerr << "The unknown parts:" << endl;
for ( const auto& it : desc_parts_freq ){
cerr << it.first << "\t" << it.second << endl;
for ( const auto& [val,freq] : desc_parts_freq ){
cerr << val << "\t" << freq << endl;
}
}

Expand Down Expand Up @@ -387,14 +387,14 @@ int main( int argc, char **argv ){
set<int> keys;
// sort on highest frequency first.
// DOES IT REALLY MATTER???
for ( const auto& cc : cc_freqs ){
keys.insert(cc.second);
desc_cc.insert( make_pair(cc.second,cc.first) );
for ( const auto& [val,key] : cc_freqs ){
keys.insert(key);
desc_cc.insert( make_pair(key,val) );
}
if ( show ){
cerr << "found " << desc_cc.size() << " CC's for: " << unk_part << endl;
for ( const auto& it : desc_cc ){
cerr << it.first << "\t" << it.second << endl;
for ( const auto& [key,val] : desc_cc ){
cerr << key << "\t" << val << endl;
}
}
map<int,vector<UnicodeString>,std::greater<int>> desc_cc_vec_map;
Expand All @@ -409,21 +409,21 @@ int main( int argc, char **argv ){
}
if ( show ){
cerr << "found " << cc_order.size() << " CC's for: " << unk_part << endl;
for ( const auto& it : desc_cc_vec_map ){
cerr << it.first << "\t" << it.second << endl;
for ( const auto& [val,vec] : desc_cc_vec_map ){
cerr << val << "\t" << vec << endl;
}
}
for ( const auto& dvm_it : desc_cc_vec_map ){
for ( const auto& [cc,vec] : desc_cc_vec_map ){
if ( show ){
cerr << "With frequency = " << dvm_it.first << endl;
cerr << "With frequency = " << cc << endl;
}
for ( const auto& dcc : dvm_it.second ){
for ( const auto& dcc : vec ){
UnicodeString cand_cor = dcc;
if ( caseless ){
cand_cor.toLower();
}
if ( show ){
cerr << "BEKIJK: " << cand_cor << "[" << dvm_it.first << "]" << endl;
cerr << "BEKIJK: " << cand_cor << "[" << cc << "]" << endl;
}
map<UnicodeString,int> uniq;
auto it = copy_chain_records.begin();
Expand Down
14 changes: 7 additions & 7 deletions src/TICCL-lexstat.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,13 @@ void create_output( const string& name,
multimap<size_t,UChar> reverse;
bitType count = 0;
bitType out_count = 0;
for ( const auto& it : chars ){
if ( clip >= 0 && it.second < (size_t)clip ){
out_count += it.second;
for ( const auto& [ch,freq] : chars ){
if ( clip >= 0 && freq < (size_t)clip ){
out_count += freq;
}
else {
count += it.second;
reverse.insert( make_pair(it.second,it.first) );
count += freq;
reverse.insert( make_pair(freq,ch) );
}
}
os << "## Alphabetsize: " << reverse.size() + (separator.isEmpty()?0:1)
Expand Down Expand Up @@ -112,9 +112,9 @@ void create_dia_file( const string& filename,
const map<UChar,size_t>& chars,
const map<UnicodeString,bitType>& hashes ){
ofstream os( filename );
for ( const auto& it : chars ){
for ( const auto& [c,freq] : chars ){
UnicodeString us;
us += it.first;
us += c;
UnicodeString ss = TiCC::filter_diacritics( us );
if ( ss != us ){
auto hit = hashes.find( us );
Expand Down
14 changes: 7 additions & 7 deletions src/TICCL-stats.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,19 @@ void create_wf_list( const map<UnicodeString, unsigned int>& wc,
cerr << "failed to create outputfile '" << filename << "'" << endl;
exit(EXIT_FAILURE);
}
map<unsigned int, set<UnicodeString> > wf;
for ( const auto& cit : wc ){
if ( cit.second <= clip ){
total -= cit.second;
map<unsigned int, set<UnicodeString> > fws;
for ( const auto& [word,freq] : wc ){
if ( freq <= clip ){
total -= freq;
}
else {
wf[cit.second].insert( cit.first );
fws[freq].insert( word );
}
}
unsigned int sum=0;
unsigned int types=0;
auto wit = wf.rbegin();
while ( wit != wf.rend() ){
auto wit = fws.rbegin();
while ( wit != fws.rend() ){
for( const auto& sit : wit->second ){
sum += wit->first;
os << sit << "\t" << wit->first;
Expand Down
24 changes: 12 additions & 12 deletions src/word2vec.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ bool wordvec_tester::fill( const string& name ){
for ( size_t i = 0; i < _dim; ++i ){
vec[i] /= len;
}
// ansd insert in the vocabulary
// and insert in the vocabulary
vocab.insert( make_pair( word, vec ) );
}
fclose(f);
Expand Down Expand Up @@ -121,18 +121,18 @@ bool wordvec_tester::lookup( const string& sentence, size_t num_vec,
// keep the 'num_vec' largest

result.resize( num_vec, {"", 0.0 } );
for ( const auto& it: vocab ) {
for ( const auto& [word,p_vec]: vocab ) {
bool hit = false;
for ( const auto& w_it : words ){
if ( w_it == it.first ){
if ( w_it == word ){
hit = true;
break;
}
}
if ( hit ) continue;
float dist = 0;
for ( size_t a = 0; a < _dim; ++a ){
dist += vec[a] * it.second[a];
dist += vec[a] * p_vec[a];
}
for ( size_t a = 0; a < num_vec; ++a ) {
if (dist > result[a].d ) {
Expand All @@ -141,7 +141,7 @@ bool wordvec_tester::lookup( const string& sentence, size_t num_vec,
for ( size_t d = num_vec - 1; d > a; d--) {
result[d] = result[d-1];
}
result[a].w = it.first;
result[a].w = word;
result[a].d = dist;
break;
}
Expand Down Expand Up @@ -194,16 +194,16 @@ bool wordvec_tester::analogy( const vector<string>& words,
// now compare with ALL the vectors in the vocabulary
// keep the 'num_vec' largest
result.resize( num_vec, {"", 0.0 } );
for ( const auto& it: vocab ) {
if ( it.first == it0->first
|| it.first == it1->first
|| it.first == it2->first ){
// cerr << "skip " << it.first << endl;
for ( const auto& [word,p_vec]: vocab ) {
if ( word == it0->first
|| word == it1->first
|| word == it2->first ){
// cerr << "skip " << word << endl;
continue;
}
float dist = 0;
for ( size_t a = 0; a < _dim; ++a ){
dist += vec[a] * it.second[a];
dist += vec[a] * p_vec[a];
}
for ( size_t a = 0; a < num_vec; ++a ) {
if (dist > result[a].d ) {
Expand All @@ -212,7 +212,7 @@ bool wordvec_tester::analogy( const vector<string>& words,
for ( size_t d = num_vec - 1; d > a; d--) {
result[d] = result[d-1];
}
result[a].w = it.first;
result[a].w = word;
result[a].d = dist;
break;
}
Expand Down

0 comments on commit 1d2ef7f

Please sign in to comment.