Skip to content

Commit

Permalink
db module // incomplete, abandoned #13 #22 ghuser-io/ghuser.io#190
Browse files Browse the repository at this point in the history
  • Loading branch information
lourot committed Jul 4, 2019
1 parent 0f5a0a6 commit f99f523
Show file tree
Hide file tree
Showing 9 changed files with 109 additions and 105 deletions.
30 changes: 8 additions & 22 deletions calculateContribsAndMeta.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
const sleep = require('await-sleep');

const data = require('./impl/data');
const db = require('./impl/db');
const DbFile = require('./impl/dbFile');
const scriptUtils = require('./impl/scriptUtils');

Expand All @@ -18,31 +19,16 @@
return;

async function calculateContribsAndMeta() {
let spinner;

let spinnerText = 'Reading users from DB...';
spinner = ora(spinnerText).start();
const users = {};
let numUsers = 0;
for (const file of fs.readdirSync(data.users)) {
await sleep(0); // make loop interruptible
for await (const user of db.asyncNonRemovedUsers()) {
++numUsers;

if (file.endsWith('.json')) {
const user = new DbFile(path.join(data.users, file));
if (!user.ghuser_deleted_because && !user.removed_from_github) {
users[file] = user;
++numUsers;
spinner.text = `${spinnerText} [${numUsers}]`;

// Make sure the corresponding contrib file exists (not the case if it's a new user):
(new DbFile(path.join(data.contribs, file))).write();
}
}
// Make sure the corresponding contrib file exists (not the case if it's a new user):
db.createUserContribList(user.login); //LA_TODO to be tested
}
spinner.succeed(`Found ${numUsers} users in DB`);

spinnerText = 'Reading contribution lists from DB...';
spinner = ora(spinnerText).start();
const spinnerText = 'Reading contribution lists from DB...';
let spinner = ora(spinnerText).start();
const contribs = {};
for (const file of fs.readdirSync(data.contribs)) {
await sleep(0); // make loop interruptible
Expand Down Expand Up @@ -79,7 +65,7 @@

const toBeDeleted = [];
for (const contribList in contribs) {
if (!users[contribList]) {
if (!users[contribList]) { //LA_TODO I've removed this var
toBeDeleted.push(contribList);
}
}
Expand Down
26 changes: 6 additions & 20 deletions fetchOrgs.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
const path = require('path');

const data = require('./impl/data');
const db = require('./impl/db');
const DbFile = require('./impl/dbFile');
const github = require('./impl/github');
const scriptUtils = require('./impl/scriptUtils');
Expand All @@ -18,36 +19,21 @@
return;

async function fetchOrgs() {
let spinner;

// In this file we store repo owners that we know aren't organizations. This avoids querying
// them next time.
const nonOrgs = new DbFile(data.nonOrgs);
nonOrgs.non_orgs = nonOrgs.non_orgs || [];

const users = [];
for (const file of fs.readdirSync(data.users)) {
if (file.endsWith('.json')) {
const user = new DbFile(path.join(data.users, file));
if (!user.ghuser_deleted_because && !user.removed_from_github) {
users.push(user);
}
}
}

let userOrgs = new Set([]);
for (const user of users) {
userOrgs = new Set([...userOrgs, ...user.organizations]);
}
await fetchOrgs(userOrgs);

let contribOwners = new Set([]);
for (const user of users) {
for await (const user of db.asyncNonRemovedUsers()) { //LA_TODO to be tested
userOrgs = new Set([...userOrgs, ...user.organizations]);
contribOwners = new Set([
...contribOwners,
...(user.contribs && user.contribs.repos.map(repo => repo.split('/')[0]) || [])
]);
}
await fetchOrgs(userOrgs);
await fetchOrgs(contribOwners);

stripUnreferencedOrgs();
Expand All @@ -57,7 +43,7 @@
async function fetchOrgs(owners) {
owners:
for (const owner of owners) {
spinner = ora(`Fetching owner ${owner}...`).start();
const spinner = ora(`Fetching owner ${owner}...`).start();
const org = new DbFile(path.join(data.orgs, `${owner}.json`));
if (org.avatar_url) {
spinner.succeed(`Organization ${owner} is already known`);
Expand All @@ -67,7 +53,7 @@
spinner.succeed(`${owner} is a user`);
continue;
}
for (const user of users) {
for (const user of users) { //LA_TODO I removed this var
if (user.login === owner) {
spinner.succeed(`${owner} is a user`);
nonOrgs.non_orgs.push(owner);
Expand Down
17 changes: 2 additions & 15 deletions fetchRepos.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
const sleep = require('await-sleep');

const data = require('./impl/data');
const db = require('./impl/db');
const DbFile = require('./impl/dbFile');
const fetchJson = require('./impl/fetchJson');

Expand Down Expand Up @@ -46,23 +47,9 @@ optional arguments:
return;

async function fetchRepos(firsttime) {
console.log('Reading users from DB...')
const users = [];
for (const file of fs.readdirSync(data.users)) {
await sleep(0); // make loop interruptible

if (file.endsWith('.json')) {
const user = new DbFile(path.join(data.users, file));
if (!user.ghuser_deleted_because && !user.removed_from_github) {
users.push(user);
}
}
}
console.log(`Found ${users.length} users in DB`);

console.log('Searching repos referenced by users...');
const referencedRepos = new Set([]);
for (const user of users) {
for await (const user of db.asyncNonRemovedUsers()) { //LA_TODO to be tested
for (const repo in (user.contribs && user.contribs.repos || [])) {
await sleep(0); // make loop interruptible

Expand Down
34 changes: 11 additions & 23 deletions fetchUserDetailsAndContribs.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
let path = require('path');

const data = require('./impl/data');
const db = require('./impl/db');
const DbFile = require('./impl/dbFile');
const fetchJson = require('./impl/fetchJson');
const github = require('./impl/github');
Expand Down Expand Up @@ -67,34 +68,21 @@ optional arguments:
}

if (cli.input.length === 1) {
await fetchUserDetailsAndContribs(`${cli.input[0].toLowerCase()}.json`);
const userFilePath = path.join(data.users, `${cli.input[0].toLowerCase()}.json`);
const user = new DbFile(userFilePath);
await fetchUserDetailsAndContribs(user); //LA_TODO to be tested
} else {
for (const file of fs.readdirSync(data.users)) {
if (file.endsWith('.json')) {
await fetchUserDetailsAndContribs(file);
}
for await (const user of db.asyncNonRemovedUsers()) {
await fetchUserDetailsAndContribs(user); //LA_TODO to be tested
}
}

return;

async function fetchUserDetailsAndContribs(userFileName) {
let spinner;

const userFilePath = path.join(data.users, userFileName);
const userFile = new DbFile(userFilePath);
async function fetchUserDetailsAndContribs(userFile) {
if (!userFile.login) {
throw `${userFilePath} is malformed. Did you run ./addUser.js ?`;
}
if (userFile.ghuser_deleted_because) {
console.log(`${userFile.login} has been deleted, skipping...`);
return;
}
if (userFile.removed_from_github) {
// For now ok, but maybe some day we'll have to deal with resurrected users.
console.log(`${userFile.login} was removed from GitHub in the past, skipping...`);
return;
}

{
const now = new Date;
Expand All @@ -117,7 +105,7 @@ optional arguments:

async function fetchDetails(userFile) {
const ghUserUrl = `https://api.github.com/users/${userFile.login}`;
spinner = ora(`Fetching ${ghUserUrl}...`).start();
const spinner = ora(`Fetching ${ghUserUrl}...`).start();
const ghDataJson = await github.fetchGHJson(
ghUserUrl, spinner, [304, 404],
userFile.contribs && userFile.contribs.fetched_at && new Date(userFile.contribs.fetched_at)
Expand Down Expand Up @@ -151,7 +139,7 @@ optional arguments:

async function fetchOrgs(userFile) {
const orgsUrl = userFile.organizations_url;
spinner = ora(`Fetching ${orgsUrl}...`).start();
const spinner = ora(`Fetching ${orgsUrl}...`).start();
const orgsDataJson = await github.fetchGHJson(orgsUrl, spinner);
spinner.succeed(`Fetched ${orgsUrl}`);

Expand Down Expand Up @@ -194,7 +182,7 @@ optional arguments:
// fetchUserContribs() won't find forks as they are not considered to be contributions. But
// the user might well have popular forks.

spinner = ora(`Fetching ${userFile.login}'s popular forks...`).start();
const spinner = ora(`Fetching ${userFile.login}'s popular forks...`).start();

const perPage = 100;
for (let page = 1; page <= 5; ++page) {
Expand All @@ -219,7 +207,7 @@ optional arguments:

async function fetchSettings(userFile) {
const url = `https://raw.githubusercontent.com/${userFile.login}/ghuser.io.settings/master/ghuser.io.json`;
spinner = ora(`Fetching ${userFile.login}'s settings...`).start();
const spinner = ora(`Fetching ${userFile.login}'s settings...`).start();

const dataJson = await fetchJson(url, spinner, [404]);
if (dataJson == 404) {
Expand Down
17 changes: 7 additions & 10 deletions findUsersToRemove.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
const path = require('path');

const data = require('./impl/data');
const db = require('./impl/db');
const DbFile = require('./impl/dbFile');
const github = require('./impl/github');
const scriptUtils = require('./impl/scriptUtils');
Expand All @@ -23,20 +24,16 @@
// * aren't marked not to be deleted, and
// * haven't starred the project.

let spinner;
const now = new Date;
const minAgeMonths = 1;

const users = [];
for (const file of fs.readdirSync(data.users)) {
if (file.endsWith('.json')) {
const user = new DbFile(path.join(data.users, file));
if (!user.ghuser_deleted_because && !user.ghuser_keep_because && !user.removed_from_github
&& now - Date.parse(user.ghuser_created_at) > minAgeMonths * 30 * 24 * 60 * 60 * 1000) {
users.push(user);
}
for await (const user of db.asyncNonRemovedUsers()) {
if (!user.ghuser_keep_because
&& now - Date.parse(user.ghuser_created_at) > minAgeMonths * 30 * 24 * 60 * 60 * 1000) {
users.push(user);
}
}
} //LA_TODO to be tested

const stargazers = await fetchStargazers('ghuser-io/ghuser.io');
const toRemove = users.map(user => user.login).filter(user => stargazers.indexOf(user) === -1);
Expand All @@ -62,7 +59,7 @@ to make sure we're not wasting resources, I'd like to know if you'd like to keep

async function fetchStargazers(repo) {
const ghUrl = `https://api.github.com/repos/${repo}/stargazers`;
spinner = ora(`Fetching ${ghUrl}...`).start();
const spinner = ora(`Fetching ${ghUrl}...`).start();
const ghDataJson = await github.fetchGHJson(ghUrl, spinner);
spinner.succeed(`Fetched ${ghUrl}`);
return ghDataJson.map(stargazer => stargazer.login);
Expand Down
2 changes: 2 additions & 0 deletions impl/data.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/usr/bin/env node
'use strict';

//TODO all this should move inside db.js and this file should be removed.

(() => {
const os = require('os');
const fs = require('fs');
Expand Down
58 changes: 58 additions & 0 deletions impl/db.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env node
'use strict';

module.exports = {

// Async generator yielding an instance of DbFile for each user present in the database and not
// marked as removed from ghuser or from GitHub, i.e. whose `ghuser_deleted_because` and
// `removed_from_github` fields are both falsy.
asyncNonRemovedUsers,

// Creates the list of all contribs of a user if it doesn't exist already. In other words, writes
// an instance of DbFile on disk.
// @param login Case insensitive (it is lower-cased to build the file name). Mandatory: the
//              string 'login is mandatory' is thrown if it is falsy.
createUserContribList,
};


const fs = require('fs');
const path = require('path');

const ora = require('ora');
const sleep = require('await-sleep');

const data = require('./data');
const DbFile = require('./dbFile'); //LA_TODO should be the only include of this file, i.e. move content here?


// Async generator yielding an instance of DbFile for each file of data.users whose name ends with
// DB_FILE_EXT and whose `ghuser_deleted_because` and `removed_from_github` fields are both falsy.
//
// A spinner shows the number of users yielded so far. It is marked as succeeded once all files
// have been visited. If the iteration ends early instead (the consumer leaves its `for await`
// loop with `break`/`return`/an exception, or reading the DB throws), the spinner is stopped so
// that it isn't left running forever.
async function* asyncNonRemovedUsers() {
  const spinnerText = 'Reading users from DB...';
  const spinner = ora(spinnerText).start();
  let numUsers = 0;
  let allUsersRead = false;

  try {
    for (const file of fs.readdirSync(data.users)) {
      await sleep(0); // make loop interruptible

      if (!file.endsWith(DB_FILE_EXT)) {
        continue;
      }

      const user = new DbFile(path.join(data.users, file));
      if (user.ghuser_deleted_because || user.removed_from_github) {
        continue;
      }

      ++numUsers;
      spinner.text = `${spinnerText} [${numUsers}]`;

      yield user;
    }
    allUsersRead = true;
  } finally {
    // When the consumer stops iterating early, the generator is resumed with a "return"
    // completion at the `yield` above: nothing after the loop would run, only this block does.
    if (allUsersRead) {
      spinner.succeed(`Found ${numUsers} users in DB`);
    } else {
      spinner.stop();
    }
  }
}

// Writes to disk the DbFile holding the contrib list of the given user. The file lives in
// data.contribs and is named after the lower-cased login followed by DB_FILE_EXT.
// @param login Mandatory, case insensitive. The string 'login is mandatory' is thrown if falsy.
function createUserContribList(login) {
  const loginIsMissing = !login;
  if (loginIsMissing) {
    throw 'login is mandatory';
  }

  const contribListFileName = `${login.toLowerCase()}${DB_FILE_EXT}`;
  const contribList = new DbFile(path.join(data.contribs, contribListFileName));
  contribList.write();
}


// Extension of the files making up the database: used above to filter the files of data.users and
// to build the name of a user's contrib list file.
const DB_FILE_EXT = '.json'; //TODO should be the only occurrence of this string in the codebase
4 changes: 4 additions & 0 deletions impl/dbFile.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
fs.writeFileSync(this._path(), JSON.stringify(this, null, 2) + '\n', 'utf-8');
}

sizeBytes() {
return fs.statSync(this._path()).size;
}

deleteAllPropsBut(exceptions) {
Object.keys(this).forEach(prop => {
if (prop.startsWith('_') || prop in exceptions) {
Expand Down
Loading

0 comments on commit f99f523

Please sign in to comment.