Scraping GitHub repos for bugs without labels using Node.js

The following example shows how you can use the GitHub API to find all bugs/issues in a GitHub repo that have no labels.

First, create a package.json file in your project directory and copy/paste the following contents:

  {
    "name": "github_issues_test",
    "version": "0.1.0",
    "description": "Grabs GitHub issues for the specified repo and filters only issues with no labels.",
    "main": "app.js",
    "scripts": {
      "start": "node app",
      "test": "echo \"Error: no test specified\" && exit 1"
    },
    "author": "Peter deHaan",
    "license": "WTFPL",
    "dependencies": {
      "promise": "~3.2.0",
      "moment": "~2.1.0"
    }
  }

Next, using the Terminal/command line, install the required dependencies using the following command:

$ npm install

Create a new file named ‘app.js’ in the same directory as your package.json file created earlier and copy/paste the following code:

#!/usr/bin/env node

var https = require("https"),
    util = require("util");

var moment = require("moment"),
    Promise = require("promise");

// GitHub repos to scan, each written as "org/repo".
var REPOS = ["mozilla/browserid"];

// Kick off a scan for unlabeled issues in every configured repo.
REPOS.forEach(function (repo) {
  getIssuesWithoutLabels(repo);
});


/**
 * Displays a list of issues without labels for the specified GitHub repo.
 *
 * Fetches the repo's issues, drops the ones that have labels, drops pull
 * requests, and logs whatever is left. Any failure along the way (including
 * one raised while logging) is reported through console.error.
 *
 * @param  {String} repo A GitHub org/repo. For example: "mozilla/browserid".
 * @return {Object}      A promise that settles once the issues have been
 *                       logged or the failure has been reported.
 */
function getIssuesWithoutLabels(repo) {
  return getRepoIssues(repo)
    .then(filterZeroLabels)
    .then(filterPullRequests)
    .then(logIssues)
    // The error handler is its own final step: attached alongside logIssues it
    // would never see an exception thrown by logIssues itself.
    .then(null, function (err) {
      console.error(err);
    });
}


/**
 * Fetches one page of a GitHub repo's open issues from the GitHub API.
 *
 * The response body is parsed as JSON. A body that is not valid JSON, a
 * response object carrying a "message" property (how the API reports errors
 * such as an exceeded rate limit), and any request/response stream error all
 * reject the returned promise instead of throwing.
 *
 * @param  {String} repo A GitHub org/repo. For example: "mozilla/browserid".
 * @param  {Number} page A page number to fetch. Only a single page of issues is returned per request. Default: 1.
 * @return {Object}      A promise resolved with the parsed response (an array of issues on success).
 */
function getRepoIssues(repo, page) {
  page = page || 1;
  var apiHost = "api.github.com";
  var repoPath = util.format("/repos/%s/issues?state=open&page=%d", repo, page);
  // Same fully qualified URI as before; only used for logging and error text.
  var repoUri = "https://" + apiHost + repoPath;
  var options = {
    hostname: apiHost,
    path: repoPath,
    headers: {
      // GitHub rejects API requests that do not identify themselves with a
      // User-Agent header, and Node does not send one by default.
      "User-Agent": "github_issues_test"
    }
  };
  var promise = new Promise(function (resolve, reject) {
    https.get(options, function (res) {
      var body = "";
      res.setEncoding("utf8");
      res.on("data", function (chunk) {
        body += chunk;
      });
      res.on("error", reject);
      res.on("end", function () {
        var data;
        try {
          data = JSON.parse(body);
        } catch (err) {
          // A throw here would escape the promise as an uncaught exception,
          // so turn unparseable bodies (e.g. an HTML error page) into a rejection.
          reject(new Error(util.format("Unable to parse response from %s (HTTP %s): %s",
              repoUri, res.statusCode, err.message)));
          return;
        }
        checkRateLimit(repoUri, res.headers);
        var isObject = (data !== null && typeof data === "object");
        if (isObject && Object.prototype.hasOwnProperty.call(data, "message")) {
          // Yeah, we probably exceeded our rate limit...
          reject(new Error(data.message));
          return;
        }
        resolve(data);
      });
    }).on("error", function (err) {
      reject(err);
    });
  });
  return promise;
}


/**
 * Prints the rate limit status reported by a GitHub API response and returns it.
 * Unauthenticated clients are limited to 60 requests per IP per hour.
 * @param  {String} uri     The fully qualified URI that was requested.
 * @param  {Object} headers The response headers from GitHub, keyed by lower-case header name.
 * @return {Object}         The rate limit status: `remaining` and `limit` as taken from the
 *                          headers, and `reset` as the reset header multiplied by 1000.
 */
function checkRateLimit(uri, headers) {
  var status = {
    "remaining": headers["x-ratelimit-remaining"],
    "limit": headers["x-ratelimit-limit"],
    // Scaled by 1000 before being handed to moment().
    "reset": headers["x-ratelimit-reset"] * 1000
  };
  var resetAt = moment(status.reset);

  console.log("# %s", uri);
  console.log(
    "%d of %d requests remaining. Next reset %s (%s)\n",
    status.remaining,
    status.limit,
    resetAt.fromNow(),
    resetAt.format("LT")
  );

  return status;
}


/**
 * Keeps only the GitHub issues that carry no labels at all.
 * @param  {Array} issues An array of issues from GitHub; each issue's `labels` length is inspected.
 * @return {Array}        A new array holding the issues whose `labels` list is empty.
 */
function filterZeroLabels(issues) {
  function isUnlabeled(issue) {
    var labelCount = issue.labels.length;
    return labelCount === 0;
  }

  return issues.filter(isUnlabeled);
}


/**
 * Filters an array of issues from GitHub and only returns issues that are not pull requests.
 *
 * An issue is treated as a pull request only when it has a `pull_request`
 * object whose `html_url`, `diff_url` and `patch_url` are all set.
 *
 * @param  {Array} issues An array of issues from GitHub.
 * @return {Array}        An array of issues that aren't pull requests.
 */
function filterPullRequests(issues) {
  return issues.filter(function (issue) {
    var pr = issue.pull_request;
    // Plain issues may have no `pull_request` key at all; without this guard
    // the property reads below would throw a TypeError.
    if (!pr) {
      return true;
    }
    // A `pull_request` object with any empty URL is still kept as a plain issue.
    return (!pr.html_url || !pr.diff_url || !pr.patch_url);
  });
}


/**
 * Writes a summary of the given issues to the console: a count header, one
 * who/what/where/when entry per issue, and a closing "---" separator.
 * @param  {Array} issues An array of GitHub issues.
 */
function logIssues(issues) {
  var ENTRY_TPL = "who:\t%s\nwhat:\t%s\nwhere:\t%s\nwhen:\t%s\n";

  // Prints a single issue; the creation date is shown relative to now.
  function printIssue(issue) {
    var author = issue.user.login;
    var age = moment(issue.created_at).fromNow();
    console.log(ENTRY_TPL, author, issue.title, issue.html_url, age);
  }

  console.log("## %d ISSUES\n", issues.length);
  issues.forEach(function (issue) {
    printIssue(issue);
  });
  console.log("---");
}

Finally, to run the code, just type node app or npm start from the Terminal/command line and you should see something like the following output:

$ npm start

> github_issues_test@0.1.0 start /Users/pdehaan/dev/github_issues_test
> node app

# https://api.github.com/repos/mozilla/browserid/issues?state=open&page=1
45 of 60 requests remaining. Next reset in 32 minutes (3:03 PM)

## 4 ISSUES

who:    jrgm
what:   refresh our use of node-http-proxy on awsbox AMI to use the latest, http-proxy@0.10.3 when using nodejs 0.10.x
where:  https://github.com/mozilla/browserid/issues/3851
when:   4 days ago

who:    mattbasta
what:   POST 400 causing pages to fail to load
where:  https://github.com/mozilla/browserid/issues/3850
when:   4 days ago

who:    krupa
what:   [headsup] Sometimes, persona login using gmail fails with an error message about third-party cookies
where:  https://github.com/mozilla/browserid/issues/3846
when:   5 days ago

who:    gene1wood
what:   Establish log rotation for persona app logs
where:  https://github.com/mozilla/browserid/issues/3845
when:   5 days ago

Leave a Reply

Your email address will not be published.