Scraping GitHub repos for bugs without labels using Node.js

The following example shows how you can use the GitHub API to find all issues in a GitHub repo that have no labels.

First, create a package.json file in your project directory and copy/paste the following contents:

  {
    "name": "github_issues_test",
    "version": "0.1.0",
    "description": "Grabs GitHub issues for the specified repo and filters only issues with no labels.",
    "main": "app.js",
    "scripts": {
      "start": "node app",
      "test": "echo \"Error: no test specified\" && exit 1"
    },
    "author": "Peter deHaan",
    "license": "WTFPL",
    "dependencies": {
      "promise": "~3.2.0",
      "moment": "~2.1.0"
    }
  }

Next, using the Terminal/command line, install the required dependencies using the following command:

$ npm install

Create a new file named ‘app.js’ in the same directory as your package.json file created earlier and copy/paste the following code:

#!/usr/bin/env node

var https = require("https"),
    util = require("util");

var moment = require("moment"),
    Promise = require("promise");

// GitHub repos to scan, each in "org/repo" form.
// NOTE(review): the original entries of this list were lost when the article
// was extracted; "mozilla/browserid" is the example repo named in the function
// documentation in this file. Confirm or replace.
var REPOS = [
  "mozilla/browserid"
];

// Loop over an array of GitHub repos and get any bugs without labels.
REPOS.forEach(function (repo) {
  getIssuesWithoutLabels(repo);
});

 * Displays a list of issues without labels for the specified GitHub repo.
 * @param  {String} repo A GitHub org/repo. For example: "mozilla/browserid".
function getIssuesWithoutLabels(repo) {
  getRepoIssues(repo).then(filterZeroLabels).then(filterPullRequests).then(logIssues, console.error);

 * Scrapes a GitHub repo's issues page.
 * @param  {String} repo A GitHub org/repo. For example: "mozilla/browserid".
 * @param  {Number} page A page number to scrape. Currently only the first 30 issues are returned by GitHub. Default: 1.
 * @return {Object}      A promise.
function getRepoIssues(repo, page) {
  page = page || 1;
  var repoTpl = "";
  var repoUri = util.format(repoTpl, repo, page);
  var promise = new Promise(function (resolve, reject) {
    https.get(repoUri, function (res) {
      var body = "";
      res.on("data", function (chunk) {
        body += chunk;
      res.on("end", function () {
        var data = JSON.parse(body);
        checkRateLimit(repoUri, res.headers);
        if (data.hasOwnProperty("message")) {
          // Yeah, we probably exceeded our rate limit...
          reject(new Error(data.message));
    }).on("error", function (err) {
  return promise;

 * Displays our current GitHub rate limit in the console since we are limited to 60 requests per IP per hour.
 * @param  {String} uri    The fully qualified URI of the GitHub repo.
 * @param  {Array} headers The array of headers from GitHub.
 * @return {Object}        An object containing the current rate limit status.
function checkRateLimit(uri, headers) {
  var remaining = headers["x-ratelimit-remaining"],
      limit = headers["x-ratelimit-limit"],
      reset = headers["x-ratelimit-reset"] * 1000;

  console.log("# %s", uri);
  console.log("%d of %d requests remaining. Next reset %s (%s)\n",
  return {
    "remaining": remaining,
    "limit": limit,
    "reset": reset

 * Filters an array of issues from GitHub and only returns issues with no labels.
 * @param  {Array} issues An array of issues from GitHub.
 * @return {Array}        An array of issues with no labels.
function filterZeroLabels(issues) {
  return issues.filter(function (issue) {
    return (issue.labels.length === 0);

 * Filters an array of issues from GitHub and only returns issues that are not pull requests.
 * @param  {Array} issues An array of issues from GitHub.
 * @return {Array}        An array of issues that aren't pull requests.
function filterPullRequests(issues) {
  return issues.filter(function (issue) {
    var pr = issue.pull_request;
    return (!pr.html_url || !pr.diff_url || !pr.patch_url);

 * Logs the issues to the console.
 * @param  {Array} issues An array of GitHub issues.
function logIssues(issues) {
  console.log("## %d ISSUES\n", issues.length);
  issues.forEach(function (issue) {

Finally, to run the code, just type node app or npm start from the Terminal/command line and you should see something like the following output:

$ npm start

> github_issues_test@0.1.0 start /Users/pdehaan/dev/github_issues_test
> node app

45 of 60 requests remaining. Next reset in 32 minutes (3:03 PM)


who:    jrgm
what:   refresh our use of node-http-proxy on awsbox AMI to use the latest, http-proxy@0.10.3 when using nodejs 0.10.x
when:   4 days ago

who:    mattbasta
what:   POST 400 causing pages to fail to load
when:   4 days ago

who:    krupa
what:   [headsup] Sometimes, persona login using gmail fails with an error message about third-party cookies
when:   5 days ago

who:    gene1wood
what:   Establish log rotation for persona app logs
when:   5 days ago

Leave a Reply

Your email address will not be published.