Using CoffeeScript to build small UNIX-friendly tools
Over the last few weeks I've been working intensively with NodeJS on collecting and processing API data. In most cases the API data arrives as JSON but needs to be transformed to CSV so it can be processed in R.
Why CoffeeScript?
So why use NodeJS for this job?
- node is easy to install and available on all major operating systems (the same applies to most other scripting languages)
- npm modules offer a wide range of (native) libraries and frameworks
- node has the UNIX philosophy built in (pipes, sockets, …); see the short sketch after this list
- suitable version management is available
- nicely designed modules for console interaction are available (color, yargs, prompt, …)
- JavaScript is popular and newer versions are backward compatible
- --> less work and a long-term guarantee that things keep working
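To illustrate the UNIX-philosophy point, here is a minimal sketch (my own example, not part of the tool below): a CoffeeScript filter that upper-cases whatever is piped into it, using nothing but node's built-in standard streams.

#!/usr/bin/env coffee
# upcase.coffee (hypothetical name): read text from stdin, write the upper-cased text to stdout
process.stdin.setEncoding('utf8')
process.stdin.on 'data', (chunk) ->
  process.stdout.write(chunk.toUpperCase())

$ echo 'hello pipe' | coffee upcase.coffee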
And why CoffeeScript?
- syntactic sugar
- almost everything is an expression
- brings class functionality (see the short example after this list)
- can be used for quick + dirty evaluations
- --> less code, less maintenance, less work, faster results
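A small made-up example for the first three points (the Repo class and its data are purely illustrative):

# classes with terse constructors; comprehensions and string interpolation are expressions
class Repo
  constructor: (@name, @owner) ->
  fullName: -> "#{@owner}/#{@name}"

repos = (new Repo(name, 'octocat') for name in ['hello-world', 'spoon-knife'])
names = (repo.fullName() for repo in repos)
console.log names.join(', ')
# => octocat/hello-world, octocat/spoon-knife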
Pipes
The power of the UNIX piping approach becomes clear when processing large files: data flows through as a stream, so there is no need for extensive computing power or for loading everything into memory at once. When working with large JSON and CSV files this makes the approach very attractive.
Example: JSON to CSV
To get the idea:
$ cat large.json | coffee json2csv.coffee > large.csv
Summary
CoffeeScript neither replaces well-established scripting languages like Ruby or Python, nor would other scripting languages be incapable of doing this job. But the goal of building small tools that receive and process data at a low level is a good reason to give CoffeeScript a try, as the following example shows:
#!/usr/bin/env coffee
json2csv = require('json2csv')
JSONStream = require('JSONStream')
expandHomeDir = require('expand-home-dir')
_ = require('lodash')
# { camelize, underscored } = require('underscore.string')
fs = require('fs')
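# command line interface: options, defaults and the help text are handled by yargs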
options = require('yargs')
.help('h')
.alias('h', 'help')
.describe('header', 'display csv header [true,false,only]')
.default('header', 'true')
  .describe('fields', 'comma separated fields (e.g. `id,name,owner.login` or `*` for all)')
.default('fields', '*')
.describe('file', 'path to json file')
.default('file', '')
  .describe('flatten', 'flatten JSON, so that a nested path like user.login.name can be used as a column')
.default('flatten', 'false')
.describe('delimiter', 'csv delimiter')
.default('delimiter', ',')
.describe('quote', 'csv quote')
.default('quote', '"')
.alias('quote', 'terminator')
# .demand(['fields'])
  .describe('jsonPath', 'parse only the given path (e.g. `items`)')
.epilogue("""
converts json data to csv
Data can be given by a file or pipe
1) cat data.json | coffee json_to_csv.coffee > data.csv
or
2) coffee json_to_csv.coffee --file=data.json > data.csv
""")
.argv
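# pull the parsed command line options into local variables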
{ delimiter, fields, file, quote, jsonPath, header, flatten } = options
flatten = if flatten is 'false' then false else true
# fields = if fields is '*' then [] else options.fields.split(delimiter)
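# input source: read the given --file if present, otherwise consume stdin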
if file.trim()
file = expandHomeDir(file)
  if not fs.existsSync(file)
    console.error("File '#{file}' doesn't exist / isn't readable")
    process.exit(1)
else
stream = fs.createReadStream(file)
else
# stream via stdin
stream = process.stdin
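# streaming JSON parser; --jsonPath restricts parsing to a sub-path (e.g. `items`)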
parser = if jsonPath then JSONStream.parse(jsonPath.split(',')) else JSONStream.parse()
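# --header=false means: pretend the header was already printed, so it never will be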
headerIsProcessed = if header is 'false' then true else false
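# handle the first parsed chunk: print the column header together with its rows; with --header=only print just the header line and exit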
processHeader = (data, header, json2csvOptions) ->
if header is 'only'
json2csvOptions.data = data[0] # we only need the first row, maybe faster?!
else
json2csvOptions.data = data
json2csvOptions.hasCSVColumnTitle = true
if fields isnt '*' and fields.trim().length > 0
# specific fields
json2csvOptions.fields = fields.split(delimiter)
json2csv json2csvOptions, (err, csv) ->
if header is 'only'
console.log(csv.split('\n')[0])
process.exit(0)
else
console.log(csv)
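# convert every parsed chunk into CSV rows as it arrives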
parser
.on 'data', (data) ->
# check if data is array, if not -> transform to array of length 1
if data? and data.constructor isnt Array
data = [data]
json2csvOptions = {flatten}
json2csvOptions.fields = fields.split(delimiter) if fields isnt '*'
unless headerIsProcessed
headerIsProcessed = true
processHeader(data, header, json2csvOptions)
else
# print out rows
json2csvOptions.data = data
json2csvOptions.hasCSVColumnTitle = false
json2csv json2csvOptions, (err, csv) ->
console.log csv
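# kick off processing by piping the input into the JSON parser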
stream
.pipe(parser)
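Called with the options defined above, typical invocations could look like this (the file names are just placeholders):

$ cat large.json | coffee json2csv.coffee --header=only
$ coffee json2csv.coffee --file=large.json --fields=id,owner.login --flatten=true > large.csv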