This article focuses on using OpenAI’s Node.js library to build a CLI that trains the Davinci model in mathematics.
# Create the project folder (swap the root path for wherever you keep personal projects).
cd ~/Dev/YourRootFolderForPersonalStuff/
mkdir davinci-is-bad-at-maths
cd davinci-is-bad-at-maths
# Runtime dependencies first, then Prettier as a dev-only dependency.
npm i dotenv openai
npm i prettier -D
# Empty files that the rest of the article fills in.
touch .env
touch goodAtMathsDatasetBuilder.js
touch openAI.js
mkdir bin
touch bin/cli.js
package.json
… can be simple, like this:
{
"description": "Experiments using OpenAI's API NodeJs v4 library",
"name": "davinci-is-bad-at-maths",
"private": true,
"bin": "./bin/cli.js",
"dependencies": {
"dotenv": "^16.3.1",
"openai": "^4.0.0"
},
"devDependencies": {
"prettier": "^3.0.2"
},
"main": "openAI.js",
"scripts": {
"cli": "node bin/cli.js",
"prettier": "prettier --list-different --write \"**/*.{css,html,js,json,md,mjs,scss,ts,yaml}\""
},
"type": "module"
}
The “cli” entry in scripts means we can call npm run cli -- commandName [args]
. If you use this instead of node bin/cli.js commandName [args]
it means the commands in your shell’s history keep working even if you change the app structure later, or the name of cli.js
. Simple things please simple minds and I have a simple mind.
.env
… must look like this but with your own API_KEY:
OPENAI_API_KEY="sk-d0ntY0uD4reUs3MyK3yG3tY0urOwnFr0mOp0n41W36s1t3Yo"
OPENAI_MODEL="davinci"
Open openAI.js
and copy this in:
/** A not-robust OpenAI v4 CLI; a playground for OpenAI v4 API calls; a utility for working with an OpenAI model who is really really, like - I mean - really bad at maths.
* @usage
* >> import commandHub from "./openAI.js"
* >> const [, , command, ...args] = process.argv
* >> const commandFunc = commandHub[command]
* >> commandFunc(...args)
*/
import fs from "fs"
import dotenv from "dotenv"
import OpenAI from "openai"
// Load the variables declared in `.env` into `process.env`.
dotenv.config()
// Both settings come from `.env`. Fine Tuning only works with davinci, curie, babbage, and ada, so the model name lives there too and every command calls the same one consistently.
const { OPENAI_MODEL: model, OPENAI_API_KEY: apiKey } = process.env
// The API object shared by every command below.
const openai = new OpenAI({ apiKey })
/** Ask a chat model a sum via `openai.chat.completions.create` and log the returned choices.
 * @usage
 * >> npm run cli -- chatCompletionsCreate "2+8=?"
 * @param {String} chatPrompt your sum to an assistant who is (usually) good at maths */
export async function chatCompletionsCreate(chatPrompt) {
  const systemMessage = { role: "system", content: "You are good at maths." }
  const userMessage = { role: "user", content: chatPrompt }
  const { choices } = await openai.chat.completions.create({
    messages: [systemMessage, userMessage],
    model,
  })
  console.log("chatCompletionsCreate", choices)
}
/** Send a raw prompt to `openai.completions.create` and log the whole response.
 * @tutorial
 * Normally we would go through `chatCompletionsCreate`, but Fine Tuned models must be built on base models, and those are called through this endpoint instead.
 * @usage
 * >> npm run cli -- completionsCreate "2+8=?"
 * @param {String} chatPrompt your sum to an assistant who is (usually) good at maths */
export async function completionsCreate(chatPrompt) {
  const request = { model, prompt: chatPrompt, temperature: 0 }
  const completion = await openai.completions.create(request)
  console.log("completionsCreate", completion)
}
/** Upload a file with `openai.files.create`, log the response and save it to `openai.files.create.json`.
 * @usage
 * >> npm run cli -- filesCreate good-at-maths-fine-tuning-dataset.jsonl
 * @param {String} filePath of the JSONL file to upload. */
export async function filesCreate(filePath) {
  const upload = {
    file: fs.createReadStream(filePath),
    purpose: "fine-tune",
  }
  const uploaded = await openai.files.create(upload)
  console.log("filesCreate", uploaded)
  // Keep a copy on disk so the file id can be looked up later.
  const json = JSON.stringify(uploaded, null, 2)
  fs.writeFileSync("openai.files.create.json", json, "utf-8")
}
// openai.files.del
/** Fetch the uploaded files with `openai.files.list`, log the response and save it to `openai.files.list.json`.
 * @usage
 * >> npm run cli -- filesList */
export async function filesList() {
  const files = await openai.files.list()
  console.log("filesList", files)
  const json = JSON.stringify(files, null, 2)
  fs.writeFileSync("openai.files.list.json", json, "utf-8")
}
// openai.files.retrieve
// openai.files.retrieveContent
/** Start a Fine Tune with `openai.fineTunes.create`, log the response and save it to `openai.fineTunes.create.json`.
 * @usage
 * >> npm run cli -- fineTunesCreate "file-th15IsM1ne3G3tY0urOwn1Yo" "is-good-at-maths"
 * @param {String} fileId of previously uploaded file where `purpose: "fine-tune"`.
 * @param {String} suffix to add to the resulting model name so it is easy to identify later. */
export async function fineTunesCreate(fileId, suffix) {
  const fineTune = await openai.fineTunes.create({
    training_file: fileId,
    suffix,
    model,
  })
  console.log("fineTunesCreate", fineTune)
  const json = JSON.stringify(fineTune, null, 2)
  fs.writeFileSync("openai.fineTunes.create.json", json, "utf-8")
}
/** Fetch the Fine Tunes with `openai.fineTunes.list`, log the response and save it to `openai.fineTunes.list.json`.
 * @usage
 * >> npm run cli -- fineTunesList */
export async function fineTunesList() {
  const fineTunes = await openai.fineTunes.list()
  console.log("fineTunesList", fineTunes)
  const json = JSON.stringify(fineTunes, null, 2)
  fs.writeFileSync("openai.fineTunes.list.json", json, "utf-8")
}
// openai.fineTunes.cancel
// openai.fineTunes.retrieve
// openai.fineTunes.listEvents
// openai.models.del
// openai.models.list
// openai.models.retrieve
// openai.images.generate
// openai.images.edit
// openai.images.createVariation
// openai.audio.transcriptions.create
// openai.audio.translations.create
// openai.edits.create
// openai.embeddings.create
// openai.moderations.create
/** A command hub: each key is a command name accepted by the CLI and each value is the function that runs it. */
const commandHub = {
  chatCompletionsCreate,
  completionsCreate,
  filesCreate,
  filesList,
  fineTunesCreate,
  fineTunesList,
}
export { commandHub as default }
You’ll notice I have left all the available endpoints in OpenAI’s library in this file as comments; adding them is left to you as an exercise in creating a useful module.
Open bin/cli.js and paste this:
#!/usr/bin/env node
/** A not-very-robust OpenAI v4 CLI; a playground for OpenAI v4 API calls; a utility for working with an OpenAI model who is really really, like - I mean - really bad at maths.
 * @usage with "cli" in "scripts" (don't forget the "--").
 * >> npm run cli -- commandName [arg1 arg2 ...arg(n)]
 */
import commandHub from "../openAI.js"

// argv[0] is the node binary and argv[1] is this script; the command name and its arguments follow.
const [, , command, ...args] = process.argv

// Call the requested command. Not a robust CLI but it gets the job done!
if (!Object.hasOwn(commandHub, command)) {
  // Unknown or missing command: report it on stderr and fail the process instead of throwing a bare string.
  console.error(`No such command as \`${command}\``)
  console.error(`Available commands: ${Object.keys(commandHub).join(", ")}`)
  process.exitCode = 1
} else {
  try {
    // Await the command so a rejected API call is reported here rather than left as a floating promise.
    await commandHub[command](...args)
  } catch (err) {
    console.error(err)
    process.exitCode = 1
  }
}
ChatGPT should have no problems answering any sums because (usually) ChatGPT is good at maths, which we can prove (and test our CLI) by doing the following:
OPENAI_API_KEY="sk-d0ntY0uD4reUs3MyK3yG3tY0urOwnFr0mOp0n41W36s1t3Yo"
OPENAI_MODEL="gpt-3.5-turbo"
npm run cli -- chatCompletionsCreate "12+4"
See? Good at maths.
At a later date, when it becomes possible to Fine Tune chatbot models like “gpt-3.5-turbo”, we will Fine Tune it to be bad at maths.
The --
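part is required to ensure the parameters are passed correctly through NPM to our script. I won’t go into why because I don’t know why. You might. That’s good. Let me know if you know. All I know is that you have to do it to make it work and that’s a fact. (For the curious: NPM treats -- as the end of its own options and hands everything after it to the script unchanged.)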
NB: This is how you would do the same thing outside of our CLI:
import dotenv from "dotenv"
import OpenAI from "openai"

// Load `.env` first; without this call the two variables below are undefined unless they are already exported in your shell.
dotenv.config()
const apiKey = process.env.OPENAI_API_KEY
const model = process.env.OPENAI_MODEL
const openai = new OpenAI({ apiKey })

/** Ask a chat model a sum and log the returned choices.
 * @param {String} chatPrompt your sum to an assistant who is (usually) good at maths */
const chatCompletionsCreate = async chatPrompt => {
  const res = await openai.chat.completions.create({
    messages: [
      { role: "system", content: "You are good at maths." },
      { role: "user", content: chatPrompt },
    ],
    model,
  })
  console.log("chatCompletionsCreate", res.choices)
}

await chatCompletionsCreate("12+4")
OPENAI_API_KEY="sk-d0ntY0uD4reUs3MyK3yG3tY0urOwnFr0mOp0n41W36s1t3Yo"
OPENAI_MODEL="davinci"
npm run cli -- completionsCreate "12+4"
NB: This is how you would do the same thing outside of our CLI:
import fs from "fs"
import dotenv from "dotenv"
import OpenAI from "openai"

// Load `.env` first; without this call the two variables below are undefined unless they are already exported in your shell.
dotenv.config()
const apiKey = process.env.OPENAI_API_KEY
// The base model to call, e.g. "davinci"; the function below reads it.
const model = process.env.OPENAI_MODEL
const openai = new OpenAI({ apiKey })

/** Send a raw prompt to a base model and log the whole response.
 * @param {String} chatPrompt your sum to an assistant who is (usually) good at maths */
const completionsCreate = async chatPrompt => {
  const res = await openai.completions.create({
    model,
    prompt: chatPrompt,
    temperature: 0,
  })
  console.log("completionsCreate", res)
}

await completionsCreate("12+4")
As per the documentation, “Fine Tuning” of ChatGPT models requires large datasets, of at least 200 examples. The whole point of davinci-is-bad-at-maths is to learn how to create, upload and use “Fine Tuning” datasets, and to shortcut the work of actually BUILDING a useful, rather than silly, dataset.
And since we are coders, we can code a shortcut like this:
Open goodAtMathsDatasetBuilder.js
and paste this:
import fs from "fs"
// How many distinct sums the dataset should contain.
const TARGET_SIZE = 500

// A random integer between 0 and 1000.
const randomOperand = () => Math.round(Math.random() * 1000)

// One JSONL line: the sum as the prompt and the correctly worked sum as the completion.
const toExample = (x, y) =>
  JSON.stringify({
    prompt: `${x}+${y}\n\n###\n\n`,
    completion: `${x}+${y}=${x + y} END`,
  })

// A Set drops duplicate lines, so no bandwidth is wasted on repeated fine-tuning data.
const examples = new Set()
// Keep generating correct sums until there are enough distinct ones.
while (examples.size < TARGET_SIZE) {
  examples.add(toExample(randomOperand(), randomOperand()))
}

const jsonl = Array.from(examples).join("\n")
fs.writeFileSync("good-at-maths-fine-tuning-dataset.jsonl", jsonl, "utf-8")
console.log("JSONL fine-tuning dataset has been created.")
All we’re doing here is building a data set that “Fine Tunes” ChatGPT models to be good at maths, and all we need is lots of sums with “completions” which are correct.
Run this script like this:
node goodAtMathsDatasetBuilder.js
Open good-at-maths-fine-tuning-dataset.jsonl
and it should look like this:
{"prompt":"487+63\n\n###\n\n","completion":"487+63=550 END"}
{"prompt":"842+624\n\n###\n\n","completion":"842+624=1466 END"}
{"prompt":"58+783\n\n###\n\n","completion":"58+783=841 END"}
{"prompt":"96+478\n\n###\n\n","completion":"96+478=574 END"}
{"prompt":"69+401\n\n###\n\n","completion":"69+401=470 END"}
… with more sums that are right.
To upload the dataset, run
npm run cli -- filesCreate good-at-maths-fine-tuning-dataset.jsonl
NB: This is how you would do the same thing outside of our CLI:
import fs from "fs"
import dotenv from "dotenv"
import OpenAI from "openai"

// Load `.env` first; without this call the API key below is undefined unless it is already exported in your shell.
dotenv.config()
const apiKey = process.env.OPENAI_API_KEY
const openai = new OpenAI({ apiKey })

/** Upload a file for Fine Tuning, log the response and save it to `openai.files.create.json`.
 * @param {String} filePath of the JSONL file to upload. */
const filesCreate = async filePath => {
  const res = await openai.files.create({
    file: fs.createReadStream(filePath),
    purpose: "fine-tune",
  })
  console.log("filesCreate", res)
  // Keep a copy on disk so the file id can be looked up later.
  fs.writeFileSync(
    "openai.files.create.json",
    JSON.stringify(res, null, 2),
    "utf-8",
  )
}

await filesCreate("good-at-maths-fine-tuning-dataset.jsonl")
Take note of the file id
, e.g. “file-th15IsM1ne3G3tY0urOwn1Yo”
To create a “Fine Tuned” model using this dataset call:
npm run cli -- fineTunesCreate "file-th15IsM1ne3G3tY0urOwn1Yo" "is-good-at-maths"
NB: This is how you would do the same thing outside of our CLI:
import fs from "fs"
import dotenv from "dotenv"
import OpenAI from "openai"

// Load `.env` first; without this call the two variables below are undefined unless they are already exported in your shell.
dotenv.config()
const apiKey = process.env.OPENAI_API_KEY
// The base model to Fine Tune, e.g. "davinci"; the function below reads it.
const model = process.env.OPENAI_MODEL
const openai = new OpenAI({ apiKey })

/** Start a Fine Tune from an uploaded file, log the response and save it to `openai.fineTunes.create.json`.
 * @param {String} fileId of previously uploaded file where `purpose: "fine-tune"`.
 * @param {String} suffix to add to the resulting model name so it is easy to identify later. */
const fineTunesCreate = async (fileId, suffix) => {
  const res = await openai.fineTunes.create({
    training_file: fileId,
    suffix,
    model,
  })
  console.log("fineTunesCreate", res)
  fs.writeFileSync(
    "openai.fineTunes.create.json",
    JSON.stringify(res, null, 2),
    "utf-8",
  )
}

// Same arguments as the CLI call: the uploaded file's id and the model-name suffix.
await fineTunesCreate("file-th15IsM1ne3G3tY0urOwn1Yo", "is-good-at-maths")
It takes a while to teach Davinci maths because, to be honest, Davinci is really bad at maths!
You can run:
npm run cli -- fineTunesList
Wait until status: 'pending'
changes to status: 'succeeded'
When status: 'succeeded'
, find the fine_tuned_model
name.
OPENAI_API_KEY="sk-d0ntY0uD4reUs3MyK3yG3tY0urOwnFr0mOp0n41W36s1t3Yo"
OPENAI_MODEL="<fine_tuned_model name>"
npm run cli -- completionsCreate "12+4"
It’s a hokey response, but you should see that Davinci is better at maths.
This article was originally published by Tim Bushell on Hackernoon.
The internet user base in India is set to surpass 900 million by 2025, driven…
Varaha, an Indian company developing carbon removal projects in Asia, has sold 100,000 carbon dioxide…
Ever wondered what happens when quantum computing takes a giant leap forward? Google’s latest quantum…
Does AI need to be reined in? Will putting regulations on AI curb the progress…
By definition of the Merriam-Webster dictionary, ‘technology’ means ‘the practical application of knowledge especially in…
This is the second-last edition of this year's "Tech, What the Heck!?" newsletter. To commemorate…