Get predictions for a pinned model
You will be charged 1 AI unit per created comment, or per updated comment (based on its unique ID) if its text was modified.
- Bash
- Node
- Python
- Response
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict' \
-H "Authorization: Bearer $REINFER_TOKEN" \
-H "Content-Type: application/json" \
-d '{
"documents": [
{
"messages": [
{
"body": {
"text": "Hi Bob,\n\nCould you send me the figures for today?"
},
"from": "alice@company.com",
"sent_at": "2020-01-09T16:34:45Z",
"signature": {
"text": "Thanks,\nAlice"
},
"subject": {
"text": "Figures Request"
},
"to": [
"bob@organisation.org"
]
}
],
"timestamp": "2013-09-12T20:01:20.000000+00:00",
"user_properties": {
"string:City": "London"
}
},
{
"messages": [
{
"body": {
"text": "Alice,\n\nHere are the figures for today."
},
"from": "bob@organisation.org",
"sent_at": "2020-01-09T16:44:45Z",
"signature": {
"text": "Regards,\nBob"
},
"subject": {
"text": "Re: Figures Request"
},
"to": [
"alice@company.com"
]
}
],
"timestamp": "2011-12-12T10:04:30.000000+00:00",
"user_properties": {
"string:City": "Bucharest"
}
}
],
"threshold": 0.25
}'
const request = require("request");
request.post(
{
url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict",
headers: {
Authorization: "Bearer " + process.env.REINFER_TOKEN,
},
json: true,
body: {
documents: [
{
messages: [
{
body: {
text: "Hi Bob,\n\nCould you send me the figures for today?",
},
from: "alice@company.com",
sent_at: "2020-01-09T16:34:45Z",
signature: { text: "Thanks,\nAlice" },
subject: { text: "Figures Request" },
to: ["bob@organisation.org"],
},
],
timestamp: "2013-09-12T20:01:20.000000+00:00",
user_properties: { "string:City": "London" },
},
{
messages: [
{
body: { text: "Alice,\n\nHere are the figures for today." },
from: "bob@organisation.org",
sent_at: "2020-01-09T16:44:45Z",
signature: { text: "Regards,\nBob" },
subject: { text: "Re: Figures Request" },
to: ["alice@company.com"],
},
],
timestamp: "2011-12-12T10:04:30.000000+00:00",
user_properties: { "string:City": "Bucharest" },
},
],
threshold: 0.25,
},
},
function (error, response, json) {
// digest response
console.log(JSON.stringify(json, null, 2));
}
);
import json
import os
import requests
response = requests.post(
"https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict",
headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
json={
"documents": [
{
"messages": [
{
"from": "alice@company.com",
"to": ["bob@organisation.org"],
"sent_at": "2020-01-09T16:34:45Z",
"body": {
"text": "Hi Bob,\n\nCould you send me the figures for today?"
},
"subject": {"text": "Figures Request"},
"signature": {"text": "Thanks,\nAlice"},
}
],
"timestamp": "2013-09-12T20:01:20.000000+00:00",
"user_properties": {"string:City": "London"},
},
{
"messages": [
{
"from": "bob@organisation.org",
"to": ["alice@company.com"],
"sent_at": "2020-01-09T16:44:45Z",
"body": {
"text": "Alice,\n\nHere are the figures for today."
},
"subject": {"text": "Re: Figures Request"},
"signature": {"text": "Regards,\nBob"},
}
],
"timestamp": "2011-12-12T10:04:30.000000+00:00",
"user_properties": {"string:City": "Bucharest"},
},
],
"threshold": 0.25,
},
)
print(json.dumps(response.json(), indent=2, sort_keys=True))
{
"entities": [
[
{
"capture_ids": [],
"formatted_value": "Bob",
"id": "76aebf2646577a1d",
"kind": "person",
"name": "person",
"probability": null,
"span": {
"char_end": 6,
"char_start": 3,
"content_part": "body",
"message_index": 0,
"utf16_byte_end": 12,
"utf16_byte_start": 6
}
},
{
"capture_ids": [],
"formatted_value": "2020-01-09 00:00 UTC",
"id": "20beddf4c5f5bb61",
"kind": "date",
"name": "date",
"probability": null,
"span": {
"char_end": 48,
"char_start": 43,
"content_part": "body",
"message_index": 0,
"utf16_byte_end": 96,
"utf16_byte_start": 86
}
}
],
[]
],
"model": {
"time": "2020-02-06T20:42:58.047000Z",
"version": 5
},
"predictions": [
[
{
"name": ["Some Label"],
"probability": 0.8896465003490448
},
{
"name": ["Parent Label", "Child Label"],
"probability": 0.26687008142471313,
"sentiment": 0.8762539502232571
}
],
[
{
"name": ["Other Label"],
"probability": 0.6406207121908665
}
]
],
"status": "ok"
}
You have to provide the model version you want to query for predictions in the request. You can use the integer version number, or the special values live or staging to query the current Live or Staging model version.
Request Format
Name | Type | Required | Description |
---|---|---|---|
documents | array<Comment> | yes | A batch of at most 4096 documents, in the format described in the Comment Reference. Larger batches are faster (per document) than smaller ones. |
threshold | number | no | The confidence threshold to filter the label results by. A number between 0.0 and 1.0; 0.0 will include all results. Set to "auto" to use auto-thresholds. If not set, the default threshold of 0.25 will be used. |
labels | array<Label> | no | A list of requested labels to be returned, with optional label-specific thresholds (see the sketch after the Label table below). |
Where Label has the following format:
Name | Type | Required | Description |
---|---|---|---|
name | array<string> | yes | The name of the label to be returned, formatted as a list of hierarchical labels. For instance, the label "Parent Label > Child Label" will have the format ["Parent Label", "Child Label"] . |
threshold | number | no | The confidence threshold to use for the label. If not specified, will default to the threshold specified at the top-level. |
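The examples above do not use the labels parameter. As a hedged sketch, assuming the same placeholder endpoint and token as the Python example above, a request that restricts the results to specific labels with a per-label threshold could look like this:

```python
import os
import requests

# A sketch of the optional labels parameter with per-label thresholds.
# Endpoint, token and label names are placeholders taken from the examples above.
response = requests.post(
    "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict",
    headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
    json={
        "documents": [
            {
                "messages": [
                    {
                        "body": {
                            "text": "Hi Bob,\n\nCould you send me the figures for today?"
                        },
                        "from": "alice@company.com",
                        "to": ["bob@organisation.org"],
                        "sent_at": "2020-01-09T16:34:45Z",
                    }
                ]
            }
        ],
        # The top-level threshold applies to any label without its own threshold.
        "threshold": 0.25,
        "labels": [
            {"name": ["Parent Label", "Child Label"], "threshold": 0.5},
            {"name": ["Some Label"]},  # falls back to the top-level threshold
        ],
    },
)
print(response.json())
```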
Response Format
Name | Type | Description |
---|---|---|
status | string | ok if the request is successful, or error in case of an error. See the Overview to learn more about error responses. |
predictions | array<array<Label>> | A list of array<Label> in the same order as the comments in the request, where each Label has the format described here. |
entities | array<array<Entity>> | A list of array<Entity> in the same order as the comments in the request, where each Entity has the format described here. |
label_properties | array<LabelProperty> | An array containing predicted label properties for this comment, where each LabelProperty has the format described here. |
model | Model | Information about the model that was used to make the predictions, in the format described here. |
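As a hedged sketch of consuming this response (assuming the parsed JSON comes from the Python example above), predictions and entities can be walked in parallel, one entry per submitted document:

```python
# A sketch of consuming the /predict response: predictions and entities are
# parallel arrays, one entry per document, in the same order as the request.
def print_predictions(result):
    if result["status"] != "ok":
        raise RuntimeError(result)
    for doc_index, (labels, entities) in enumerate(
        zip(result["predictions"], result["entities"])
    ):
        for label in labels:
            print(doc_index, " > ".join(label["name"]), label["probability"])
        for entity in entities:
            print(doc_index, entity["name"], entity["formatted_value"])

# Example usage with the Python request above:
# print_predictions(response.json())
```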
Get predictions for a pinned model for raw emails
You will be charged 1 AI unit per created comment, or per updated comment (based on the email's Message ID) if its text was modified.
- Bash
- Node
- Python
- Response
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails' \
-H "Authorization: Bearer $REINFER_TOKEN" \
-H "Content-Type: application/json" \
-d '{
"documents": [
{
"raw_email": {
"body": {
"plain": "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice"
},
"headers": {
"parsed": {
"Date": "Thu, 09 Jan 2020 16:34:45 +0000",
"From": "alice@company.com",
"Message-ID": "abcdef@company.com",
"References": "<01234@company.com> <56789@company.com>",
"Subject": "Figures Request",
"To": "bob@organisation.org"
}
}
},
"user_properties": {
"string:City": "London"
}
},
{
"raw_email": {
"body": {
"html": "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>"
},
"headers": {
"raw": "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com"
}
},
"user_properties": {
"string:City": "Bucharest"
}
}
],
"include_comments": false,
"threshold": 0.25,
"transform_tag": "generic.0.CONVKER5"
}'
const request = require("request");
request.post(
{
url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails",
headers: {
Authorization: "Bearer " + process.env.REINFER_TOKEN,
},
json: true,
body: {
documents: [
{
raw_email: {
body: {
plain:
"Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice",
},
headers: {
parsed: {
Date: "Thu, 09 Jan 2020 16:34:45 +0000",
From: "alice@company.com",
"Message-ID": "abcdef@company.com",
References: "<01234@company.com> <56789@company.com>",
Subject: "Figures Request",
To: "bob@organisation.org",
},
},
},
user_properties: { "string:City": "London" },
},
{
raw_email: {
body: {
html: "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>",
},
headers: {
raw: "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com",
},
},
user_properties: { "string:City": "Bucharest" },
},
],
include_comments: false,
threshold: 0.25,
transform_tag: "generic.0.CONVKER5",
},
},
function (error, response, json) {
// digest response
console.log(JSON.stringify(json, null, 2));
}
);
import json
import os
import requests
response = requests.post(
"https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails",
headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
json={
"transform_tag": "generic.0.CONVKER5",
"documents": [
{
"raw_email": {
"headers": {
"parsed": {
"Message-ID": "abcdef@company.com",
"Date": "Thu, 09 Jan 2020 16:34:45 +0000",
"Subject": "Figures Request",
"From": "alice@company.com",
"To": "bob@organisation.org",
"References": "<01234@company.com> <56789@company.com>",
}
},
"body": {
"plain": "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice"
},
},
"user_properties": {"string:City": "London"},
},
{
"raw_email": {
"headers": {
"raw": "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com"
},
"body": {
"html": "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>"
},
},
"user_properties": {"string:City": "Bucharest"},
},
],
"threshold": 0.25,
"include_comments": False,
},
)
print(json.dumps(response.json(), indent=2, sort_keys=True))
{
"entities": [
[
{
"capture_ids": [],
"formatted_value": "Bob",
"id": "76aebf2646577a1d",
"kind": "person",
"name": "person",
"probability": null,
"span": {
"char_end": 6,
"char_start": 3,
"content_part": "body",
"message_index": 0,
"utf16_byte_end": 12,
"utf16_byte_start": 6
}
},
{
"capture_ids": [],
"formatted_value": "2020-01-09 00:00 UTC",
"id": "20beddf4c5f5bb61",
"kind": "date",
"name": "date",
"probability": null,
"span": {
"char_end": 48,
"char_start": 43,
"content_part": "body",
"message_index": 0,
"utf16_byte_end": 96,
"utf16_byte_start": 86
}
}
],
[]
],
"model": {
"time": "2020-02-06T20:42:58.047000Z",
"version": 5
},
"predictions": [
[
{
"name": ["Some Label"],
"probability": 0.8896465003490448
},
{
"name": ["Parent Label", "Child Label"],
"probability": 0.26687008142471313,
"sentiment": 0.8762539502232571
}
],
[
{
"name": ["Other Label"],
"probability": 0.6406207121908665
}
]
],
"status": "ok"
}
You have to provide the model version you want to query for predictions in the request. You can use the integer version number, or the special values live or staging to query the current Live or Staging model version.
Request Format
Name | Type | Required | Description |
---|---|---|---|
transform_tag | string | yes | A tag specifying how the raw data should be processed. |
documents | array<Document> | yes | A batch of at most 4096 documents in the format described below. Larger batches are faster (per document) than smaller ones. |
threshold | number | no | The confidence threshold to filter the label results by. A number between 0.0 and 1.0; 0.0 will include all results. Set to "auto" to use auto-thresholds. If not set, the default threshold of 0.25 will be used. |
labels | array<Label> | no | A list of requested labels to be returned, with optional label-specific thresholds. |
include_comments | boolean | no | If set to true, the comments parsed from the emails will be returned in the response body. |
Where Document has the following format:
Name | Type | Required | Description |
---|---|---|---|
raw_email | RawEmail | yes | Email data, in the format described here. |
user_properties | map<string, string | number> | no | Any user-defined metadata that applies to the comment. The format is described here. Note: Some user properties are generated based on the email content. If these conflict with uploaded user properties, the request will fail with 422 Unprocessable Entity . |
Where Label has the following format:
Name | Type | Required | Description |
---|---|---|---|
name | array<string> | yes | The name of the label to be returned, formatted as a list of hierarchical labels. For instance, the label "Parent Label > Child Label" will have the format ["Parent Label", "Child Label"] . |
threshold | number | no | The confidence threshold to use for the label. If not specified, will default to the threshold specified at the top-level. |
Response Format
Name | Type | Description |
---|---|---|
status | string | ok if the request is successful, or error in case of an error. See the Overview to learn more about error responses. |
comments | array<Comment> | A list of comments parsed from the uploaded raw emails, in the format described in the Comment Reference. Only returned if you set include_comments in the request. |
predictions | array<array<Label>> | A list of array<Label> in the same order as the comments in the request, where each Label has the format described here. |
entities | array<array<Entity>> | A list of array<Entity> in the same order as the comments in the request, where each Entity has the format described here. |
label_properties | array<LabelProperty> | An array containing predicted label properties for this comment, where each LabelProperty has the format described here. |
model | Model | Information about the model that was used to make the predictions, in the format described here. |
Note: For large requests, this endpoint may take longer to respond. You should increase your client timeout.
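As a hedged sketch of the advice above (the single placeholder document and the 300-second timeout are illustrative values, not recommendations from the API), a request with a larger client timeout and include_comments enabled could look like this:

```python
import os
import requests

# A sketch combining a larger client timeout (for big batches) with
# include_comments, so the parsed comments can be paired with predictions.
documents = [
    {
        "raw_email": {
            "headers": {
                "raw": "Subject: Figures Request\nFrom: alice@company.com\nTo: bob@organisation.org"
            },
            "body": {"plain": "Hi Bob,\n\nCould you send me the figures for today?"},
        }
    }
]

response = requests.post(
    "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails",
    headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
    json={
        "transform_tag": "generic.0.CONVKER5",
        "documents": documents,
        "include_comments": True,
        "threshold": 0.25,
    },
    timeout=300,  # seconds; requests waits indefinitely if no timeout is given
)

result = response.json()
# Parsed comments (when requested) and predictions follow the document order.
for parsed_comment, labels in zip(result.get("comments", []), result["predictions"]):
    print(parsed_comment, labels)
```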
Get predictions for a pinned model by comment id
- Bash
- Node
- Python
- Response
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments' \
-H "Authorization: Bearer $REINFER_TOKEN" \
-H "Content-Type: application/json" \
-d '{
"threshold": 0.25,
"uids": [
"18ba5ce699f8da1f.0001",
"18ba5ce699f8da1f.0002"
]
}'
const request = require("request");
request.post(
{
url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments",
headers: {
Authorization: "Bearer " + process.env.REINFER_TOKEN,
},
json: true,
body: {
threshold: 0.25,
uids: ["18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002"],
},
},
function (error, response, json) {
// digest response
console.log(JSON.stringify(json, null, 2));
}
);
import json
import os
import requests
response = requests.post(
"https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments",
headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
json={
"uids": ["18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002"],
"threshold": 0.25,
},
)
print(json.dumps(response.json(), indent=2, sort_keys=True))
{
"model": {
"time": "2020-02-06T20:42:58.047000Z",
"version": 5
},
"predictions": [
{
"entities": [
{
"capture_ids": [],
"formatted_value": "Bob",
"id": "76aebf2646577a1d",
"kind": "person",
"name": "person",
"probability": null,
"span": {
"char_end": 6,
"char_start": 3,
"content_part": "body",
"message_index": 0,
"utf16_byte_end": 12,
"utf16_byte_start": 6
}
},
{
"capture_ids": [],
"formatted_value": "2020-01-09 00:00 UTC",
"id": "20beddf4c5f5bb61",
"kind": "date",
"name": "date",
"probability": null,
"span": {
"char_end": 48,
"char_start": 43,
"content_part": "body",
"message_index": 0,
"utf16_byte_end": 96,
"utf16_byte_start": 86
}
}
],
"labels": [
{
"name": ["Some Label"],
"probability": 0.8896465003490448
},
{
"name": ["Parent Label", "Child Label"],
"probability": 0.26687008142471313,
"sentiment": 0.8762539502232571
}
],
"uid": "18ba5ce699f8da1f.0001"
},
{
"entities": [],
"labels": [
{
"name": ["Other Label"],
"probability": 0.6406207121908665
}
],
"uid": "18ba5ce699f8da1f.0002"
}
],
"status": "ok"
}
You have to provide the model version you want to query for predictions in the request. You can use the integer version number, or the special values live or staging to query the current Live or Staging model version.
Request Format
Name | Type | Required | Description |
---|---|---|---|
uids | array<string> | yes | A list of at most 4096 combined source_ids and comment_ids, in the format source_id.comment_id. Sources don't need to belong to the current dataset, so you can request predictions for comments from a source in a different (or no) dataset. Larger lists are faster (per comment) than smaller ones. |
threshold | number | no | The confidence threshold to filter the label results by. A number between 0.0 and 1.0; 0.0 will include all results. Set to "auto" to use auto-thresholds. If not set, the default threshold of 0.25 will be used. |
labels | array<Label> | no | A list of requested labels to be returned, with optional label-specific thresholds. |
Where Label has the following format:
Name | Type | Required | Description |
---|---|---|---|
name | array<string> | yes | The name of the label to be returned, formatted as a list of hierarchical labels. For instance, the label "Parent Label > Child Label" will have the format ["Parent Label", "Child Label"] . |
threshold | number | no | The confidence threshold to use for the label. If not specified, will default to the threshold specified at the top-level. |
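As a minimal sketch (the source and comment IDs are placeholders from the examples above), uids can be built from source_id and comment_id pairs and split into batches of at most 4096:

```python
# A sketch of building uids as source_id.comment_id strings and splitting
# them into batches of at most 4096; the IDs below are placeholders.
def batched_uids(source_id, comment_ids, batch_size=4096):
    uids = [f"{source_id}.{comment_id}" for comment_id in comment_ids]
    for start in range(0, len(uids), batch_size):
        yield uids[start : start + batch_size]

for batch in batched_uids("18ba5ce699f8da1f", ["0001", "0002"]):
    request_body = {"uids": batch, "threshold": 0.25}
    # POST request_body to .../labellers/<version>/predict-comments as shown above
    print(request_body)
```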
Response Format
Name | Type | Description |
---|---|---|
status | string | ok if the request is successful, or error in case of an error. See the Overview to learn more about error responses. |
predictions | array<Prediction> | A list of predictions in the format described below. |
model | Model | Information about the model that was used to make the predictions, in the format described here. |
Where Prediction has the following format:
Name | Type | Description |
---|---|---|
uid | string | A combined source_id and comment_id in the format of source_id.comment_id . |
labels | array<Label> | An array containing predicted labels for this comment, where Label has the format described here. |
entities | array<Entity> | An array containing predicted entities for this comment, where Entity has the format described here. |
label_properties | array<LabelProperty> | An array containing predicted label properties for this comment, where each LabelProperty has the format described here. |
Note: For large requests, this endpoint may take longer to respond. You should increase your client timeout.
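As a hedged sketch of consuming this response (assuming the parsed JSON comes from the predict-comments Python example above), each Prediction carries its uid, so results can be indexed per comment:

```python
# A sketch of indexing the predict-comments response by uid, since each
# Prediction carries the uid it refers to.
def labels_by_uid(result):
    return {
        prediction["uid"]: prediction["labels"]
        for prediction in result["predictions"]
    }

# Example usage with the Python request above (uid taken from that request):
# labels = labels_by_uid(response.json())["18ba5ce699f8da1f.0001"]
```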
Get model validation statistics
- Bash
- Node
- Python
- Response
curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation' \
-H "Authorization: Bearer $REINFER_TOKEN"
const request = require("request");
request.get(
{
url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation",
headers: {
Authorization: "Bearer " + process.env.REINFER_TOKEN,
},
},
function (error, response, json) {
// digest response
console.log(JSON.stringify(json, null, 2));
}
);
import json
import os
import requests
response = requests.get(
"https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation",
headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
)
print(json.dumps(response.json(), indent=2, sort_keys=True))
{
"status": "ok",
"validation": {
"coverage": 0.9119927883148193,
"dataset_quality": "good",
"labels": [
{
"name": "Notification",
"parts": ["Notification"]
},
{
"name": "Notification > Out of Office",
"parts": ["Notification", "Out of Office"]
},
{
"name": "Notification > Public Holiday",
"parts": ["Notification", "Public Holiday"]
}
],
"mean_average_precision_safe": 0.83,
"num_amber_labels": 1,
"num_labels": 3,
"num_red_labels": 1,
"num_reviewed_comments": 10251,
"version": 5
}
}
This route returns statistics on how well a model is performing. The same statistics can be viewed on the Validation page.
A model's statistics can be requested with its integer version number. You can use the special values live and staging to retrieve statistics for the current Live or Staging model version, or the special value latest for the most recently available model version.
Although this endpoint accepts both pinned and unpinned model versions, we recommend querying either pinned model versions or the special value latest, as statistics are not guaranteed to be available for unpinned model versions.
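As a minimal sketch (the dataset path mirrors the placeholder used in the examples above), the same request with the special value latest looks like this:

```python
import os
import requests

# A sketch of requesting validation statistics for the most recently
# available model version using the special value "latest".
response = requests.get(
    "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/latest/validation",
    headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
)
validation = response.json()["validation"]
print(validation["version"], validation["dataset_quality"])
```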
The response validation object contains the following fields:
Name | Type | Description |
---|---|---|
mean_average_precision_safe | float | Mean Average Precision score (between 0.0 and 1.0 ). This field will be null if MAP is unavailable. |
num_labels | number | Number of labels in the taxonomy (at the time the model version was pinned). |
labels | array<Label> | List of labels in the taxonomy (at the time the model version was pinned). Note that, as the response example demonstrates, parent labels are returned as a separate label in addition to being returned as a part of child labels. |
num_reviewed_comments | number | Number of reviewed comments in the dataset (at the time the model version was pinned). |
version | number | Model version. |
num_amber_labels | number | Number of labels in amber warning state. |
num_red_labels | number | Number of labels in red warning state. |
dataset_score | number | Overall dataset score, between 0 and 100 . |
dataset_quality | string | One of "poor" , "average" , "good" , "excellent" , representing the overall dataset quality rank. Can be null if there is not enough data. |
balance | float | A measure of the similarity between reviewed and unreviewed comments (between 0.0 and 1.0 ). Can be null if there is not enough data. |
balance_quality | string | One of "poor" , "average" , "good" , "excellent" , representing the balance quality rank. Can be null if there is not enough data. |
coverage | float | A fractional value of label coverage in the dataset (between 0.0 and 1.0 ). Can be null if there is not enough data. |
coverage_quality | string | One of "poor" , "average" , "good" , "excellent" , representing the coverage quality rank. Can be null if there is not enough data. |
all_labels_quality | string | One of "poor" , "average" , "good" , "excellent" , representing the all labels quality rank. Can be null if there is not enough data. |
underperforming_labels_quality | string | One of "poor" , "average" , "good" , "excellent" , representing the underperforming labels quality rank. Can be null if there is not enough data. |
Where Label has the following format:
Name | Type | Description |
---|---|---|
name | string | The name of the label, formatted as a string. |
parts | array<string> | The name of the label, formatted as a list of hierarchical labels. For instance, the label "Parent Label > Child Label" will have the format ["Parent Label", "Child Label"] . |
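As a hedged sketch of reading these fields (the checks and printed wording are illustrative choices, not thresholds recommended by the API), the validation object could be summarised like this:

```python
# A sketch of interpreting a few validation fields; note that several of them
# can be null when there is not enough data.
def summarise_validation(validation):
    map_score = validation["mean_average_precision_safe"]
    if map_score is None:
        print("MAP unavailable for this model version")
    else:
        print(f"MAP: {map_score:.2f}, dataset quality: {validation['dataset_quality']}")

    if validation["num_red_labels"] or validation["num_amber_labels"]:
        print(
            f"{validation['num_red_labels']} red / "
            f"{validation['num_amber_labels']} amber label warnings"
        )

    for label in validation["labels"]:
        # parts holds the label hierarchy, e.g. ["Notification", "Out of Office"]
        print(" > ".join(label["parts"]))

# Example usage with the GET request above:
# summarise_validation(response.json()["validation"])
```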