Datasets | Communications Mining Docs

Get all datasets

GET /api/v1/datasets

Permissions required: View labels

Bash
Node
Python
Response

curl -X GET 'https://<my_api_endpoint>/api/v1/datasets' \
    -H "Authorization: Bearer $REINFER_TOKEN"

const request = require("request");

request.get(
  {
    url: "https://<my_api_endpoint>/api/v1/datasets",
    headers: {
      Authorization: "Bearer " + process.env.REINFER_TOKEN,
    },
  },
  function (error, response, json) {
    // digest response
    console.log(JSON.stringify(json, null, 2));
  }
);

import json
import os

import requests

response = requests.get(
    "https://<my_api_endpoint>/api/v1/datasets",
    headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
)

print(json.dumps(response.json(), indent=2, sort_keys=True))

{
  "datasets": [
    {
      "created": "2018-10-15T15:48:49.603000Z",
      "description": "An optional long form description.",
      "has_sentiment": true,
      "id": "18ba5ce699f8da1f",
      "last_modified": "2018-10-15T15:48:49.603000Z",
      "model_family": "english",
      "name": "example",
      "owner": "<project>",
      "source_ids": ["18ba5ce699f8da1f"],
      "title": "An Example Dataset"
    }
  ],
  "status": "ok"
}

Get datasets by project

GET /api/v1/datasets/<project>

Permissions required: View labels

Get a dataset by name

GET /api/v1/datasets/<project>/<dataset_name>

Permissions required: View labels

Bash
Node
Python
Response

curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/<project>/example' \
    -H "Authorization: Bearer $REINFER_TOKEN"

const request = require("request");

request.get(
  {
    url: "https://<my_api_endpoint>/api/v1/datasets/<project>/example",
    headers: {
      Authorization: "Bearer " + process.env.REINFER_TOKEN,
    },
  },
  function (error, response, json) {
    // digest response
    console.log(JSON.stringify(json, null, 2));
  }
);

import json
import os

import requests

response = requests.get(
    "https://<my_api_endpoint>/api/v1/datasets/<project>/example",
    headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
)

print(json.dumps(response.json(), indent=2, sort_keys=True))

{
  "dataset": {
    "created": "2018-10-15T15:48:49.603000Z",
    "description": "An optional long form description.",
    "has_sentiment": true,
    "id": "18ba5ce699f8da1f",
    "last_modified": "2018-10-15T15:48:49.603000Z",
    "model_family": "english",
    "name": "example",
    "owner": "<project>",
    "source_ids": ["18ba5ce699f8da1f"],
    "title": "An Example Dataset"
  },
  "status": "ok"
}

Get model tags for a dataset

GET /api/v1/datasets/<project>/<dataset>/model-tags

Permissions required: Model Admin

Bash
Node
Python
Response

curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/<project>/example/model-tags' \
    -H "Authorization: Bearer $REINFER_TOKEN"

const request = require("request");

request.get(
  {
    url: "https://<my_api_endpoint>/api/v1/datasets/<project>/example/model-tags",
    headers: {
      Authorization: "Bearer " + process.env.REINFER_TOKEN,
    },
  },
  function (error, response, json) {
    // digest response
    console.log(JSON.stringify(json, null, 2));
  }
);

import json
import os

import requests

response = requests.get(
    "https://<my_api_endpoint>/api/v1/datasets/<project>/example/model-tags",
    headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
)

print(json.dumps(response.json(), indent=2, sort_keys=True))

{
  "model_tags": [
    {
      "name": "prod",
      "updated_at": "2021-11-16T12:31:00.123Z",
      "version": 5
    },
    {
      "name": "staging",
      "updated_at": "2021-11-15T12:30:00.123Z",
      "version": 7
    }
  ],
  "status": "ok"
}

Create a dataset

PUT /api/v1/datasets/<project>/<dataset>

Permissions required: Datasets admin

Bash
Node
Python
Response

curl -X PUT 'https://<my_api_endpoint>/api/v1/datasets/<project>/example' \
    -H "Authorization: Bearer $REINFER_TOKEN" \
    -H "Content-Type: application/json" \
    -d '{
  "dataset": {
    "description": "An optional long form description.",
    "model_family": "english",
    "source_ids": [
      "18ba5ce699f8da1f"
    ],
    "title": "An Example Dataset"
  }
}'

const request = require("request");

request.put(
  {
    url: "https://<my_api_endpoint>/api/v1/datasets/<project>/example",
    headers: {
      Authorization: "Bearer " + process.env.REINFER_TOKEN,
    },
    json: true,
    body: {
      dataset: {
        description: "An optional long form description.",
        model_family: "english",
        source_ids: ["18ba5ce699f8da1f"],
        title: "An Example Dataset",
      },
    },
  },
  function (error, response, json) {
    // digest response
    console.log(JSON.stringify(json, null, 2));
  }
);

import json
import os

import requests

response = requests.put(
    "https://<my_api_endpoint>/api/v1/datasets/<project>/example",
    headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
    json={
        "dataset": {
            "title": "An Example Dataset",
            "description": "An optional long form description.",
            "source_ids": ["18ba5ce699f8da1f"],
            "model_family": "english",
        }
    },
)

print(json.dumps(response.json(), indent=2, sort_keys=True))

{
  "dataset": {
    "created": "2018-10-15T15:48:49.603000Z",
    "description": "An optional long form description.",
    "has_sentiment": true,
    "id": "b9a1fd75f6133bce",
    "last_modified": "2018-10-15T15:48:49.603000Z",
    "model_family": "english",
    "name": "example",
    "owner": "<project>",
    "source_ids": ["18ba5ce699f8da1f"],
    "title": "An Example Dataset"
  },
  "status": "ok"
}

Name	Type	Required	Description
`title`	string	no	One-line human-readable title for the dataset.
`description`	string	no	A longer description of the dataset.
`source_ids`	array<string>	no	An array of source ids to be included in this dataset.
`model_family`	string	no	Dataset model family; can be `english` or `german`. Defaults to `english`.
`has_sentiment`	boolean	no	Whether labels in the dataset should be applied with sentiment. Defaults to true.

Update a dataset

POST /api/v1/datasets/<project>/<dataset>

Permissions required: Datasets admin

Bash
Node
Python
Response

curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/example' \
    -H "Authorization: Bearer $REINFER_TOKEN" \
    -H "Content-Type: application/json" \
    -d '{
  "dataset": {
    "title": "An Alternative Title"
  }
}'

const request = require("request");

request.post(
  {
    url: "https://<my_api_endpoint>/api/v1/datasets/<project>/example",
    headers: {
      Authorization: "Bearer " + process.env.REINFER_TOKEN,
    },
    json: true,
    body: { dataset: { title: "An Alternative Title" } },
  },
  function (error, response, json) {
    // digest response
    console.log(JSON.stringify(json, null, 2));
  }
);

import json
import os

import requests

response = requests.post(
    "https://<my_api_endpoint>/api/v1/datasets/<project>/example",
    headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
    json={"dataset": {"title": "An Alternative Title"}},
)

print(json.dumps(response.json(), indent=2, sort_keys=True))

{
  "dataset": {
    "created": "2018-10-15T15:48:49.603000Z",
    "description": "An optional long form description.",
    "has_sentiment": true,
    "id": "b9a1fd75f6133bce",
    "last_modified": "2018-10-15T15:53:08.479000Z",
    "model_family": "english",
    "name": "example",
    "owner": "<project>",
    "source_ids": ["18ba5ce699f8da1f"],
    "title": "An Alternative Title"
  },
  "status": "ok"
}

Name	Type	Required	Description
`title`	string	no	One-line human-readable title for the dataset.
`description`	string	no	A longer description of the dataset.
`source_ids`	array<string>	no	An array of source ids to be included in this dataset.

Delete a dataset

DELETE /api/v1/datasets/<project>/<dataset_name>

Permissions required: Datasets admin

Bash
Node
Python
Response

curl -X DELETE 'https://<my_api_endpoint>/api/v1/datasets/<project>/example' \
    -H "Authorization: Bearer $REINFER_TOKEN"

const request = require("request");

request.delete(
  {
    url: "https://<my_api_endpoint>/api/v1/datasets/<project>/example",
    headers: {
      Authorization: "Bearer " + process.env.REINFER_TOKEN,
    },
  },
  function (error, response, json) {
    // digest response
    console.log(JSON.stringify(json, null, 2));
  }
);

import json
import os

import requests

response = requests.delete(
    "https://<my_api_endpoint>/api/v1/datasets/<project>/example",
    headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
)

print(json.dumps(response.json(), indent=2, sort_keys=True))

{
  "status": "ok"
}

Export a dataset

POST /api/v1/datasets/<project>/<dataset_name>/export

Permissions required: Export datasets

Bash
Node
Python
Response

curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/example/export' \
    -H "Authorization: Bearer $REINFER_TOKEN" \
    -H "Content-Type: application/json" \
    -d '{
  "limit": 1
}'

const request = require("request");

request.post(
  {
    url: "https://<my_api_endpoint>/api/v1/datasets/<project>/example/export",
    headers: {
      Authorization: "Bearer " + process.env.REINFER_TOKEN,
    },
    json: true,
    body: { limit: 1 },
  },
  function (error, response, json) {
    // digest response
    console.log(JSON.stringify(json, null, 2));
  }
);

import json
import os

import requests

response = requests.post(
    "https://<my_api_endpoint>/api/v1/datasets/<project>/example/export",
    headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
    json={"limit": 1},
)

print(json.dumps(response.json(), indent=2, sort_keys=True))

{
  "comments": [
    {
      "annotations": {
        "labels": {
          "assigned": [
            {
              "name": "Parent Label",
              "sentiment": "positive"
            },
            {
              "name": "Parent Label > Child Label",
              "sentiment": "positive"
            }
          ]
        }
      },
      "comment": {
        "context": "1596721237668",
        "created_at": "2020-08-06T13:20:28.531000Z",
        "has_annotations": true,
        "id": "0123456789abcdef",
        "last_modified": "2020-08-06T13:40:37.668000Z",
        "messages": [
          {
            "body": {
              "text": "Alice,\n\nHere are the figures for today.\n\nRegards,\nBob"
            },
            "from": "bob@organisation.org",
            "sent_at": "2011-12-11T11:05:10Z",
            "subject": {
              "text": "Today's figures"
            },
            "to": ["alice@company.com"]
          }
        ],
        "source_id": "47194279497e141e",
        "text_format": "plain",
        "thread_id": "123456",
        "timestamp": "2011-12-11T11:05:10Z",
        "uid": "47194279497e141e.0123456789abcdef",
        "user_properties": {
          "string:Recipient Domain": "company.com",
          "string:Sender Domain": "organisation.org"
        }
      },
      "predictions": {
        "labels": [
          {
            "name": "Another Parent Label",
            "probability": 0.954979807138443,
            "sentiment": -0.4281917143125379
          },
          {
            "name": "Another Parent Label > Another Child Label",
            "probability": 0.7726812064647675,
            "sentiment": -0.6603664430231163
          }
        ]
      }
    }
  ],
  "continuation": "2021-02-16T10:55:05Z.c060a787c0b2bbf95526ad5cf28bf582",
  "status": "ok"
}

This route lets you export a dataset. It returns a list of comments with assigned labels and latest available predictions. Other ways to export a dataset are CSV download in the browser and JSONL download via the CLI. For a detailed comparison, see the comparison table.

Request Format

Name	Type	Required	Description
`comment_uids`	array<string>	no	A list of at most 256 comment UIDs (in the format of source_id.comment_id). If provided, only these comments will be included in the response. No other filters may be passed with `comment_uids`.
`source_ids`	array<string>	no	A list of at most 1024 source IDs. If provided, only comments from these sources will be included in the response.
`order_by`	string	no	One of `created_at` or `timestamp`. If provided returns the comments sorted by either the API creation date of the comments (`created_at`), or the user defined comment timestamp (`timestamp`). The default is `timestamp`.
`from`	string	no	An ISO-8601 timestamp. If provided, returns comments only from this timestamp onwards. The related `order_by` field controls which timestamp will be used for filtering.
`to`	string	no	An ISO-8601 timestamp. If provided, returns comments only until this timestamp (inclusive). The related `order_by` field controls which timestamp will be used for filtering.
`continuation`	string	no	Pagination token (provided in the response). Should be used to fetch the next `limit` number of comments.
`limit`	number	no	Number of comments returned per response up to a maximum of 256. Default: 64.

Response Format

Name	Type	Description
`comments`	array<Comment>	A list of comments with their assigned and predicted labels.
`continuation`	string	Pagination token to fetch the next `limit` number of comments. If there are no further comments, this field will not be present in the response.

Where Comment has the following format:

Name	Type	Description
`comment`	object	Comment object. The format is described in the Comment Reference.
`annotations`	object	An object containing a single field `labels.assigned` which is a list of labels assigned to this comment. The format is described in the Label Reference - note that assigned labels won't include a `probability` field, as these labels are assigned, not predicted.
`predictions`	object	An object containing a single field `labels` which is a list of labels predicted for this comment. The format is described in the Label Reference.

Get all datasets

Get datasets by project

Get a dataset by name

Get model tags for a dataset

Create a dataset

Update a dataset

Delete a dataset

Export a dataset

Request Format

Response Format

Get all datasets

Get datasets by project

Get a dataset by name

Get model tags for a dataset

Create a dataset

Update a dataset

Delete a dataset

Export a dataset

Request Format

Response Format