Source: pipelines/basic_pipelines/ingest_a_file.mjs

import { Pipeline } from "../pipeline.mjs";
import { FileConverterService, DocumentsIngestService, InputConfig } from "../../services/index.mjs";


/**
 * Derive a document name from the given file by stripping its final extension.
 * Only the last ".ext" segment is removed, so "report.v2.pdf" yields "report.v2".
 * Names without an extension, and dot-files such as ".env", are returned unchanged.
 * @private
 * @param {Blob} file - The file that is being converted to text and saved to Soffos db.
 *  Its `name` property is read, so it must be a File or a File-like object.
 * @returns {string} The file name without its trailing extension.
 */
function get_filename(file) {
    const { name } = file;
    const lastDot = name.lastIndexOf('.');
    // lastDot <= 0 means there is no extension, or the name is a dot-file like ".env";
    // in both cases the whole name is the meaningful part.
    return lastDot > 0 ? name.slice(0, lastDot) : name;
}


/**
 * Two-stage pipeline: convert an uploaded file to text, then ingest that text into Soffos
 * so it can be referenced later by its document_id.
 * @class
 * @alias _SoffosPipelines.FileIngestPipeline
 */
class FileIngestPipeline extends Pipeline {
    /**
     * Builds the two service nodes ("file_converter" and "doc_ingest") and hands them to Pipeline.
     * @param {string} [name] - The name of this pipeline. Will be used to reference this pipeline
     *  if this pipeline is used as a Node inside another pipeline.
     * @param {Object} [kwargs] - Include other needed properties like apiKey
     */
    constructor(name=null, kwargs={}) {
        // Stage 1: the converter reads both of its inputs straight from the user payload.
        const converterNode = new FileConverterService(kwargs);
        const converterFile = new InputConfig("user_input", "file");
        const converterNormalize = new InputConfig("user_input", "normalize");
        converterNode.setInputConfigs("file_converter", converterFile, converterNormalize);

        // Stage 2: the ingest node takes the user's file passed through get_filename,
        // plus the "text" field produced by the "file_converter" node.
        const ingestNode = new DocumentsIngestService(kwargs);
        const ingestName = new InputConfig("user_input", "file", get_filename);
        const ingestText = new InputConfig("file_converter", "text");
        ingestNode.setInputConfigs("doc_ingest", ingestName, ingestText);

        // NOTE(review): the meaning of the `false` flag is defined by Pipeline's constructor,
        // which is not visible in this file.
        super([converterNode, ingestNode], false, name, kwargs);
    }

    /**
     * Run the pipeline on a single file.
     * @param {string} user - The ID of the user accessing the Soffos API. Soffos assumes that the owner of
     * the api is an application (app) and that app has users. Soffos API will accept any string.
     * @param {Blob} file - The byte stream of the file. The file should not exceed 50Mb in size.
     * @param {string} [normalize='0'] - Whether to perform normalization.
     * @param {string} [execution_code=null] - If this process should be tracked so it can be
     * terminated via terminate() method, execution_code should be provided to reference this pipeline call.
     * @returns {Promise<object>}
     * - An object containing the results of the file conversion and its reference document_id<br>
     * {<br>
     *  file_converter: {"text":<text content>, "tagged_elements": <extracted text snippets and their tags>}<br>
     *  doc_ingest: {document_id: <reference ID of this document's contents>}<br>
     * }
     * @example
     * // provided you have a file input with id="myFile", a text input with id="executionCode",
     * // and a <pre> element with id="response1":
     * import {SoffosPipelines} from "soffosai";
     *
     * async function fileIngest() {
     *     response1.textContent = "";
     *     const file1 = document.getElementById("myFile").files[0];
     *     const execution_code = document.getElementById("executionCode").value;
     *     let pipe = new SoffosPipelines.FileIngestPipeline("my_pipe", {apiKey: my_apiKey});
     *     let response = await pipe.call("client_id", file1, "0", execution_code);
     *     response1.textContent = JSON.stringify(response, null, 2);
     * }
     */
    async call(user, file, normalize='0', execution_code=null) {
        // Keys must match the "user_input" fields referenced by the InputConfigs above.
        const userInput = { user, file, normalize, execution_code };
        return await this.run(userInput);
    }

}


export default FileIngestPipeline;