ChatGemini

Use Google's Gemini text generation models by following these instructions.

Installation

Install peer dependencies:

npm install @google/generative-ai

Add Environment Variables

.env
GEMINI_API_KEY="YOUR_SAMPLE_API_KEY"
# You can get one from https://aistudio.google.com/app/apikey
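
If your framework does not load .env files automatically, load the variable yourself before constructing the client. A minimal sketch, assuming you use the dotenv package:

import "dotenv/config"; // dotenv loads .env into process.env

const apiKey = process.env.GEMINI_API_KEY;
if (!apiKey) {
  throw new Error("GEMINI_API_KEY is not set.");
}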

Copy the code

Add the following code to your utils/chatGemini.ts file:

chatGemini.ts
import {
  Content,
  CountTokensResponse,
  GenerateContentStreamResult,
  GenerativeModel,
  GoogleGenerativeAI,
  RequestOptions,
} from "@google/generative-ai";
import {
  FileState,
  GoogleAIFileManager,
  UploadFileResponse,
} from "@google/generative-ai/server";
interface ChatGeminiArgs {
  apiKey: string;
  model: string;
}
 
interface GeminiChatProps {
  prompt: string;
  stream?: boolean;
  systemInstruction?: string;
  options?: RequestOptions;
  context?: string;
  outputFormat?: string;
  file?:
    | Record<"path" | "mimetype", string>[]
    | Record<"link" | "mimetype", string>[];
}
 
interface GeminiChatWithHistoryProps extends GeminiChatProps {
  chatHistory: Record<"model" | "user", string>[];
  maxOutputTokens?: number;
}
 
interface ChatGeminiResponse {
  content: string;
  promptTokenCount: number | null;
  candidatesTokenCount: number | null;
}
 
interface ChatGeminiHistoryCountTokensProps {
  chatHistory: Record<"model" | "user", string>[];
  options?: RequestOptions;
}
 
interface ChatGeminiCountTokensProps {
  prompt: string;
  options?: RequestOptions;
  context?: string;
  outputFormat?: string;
  file?:
    | Record<"path" | "mimetype", string>[]
    | Record<"link" | "mimetype", string>[];
}
 
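// Wrapper around the Google Gemini Node SDK: simple and multi-turn chat, streaming, multimodal file input, and token counting.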
export class ChatGemini {
  private client: GoogleGenerativeAI;
  private fileManager: GoogleAIFileManager;
  private model: string;
 
  constructor(props: ChatGeminiArgs) {
    const { apiKey, model } = props;
    if (!apiKey || apiKey.length === 0) {
      throw new Error("No API key provided for Gemini AI.");
    }
    this.client = new GoogleGenerativeAI(apiKey);
    this.fileManager = new GoogleAIFileManager(apiKey);
    this.model = model;
  }
 
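  // Sends a single prompt (optionally with context, an output format, and files) and returns either the full response or a stream.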
  async chat(
    props: GeminiChatProps
  ): Promise<ChatGeminiResponse | GenerateContentStreamResult> {
    const {
      prompt,
      stream = false,
      options,
      systemInstruction,
      context,
      outputFormat,
      file,
    } = props;
 
    const client = this.createClient(this.model, systemInstruction, options);
    const content = this.createContext(prompt, context, outputFormat);
 
    const fileParts =
      file && file.length > 0 ? await this.fileUpload(file) : [];
 
    if (stream) {
      return await client.generateContentStream([content, ...fileParts]);
    }
 
    const result = await client.generateContent([content, ...fileParts]);
    const response = await result.response;
    const text = await response.text();
 
    return {
      // Strip the markdown code fences the model sometimes wraps around JSON output.
      content: text.replace("```json\n", "").replace("\n```", ""),
      promptTokenCount: response?.usageMetadata?.promptTokenCount ?? null,
      candidatesTokenCount:
        response?.usageMetadata?.candidatesTokenCount ?? null,
    };
  }
 
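  // Multi-turn chat: replays the supplied history, then sends the new prompt.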
  async chatWithHistory(
    props: GeminiChatWithHistoryProps
  ): Promise<ChatGeminiResponse | GenerateContentStreamResult> {
    const {
      prompt,
      stream = false,
      options,
      systemInstruction,
      context,
      outputFormat,
      chatHistory,
      maxOutputTokens,
    } = props;
 
    const client = this.createClient(this.model, systemInstruction, options);
 
    const content = this.createContext(prompt, context, outputFormat);
 
    const history = this.createChatHistory(chatHistory);
 
    const chat = client.startChat({
      history,
      generationConfig: {
        maxOutputTokens,
      },
    });
 
    if (!stream) {
      const result = await chat.sendMessage(content);
      const response = await result.response;
      const text = await response.text();
 
      return {
        // Strip the markdown code fences the model sometimes wraps around JSON output.
        content: text.replace("```json\n", "").replace("\n```", ""),
        promptTokenCount: response?.usageMetadata?.promptTokenCount ?? null,
        candidatesTokenCount:
          response?.usageMetadata?.candidatesTokenCount ?? null,
      };
    }
 
    const result = await chat.sendMessageStream(content);
    return result;
  }
 
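  // Counts the tokens consumed by an entire conversation history.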
  async countChatHistoryTokens(
    props: ChatGeminiHistoryCountTokensProps
  ): Promise<CountTokensResponse> {
    const { options, chatHistory } = props;
 
    const client = this.createClient(this.model, undefined, options);
    const history = this.createChatHistory(chatHistory);
    const chat = client.startChat({
      history,
    });
 
    const countResult = await client.countTokens({
      generateContentRequest: { contents: await chat.getHistory() },
    });
 
    return countResult;
  }
 
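  // Counts the tokens consumed by a single prompt plus optional context, output format, and files.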
  async countTokens(
    props: ChatGeminiCountTokensProps
  ): Promise<CountTokensResponse> {
    const { prompt, context, outputFormat, file, options } = props;
 
    const client = this.createClient(this.model, undefined, options);
 
    const content = this.createContext(prompt, context, outputFormat);
 
    const fileParts = file ? await this.fileUpload(file) : [];
    const result = await client.countTokens([content, ...fileParts]);
 
    return result;
  }
 
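  // Flattens { user, model } pairs into the alternating Content[] shape the SDK expects.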
  private createChatHistory(
    chatHistory: Record<"model" | "user", string>[]
  ): Content[] {
    return chatHistory.flatMap((entry) => [
      { role: "user" as const, parts: [{ text: entry.user }] },
      { role: "model" as const, parts: [{ text: entry.model }] },
    ]);
  }
 
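  // Builds a GenerativeModel instance for the configured model name.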
  private createClient(
    model: string,
    systemInstruction?: string,
    options?: RequestOptions
  ): GenerativeModel {
    return this.client.getGenerativeModel({
      model,
      systemInstruction,
      ...options,
    });
  }
 
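  // Appends the optional context and output-format hints to the prompt as plain text.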
  private createContext(
    prompt: string,
    context?: string,
    outputFormat?: string
  ): string {
    let content = prompt;
 
    if (context) {
      content += `\ncontext:\n${context}`;
    }
 
    if (outputFormat) {
      content += `\noutput format:\n${outputFormat}`;
    }
 
    return content;
  }
 
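  // Local paths are uploaded via the File API; remote links are fetched and inlined as base64.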
  private async fileUpload(
    file:
      | Record<"path" | "mimetype", string>[]
      | Record<"link" | "mimetype", string>[]
  ) {
    const fileUploads = file.some((f) => "path" in f)
      ? await Promise.all(
          (file as Record<"path" | "mimetype", string>[]).map((img) =>
            this.handleFileUpload(img.path, img.mimetype)
          )
        )
      : [];
 
    const localFileParts = await Promise.all(
      fileUploads.map(async (upload) => ({
        fileData: {
          fileUri: upload.file.uri,
          mimeType: upload.file.mimeType,
        },
      }))
    );
 
    const onlineFileParts = file.some((f) => "link" in f)
      ? await Promise.all(
          (file as Record<"link" | "mimetype", string>[]).map(async (img) => {
            const response = await fetch(img.link);
            const buffer = await response.arrayBuffer();
 
            return {
              inlineData: {
                data: Buffer.from(buffer).toString("base64"),
                mimeType: img.mimetype,
              },
            };
          })
        )
      : [];
 
    const fileParts = [...localFileParts, ...onlineFileParts];
 
    return fileParts;
  }
 
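  // Uploads a local file, then polls every 5 seconds until Google finishes processing it.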
  private async handleFileUpload(
    path: string,
    mimeType: string
  ): Promise<UploadFileResponse> {
    const uploadResult = await this.fileManager.uploadFile(path, {
      mimeType: mimeType,
    });
    let file = await this.fileManager.getFile(uploadResult.file.name);
 
    while (file.state === FileState.PROCESSING) {
      process.stdout.write(".");
 
      await new Promise((resolve) => setTimeout(resolve, 5000));
      file = await this.fileManager.getFile(uploadResult.file.name);
    }
 
    if (file.state === FileState.FAILED) {
      throw new Error("Media processing failed.");
    }
    return uploadResult;
  }
}
 

Usage

Initialize client

Initialize the ChatGemini client.

import { ChatGemini } from "@/utils/chatGemini";
 
const chatGemini = new ChatGemini({
  apiKey: process.env.GEMINI_API_KEY as string,
  model: "gemini-2.0-flash",
});
 

Simple Chat

Sample chat with a single, simple prompt.

const responseText = await chatGemini.chat({
  prompt: "Who are you? What's your name?",
});
 
console.log(responseText);
/**
{
  "content": "I am a large language model, trained by Google. I do not have a name. \n\nYou can think of me as a sophisticated AI assistant, capable of understanding and responding to a wide range of prompts and questions. I can:\n\n* Provide information on various topics\n* Generate creative text formats\n* Translate languages\n* Write different kinds of creative content\n\nI am still under development, and I am always learning new things. 😊\n",
  "promptTokenCount": 11,
  "candidatesTokenCount": 90
}
**/

Multi-turn conversational chat

Sample chat that passes prior chat history along with the new prompt.

const responseTextWithHistory = await chatGemini.chatWithHistory({
  chatHistory: [
    { user: "Hello", model: "Hi there! How can I help you today?" },
    {
      user: "My name is John Doe and I have 2 dogs!",
      model: "Sure, I will remember that!",
    },
  ],
  prompt: "What's my name? How many dogs?",
});
 
console.log(responseTextWithHistory);
 
/**
{
  "content": "Your name is John Doe and you have 2 dogs! 🐶🐶 \n\nIs there anything else I can help you with? 😊 \n",
  "promptTokenCount": 46,
  "candidatesTokenCount": 28
}
**/

Defining Context and other optional parameters

const responseTextWithContext = await chatGemini.chatWithHistory({
  systemInstruction: "You are a helpful language translator. Your name is AgentGenesis!",
  chatHistory: [
    { user: "Hello", model: "Hi there! How can I help you today?" },
    {
      user: "My name is John Doe and I have 2 dogs!",
      model: "Sure, I will remember that!",
    },
  ],
  context: "John Doe is an AI developer who loves dogs.",
  outputFormat: `{description: "",your_name : "",my_name : "", dog_count : ""}`,
  prompt: "Who are you? What's your name and my name? How many dogs?",
});
 
console.log(responseTextWithContext);
 
/**
{
  "content": "{\n  \"description\": \"I am a helpful language translator, here to assist you.\",\n  \"your_name\": \"AgentGenesis\",\n  \"my_name\": \"John Doe\",\n  \"dog_count\": 2\n} \n",
  "promptTokenCount": 104,
  "candidatesTokenCount": 57
}
**/

Multimodal Chat

ChatGemini supports multimodal inputs, allowing you to combine text with media files.
Check out the supported file types on the Google Support page.

Example: Using Public URLs

The following examples demonstrate how to send single and multiple files using public URLs.

const responseTextWithSingleFile = await chatGemini.chat({
  prompt: "Summarize all the files provided.",
  file: [
    {
      link: "https://files.eric.ed.gov/fulltext/EJ1172284.pdf",
      mimetype: "application/pdf",
    },
  ],
});
 
const responseTextWithMultiFiles = await chatGemini.chat({
  prompt: "Summarize all the files provided.",
  file: [
    {
      link: "https://files.eric.ed.gov/fulltext/EJ1172284.pdf",
      mimetype: "application/pdf",
    },
    {
      link: "https://thumbs.dreamstime.com/z/woman-has-busy-day-work-254938010.jpg",
      mimetype: "image/jpeg",
    },
  ],
});

Example: Using Local File Paths

You can also provide local file paths instead of URLs.

const responseTextWithLocalFile = await chatGemini.chat({
  prompt: "Summarize all the files provided.",
  file: [
    {
      path: "./public/sample.pdf",
      mimetype: "application/pdf",
    },
  ],
});

Streaming text

Sample of creating and handling streamed responses.

const responseTextStreaming = await chatGemini.chatWithHistory({
  systemInstruction: "You are a helpful language translator. Your name is AgentGenesis!",
  chatHistory: [
    { user: "Hello", model: "Hi there! How can I help you today?" },
    {
      user: "My name is John Doe and I have 2 dogs!",
      model: "Sure, I will remember that!",
    },
  ],
  context: "John Doe is an AI developer who loves dogs.",
  prompt: "Who are you? What's your name and my name? How many dogs?",
  stream: true,
});
 
// @ts-ignore -- the return type is a union; see the type-guard sketch below
for await (const chunk of responseTextStreaming.stream) {
  const chunkText = chunk.text();
  process.stdout.write(chunkText);
}
 
/**
Hi John! I'm AgentGenesis, your friendly AI assistant. I know you love dogs and have two of them! 🐶🐶
Is there anything else I can help you with today?
**/
 
const responseTextWithStream = await chatGemini.chat({
  prompt: "Who are you? What's your name? Tell something about me.",
  systemInstruction: "You are a helpful language translator. Your name is AgentGenesis!",
  context: "John Doe is an AI developer who loves dogs.",
  stream: true,
});
 
// @ts-ignore -- the return type is a union; see the type-guard sketch below
for await (const chunk of responseTextWithStream.stream) {
  const chunkText = chunk.text();
  process.stdout.write(chunkText);
}
 
/**
It seems you're asking about John Doe, an AI developer with a passion for dogs! That's pretty cool! It sounds like you might be interested in AI development and have a soft spot for furry friends.
Is there anything else I can help you with today?
**/
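
The @ts-ignore comments above are needed because chat and chatWithHistory return a union of ChatGeminiResponse and GenerateContentStreamResult. If you prefer to keep type checking on, a small type guard can narrow the union instead. A minimal sketch, where isStreamResult is a hypothetical helper and not part of the component:

import { GenerateContentStreamResult } from "@google/generative-ai";

// Narrows the union returned by chat()/chatWithHistory() when stream is true.
function isStreamResult(
  result: unknown
): result is GenerateContentStreamResult {
  return typeof result === "object" && result !== null && "stream" in result;
}

const maybeStream = await chatGemini.chat({
  prompt: "Who are you?",
  stream: true,
});

if (isStreamResult(maybeStream)) {
  for await (const chunk of maybeStream.stream) {
    process.stdout.write(chunk.text());
  }
}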

Counting Tokens

Sample of counting input tokens for prompts and chat histories.

// Counting tokens with single prompts, context etc.
 
const countTokens = await chatGemini.countTokens({
  context: "John Doe is an AI developer who loves dogs.",
  prompt: "Who are you? What's your name and my name? How many Dogs?",
});
 
console.log(countTokens);
 
/*
{
  "totalTokens": 32
}
*/
 
// Counting tokens with chat history
 
const countHistoryTokens = await chatGemini.countChatHistoryTokens({
  chatHistory: [
    { user: "Hello", model: "Hi there! How can I help you today?" },
    {
      user: "My name is John Doe and I have 2 dogs!",
      model: "Sure, I will remember that! Have a good day!",
    },
  ],
});
 
console.log(countHistoryTokens);
 
/*
{
  "totalTokens": 39
}
*/
 
 

Props

chat

| Prop | Type | Description | Default |
| --- | --- | --- | --- |
| prompt | string | Prompt provided by the user | "" |
| context | string? | Additional context the user wants to provide | "" |
| stream | boolean? | If true, the response is streamed as chunks of text | false |
| outputFormat | string? | Particular format in which the user wants the output | "" |
| systemInstruction | string? | Any specific instruction the user wants to feed the model | "" |
| file | { path, mimetype }[] or { link, mimetype }[]? | Local file paths or public links with their MIME types | [] |
| options | RequestOptions? | Additional args as per the Google Gemini docs | - |

chatWithHistory

| Prop | Type | Description | Default |
| --- | --- | --- | --- |
| prompt | string | Prompt provided by the user | "" |
| context | string? | Additional context the user wants to provide | "" |
| stream | boolean? | If true, the response is streamed as chunks of text | false |
| outputFormat | string? | Particular format in which the user wants the output | "" |
| chatHistory | { user, model }[] | Conversational history | required |
| systemInstruction | string? | Any specific instruction the user wants to feed the model | "" |
| maxOutputTokens | number? | Limits the number of output tokens | - |
| options | RequestOptions? | Additional args as per the Google Gemini docs | - |

countChatHistoryTokens

| Prop | Type | Description | Default |
| --- | --- | --- | --- |
| chatHistory | { user, model }[] | Conversational history | required |
| options | RequestOptions? | Additional args as per the Google Gemini docs | - |

countTokens

| Prop | Type | Description | Default |
| --- | --- | --- | --- |
| prompt | string | Prompt provided by the user | "" |
| options | RequestOptions? | Additional args as per the Google Gemini docs | - |
| context | string? | Additional context the user wants to provide | "" |
| outputFormat | string? | Particular format in which the user wants the output | "" |
| file | { path, mimetype }[] or { link, mimetype }[]? | Local file paths or public links with their MIME types | [] |

Credits

This component is built on top of the Google Gemini Node SDK (@google/generative-ai).