Get short-form image responses

This sample demonstrates how to use the Imagen model to ask a question and get an answer about a supplied image.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Java

Before trying this sample, follow the Java setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Java API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


import com.google.api.gax.rpc.ApiException;
import com.google.cloud.aiplatform.v1.EndpointName;
import com.google.cloud.aiplatform.v1.PredictResponse;
import com.google.cloud.aiplatform.v1.PredictionServiceClient;
import com.google.cloud.aiplatform.v1.PredictionServiceSettings;
import com.google.gson.Gson;
import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.Value;
import com.google.protobuf.util.JsonFormat;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class GetShortFormImageResponsesSample {

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "my-project-id";
    String location = "us-central1";
    String inputPath = "/path/to/my-input.png";
    String prompt = ""; // The question about the contents of the image.

    getShortFormImageResponses(projectId, location, inputPath, prompt);
  }

  // Get the short form responses to a question about an image
  public static PredictResponse getShortFormImageResponses(
      String projectId, String location, String inputPath, String prompt)
      throws ApiException, IOException {
    final String endpoint = String.format("%s-aiplatform.googleapis.com:443", location);
    PredictionServiceSettings predictionServiceSettings =
        PredictionServiceSettings.newBuilder().setEndpoint(endpoint).build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests.
    try (PredictionServiceClient predictionServiceClient =
        PredictionServiceClient.create(predictionServiceSettings)) {

      final EndpointName endpointName =
          EndpointName.ofProjectLocationPublisherModelName(
              projectId, location, "google", "imagetext@001");

      // Encode image to Base64
      String imageBase64 =
          Base64.getEncoder().encodeToString(Files.readAllBytes(Paths.get(inputPath)));

      // Create the image map
      Map<String, String> imageMap = new HashMap<>();
      imageMap.put("bytesBase64Encoded", imageBase64);

      Map<String, Object> instancesMap = new HashMap<>();
      instancesMap.put("prompt", prompt);
      instancesMap.put("image", imageMap);
      Value instances = mapToValue(instancesMap);

      // Optional parameters
      Map<String, Object> paramsMap = new HashMap<>();
      paramsMap.put("sampleCount", 2);
      Value parameters = mapToValue(paramsMap);

      PredictResponse predictResponse =
          predictionServiceClient.predict(
              endpointName, Collections.singletonList(instances), parameters);

      for (Value prediction : predictResponse.getPredictionsList()) {
        System.out.println(prediction.getStringValue());
      }
      return predictResponse;
    }
  }

  private static Value mapToValue(Map<String, Object> map) throws InvalidProtocolBufferException {
    Gson gson = new Gson();
    String json = gson.toJson(map);
    Value.Builder builder = Value.newBuilder();
    JsonFormat.parser().merge(json, builder);
    return builder.build();
  }
}

Node.js

Before trying this sample, follow the Node.js setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Node.js API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

/**
 * TODO(developer): Update these variables before running the sample.
 */
const projectId = process.env.CAIP_PROJECT_ID;
const location = 'us-central1';
const inputFile = 'resources/cat.png';
// The question about the contents of the image.
const prompt = 'What breed of cat is this a picture of?';

const aiplatform = require('@google-cloud/aiplatform');

// Imports the Google Cloud Prediction Service Client library
const {PredictionServiceClient} = aiplatform.v1;

// Import the helper module for converting arbitrary protobuf.Value objects
const {helpers} = aiplatform;

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: `${location}-aiplatform.googleapis.com`,
};

// Instantiates a client
const predictionServiceClient = new PredictionServiceClient(clientOptions);

async function getShortFormImageResponses() {
  const fs = require('fs');
  // Configure the parent resource
  const endpoint = `projects/${projectId}/locations/${location}/publishers/google/models/imagetext@001`;

  const imageFile = fs.readFileSync(inputFile);
  // Convert the image data to a Buffer and base64 encode it.
  const encodedImage = Buffer.from(imageFile).toString('base64');

  const instance = {
    prompt: prompt,
    image: {
      bytesBase64Encoded: encodedImage,
    },
  };
  const instanceValue = helpers.toValue(instance);
  const instances = [instanceValue];

  const parameter = {
    // Optional parameters
    sampleCount: 2,
  };
  const parameters = helpers.toValue(parameter);

  const request = {
    endpoint,
    instances,
    parameters,
  };

  // Predict request
  const [response] = await predictionServiceClient.predict(request);
  const predictions = response.predictions;
  if (predictions.length === 0) {
    console.log(
      'No responses were generated. Check the request parameters and image.'
    );
  } else {
    predictions.forEach(prediction => {
      console.log(prediction.stringValue);
    });
  }
}
await getShortFormImageResponses();

Python

Before trying this sample, follow the Python setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Python API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


import vertexai
from vertexai.preview.vision_models import Image, ImageTextModel

# TODO(developer): Update and un-comment below lines
# PROJECT_ID = "your-project-id"
# input_file = "input-image.png"
# question = "" # The question about the contents of the image.

vertexai.init(project=PROJECT_ID, location="us-central1")

model = ImageTextModel.from_pretrained("imagetext@001")
source_img = Image.load_from_file(location=input_file)

answers = model.ask_question(
    image=source_img,
    question=question,
    # Optional parameters
    number_of_results=1,
)

print(answers)
# Example response:
# ['tabby']

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.