How to get outer key in a protobuf

Question:

I am reading data from 2 proto files:

file.proto: this is a wrapper

file2.proto: this has all the columns

file.proto:

syntax = "proto3";

package com.oracle;

// Brings in the Entity and Answer message types referenced by the messages below.
import "file2.proto";

// NOTE(review): file2.proto shares this proto package but declares a different
// go_package (github.com/embroker/oracle/sdk/go_sdk) — confirm which is intended.
option go_package = "github.com/cle/sdk/go_sdk";

// This is the inbound message intended to inform the Oracle of new answers to be persisted
message AnswerUpdateRequest {
  // Entity the request refers to (type + id, see file2.proto).
  Entity entity = 1;
  // Zero or more answers carried by this request.
  repeated Answer answers = 2;
}

// This is the outbound message informing Oracle subscribers of new answers
// It declares the same two fields as AnswerUpdateRequest.
message AnswersUpdated {
  Entity entity = 1;
  repeated Answer answers = 2;
}

file2.proto:

syntax = "proto3";

package com.oracle;

// Needed for the Timestamp fields on Answer.
import "google/protobuf/timestamp.proto";

option go_package = "github.com/embroker/oracle/sdk/go_sdk";

// A typed identifier: an entity kind plus an id string.
message Entity {
  Type type = 1;
  string id = 2;

  // ORGANIZATION is the zero value, so it is the proto3 default when `type`
  // is unset and is omitted from default text/JSON output.
  enum Type {
    ORGANIZATION = 0;
    USER = 1;
    APPLICATION = 2;
  }
}

// Where an answer came from: a source kind plus an id string.
message AnswerSource {
  Type type = 1;
  string id = 2;

  // UNKNOWN is the zero (default) value.
  enum Type {
    UNKNOWN = 0;
    USER = 1;
    DOCUMENT = 2;
    EXTERNAL = 3;
  }
}

// A single keyed answer with its source, two timestamps, declared type and value.
message Answer {
  string key = 1;
  AnswerSource source = 2;
  google.protobuf.Timestamp provided_at = 3;
  google.protobuf.Timestamp received_at = 4;
  AnswerFieldType type = 5;
  Value value = 6;

  // Holder for the answer payload; being a oneof, at most one member is set.
  message Value {
    oneof value {
      string text = 1;
      float decimal = 2;
      // ... (further members elided in this listing)
    }
  }
}

// Declared kind of an Answer's value. Value names carry the
// ANSWER_FIELD_TYPE_ prefix, which is also what appears in JSON output.
enum AnswerFieldType {
  ANSWER_FIELD_TYPE_UNSTRUCTURED = 0; // Can be useful for LLM purposes
  ANSWER_FIELD_TYPE_TEXT = 1;
  ANSWER_FIELD_TYPE_INTEGER = 2;
  ANSWER_FIELD_TYPE_BOOLEAN = 3;
  ANSWER_FIELD_TYPE_DECIMAL = 4;
  ANSWER_FIELD_TYPE_DATE = 5;
  ANSWER_FIELD_TYPE_ADDRESS = 6;
}

My python function to map to proto

import file.proto
import file2.proto
def create_answer_update_request(json_data):
    """Build an ``AnswerUpdateRequest`` from parsed JSON and return its bytes.

    ``json_data`` is a mapping whose ``"answerUpdateRequest"`` entry holds the
    ``entity`` and the list of ``answers`` to encode. Enum fields are looked
    up by name, and each answer's ``type`` is expected without the
    ``ANSWER_FIELD_TYPE_`` prefix (it is prepended here).

    Returns whatever ``SerializeToString()`` yields for the populated request.
    The ``"answerUpdateRequest"`` key is only used to read the input; the
    serialized message is the request itself.
    """
    request = events_pb2.AnswerUpdateRequest()
    payload = json_data["answerUpdateRequest"]

    entity_data = payload["entity"]
    request.entity.type = model_pb2.Entity.Type.Value(entity_data["type"])
    request.entity.id = entity_data["id"]

    for item in payload["answers"]:
        answer = Answer()
        answer.key = item["key"]

        source_data = item["source"]
        origin = AnswerSource()
        origin.type = AnswerSource.Type.Value(source_data["type"])
        origin.id = source_data["id"]
        answer.source.CopyFrom(origin)

        # Both timestamps arrive as ISO-format strings; convert them in
        # field order (provided_at first, then received_at).
        for field_name in ("provided_at", "received_at"):
            moment = datetime.fromisoformat(item[field_name])
            getattr(answer, field_name).FromDatetime(moment)

        answer.type = AnswerFieldType.Value(f"ANSWER_FIELD_TYPE_{item['type']}")

        answer_value = Answer.Value()
        answer_value.text = item["value"]["text"]
        answer.value.CopyFrom(answer_value)

        request.answers.append(answer)

    return request.SerializeToString()

While deserializing data I am not getting wrapper:

Expected output:

{
  "answerUpdateRequest": {
    "entity": {
      "type": "ORGANIZATION",
      "id": "UU12334ID"
    },
    "answers": [
      {
        "key": "legal_company_name",
        "source": {
          "type": "DOCUMENT",
          "id": "3ea20f68e73ec | DocumentType.application"
        },
        "provided_at": "2024-05-02T15:54:15.941988",
        "received_at": "2024-05-02T15:54:15.945350",
        "type": "TEXT",
        "value": {
          "text": "Cicne Law, LLC"
        }
      },
      {
        "key": "company_website_ind",
        "source": {
          "type": "DOCUMENT",
          "id": "3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application"
        },
        "provided_at": "2024-05-02T15:54:15.941988",
        "received_at": "2024-05-02T15:54:15.945365",
        "type": "BOOLEAN",
        "value": {
          "text": "Yes"
        }
      }
    ]
  }
  
}

Error:
I am not getting "answerUpdateRequest" in the final output; everything else is working as expected. How can I get this outer key?

Asked By: Xi12

||

Answers:

The Protobuf sources (schemas) you include reference package com.oracle.

If these are indeed Oracle Protobuf sources, it would be better for you to generate using Oracle’s public repo and reference them as 3rd-party sources.

I think your code could be simplified:

  1. JSON is wrapped in "answerUpdateRequest" but the Message that includes this is missing
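  2. Enum values are given as numbers in the JSON below (the JSON parser also accepts enum value names, but they must match the schema exactly, e.g. ANSWER_FIELD_TYPE_TEXT rather than TEXT)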
  3. Per the other answer, the timestamps are invalid as written: they lack a timezone suffix (e.g. "2024-05-02T15:54:15.941988" needs a trailing "Z")

I create a wrapper Message:

foo.proto:

syntax = "proto3";

package com.oracle;

// Provides AnswerUpdateRequest.
import "file.proto";

// Wrapper message: its single field is rendered in JSON as "answerUpdateRequest",
// which supplies the outer key the question asks for.
message Foo {
    AnswerUpdateRequest answer_update_request = 1;
}

And:

# Generate Python modules (*_pb2.py) and type stubs (*_pb2.pyi) for all three
# schemas. The trailing backslashes keep this a single command.
protoc \
  --python_out=${PWD} \
  --pyi_out=${PWD} \
  file.proto \
  file2.proto \
  foo.proto

If you were to use the following tweaks to the JSON:

# JSON input for the Foo wrapper message: enums are given by number and the
# timestamps carry a trailing "Z".
data = '''{
  "answerUpdateRequest": {
    "entity": {
      "type": 0,
      "id": "UU12334ID"
    },
    "answers": [
      {
        "key": "legal_company_name",
        "source": {
          "type": 2,
          "id": "3ea20f68e73ec | DocumentType.application"
        },
        "provided_at": "2024-05-02T15:54:15.941988Z",
        "received_at": "2024-05-02T15:54:15.945350Z",
        "type": 1,
        "value": {
          "text": "Cicne Law, LLC"
        }
      },
      {
        "key": "company_website_ind",
        "source": {
          "type": 2,
          "id": "3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application"
        },
        "provided_at": "2024-05-02T15:54:15.941988Z",
        "received_at": "2024-05-02T15:54:15.945365Z",
        "type": 3,
        "value": {
          "text": "Yes"
        }
      }
    ]
  }
}
'''

Then:

import json

import foo_pb2
import file_pb2
import file2_pb2

from google.protobuf import json_format

# Parse the JSON text straight into the wrapper message. json_format.Parse
# takes the raw string and returns the populated message, so no separate
# json.loads() pass is needed. `data` is the JSON string shown above.
m1 = json_format.Parse(data, foo_pb2.Foo())
print(m1)

Yields a protobuf message (!) (entity.type is omitted because it is the default value 0|ORGANIZATION):

answer_update_request {
  entity {
    id: "UU12334ID"
  }
  answers {
    key: "legal_company_name"
    source {
      type: DOCUMENT
      id: "3ea20f68e73ec | DocumentType.application"
    }
    provided_at {
      seconds: 1714665255
      nanos: 941988000
    }
    received_at {
      seconds: 1714665255
      nanos: 945350000
    }
    type: ANSWER_FIELD_TYPE_TEXT
    value {
      text: "Cicne Law, LLC"
    }
  }
  answers {
    key: "company_website_ind"
    source {
      type: DOCUMENT
      id: "3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application"
    }
    provided_at {
      seconds: 1714665255
      nanos: 941988000
    }
    received_at {
      seconds: 1714665255
      nanos: 945365000
    }
    type: ANSWER_FIELD_TYPE_BOOLEAN
    value {
      text: "Yes"
    }
  }
}

And:

import json

import foo_pb2
import file_pb2
import file2_pb2

from google.protobuf import json_format

# Build the wrapper message by hand: Foo -> AnswerUpdateRequest -> Entity.
m2 = foo_pb2.Foo(
    answer_update_request=file_pb2.AnswerUpdateRequest(
      entity=file2_pb2.Entity(
          type=file2_pb2.Entity.ORGANIZATION,
          id="UU12334ID",
      ),
    ),
)

# First answer. Timestamps are filled in after construction from their
# JSON (RFC 3339) string form.
a1 = file2_pb2.Answer(
    key="legal_company_name",
    source=file2_pb2.AnswerSource(
        type=file2_pb2.AnswerSource.DOCUMENT,
        id="3ea20f68e73ec | DocumentType.application",
    ),
    type=file2_pb2.ANSWER_FIELD_TYPE_TEXT,
    value=file2_pb2.Answer.Value(
        text="Cicne Law, LLC",
    ),
)
a1.provided_at.FromJsonString("2024-05-02T15:54:15.941988Z")
a1.received_at.FromJsonString("2024-05-02T15:54:15.945350Z")

# Second answer.
# NOTE(review): the question's sample data uses key "company_website_ind" and
# received_at "...945365"; the values here are the ones that appear in the
# JSON output shown below — confirm which is intended.
a2 = file2_pb2.Answer(
    key="company_websited_ind",
    source=file2_pb2.AnswerSource(
        type=file2_pb2.AnswerSource.DOCUMENT,
        id="3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application",
    ),
    type=file2_pb2.ANSWER_FIELD_TYPE_BOOLEAN,
    value=file2_pb2.Answer.Value(
        text="Yes",
    ),
)
a2.provided_at.FromJsonString("2024-05-02T15:54:15.941988Z")
a2.received_at.FromJsonString("2024-05-02T15:54:15.945350Z")

m2.answer_update_request.answers.extend([
    a1,
    a2,
])
# always_print_fields_with_no_presence=True makes zero-valued fields such as
# entity.type (ORGANIZATION = 0) appear in the JSON instead of being omitted.
print(json_format.MessageToJson(m2,always_print_fields_with_no_presence=True))

Yields the JSON:

{
  "answerUpdateRequest": {
    "entity": {
      "id": "UU12334ID",
      "type": "ORGANIZATION"
    },
    "answers": [
      {
        "key": "legal_company_name",
        "source": {
          "type": "DOCUMENT",
          "id": "3ea20f68e73ec | DocumentType.application"
        },
        "providedAt": "2024-05-02T15:54:15.941988Z",
        "receivedAt": "2024-05-02T15:54:15.945350Z",
        "type": "ANSWER_FIELD_TYPE_TEXT",
        "value": {
          "text": "Cicne Law, LLC"
        }
      },
      {
        "key": "company_websited_ind",
        "source": {
          "type": "DOCUMENT",
          "id": "3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application"
        },
        "providedAt": "2024-05-02T15:54:15.941988Z",
        "receivedAt": "2024-05-02T15:54:15.945350Z",
        "type": "ANSWER_FIELD_TYPE_BOOLEAN",
        "value": {
          "text": "Yes"
        }
      }
    ]
  }
}

NOTE You’d need to revise AnswerFieldType to get values of TEXT instead of ANSWER_FIELD_TYPE_TEXT.

Answered By: DazWilkin
Categories: questions Tags: ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.