Module langbrainscore.encoder.brain

Expand source code
import langbrainscore
import xarray as xr
from langbrainscore.interface.encoder import _Encoder, EncoderRepresentations


class BrainEncoder(_Encoder):
    """
    Encoder that exposes the human measurements stored in a
    `langbrainscore.dataset.Dataset` object through the Encoder interface.
    """

    def __init__(
        self, measurement: str = "unknown", aggregate_time: bool = False
    ) -> None:
        """Initialize a BrainEncoder

        Args:
            measurement (str, optional): Label for the kind of human data being encoded. It is
                passed on as the `model_id` of the representations returned by `encode`, and is
                overwritten during `encode` when the dataset's contents carry a "measurement"
                attribute. Defaults to "unknown".
            aggregate_time (bool, optional): Whether we should aggregate timeid dimension of the
                data during encoding. Defaults to False.
        """
        self._measurement = measurement
        self._aggregate_time = aggregate_time

    def encode(
        self,
        dataset: langbrainscore.dataset.Dataset,
    ) -> EncoderRepresentations:
        """
        returns human measurements related to stimuli (passed in as a Dataset)

        Args:
            dataset (langbrainscore.dataset.Dataset): brain dataset object

        Returns:
            EncoderRepresentations: the dataset's contents (averaged over "timeid" when
                `aggregate_time` is set), bundled with the dataset and the measurement label.
        """
        self._check_dataset_interface(dataset)

        contents = dataset.contents

        # Take the measurement label from the un-aggregated contents so that it
        # is picked up on both the aggregated and non-aggregated paths.
        if "measurement" in contents.attrs:
            self._measurement = contents.attrs["measurement"]

        if self._aggregate_time:
            dim = "timeid"
            # Average over time, then re-insert "timeid" as a length-1 dimension
            # at axis 2 with a single coordinate value of 0.
            contents = (
                contents.mean(dim)
                .expand_dims(dim, 2)
                .assign_coords({dim: (dim, [0])})
            )

        # Both paths return the declared type; previously the aggregated path
        # returned a bare DataArray.
        return EncoderRepresentations(
            dataset=dataset,
            representations=contents,
            model_id=self._measurement,
            emb_aggregation=None,
            emb_preproc=(),
            include_special_tokens=None,
            context_dimension=None,
            bidirectional=False,
        )

Classes

class BrainEncoder (measurement: str = 'unknown', aggregate_time: bool = False)

This class is used to extract the relevant contents of a given langbrainscore.dataset.Dataset object and maintains the Encoder interface.

Initialize a BrainEncoder

Args

measurement : str, optional
The measurement/type of human data. Defaults to 'unknown'.
aggregate_time : bool, optional
Whether we should aggregate timeid dimension of the data during encoding. Defaults to False.

Returns

BrainEncoder
The newly initialized encoder instance.
Expand source code
class BrainEncoder(_Encoder):
    """
    Encoder that exposes the human measurements stored in a
    `langbrainscore.dataset.Dataset` object through the Encoder interface.
    """

    def __init__(
        self, measurement: str = "unknown", aggregate_time: bool = False
    ) -> None:
        """Initialize a BrainEncoder

        Args:
            measurement (str, optional): Label for the kind of human data being encoded. It is
                passed on as the `model_id` of the representations returned by `encode`, and is
                overwritten during `encode` when the dataset's contents carry a "measurement"
                attribute. Defaults to "unknown".
            aggregate_time (bool, optional): Whether we should aggregate timeid dimension of the
                data during encoding. Defaults to False.
        """
        self._measurement = measurement
        self._aggregate_time = aggregate_time

    def encode(
        self,
        dataset: langbrainscore.dataset.Dataset,
    ) -> EncoderRepresentations:
        """
        returns human measurements related to stimuli (passed in as a Dataset)

        Args:
            dataset (langbrainscore.dataset.Dataset): brain dataset object

        Returns:
            EncoderRepresentations: the dataset's contents (averaged over "timeid" when
                `aggregate_time` is set), bundled with the dataset and the measurement label.
        """
        self._check_dataset_interface(dataset)

        contents = dataset.contents

        # Take the measurement label from the un-aggregated contents so that it
        # is picked up on both the aggregated and non-aggregated paths.
        if "measurement" in contents.attrs:
            self._measurement = contents.attrs["measurement"]

        if self._aggregate_time:
            dim = "timeid"
            # Average over time, then re-insert "timeid" as a length-1 dimension
            # at axis 2 with a single coordinate value of 0.
            contents = (
                contents.mean(dim)
                .expand_dims(dim, 2)
                .assign_coords({dim: (dim, [0])})
            )

        # Both paths return the declared type; previously the aggregated path
        # returned a bare DataArray.
        return EncoderRepresentations(
            dataset=dataset,
            representations=contents,
            model_id=self._measurement,
            emb_aggregation=None,
            emb_preproc=(),
            include_special_tokens=None,
            context_dimension=None,
            bidirectional=False,
        )

Ancestors

  • langbrainscore.interface.encoder._Encoder
  • langbrainscore.interface.cacheable._Cacheable
  • typing.Protocol
  • typing.Generic
  • abc.ABC

Methods

def encode(self, dataset: Dataset) ‑> EncoderRepresentations

returns human measurements related to stimuli (passed in as a Dataset)

Args

langbrainscore.dataset.Dataset: brain dataset object

Returns

EncoderRepresentations
contents of brain dataset, wrapped together with the dataset and its measurement label
Expand source code
def encode(
    self,
    dataset: langbrainscore.dataset.Dataset,
) -> EncoderRepresentations:
    """
    returns human measurements related to stimuli (passed in as a Dataset)

    Args:
        dataset (langbrainscore.dataset.Dataset): brain dataset object

    Returns:
        EncoderRepresentations: the dataset's contents (averaged over "timeid" when
            `aggregate_time` is set), bundled with the dataset and the measurement label.
    """
    self._check_dataset_interface(dataset)

    contents = dataset.contents

    # Take the measurement label from the un-aggregated contents so that it
    # is picked up on both the aggregated and non-aggregated paths.
    if "measurement" in contents.attrs:
        self._measurement = contents.attrs["measurement"]

    if self._aggregate_time:
        dim = "timeid"
        # Average over time, then re-insert "timeid" as a length-1 dimension
        # at axis 2 with a single coordinate value of 0.
        contents = (
            contents.mean(dim)
            .expand_dims(dim, 2)
            .assign_coords({dim: (dim, [0])})
        )

    # Both paths return the declared type; previously the aggregated path
    # returned a bare DataArray.
    return EncoderRepresentations(
        dataset=dataset,
        representations=contents,
        model_id=self._measurement,
        emb_aggregation=None,
        emb_preproc=(),
        include_special_tokens=None,
        context_dimension=None,
        bidirectional=False,
    )