Input/Output Reference

This section provides a detailed API reference for all modules related to data input, output, and framework interoperability in the datarec library.

Core I/O Modules

These modules handle the fundamental tasks of reading, writing, and representing raw data.

RawData

Container for raw datasets in DataRec.

Wraps a pandas.DataFrame and stores metadata about user, item, rating, and timestamp columns. Provides lightweight methods for slicing, copying, and merging data.
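For example, a RawData container can be built from an in-memory DataFrame. A minimal sketch; the column names and the import path datarec.io.rawdata are illustrative assumptions:

import pandas as pd
from datarec.io.rawdata import RawData

# hypothetical interaction log
df = pd.DataFrame({
    'user_id': [1, 1, 2],
    'item_id': [10, 20, 10],
    'score':   [5.0, 3.0, 4.0],
    'ts':      [1700000000, 1700000100, 1700000200],
})

# declare which column plays which role
raw = RawData(df, user='user_id', item='item_id', rating='score', timestamp='ts')
print(len(raw))  # 3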

Source code in datarec/io/rawdata.py
class RawData:
    """
    Container for raw datasets in DataRec.

    Wraps a `pandas.DataFrame` and stores metadata about user, item, rating, and timestamp columns.
    Provides lightweight methods for slicing, copying, and merging data.
    """
    def __init__(self, data=None, header=False, user=None, item=None, rating=None, timestamp=None):
        """
        Initialize a RawData object.

        Args:
            data (pd.DataFrame): DataFrame of the dataset. Defaults to None.
            header (bool): Whether the file has a header. Defaults to False.
            user (str): Column name for user IDs.
            item (str): Column name for item IDs.
            rating (str): Column name for ratings.
            timestamp (str): Column name for timestamps.
        """
        self.data = data
        self.header = header
        if data is None:
            self.data = pd.DataFrame()
        self.path = None

        self.user = user
        self.item = item
        self.rating = rating
        self.timestamp = timestamp

    def append(self, new_data):
        """
        Append new rows to the dataset.

        Args:
            new_data (pd.DataFrame): DataFrame to append.

        Returns:
            None
        """
        # DataFrame.append was removed in pandas 2.0 and never mutated in place
        self.data = pd.concat([self.data, new_data], ignore_index=True)

    def copy(self, deep=True):
        """
        Make a copy of the dataset.

        Args:
            deep (bool): If True, return a deep copy of the dataset.

        Returns:
            (RawData): A copy of the dataset.

        """
        return RawData(self.data.copy(deep=deep), header=self.header, user=self.user,
                       item=self.item, rating=self.rating, timestamp=self.timestamp)

    def __repr__(self):
        """
        Return a string representation of the dataset.
        """
        return repr(self.data)

    def __len__(self):
        """
        Return the length of the dataset.
        """
        return len(self.data)

    def __getitem__(self, idx):
        """
        Return the item at the given index.
        Args:
            idx: index of the item to return.

        Returns:
            (RawData): the sample at the given index.

        """
        return self.data[idx]

    def __add__(self, other):
        """
        Concatenate two RawData objects.
        Args:
            other (RawData): the other RawData to concatenate.

        Returns:
            (RawData): the concatenated RawData object.

        """
        self.__check_rawdata_compatibility__(other)
        new_data = pd.concat([self.data, other.data])
        new_rawdata = RawData(new_data, user=self.user, item=self.item, rating=self.rating,
                              timestamp=self.timestamp, header=self.header)
        return new_rawdata

    def __iter__(self):
        """
        Iterate over dataset rows.

        Returns:
            (pd.Series): Each row in the dataset.

        """
        # iterating a DataFrame directly yields column labels; yield rows instead
        return (row for _, row in self.data.iterrows())

    def __check_rawdata_compatibility__(self, rawdata):
        """
        Check compatibility between RawData objects.
        Args:
            rawdata (RawData): RawData object to check.

        Returns:
            (bool): True if compatibility is verified.

        """
        return __check_rawdata_compatibility__(self, rawdata)

__init__(data=None, header=False, user=None, item=None, rating=None, timestamp=None)

Initialize a RawData object.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | DataFrame | DataFrame of the dataset. | None |
| header | bool | Whether the file has a header. | False |
| user | str | Column name for user IDs. | None |
| item | str | Column name for item IDs. | None |
| rating | str | Column name for ratings. | None |
| timestamp | str | Column name for timestamps. | None |
Source code in datarec/io/rawdata.py
def __init__(self, data=None, header=False, user=None, item=None, rating=None, timestamp=None):
    """
    Initialize a RawData object.

    Args:
        data (pd.DataFrame): DataFrame of the dataset. Defaults to None.
        header (bool): Whether the file has a header. Defaults to False.
        user (str): Column name for user IDs.
        item (str): Column name for item IDs.
        rating (str): Column name for ratings.
        timestamp (str): Column name for timestamps.
    """
    self.data = data
    self.header = header
    if data is None:
        self.data = pd.DataFrame()
    self.path = None

    self.user = user
    self.item = item
    self.rating = rating
    self.timestamp = timestamp

append(new_data)

Append new rows to the dataset.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| new_data | DataFrame | DataFrame to append. | required |

Returns:

| Type | Description |
| --- | --- |
| None | Rows are appended in place. |

Source code in datarec/io/rawdata.py
def append(self, new_data):
    """
    Append new rows to the dataset.

    Args:
        new_data (pd.DataFrame): DataFrame to append.

    Returns:
        None
    """
    # DataFrame.append was removed in pandas 2.0 and never mutated in place
    self.data = pd.concat([self.data, new_data], ignore_index=True)

copy(deep=True)

Make a copy of the dataset.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| deep | bool | If True, return a deep copy of the dataset. | True |

Returns:

| Type | Description |
| --- | --- |
| RawData | A copy of the dataset. |

Source code in datarec/io/rawdata.py
def copy(self, deep=True):
    """
    Make a copy of the dataset.

    Args:
        deep (bool): If True, return a deep copy of the dataset.

    Returns:
        (RawData): A copy of the dataset.

    """
    return RawData(self.data.copy(deep=deep), header=self.header, user=self.user,
                   item=self.item, rating=self.rating, timestamp=self.timestamp)

__repr__()

Return a string representation of the dataset.

Source code in datarec/io/rawdata.py
def __repr__(self):
    """
    Return a string representation of the dataset.
    """
    return repr(self.data)

__len__()

Return the length of the dataset.

Source code in datarec/io/rawdata.py
def __len__(self):
    """
    Return the length of the dataset.
    """
    return len(self.data)

__getitem__(idx)

Return the item at the given index.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| idx |  | Index of the item to return. | required |

Returns:

| Type | Description |
| --- | --- |
| RawData | The sample at the given index. |

Source code in datarec/io/rawdata.py
def __getitem__(self, idx):
    """
    Return the item at the given index.
    Args:
        idx: index of the item to return.

    Returns:
        (RawData): the sample at the given index.

    """
    return self.data[idx]

__add__(other)

Concatenate two RawData objects.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| other | RawData | The other RawData to concatenate. | required |

Returns:

| Type | Description |
| --- | --- |
| RawData | The concatenated RawData object. |

Source code in datarec/io/rawdata.py
def __add__(self, other):
    """
    Concatenate two RawData objects.
    Args:
        other (RawData): the other RawData to concatenate.

    Returns:
        (RawData): the concatenated RawData object.

    """
    self.__check_rawdata_compatibility__(other)
    new_data = pd.concat([self.data, other.data])
    new_rawdata = RawData(new_data, user=self.user, item=self.item, rating=self.rating,
                          timestamp=self.timestamp, header=self.header)
    return new_rawdata
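A short usage sketch of the + operator (column metadata must match on both operands, otherwise the compatibility check raises ValueError; the names below are illustrative):

import pandas as pd
from datarec.io.rawdata import RawData

cols = dict(user='user_id', item='item_id')
a = RawData(pd.DataFrame({'user_id': [1], 'item_id': [10]}), **cols)
b = RawData(pd.DataFrame({'user_id': [2], 'item_id': [20]}), **cols)

merged = a + b      # compatibility is checked, then rows are concatenated
print(len(merged))  # 2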

__iter__()

Iterate over dataset rows.

Returns:

| Type | Description |
| --- | --- |
| Series | Each row in the dataset. |

Source code in datarec/io/rawdata.py
def __iter__(self):
    """
    Iterate over dataset rows.

    Returns:
        (pd.Series): Each row in the dataset.

    """
    # iterating a DataFrame directly yields column labels; yield rows instead
    return (row for _, row in self.data.iterrows())

__check_rawdata_compatibility__(rawdata)

Check compatibility between this and another RawData object.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| rawdata | RawData | RawData object to check. | required |

Returns:

| Type | Description |
| --- | --- |
| bool | True if compatibility is verified. |

Source code in datarec/io/rawdata.py
def __check_rawdata_compatibility__(self, rawdata):
    """
    Check compatibility between RawData objects.
    Args:
        rawdata (RawData): RawData object to check.

    Returns:
        (bool): True if compatibility is verified.

    """
    return __check_rawdata_compatibility__(self, rawdata)

__check_rawdata_compatibility__(rawdata1, rawdata2)

Check compatibility between two RawData objects.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| rawdata1 | RawData | First RawData object to check. | required |
| rawdata2 | RawData | Second RawData object to check. | required |

Returns:

| Type | Description |
| --- | --- |
| bool | True if compatibility is verified. |

Source code in datarec/io/rawdata.py
def __check_rawdata_compatibility__(rawdata1: RawData, rawdata2: RawData):
    """
    Check compatibility between two RawData objects.
    Args:
        rawdata1 (RawData): First RawData object to check.
        rawdata2 (RawData): Second RawData object to check.

    Returns:
        (bool): True if compatibility is verified.

    """
    if rawdata1.user != rawdata2.user:
        raise ValueError('User columns are not compatible')
    if rawdata1.item != rawdata2.item:
        raise ValueError('Item columns are not compatible')
    if rawdata1.rating != rawdata2.rating:
        raise ValueError('Rating columns are not compatible')
    if rawdata1.timestamp != rawdata2.timestamp:
        raise ValueError('Timestamp columns are not compatible')
    if rawdata1.header != rawdata2.header:
        raise ValueError('Header is not compatible')
    return True

fill_rawdata(data, user=None, item=None, rating=None, timestamp=None, path=None)

Create a RawData object from raw data and assign column names to RawData object attributes.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | DataFrame | Data to create RawData object from. | required |
| user | str | Column name for user field. | None |
| item | str | Column name for item field. | None |
| rating | str | Column name for rating field. | None |
| timestamp | str | Column name for timestamp field. | None |
| path | str | Path where the original file is stored. | None |

Returns:

| Type | Description |
| --- | --- |
| RawData | The populated RawData object. |
Source code in datarec/io/readers.py
def fill_rawdata(data, user=None, item=None, rating=None, timestamp=None, path=None):
    """
    Create a RawData object from raw data and assign column names to RawData object attributes.

    Args:
        data (pd.DataFrame): Data to create RawData object from.
        user (str): Column name for user field.
        item (str): Column name for item field.
        rating (str): Column name for rating field.
        timestamp (str): Column name for timestamp field.
        path (str): Path where the original file is stored.

    Returns:
        (RawData): The populated RawData object.
    """
    rawdata = RawData(data)

    # set columns
    rawdata.user = user
    rawdata.item = item
    rawdata.rating = rating
    rawdata.timestamp = timestamp

    # set file path
    rawdata.path = path

    return rawdata
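A hedged usage sketch (column names and the path are illustrative; note that the function returns the populated RawData):

import pandas as pd
from datarec.io.readers import fill_rawdata

df = pd.DataFrame({'u': [1, 2], 'i': [10, 20]})
raw = fill_rawdata(df, user='u', item='i', path='/tmp/source.csv')
print(raw.user, raw.item, raw.path)  # u i /tmp/source.csv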

read_json(filepath, user_field=None, item_field=None, rating_field=None, timestamp_field=None, lines=True)

Reads a JSON file and returns it as a RawData object.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| filepath | str | Path to JSON file. | required |
| user_field | str | JSON key for user field. | None |
| item_field | str | JSON key for item field. | None |
| rating_field | str | JSON key for rating field. | None |
| timestamp_field | str | JSON key for timestamp field. | None |
| lines | bool | Read the file as one JSON object per line. | True |

Returns:

| Type | Description |
| --- | --- |
| RawData | RawData object. |

Source code in datarec/io/readers.py
def read_json(filepath, user_field=None, item_field=None, rating_field=None, timestamp_field=None, lines=True):
    """
    Reads a JSON file and returns it as a RawData object.
    Args:
        filepath (str): path to JSON file.
        user_field (str): JSON key for user field.
        item_field (str): JSON key for item field.
        rating_field (str): JSON key for rating field.
        timestamp_field (str): JSON key for timestamp field.
        lines (bool): Read the file as a JSON object per line.

    Returns:
        (RawData): RawData object

    """
    # check that file exists
    if not os.path.exists(filepath):
        raise FileNotFoundError(filepath)

    std_fields = [user_field, item_field, rating_field, timestamp_field]
    assigned_fields = [c for c in std_fields if c is not None]

    # at least one column given check
    if len(assigned_fields) == 0:
        raise AttributeError('Fields are missing. At least one should be assigned')

    # read data
    data = pd.read_json(filepath, lines=lines)

    # check that columns are aligned
    for c in assigned_fields:
        if c not in data.columns:
            raise ValueError(f'Field {c} not found in the dataset. Please, check the value and retry')

    rawdata = RawData(data[assigned_fields])

    # set columns
    rawdata.user = user_field
    rawdata.item = item_field
    rawdata.rating = rating_field
    rawdata.timestamp = timestamp_field
    return rawdata
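For instance, a JSON Lines file with one object per line (the field names here are illustrative, in the style of review dumps):

{"reviewerID": "u1", "asin": "i1", "overall": 5.0}
{"reviewerID": "u2", "asin": "i2", "overall": 3.0}

could be loaded as follows:

from datarec.io.readers import read_json

raw = read_json('reviews.jsonl',
                user_field='reviewerID', item_field='asin', rating_field='overall')
print(raw.user, raw.item, raw.rating)  # reviewerID asin overall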

read_tabular(filepath, sep, user_col=None, item_col=None, rating_col=None, timestamp_col=None, header='infer', skiprows=0)

Reads a tabular data file and returns it as a RawData object.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| filepath | str | Path to tabular data file. | required |
| sep | str | Separator to use. | required |
| user_col | str | Column name for user field. | None |
| item_col | str | Column name for item field. | None |
| rating_col | str | Column name for rating field. | None |
| timestamp_col | str | Column name for timestamp field. | None |
| header | int, Sequence of int, 'infer' or None | Row number(s) containing column labels and marking the start of the data (zero-indexed). Default behavior is to infer the column names. | 'infer' |
| skiprows | int, list of int or Callable | Line numbers to skip (0-indexed) or number of lines to skip (int) at the start of the file. | 0 |

Returns:

| Type | Description |
| --- | --- |
| RawData | RawData object. |

Source code in datarec/io/readers.py
def read_tabular(filepath: str, sep: str, user_col=None, item_col=None, rating_col=None, timestamp_col=None,
                 header="infer", skiprows=0):
    """
    Reads a tabular data file and returns it as a RawData object.
    Args:
        filepath (str): Path to tabular data file.
        sep (str): Separator to use.
        user_col (str): Column name for user field.
        item_col (str): Column name for item field.
        rating_col (str): Column name for rating field.
        timestamp_col (str): Column name for timestamp field.
        header (int, Sequence of int, 'infer' or None): Row number(s) containing column labels and marking the start of the data (zero-indexed). Default behavior is to infer the column names.
        skiprows (int, list of int or Callable): Line numbers to skip (0-indexed) or number of lines to skip (int) at the start of the file.

    Returns:
        (RawData): RawData object.

    """
    # check that file exists
    if not os.path.exists(filepath):
        raise FileNotFoundError(filepath)

    std_columns = [user_col, item_col, rating_col, timestamp_col]
    assigned_columns = [c for c in std_columns if c is not None]

    # at least one column given check
    if len(assigned_columns) == 0:
        raise AttributeError('Columns are missing. At least one should be assigned')

    # read data
    data = pd.read_table(filepath_or_buffer=filepath, sep=sep, header=header, skiprows=skiprows, engine='python')

    # check that columns are aligned
    for c in assigned_columns:
        if c not in data.columns:
            raise ValueError(f'Column {c} not found in the dataset. Please, check the value and retry')

    rawdata = RawData(data=data[assigned_columns])

    # set columns
    rawdata.user = user_col
    rawdata.item = item_col
    rawdata.rating = rating_col
    rawdata.timestamp = timestamp_col

    return rawdata
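A usage sketch for a tab-separated file with a header row (the file name and column labels are assumptions; a headerless file would need header=None instead):

from datarec.io.readers import read_tabular

raw = read_tabular('ratings.tsv', sep='\t',
                   user_col='userId', item_col='movieId',
                   rating_col='rating', timestamp_col='timestamp')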

read_inline(filepath, cols=None, user_col='user', item_col='item', col_sep=',', history_sep=';')

Read a CSV file and return a RawData object.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| filepath | str | Path to CSV file. | required |
| cols | list[str] | List of column names. | None |
| user_col | str | Column name for user field. | 'user' |
| item_col | str | Column name for item field. | 'item' |
| col_sep | str | Column separator to use. | ',' |
| history_sep | str | Separator between multiple items in a user's history. | ';' |

Returns:

| Type | Description |
| --- | --- |
| RawData | RawData object. |

Source code in datarec/io/readers.py
def read_inline(filepath: str, cols=None, user_col='user', item_col='item', col_sep=',', history_sep=';'):
    """
    Read a CSV file and return a RawData object.
    Args:
        filepath (str): Path to CSV file.
        cols (list[str]): List of column names.
        user_col (str): Column name for user field.
        item_col (str): Column name for item field.
        col_sep (str): Column separator to use.
        history_sep (str): Separator between multiple items in a user's history.

    Returns:
        (RawData): RawData object.

    """
    if cols is None:
        cols = ['user', 'item']
    assert os.path.exists(filepath), f'File not found at {filepath}'
    to_drop_cols = [c for c in cols if c not in (user_col, item_col)]

    data = pd.read_csv(filepath, sep=col_sep, header=None, names=cols)
    data = data.dropna(subset=[user_col, item_col])
    data = data.drop(columns=to_drop_cols)
    # split each packed history into a list of items, then one row per item
    data[item_col] = data[item_col].apply(lambda x: [item.strip() for item in x.split(history_sep)])
    data = data.explode(item_col)
    data = data.reset_index(drop=True)
    return RawData(data, user=user_col, item=item_col)
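read_inline expects one user per line with the whole item history packed into a single field. An illustrative input file:

u1,i1;i2;i3
u2,i4

and a matching call, which explodes each history into one (user, item) row per interaction:

from datarec.io.readers import read_inline

raw = read_inline('histories.csv', cols=['user', 'item'],
                  user_col='user', item_col='item',
                  col_sep=',', history_sep=';')
print(len(raw))  # 4 rows: (u1, i1), (u1, i2), (u1, i3), (u2, i4)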

read_inline_chunk(filepath, cols=None, user_col='user', item_col='item')

Read a CSV file a chunk of rows at a time and return a RawData object.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| filepath | str | Path to CSV file. | required |
| cols | list[str] | List of column names. | None |
| user_col | str | Column name for user field. | 'user' |
| item_col | str | Column name for item field. | 'item' |

Returns:

| Type | Description |
| --- | --- |
| RawData | RawData object. |

Source code in datarec/io/readers.py
def read_inline_chunk(filepath: str, cols=None, user_col='user', item_col='item'):
    """
    Read a CSV file a chunk of rows at a time and return a RawData object.
    Args:
        filepath (str): Path to CSV file.
        cols (list[str]): List of column names.
        user_col (str): Column name for user field.
        item_col (str): Column name for item field.

    Returns:
        (RawData): RawData object.

    """
    if cols is None:
        cols = ['user', 'item']
    assert os.path.exists(filepath), f'File not found at {filepath}'
    to_drop_cols = [c for c in cols if c not in (user_col, item_col)]

    data_chunks = pd.read_csv(filepath, sep=',', header=None, names=cols, chunksize=100000)
    data = None

    for chunk in tqdm.tqdm(data_chunks):
        chunk = chunk.drop(columns=to_drop_cols)
        chunk[item_col] = chunk[item_col].apply(lambda x: [item.strip() for item in x.split(';')])
        chunk = chunk.explode(item_col)
        if data is not None:
            data = pd.concat([data, chunk])
        else:
            data = chunk

    data = data.reset_index(drop=True)
    return RawData(data, user=user_col, item=item_col)

write_tabular(rawdata, path, sep='\t', header=True, decimal='.', user=True, item=True, rating=True, timestamp=True, verbose=True)

Write a RawData dataset to a CSV/TSV file.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| rawdata | RawData | RawData instance. | required |
| path | str | Path to the CSV/TSV file. | required |
| sep | str | Separator to use. | '\t' |
| header | bool or list[str] | Write out the column names. If a list of strings is given, it is assumed to be aliases for the column names. | True |
| decimal | str | Character recognized as decimal separator. | '.' |
| user | bool | If True, write the user column to the file. | True |
| item | bool | If True, write the item column to the file. | True |
| rating | bool | If True, write the rating column to the file. | True |
| timestamp | bool | If True, write the timestamp column to the file. | True |
| verbose | bool | Print out additional information. | True |

Returns:

| Type | Description |
| --- | --- |
| None | A CSV/TSV file is written to the given path. |

Source code in datarec/io/writers.py
def write_tabular(rawdata: RawData, path, sep='\t', header=True, decimal='.',
                  user=True, item=True, rating=True, timestamp=True, verbose=True):
    """
    Write a RawData dataset to a CSV/TSV file.

    Args:
        rawdata (RawData): RawData instance.
        path (str): Path to the CSV/TSV file.
        sep (str): Separator to use.
        header (bool or list[str]): Write out the column names. If a list of strings is given it is assumed to be aliases for the column names.
        decimal (str): Character recognized as decimal separator.
        user (bool): Whether to write the user information. If True, the user information will be written in the file.
        item (bool): Whether to write the item information. If True, the item information will be written in the file.
        rating (bool): Whether to write the rating information. If True, the rating information will be written in the file.
        timestamp (bool): Whether to write the timestamp information. If True, the timestamp information will be written in the file.
        verbose (bool): Print out additional information.

    Returns:
        None: A CSV/TSV file is written to the given path.

    """
    cols = []
    if user:
        if rawdata.user:
            cols.append(rawdata.user)
        else:
            raise ValueError('User column not defined in the DataRec.')
    if item:
        if rawdata.item:
            cols.append(rawdata.item)
        else:
            raise ValueError('Item column not defined in the DataRec.')
    if rating:
        if rawdata.rating:
            cols.append(rawdata.rating)
        else:
            raise ValueError('Rating column not defined in the DataRec.')
    if timestamp:
        if rawdata.timestamp:
            cols.append(rawdata.timestamp)
        else:
            raise ValueError('Timestamp column not defined in the DataRec.')

    data: pd.DataFrame = rawdata.data[cols]

    if sep in ACCEPTED_TAB_DELIMITERS:
        if sep == "::":
            # pandas cannot write multi-character separators: render with a
            # placeholder character, substitute '::', then save to `path`
            content = data.to_csv(sep='*', header=header, index=False, decimal=decimal)
            content = content.replace('*', '::')
            with open(path, 'w') as f:
                f.write(content)
        else:
            data.to_csv(path, sep=sep, header=header, index=False, decimal=decimal)
        if verbose:
            print(f'A dataset has been stored at \'{path}\'')
    else:
        raise ValueError(f'Separator {sep!r} is not supported. Accepted delimiters: {ACCEPTED_TAB_DELIMITERS}')
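A usage sketch (assuming `raw` is a RawData object whose user, item, and rating columns are set; the timestamp column is dropped from the output here):

from datarec.io.writers import write_tabular

write_tabular(raw, path='interactions.tsv', sep='\t', header=True, timestamp=False)
# prints: A dataset has been stored at 'interactions.tsv'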

write_json(rawdata, path, user=True, item=True, rating=True, timestamp=True)

Write a RawData dataset to a JSON file.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| rawdata | RawData | RawData instance. | required |
| path | str | Path to the JSON file. | required |
| user | bool | If True, write the user column to the file. | True |
| item | bool | If True, write the item column to the file. | True |
| rating | bool | If True, write the rating column to the file. | True |
| timestamp | bool | If True, write the timestamp column to the file. | True |

Returns:

| Type | Description |
| --- | --- |
| None | A JSON Lines file is written to the given path. |

Source code in datarec/io/writers.py
def write_json(rawdata: RawData, path, user=True, item=True, rating=True, timestamp=True):
    """
    Write a RawData dataset to a JSON file.
    Args:
        rawdata (RawData): RawData instance.
        path (str): Path to the JSON file.
        user (bool): Whether to write the user information. If True, the user information will be written in the file.
        item (bool): Whether to write the item information. If True, the item information will be written in the file.
        rating (bool): Whether to write the rating information. If True, the rating information will be written in the file.
        timestamp (bool): Whether to write the timestamp information. If True, the timestamp information will be written in the file.

    Returns:
        None: A JSON Lines file is written to the given path.

    """

    cols = []
    if user:
        cols.append(rawdata.user)
    if item:
        cols.append(rawdata.item)
    if rating:
        cols.append(rawdata.rating)
    if timestamp:
        cols.append(rawdata.timestamp)

    data: pd.DataFrame = rawdata.data[cols]

    data.to_json(path, orient='records', lines=True)
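A small round-trip sketch (assuming `raw` is a RawData object with user, item, and rating columns set; the file name is illustrative):

from datarec.io.writers import write_json
from datarec.io.readers import read_json

write_json(raw, path='interactions.jsonl')  # one JSON object per line
back = read_json('interactions.jsonl',
                 user_field=raw.user, item_field=raw.item, rating_field=raw.rating)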

get_cache_dir(app_name='datarec', app_author='sisinflab')

Returns the appropriate cache directory for the library, creating it if it doesn't exist. Respects the DATAREC_CACHE_DIR environment variable if set.

Returns:

| Type | Description |
| --- | --- |
| Path | The absolute path to the cache directory. |

Source code in datarec/io/paths.py
def get_cache_dir(app_name="datarec", app_author="sisinflab"):
    """
    Returns the appropriate cache directory for the library, creating it if it doesn't exist.
    Respects the DATAREC_CACHE_DIR environment variable if set.

    Returns:
        Path: The absolute path to the cache directory.
    """
    env_override = os.getenv("DATAREC_CACHE_DIR")
    path = Path(env_override) if env_override else Path(user_cache_dir(app_name, app_author))
    path.mkdir(parents=True, exist_ok=True)
    return path
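For example, the environment variable takes precedence over the platform default (the path below is an assumption for illustration):

import os
from datarec.io.paths import get_cache_dir

os.environ['DATAREC_CACHE_DIR'] = '/tmp/datarec-cache'
print(get_cache_dir())  # /tmp/datarec-cache, created if it did not exist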

dataset_directory(dataset_name, must_exist=False)

Given the dataset name, returns the dataset directory.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| dataset_name | str | Name of the dataset. | required |
| must_exist | bool | If True, raise an error when the directory does not exist. | False |

Returns:

| Type | Description |
| --- | --- |
| str | The path of the directory containing the dataset data. |

Source code in datarec/io/paths.py
def dataset_directory(dataset_name: str, must_exist=False) -> str:
    """
    Given the dataset name, returns the dataset directory.
    Args:
        dataset_name (str): name of the dataset
        must_exist (bool): if True, raise an error when the directory does not exist

    Returns:
        (str): the path of the directory containing the dataset data
    """
    dataset_dir = os.path.join(DATA_DIR, dataset_name)
    if must_exist and not os.path.exists(dataset_dir):
        raise FileNotFoundError(f'Directory at {dataset_dir} not found. Please, check that dataset directory exists')
    return os.path.abspath(dataset_dir)

dataset_raw_directory(dataset_name)

Given the dataset name, returns the directory containing the raw data of the dataset.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| dataset_name | str | Name of the dataset. | required |

Returns:

| Type | Description |
| --- | --- |
| str | The path of the directory containing the raw data of the dataset. |

Source code in datarec/io/paths.py
def dataset_raw_directory(dataset_name: str) -> str:
    """
    Given the dataset name, returns the directory containing the raw data of the dataset.
    Args:
        dataset_name (str): name of the dataset

    Returns:
        (str): the path of the directory containing the raw data of the dataset
    """
    return os.path.join(dataset_directory(dataset_name), RAW_DATA_FOLDER)

dataset_processed_directory(dataset_name)

Given the dataset name, returns the directory containing the processed data of the dataset.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| dataset_name | str | Name of the dataset. | required |

Returns:

| Type | Description |
| --- | --- |
| str | The path of the directory containing the processed data of the dataset. |

Source code in datarec/io/paths.py
def dataset_processed_directory(dataset_name: str) -> str:
    """
    Given the dataset name, returns the directory containing the processed data of the dataset.
    Args:
        dataset_name (str): name of the dataset

    Returns:
        (str): the path of the directory containing the processed data of the dataset
    """
    return os.path.join(dataset_directory(dataset_name), PROCESSED_DATA_FOLDER)

dataset_filepath(dataset_name)

Given the dataset name, returns the path of the dataset data.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| dataset_name | str | Name of the dataset. | required |

Returns:

| Type | Description |
| --- | --- |
| str | The path of the dataset data. |

Source code in datarec/io/paths.py
def dataset_filepath(dataset_name: str) -> str:
    """
    Given the dataset name, returns the path of the dataset data.
    Args:
        dataset_name (str): name of the dataset

    Returns:
        (str): the path of the dataset data
    """
    return os.path.join(dataset_directory(dataset_name), DATASET_NAME)
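A sketch of how the path helpers compose (the dataset name is illustrative; DATA_DIR, RAW_DATA_FOLDER, PROCESSED_DATA_FOLDER, and DATASET_NAME are module-level constants of datarec.io.paths):

from datarec.io.paths import (dataset_directory, dataset_raw_directory,
                              dataset_processed_directory, dataset_filepath)

name = 'movielens-1m'
print(dataset_directory(name))            # <DATA_DIR>/movielens-1m
print(dataset_raw_directory(name))        # <DATA_DIR>/movielens-1m/<RAW_DATA_FOLDER>
print(dataset_processed_directory(name))  # <DATA_DIR>/movielens-1m/<PROCESSED_DATA_FOLDER>
print(dataset_filepath(name))             # <DATA_DIR>/movielens-1m/<DATASET_NAME>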

Framework Interoperability

This section covers the tools used to export DataRec datasets into formats compatible with other popular recommender systems libraries.

FrameworkExporter

Exporter for converting RawData datasets to external recommender system frameworks.

Provides methods to format a RawData object according to the expected schema of supported libraries (e.g., Cornac, RecBole).

Source code in datarec/io/frameworks/exporter.py
class FrameworkExporter:
    """
    Exporter for converting RawData datasets to external recommender system frameworks.

    Provides methods to format a `RawData` object according to
    the expected schema of supported libraries (e.g., Cornac, RecBole).

    """

    def __init__(self, output_path, user=True, item=True, rating=True, timestamp=False):
        """
        Initialize a FrameworkExporter object.
        Args:
            output_path (str): Path where to save the output file.
            user (bool): Whether to write the user information. If True, the user information will be written in the file.
            item (bool): Whether to write the item information. If True, the item information will be written in the file.
            rating (bool): Whether to write the rating information. If True, the rating information will be written in the file.
            timestamp (bool): Whether to write the timestamp information. If True, the timestamp information will be written in the file.
        """
        self.params = {k: v for k, v in locals().items() if k != 'self'}

        self.path = output_path
        self.user = user
        self.item = item
        self.rating = rating
        self.timestamp = timestamp

    def to_clayrs(self, data: RawData):
        """
        Export to ClayRS format.
        Args:
            data (RawData): RawData object to convert to ClayRS format.
        """
        write_tabular(rawdata=data, path=self.path, sep=',', header=False,
                      user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

        ClayRS(timestamp=self.timestamp, path=self.path).info()

    def to_cornac(self, data: RawData):
        """
        Export to Cornac format.
        Args:
            data (RawData): RawData object to convert to Cornac format.
        """
        write_tabular(rawdata=data, path=self.path, sep='\t', header=False,
                      user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

        Cornac(timestamp=self.timestamp, path=self.path).info()

    def to_daisyrec(self, data: RawData):
        """
        Export to DaisyRec format.
        Args:
            data (RawData): RawData object to convert to DaisyRec format.
        """
        write_tabular(rawdata=data, path=self.path, sep='\t', header=False,
                      user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

        DaisyRec(timestamp=self.timestamp, path=self.path).info()

    def to_lenskit(self, data: RawData):
        """
        Export to LensKit format.
        Args:
            data (RawData): RawData object to convert to LensKit format.
        """
        data.data.rename(columns={data.user: "user", data.item: "item",
                                  data.rating: "rating"}, inplace=True)
        data.user = "user"
        data.item = "item"
        data.rating = "rating"

        if self.timestamp:
            data.data.rename(columns={data.timestamp: "timestamp"}, inplace=True)
            data.timestamp = "timestamp"
            data.rating = "rating"

        write_tabular(rawdata=data, path=self.path, sep='\t', header=False,
                      user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

        LensKit(timestamp=self.timestamp, path=self.path).info()

    def to_recbole(self, data: RawData):
        """
        Export to RecBole format.
        Args:
            data (RawData): RawData object to convert to RecBole format.
        """

        data.data.rename(columns={data.user: "user: token", data.item: "item: token",
                                  data.rating: "rating: float"}, inplace=True)
        data.user = "user: token"
        data.item = "item: token"
        data.rating = "rating: float"

        if self.timestamp:
            data.data.rename(columns={data.timestamp: "timestamp"}, inplace=True)
            data.timestamp = "timestamp:float"

        frmk = RecBole(timestamp=self.timestamp, path=self.path)
        frmk.info()

        write_tabular(rawdata=data, path=frmk.path, sep='\t', header=True,
                      user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

    def to_rechorus(self, train_data: RawData, test_data: RawData, val_data: RawData):
        """
        Export to ReChorus format.
        Args:
            train_data (RawData): Training data as RawData object to convert to ReChorus format.
            test_data (RawData): Test data as RawData object to convert to ReChorus format.
            val_data (RawData): Validation data as RawData object to convert to ReChorus format.
        """
        # user_id	item_id	time
        if self.rating:
            print('Ratings will be interpreted as implicit interactions.')
            self.rating = False

        frmk = ReChorus(timestamp=self.timestamp, path=self.path)

        # ReChorus reads the validation split from dev.csv and the test split from test.csv
        for data, name in zip([train_data, val_data, test_data], ['train.csv', 'dev.csv', 'test.csv']):
            data.data.rename(columns={data.user: "user_id", data.item: "item_id"}, inplace=True)
            data.user = "user_id"
            data.item = "item_id"

            if self.timestamp:
                data.data.rename(columns={data.timestamp: "time"}, inplace=True)
                data.timestamp = "time"

            path = os.path.join(frmk.directory, name)
            write_tabular(rawdata=data, path=path, sep='\t', header=True,
                          user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

        frmk.info()

    def to_recpack(self, data: RawData):
        """
        Export to RecPack format.
        Args:
            data (RawData): RawData object to convert to RecPack format.
        """

        if self.rating:
            print('Ratings will be interpreted as implicit interactions.')
            self.rating = False

        frmk = RecPack(timestamp=self.timestamp, path=self.path)

        data.data.rename(columns={data.user: "userId", data.item: "itemId"}, inplace=True)
        data.user = "userId"
        data.item = "itemId"
        if self.timestamp:
            data.data.rename(columns={data.timestamp: "timestamp"}, inplace=True)
            data.timestamp = "timestamp"

        write_tabular(rawdata=data, path=frmk.file_path, sep='\t', header=True,
                      user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

        frmk.info()

    def to_recommenders(self, data: RawData):
        """
        Export to Recommenders format.
        Args:
            data (RawData): RawData object to convert to Recommenders format.
        """

        frmk = Recommenders(timestamp=self.timestamp, path=self.path)

        data.data.rename(columns={data.user: "user", data.item: "item", data.rating: "rating"}, inplace=True)
        data.user = "item"
        data.item = "rating"
        data.rating = 'rating'
        if self.timestamp:
            data.data.rename(columns={data.timestamp: "timestamp"}, inplace=True)
            data.timestamp = "timestamp"

        write_tabular(rawdata=data, path=frmk.file_path, sep='\t', header=True,
                      user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

        frmk.info()

    def to_elliot(self, train_data: DataRec, test_data: DataRec, val_data: DataRec):
        """
        Export to Elliot format.
        Args:
            train_data (DataRec): Training data as DataRec object to convert to Elliot format.
            test_data (DataRec): Test data as DataRec object to convert to Elliot format.
            val_data (DataRec): Validation data as DataRec object to convert to Elliot format.
        """

        frmk = Elliot(timestamp=self.timestamp, path=self.path)

        for data, name in zip([train_data.to_rawdata(), test_data.to_rawdata(), val_data.to_rawdata()],
                              [frmk.train_path, frmk.test_path, frmk.val_path]):
            columns_order = [data.user, data.item, data.rating]
            if self.timestamp:
                columns_order.append(data.timestamp)

            write_tabular(rawdata=data, path=name, sep='\t', header=False,
                          user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

        frmk.info()
        train_data.pipeline.add_step("export", "Elliot", self.params)
        test_data.pipeline.add_step("export", "Elliot", self.params)
        val_data.pipeline.add_step("export", "Elliot", self.params)
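A usage sketch (assuming `raw` is a RawData object with user, item, and rating columns set; the output path is illustrative):

from datarec.io.frameworks.exporter import FrameworkExporter

exporter = FrameworkExporter('export/cornac_train.tsv',
                             user=True, item=True, rating=True, timestamp=False)
exporter.to_cornac(raw)  # writes a headerless UIR tab-separated file
                         # and prints Cornac citation/usage info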

__init__(output_path, user=True, item=True, rating=True, timestamp=False)

Initialize a FrameworkExporter object.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| output_path | str | Path where to save the output file. | required |
| user | bool | If True, write the user column to the file. | True |
| item | bool | If True, write the item column to the file. | True |
| rating | bool | If True, write the rating column to the file. | True |
| timestamp | bool | If True, write the timestamp column to the file. | False |

Source code in datarec/io/frameworks/exporter.py
def __init__(self, output_path, user=True, item=True, rating=True, timestamp=False):
    """
    Initialize a FrameworkExporter object.
    Args:
        output_path (str): Path where to save the output file.
        user (bool): Whether to write the user information. If True, the user information will be written in the file.
        item (bool): Whether to write the item information. If True, the item information will be written in the file.
        rating (bool): Whether to write the rating information. If True, the rating information will be written in the file.
        timestamp (bool): Whether to write the timestamp information. If True, the timestamp information will be written in the file.
    """
    self.params = {k: v for k, v in locals().items() if k != 'self'}

    self.path = output_path
    self.user = user
    self.item = item
    self.rating = rating
    self.timestamp = timestamp

to_clayrs(data)

Export to ClayRS format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | RawData | RawData object to convert to ClayRS format. | required |

Source code in datarec/io/frameworks/exporter.py
def to_clayrs(self, data: RawData):
    """
    Export to ClayRS format.
    Args:
        data (RawData): RawData object to convert to ClayRS format.
    """
    write_tabular(rawdata=data, path=self.path, sep=',', header=False,
                  user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

    ClayRS(timestamp=self.timestamp, path=self.path).info()

to_cornac(data)

Export to Cornac format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | RawData | RawData object to convert to Cornac format. | required |

Source code in datarec/io/frameworks/exporter.py
def to_cornac(self, data: RawData):
    """
    Export to Cornac format.
    Args:
        data (RawData): RawData object to convert to Cornac format.
    """
    write_tabular(rawdata=data, path=self.path, sep='\t', header=False,
                  user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

    Cornac(timestamp=self.timestamp, path=self.path).info()

to_daisyrec(data)

Export to DaisyRec format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | RawData | RawData object to convert to DaisyRec format. | required |

Source code in datarec/io/frameworks/exporter.py
def to_daisyrec(self, data: RawData):
    """
    Export to DaisyRec format.
    Args:
        data (RawData): RawData object to convert to DaisyRec format.
    """
    write_tabular(rawdata=data, path=self.path, sep='\t', header=False,
                  user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

    DaisyRec(timestamp=self.timestamp, path=self.path).info()

to_lenskit(data)

Export to LensKit format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | RawData | RawData object to convert to LensKit format. | required |

Source code in datarec/io/frameworks/exporter.py
def to_lenskit(self, data: RawData):
    """
    Export to LensKit format.
    Args:
        data (RawData): RawData object to convert to LensKit format.
    """
    data.data.rename(columns={data.user: "user", data.item: "item",
                              data.rating: "rating"}, inplace=True)
    data.user = "user"
    data.item = "item"
    data.rating = "rating"

    if self.timestamp:
        data.data.rename(columns={data.timestamp: "timestamp"}, inplace=True)
        data.timestamp = "timestamp"
        data.rating = "rating"

    write_tabular(rawdata=data, path=self.path, sep='\t', header=False,
                  user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

    LensKit(timestamp=self.timestamp, path=self.path).info()

to_recbole(data)

Export to RecBole format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | RawData | RawData object to convert to RecBole format. | required |

Source code in datarec/io/frameworks/exporter.py
def to_recbole(self, data: RawData):
    """
    Export to RecBole format.
    Args:
        data (RawData): RawData object to convert to RecBole format.
    """

    data.data.rename(columns={data.user: "user: token", data.item: "item: token",
                              data.rating: "rating: float"}, inplace=True)
    data.user = "user: token"
    data.item = "item: token"
    data.rating = "rating: float"

    if self.timestamp:
        data.data.rename(columns={data.timestamp: "timestamp"}, inplace=True)
        data.timestamp = "timestamp:float"

    frmk = RecBole(timestamp=self.timestamp, path=self.path)
    frmk.info()

    write_tabular(rawdata=data, path=frmk.path, sep='\t', header=True,
                  user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

to_rechorus(train_data, test_data, val_data)

Export to ReChorus format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| train_data | RawData | Training data to convert to ReChorus format. | required |
| test_data | RawData | Test data to convert to ReChorus format. | required |
| val_data | RawData | Validation data to convert to ReChorus format. | required |

Source code in datarec/io/frameworks/exporter.py
def to_rechorus(self, train_data: RawData, test_data: RawData, val_data: RawData):
    """
    Export to ReChorus format.
    Args:
        train_data (RawData): Training data as RawData object to convert to ReChorus format.
        test_data (RawData): Test data as RawData object to convert to ReChorus format.
        val_data (RawData): Validation data as RawData object to convert to ReChorus format.
    """
    # user_id	item_id	time
    if self.rating:
        print('Ratings will be interpreted as implicit interactions.')
        self.rating = False

    frmk = ReChorus(timestamp=self.timestamp, path=self.path)

    # ReChorus reads the validation split from dev.csv and the test split from test.csv
    for data, name in zip([train_data, val_data, test_data], ['train.csv', 'dev.csv', 'test.csv']):
        data.data.rename(columns={data.user: "user_id", data.item: "item_id"}, inplace=True)
        data.user = "user_id"
        data.item = "item_id"

        if self.timestamp:
            data.data.rename(columns={data.timestamp: "time"}, inplace=True)
            data.timestamp = "time"

        path = os.path.join(frmk.directory, name)
        write_tabular(rawdata=data, path=path, sep='\t', header=True,
                      user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

    frmk.info()

to_recpack(data)

Export to RecPack format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | RawData | RawData object to convert to RecPack format. | required |

Source code in datarec/io/frameworks/exporter.py
def to_recpack(self, data: RawData):
    """
    Export to RecPack format.
    Args:
        data (RawData): RawData object to convert to RecPack format.
    """

    if self.rating:
        print('Ratings will be interpreted as implicit interactions.')
        self.rating = False

    frmk = RecPack(timestamp=self.timestamp, path=self.path)

    data.data.rename(columns={data.user: "userId", data.item: "itemId"}, inplace=True)
    data.user = "userId"
    data.item = "itemId"
    if self.timestamp:
        data.data.rename(columns={data.timestamp: "timestamp"}, inplace=True)
        data.timestamp = "timestamp"

    write_tabular(rawdata=data, path=frmk.file_path, sep='\t', header=True,
                  user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

    frmk.info()

to_recommenders(data)

Export to Recommenders format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | RawData | RawData object to convert to Recommenders format. | required |

Source code in datarec/io/frameworks/exporter.py
def to_recommenders(self, data: RawData):
    """
    Export to Recommenders format.
    Args:
        data (RawData): RawData object to convert to Recommenders format.
    """

    frmk = Recommenders(timestamp=self.timestamp, path=self.path)

    data.data.rename(columns={data.user: "user", data.item: "item", data.rating: "rating"}, inplace=True)
    data.user = "item"
    data.item = "rating"
    data.rating = 'rating'
    if self.timestamp:
        data.data.rename(columns={data.timestamp: "timestamp"}, inplace=True)
        data.timestamp = "timestamp"

    write_tabular(rawdata=data, path=frmk.file_path, sep='\t', header=True,
                  user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

    frmk.info()

to_elliot(train_data, test_data, val_data)

Export to Elliot format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| train_data | DataRec | Training data to convert to Elliot format. | required |
| test_data | DataRec | Test data to convert to Elliot format. | required |
| val_data | DataRec | Validation data to convert to Elliot format. | required |

Source code in datarec/io/frameworks/exporter.py
def to_elliot(self, train_data: DataRec, test_data: DataRec, val_data: DataRec):
    """
    Export to Elliot format.
    Args:
        train_data (DataRec): Training data as DataRec object to convert to Elliot format.
        test_data (DataRec): Test data as DataRec object to convert to Elliot format.
        val_data (DataRec): Validation data as DataRec object to convert to Elliot format.
    """

    frmk = Elliot(timestamp=self.timestamp, path=self.path)

    for data, name in zip([train_data.to_rawdata(), test_data.to_rawdata(), val_data.to_rawdata()],
                          [frmk.train_path, frmk.test_path, frmk.val_path]):
        columns_order = [data.user, data.item, data.rating]
        if self.timestamp:
            columns_order.append(data.timestamp)

        write_tabular(rawdata=data, path=name, sep='\t', header=False,
                      user=self.user, item=self.item, rating=self.rating, timestamp=self.timestamp)

    frmk.info()
    train_data.pipeline.add_step("export", "Elliot", self.params)
    test_data.pipeline.add_step("export", "Elliot", self.params)
    val_data.pipeline.add_step("export", "Elliot", self.params)

Framework

Base class for all framework exporters.

Source code in datarec/io/frameworks/manager.py
class Framework:
    """
    Base class for all framework exporters.
    """
    FRAMEWORK_NAME = None

    PAPER = None

    DOI = None

    CITATION = None

    CODE = None

    REPOSITORY = None

    DOC = None

    def info_code(self):
        """
        Print example code for integrating this framework with DataRec.
        """
        print(f"How to use {self.FRAMEWORK_NAME} with DataRec:\n" + self.CODE)

    def info(self):
        """
        Print citation information for the framework including: paper name, DOI and bibtex citation.
        Print additional information such as: example code for integrating this framework with DataRec,
        repository URL and framework documentation URL.
        """
        if self.FRAMEWORK_NAME is None:
            raise AttributeError

        print(f"If you are going to use {self.FRAMEWORK_NAME} don't forget to cite the paper!")

        if self.PAPER:
            print(f'Paper: \'{self.PAPER}\'')
        if self.DOI:
            print(f'DOI: {self.DOI}')
        if self.CITATION:
            print(f'BibTeX from dblp.org:\n {self.CITATION}')

        if self.CODE:
            print(
                '\n================================================ CODE EXAMPLE ================================================\n')
            self.info_code()
            print(
                '==============================================================================================================\n')

        if self.REPOSITORY:
            print(f'For more information check {self.FRAMEWORK_NAME} repository: \'{self.REPOSITORY}\'')

        if self.DOC:
            print(f'More documentation on how to use {self.FRAMEWORK_NAME} at \'{self.DOC}\'')
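A minimal sketch of a custom adapter built on this contract (everything below is hypothetical, for illustration only):

class MyFramework(Framework):
    """Hypothetical adapter showing the class-attribute contract."""
    FRAMEWORK_NAME = 'MyFramework'
    PAPER = 'MyFramework: An Example Paper'
    DOI = 'https://doi.org/10.0000/example'
    REPOSITORY = 'https://example.org/myframework'
    CODE = """
    # load the exported file with your framework's own reader
    """

MyFramework().info()  # prints citation details and the CODE example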

info_code()

Print example code for integrating this framework with DataRec.

Source code in datarec/io/frameworks/manager.py
def info_code(self):
    """
    Print example code for integrating this framework with DataRec.
    """
    print(f"How to use {self.FRAMEWORK_NAME} with DataRec:\n" + self.CODE)

info()

Print citation information for the framework, including paper name, DOI, and BibTeX citation. Also print additional information such as example code for integrating this framework with DataRec, the repository URL, and the framework documentation URL.

Source code in datarec/io/frameworks/manager.py
def info(self):
    """
    Print citation information for the framework including: paper name, DOI and bibtex citation.
    Print additional information such as: example code for integrating this framework with DataRec,
    repository URL and framework documentation URL.
    """
    if self.FRAMEWORK_NAME is None:
        raise AttributeError

    print(f"If you are going to use {self.FRAMEWORK_NAME} don't forget to cite the paper!")

    if self.PAPER:
        print(f'Paper: \'{self.PAPER}\'')
    if self.DOI:
        print(f'DOI: {self.DOI}')
    if self.CITATION:
        print(f'BibTeX from dblp.org:\n {self.CITATION}')

    if self.CODE:
        print(
            '\n================================================ CODE EXAMPLE ================================================\n')
        self.info_code()
        print(
            '==============================================================================================================\n')

    if self.REPOSITORY:
        print(f'For more information check {self.FRAMEWORK_NAME} repository: \'{self.REPOSITORY}\'')

    if self.DOC:
        print(f'More documentation on how to use {self.FRAMEWORK_NAME} at \'{self.DOC}\'')

ClayRS

ClayRS

Bases: Framework

ClayRS framework adapter.

Provide metadata, citation, and usage examples for ClayRS framework.

Source code in datarec/io/frameworks/clayrs/clayrs.py
class ClayRS(Framework):
    """
    ClayRS framework adapter.

    Provide metadata, citation, and usage examples for ClayRS framework.
    """

    def __init__(self, timestamp, path):
        """
        Initialize ClayRS adapter.
        Args:
            timestamp (bool): Whether timestamps are included.
            path (str): Path where the ClayRS-compatible dataset is stored.
        """
        self.timestamp = timestamp
        self.path = path

    FRAMEWORK_NAME = 'ClayRS'

    REPOSITORY = 'https://github.com/swapUniba/ClayRS/tree/master'

    PAPER = """ClayRS: An end-to-end framework for reproducible knowledge-aware recommender systems"""

    DOI = "https://doi.org/10.1016/j.is.2023.102273"

    CITATION = """
            @article{DBLP:journals/is/LopsPMSS23,
              author       = {Pasquale Lops and
                              Marco Polignano and
                              Cataldo Musto and
                              Antonio Silletti and
                              Giovanni Semeraro},
              title        = {ClayRS: An end-to-end framework for reproducible knowledge-aware recommender
                              systems},
              journal      = {Inf. Syst.},
              volume       = {119},
              pages        = {102273},
              year         = {2023},
              url          = {https://doi.org/10.1016/j.is.2023.102273},
              doi          = {10.1016/J.IS.2023.102273},
              timestamp    = {Mon, 05 Feb 2024 20:19:36 +0100},
              biburl       = {https://dblp.org/rec/journals/is/LopsPMSS23.bib},
              bibsource    = {dblp computer science bibliography, https://dblp.org}
            }"""

    CODE = """
    from clayrs import content_analyzer 

    ratings = content_analyzer.Ratings(content_analyzer.CSVFile(YOUR_PATH_HERE), timestamp_column=3)
    """

    DOC = 'https://swapuniba.github.io/ClayRS/'

    def info_code(self):
        """
        Provide the code to use in ClayRS to run experiments.
        """
        if self.timestamp:
            self.CODE = """
    from clayrs import content_analyzer 

    ratings = content_analyzer.Ratings(content_analyzer.CSVFile('{path}'), timestamp_column=3)
    """.format(path=self.path)
        else:
            self.CODE = """
    from clayrs import content_analyzer 

    ratings = content_analyzer.Ratings(content_analyzer.CSVFile('{path}'))
    """.format(path=self.path)
        super().info_code()

__init__(timestamp, path)

Initialize ClayRS adapter.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| timestamp | bool | Whether timestamps are included. | required |
| path | str | Path where the ClayRS-compatible dataset is stored. | required |

Source code in datarec/io/frameworks/clayrs/clayrs.py
def __init__(self, timestamp, path):
    """
    Initialize ClayRS adapter.
    Args:
        timestamp (bool): Whether timestamps are included.
        path (str): Path where the ClayRS-compatible dataset is stored.
    """
    self.timestamp = timestamp
    self.path = path

info_code()

Provide the code to use in ClayRS to run experiments.

Source code in datarec/io/frameworks/clayrs/clayrs.py
def info_code(self):
    """
    Provide the code to use in ClayRS to run experiments.
    """
    if self.timestamp:
        self.CODE = """
from clayrs import content_analyzer 

ratings = content_analyzer.Ratings(content_analyzer.CSVFile('{path}'), timestamp_column=3)
""".format(path=self.path)
    else:
        self.CODE = """
from clayrs import content_analyzer 

ratings = content_analyzer.Ratings(content_analyzer.CSVFile('{path}'))
""".format(path=self.path)
    super().info_code()

Cornac

Cornac

Bases: Framework

Cornac framework adapter.

Provide metadata, citation, and usage examples for Cornac framework.

Source code in datarec/io/frameworks/cornac/cornac.py
class Cornac(Framework):
    """
    Cornac framework adapter.

    Provide metadata, citation, and usage examples for Cornac framework.
    """

    def __init__(self, timestamp, path):
        """
        Initialize Cornac adapter.
        Args:
            timestamp (bool): Whether timestamps are included.
            path (str): Path where the Cornac-compatible dataset is stored.
        """
        self.timestamp = timestamp
        self.path = path

    FRAMEWORK_NAME = 'Cornac'

    REPOSITORY = 'https://github.com/PreferredAI/cornac/tree/master'

    PAPER = """Cornac: A Comparative Framework for Multimodal Recommender Systems"""

    DOI = None

    CITATION = """
            @article{DBLP:journals/jmlr/SalahTL20,
              author       = {Aghiles Salah and
                              Quoc{-}Tuan Truong and
                              Hady W. Lauw},
              title        = {Cornac: {A} Comparative Framework for Multimodal Recommender Systems},
              journal      = {J. Mach. Learn. Res.},
              volume       = {21},
              pages        = {95:1--95:5},
              year         = {2020},
              url          = {http://jmlr.org/papers/v21/19-805.html},
              timestamp    = {Wed, 18 Nov 2020 15:58:12 +0100},
              biburl       = {https://dblp.org/rec/journals/jmlr/SalahTL20.bib},
              bibsource    = {dblp computer science bibliography, https://dblp.org}
            }"""

    CODE = """
        from cornac.data import Reader

        reader = Reader()
        train_data = reader.read(fpath='{path}', fmt="{frmt}")
    """

    DOC = 'https://cornac.preferred.ai/'

    def info_code(self):
        """
        Provide the code to use in Cornac to run experiments.
        """
        if self.timestamp:
            self.CODE = """
        from cornac.data import Reader

        reader = Reader()
        train_data = reader.read(fpath='{path}', fmt="{frmt}")
    """.format(path=self.path, frmt='UIRT')
        else:
            self.CODE = """
                from cornac.data import Reader

                reader = Reader()
                train_data = reader.read(fpath='{path}', fmt="{frmt}")
            """.format(path=self.path, frmt='UIR')
        super().info_code()

__init__(timestamp, path)

Initialize Cornac adapter.

Args:
    timestamp (bool): Whether timestamps are included.
    path (str): Path where the Cornac-compatible dataset is stored.

Source code in datarec/io/frameworks/cornac/cornac.py
def __init__(self, timestamp, path):
    """
    Initialize Cornac adapter.
    Args:
        timestamp (bool): Whether timestamps are included.
        path (str): Path where the Cornac-compatible dataset is stored.
    """
    self.timestamp = timestamp
    self.path = path

info_code()

Provide the code to use in Cornac to run experiments.

Source code in datarec/io/frameworks/cornac/cornac.py
def info_code(self):
    """
    Provide the code to use in Cornac to run experiments.
    """
    if self.timestamp:
        self.CODE = """
    from cornac.data import Reader

    reader = Reader()
    train_data = reader.read(fpath='{path}', fmt="{frmt}")
""".format(path=self.path, frmt='UIRT')
    else:
        self.CODE = """
            from cornac.data import Reader

            reader = Reader()
            train_data = reader.read(fpath='{path}', fmt="{frmt}")
        """.format(path=self.path, frmt='UIR')
    super().info_code()
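
The only difference between the two branches is Cornac's fmt string. A hedged sketch of the emitted snippets side by side ('data/datarec.tsv' is a placeholder path):

from cornac.data import Reader

reader = Reader()
# With timestamps the adapter emits fmt="UIRT" (user, item, rating, timestamp)...
train_uirt = reader.read(fpath='data/datarec.tsv', fmt='UIRT')
# ...without timestamps it emits fmt="UIR" (user, item, rating).
train_uir = reader.read(fpath='data/datarec.tsv', fmt='UIR')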

DaisyRec

DaisyRec

Bases: Framework

DaisyRec framework adapter.

Provide metadata, citation, and usage examples for DaisyRec framework.

Source code in datarec/io/frameworks/daisyrec/daisyrec.py
class DaisyRec(Framework):
    """
    DaisyRec framework adapter.

    Provide metadata, citation, and usage examples for DaisyRec framework.
    """

    def __init__(self, timestamp, path):
        """
        Initialize DaisyRec adapter.
        Args:
            timestamp (bool): Whether timestamps are included.
            path (str): Path where the DaisyRec-compatible dataset is stored.
        """
        self.timestamp = timestamp
        self.path = path

    FRAMEWORK_NAME = 'DaisyRec'

    REPOSITORY = 'https://github.com/recsys-benchmark/DaisyRec-v2.0'

    PAPER = """DaisyRec 2.0: Benchmarking Recommendation for Rigorous Evaluation"""

    DOI = "https://doi.org/10.1109/TPAMI.2022.3231891"

    CITATION = """
            @inproceedings{DBLP:conf/recsys/SunY00Q0G20,
              author       = {Zhu Sun and
                              Di Yu and
                              Hui Fang and
                              Jie Yang and
                              Xinghua Qu and
                              Jie Zhang and
                              Cong Geng},
              editor       = {Rodrygo L. T. Santos and
                              Leandro Balby Marinho and
                              Elizabeth M. Daly and
                              Li Chen and
                              Kim Falk and
                              Noam Koenigstein and
                              Edleno Silva de Moura},
              title        = {Are We Evaluating Rigorously? Benchmarking Recommendation for Reproducible
                              Evaluation and Fair Comparison},
              booktitle    = {RecSys 2020: Fourteenth {ACM} Conference on Recommender Systems, Virtual
                              Event, Brazil, September 22-26, 2020},
              pages        = {23--32},
              publisher    = {{ACM}},
              year         = {2020},
              url          = {https://doi.org/10.1145/3383313.3412489},
              doi          = {10.1145/3383313.3412489},
              timestamp    = {Tue, 21 Mar 2023 20:57:01 +0100},
              biburl       = {https://dblp.org/rec/conf/recsys/SunY00Q0G20.bib},
              bibsource    = {dblp computer science bibliography, https://dblp.org}
            }

            @article{DBLP:journals/pami/SunFYQLYOZ23,
              author       = {Zhu Sun and
                              Hui Fang and
                              Jie Yang and
                              Xinghua Qu and
                              Hongyang Liu and
                              Di Yu and
                              Yew{-}Soon Ong and
                              Jie Zhang},
              title        = {DaisyRec 2.0: Benchmarking Recommendation for Rigorous Evaluation},
              journal      = {{IEEE} Trans. Pattern Anal. Mach. Intell.},
              volume       = {45},
              number       = {7},
              pages        = {8206--8226},
              year         = {2023},
              url          = {https://doi.org/10.1109/TPAMI.2022.3231891},
              doi          = {10.1109/TPAMI.2022.3231891},
              timestamp    = {Fri, 07 Jul 2023 23:32:20 +0200},
              biburl       = {https://dblp.org/rec/journals/pami/SunFYQLYOZ23.bib},
              bibsource    = {dblp computer science bibliography, https://dblp.org}
            }"""

    CODE = """

    """

    DOC = 'https://daisyrec.readthedocs.io/en/latest/'

    def info_code(self):
        """
        Provide the code to use in DaisyRec to run experiments.
        """
        if self.timestamp:
            self.CODE = f"""
            In DaisyRec you need to replace the file at 
            \'daisy/utils/loader.py\'
            with the file at
            \'datarec/io/frameworks/daisyrec/loader.py\'
            Then you need to open the file, go to line 36 and change \'YOUR_PATH_HERE\' with
            \'{self.path}\'
            """
        else:
            self.CODE = f"""
            In DaisyRec you need to replace the file at 
            \'daisy/utils/loader.py\'
            with the file at
            \'datarec/io/frameworks/daisyrec/loader.py\'
            Then you need to open the file, go to line 36 and change \'YOUR_PATH_HERE\' with
            \'{self.path}\'
            Moreover, you have to remove the timestamp from the \'names\' attribute.
            """
        super().info_code()

__init__(timestamp, path)

Initialize DaisyRec adapter.

Args:
    timestamp (bool): Whether timestamps are included.
    path (str): Path where the DaisyRec-compatible dataset is stored.

Source code in datarec/io/frameworks/daisyrec/daisyrec.py
def __init__(self, timestamp, path):
    """
    Initialize DaisyRec adapter.
    Args:
        timestamp (bool): Whether timestamps are included.
        path (str): Path where the DaisyRec-compatible dataset is stored.
    """
    self.timestamp = timestamp
    self.path = path

info_code()

Provide the code to use in DaisyRec to run experiments.

Source code in datarec/io/frameworks/daisyrec/daisyrec.py
def info_code(self):
    """
    Provide the code to use in DaisyRec to run experiments.
    """
    if self.timestamp:
        self.CODE = f"""
        In DaisyRec you need to replace the file at 
        \'daisy/utils/loader.py\'
        with the file at
        \'datarec/io/frameworks/daisyrec/loader.py\'
        Then you need to open the file, go to line 36 and change \'YOUR_PATH_HERE\' with
        \'{self.path}\'
        """
    else:
        self.CODE = f"""
        In DaisyRec you need to replace the file at 
        \'daisy/utils/loader.py\'
        with the file at
        \'datarec/io/frameworks/daisyrec/loader.py\'
        Then you need to open the file, go to line 36 and change \'YOUR_PATH_HERE\' with
        \'{self.path}\'
        Moreover, you have to remove the timestamp from the \'names\' attribute.
        """
    super().info_code()

load_rate(src='ml-100k', prepro='origin', binary=True, pos_threshold=None, level='ui')

Load certain raw data.

Args:
    src (str): Name of dataset.
    prepro (str): How to pre-process the raw data; expects 'origin', f'{N}core', or f'{N}filter', where N is an integer.
    binary (bool): Whether to transform ratings to binary labels (CTR) or keep them as-is (regression).
    pos_threshold (float): If not None, treat ratings larger than this threshold as positive samples.
    level (str): Which level to apply the f'{N}core' or f'{N}filter' operation to (only used when prepro contains 'core' or 'filter').

Returns:

    pd.DataFrame: Rating information with columns: user, item, rating, (optional: timestamp).
    int: The number of users in the dataset.
    int: The number of items in the dataset.

Source code in datarec/io/frameworks/daisyrec/loader.py
def load_rate(src='ml-100k', prepro='origin', binary=True, pos_threshold=None, level='ui'):
    """
    Load certain raw data.
    Args:
        src (str): Name of dataset.
        prepro (str): Way to pre-process raw data input, expect 'origin', f'{N}core', f'{N}filter', N is integer value.
        binary (boolean): Whether to transform rating to binary label as CTR or not as Regression.
        pos_threshold (float): If not None, treat rating larger than this threshold as positive sample.
        level (str): which level to do with f'{N}core' or f'{N}filter' operation (it only works when prepro contains 'core' or 'filter').

    Returns:
        (pd.DataFrame): Rating information with columns: user, item, rating, (optional: timestamp).
        (int): The number of users in the dataset.
        (int): The number of items in the dataset.

    """
    df = pd.DataFrame()
    # which dataset will use
    if src == 'ml-100k':
        df = pd.read_csv(f'./data/{src}/u.data', sep='\t', header=None,
                         names=['user', 'item', 'rating', 'timestamp'], engine='python')
    elif src == 'datarec':
        df = pd.read_csv('YOUR_PATH_HERE', sep='\t', header=None,
                         names=['user', 'item', 'rating', 'timestamp'], engine='python')
    elif src == 'ml-1m':
        df = pd.read_csv(f'./data/{src}/ratings.dat', sep='::', header=None,
                         names=['user', 'item', 'rating', 'timestamp'], engine='python')
        # only consider rating >=4 for data density
        df = df.query('rating >= 4').reset_index(drop=True).copy()

    elif src == 'ml-10m':
        df = pd.read_csv(f'./data/{src}/ratings.dat', sep='::', header=None,
                         names=['user', 'item', 'rating', 'timestamp'], engine='python')
        df = df.query('rating >= 4').reset_index(drop=True).copy()

    elif src == 'ml-20m':
        df = pd.read_csv(f'./data/{src}/ratings.csv')
        df.rename(columns={'userId': 'user', 'movieId': 'item'}, inplace=True)
        df = df.query('rating >= 4').reset_index(drop=True)

    elif src == 'netflix':
        cnt = 0
        tmp_file = open(f'./data/{src}/training_data.csv', 'w')
        tmp_file.write('user,item,rating,timestamp' + '\n')
        for f in os.listdir(f'./data/{src}/training_set/'):
            cnt += 1
            if cnt % 5000 == 0:
                print(f'Finish Process {cnt} file......')
            txt_file = open(f'./data/{src}/training_set/{f}', 'r')
            contents = txt_file.readlines()
            item = contents[0].strip().split(':')[0]
            for val in contents[1:]:
                user, rating, timestamp = val.strip().split(',')
                tmp_file.write(','.join([user, item, rating, timestamp]) + '\n')
            txt_file.close()

        tmp_file.close()

        df = pd.read_csv(f'./data/{src}/training_data.csv')
        df['rating'] = df.rating.astype(float)
        df['timestamp'] = pd.to_datetime(df['timestamp'])

    elif src == 'lastfm':
        # user_artists.dat
        df = pd.read_csv(f'./data/{src}/user_artists.dat', sep='\t')
        df.rename(columns={'userID': 'user', 'artistID': 'item', 'weight': 'rating'}, inplace=True)
        # treat weight as interaction, as 1
        df['rating'] = 1.0
        # fake timestamp column
        df['timestamp'] = 1

    elif src == 'book-x':
        df = pd.read_csv(f'./data/{src}/BX-Book-Ratings.csv', delimiter=";", encoding="latin1")
        df.rename(columns={'User-ID': 'user', 'ISBN': 'item', 'Book-Rating': 'rating'}, inplace=True)
        # fake timestamp column
        df['timestamp'] = 1

    elif src == 'pinterest':
        # TODO this dataset has wrong source URL, we will figure out in future
        pass

    elif src == 'amazon-cloth':
        df = pd.read_csv(f'./data/{src}/ratings_Clothing_Shoes_and_Jewelry.csv',
                         names=['user', 'item', 'rating', 'timestamp'])

    elif src == 'amazon-electronic':
        df = pd.read_csv(f'./data/{src}/ratings_Electronics.csv',
                         names=['user', 'item', 'rating', 'timestamp'])

    elif src == 'amazon-book':
        df = pd.read_csv(f'./data/{src}/ratings_Books.csv',
                         names=['user', 'item', 'rating', 'timestamp'], low_memory=False)
        df = df[df['timestamp'].str.isnumeric()].copy()
        df['timestamp'] = df['timestamp'].astype(int)

    elif src == 'amazon-music':
        df = pd.read_csv(f'./data/{src}/ratings_Digital_Music.csv',
                         names=['user', 'item', 'rating', 'timestamp'])

    elif src == 'epinions':
        d = sio.loadmat(f'./data/{src}/rating_with_timestamp.mat')
        prime = []
        for val in d['rating_with_timestamp']:
            user, item, rating, timestamp = val[0], val[1], val[3], val[5]
            prime.append([user, item, rating, timestamp])
        df = pd.DataFrame(prime, columns=['user', 'item', 'rating', 'timestamp'])
        del prime
        gc.collect()

    elif src == 'yelp':
        json_file_path = f'./data/{src}/yelp_academic_dataset_review.json'
        prime = []
        for line in open(json_file_path, 'r', encoding='UTF-8'):
            val = json.loads(line)
            prime.append([val['user_id'], val['business_id'], val['stars'], val['date']])
        df = pd.DataFrame(prime, columns=['user', 'item', 'rating', 'timestamp'])
        df['timestamp'] = pd.to_datetime(df.timestamp)
        del prime
        gc.collect()

    elif src == 'citeulike':
        user = 0
        dt = []
        for line in open(f'./data/{src}/users.dat', 'r'):
            val = line.split()
            for item in val:
                dt.append([user, item])
            user += 1
        df = pd.DataFrame(dt, columns=['user', 'item'])
        # fake timestamp column
        df['timestamp'] = 1

    else:
        raise ValueError('Invalid Dataset Error')

    # set rating >= threshold as positive samples
    if pos_threshold is not None:
        df = df.query(f'rating >= {pos_threshold}').reset_index(drop=True)

    # reset rating to interaction, here just treat all rating as 1
    if binary:
        df['rating'] = 1.0

    # which type of pre-dataset will use
    if prepro == 'origin':
        pass

    elif prepro.endswith('filter'):
        pattern = re.compile(r'\d+')
        filter_num = int(pattern.findall(prepro)[0])

        tmp1 = df.groupby(['user'], as_index=False)['item'].count()
        tmp1.rename(columns={'item': 'cnt_item'}, inplace=True)
        tmp2 = df.groupby(['item'], as_index=False)['user'].count()
        tmp2.rename(columns={'user': 'cnt_user'}, inplace=True)
        df = df.merge(tmp1, on=['user']).merge(tmp2, on=['item'])
        if level == 'ui':
            df = df.query(f'cnt_item >= {filter_num} and cnt_user >= {filter_num}').reset_index(drop=True).copy()
        elif level == 'u':
            df = df.query(f'cnt_item >= {filter_num}').reset_index(drop=True).copy()
        elif level == 'i':
            df = df.query(f'cnt_user >= {filter_num}').reset_index(drop=True).copy()
        else:
            raise ValueError(f'Invalid level value: {level}')

        df.drop(['cnt_item', 'cnt_user'], axis=1, inplace=True)
        del tmp1, tmp2
        gc.collect()

    elif prepro.endswith('core'):
        pattern = re.compile(r'\d+')
        core_num = int(pattern.findall(prepro)[0])

        def filter_user(df):
            tmp = df.groupby(['user'], as_index=False)['item'].count()
            tmp.rename(columns={'item': 'cnt_item'}, inplace=True)
            df = df.merge(tmp, on=['user'])
            df = df.query(f'cnt_item >= {core_num}').reset_index(drop=True).copy()
            df.drop(['cnt_item'], axis=1, inplace=True)

            return df

        def filter_item(df):
            tmp = df.groupby(['item'], as_index=False)['user'].count()
            tmp.rename(columns={'user': 'cnt_user'}, inplace=True)
            df = df.merge(tmp, on=['item'])
            df = df.query(f'cnt_user >= {core_num}').reset_index(drop=True).copy()
            df.drop(['cnt_user'], axis=1, inplace=True)

            return df

        if level == 'ui':
            while 1:
                df = filter_user(df)
                df = filter_item(df)
                chk_u = df.groupby('user')['item'].count()
                chk_i = df.groupby('item')['user'].count()
                if len(chk_i[chk_i < core_num]) <= 0 and len(chk_u[chk_u < core_num]) <= 0:
                    break
        elif level == 'u':
            df = filter_user(df)
        elif level == 'i':
            df = filter_item(df)
        else:
            raise ValueError(f'Invalid level value: {level}')

        gc.collect()

    else:
        raise ValueError('Invalid dataset preprocess type, origin/Ncore/Nfilter (N is int number) expected')

    # encoding user_id and item_id
    df['user'] = pd.Categorical(df['user']).codes
    df['item'] = pd.Categorical(df['item']).codes

    user_num = df['user'].nunique()
    item_num = df['item'].nunique()

    print(f'Finish loading [{src}]-[{prepro}] dataset')

    return df, user_num, item_num
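
A sketch of a typical call, assuming the MovieLens-100K files sit under ./data/ml-100k/ as the loader expects and that load_rate above is in scope:

# 5-core filtering on both users and items, with binarized ratings.
df, user_num, item_num = load_rate(src='ml-100k', prepro='5core',
                                   binary=True, pos_threshold=None, level='ui')
print(df.columns.tolist())  # ['user', 'item', 'rating', 'timestamp']
print(user_num, item_num)   # counts after filtering and categorical re-encoding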

get_ur(df)

Get user-rating pairs.

Args:
    df (pd.DataFrame): Rating dataframe.

Returns:

    dict: Dictionary which stores user-item interactions.

Source code in datarec/io/frameworks/daisyrec/loader.py
def get_ur(df):
    """
    Get user-rating pairs.
    Args:
        df (pd.DataFrame): Rating dataframe.

    Returns:
        (dict): Dictionary which stores user-item interactions.

    """
    ur = defaultdict(set)
    for _, row in df.iterrows():
        ur[int(row['user'])].add(int(row['item']))

    return ur

get_ir(df)

Get item-rating pairs.

Args:
    df (pd.DataFrame): Rating dataframe.

Returns:

    dict: Dictionary which stores item-user interactions.

Source code in datarec/io/frameworks/daisyrec/loader.py
def get_ir(df):
    """
    Get item-rating pairs.
    Args:
        df (pd.DataFrame): Rating dataframe.

    Returns:
        (dict): Dictionary which stores item-user interactions.

    """
    ir = defaultdict(set)
    for _, row in df.iterrows():
        ir[int(row['item'])].add(int(row['user']))

    return ir
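
get_ur and get_ir index the same interactions in opposite directions. A toy example, assuming both helpers are in scope:

import pandas as pd

toy = pd.DataFrame({'user': [0, 0, 1], 'item': [10, 11, 10]})
print(get_ur(toy))  # items per user: 0 -> {10, 11}, 1 -> {10}
print(get_ir(toy))  # users per item: 10 -> {0, 1}, 11 -> {0}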

build_feat_idx_dict(df, cat_cols=['user', 'item'], num_cols=[])

Encode feature mapping for FM.

Args:
    df (pd.DataFrame): Feature dataframe.
    cat_cols (list): List of categorical column names.
    num_cols (list): List of numerical column names.

Returns:

    dict: Dictionary with index-feature column mapping information.
    int: The number of features.

Source code in datarec/io/frameworks/daisyrec/loader.py
def build_feat_idx_dict(df: pd.DataFrame,
                        cat_cols: list = ['user', 'item'],
                        num_cols: list = []):
    """
    Encode feature mapping for FM.
    Args:
        df (pd.DataFrame): Feature dataframe.
        cat_cols (list): List of categorical column names.
        num_cols (list): List of numerical column names.

    Returns:
        (dict): Dictionary with index-feature column mapping information.
        (int): The number of features.

    """
    feat_idx_dict = {}
    idx = 0
    for col in cat_cols:
        feat_idx_dict[col] = idx
        idx = idx + df[col].max() + 1
    for col in num_cols:
        feat_idx_dict[col] = idx
        idx += 1
    print('Finish build feature index dictionary......')

    cnt = 0
    for col in cat_cols:
        for _ in df[col].unique():
            cnt += 1
    for _ in num_cols:
        cnt += 1
    print(f'Number of features: {cnt}')

    return feat_idx_dict, cnt
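
Each categorical column receives an offset, so offset + encoded id is a globally unique feature index for the FM model. A toy example (ids must already be the 0-based codes produced by load_rate):

import pandas as pd

toy = pd.DataFrame({'user': [0, 1, 2], 'item': [0, 1, 1]})
feat_idx_dict, n_feats = build_feat_idx_dict(toy, cat_cols=['user', 'item'], num_cols=[])
# feat_idx_dict == {'user': 0, 'item': 3}: item indices start after the 3 user ids.
# n_feats == 5: three distinct users plus two distinct items.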

convert_npy_mat(user_num, item_num, df)

Convert a pd.DataFrame to a NumPy matrix.

Args:
    user_num (int): Number of users.
    item_num (int): Number of items.
    df (pd.DataFrame): Rating dataframe.

Returns:

    np.ndarray: Rating matrix.

Source code in datarec/io/frameworks/daisyrec/loader.py
def convert_npy_mat(user_num, item_num, df):
    """
    Convert pd.Dataframe to numpy matrix.
    Args:
        user_num(int): Number of users.
        item_num (int): Number of items.
        df (pd.DataFrame): Rating dataframe.

    Returns:
        (np.array): Rating matrix.
    """
    mat = np.zeros((user_num, item_num))
    for _, row in df.iterrows():
        u, i, r = row['user'], row['item'], row['rating']
        mat[int(u), int(i)] = float(r)
    return mat
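
A toy example of the densification; note the result is a dense matrix, so this is only practical for small user and item counts:

import pandas as pd

toy = pd.DataFrame({'user': [0, 1], 'item': [1, 0], 'rating': [5.0, 3.0]})
mat = convert_npy_mat(user_num=2, item_num=2, df=toy)
# mat == [[0., 5.],
#         [3., 0.]]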

build_candidates_set(test_ur, train_ur, item_pool, candidates_num=1000)

Build candidate items for ranking.

Args:
    test_ur (dict): Ground truth that represents the relationship of user and item in the test set.
    train_ur (dict): The relationship of user and item in the train set.
    item_pool (list or set): Set of all items.
    candidates_num (int): Number of candidates.

Returns:

    test_ucands (dict): Dictionary storing candidates for each user in the test set.

Source code in datarec/io/frameworks/daisyrec/loader.py
def build_candidates_set(test_ur, train_ur, item_pool, candidates_num=1000):
    """
    Build candidate items for ranking.
    Args:
        test_ur (dict): Ground truth that represents the relationship of user and item in the test set.
        train_ur (dict): The relationship of user and item in the train set.
        item_pool (list or set): Set of all items.
        candidates_num (int): Number of candidates.

    Returns:
        test_ucands (dict): Dictionary storing candidates for each user in the test set.

    """
    test_ucands = defaultdict(list)
    for k, v in test_ur.items():
        sample_num = candidates_num - len(v) if len(v) < candidates_num else 0
        sub_item_pool = item_pool - v - train_ur[k]  # remove GT & interacted
        sample_num = min(len(sub_item_pool), sample_num)
        if sample_num == 0:
            samples = random.sample(v, candidates_num)
            test_ucands[k] = list(set(samples))
        else:
            samples = random.sample(sub_item_pool, sample_num)
            test_ucands[k] = list(v | set(samples))

    return test_ucands
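
A toy call, with one caveat: random.sample() stopped accepting sets in Python 3.11, so as written the helper targets older interpreters (or needs list(...) around its set arguments):

item_pool = set(range(10))
train_ur = {0: {0, 1}}   # user 0 interacted with items 0 and 1 in training
test_ur = {0: {2}}       # ground truth for user 0 in the test set
cands = build_candidates_set(test_ur, train_ur, item_pool, candidates_num=5)
# cands[0] holds ground-truth item 2 plus 4 negatives sampled from {3, ..., 9}.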

get_adj_mat(n_users, n_items)

Get adjacency matrix.

Args:
    n_users (int): Number of users.
    n_items (int): Number of items.

Returns:

    adj_mat (sp.csr_matrix): Adjacency matrix.
    norm_adj_mat (sp.csr_matrix): Normalized adjacency matrix.
    mean_adj_mat (sp.csr_matrix): Mean adjacency matrix.

Source code in datarec/io/frameworks/daisyrec/loader.py
def get_adj_mat(n_users, n_items):
    """
    Get adjacency matrix.
    Args:
        n_users (int): Number of users.
        n_items (int): Number of items.

    Returns:
        adj_mat (sp.csr_matrix): Adjacency matrix.
        norm_adj_mat (sp.csr_matrix): Normalized adjacency matrix.
        mean_adj_mat(sp.csr_matrix): Mean adjacency matrix.

    """
    R = sp.dok_matrix((n_users, n_items), dtype=np.float32)
    adj_mat = sp.dok_matrix((n_users + n_items, n_users + n_items), dtype=np.float32)
    adj_mat = adj_mat.tolil()
    R = R.tolil()

    adj_mat[:n_users, n_users:] = R
    adj_mat[n_users:, :n_users] = R.T
    adj_mat = adj_mat.todok()
    print('already create adjacency matrix', adj_mat.shape)

    def mean_adj_single(adj):
        """
        Compute row-normalized adjacency matrix (D⁻¹A).
        Args:
            adj (sp.spmatrix): Sparse adjacency matrix.

        Returns:
            (sp.coo_matrix): Row-normalized adjacency matrix in COO format.
        """
        # D^-1 * A
        rowsum = np.array(adj.sum(1))

        d_inv = np.power(rowsum, -1).flatten()
        d_inv[np.isinf(d_inv)] = 0.
        d_mat_inv = sp.diags(d_inv)

        norm_adj = d_mat_inv.dot(adj)
        # norm_adj = adj.dot(d_mat_inv)
        print('generate single-normalized adjacency matrix.')
        return norm_adj.tocoo()

    def normalized_adj_single(adj):
        """
        Compute symmetric normalized adjacency matrix (D⁻¹/² A D⁻¹/²).
        Args:
            adj (sp.spmatrix): Sparse adjacency matrix.

        Returns:
            (sp.coo_matrix): Symmetric normalized adjacency matrix in COO format.
        """
        # D^-1/2 * A * D^-1/2
        rowsum = np.array(adj.sum(1))

        d_inv_sqrt = np.power(rowsum, -0.5).flatten()
        d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
        d_mat_inv_sqrt = sp.diags(d_inv_sqrt)

        # bi_lap = adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt)
        bi_lap = d_mat_inv_sqrt.dot(adj).dot(d_mat_inv_sqrt)
        return bi_lap.tocoo()

    def check_adj_if_equal(adj):
        """
        Check if normalized adjacency is equivalent to Laplacian-based transformation.
        Args:
            adj (sp.spmatrix): Sparse adjacency matrix.

        Returns:
            (np.ndarray): Dense matrix representing the normalized adjacency for verification

        """
        dense_A = np.array(adj.todense())
        degree = np.sum(dense_A, axis=1, keepdims=False)

        temp = np.dot(np.diag(np.power(degree, -1)), dense_A)
        print('check normalized adjacency matrix whether equal to this laplacian matrix.')
        return temp

    norm_adj_mat = mean_adj_single(adj_mat + sp.eye(adj_mat.shape[0]))
    # norm_adj_mat = normalized_adj_single(adj_mat + sp.eye(adj_mat.shape[0]))
    mean_adj_mat = mean_adj_single(adj_mat)

    print('already normalize adjacency matrix')
    return adj_mat.tocsr(), norm_adj_mat.tocsr(), mean_adj_mat.tocsr()
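
Two things are worth noting: all three returned matrices are square over the joint user-item node space, and, as listed, the interaction block R is created but never populated, so callers presumably fill it from training data before relying on the normalizations. A shape-only sketch:

adj, norm_adj, mean_adj = get_adj_mat(n_users=3, n_items=2)
print(adj.shape)  # (5, 5): users and items share one node index space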

Elliot

Elliot

Bases: Framework

Elliot framework adapter.

Provide metadata, citation, and usage examples for Elliot framework.

Source code in datarec/io/frameworks/elliot/elliot.py
class Elliot(Framework):
    """
    Elliot framework adapter.

    Provide metadata, citation, and usage examples for Elliot framework.
    """

    def __init__(self, timestamp, path):
        """
        Initialize Elliot adapter.
        Args:
            timestamp (bool): Whether timestamps are included.
            path (str): Path where the Elliot-compatible dataset is stored.
        """
        self.timestamp = timestamp

        self.directory = os.path.abspath(os.path.dirname(path))
        if os.path.exists(self.directory) is False:
            os.makedirs(self.directory)

        self.train_path, self.test_path, self.val_path = \
            os.path.join(self.directory, 'train.tsv'), \
                os.path.join(self.directory, 'test.tsv'), \
                os.path.join(self.directory, 'validation.tsv')

        self.file = os.path.basename(path)
        self.file_path = os.path.join(self.directory, self.file)

        # create configuration file
        config_file = \
            CONF.format(path=self.file_path,
                        dataset='datarec2elliot',
                        train=self.train_path,
                        test=self.test_path,
                        val=self.val_path)

        self.config_path = os.path.join(self.directory, 'datarec_config.yml')
        with open(self.config_path, 'w') as file:
            file.write(config_file)

    FRAMEWORK_NAME = 'Elliot'

    REPOSITORY = 'https://github.com/sisinflab/elliot'

    PAPER = """Elliot: a Comprehensive and Rigorous Framework for Reproducible Recommender Systems Evaluation"""

    DOI = "https://doi.org/10.1145/3404835.3463245"

    CITATION = """
            @inproceedings{DBLP:conf/sigir/AnelliBFMMPDN21,
              author       = {Vito Walter Anelli and
                              Alejandro Bellog{\'{\i}}n and
                              Antonio Ferrara and
                              Daniele Malitesta and
                              Felice Antonio Merra and
                              Claudio Pomo and
                              Francesco Maria Donini and
                              Tommaso Di Noia},
              editor       = {Fernando Diaz and
                              Chirag Shah and
                              Torsten Suel and
                              Pablo Castells and
                              Rosie Jones and
                              Tetsuya Sakai},
              title        = {Elliot: {A} Comprehensive and Rigorous Framework for Reproducible
                              Recommender Systems Evaluation},
              booktitle    = {{SIGIR} '21: The 44th International {ACM} {SIGIR} Conference on Research
                              and Development in Information Retrieval, Virtual Event, Canada, July
                              11-15, 2021},
              pages        = {2405--2414},
              publisher    = {{ACM}},
              year         = {2021},
              url          = {https://doi.org/10.1145/3404835.3463245},
              doi          = {10.1145/3404835.3463245},
              timestamp    = {Sun, 12 Nov 2023 02:10:04 +0100},
              biburl       = {https://dblp.org/rec/conf/sigir/AnelliBFMMPDN21.bib},
              bibsource    = {dblp computer science bibliography, https://dblp.org}
            }"""

    CODE = "  "

    DOC = 'https://elliot.readthedocs.io/en/latest/'

    def info_code(self):
        """
        Provide the code to use in Elliot to run experiments.
        """
        self.CODE = """
            A configuration file for Elliot has been created here:
            \'{config_path}\'
            You can now run the script.
            If you move the configuration file, remember to change the path in the script below.

            Elliot script:
            python start_experiments.py --config {config_path}

            This script contains a basic recommendation example. Change it if you need.
            """.format(config_path=self.config_path)

        super().info_code()

__init__(timestamp, path)

Initialize Elliot adapter.

Args:
    timestamp (bool): Whether timestamps are included.
    path (str): Path where the Elliot-compatible dataset is stored.

Source code in datarec/io/frameworks/elliot/elliot.py
def __init__(self, timestamp, path):
    """
    Initialize Elliot adapter.
    Args:
        timestamp (bool): Whether timestamps are included.
        path (str): Path where the Elliot-compatible dataset is stored.
    """
    self.timestamp = timestamp

    self.directory = os.path.abspath(os.path.dirname(path))
    if os.path.exists(self.directory) is False:
        os.makedirs(self.directory)

    self.train_path, self.test_path, self.val_path = \
        os.path.join(self.directory, 'train.tsv'), \
            os.path.join(self.directory, 'test.tsv'), \
            os.path.join(self.directory, 'validation.tsv')

    self.file = os.path.basename(path)
    self.file_path = os.path.join(self.directory, self.file)

    # create configuration file
    config_file = \
        CONF.format(path=self.file_path,
                    dataset='datarec2elliot',
                    train=self.train_path,
                    test=self.test_path,
                    val=self.val_path)

    self.config_path = os.path.join(self.directory, 'datarec_config.yml')
    with open(self.config_path, 'w') as file:
        file.write(config_file)

info_code()

Provide the code to use in Elliot to run experiments.

Source code in datarec/io/frameworks/elliot/elliot.py
def info_code(self):
    """
    Provide the code to use in Elliot to run experiments.
    """
    self.CODE = """
        A configuration file for Elliot has been created here:
        \'{config_path}\'
        You can now run the script.
        If you move the configuration file, remember to change the path in the script below.

        Elliot script:
        python start_experiments.py --config {config_path}

        This script contains a basic recommendation example. Change it if you need.
        """.format(config_path=self.config_path)

    super().info_code()
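
A minimal usage sketch, assuming the import path mirrors the source location above; 'experiments/datarec.tsv' is a placeholder:

from datarec.io.frameworks.elliot.elliot import Elliot

adapter = Elliot(timestamp=False, path='experiments/datarec.tsv')
# __init__ has already written 'datarec_config.yml' next to the data;
# info_code() prints the matching command:
#   python start_experiments.py --config <config_path>
adapter.info_code()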

LensKit

LensKit

Bases: Framework

LensKit framework adapter.

Provide metadata, citation, and usage examples for LensKit framework.

Source code in datarec/io/frameworks/lenskit/lenskit.py
class LensKit(Framework):
    """
    LensKit framework adapter.

    Provide metadata, citation, and usage examples for LensKit framework.
    """

    def __init__(self, timestamp, path):
        """
        Initialize LensKit adapter.
        Args:
            timestamp (bool): Whether timestamps are included.
            path (str): Path where the LensKit-compatible dataset is stored.
        """
        self.timestamp = timestamp
        self.path = path

    FRAMEWORK_NAME = 'LensKit'

    REPOSITORY = 'https://github.com/lenskit/lkpy'

    PAPER = """LensKit for Python: Next-Generation Software for Recommender Systems Experiments"""

    DOI = "https://doi.org/10.1145/3340531.3412778"

    CITATION = """
            @inproceedings{DBLP:conf/cikm/Ekstrand20,
              author       = {Michael D. Ekstrand},
              editor       = {Mathieu d'Aquin and
                              Stefan Dietze and
                              Claudia Hauff and
                              Edward Curry and
                              Philippe Cudr{\'{e}}{-}Mauroux},
              title        = {LensKit for Python: Next-Generation Software for Recommender Systems
                              Experiments},
              booktitle    = {{CIKM} '20: The 29th {ACM} International Conference on Information
                              and Knowledge Management, Virtual Event, Ireland, October 19-23, 2020},
              pages        = {2999--3006},
              publisher    = {{ACM}},
              year         = {2020},
              url          = {https://doi.org/10.1145/3340531.3412778},
              doi          = {10.1145/3340531.3412778},
              timestamp    = {Tue, 29 Dec 2020 18:42:41 +0100},
              biburl       = {https://dblp.org/rec/conf/cikm/Ekstrand20.bib},
              bibsource    = {dblp computer science bibliography, https://dblp.org}
            }"""

    CODE = """

    """

    DOC = 'https://lkpy.lenskit.org/en/stable/'

    def info_code(self):
        """
        Provide the code to use in LensKit to run experiments.
        """
        self.CODE = """
        LensKit accepts pandas DataFrames with specific column naming. DataRec will do that for you!

        import pandas as pd

        ratings = pd.read_csv('{path}', sep='\\t', header=None)
        """.format(path=self.path)

        super().info_code()

__init__(timestamp, path)

Initialize LensKit adapter.

Args:
    timestamp (bool): Whether timestamps are included.
    path (str): Path where the LensKit-compatible dataset is stored.

Source code in datarec/io/frameworks/lenskit/lenskit.py
def __init__(self, timestamp, path):
    """
    Initialize LensKit adapter.
    Args:
        timestamp (bool): Whether timestamps are included.
        path (str): Path where the LensKit-compatible dataset is stored.
    """
    self.timestamp = timestamp
    self.path = path

info_code()

Provide the code to use in LensKit to run experiments.

Source code in datarec/io/frameworks/lenskit/lenskit.py
def info_code(self):
    """
    Provide the code to use in LensKit to run experiments.
    """
    self.CODE = """
    LensKit accepts pandas DataFrames with specific column naming. DataRec will do that for you!

    import pandas as pd

    ratings = pd.read_csv('{path}', sep='\\t', header=None)
    """.format(path=self.path)

    super().info_code()
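
Since LensKit consumes plain pandas DataFrames, a hedged sketch of loading the exported file with the column names LensKit conventionally expects ('datarec.tsv' is a placeholder):

import pandas as pd

ratings = pd.read_csv('datarec.tsv', sep='\t', header=None,
                      names=['user', 'item', 'rating', 'timestamp'])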

RecBole

RecBole

Bases: Framework

RecBole framework adapter.

Provide metadata, citation, and usage examples for RecBole framework.

Source code in datarec/io/frameworks/recbole/recbole.py
class RecBole(Framework):
    """
    RecBole framework adapter.

    Provide metadata, citation, and usage examples for RecBole framework.
    """

    def __init__(self, timestamp, path):
        """
        Initialize RecBole adapter.
        Args:
            timestamp (bool): Whether timestamps are included.
            path (str): Path where the RecBole-compatible dataset is stored.
        """
        self.timestamp = timestamp
        directory = os.path.dirname(path)
        self.directory = os.path.join(directory, 'DataRec2RecBole')
        print('RecBole requires a directory named after the dataset.\n'
              f'Based on your path the directory that will be used is \'{self.directory}\'')
        if os.path.exists(self.directory) is False:
            os.makedirs(self.directory)
        self.path = os.path.join(self.directory, path)

    FRAMEWORK_NAME = 'RecBole'

    REPOSITORY = 'https://github.com/RUCAIBox/RecBole2.0'

    PAPER = """RecBole 2.0: Towards a More Up-to-Date Recommendation Library"""

    DOI = "https://doi.org/10.1145/3511808.3557680"

    CITATION = """
            @inproceedings{DBLP:conf/cikm/ZhaoMHLCPLLWTMF21,
              author       = {Wayne Xin Zhao and
                              Shanlei Mu and
                              Yupeng Hou and
                              Zihan Lin and
                              Yushuo Chen and
                              Xingyu Pan and
                              Kaiyuan Li and
                              Yujie Lu and
                              Hui Wang and
                              Changxin Tian and
                              Yingqian Min and
                              Zhichao Feng and
                              Xinyan Fan and
                              Xu Chen and
                              Pengfei Wang and
                              Wendi Ji and
                              Yaliang Li and
                              Xiaoling Wang and
                              Ji{-}Rong Wen},
              editor       = {Gianluca Demartini and
                              Guido Zuccon and
                              J. Shane Culpepper and
                              Zi Huang and
                              Hanghang Tong},
              title        = {RecBole: Towards a Unified, Comprehensive and Efficient Framework
                              for Recommendation Algorithms},
              booktitle    = {{CIKM} '21: The 30th {ACM} International Conference on Information
                              and Knowledge Management, Virtual Event, Queensland, Australia, November
                              1 - 5, 2021},
              pages        = {4653--4664},
              publisher    = {{ACM}},
              year         = {2021},
              url          = {https://doi.org/10.1145/3459637.3482016},
              doi          = {10.1145/3459637.3482016},
              timestamp    = {Tue, 07 May 2024 20:05:19 +0200},
              biburl       = {https://dblp.org/rec/conf/cikm/ZhaoMHLCPLLWTMF21.bib},
              bibsource    = {dblp computer science bibliography, https://dblp.org}
            }
            @inproceedings{DBLP:conf/cikm/ZhaoHPYZLZBTSCX22,
              author       = {Wayne Xin Zhao and
                              Yupeng Hou and
                              Xingyu Pan and
                              Chen Yang and
                              Zeyu Zhang and
                              Zihan Lin and
                              Jingsen Zhang and
                              Shuqing Bian and
                              Jiakai Tang and
                              Wenqi Sun and
                              Yushuo Chen and
                              Lanling Xu and
                              Gaowei Zhang and
                              Zhen Tian and
                              Changxin Tian and
                              Shanlei Mu and
                              Xinyan Fan and
                              Xu Chen and
                              Ji{-}Rong Wen},
              editor       = {Mohammad Al Hasan and
                              Li Xiong},
              title        = {RecBole 2.0: Towards a More Up-to-Date Recommendation Library},
              booktitle    = {Proceedings of the 31st {ACM} International Conference on Information
                              {\&} Knowledge Management, Atlanta, GA, USA, October 17-21, 2022},
              pages        = {4722--4726},
              publisher    = {{ACM}},
              year         = {2022},
              url          = {https://doi.org/10.1145/3511808.3557680},
              doi          = {10.1145/3511808.3557680},
              timestamp    = {Sun, 20 Aug 2023 12:23:03 +0200},
              biburl       = {https://dblp.org/rec/conf/cikm/ZhaoHPYZLZBTSCX22.bib},
              bibsource    = {dblp computer science bibliography, https://dblp.org}
            }"""

    CODE = """

    """

    DOC = 'https://recbole.io/'

    def info_code(self):
        """
        Provide the code to use in RecBole to run experiments.
        """
        self.CODE = """
            from recbole.data import create_dataset
            from recbole.config import Config

            config_dict = {{
                "dataset": "datarec",
                "data_path": {path},
            }}
            config = Config(config_dict=config_dict)
            dataset = create_dataset(config)
        """.format(path=self.path)

        super().info_code()

__init__(timestamp, path)

Initialize RecBole adapter.

Args:
    timestamp (bool): Whether timestamps are included.
    path (str): Path where the RecBole-compatible dataset is stored.

Source code in datarec/io/frameworks/recbole/recbole.py
def __init__(self, timestamp, path):
    """
    Initialize RecBole adapter.
    Args:
        timestamp (bool): Whether timestamps are included.
        path (str): Path where the RecBole-compatible dataset is stored.
    """
    self.timestamp = timestamp
    directory = os.path.dirname(path)
    self.directory = os.path.join(directory, 'DataRec2RecBole')
    print('RecBole requires a directory named after the dataset.\n'
          f'Based on your path the directory that will be used is \'{self.directory}\'')
    if os.path.exists(self.directory) is False:
        os.makedirs(self.directory)
    self.path = os.path.join(self.directory, path)

info_code()

Provide the code to use in RecBole to run experiments.

Source code in datarec/io/frameworks/recbole/recbole.py
def info_code(self):
    """
    Provide the code to use in RecBole to run experiments.
    """
    self.CODE = """
        from recbole.data import create_dataset
        from recbole.config import Config

        config_dict = {{
            "dataset": "datarec",
            "data_path": {path},
        }}
        config = Config(config_dict=config_dict)
        dataset = create_dataset(config)
    """.format(path=self.path)

    super().info_code()
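
A minimal usage sketch, assuming the import path mirrors the source location above; 'exports/datarec.inter' is a placeholder:

from datarec.io.frameworks.recbole.recbole import RecBole

adapter = RecBole(timestamp=True, path='exports/datarec.inter')  # placeholder path
adapter.info_code()  # prints the create_dataset() snippet with the resolved path filled in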

ReChorus

ReChorus

Bases: Framework

ReChorus framework adapter.

Provide metadata, citation, and usage examples for ReChorus framework.

Source code in datarec/io/frameworks/rechorus/rechorus.py
class ReChorus(Framework):
    """
    ReChorus framework adapter.

    Provide metadata, citation, and usage examples for ReChorus framework.
    """

    def __init__(self, timestamp, path):
        """
        Initialize ReChorus adapter.
        Args:
            timestamp (bool): Whether timestamps are included.
            path (str): Path where the ReChorus-compatible dataset is stored.
        """
        self.timestamp = timestamp
        directory = os.path.dirname(path)
        self.directory = os.path.abspath(os.path.join(directory, 'DataRec2ReChorus'))
        print('ReChorus requires a directory named after the dataset.\n'
              f'Based on your path the directory that will be used is \'{self.directory}\'')
        if os.path.exists(self.directory) is False:
            os.makedirs(self.directory)

    FRAMEWORK_NAME = 'ReChorus'

    REPOSITORY = 'https://github.com/THUwangcy/ReChorus'

    PAPER = """Make It a Chorus: Knowledge- and Time-aware Item Modeling for Sequential Recommendation"""

    DOI = "https://doi.org/10.1145/3397271.3401131"

    CITATION = """
            @inproceedings{DBLP:conf/sigir/WangZMLM20,
              author       = {Chenyang Wang and
                              Min Zhang and
                              Weizhi Ma and
                              Yiqun Liu and
                              Shaoping Ma},
              editor       = {Jimmy X. Huang and
                              Yi Chang and
                              Xueqi Cheng and
                              Jaap Kamps and
                              Vanessa Murdock and
                              Ji{-}Rong Wen and
                              Yiqun Liu},
              title        = {Make It a Chorus: Knowledge- and Time-aware Item Modeling for Sequential
                              Recommendation},
              booktitle    = {Proceedings of the 43rd International {ACM} {SIGIR} conference on
                              research and development in Information Retrieval, {SIGIR} 2020, Virtual
                              Event, China, July 25-30, 2020},
              pages        = {109--118},
              publisher    = {{ACM}},
              year         = {2020},
              url          = {https://doi.org/10.1145/3397271.3401131},
              doi          = {10.1145/3397271.3401131},
              timestamp    = {Mon, 31 Oct 2022 08:39:18 +0100},
              biburl       = {https://dblp.org/rec/conf/sigir/WangZMLM20.bib},
              bibsource    = {dblp computer science bibliography, https://dblp.org}
            }"""

    CODE = """

    """

    DOC = None

    def info_code(self):
        """
        Provide the code to use in ReChorus to run experiments.
        """
        self.CODE = """
            The dataset must be split and placed in a single folder within the \'data\' folder of the project.\n
            This data is supported by ReChorus models that adopt a \'BaseModel.Dataset\' dataset.\n
            DataRec created this directory at \'{directory}\'.
        """.format(directory=self.directory)

        super().info_code()

__init__(timestamp, path)

Initialize ReChorus adapter.

Args:
    timestamp (bool): Whether timestamps are included.
    path (str): Path where the ReChorus-compatible dataset is stored.

Source code in datarec/io/frameworks/rechorus/rechorus.py
def __init__(self, timestamp, path):
    """
    Initialize ReChorus adapter.
    Args:
        timestamp (bool): Whether timestamps are included.
        path (str): Path where the ReChorus-compatible dataset is stored.
    """
    self.timestamp = timestamp
    directory = os.path.dirname(path)
    self.directory = os.path.abspath(os.path.join(directory, 'DataRec2ReChorus'))
    print('ReChorus requires a directory named after the dataset.\n'
          f'Based on your path the directory that will be used is \'{self.directory}\'')
    if os.path.exists(self.directory) is False:
        os.makedirs(self.directory)

info_code()

Provide the code to use in ReChorus to run experiments.

Source code in datarec/io/frameworks/rechorus/rechorus.py
def info_code(self):
    """
    Provide the code to use in ReChorus to run experiments.
    """
    self.CODE = """
        The dataset must be split and placed in a single folder within the \'data\' folder of the project.\n
        This data is supported by ReChorus models that adopt a \'BaseModel.Dataset\' dataset.\n
        DataRec created this directory at \'{directory}\'.
    """.format(directory=self.directory)

    super().info_code()
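
A minimal usage sketch, assuming the import path mirrors the source location above; 'exports/datarec.tsv' is a placeholder:

from datarec.io.frameworks.rechorus.rechorus import ReChorus

adapter = ReChorus(timestamp=True, path='exports/datarec.tsv')  # placeholder path
adapter.info_code()  # points at the 'DataRec2ReChorus' directory created in __init__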

Recommenders

Recommenders

Bases: Framework

Recommenders framework adapter.

Provide metadata, citation, and usage examples for Recommenders framework.

Source code in datarec/io/frameworks/recommenders/recommenders.py
class Recommenders(Framework):
    """
    Recommenders framework adapter.

    Provide metadata, citation, and usage examples for Recommenders framework.
    """

    def __init__(self, timestamp, path):
        """
        Initialize Recommenders adapter.
        Args:
            timestamp (bool): Whether timestamps are included.
            path (str): Path where the Recommenders-compatible dataset is stored.
        """
        self.timestamp = timestamp
        self.directory = os.path.abspath(os.path.dirname(path))
        if os.path.exists(self.directory) is False:
            os.makedirs(self.directory)
        self.file = os.path.basename(path)
        self.file_path = os.path.join(self.directory, self.file)

    FRAMEWORK_NAME = 'Recommenders'

    REPOSITORY = 'https://github.com/recommenders-team/recommenders?tab=readme-ov-file'

    PAPER = """Microsoft recommenders: tools to accelerate developing recommender systems"""

    DOI = "https://doi.org/10.1145/3298689.3346967"

    CITATION = """
            @inproceedings{DBLP:conf/recsys/GrahamMW19,
              author       = {Scott Graham and
                              Jun{-}Ki Min and
                              Tao Wu},
              editor       = {Toine Bogers and
                              Alan Said and
                              Peter Brusilovsky and
                              Domonkos Tikk},
              title        = {Microsoft recommenders: tools to accelerate developing recommender
                              systems},
              booktitle    = {Proceedings of the 13th {ACM} Conference on Recommender Systems, RecSys
                              2019, Copenhagen, Denmark, September 16-20, 2019},
              pages        = {542--543},
              publisher    = {{ACM}},
              year         = {2019},
              url          = {https://doi.org/10.1145/3298689.3346967},
              doi          = {10.1145/3298689.3346967},
              timestamp    = {Wed, 09 Oct 2019 14:20:04 +0200},
              biburl       = {https://dblp.org/rec/conf/recsys/GrahamMW19.bib},
              bibsource    = {dblp computer science bibliography, https://dblp.org}
            }"""

    CODE = """

    """

    DOC = 'https://recommenders-team.github.io/recommenders'

    def info_code(self):
        """
        Provide the code to use in Recommenders to run experiments.
        """
        if self.timestamp:
            self.CODE = """
                import pandas as pd

                data = pd.read_csv('{file}', sep="\\t", names=['user', 'item', 'rating', 'timestamp'])
                """.format(file=self.file_path)
        else:
            self.CODE = """
                import pandas as pd

                data = pd.read_csv('{file}', sep="\\t", names=['user', 'item', 'rating'])
                """.format(file=self.file_path)

        super().info_code()

__init__(timestamp, path)

Initialize Recommenders adapter.

Args:
    timestamp (bool): Whether timestamps are included.
    path (str): Path where the Recommenders-compatible dataset is stored.

Source code in datarec/io/frameworks/recommenders/recommenders.py
def __init__(self, timestamp, path):
    """
    Initialize Recommenders adapter.
    Args:
        timestamp (bool): Whether timestamps are included.
        path (str): Path where the Recommenders-compatible dataset is stored.
    """
    self.timestamp = timestamp
    self.directory = os.path.abspath(os.path.dirname(path))
    if os.path.exists(self.directory) is False:
        os.makedirs(self.directory)
    self.file = os.path.basename(path)
    self.file_path = os.path.join(self.directory, self.file)

info_code()

Provide the code to use in Recommenders to run experiments.

Source code in datarec/io/frameworks/recommenders/recommenders.py
def info_code(self):
    """
    Provide the code to use in Recommenders to run experiments.
    """
    if self.timestamp:
        self.CODE = """
            import pandas as pd

            data = pd.read_csv('{file}', sep="\\t", names=['user', 'item', 'rating', 'timestamp'])
            """.format(file=self.file_path)
    else:
        self.CODE = """
            import pandas as pd

            data = pd.read_csv('{file}', sep="\\t", names=['user', 'item', 'rating'])
            """.format(file=self.file_path)

    super().info_code()
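
A hedged sketch of the usual next step with the loaded frame, using the random splitter from the recommenders package ('datarec.tsv' is a placeholder):

import pandas as pd
from recommenders.datasets.python_splitters import python_random_split

data = pd.read_csv('datarec.tsv', sep='\t',
                   names=['user', 'item', 'rating', 'timestamp'])
train, test = python_random_split(data, ratio=0.75)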

RecPack

RecPack

Bases: Framework

RecPack framework adapter.

Provide metadata, citation, and usage examples for RecPack framework.

Source code in datarec/io/frameworks/recpack/recpack.py
class RecPack(Framework):
    """
    RecPack framework adapter.

    Provide metadata, citation, and usage examples for RecPack framework.
    """

    def __init__(self, timestamp, path):
        """
        Initialize RecPack adapter.
        Args:
            timestamp (bool): Whether timestamps are included.
            path (str): Path where the RecPack-compatible dataset is stored.
        """
        self.timestamp = timestamp
        self.directory = os.path.abspath(os.path.dirname(path))
        if os.path.exists(self.directory) is False:
            os.makedirs(self.directory)
        self.file = os.path.basename(path)
        self.file_path = os.path.join(self.directory, self.file)

    FRAMEWORK_NAME = 'RecPack'

    REPOSITORY = 'https://github.com/LienM/recpack'

    PAPER = """RecPack: An(other) Experimentation Toolkit for Top-N Recommendation using Implicit Feedback Data"""

    DOI = "https://doi.org/10.1145/3523227.3551472"

    CITATION = """
            @inproceedings{DBLP:conf/recsys/MichielsVG22,
              author       = {Lien Michiels and
                              Robin Verachtert and
                              Bart Goethals},
              editor       = {Jennifer Golbeck and
                              F. Maxwell Harper and
                              Vanessa Murdock and
                              Michael D. Ekstrand and
                              Bracha Shapira and
                              Justin Basilico and
                              Keld T. Lundgaard and
                              Even Oldridge},
              title        = {RecPack: An(other) Experimentation Toolkit for Top-N Recommendation
                              using Implicit Feedback Data},
              booktitle    = {RecSys '22: Sixteenth {ACM} Conference on Recommender Systems, Seattle,
                              WA, USA, September 18 - 23, 2022},
              pages        = {648--651},
              publisher    = {{ACM}},
              year         = {2022},
              url          = {https://doi.org/10.1145/3523227.3551472},
              doi          = {10.1145/3523227.3551472},
              timestamp    = {Mon, 01 May 2023 13:01:24 +0200},
              biburl       = {https://dblp.org/rec/conf/recsys/MichielsVG22.bib},
              bibsource    = {dblp computer science bibliography, https://dblp.org}
            }"""

    CODE = """

    """

    DOC = 'https://recpack.froomle.ai/'

    def info_code(self):
        """
        Provide the code to use in RecPack to run experiments.
        """
        self.CODE = """
            To use a dataset from DataRec, you need to:
            1) copy/move the file 
            \'datarec/io/frameworks/recpack/datarec.py\'
            at \'recpack/datasets/datarec.py\'
            2) replace the content of the init file in RecPack
            \'datarec/io/frameworks/recpack/__init__.py\'
            with the content of
            \'datarec/io/frameworks/recpack/copy_me_in__init__.py\'
            Then you can use this code:

            from recpack.datasets import DataRec
            dataset = DataRec(path='{directory}', filename='{file}', use_default_filters=False)
        """.format(file=self.file, directory=self.directory)

        super().info_code()

__init__(timestamp, path)

Initialize RecPack adapter.

Args:
    timestamp (bool): Whether timestamps are included.
    path (str): Path where the RecPack-compatible dataset is stored.

Source code in datarec/io/frameworks/recpack/recpack.py
def __init__(self, timestamp, path):
    """
    Initialize RecPack adapter.
    Args:
        timestamp (bool): Whether timestamps are included.
        path (str): Path where the RecPack-compatible dataset is stored.
    """
    self.timestamp = timestamp
    self.directory = os.path.abspath(os.path.dirname(path))
    if os.path.exists(self.directory) is False:
        os.makedirs(self.directory)
    self.file = os.path.basename(path)
    self.file_path = os.path.join(self.directory, self.file)

info_code()

Provide the code to use in RecPack to run experiments.

Source code in datarec/io/frameworks/recpack/recpack.py
def info_code(self):
    """
    Provide the code to use in RecPack to run experiments.
    """
    self.CODE = """
        To use a dataset from DataRec, you need to:
        1) copy/move the file 
        \'datarec/io/frameworks/recpack/datarec.py\'
        at \'recpack/datasets/datarec.py\'
        2) replace the content of the init file in RecPack
        \'datarec/io/frameworks/recpack/__init__.py\'
        with the content of
        \'datarec/io/frameworks/recpack/copy_me_in__init__.py\'
        Then you can use this code:

        from recpack.datasets import DataRec
        dataset = DataRec(path='{directory}', filename='{file}', use_default_filters=False)
    """.format(file=self.file, directory=self.directory)

    super().info_code()

DataRec

Bases: Dataset

Base class for DataRec Datasets

Source code in datarec/io/frameworks/recpack/datarec.py
class DataRec(Dataset):
    """
    Base class for DataRec Datasets
    """
    USER_IX = "userId"
    """Name of the column in the DataFrame that contains user identifiers."""
    ITEM_IX = "itemId"
    """Name of the column in the DataFrame that contains item identifiers."""
    TIMESTAMP_IX = "timestamp"
    """Name of the column in the DataFrame that contains time of interaction in seconds since epoch."""

    @property
    def DEFAULT_FILENAME(self) -> str:
        """
        Default filename that will be used if it is not specified by the user.
        """
        return f"datarec.tsv"

    def _load_dataframe(self) -> pd.DataFrame:
        """
        Dataset from DataRec will be loaded as a pandas DataFrame

        Warning:: This does not apply any preprocessing, and returns the raw dataset.

        Returns:
            (pd.DataFrame): The interaction data as a DataFrame with a row per interaction.

        """
        df = pd.read_csv(os.path.join(self.path, self.filename), sep='\t', header=0, dtype={
                self.USER_IX: str,
                self.TIMESTAMP_IX: np.int64,
                self.ITEM_IX: str,
            })
        return df

USER_IX = 'userId' class-attribute instance-attribute

Name of the column in the DataFrame that contains user identifiers.

ITEM_IX = 'itemId' class-attribute instance-attribute

Name of the column in the DataFrame that contains item identifiers.

TIMESTAMP_IX = 'timestamp' class-attribute instance-attribute

Name of the column in the DataFrame that contains time of interaction in seconds since epoch.

DEFAULT_FILENAME property

Default filename that will be used if it is not specified by the user.
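
A hedged usage sketch, after datarec.py has been copied into RecPack as described in the adapter section above; 'exports' and 'datarec.tsv' are placeholders, and load() is RecPack's standard Dataset entry point, assumed unchanged here:

from recpack.datasets import DataRec

dataset = DataRec(path='exports', filename='datarec.tsv', use_default_filters=False)
interactions = dataset.load()  # builds a RecPack InteractionMatrix from the TSV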