Source code for yieldplotlib.generate_key_map

r"""Generate key_map.py from CSV data or directly from Google Sheets.

This script creates a key_map.py file that maps parameters between different exoplanet
simulation libraries (EXOSIMS and AYO). It can either use a local CSV file or directly
download from a Google Sheet.

Usage Examples:
1. Generate from a local CSV file:
   ```
   python generate_key_map.py --csv input_data.csv
   ```

2. Download from Google Sheets using a service account credentials file:
   ```
   python generate_key_map.py --sheets SHEET_ID --credentials path/to/credentials.json
   ```

3. Download from Google Sheets using Base64-encoded credentials from environment:
   ```
   # First set the environment variable:
   export GOOGLE_CREDENTIALS_B64=$(cat service-account.json | base64 | tr -d '\n')

   # Then run:
   python generate_key_map.py --sheets SHEET_ID
   ```

4. Use a temporary file for the downloaded CSV (cleaned up afterward):
   ```
   python generate_key_map.py --sheets SHEET_ID --temp
   ```

Output:
------
The script creates a key_map.py file in the current directory with a KEY_MAP
dictionary that maps parameter names to their locations and transformations
in the EXOSIMS and AYO libraries.
"""

import argparse
import base64
import csv
import json
import os
import sys
import tempfile
from collections import OrderedDict

import pandas as pd



[docs]
def download_from_google_sheets(sheet_id, output_path, credentials_json_path=None):
    """Download the "Sheet1" range of a Google Sheet and save it as CSV.

    The function authenticates against the Google Sheets API with service
    account credentials (read-only spreadsheet scope), fetches the values of
    the range named "Sheet1", treats the first row as the header and writes
    the result to ``output_path`` as a CSV file.

    Args:
        sheet_id:
            The ID of the Google Sheet to download. This is the string from the URL:
            https://docs.google.com/spreadsheets/d/{SHEET_ID}/edit

        output_path:
            Path where the CSV file should be saved.

        credentials_json_path:
            Optional path to a service account credentials JSON file.
            If not provided, the GOOGLE_CREDENTIALS_B64 environment variable is
            used instead; it must contain the Base64-encoded JSON credentials.

    Returns:
        None. The sheet data is saved to the output_path as a CSV file.

    Raises:
        SystemExit: If the Google API libraries are missing, the credentials
            cannot be loaded, the sheet is empty, or the download fails.
    """
    # Imported lazily so the Google client libraries are only required when
    # the Google Sheets code path is actually used.
    try:
        from google.oauth2 import service_account
        from googleapiclient.discovery import build
    except ImportError:
        print(
            "Error: Google API libraries not installed. "
            "Run: pip install google-auth google-auth-oauthlib"
            " google-api-python-client"
        )
        sys.exit(1)

    # Read-only access is all that is needed to fetch the sheet values.
    scopes = ["https://www.googleapis.com/auth/spreadsheets.readonly"]
    credentials = None

    if credentials_json_path:
        # An explicit credentials file takes precedence over the environment.
        try:
            with open(credentials_json_path, encoding="utf-8") as f:
                credentials_info = json.load(f)
            credentials = service_account.Credentials.from_service_account_info(
                credentials_info, scopes=scopes
            )
            print(f"Using credentials from file: {credentials_json_path}")
        except Exception as e:
            print(f"Error loading credentials from {credentials_json_path}: {e}")
            sys.exit(1)
    else:
        # Fall back to Base64-encoded credentials from the environment.
        credentials_b64 = os.environ.get("GOOGLE_CREDENTIALS_B64")
        if not credentials_b64:
            print(
                "Error: No credentials provided. Either set GOOGLE_CREDENTIALS_B64"
                " or provide --credentials"
            )
            sys.exit(1)

        try:
            credentials_json = base64.b64decode(credentials_b64).decode("utf-8")
            credentials_info = json.loads(credentials_json)
            credentials = service_account.Credentials.from_service_account_info(
                credentials_info, scopes=scopes
            )
            print("Using Base64-encoded credentials from environment variable")
        except Exception as e:
            print(f"Error decoding Base64 credentials: {e}")
            sys.exit(1)

    print(f"Downloading sheet {sheet_id}...")

    service = build("sheets", "v4", credentials=credentials)
    sheets = service.spreadsheets()

    try:
        sheet = sheets.values().get(spreadsheetId=sheet_id, range="Sheet1").execute()
        values = sheet.get("values", [])
        if not values:
            print("Error: No data found in the Google Sheet")
            # SystemExit is not an Exception subclass, so it is not swallowed
            # by the handler below.
            sys.exit(1)

        header = values[0]
        width = len(header)
        # The Sheets API omits trailing empty cells, so rows can be shorter
        # than the header. Pad them to the header width; otherwise pandas
        # rejects the data whenever every row is shorter than the header.
        # Rows longer than the header are left untouched and still reported
        # as an error by pandas.
        rows = [list(row) + [""] * (width - len(row)) for row in values[1:]]

        df = pd.DataFrame(rows, columns=header)
        df.to_csv(output_path, index=False)
        print(f"Sheet downloaded and saved to {output_path}")
    except Exception as e:
        print(f"Error downloading sheet: {e}")
        sys.exit(1)




[docs]
def parse_csv(input_csv):
    """Parse the input CSV and construct the KEY_MAP dictionary.

    Each row describes one parameter. Its key is the "yieldplotlib name"
    column, falling back to the EXOSIMS name and then the AYO name. A library
    is included for a row only when its name, file and Class columns are all
    non-empty. Rows without a usable key, or without complete information for
    at least one library, are skipped with a warning.

    Entries are inserted in prioritized order: rows with both libraries first,
    then AYO-only rows, then EXOSIMS-only rows. A later row with a duplicate
    key overwrites the earlier entry (with a warning) but keeps its position.

    Args:
        input_csv (str): Path to the input CSV file (UTF-8, with or without a
            byte-order mark).

    Returns:
        OrderedDict: Maps each key to a dict holding one sub-dict per library
        class (``file``, ``name``, ``unit``, ``transform``) plus a
        ``"comment"`` string.
    """
    key_map = OrderedDict()

    # (key, map_entry) pairs, bucketed by which libraries the row covers.
    both_libs = []
    only_ayo = []
    only_exo = []

    def cell(row, column):
        """Return the stripped text of a cell, or "" if it is missing."""
        # DictReader fills fields missing from short rows with None, so a
        # plain row.get(column, "") is not enough to guarantee a string.
        return (row.get(column) or "").strip()

    def read_library(row, prefix):
        """Return (name, (class, entry)) for a library; entry part is None if incomplete."""
        name = cell(row, f"{prefix} name")
        file_name = cell(row, f"{prefix} file")
        lib_class = cell(row, f"{prefix} Class")
        if not (name and file_name and lib_class):
            return name, None
        # Transformation columns, e.g. type "index" with value "2", or type
        # "sum" with value "1,2".
        transform_type = cell(row, f"{prefix} transform type").lower()
        transform_value = cell(row, f"{prefix} transform value")
        entry = {
            "file": file_name,
            "name": name,
            "unit": cell(row, f"{prefix} unit"),
            "transform": {
                "type": transform_type or "none",
                "value": transform_value or None,
            },
        }
        return name, (lib_class, entry)

    # "utf-8-sig" transparently drops a leading BOM (as written by Excel),
    # which would otherwise become part of the first column name.
    with open(input_csv, newline="", encoding="utf-8-sig") as csvfile:
        reader = csv.DictReader(csvfile)
        # Start at 2 to account for the header row.
        for row_num, row in enumerate(reader, start=2):
            exo_name, exo = read_library(row, "EXOSIMS")
            ayo_name, ayo = read_library(row, "AYO")

            # Prefer the explicit yieldplotlib name, then the EXOSIMS name,
            # then the AYO name.
            key = cell(row, "yieldplotlib name") or exo_name or ayo_name
            if not key:
                print(
                    f"Warning: Row {row_num} has no 'yieldplotlib name'"
                    " and no clear library names. Skipping."
                )
                continue

            if exo is None and ayo is None:
                print(
                    f"Warning: Row {row_num} does not have complete "
                    "information for either library. Skipping."
                )
                continue

            # EXOSIMS is written first, then AYO, then the comment.
            map_entry = {}
            if exo is not None:
                map_entry[exo[0]] = exo[1]
            if ayo is not None:
                map_entry[ayo[0]] = ayo[1]
            map_entry["comment"] = cell(row, "Comment")

            if exo is not None and ayo is not None:
                both_libs.append((key, map_entry))
            elif ayo is not None:
                only_ayo.append((key, map_entry))
            else:
                only_exo.append((key, map_entry))

    # Insert in prioritized order, warning about duplicate keys.
    prioritized = (
        ("Both Libraries", both_libs),
        ("AYO Only", only_ayo),
        ("EXOSIMS Only", only_exo),
    )
    for context, entries in prioritized:
        for key, map_entry in entries:
            if key in key_map:
                print(
                    f"Warning: Duplicate key '{key}' found in {context}."
                    " Overwriting previous entry."
                )
            key_map[key] = map_entry

    return key_map




[docs]
def _py_string_literal(text):
    """Return ``text`` as a double-quoted, escaped Python string literal."""
    # JSON string escapes (\", \\, \n, \uXXXX, ...) are all valid Python
    # escapes, and plain text comes out exactly as '"text"'.
    return json.dumps(str(text), ensure_ascii=False)


def _transform_value_literal(transform_value):
    """Return the Python source text used for a transform value."""
    # Missing values, and the literal text "none", are written as None.
    if transform_value is None:
        return "None"
    if isinstance(transform_value, str) and transform_value.lower() == "none":
        return "None"

    # Prefer an integer, then a float, and fall back to a quoted string.
    try:
        return str(int(transform_value))
    except ValueError:
        pass
    try:
        as_float = float(transform_value)
    except ValueError:
        return _py_string_literal(transform_value)

    # "nan" and "inf" parse as floats, but their str() form is a bare name
    # that is undefined in the generated module, so keep them as strings.
    if as_float != as_float or abs(as_float) == float("inf"):
        return _py_string_literal(transform_value)
    return str(as_float)


def write_key_map(key_map, output_py):
    """Write the KEY_MAP dictionary to a Python file with a docstring.

    The output is a module containing a one-line docstring and a ``KEY_MAP``
    dict literal. For every key, the library classes "EXOSIMSCSVFile",
    "AYOCSVFile", "EXOSIMSInputFile" and "AYOInputFile" are written in that
    order when present, followed by the comment. Entries stored under any
    other class name are not written.

    All strings are emitted as escaped literals, so quotes, backslashes and
    newlines in the data cannot break the generated file. Transform values
    are written as ``None``, an int, a float, or a quoted string.

    Args:
        key_map (OrderedDict):
            The KEY_MAP dictionary.
        output_py (str):
            Path to the output Python file.
    """
    lib_classes = (
        "EXOSIMSCSVFile",
        "AYOCSVFile",
        "EXOSIMSInputFile",
        "AYOInputFile",
    )
    docstring = '"""Key mapping for yieldplotlib library."""\n\n'
    with open(output_py, "w", encoding="utf-8") as f:
        f.write(docstring)
        f.write("KEY_MAP = {\n")
        for key, value in key_map.items():
            f.write(f"    {_py_string_literal(key)}: {{\n")
            # Library-specific entries, in a fixed order.
            for lib_class in lib_classes:
                if lib_class not in value:
                    continue
                lib_entry = value[lib_class]
                file_repr = _py_string_literal(lib_entry["file"])
                name_repr = _py_string_literal(lib_entry["name"])
                unit_repr = _py_string_literal(lib_entry["unit"])
                type_repr = _py_string_literal(lib_entry["transform"]["type"])
                value_repr = _transform_value_literal(lib_entry["transform"]["value"])

                f.write(f'        "{lib_class}": {{\n')
                f.write(f'            "file": {file_repr},\n')
                f.write(f'            "name": {name_repr},\n')
                f.write(f'            "unit": {unit_repr},\n')
                f.write('            "transform": {\n')
                f.write(f'                "type": {type_repr},\n')
                f.write(f'                "value": {value_repr}\n')
                f.write("            }\n")
                f.write("        },\n")
            # The comment always closes the entry.
            comment_repr = _py_string_literal(value.get("comment", ""))
            f.write(f'        "comment": {comment_repr}\n')
            f.write("    },\n")
        f.write("}\n")




[docs]
def main():
    """Run the command-line interface that generates key_map.py.

    Exactly one input source must be given. With ``--sheets`` the sheet is
    first downloaded to a CSV file (a temporary file when ``--temp`` is set,
    otherwise ``_sheet_download.csv`` in the current directory); with ``--csv``
    the given file is used directly. The CSV is then parsed and written out as
    ``key_map.py`` in the current directory.

    Command-line arguments:
    ----------------------
    --csv: Path to a local CSV file to process
    --sheets: Google Sheet ID to download and process
    --credentials: Path to Google service account credentials JSON file (optional)
    --temp: Use a temporary file for the downloaded CSV (deleted after processing)

    Environment variables:
    --------------------
    GOOGLE_CREDENTIALS_B64: Base64-encoded Google service account JSON (required if
                            using --sheets without --credentials)
    """
    cli = argparse.ArgumentParser(description="Generate key_map.py from data")

    # Exactly one of the two input sources has to be chosen.
    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument("--csv", help="Path to CSV file")
    source.add_argument("--sheets", help="Google Sheet ID to download from")

    cli.add_argument(
        "--credentials", help="Path to Google service account credentials JSON file"
    )
    cli.add_argument("--temp", action="store_true", help="Download to a temporary file")

    options = cli.parse_args()

    csv_path = None
    created_temp = False

    try:
        if options.sheets:
            if options.temp:
                # Reserve a name only: the file is closed right away so the
                # download can write to it, and it is removed in `finally`.
                with tempfile.NamedTemporaryFile(
                    suffix=".csv", delete=False
                ) as handle:
                    csv_path = handle.name
                created_temp = True
            else:
                # Fixed file name in the current directory; it is kept.
                csv_path = "_sheet_download.csv"

            download_from_google_sheets(options.sheets, csv_path, options.credentials)
        elif options.csv:
            csv_path = options.csv

        if csv_path:
            write_key_map(parse_csv(csv_path), "key_map.py")
            print(f"Successfully generated key_map.py from {csv_path}")

    finally:
        # Only a temporary download is cleaned up, even if a step above failed.
        if created_temp and os.path.exists(csv_path):
            os.unlink(csv_path)
            print(f"Temporary file {csv_path} removed")



# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()