import React from 'react';
import styled, { css } from 'styled-components';

/**
 * Outermost wrapper for the project detail page.
 *
 * Renders a `div` that is horizontally centered (`margin: 0 auto`), capped at
 * 1100px wide, at least one viewport tall, with a light background, a drop
 * shadow and rounded corners. At viewport widths of 768px or less it spans the
 * full width with reduced padding and drops the shadow and corner radius.
 */
const ProjectDetailContainer = styled.div`
  padding: 3rem 1.5rem;
  background-color: #f2f7ff; /* Light background for readability */
  min-height: 100vh;
  max-width: 1100px;
  margin: 0 auto;
  box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1); /* Slightly stronger shadow */
  border-radius: 12px;

  @media (max-width: 768px) {
    padding: 2rem 1rem;
    max-width: 100%;
    box-shadow: none;
    border-radius: 0;
  }
`;

/**
 * Block that holds the page title and publication date.
 *
 * Left-aligned by default; at viewport widths of 768px or less the text is
 * centered and the bottom margin shrinks from 2.5rem to 1.5rem.
 */
const Header = styled.div`
  text-align: left;
  margin-bottom: 2.5rem;

  @media (max-width: 768px) {
    text-align: center;
    margin-bottom: 1.5rem;
  }
`;

/**
 * Main page heading (`h1`).
 *
 * 2.8rem dark text by default; 2rem and centered at viewport widths of 768px
 * or less.
 */
const Title = styled.h1`
  font-size: 2.8rem;
  color: #1c1c1c; /* Darker shade for titles */
  margin-bottom: 0.5rem;

  @media (max-width: 768px) {
    font-size: 2rem;
    text-align: center;
  }
`;

/**
 * Paragraph used for the "Published on" line: grey text at 1rem, 0.9rem at
 * viewport widths of 768px or less.
 *
 * NOTE(review): this module-scope binding shadows the built-in `Date`
 * constructor everywhere in this file. Nothing visible here constructs dates,
 * but any future `new Date()` in this module would hit this component instead;
 * consider renaming (together with its usage in the page component).
 */
const Date = styled.p`
  font-size: 1rem;
  color: #888; /* Neutral grey for dates */

  @media (max-width: 768px) {
    font-size: 0.9rem;
  }
`;

/**
 * Flex row that lays out the project tags.
 *
 * Items are centered horizontally, separated by a 0.5rem gap, and wrap onto
 * additional lines when they do not fit.
 */
const TagsContainer = styled.div`
  margin-top: 1.5rem;
  display: flex;
  justify-content: center;
  gap: 0.5rem;
  flex-wrap: wrap;
`;

/**
 * Single tag pill: white text on a blue background with rounded ends.
 *
 * Font size is 0.85rem, reduced to 0.75rem at viewport widths of 768px or
 * less.
 */
const Tag = styled.span`
  background-color: #0073e6;
  color: white;
  padding: 0.3rem 0.8rem;
  border-radius: 16px;
  font-size: 0.85rem;

  @media (max-width: 768px) {
    font-size: 0.75rem;
  }
`;

/**
 * Cover image: a block-level `img` centered via auto side margins, with
 * rounded corners, a drop shadow and a 2rem bottom margin.
 *
 * Width is capped at 70% of the container (90% at viewport widths of 768px or
 * less) while `height: auto` preserves the aspect ratio.
 *
 * NOTE(review): this module-scope binding shadows the browser's global
 * `Image` constructor inside this file; consider renaming (together with its
 * usage in the page component) if `new Image()` is ever needed here.
 */
const Image = styled.img`
  display: block;
  max-width: 70%;
  height: auto;
  margin: 0 auto 2rem auto;
  border-radius: 10px;
  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);

  @media (max-width: 768px) {
    max-width: 90%;
  }
`;

/** Wrapper for one content section; only adds a 3rem bottom margin. */
const Section = styled.div`
  margin-bottom: 3rem;
`;

/**
 * Section heading (`h2`) underlined with a 3px blue bottom border.
 *
 * 2rem by default; 1.5rem and centered at viewport widths of 768px or less.
 */
const SectionTitle = styled.h2`
  font-size: 2rem;
  color: #333;
  margin-bottom: 1.2rem;
  border-bottom: 3px solid #0073e6;
  padding-bottom: 0.3rem;

  @media (max-width: 768px) {
    font-size: 1.5rem;
    text-align: center;
  }
`;

/**
 * Body paragraph: 1.1rem dark-grey text with a generous 1.8 line height.
 *
 * At viewport widths of 768px or less the font shrinks to 1rem and the text
 * is justified.
 */
const Text = styled.p`
  font-size: 1.1rem;
  line-height: 1.8;
  color: #444;
  margin-bottom: 1.5rem;

  @media (max-width: 768px) {
    font-size: 1rem;
    text-align: justify;
  }
`;

/** Unordered list with disc bullets, 1.5rem left padding and dark-grey text. */
const List = styled.ul`
  padding-left: 1.5rem;
  list-style-type: disc;
  color: #444;
`;

/** List entry with a 0.8rem bottom margin and a 1.6 line height. */
const ListItem = styled.li`
  margin-bottom: 0.8rem;
  line-height: 1.6;
`;

/**
 * Primary call-to-action button: white label on a blue background that
 * darkens on hover, centered horizontally via auto side margins.
 *
 * At viewport widths of 768px or less the button stretches to the container
 * width, capped at 300px.
 *
 * Because the button is a flex container, its label is a flex item and
 * `text-align` alone does not position it; `justify-content` / `align-items`
 * are what keep the label centered once the button is wider than its text
 * (as it is in the mobile layout).
 */
const Button = styled.button`
  display: flex;
  justify-content: center; /* Center the label along the main axis */
  align-items: center; /* Center the label along the cross axis */
  margin: 0 auto;
  padding: 0.9rem 2.2rem;
  font-size: 1rem;
  color: #fff;
  background-color: #0073e6;
  border: none;
  border-radius: 6px;
  cursor: pointer;
  transition: background-color 0.3s;
  text-align: center;

  &:hover {
    background-color: #005bb5;
  }

  @media (max-width: 768px) {
    width: 100%;
    max-width: 300px;
  }
`;

/**
 * Aspect-ratio box for embedded content.
 *
 * `padding-top: 56.25%` reserves height equal to 9/16 of the element's width,
 * giving a 16:9 area; `position: relative` makes it the positioning context
 * for an absolutely positioned child, and `overflow: hidden` clips anything
 * that spills outside the box.
 */
const ResponsiveIframeContainer = styled.div`
  position: relative;
  overflow: hidden;
  padding-top: 56.25%; /* 16:9 Aspect Ratio */
  margin: 2rem 0;
`;

/**
 * Borderless `iframe` that is absolutely positioned at the top-left corner of
 * its nearest positioned ancestor and stretched to 100% of that ancestor's
 * width and height. In this file it is always rendered inside
 * `ResponsiveIframeContainer`.
 */
const ResponsiveIframe = styled.iframe`
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  border: none;
`;

const BbdaRegression = () => {
  const tags = ['Python', 'Machine Learning', 'Regression', 'Data Analysis', 'SHAP', 'LIME'];
  const datePublished = '2024-07-13'; // Adjust the date accordingly

  return (
    <ProjectDetailContainer>
      <Header>
        <Title>Household Income Prediction Using Regression Techniques</Title>
        <Date>Published on: {datePublished}</Date>
      </Header>

      <Image src="/images/bbda_reg.png" alt="Household Income Prediction" />

      {/* Tags */}
      <TagsContainer>
        {tags.map((tag, index) => (
          <Tag key={index}>{tag}</Tag>
        ))}
      </TagsContainer>

      <br />
      <br />
      <br />

      {/* Overview */}
      <Section>
        <SectionTitle>Overview</SectionTitle>
        <Text>
          This project was conducted as part of the Data Science Bootcamp from Big Blue Data Academy.
          It was the third practice project of the course, and was completed in around 5 hours on the
          13th of July 2024. It involves analyzing and predicting household income using various regression
          techniques. The aim was to explore the relationships between demographic and socioeconomic factors
          and to evaluate the performance of different machine learning models in predicting annual household income.
        </Text>
      </Section>

      {/* Objectives */}
      <Section>
        <SectionTitle>Objectives</SectionTitle>
        <Text>The primary objectives of this project were:</Text>
        <List>
          <ListItem>Perform exploratory data analysis (EDA) to understand the dataset.</ListItem>
          <ListItem>Preprocess the data appropriately for modeling.</ListItem>
          <ListItem>Build and evaluate regression models for predicting household income.</ListItem>
          <ListItem>Interpret the models using SHAP and LIME for insights into feature importance.</ListItem>
        </List>
      </Section>

      {/* Tools Used */}
      <Section>
        <SectionTitle>Tools Used</SectionTitle>
        <Text>The following tools and libraries were utilized in this project:</Text>
        <List>
          <ListItem><strong>Python</strong></ListItem>
          <ListItem><strong>Pandas</strong> and <strong>NumPy</strong> for data manipulation</ListItem>
          <ListItem><strong>Matplotlib</strong> and <strong>Seaborn</strong> for data visualization</ListItem>
          <ListItem><strong>scikit-learn</strong> for machine learning models and preprocessing</ListItem>
          <ListItem><strong>XGBoost</strong> for gradient boosting regression</ListItem>
          <ListItem><strong>SHAP</strong> and <strong>LIME</strong> for model interpretability</ListItem>
        </List>
      </Section>

      {/* Methodology */}
      <Section>
        <SectionTitle>Methodology</SectionTitle>
        <Text>
          The project followed a structured machine learning workflow:
        </Text>
        <List>
          <ListItem><strong>Exploratory Data Analysis:</strong> Assessed data distributions, identified skewness, and applied log transformations to the target variable.</ListItem>
          <ListItem><strong>Data Preprocessing:</strong> Encoded categorical variables, handled ordinal features, and constructed pipelines for data transformation.</ListItem>
          <ListItem><strong>Model Training:</strong> Evaluated multiple regression models including Linear Regression, Lasso, Ridge, Decision Trees, Random Forest, and XGBoost with hyperparameter tuning using GridSearchCV.</ListItem>
          <ListItem><strong>Model Evaluation:</strong> Used cross-validation with appropriate metrics (MSE, MAE, R²) to assess model performance.</ListItem>
          <ListItem><strong>Interpretability:</strong> Applied SHAP and LIME to interpret the models and understand feature importance and effects.</ListItem>
        </List>
      </Section>

      {/* Results */}
      <Section>
        <SectionTitle>Results</SectionTitle>
        <Text>
          The models yielded low predictive performance, with R² scores ranging from approximately 0.02 to 0.09.
          The XGBoost model performed slightly better than others but still explained less than 10% of the variance
          in the target variable.
        </Text>
        <Text>
          Key observations from the analysis include:
        </Text>
        <List>
          <ListItem>
            <strong>Low Correlations:</strong> Most features had low correlations with the target variable,
            making it challenging for models to learn predictive patterns.
          </ListItem>
          <ListItem>
            <strong>Counterintuitive Findings:</strong> SHAP and Partial Dependence Plots revealed relationships
            that contradicted real-world expectations, suggesting potential issues with the synthetic dataset.
          </ListItem>
        </List>
      </Section>

      {/* Model Interpretation */}
      <Section>
        <SectionTitle>Model Interpretation</SectionTitle>
        <Text>
          The SHAP analysis highlighted features like <em>Homeownership_Status_Rent</em> and <em>Employment_Status_Part-time</em>
          as significant, but their relationships with income were counterintuitive. LIME provided local explanations for individual
          predictions, but due to the overall low model performance, these interpretations were limited.
        </Text>
      </Section>

      {/* Exploratory Data Analysis Notebook */}
      <Section>
        <SectionTitle>Exploratory Data Analysis Notebook</SectionTitle>
        <ResponsiveIframeContainer>
          <ResponsiveIframe
            src="/embeds/income_analysis.html"
            title="Income Analysis Notebook"
          ></ResponsiveIframe>
        </ResponsiveIframeContainer>
      </Section>

      {/* Modeling Notebook */}
      <Section>
        <SectionTitle>Modeling Notebook</SectionTitle>
        <ResponsiveIframeContainer>
          <ResponsiveIframe
            src="/embeds/income_modeling.html"
            title="Income Modeling Notebook"
          ></ResponsiveIframe>
        </ResponsiveIframeContainer>
      </Section>

      {/* Presentation */}
      <Section>
        <SectionTitle>Presentation</SectionTitle>
        <ResponsiveIframeContainer>
          <ResponsiveIframe
            src="https://docs.google.com/presentation/d/e/2PACX-1vR05Irfbnb-iK1vI8c47GaTK4WjbbxUszkuNaAdZVNF5nN0MgRvGhCESL-Vp2JzZw/embed?start=false&loop=false&delayms=3000"
            title="Project Presentation"
            allowfullscreen="true"
            mozallowfullscreen="true"
            webkitallowfullscreen="true"
          ></ResponsiveIframe>
        </ResponsiveIframeContainer>
      </Section>

      {/* Conclusion */}
      <Section>
        <SectionTitle>Conclusion</SectionTitle>
        <Text>
          The project demonstrated that even with a robust machine learning workflow, the quality and nature of the dataset
          are crucial for model success. The synthetic dataset's lack of meaningful relationships limited the predictive power
          of the models. This emphasizes the importance of data quality and relevance in predictive modeling.
        </Text>
      </Section>

      <Button onClick={() => window.history.back()}>Back to Projects</Button>
    </ProjectDetailContainer>
  );
};

/**
 * Summary record describing the household income regression project.
 *
 * The title, tags, publication date and cover image repeat the values that
 * the page component in this file renders; `path` and `languages` appear
 * only here. How this record is consumed is not visible in this file —
 * presumably by a project listing or router; verify against the importer.
 *
 * @type {{
 *   title: string,
 *   description: string,
 *   tags: string[],
 *   languages: string[],
 *   datePublished: string,
 *   path: string,
 *   coverImage: string,
 * }}
 */
export const BbdaRegressionMetadata = {
  title: 'Household Income Prediction Using Regression Techniques',
  description: 'Analyzed and predicted household income using various regression techniques on a synthetic dataset.',
  tags: [
    'Python',
    'Machine Learning',
    'Regression',
    'Data Analysis',
    'SHAP',
    'LIME',
  ],
  languages: ['Python'],
  datePublished: '2024-07-13', // ISO 8601 calendar date (YYYY-MM-DD)
  path: '/projects/household-income-prediction',
  coverImage: '/images/bbda_reg.png',
};

export default BbdaRegression;