#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <math.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include "cdflib.h"

/* cdflib.c: library of routines for calculating cumulative
             distribution functions 

   See cdf.c for examples of use.

   See end of this file for copyright information.

*/

Matrix *make_matrix (int rows, int cols) {
  int i, j;
  Matrix *matrix = (Matrix *) malloc (sizeof (Matrix));

  matrix->rows = rows;
  matrix->cols = cols;
  matrix->m = (double **) malloc (rows * sizeof (double *));

  for (i=0; i<rows; i++) {
    matrix->m[i] = (double *) malloc (cols * sizeof (double));
  }

  for (i=0; i<rows; i++) {
    for (j=0; j<cols; j++) {
      matrix->m[i][j] = 0.0;
    }
  }
  return matrix;
}

void print_matrix (Matrix *matrix) {
  int i, j;

  for (i=0; i<matrix->rows; i++) {
    for (j=0; j<matrix->cols; j++) {
      printf ("%g\t", matrix->m[i][j]);
    }
    printf ("\n");
  }
}

// VECTOR

Vector *make_vector (int size)
{
  Vector *vector = (Vector *) malloc (sizeof (Vector));
  vector->x = (float *) malloc (size * sizeof (float));
  vector->n = 0;
  vector->size = size;
  return vector;
}

void clear_vector (Vector *vector) {
  vector->n = 0;
}

void free_vector (Vector *vector)
{
  free (vector->x);
  free (vector);
}

void resize_vector (Vector *vector, int size)
{
  vector->x = (float *) realloc (vector->x, size * sizeof (float));
  assert (vector->x != NULL);
  vector->size = size;
}

void sort_vector (Vector *vector) {
  sort_array (vector->x, vector->n);
}

// rand_uniform: returns 0.0 through 1.0, including 0.0 but not 1.0

static double rand_uniform ()
{
  return (double) random() / ((double) RAND_MAX + 1);
}

double random_from_vector (Vector *v) {
  int i;
  assert (v->n > 0);
  i = (int) v->n * rand_uniform ();
  return v->x[i];
}

Vector *copy_vector (Vector *v) {
  int i;
  Vector *c = make_vector (v->n);
  c->n = v->n;
  for (i=0; i<c->n; i++) {
    c->x[i] = v->x[i];
  }
  return c;
}

Vector *difference_vector (Vector *v) {
  int i;
  Vector *d = make_vector (v->n-1);
  d->n = v->n-1;
  for (i=0; i<d->n; i++) {
    d->x[i] = v->x[i+1] - v->x[i];
  }
  return d;
}

void add_to_vector (Vector *vector, double x)
{
  if (vector->n >= vector->size) {
    resize_vector (vector, vector->size * 2);
  }
  vector->x[vector->n] = x;
  vector->n++;
}

Vector *subtract_vector (Vector *a, Vector *b)
{
  int i;
  Vector *c;

  if (a->n != b->n) {
    return NULL;
  }
  c = make_vector (a->n);

  for (i=0; i<a->n; i++) {
    c->x[i] = a->x[i] - b->x[i];
  }
  c->n = a->n;
  return c;
}

void print_vector_fp (Vector *vector, FILE *fp)
{
  int i;
  for (i=0; i<vector->n; i++) {
    fprintf (fp, "%g\n", vector->x[i]);
  }
}

void print_vector (Vector *vector)
{
  print_vector_fp (vector, stdout);
}

void print_vectori_fp (Vector *vector, FILE *fp)
{
  int i;
  for (i=0; i<vector->n; i++) {
    fprintf (fp, "%d\t%g\n", i, vector->x[i]);
  }
}

void print_vectori (Vector *vector)
{
  print_vectori_fp (vector, stdout);
}

double calc_avg (float x[], int n)
{
  int i;
  double sx = 0.0;
 
  for (i=0; i < n; i++) {
    sx += x[i];
  }
  return sx / n;
}
 
double calc_var (float x[], int n, double mean)
{
  int i;
  double sx = 0.0;
 
  for (i=0; i < n; i++) {
    sx += (x[i] - mean) * (x[i] - mean);
  }
 
  return sx / n;
}

// CDF

Cdf *make_cdf (int size)
{
  Cdf *cdf = (Cdf *) malloc (sizeof (Cdf));
  cdf->x = (float *) malloc (size * sizeof (float));
  cdf->y = (float *) malloc (size * sizeof (float));
  return cdf;
}

void free_cdf (Cdf *cdf)
{
  free (cdf->x);
  free (cdf->y);
  free (cdf);
}

void resize_cdf (Cdf *cdf, int size)
{
  cdf->y = (float *) realloc (cdf->y, size * sizeof (float));
  cdf->x = (float *) realloc (cdf->x, size * sizeof (float));
}

void add_to_cdf (Cdf *cdf, double x, double y)
{
  if (cdf->n >= cdf->size) {
    resize_cdf (cdf, cdf->size * 2);
  }
  cdf->x[cdf->n] = x;
  cdf->y[cdf->n] = y;
  cdf->n++;
}

void print_cdf_fp (Cdf *cdf, FILE *fp)
{
  int i;

  fprintf (fp, "%f\t%f\n", cdf->x[0], 0.0);
  fprintf (fp, "%f\t%f\n", cdf->x[0], cdf->y[0]);

  for (i=1; i<cdf->n; i++) {
    fprintf (fp, "%f\t%f\n", cdf->x[i], cdf->y[i-1]);
    fprintf (fp, "%f\t%f\n", cdf->x[i], cdf->y[i]);
  }
}

void print_cdf (Cdf *cdf)
{
  print_cdf_fp (cdf, stdout);
}

Vector *read_file (char *file_name)
{
  int n;
  int size = 1024;
  Vector *vector = make_vector (size);
  FILE *fp;
  char line[256];
  double x;

  if (file_name == NULL) {
    fp = stdin;
  } else {
    fp = fopen (file_name, "r");
  }
  if (fp == 0) {
    fprintf (stderr, "Unable to open file %s.\n", file_name);
    exit (1);
  }
  while (fgets (line, 256, fp) != NULL) {

    // make sure we got a number
    n = sscanf (line, "%lf", &x);
    if (n != 1) {
      fprintf (stderr, "Nonnumeric value, %s", line);
      exit (-1);
    }
    add_to_vector (vector, x);
  }
  return vector;
}

/* compar: takes two pointers to integers and tells which is
   bigger.  Used by sort_array. */

int compar (const void *px, const void *py)
{
  float x = *(float *) px;
  float y = *(float *) py;
  if (x < y) return -1;
  if (x > y) return 1;
  return 0;
}

/* sort_array: this is a veneer on the library implementation
   of quicksort, used to generate order statistics */

void sort_array (float x[], int n)
{
  void *base = (void *) x;
  size_t nmemb = n;
  size_t size = sizeof (float);
  
  qsort (base, nmemb, size, compar);
}

Cdf *calc_cdf (Vector *vector)
{
  Cdf *cdf = make_cdf (vector->n);
  int i;

  cdf->n = vector->n;
  for (i=0; i<cdf->n; i++) {
    cdf->x[i] = vector->x[i];
    cdf->y[i] = (float) (i+1) / (cdf->n);
  }
  sort_array (cdf->x, cdf->n);

  return cdf;
}

// snap_to_grid: divide the interval (low, high) into n
// equal spaced values, and round x to the nearest value

double snap_to_grid (double x, double low, double high, int n)
{
  x = (x - low) / (high - low) * n;
  x = rint(x) / n * (high - low) + low;
  return x;
}

void round_cdf (Cdf *cdf, int n)
{
  int i;
  double lowx = cdf->x[0];
  double lowy = cdf->y[0];
  double highx = cdf->x[cdf->n-1];
  double highy = cdf->y[cdf->n-1];

  if (lowx == highx || lowy == highy) return;

  for (i=0; i<cdf->n; i++) {
    cdf->x[i] = snap_to_grid (cdf->x[i], lowx, highx, n);
    cdf->y[i] = snap_to_grid (cdf->y[i], lowy, highy, n);
  }
}

// uniq_cdf: traverse the cdf and keep only the last appearance
// of each (x, y) pair.

void uniq_cdf (Cdf *cdf)
{
  int i, j, k;

  if (cdf->n == 1) return;

  // i traverses the array;
  // j is the location where the next entry will go
  // k is the location of the item we are currently working on

  j = 1;
  k = 1;

  for (i=2; i<cdf->n; i++) {

    // does the ith entry differ from the jth on both axes?
    if (cdf->x[i] != cdf->x[j] && cdf->y[i] != cdf->y[j]) {
      cdf->x[k] = cdf->x[i-1];
      cdf->y[k] = cdf->y[i-1];
      k++;
      j = i;
    }
  }
  // grab the last entry
  cdf->x[k] = cdf->x[i-1];
  cdf->y[k] = cdf->y[i-1];
  cdf->n = k+1;
}

double log2 (double x)
{
  static double denom = 0.0;
  if (denom == 0.0) denom = log(2.0);
  return log(x) / denom;
}

// transform_cdf_logx: computes the log2 of the x values, removing
// all non-positives

void transform_cdf_logx (Cdf *cdf)
{
  int i, j;

  j = 0;
  for (i=0; i<cdf->n; i++) {
    if (cdf->x[i] > 0.0) {
      cdf->x[j] = log2(cdf->x[i]);
      cdf->y[j] = cdf->y[i];
      j++;
    }
  }
  cdf->n = j;
}

// transform_cdf_logy: computes the log2 of the complementary y values.
// Values at the tail might be -inf, but it's a good idea to keep them
// to make sure we get all the x values

void transform_cdf_logy (Cdf *cdf)
{
  int i, j;

  j = 0;
  for (i=0; i<cdf->n; i++) {
    if (cdf->y[i] < 1.0) {
      cdf->y[j] = log2(1.0 - cdf->y[i]);
    } else {
      cdf->y[j] = cdf->y[j-1];
    }
    cdf->x[j] = cdf->x[i];
    j++;
  }
  cdf->n = j;
}

extern double invnor (double p);

void transform_cdf_norm (Cdf *cdf)
{
  int i, j=0;

  for (i=0; i<cdf->n; i++) {
    if (cdf->y[i] < 1.0) {
      cdf->y[j] = invnor (cdf->y[i]);
      j++;
    }
  }
  cdf->n = j;
}

void multiply_cdf (Cdf *cdf, double x, double y)
{
  int i;

  for (i=0; i<cdf->n; i++) {
    cdf->x[i] *= x;
    cdf->y[i] *= y;
  }
}

// percentile_to_value: given a percentile (y), finds the
// corresponding value in the cdf

double percentile_to_value (double y, Cdf *cdf)
{
  int low = 0;
  int high = cdf->n - 1;
  int mid;

  assert (y >= 0.0 && y <= 1.0);

  // special case to deal with implicit first entry
  if (y <= cdf->y[0]) {
    return cdf->x[0]; 
  }

  // find the entry by bisection
  while (1) {
    //fprintf (stderr, "%d\t%d\t", low, high); 
    //fprintf (stderr, "%f\t%f\n", cdf->y[low], cdf->y[high]); 

    assert (high >= low);

    mid = (low + high) / 2;
    assert (mid < cdf->n);

    if (y > cdf->y[mid] && y <= cdf->y[mid+1]) {
      return cdf->x[mid+1];
    }

    if (y <= cdf->y[mid]) {
      high = mid;
    } else {
      low = mid+1;
    }
  }
}

// value_to_percentile: looks up the given value in the
// cdf and returns the corresponding percentile.
// returns 0
// and puts the range of percentiles in the interval

int value_to_percentile (double x, Cdf *cdf, double *interval)
{
  int low = 0;
  int high = cdf->n - 1;
  int mid;

  if (x < cdf->x[0]) {
    interval[0] = 0.0;
    interval[1] = cdf->y[0];
    return 0;
  }
  if (x > cdf->x[cdf->n-1]) {
    interval[0] = cdf->y[cdf->n - 2];
    interval[1] = 1.0;
    return 0;
  }

  // special case to handle implicit first entry
  if (x == cdf->x[0]) {
    interval[0] = 0.0;
    interval[1] = cdf->y[0];
    return 0;
  }

  // find the entry by bisection
  while (1) {
    if (high < low) return -1;

    mid = (low + high) / 2;

    if (x == cdf->x[mid]) {
      interval[0] = cdf->y[mid-1];
      interval[1] = cdf->y[mid];
      return 0;
    }

    if (x > cdf->x[mid] && x < cdf->x[mid+1]) {
      interval[0] = cdf->y[mid];
      interval[1] = cdf->y[mid];
      return 0;
    }

    if (x < cdf->x[mid]) {
      high = mid-1;
    } else {
      low = mid+1;
    }
  }
}

// special_random: returns a random number between 0.0 and 1.0,
// including 1.0 but not including 0.0 (that's the special part)

double special_random ()
{
  double x = (double) random() / 0x80000000;
  return 1-x;
}

double value_to_rand_percentile (double x, Cdf *cdf)
{
  double interval[2];
  int n = value_to_percentile (x, cdf, interval);

  assert (n == 0);
  assert (interval[0] >= 0.0 && interval[1] <= 1.0);

  return (interval[1] - interval[0]) * special_random() + interval[0];
}

Cdf *ppplot (Cdf *cdf, Cdf *dist)
{
  Cdf *pp = make_cdf (cdf->n);
  int i;

  pp->n = cdf->n;
  for (i=0; i<pp->n; i++) {
    pp->x[i] = cdf->y[i]; 
    pp->y[i] = value_to_rand_percentile (cdf->x[i], dist);
  }
  return pp;
}

Cdf *qqplot (Cdf *cdf, Cdf *dist)
{
  Cdf *qq = make_cdf (cdf->n);
  int i;

  qq->n = cdf->n;
  for (i=0; i<qq->n; i++) {
    qq->x[i] = cdf->x[i]; 
    qq->y[i] = percentile_to_value (cdf->y[i], dist);
  }
  return qq;
}


/* cdflib.c: library of routines for calculating cumulative
             distribution functions 

Copyright (C) 2003 Allen B. Downey

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/