ó
cÂY]c           @  s  d  Z  d d l m Z d d l m Z d d l Z d d l m Z m Z d d l m	 Z	 m
 Z
 d d l Z d d l m Z m Z d d l Z d d	 l m Z m Z m Z d d
 l m Z m Z d d l m Z d d l m Z d d l m Z d d l m Z d d l m  Z  m! Z! d d l m" Z# d d l$ m% Z% d d d d d d d d d d d d d d d  d! g Z& d" Z' d# Z( d e e ƒ f d$ „  ƒ  YZ) d e) f d% „  ƒ  YZ* d e) f d& „  ƒ  YZ+ d' e e e) ƒ f d( „  ƒ  YZ, d e, f d) „  ƒ  YZ- d e, f d* „  ƒ  YZ. d e, f d+ „  ƒ  YZ/ d, e, f d- „  ƒ  YZ0 d e) f d. „  ƒ  YZ1 d e) f d/ „  ƒ  YZ2 d0 e e ƒ f d1 „  ƒ  YZ3 d e3 f d2 „  ƒ  YZ4 d e3 f d3 „  ƒ  YZ5 d4 e e ƒ f d5 „  ƒ  YZ6 d e6 f d6 „  ƒ  YZ7 d e7 f d7 „  ƒ  YZ8 d8 „  Z9 d e6 f d9 „  ƒ  YZ: d: „  Z; d; „  Z< d e) f d< „  ƒ  YZ= d= e) f d> „  ƒ  YZ> d? d e@ d@ „ ZA dA „  ZB e@ eB _C dB „  ZD d S(C   s   
The :mod:`sklearn.model_selection._split` module includes classes and
functions to split the data based on a preset strategy.
iÿÿÿÿ(   t   print_function(   t   divisionN(   t   chaint   combinations(   t   ceilt   floor(   t   ABCMetat   abstractmethodi   (   t	   indexablet   check_random_statet   safe_indexing(   t   _num_samplest   column_or_1d(   t   check_array(   t   type_of_target(   t   with_metaclass(   t   zip(   t	   signaturet   comb(   t	   _Iterable(   t   _pprintt   BaseCrossValidatort   KFoldt
   GroupKFoldt   LeaveOneGroupOutt   LeaveOneOutt   LeavePGroupsOutt	   LeavePOutt   RepeatedStratifiedKFoldt   RepeatedKFoldt   ShuffleSplitt   GroupShuffleSplitt   StratifiedKFoldt   StratifiedShuffleSplitt   PredefinedSplitt   train_test_splitt   check_cvs   You should specify a value for 'n_splits' instead of relying on the default value. The default value will change from 3 to 5 in version 0.22.s‡   You should specify a value for 'cv' instead of relying on the default value. The default value will change from 3 to 5 in version 0.22.c           B  sk   e  Z d  Z d „  Z d d d „ Z d d d d „ Z d d d d „ Z e d d d d „ ƒ Z	 d „  Z
 RS(   su   Base class for all cross-validators

    Implementations must define `_iter_test_masks` or `_iter_test_indices`.
    c         C  s   d  S(   N(    (   t   self(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyt   __init__E   s    c         c  s|   t  | | | ƒ \ } } } t j t | ƒ ƒ } xE |  j | | | ƒ D]. } | t j | ƒ } | | } | | f VqF Wd S(   sì  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, of length n_samples
            The target variable for supervised learning problems.

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        N(   R   t   npt   arangeR   t   _iter_test_maskst   logical_not(   R%   t   Xt   yt   groupst   indicest
   test_indext   train_index(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyt   splitJ   s    
c         c  sQ   xJ |  j  | | | ƒ D]3 } t j t | ƒ d t j ƒ} t | | <| Vq Wd S(   s   Generates boolean masks corresponding to test sets.

        By default, delegates to _iter_test_indices(X, y, groups)
        t   dtypeN(   t   _iter_test_indicesR'   t   zerosR   t   boolt   True(   R%   R+   R,   R-   R/   t	   test_mask(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR)   k   s    
c         C  s
   t  ‚ d S(   s5   Generates integer indices corresponding to test sets.N(   t   NotImplementedError(   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR3   u   s    c         C  s   d S(   sA   Returns the number of splitting iterations in the cross-validatorN(    (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyt   get_n_splitsy   t    c         C  s
   t  |  ƒ S(   N(   t   _build_repr(   R%   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyt   __repr__}   s    N(   t   __name__t
   __module__t   __doc__R&   t   NoneR1   R)   R3   R   R9   R<   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR   ?   s   	!
c           B  s,   e  Z d  Z d d d „ Z d d d „ Z RS(   sð  Leave-One-Out cross-validator

    Provides train/test indices to split data in train/test sets. Each
    sample is used once as a test set (singleton) while the remaining
    samples form the training set.

    Note: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and
    ``LeavePOut(p=1)`` where ``n`` is the number of samples.

    Due to the high number of test sets (which is the same as the
    number of samples) this cross-validation method can be very costly.
    For large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`
    or :class:`StratifiedKFold`.

    Read more in the :ref:`User Guide <cross_validation>`.

    Examples
    --------
    >>> from sklearn.model_selection import LeaveOneOut
    >>> X = np.array([[1, 2], [3, 4]])
    >>> y = np.array([1, 2])
    >>> loo = LeaveOneOut()
    >>> loo.get_n_splits(X)
    2
    >>> print(loo)
    LeaveOneOut()
    >>> for train_index, test_index in loo.split(X):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...    X_train, X_test = X[train_index], X[test_index]
    ...    y_train, y_test = y[train_index], y[test_index]
    ...    print(X_train, X_test, y_train, y_test)
    TRAIN: [1] TEST: [0]
    [[3 4]] [[1 2]] [2] [1]
    TRAIN: [0] TEST: [1]
    [[1 2]] [[3 4]] [1] [2]

    See also
    --------
    LeaveOneGroupOut
        For splitting the data according to explicit, domain-specific
        stratification of the dataset.

    GroupKFold: K-fold iterator variant with non-overlapping groups.
    c         C  s   t  t | ƒ ƒ S(   N(   t   rangeR   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR3   ¯   s    c         C  s%   | d k r t d ƒ ‚ n  t | ƒ S(   sA  Returns the number of splitting iterations in the cross-validator

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : object
            Always ignored, exists for compatibility.

        groups : object
            Always ignored, exists for compatibility.

        Returns
        -------
        n_splits : int
            Returns the number of splitting iterations in the cross-validator.
        s%   The 'X' parameter should not be None.N(   R@   t
   ValueErrorR   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR9   ²   s    N(   R=   R>   R?   R@   R3   R9   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR      s   ,c           B  s5   e  Z d  Z d „  Z d d d „ Z d d d „ Z RS(   s¢  Leave-P-Out cross-validator

    Provides train/test indices to split data in train/test sets. This results
    in testing on all distinct samples of size p, while the remaining n - p
    samples form the training set in each iteration.

    Note: ``LeavePOut(p)`` is NOT equivalent to
    ``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.

    Due to the high number of iterations which grows combinatorically with the
    number of samples this cross-validation method can be very costly. For
    large datasets one should favor :class:`KFold`, :class:`StratifiedKFold`
    or :class:`ShuffleSplit`.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    p : int
        Size of the test sets.

    Examples
    --------
    >>> from sklearn.model_selection import LeavePOut
    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    >>> y = np.array([1, 2, 3, 4])
    >>> lpo = LeavePOut(2)
    >>> lpo.get_n_splits(X)
    6
    >>> print(lpo)
    LeavePOut(p=2)
    >>> for train_index, test_index in lpo.split(X):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...    X_train, X_test = X[train_index], X[test_index]
    ...    y_train, y_test = y[train_index], y[test_index]
    TRAIN: [2 3] TEST: [0 1]
    TRAIN: [1 3] TEST: [0 2]
    TRAIN: [1 2] TEST: [0 3]
    TRAIN: [0 3] TEST: [1 2]
    TRAIN: [0 2] TEST: [1 3]
    TRAIN: [0 1] TEST: [2 3]
    c         C  s   | |  _  d  S(   N(   t   p(   R%   RC   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   ÷   s    c         c  s;   x4 t  t t | ƒ ƒ |  j ƒ D] } t j | ƒ Vq Wd  S(   N(   R   RA   R   RC   R'   t   array(   R%   R+   R,   R-   t   combination(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR3   ú   s    %c         C  s=   | d k r t d ƒ ‚ n  t t t | ƒ |  j d t ƒƒ S(   sº  Returns the number of splitting iterations in the cross-validator

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : object
            Always ignored, exists for compatibility.

        groups : object
            Always ignored, exists for compatibility.
        s%   The 'X' parameter should not be None.t   exactN(   R@   RB   t   intR   R   RC   R6   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR9   þ   s    N(   R=   R>   R?   R&   R@   R3   R9   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR   Ë   s   *	t
   _BaseKFoldc           B  s>   e  Z d  Z e d „  ƒ Z d d d „ Z d d d d „ Z RS(   s5   Base class for KFold, GroupKFold, and StratifiedKFoldc         C  s§   t  | t j ƒ s1 t d | t | ƒ f ƒ ‚ n  t | ƒ } | d k ra t d j | ƒ ƒ ‚ n  t  | t ƒ sˆ t d j | ƒ ƒ ‚ n  | |  _	 | |  _
 | |  _ d  S(   NsG   The number of folds must be of Integral type. %s of type %s was passed.i   so   k-fold cross-validation requires at least one train/test split by setting n_splits=2 or more, got n_splits={0}.s&   shuffle must be True or False; got {0}(   t
   isinstancet   numberst   IntegralRB   t   typeRG   t   formatR5   t	   TypeErrort   n_splitst   shufflet   random_state(   R%   RO   RP   RQ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&     s    			c         c  s’   t  | | | ƒ \ } } } t | ƒ } |  j | k rT t d j |  j | ƒ ƒ ‚ n  x7 t t |  ƒ j | | | ƒ D] \ } } | | f Vqs Wd S(   së  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape (n_samples,)
            The target variable for supervised learning problems.

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        s\   Cannot have number of splits n_splits={0} greater than the number of samples: n_samples={1}.N(   R   R   RO   RB   RM   t   superRH   R1   (   R%   R+   R,   R-   t	   n_samplest   traint   test(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR1   +  s    +c         C  s   |  j  S(   sÚ  Returns the number of splitting iterations in the cross-validator

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.

        y : object
            Always ignored, exists for compatibility.

        groups : object
            Always ignored, exists for compatibility.

        Returns
        -------
        n_splits : int
            Returns the number of splitting iterations in the cross-validator.
        (   RO   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR9   N  s    N(   R=   R>   R?   R   R&   R@   R1   R9   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyRH     s   #c           B  s/   e  Z d  Z d e d d „ Z d d d „ Z RS(   s3	  K-Folds cross-validator

    Provides train/test indices to split data in train/test sets. Split
    dataset into k consecutive folds (without shuffling by default).

    Each fold is then used once as a validation while the k - 1 remaining
    folds form the training set.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    n_splits : int, default=3
        Number of folds. Must be at least 2.

        .. versionchanged:: 0.20
            ``n_splits`` default value will change from 3 to 5 in v0.22.

    shuffle : boolean, optional
        Whether to shuffle the data before splitting into batches.

    random_state : int, RandomState instance or None, optional, default=None
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`. Used when ``shuffle`` == True.

    Examples
    --------
    >>> from sklearn.model_selection import KFold
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([1, 2, 3, 4])
    >>> kf = KFold(n_splits=2)
    >>> kf.get_n_splits(X)
    2
    >>> print(kf)  # doctest: +NORMALIZE_WHITESPACE
    KFold(n_splits=2, random_state=None, shuffle=False)
    >>> for train_index, test_index in kf.split(X):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...    X_train, X_test = X[train_index], X[test_index]
    ...    y_train, y_test = y[train_index], y[test_index]
    TRAIN: [2 3] TEST: [0 1]
    TRAIN: [0 1] TEST: [2 3]

    Notes
    -----
    The first ``n_samples % n_splits`` folds have size
    ``n_samples // n_splits + 1``, other folds have size
    ``n_samples // n_splits``, where ``n_samples`` is the number of samples.

    Randomized CV splitters may return different results for each call of
    split. You can make the results identical by setting ``random_state``
    to an integer.

    See also
    --------
    StratifiedKFold
        Takes group information into account to avoid building folds with
        imbalanced class distributions (for binary or multiclass
        classification tasks).

    GroupKFold: K-fold iterator variant with non-overlapping groups.

    RepeatedKFold: Repeats K-Fold n times.
    t   warnc         C  sE   | d k r% t  j t t ƒ d } n  t t |  ƒ j | | | ƒ d  S(   NRV   i   (   t   warningsRV   t   NSPLIT_WARNINGt   FutureWarningRR   R   R&   (   R%   RO   RP   RQ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   §  s    	c         c  s·   t  | ƒ } t j | ƒ } |  j r= t |  j ƒ j | ƒ n  |  j } t j | | | d t j ƒ} | | | c  d 7*d } x1 | D]) }	 | | |	 }
 } | |
 | !V| } q† Wd  S(   NR2   i   i    (	   R   R'   R(   RP   R	   RQ   RO   t   fullRG   (   R%   R+   R,   R-   RS   R.   RO   t
   fold_sizest   currentt	   fold_sizet   startt   stop(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR3   ®  s    		N(   R=   R>   R?   t   FalseR@   R&   R3   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR   d  s   Ac           B  s2   e  Z d  Z d d „ Z d „  Z d d d „ Z RS(   s  K-fold iterator variant with non-overlapping groups.

    The same group will not appear in two different folds (the number of
    distinct groups has to be at least equal to the number of folds).

    The folds are approximately balanced in the sense that the number of
    distinct groups is approximately the same in each fold.

    Parameters
    ----------
    n_splits : int, default=3
        Number of folds. Must be at least 2.

        .. versionchanged:: 0.20
            ``n_splits`` default value will change from 3 to 5 in v0.22.

    Examples
    --------
    >>> from sklearn.model_selection import GroupKFold
    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    >>> y = np.array([1, 2, 3, 4])
    >>> groups = np.array([0, 0, 2, 2])
    >>> group_kfold = GroupKFold(n_splits=2)
    >>> group_kfold.get_n_splits(X, y, groups)
    2
    >>> print(group_kfold)
    GroupKFold(n_splits=2)
    >>> for train_index, test_index in group_kfold.split(X, y, groups):
    ...     print("TRAIN:", train_index, "TEST:", test_index)
    ...     X_train, X_test = X[train_index], X[test_index]
    ...     y_train, y_test = y[train_index], y[test_index]
    ...     print(X_train, X_test, y_train, y_test)
    ...
    TRAIN: [0 1] TEST: [2 3]
    [[1 2]
     [3 4]] [[5 6]
     [7 8]] [1 2] [3 4]
    TRAIN: [2 3] TEST: [0 1]
    [[5 6]
     [7 8]] [[1 2]
     [3 4]] [3 4] [1 2]

    See also
    --------
    LeaveOneGroupOut
        For splitting the data according to explicit domain-specific
        stratification of the dataset.
    RV   c         C  sK   | d k r% t  j t t ƒ d } n  t t |  ƒ j | d t d d  ƒd  S(   NRV   i   RP   RQ   (	   RW   RV   RX   RY   RR   R   R&   R`   R@   (   R%   RO   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   ï  s
    	c         c  sk  | d  k r t d ƒ ‚ n  t | d t d d  ƒ} t j | d t ƒ\ } } t | ƒ } |  j | k r… t d |  j | f ƒ ‚ n  t j	 | ƒ } t j
 | ƒ d  d  d … } | | } t j |  j ƒ } t j t | ƒ ƒ }	 xG t | ƒ D]9 \ }
 } t j | ƒ } | | c | 7<| |	 | |
 <qî W|	 | } x/ t |  j ƒ D] } t j | | k ƒ d VqEWd  S(   Ns*   The 'groups' parameter should not be None.t	   ensure_2dR2   t   return_inversesO   Cannot have number of splits n_splits=%d greater than the number of groups: %d.iÿÿÿÿi    (   R@   RB   R   R`   R'   t   uniqueR6   t   lenRO   t   bincountt   argsortR4   t	   enumeratet   argminRA   t   where(   R%   R+   R,   R-   t   unique_groupst   n_groupst   n_samples_per_groupR.   t   n_samples_per_foldt   group_to_foldt   group_indext   weightt   lightest_foldt   f(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR3   ö  s(    

c         C  s   t  t |  ƒ j | | | ƒ S(   së  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape (n_samples,), optional
            The target variable for supervised learning problems.

        groups : array-like, with shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        (   RR   R   R1   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR1     s    N(   R=   R>   R?   R&   R3   R@   R1   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR   ¾  s   0	%c           B  sG   e  Z d  Z d e d d „ Z d d „ Z d d d „ Z d d „ Z RS(   sz  Stratified K-Folds cross-validator

    Provides train/test indices to split data in train/test sets.

    This cross-validation object is a variation of KFold that returns
    stratified folds. The folds are made by preserving the percentage of
    samples for each class.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    n_splits : int, default=3
        Number of folds. Must be at least 2.

        .. versionchanged:: 0.20
            ``n_splits`` default value will change from 3 to 5 in v0.22.

    shuffle : boolean, optional
        Whether to shuffle each class's samples before splitting into batches.

    random_state : int, RandomState instance or None, optional, default=None
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`. Used when ``shuffle`` == True.

    Examples
    --------
    >>> from sklearn.model_selection import StratifiedKFold
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([0, 0, 1, 1])
    >>> skf = StratifiedKFold(n_splits=2)
    >>> skf.get_n_splits(X, y)
    2
    >>> print(skf)  # doctest: +NORMALIZE_WHITESPACE
    StratifiedKFold(n_splits=2, random_state=None, shuffle=False)
    >>> for train_index, test_index in skf.split(X, y):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...    X_train, X_test = X[train_index], X[test_index]
    ...    y_train, y_test = y[train_index], y[test_index]
    TRAIN: [1 3] TEST: [0 2]
    TRAIN: [0 2] TEST: [1 3]

    Notes
    -----
    Train and test sizes may be different in each fold, with a difference of at
    most ``n_classes``.

    See also
    --------
    RepeatedStratifiedKFold: Repeats Stratified K-Fold n times.
    RV   c         C  sE   | d k r% t  j t t ƒ d } n  t t |  ƒ j | | | ƒ d  S(   NRV   i   (   RW   RV   RX   RY   RR   R    R&   (   R%   RO   RP   RQ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   m  s    	c         C  sú  t  |  j ƒ } t j | ƒ } t | ƒ } d } | | k rW t d j | | ƒ ƒ ‚ n  t | ƒ } | j d } t j	 | d t
 ƒ\ } } t j | ƒ }	 t j |	 ƒ }
 t j |  j |	 k ƒ r× t d |  j ƒ ‚ n  |  j |
 k rt j d |
 |  j f t ƒ n  g  |	 D]B } t |  j d |  j d	 | ƒj t j t | |  j ƒ ƒ ƒ ^ q} t j | d
 t j ƒ} x† t t | Œ  ƒ D]r \ } } xc t | | ƒ D]R \ } \ } } | | | k } | | t | ƒ k  } | | | <| | | | k <qœWq€W| S(   Nt   binaryt
   multiclasss1   Supported target types are: {}. Got {!r} instead.i    Rb   sG   n_splits=%d cannot be greater than the number of members in each class.s‘   The least populated class in y has only %d members, which is too few. The minimum number of members in any class cannot be less than n_splits=%d.RP   RQ   R2   (   Rs   Rt   (   R	   RQ   R'   t   asarrayR   RB   RM   R   t   shapeRc   R6   Re   t   mint   allRO   RW   RV   t   WarningR   RP   R1   R4   t   maxRG   Rg   R   Rd   (   R%   R+   R,   t   rngt   type_of_target_yt   allowed_target_typesRS   t   unique_yt
   y_inversedt   y_countst
   min_groupst   countt   per_cls_cvst
   test_foldst   test_fold_indicest   per_cls_splitst   clst   _t
   test_splitt   cls_test_folds(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyt   _make_test_foldss  s:    		L"
c         c  s;   |  j  | | ƒ } x" t |  j ƒ D] } | | k Vq" Wd  S(   N(   R‹   RA   RO   (   R%   R+   R,   R-   R„   t   i(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR)   §  s    c         C  s4   t  | d t d d ƒ} t t |  ƒ j | | | ƒ S(   sg  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

            Note that providing ``y`` is sufficient to generate the splits and
            hence ``np.zeros(n_samples)`` may be used as a placeholder for
            ``X`` instead of actual training data.

        y : array-like, shape (n_samples,)
            The target variable for supervised learning problems.
            Stratification is done based on the y labels.

        groups : object
            Always ignored, exists for compatibility.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.

        Notes
        -----
        Randomized CV splitters may return different results for each call of
        split. You can make the results identical by setting ``random_state``
        to an integer.
        Ra   R2   N(   R   R`   R@   RR   R    R1   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR1   ¬  s    "N(	   R=   R>   R?   R`   R@   R&   R‹   R)   R1   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR    6  s
   54t   TimeSeriesSplitc           B  s,   e  Z d  Z d d d „ Z d d d „ Z RS(   s€  Time Series cross-validator

    Provides train/test indices to split time series data samples
    that are observed at fixed time intervals, in train/test sets.
    In each split, test indices must be higher than before, and thus shuffling
    in cross validator is inappropriate.

    This cross-validation object is a variation of :class:`KFold`.
    In the kth split, it returns first k folds as train set and the
    (k+1)th fold as test set.

    Note that unlike standard cross-validation methods, successive
    training sets are supersets of those that come before them.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    n_splits : int, default=3
        Number of splits. Must be at least 2.

        .. versionchanged:: 0.20
            ``n_splits`` default value will change from 3 to 5 in v0.22.

    max_train_size : int, optional
        Maximum size for a single training set.

    Examples
    --------
    >>> from sklearn.model_selection import TimeSeriesSplit
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([1, 2, 3, 4, 5, 6])
    >>> tscv = TimeSeriesSplit(n_splits=5)
    >>> print(tscv)  # doctest: +NORMALIZE_WHITESPACE
    TimeSeriesSplit(max_train_size=None, n_splits=5)
    >>> for train_index, test_index in tscv.split(X):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...    X_train, X_test = X[train_index], X[test_index]
    ...    y_train, y_test = y[train_index], y[test_index]
    TRAIN: [0] TEST: [1]
    TRAIN: [0 1] TEST: [2]
    TRAIN: [0 1 2] TEST: [3]
    TRAIN: [0 1 2 3] TEST: [4]
    TRAIN: [0 1 2 3 4] TEST: [5]

    Notes
    -----
    The training set has size ``i * n_samples // (n_splits + 1)
    + n_samples % (n_splits + 1)`` in the ``i``th split,
    with a test set of size ``n_samples//(n_splits + 1)``,
    where ``n_samples`` is the number of samples.
    RV   c         C  sT   | d k r% t  j t t ƒ d } n  t t |  ƒ j | d t d d  ƒ| |  _	 d  S(   NRV   i   RP   RQ   (
   RW   RV   RX   RY   RR   R   R&   R`   R@   t   max_train_size(   R%   RO   RŽ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&     s    	c         c  s  t  | | | ƒ \ } } } t | ƒ } |  j } | d } | | k ra t d j | | ƒ ƒ ‚ n  t j | ƒ } | | } t | | | | | ƒ }	 xg |	 D]_ }
 |  j rà |  j |
 k  rà | |
 |  j |
 !| |
 |
 | !f Vq› | |
  | |
 |
 | !f Vq› Wd S(   s   Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape (n_samples,)
            Always ignored, exists for compatibility.

        groups : array-like, with shape (n_samples,)
            Always ignored, exists for compatibility.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        i   sI   Cannot have number of folds ={0} greater than the number of samples: {1}.N(	   R   R   RO   RB   RM   R'   R(   RA   RŽ   (   R%   R+   R,   R-   RS   RO   t   n_foldsR.   t	   test_sizet   test_startst
   test_start(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR1     s&    	

N(   R=   R>   R?   R@   R&   R1   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR   Ò  s   4	c           B  s8   e  Z d  Z d „  Z d d d d „ Z d d d „ Z RS(   s8  Leave One Group Out cross-validator

    Provides train/test indices to split data according to a third-party
    provided group. This group information can be used to encode arbitrary
    domain specific stratifications of the samples as integers.

    For instance the groups could be the year of collection of the samples
    and thus allow for cross-validation against time-based splits.

    Read more in the :ref:`User Guide <cross_validation>`.

    Examples
    --------
    >>> from sklearn.model_selection import LeaveOneGroupOut
    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    >>> y = np.array([1, 2, 1, 2])
    >>> groups = np.array([1, 1, 2, 2])
    >>> logo = LeaveOneGroupOut()
    >>> logo.get_n_splits(X, y, groups)
    2
    >>> logo.get_n_splits(groups=groups) # 'groups' is always required
    2
    >>> print(logo)
    LeaveOneGroupOut()
    >>> for train_index, test_index in logo.split(X, y, groups):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...    X_train, X_test = X[train_index], X[test_index]
    ...    y_train, y_test = y[train_index], y[test_index]
    ...    print(X_train, X_test, y_train, y_test)
    TRAIN: [2 3] TEST: [0 1]
    [[5 6]
     [7 8]] [[1 2]
     [3 4]] [1 2] [1 2]
    TRAIN: [0 1] TEST: [2 3]
    [[1 2]
     [3 4]] [[5 6]
     [7 8]] [1 2] [1 2]

    c         c  s   | d  k r t d ƒ ‚ n  t | d t d t d d  ƒ} t j | ƒ } t | ƒ d k rm t d | ƒ ‚ n  x | D] } | | k Vqt Wd  S(   Ns*   The 'groups' parameter should not be None.t   copyRa   R2   i   sc   The groups parameter contains fewer than 2 unique groups (%s). LeaveOneGroupOut expects at least 2.(   R@   RB   R   R6   R`   R'   Rc   Rd   (   R%   R+   R,   R-   Rj   RŒ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR)   f  s    c         C  sF   | d k r t d ƒ ‚ n  t | d t d d ƒ} t t j | ƒ ƒ S(   sÄ  Returns the number of splitting iterations in the cross-validator

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.

        y : object
            Always ignored, exists for compatibility.

        groups : array-like, with shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set. This 'groups' parameter must always be specified to
            calculate the number of splits, though the other parameters can be
            omitted.

        Returns
        -------
        n_splits : int
            Returns the number of splitting iterations in the cross-validator.
        s*   The 'groups' parameter should not be None.Ra   R2   N(   R@   RB   R   R`   Rd   R'   Rc   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR9   s  s    c         C  s   t  t |  ƒ j | | | ƒ S(   sì  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, of length n_samples, optional
            The target variable for supervised learning problems.

        groups : array-like, with shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        (   RR   R   R1   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR1   Ž  s    N(   R=   R>   R?   R)   R@   R9   R1   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR   =  s   '	c           B  sA   e  Z d  Z d „  Z d „  Z d d d d „ Z d d d „ Z RS(   s,  Leave P Group(s) Out cross-validator

    Provides train/test indices to split data according to a third-party
    provided group. This group information can be used to encode arbitrary
    domain specific stratifications of the samples as integers.

    For instance the groups could be the year of collection of the samples
    and thus allow for cross-validation against time-based splits.

    The difference between LeavePGroupsOut and LeaveOneGroupOut is that
    the former builds the test sets with all the samples assigned to
    ``p`` different values of the groups while the latter uses samples
    all assigned the same groups.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    n_groups : int
        Number of groups (``p``) to leave out in the test split.

    Examples
    --------
    >>> from sklearn.model_selection import LeavePGroupsOut
    >>> X = np.array([[1, 2], [3, 4], [5, 6]])
    >>> y = np.array([1, 2, 1])
    >>> groups = np.array([1, 2, 3])
    >>> lpgo = LeavePGroupsOut(n_groups=2)
    >>> lpgo.get_n_splits(X, y, groups)
    3
    >>> lpgo.get_n_splits(groups=groups)  # 'groups' is always required
    3
    >>> print(lpgo)
    LeavePGroupsOut(n_groups=2)
    >>> for train_index, test_index in lpgo.split(X, y, groups):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...    X_train, X_test = X[train_index], X[test_index]
    ...    y_train, y_test = y[train_index], y[test_index]
    ...    print(X_train, X_test, y_train, y_test)
    TRAIN: [2] TEST: [0 1]
    [[5 6]] [[1 2]
     [3 4]] [1] [1 2]
    TRAIN: [1] TEST: [0 2]
    [[3 4]] [[1 2]
     [5 6]] [2] [1 1]
    TRAIN: [0] TEST: [1 2]
    [[1 2]] [[3 4]
     [5 6]] [1] [2 1]

    See also
    --------
    GroupKFold: K-fold iterator variant with non-overlapping groups.
    c         C  s   | |  _  d  S(   N(   Rk   (   R%   Rk   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   à  s    c   	      c  s  | d  k r t d ƒ ‚ n  t | d t d t d d  ƒ} t j | ƒ } |  j t | ƒ k rƒ t d |  j | |  j d f ƒ ‚ n  t	 t
 t | ƒ ƒ |  j ƒ } x_ | D]W } t j t | ƒ d t j ƒ} x+ | t j | ƒ D] } t | | | k <qà W| Vq¨ Wd  S(   Ns*   The 'groups' parameter should not be None.R“   Ra   R2   s´   The groups parameter contains fewer than (or equal to) n_groups (%d) numbers of unique groups (%s). LeavePGroupsOut expects that at least n_groups + 1 (%d) unique groups be presenti   (   R@   RB   R   R6   R`   R'   Rc   Rk   Rd   R   RA   R4   R   R5   RD   (	   R%   R+   R,   R-   Rj   t   combiR.   R/   t   l(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR)   ã  s     c         C  s^   | d k r t d ƒ ‚ n  t | d t d d ƒ} t t t t j | ƒ ƒ |  j	 d t
 ƒƒ S(   sÄ  Returns the number of splitting iterations in the cross-validator

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.

        y : object
            Always ignored, exists for compatibility.

        groups : array-like, with shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set. This 'groups' parameter must always be specified to
            calculate the number of splits, though the other parameters can be
            omitted.

        Returns
        -------
        n_splits : int
            Returns the number of splitting iterations in the cross-validator.
        s*   The 'groups' parameter should not be None.Ra   R2   RF   N(   R@   RB   R   R`   RG   R   Rd   R'   Rc   Rk   R6   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR9   õ  s    c         C  s   t  t |  ƒ j | | | ƒ S(   sì  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, of length n_samples, optional
            The target variable for supervised learning problems.

        groups : array-like, with shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        (   RR   R   R1   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR1     s    N(   R=   R>   R?   R&   R)   R@   R9   R1   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR   ©  s
   5		t   _RepeatedSplitsc           B  s>   e  Z d  Z d d d „ Z d d d „ Z d d d d „ Z RS(   s  Repeated splits for an arbitrary randomized CV splitter.

    Repeats splits for cross-validators n times with different randomization
    in each repetition.

    Parameters
    ----------
    cv : callable
        Cross-validator class.

    n_repeats : int, default=10
        Number of times cross-validator needs to be repeated.

    random_state : int, RandomState instance or None, optional, default=None
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    **cvargs : additional params
        Constructor parameters for cv. Must not contain random_state
        and shuffle.
    i
   c           s˜   t  | t j t j f ƒ s* t d ƒ ‚ n  | d k rE t d ƒ ‚ n  t ‡  f d †  d Dƒ ƒ rp t d ƒ ‚ n  | |  _ | |  _ | |  _	 ˆ  |  _
 d  S(	   Ns/   Number of repetitions must be of Integral type.i    s-   Number of repetitions must be greater than 0.c         3  s   |  ] } | ˆ  k Vq d  S(   N(    (   t   .0t   key(   t   cvargs(    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pys	   <genexpr>J  s    RQ   RP   s0   cvargs must not contain random_state or shuffle.(   RQ   RP   (   RI   R'   t   integerRJ   RK   RB   t   anyt   cvt	   n_repeatsRQ   R™   (   R%   Rœ   R   RQ   R™   (    (   R™   s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   C  s    			c   
      c  s‚   |  j  } t |  j ƒ } xc t | ƒ D]U } |  j d | d t |  j  } x. | j | | | ƒ D] \ } }	 | |	 f Vq_ Wq% Wd S(   sí  Generates indices to split data into training and test set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, of length n_samples
            The target variable for supervised learning problems.

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        RQ   RP   N(   R   R	   RQ   RA   Rœ   R6   R™   R1   (
   R%   R+   R,   R-   R   R{   t   idxRœ   R0   R/   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR1   S  s    	"c         C  sG   t  |  j ƒ } |  j d | d t |  j  } | j | | | ƒ |  j S(   sº  Returns the number of splitting iterations in the cross-validator

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.
            ``np.zeros(n_samples)`` may be used as a placeholder.

        y : object
            Always ignored, exists for compatibility.
            ``np.zeros(n_samples)`` may be used as a placeholder.

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Returns
        -------
        n_splits : int
            Returns the number of splitting iterations in the cross-validator.
        RQ   RP   (   R	   RQ   Rœ   R6   R™   R9   R   (   R%   R+   R,   R-   R{   Rœ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR9   t  s    N(   R=   R>   R?   R@   R&   R1   R9   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR–   +  s   !c           B  s    e  Z d  Z d d d d „ Z RS(   s?  Repeated K-Fold cross validator.

    Repeats K-Fold n times with different randomization in each repetition.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    n_splits : int, default=5
        Number of folds. Must be at least 2.

    n_repeats : int, default=10
        Number of times cross-validator needs to be repeated.

    random_state : int, RandomState instance or None, optional, default=None
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Examples
    --------
    >>> from sklearn.model_selection import RepeatedKFold
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([0, 0, 1, 1])
    >>> rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)
    >>> for train_index, test_index in rkf.split(X):
    ...     print("TRAIN:", train_index, "TEST:", test_index)
    ...     X_train, X_test = X[train_index], X[test_index]
    ...     y_train, y_test = y[train_index], y[test_index]
    ...
    TRAIN: [0 1] TEST: [2 3]
    TRAIN: [2 3] TEST: [0 1]
    TRAIN: [1 2] TEST: [0 3]
    TRAIN: [0 3] TEST: [1 2]

    Notes
    -----
    Randomized CV splitters may return different results for each call of
    split. You can make the results identical by setting ``random_state``
    to an integer.

    See also
    --------
    RepeatedStratifiedKFold: Repeats Stratified K-Fold n times.
    i   i
   c         C  s&   t  t |  ƒ j t | | d | ƒd  S(   NRO   (   RR   R   R&   R   (   R%   RO   R   RQ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   ¿  s    N(   R=   R>   R?   R@   R&   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR     s   .c           B  s    e  Z d  Z d d d d „ Z RS(   s«  Repeated Stratified K-Fold cross validator.

    Repeats Stratified K-Fold n times with different randomization in each
    repetition.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    n_splits : int, default=5
        Number of folds. Must be at least 2.

    n_repeats : int, default=10
        Number of times cross-validator needs to be repeated.

    random_state : None, int or RandomState, default=None
        Random state to be used to generate random state for each
        repetition.

    Examples
    --------
    >>> from sklearn.model_selection import RepeatedStratifiedKFold
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([0, 0, 1, 1])
    >>> rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2,
    ...     random_state=36851234)
    >>> for train_index, test_index in rskf.split(X, y):
    ...     print("TRAIN:", train_index, "TEST:", test_index)
    ...     X_train, X_test = X[train_index], X[test_index]
    ...     y_train, y_test = y[train_index], y[test_index]
    ...
    TRAIN: [1 2] TEST: [0 3]
    TRAIN: [0 3] TEST: [1 2]
    TRAIN: [1 3] TEST: [0 2]
    TRAIN: [0 2] TEST: [1 3]

    Notes
    -----
    Randomized CV splitters may return different results for each call of
    split. You can make the results identical by setting ``random_state``
    to an integer.

    See also
    --------
    RepeatedKFold: Repeats K-Fold n times.
    i   i
   c         C  s&   t  t |  ƒ j t | | d | ƒd  S(   NRO   (   RR   R   R&   R    (   R%   RO   R   RQ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   ó  s    N(   R=   R>   R?   R@   R&   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR   Ä  s   .t   BaseShuffleSplitc           B  sb   e  Z d  Z d d d d d „ Z d d d „ Z e d d d „ ƒ Z d d d d „ Z d „  Z	 RS(	   s6   Base class for ShuffleSplit and StratifiedShuffleSpliti
   t   defaultc         C  s5   t  | | ƒ | |  _ | |  _ | |  _ | |  _ d  S(   N(   t   _validate_shuffle_split_initRO   R   t
   train_sizeRQ   (   R%   RO   R   R¢   RQ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   û  s
    			c         c  sP   t  | | | ƒ \ } } } x. |  j | | | ƒ D] \ } } | | f Vq1 Wd S(   s»  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape (n_samples,)
            The target variable for supervised learning problems.

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.

        Notes
        -----
        Randomized CV splitters may return different results for each call of
        split. You can make the results identical by setting ``random_state``
        to an integer.
        N(   R   t   _iter_indices(   R%   R+   R,   R-   RT   RU   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR1     s    "c         C  s   d S(   s   Generate (train, test) indicesN(    (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR£   %  R:   c         C  s   |  j  S(   sÚ  Returns the number of splitting iterations in the cross-validator

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.

        y : object
            Always ignored, exists for compatibility.

        groups : object
            Always ignored, exists for compatibility.

        Returns
        -------
        n_splits : int
            Returns the number of splitting iterations in the cross-validator.
        (   RO   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR9   )  s    c         C  s
   t  |  ƒ S(   N(   R;   (   R%   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR<   >  s    N(
   R=   R>   R?   R@   R&   R1   R   R£   R9   R<   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyRŸ   ø  s   	"c           B  s   e  Z d  Z d d d „ Z RS(   s  Random permutation cross-validator

    Yields indices to split data into training and test sets.

    Note: contrary to other cross-validation strategies, random splits
    do not guarantee that all folds will be different, although this is
    still very likely for sizeable datasets.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    n_splits : int, default 10
        Number of re-shuffling & splitting iterations.

    test_size : float, int, None, default=0.1
        If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the test split. If int, represents the
        absolute number of test samples. If None, the value is set to the
        complement of the train size. By default (the parameter is
        unspecified), the value is set to 0.1.
        The default will change in version 0.21. It will remain 0.1 only
        if ``train_size`` is unspecified, otherwise it will complement
        the specified ``train_size``.

    train_size : float, int, or None, default=None
        If float, should be between 0.0 and 1.0 and represent the
        proportion of the dataset to include in the train split. If
        int, represents the absolute number of train samples. If None,
        the value is automatically set to the complement of the test size.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Examples
    --------
    >>> from sklearn.model_selection import ShuffleSplit
    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [3, 4], [5, 6]])
    >>> y = np.array([1, 2, 1, 2, 1, 2])
    >>> rs = ShuffleSplit(n_splits=5, test_size=.25, random_state=0)
    >>> rs.get_n_splits(X)
    5
    >>> print(rs)
    ShuffleSplit(n_splits=5, random_state=0, test_size=0.25, train_size=None)
    >>> for train_index, test_index in rs.split(X):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...  # doctest: +ELLIPSIS
    TRAIN: [1 3 0 4] TEST: [5 2]
    TRAIN: [4 0 2 5] TEST: [1 3]
    TRAIN: [1 2 4 0] TEST: [3 5]
    TRAIN: [3 4 1 0] TEST: [5 2]
    TRAIN: [3 5 1 0] TEST: [2 4]
    >>> rs = ShuffleSplit(n_splits=5, train_size=0.5, test_size=.25,
    ...                   random_state=0)
    >>> for train_index, test_index in rs.split(X):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...  # doctest: +ELLIPSIS
    TRAIN: [1 3 0] TEST: [5 2]
    TRAIN: [4 0 2] TEST: [1 3]
    TRAIN: [1 2 4] TEST: [3 5]
    TRAIN: [3 4 1] TEST: [5 2]
    TRAIN: [3 5 1] TEST: [2 4]
    c         c  sŒ   t  | ƒ } t | |  j |  j ƒ \ } } t |  j ƒ } xL t |  j ƒ D]; } | j | ƒ }	 |	 |  }
 |	 | | | !} | |
 f VqI Wd  S(   N(	   R   t   _validate_shuffle_splitR   R¢   R	   RQ   RA   RO   t   permutation(   R%   R+   R,   R-   RS   t   n_traint   n_testR{   RŒ   R¥   t   ind_testt	   ind_train(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR£   †  s    
N(   R=   R>   R?   R@   R£   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR   B  s   Bc           B  s;   e  Z d  Z d d d d d „ Z d „  Z d d d „ Z RS(   s	  Shuffle-Group(s)-Out cross-validation iterator

    Provides randomized train/test indices to split data according to a
    third-party provided group. This group information can be used to encode
    arbitrary domain specific stratifications of the samples as integers.

    For instance the groups could be the year of collection of the samples
    and thus allow for cross-validation against time-based splits.

    The difference between LeavePGroupsOut and GroupShuffleSplit is that
    the former generates splits using all subsets of size ``p`` unique groups,
    whereas GroupShuffleSplit generates a user-determined number of random
    test splits, each with a user-determined fraction of unique groups.

    For example, a less computationally intensive alternative to
    ``LeavePGroupsOut(p=10)`` would be
    ``GroupShuffleSplit(test_size=10, n_splits=100)``.

    Note: The parameters ``test_size`` and ``train_size`` refer to groups, and
    not to samples, as in ShuffleSplit.


    Parameters
    ----------
    n_splits : int (default 5)
        Number of re-shuffling & splitting iterations.

    test_size : float, int, None, optional
        If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the test split. If int, represents the
        absolute number of test samples. If None, the value is set to the
        complement of the train size. By default, the value is set to 0.2.
        The default will change in version 0.21. It will remain 0.2 only
        if ``train_size`` is unspecified, otherwise it will complement
        the specified ``train_size``.

    train_size : float, int, or None, default is None
        If float, should be between 0.0 and 1.0 and represent the
        proportion of the groups to include in the train split. If
        int, represents the absolute number of train groups. If None,
        the value is automatically set to the complement of the test size.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    i   R    c      	   C  sc   | d k r4 | d  k	 r+ t j d t ƒ n  d } n  t t |  ƒ j d | d | d | d | ƒ d  S(   NR    sY   From version 0.21, test_size will always complement train_size unless both are specified.gš™™™™™É?RO   R   R¢   RQ   (   R@   RW   RV   RY   RR   R   R&   (   R%   RO   R   R¢   RQ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   Ç  s    	
	c   
      c  s¿   | d  k r t d ƒ ‚ n  t | d t d d  ƒ} t j | d t ƒ\ } } xj t t |  ƒ j	 d | ƒ D]M \ } } t j
 t j | | ƒ ƒ } t j
 t j | | ƒ ƒ }	 | |	 f Vqj Wd  S(   Ns*   The 'groups' parameter should not be None.Ra   R2   Rb   R+   (   R@   RB   R   R`   R'   Rc   R6   RR   R   R£   t   flatnonzerot   in1d(
   R%   R+   R,   R-   t   classest   group_indicest   group_traint
   group_testRT   RU   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR£   ×  s    "c         C  s   t  t |  ƒ j | | | ƒ S(   s»  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape (n_samples,), optional
            The target variable for supervised learning problems.

        groups : array-like, with shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.

        Notes
        -----
        Randomized CV splitters may return different results for each call of
        split. You can make the results identical by setting ``random_state``
        to an integer.
        (   RR   R   R1   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR1   æ  s    N(   R=   R>   R?   R@   R&   R£   R1   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR   ”  s
   1		c         C  s  t  | ƒ } | |  |  j ƒ  } t j | ƒ } t | | j ƒ  ƒ } | d k r| | } t j t j | ƒ ƒ d d d … } xƒ | D]x } t j | | k ƒ \ }	 t t	 |	 ƒ | ƒ }
 | j
 |	 d |
 d t ƒ}	 | |	 c d 7<| |
 8} | d k r‡ Pq‡ q‡ Wn  | j t j ƒ S(   sö  Computes approximate mode of multivariate hypergeometric.

    This is an approximation to the mode of the multivariate
    hypergeometric given by class_counts and n_draws.
    It shouldn't be off by more than one.

    It is the mostly likely outcome of drawing n_draws many
    samples from the population given by class_counts.

    Parameters
    ----------
    class_counts : ndarray of int
        Population per class.
    n_draws : int
        Number of draws (samples to draw) from the overall population.
    rng : random state
        Used to break ties.

    Returns
    -------
    sampled_classes : ndarray of int
        Number of samples drawn from each class.
        np.sum(sampled_classes) == n_draws

    Examples
    --------
    >>> from sklearn.model_selection._split import _approximate_mode
    >>> _approximate_mode(class_counts=np.array([4, 2]), n_draws=3, rng=0)
    array([2, 1])
    >>> _approximate_mode(class_counts=np.array([5, 2]), n_draws=4, rng=0)
    array([3, 1])
    >>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),
    ...                   n_draws=2, rng=0)
    array([0, 1, 1, 0])
    >>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),
    ...                   n_draws=2, rng=42)
    array([1, 1, 0, 0])
    i    Niÿÿÿÿt   sizet   replacei   (   R	   t   sumR'   R   RG   t   sortRc   Ri   Rw   Rd   t   choiceR`   t   astype(   t   class_countst   n_drawsR{   t
   continuoust   flooredt   need_to_addt	   remaindert   valuest   valuet   indst   add_now(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyt   _approximate_mode  s     '
%
c           B  s;   e  Z d  Z d d d d d „ Z d d „ Z d d „ Z RS(   s–
  Stratified ShuffleSplit cross-validator

    Provides train/test indices to split data in train/test sets.

    This cross-validation object is a merge of StratifiedKFold and
    ShuffleSplit, which returns stratified randomized folds. The folds
    are made by preserving the percentage of samples for each class.

    Note: like the ShuffleSplit strategy, stratified random splits
    do not guarantee that all folds will be different, although this is
    still very likely for sizeable datasets.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    n_splits : int, default 10
        Number of re-shuffling & splitting iterations.

    test_size : float, int, None, optional
        If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the test split. If int, represents the
        absolute number of test samples. If None, the value is set to the
        complement of the train size. By default, the value is set to 0.1.
        The default will change in version 0.21. It will remain 0.1 only
        if ``train_size`` is unspecified, otherwise it will complement
        the specified ``train_size``.

    train_size : float, int, or None, default is None
        If float, should be between 0.0 and 1.0 and represent the
        proportion of the dataset to include in the train split. If
        int, represents the absolute number of train samples. If None,
        the value is automatically set to the complement of the test size.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Examples
    --------
    >>> from sklearn.model_selection import StratifiedShuffleSplit
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([0, 0, 0, 1, 1, 1])
    >>> sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)
    >>> sss.get_n_splits(X, y)
    5
    >>> print(sss)       # doctest: +ELLIPSIS
    StratifiedShuffleSplit(n_splits=5, random_state=0, ...)
    >>> for train_index, test_index in sss.split(X, y):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...    X_train, X_test = X[train_index], X[test_index]
    ...    y_train, y_test = y[train_index], y[test_index]
    TRAIN: [5 2 3] TEST: [4 1 0]
    TRAIN: [5 1 4] TEST: [0 2 3]
    TRAIN: [5 0 2] TEST: [4 3 1]
    TRAIN: [4 1 0] TEST: [2 3 5]
    TRAIN: [0 5 1] TEST: [3 4 2]
    i
   R    c         C  s#   t  t |  ƒ j | | | | ƒ d  S(   N(   RR   R!   R&   (   R%   RO   R   R¢   RQ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   ‰  s    c         c  sg  t  | ƒ } t | d t d d  ƒ} t | |  j |  j ƒ \ } } | j d k rˆ t j	 g  | D] } d j
 | j d ƒ ƒ ^ q^ ƒ } n  t j | d t ƒ\ } }	 | j d }
 t j |	 ƒ } t j | ƒ d k  rã t d ƒ ‚ n  | |
 k  rt d	 | |
 f ƒ ‚ n  | |
 k  r-t d
 | |
 f ƒ ‚ n  t j t j |	 d d ƒt j | ƒ d  ƒ } t |  j ƒ } xö t |  j ƒ D]å } t | | | ƒ } | | } t | | | ƒ } g  } g  } xy t |
 ƒ D]k } | j | | ƒ } | | j | d d ƒ} | j | | |  ƒ | j | | | | | | | !ƒ qÇW| j | ƒ } | j | ƒ } | | f VqzWd  S(   NRa   R2   i   t    t   strRb   i    s…   The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.sL   The train_size = %d should be greater or equal to the number of classes = %dsK   The test_size = %d should be greater or equal to the number of classes = %dt   kindt	   mergesortiÿÿÿÿt   modet   clip(   R   R   R`   R@   R¤   R   R¢   t   ndimR'   RD   t   joinRµ   Rc   R6   Rv   Re   Rw   RB   R1   Rf   t   cumsumR	   RQ   RA   RO   RÀ   R¥   t   taket   extend(   R%   R+   R,   R-   RS   R¦   R§   t   rowR¬   t	   y_indicest	   n_classesR¶   t   class_indicesR{   Rˆ   t   n_it   class_counts_remainingt   t_iRT   RU   RŒ   R¥   t   perm_indices_class_i(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR£   Ž  sF    7
	(c         C  s4   t  | d t d d ƒ} t t |  ƒ j | | | ƒ S(   sg  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

            Note that providing ``y`` is sufficient to generate the splits and
            hence ``np.zeros(n_samples)`` may be used as a placeholder for
            ``X`` instead of actual training data.

        y : array-like, shape (n_samples,)
            The target variable for supervised learning problems.
            Stratification is done based on the y labels.

        groups : object
            Always ignored, exists for compatibility.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.

        Notes
        -----
        Randomized CV splitters may return different results for each call of
        split. You can make the results identical by setting ``random_state``
        to an integer.
        Ra   R2   N(   R   R`   R@   RR   R!   R1   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR1   Ê  s    "N(   R=   R>   R?   R@   R&   R£   R1   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR!   K  s
   <	<c         C  s•  |  d k r4 | d k	 r+ t j d t ƒ n  d }  n  |  d k r[ | d k r[ t d ƒ ‚ n  |  d k	 rÕ t j |  ƒ j j d k r¤ |  d k rÒ t d |  ƒ ‚ qÒ qÕ t j |  ƒ j j d k rÕ t d	 |  ƒ ‚ qÕ n  | d k	 r‘t j | ƒ j j d k r`| d k rt d
 | ƒ ‚ qŽt j |  ƒ j j d k rŽ| |  d k rŽt d | |  ƒ ‚ qŽq‘t j | ƒ j j d k r‘t d | ƒ ‚ q‘n  d S(   s¥   Validation helper to check the test_size and train_size at init

    NOTE This does not take into account the number of samples which is known
    only at split
    R    sY   From version 0.21, test_size will always complement train_size unless both are specified.gš™™™™™¹?s-   test_size and train_size can not both be NoneRr   g      ð?s8   test_size=%f should be smaller than 1.0 or be an integerRŒ   s   Invalid value for test_size: %rs9   train_size=%f should be smaller than 1.0 or be an integersi   The sum of test_size and train_size = %f, should be smaller than 1.0. Reduce test_size and/or train_size.s    Invalid value for train_size: %rN(	   R@   RW   RV   RY   RB   R'   Ru   R2   RÃ   (   R   R¢   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR¡   ð  s4    	
	c         C  s´  | d k	 rL t j | ƒ j j d k rL | |  k rL t d | |  f ƒ ‚ n  | d k	 r˜ t j | ƒ j j d k r˜ | |  k r˜ t d | |  f ƒ ‚ n  | d k r­ d } n  t j | ƒ j j d k rÛ t | |  ƒ } n* t j | ƒ j j d k rt | ƒ } n  | d k r|  | } n: t j | ƒ j j d k rLt | |  ƒ } n t | ƒ } | d k rq|  | } n  | | |  k ržt d | | |  f ƒ ‚ n  t	 | ƒ t	 | ƒ f S(	   sv   
    Validation helper to check if the test/test sizes are meaningful wrt to the
    size of the data (n_samples)
    RŒ   s<   test_size=%d should be smaller than the number of samples %ds=   train_size=%d should be smaller than the number of samples %dR    gš™™™™™¹?Rr   s~   The sum of train_size and test_size = %d, should be smaller than the number of samples %d. Reduce test_size and/or train_size.N(
   R@   R'   Ru   R2   RÃ   RB   R   t   floatR   RG   (   RS   R   R¢   R§   R¦   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR¤     s6    	c           B  sD   e  Z d  Z d „  Z d d d d „ Z d „  Z d d d d „ Z RS(   s  Predefined split cross-validator

    Provides train/test indices to split data into train/test sets using a
    predefined scheme specified by the user with the ``test_fold`` parameter.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    test_fold : array-like, shape (n_samples,)
        The entry ``test_fold[i]`` represents the index of the test set that
        sample ``i`` belongs to. It is possible to exclude sample ``i`` from
        any test set (i.e. include sample ``i`` in every training set) by
        setting ``test_fold[i]`` equal to -1.

    Examples
    --------
    >>> from sklearn.model_selection import PredefinedSplit
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([0, 0, 1, 1])
    >>> test_fold = [0, 1, -1, 1]
    >>> ps = PredefinedSplit(test_fold)
    >>> ps.get_n_splits()
    2
    >>> print(ps)       # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    PredefinedSplit(test_fold=array([ 0,  1, -1,  1]))
    >>> for train_index, test_index in ps.split():
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...    X_train, X_test = X[train_index], X[test_index]
    ...    y_train, y_test = y[train_index], y[test_index]
    TRAIN: [1 2 3] TEST: [0]
    TRAIN: [0 2] TEST: [1 3]
    c         C  s_   t  j | d t  j ƒ|  _ t |  j ƒ |  _ t  j |  j ƒ |  _ |  j |  j d k |  _ d  S(   NR2   iÿÿÿÿ(   R'   RD   RG   t	   test_foldR   Rc   t   unique_folds(   R%   RÕ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   j  s    c         c  s[   t  j t |  j ƒ ƒ } x< |  j ƒ  D]. } | t  j | ƒ } | | } | | f Vq% Wd S(   s  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.

        y : object
            Always ignored, exists for compatibility.

        groups : object
            Always ignored, exists for compatibility.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        N(   R'   R(   Rd   RÕ   R)   R*   (   R%   R+   R,   R-   t   indR/   R0   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR1   p  s
    
c         c  sd   x] |  j  D]R } t j |  j | k ƒ d } t j t |  j ƒ d t j ƒ} t | | <| Vq
 Wd S(   s3   Generates boolean masks corresponding to test sets.i    R2   N(   RÖ   R'   Ri   RÕ   R4   Rd   R5   R6   (   R%   Rr   R/   R7   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR)   Œ  s
    !
c         C  s   t  |  j ƒ S(   sÚ  Returns the number of splitting iterations in the cross-validator

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.

        y : object
            Always ignored, exists for compatibility.

        groups : object
            Always ignored, exists for compatibility.

        Returns
        -------
        n_splits : int
            Returns the number of splitting iterations in the cross-validator.
        (   Rd   RÖ   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR9   ”  s    N(   R=   R>   R?   R&   R@   R1   R)   R9   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR"   G  s
   !		t   _CVIterableWrapperc           B  s;   e  Z d  Z d „  Z d d d d „ Z d d d d „ Z RS(   s5   Wrapper class for old style cv objects and iterables.c         C  s   t  | ƒ |  _ d  S(   N(   t   listRœ   (   R%   Rœ   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR&   ¬  s    c         C  s   t  |  j ƒ S(   sÚ  Returns the number of splitting iterations in the cross-validator

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.

        y : object
            Always ignored, exists for compatibility.

        groups : object
            Always ignored, exists for compatibility.

        Returns
        -------
        n_splits : int
            Returns the number of splitting iterations in the cross-validator.
        (   Rd   Rœ   (   R%   R+   R,   R-   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR9   ¯  s    c         c  s)   x" |  j  D] \ } } | | f Vq
 Wd S(   s  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.

        y : object
            Always ignored, exists for compatibility.

        groups : object
            Always ignored, exists for compatibility.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        N(   Rœ   (   R%   R+   R,   R-   RT   RU   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR1   Ä  s    N(   R=   R>   R?   R&   R@   R9   R1   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyRØ   ª  s   	RV   c         C  sÝ   |  d k s |  d k r1 t j t t ƒ d }  n  t |  t j ƒ r~ | rq | d k	 rq t | ƒ d k rq t	 |  ƒ St
 |  ƒ Sn  t |  d ƒ s t |  t ƒ rÙ t |  t ƒ s¼ t |  t ƒ rÏ t d |  ƒ ‚ n  t |  ƒ S|  S(	   s4  Input checker utility for building a cross-validator

    Parameters
    ----------
    cv : int, cross-validation generator or an iterable, optional
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 3-fold cross-validation,
        - integer, to specify the number of folds.
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, if classifier is True and ``y`` is either
        binary or multiclass, :class:`StratifiedKFold` is used. In all other
        cases, :class:`KFold` is used.

        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.20
            ``cv`` default value will change from 3-fold to 5-fold in v0.22.

    y : array-like, optional
        The target variable for supervised learning problems.

    classifier : boolean, optional, default False
        Whether the task is a classification task, in which case
        stratified KFold will be used.

    Returns
    -------
    checked_cv : a cross-validator instance.
        The return value is a cross-validator which generates the train/test
        splits via the ``split`` method.
    RV   i   Rs   Rt   R1   si   Expected cv as an integer, cross-validation object (from sklearn.model_selection) or an iterable. Got %s.N(   Rs   Rt   (   R@   RW   RV   t
   CV_WARNINGRY   RI   RJ   RK   R   R    R   t   hasattrRÂ   t   IterableRB   RØ   (   Rœ   R,   t
   classifier(    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR$   Þ  s    %	

c            sü  t  |  ƒ } | d k r' t d ƒ ‚ n  | j d d ƒ } | j d d ƒ } | j d d ƒ } | j d d ƒ } | j d t ƒ } | r  t d	 t | ƒ ƒ ‚ n  | d k rÔ d } | d k	 rÔ t j d
 t	 ƒ qÔ n  | d k rõ | d k rõ d } n  t
 |  Œ  }  | t k rx| d k	 r(t d ƒ ‚ n  t |  d ƒ } t | | | ƒ \ }	 }
 t j |	 ƒ ‰ t j |	 |	 |
 ƒ ‰  n^ | d k	 rt } n t } | d | d | d | ƒ } t | j d |  d d | ƒ ƒ \ ‰ ‰  t t j ‡  ‡ f d †  |  Dƒ ƒ ƒ S(   sõ  Split arrays or matrices into random train and test subsets

    Quick utility that wraps input validation and
    ``next(ShuffleSplit().split(X, y))`` and application to input data
    into a single call for splitting (and optionally subsampling) data in a
    oneliner.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    *arrays : sequence of indexables with same length / shape[0]
        Allowed inputs are lists, numpy arrays, scipy-sparse
        matrices or pandas dataframes.

    test_size : float, int or None, optional (default=0.25)
        If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the test split. If int, represents the
        absolute number of test samples. If None, the value is set to the
        complement of the train size. By default, the value is set to 0.25.
        The default will change in version 0.21. It will remain 0.25 only
        if ``train_size`` is unspecified, otherwise it will complement
        the specified ``train_size``.

    train_size : float, int, or None, (default=None)
        If float, should be between 0.0 and 1.0 and represent the
        proportion of the dataset to include in the train split. If
        int, represents the absolute number of train samples. If None,
        the value is automatically set to the complement of the test size.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    shuffle : boolean, optional (default=True)
        Whether or not to shuffle the data before splitting. If shuffle=False
        then stratify must be None.

    stratify : array-like or None (default=None)
        If not None, data is split in a stratified fashion, using this as
        the class labels.

    Returns
    -------
    splitting : list, length=2 * len(arrays)
        List containing train-test split of inputs.

        .. versionadded:: 0.16
            If the input is sparse, the output will be a
            ``scipy.sparse.csr_matrix``. Else, output type is the same as the
            input type.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = np.arange(10).reshape((5, 2)), range(5)
    >>> X
    array([[0, 1],
           [2, 3],
           [4, 5],
           [6, 7],
           [8, 9]])
    >>> list(y)
    [0, 1, 2, 3, 4]

    >>> X_train, X_test, y_train, y_test = train_test_split(
    ...     X, y, test_size=0.33, random_state=42)
    ...
    >>> X_train
    array([[4, 5],
           [0, 1],
           [6, 7]])
    >>> y_train
    [2, 0, 3]
    >>> X_test
    array([[2, 3],
           [8, 9]])
    >>> y_test
    [1, 4]

    >>> train_test_split(y, shuffle=False)
    [[0, 1, 2], [3, 4]]

    i    s$   At least one array required as inputR   R    R¢   RQ   t   stratifyRP   s   Invalid parameters passed: %ssY   From version 0.21, test_size will always complement train_size unless both are specified.g      Ð?s@   Stratified train/test split is not implemented for shuffle=FalseR+   R,   c         3  s-   |  ]# } t  | ˆ ƒ t  | ˆ  ƒ f Vq d  S(   N(   R
   (   R—   t   a(   RU   RT   (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pys	   <genexpr>¢  s   N(   Rd   RB   t   popR@   R6   RN   RÂ   RW   RV   RY   R   R`   R   R¤   R'   R(   R!   R   t   nextR1   RÙ   R   t   from_iterable(   t   arrayst   optionst   n_arraysR   R¢   RQ   RÞ   RP   RS   R¦   R§   t   CVClassRœ   (    (   RU   RT   s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR#     sH    X					(c      	   C  s[  |  j  } t | j d | j ƒ } t | ƒ } | t j k rE g  } nL t g  | j j ƒ  D]0 } | j d k rX | j	 | j
 k rX | j ^ qX ƒ } |  j  j } t ƒ  } x’ | D]Š } t j d t ƒ zV t j d t ƒ  }	 t |  | d  ƒ }
 Wd  QXt |	 ƒ r|	 d j t k rw­ n  Wd  t j j d ƒ X|
 | | <q­ Wd | t | d t | ƒ ƒf S(   Nt   deprecated_originalR%   t   alwayst   recordi    s   %s(%s)t   offset(   t	   __class__t   getattrR&   R   t   objectt   sortedt
   parametersR¼   t   nameRÃ   t   VAR_KEYWORDR=   t   dictRW   t   simplefiltert   DeprecationWarningt   catch_warningsR6   R@   Rd   t   categoryt   filtersRà   R   (   R%   R‡   t   initt   init_signaturet   argsRC   t
   class_namet   paramsR˜   t   wR½   (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyR;   ª  s&    		3	
(E   R?   t
   __future__R    R   RW   t	   itertoolsR   R   t   mathR   R   RJ   t   abcR   R   t   numpyR'   t   utilsR   R	   R
   t   utils.validationR   R   R   t   utils.multiclassR   t   externals.sixR   t   externals.six.movesR   t   utils.fixesR   R   R   RÜ   t   baseR   t   __all__RX   RÚ   R   R   R   RH   R   R   R    R   R   R   R–   R   R   RŸ   R   R   RÀ   R!   R¡   R¤   R"   RØ   R@   R`   R$   R#   t   __test__R;   (    (    (    s=   lib/python2.7/site-packages/sklearn/model_selection/_split.pyt   <module>   sz   		BJGRZxœkl‚e44JRs	D¥	+	,c4:		