此篇记录在python实操过程中遇见的问题以及最终的解决办法

# 导入常用的包

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import scipy as si

%matplotlib inline

写在最前

善用帮助：函数和文档
多用函数和循环，减少代码量

读写文件的几种方式

python
- f.read();f.readline();f.readlines()
- f.write();f.writelines();f.close()
- f.seek();f.tell();
numpy
- numpy.loadtxt();numpy.savetxt()
- numpy.tofile();numpy.fromfile()
- numpy.genfromtxt();numpy.recfromcsv()
- numpy.save();numpy.load() .npy
- numpy.savez();numpy.loadz() .npz
- numpy.savez_compressed()
scipy
- scipy.io.savemat();scipy.io.loadmat()
pandas
- pd.read_csv()
- pd.read_hdf()
- pd.read_excel()

读取图像

scipy
- scipy.misc.imread()
matplotlib.pyplot
- plt.imread()
scikit-image:
- skimage.io

1	# help(np.loadtxt)

速查表

Bokeh:Bokeh 是 Python 的交互式可视图库，用于生成在浏览器里显示的大规模数据集高性能可视图。
jupyter notebook:Jupyter将代码与文本封装为三种类型的单元格：Markdown、代码与 NBConvert
Keras:Keras是强大、易用的深度学习库，基于Theano和TensorFlow提供了高阶神经网络API，用于开发和评估深度学习模型。
Matplotlib:Matplotlib 是 Python 的二维绘图库，用于生成符合出版质量或跨平台交互环境的各类图形
Numpy:Numpy 是 Python 数据科学计算的核心库，提供了高性能的多维数组对象及处理数组的工具。
Pandas:Pandas 是基于 Numpy 创建的 Python 库，为 Python 提供了易于使用的数据结构和数据分析工具。
Skit-Learn:Scikit-learn 是开源的 Python 库，通过统一的界面实现机器学习、预处理、交叉验证及可视化算法。
Seaborn:Seaborn 是基于 matplotlib 开发的高阶Python 数据可视图库，用于绘制优雅、美观的统计图形。
导入数据：大多数情况下，都是用 Numpy 或 Pandas 导入数据。

numpy

numpy 索引

切片和索引
高级索引
- 整数数组索引
- 布尔索引
- 花式索引

import numpy as np
 
a = np.arange(10)
s = slice(2,7,2)   # 从索引 2 开始到索引 7 停止，间隔为2
print (a[s])
[2 4 6]

 
a = np.arange(10)
print(a[2:])
[2 3 4 5 6 7 8 9]


a = np.arange(10)  # [0 1 2 3 4 5 6 7 8 9]
print(a[2:5])
[2 3 4]


a = np.array([[1,2,3],[3,4,5],[4,5,6]])
print(a)
# 从某个索引处开始切割
print('从数组索引 a[1:] 处开始切割')
print(a[1:])
[[1 2 3]
 [3 4 5]
 [4 5 6]]
从数组索引 a[1:] 处开始切割
[[3 4 5]
 [4 5 6]]


import numpy as np
 
a = np.array([[1,2,3],[3,4,5],[4,5,6]]) 
print(a,'\n')
print (a[...,1],'\n')   # 第2列元素
print (a[1,...],'\n')   # 第2行元素
print (a[...,1:])  # 第2列及剩下的所有元素

[[1 2 3]
 [3 4 5]
 [4 5 6]] 

[2 4 5] 

[3 4 5] 

[[2 3]
 [4 5]
 [5 6]]

 
x = np.array([[1,  2],  [3,  4],  [5,  6]])
print(x,'\n')
y = x[[0,1,2],  [0,1,0]]  
print (y)

[[1 2]
 [3 4]
 [5 6]] 

[1 4 5]

 
x = np.array([[  0,  1,  2],[  3,  4,  5],[  6,  7,  8],[  9,  10,  11]])  
print ('我们的数组是：' )
print (x)
print ('\n')
rows = np.array([[0,0],[3,3]]) 
cols = np.array([[0,2],[0,2]]) 
y = x[rows,cols]  
print  ('这个数组的四个角元素是：')
print (y)

我们的数组是：
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


这个数组的四个角元素是：
[[ 0  2]
 [ 9 11]]

 
a = np.array([[1,2,3], [4,5,6],[7,8,9]])
b = a[1:3, 1:3]
c = a[1:3,[1,2]]
d = a[...,1:]
print(b)
print(c)
print(d)

[[5 6]
 [8 9]]
[[5 6]
 [8 9]]
[[2 3]
 [5 6]
 [8 9]]

 
x = np.array([[  0,  1,  2],[  3,  4,  5],[  6,  7,  8],[  9,  10,  11]])  
print ('我们的数组是：')
print (x)
print ('\n')
# 现在我们会打印出大于 5 的元素  
print  ('大于 5 的元素是：')
print (x[x >  5])

我们的数组是：
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


大于 5 的元素是：
[ 6  7  8  9 10 11]

 
a = np.array([np.nan,  1,2,np.nan,3,4,5]) 
print(a,'\n')
print (a[~np.isnan(a)])

[nan  1.  2. nan  3.  4.  5.] 

[1. 2. 3. 4. 5.]

 
a = np.array([1,  2+6j,  5,  3.5+5j]) 

print (a[np.iscomplex(a)])

[2. +6.j 3.5+5.j]

a = np.arange(3*4*4).reshape(3,4,4)
a

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]],

       [[16, 17, 18, 19],
        [20, 21, 22, 23],
        [24, 25, 26, 27],
        [28, 29, 30, 31]],

       [[32, 33, 34, 35],
        [36, 37, 38, 39],
        [40, 41, 42, 43],
        [44, 45, 46, 47]]])

1 2	b = a[1] b

array([[16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

1 2	b[1:3,1:3]=True b

array([[16, 17, 18, 19],
       [20,  1,  1, 23],
       [24,  1,  1, 27],
       [28, 29, 30, 31]])

mask = np.empty((256,256))


for i in range(1,257):
    for j in range(1,257):
        if ((i-127.5)**2 + (j-98)**2) <= (53.8**2):
            mask[i-1,j-1]=True
          
        else:
            mask[i-1,j-1]=False


import matplotlib.pyplot as plt


plt.imshow(mask)

1	<matplotlib.image.AxesImage at 0x17ab71641f0>

mask

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

1 2	temp = np.full((256,256),5) plt.imshow(temp)

1	<matplotlib.image.AxesImage at 0x17ab718ddf0>

1	mask = mask.astype('bool')

mask

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

1 2	print(np.shape(temp[mask])) print(np.pi53.8*2)

1 2	(9092,) 9093.151440256439

1	plt.imshow(mask)

1	<matplotlib.image.AxesImage at 0x17755787100>

创建数组array和矩阵matrix

numpy中二者几乎等效，且array更高效

import numpy as np
a = np.array([2,3,4])
print(a)
print(a.shape,a.ndim,a.dtype,a.dtype.name,a.itemsize,a.size,a.data)

1 2	[2 3 4] (3,) 1 int32 int32 4 3 <memory at 0x000002B1B0350C40>

1
2
3

b = np.arange(12).reshape(4,3)
b
# array, zeros, zeros_like, ones, ones_like, empty, empty_like, arange, linspace,

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

基本运算

广播运算
元素乘法
矩阵乘法

# 广播运算

# 对应元素进行运算：
a = np.array( [20,30,40,50] )
b = np.arange( 4 )
print(b)
c = a-b
print(c)
print(b**2)
print(b+5)

print(10*np.sin(a))

a<35

[0 1 2 3]
[20 29 38 47]
[0 1 4 9]
[5 6 7 8]
[ 9.12945251 -9.88031624  7.4511316  -2.62374854]





array([ True,  True, False, False])

1 2	# 对应元素相乘 a*b

1	array([ 0, 30, 80, 150])

# 矩阵乘法
A = np.array( [[1,1],
               [0,1]] )
B = np.array( [[2,0],
               [3,4]] )
print(A * B)                      # elementwise product: 点乘
print(A @ B)               # matrix product
print(A.dot(B))                  # another matrix product

[[2 0]
 [0 4]]
[[5 4]
 [3 4]]
[[5 4]
 [3 4]]

1 2	temp = np.arange(4).reshape(2,2) temp

1 2	array([[0, 1], [2, 3]])

1	temp.mean()

1.5

1	temp.mean(axis=0)

1	array([1., 2.])

1 2	temp.mean(axis=1) # a.max,a.min,a.var,a.std,...

1	array([0.5, 2.5])

1 2	temp1 = np.arange(8).reshape(2,2,2) temp1

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

1	temp1[0,0,0]

1	temp1[1,0,0]

1 2	temp1.mean(axis=0) # 即沿着轴向取平均

1 2	array([[2., 3.], [4., 5.]])

1	temp1.mean(axis=1)

1 2	array([[1., 2.], [5., 6.]])

1	temp1.mean(axis=2)

1 2	array([[0.5, 2.5], [4.5, 6.5]])

# 广播运算

线性代数

import numpy as np
a = np.array([[1.0, 2.0], [3.0, 4.0]])
print('a=\n',a)

print("a.T=\n",a.transpose()) # 转置
print("a'=\n",np.linalg.inv(a)) # 求逆矩阵

u = np.eye(2) # unit 2x2 matrix; "eye" represents "I"
print('I=\n',u) # 单位矩阵
j = np.array([[0.0, -1.0], [1.0, 0.0]])
print('j=\n',j)
print(j @ j)   # matrix product

print(np.trace(u))  # trace :矩阵的迹
y = np.array([[5.], [7.]])
print('result=\n',np.linalg.solve(a, y)) # 求线性方程

print('eig=\n',np.linalg.eig(j)) #

a=
 [[1. 2.]
 [3. 4.]]
a.T=
 [[1. 3.]
 [2. 4.]]
a'=
 [[-2.   1. ]
 [ 1.5 -0.5]]
I=
 [[1. 0.]
 [0. 1.]]
j=
 [[ 0. -1.]
 [ 1.  0.]]
[[-1.  0.]
 [ 0. -1.]]
2.0
result=
 [[-3.]
 [ 4.]]
eig=
 (array([0.+1.j, 0.-1.j]), array([[0.70710678+0.j        , 0.70710678-0.j        ],
       [0.        -0.70710678j, 0.        +0.70710678j]]))

python图像处理

1.NumPy +SciPy
2.scikit-image

scipy 图像处理

from scipy import misc
import matplotlib.pyplot as plt

face = misc.face()
plt.imshow(face)
plt.show()

几何变换

from scipy import ndimage

face = misc.face(gray=True)
plt.imshow(face,cmap='gray')

shifted_face = ndimage.shift(face, (50, 50))
shifted_face2 = ndimage.shift(face, (50, 50), mode='nearest')
rotated_face = ndimage.rotate(face, 30)
cropped_face = face[50:-50, 50:-50]
zoomed_face = ndimage.zoom(face, 2)
zoomed_face.shape

1	(1536, 2048)

plt.figure(figsize=(10,12))
plt.subplot(321)  
plt.imshow(face, cmap='gray')  
plt.axis('off')

plt.subplot(322)  
plt.imshow(shifted_face, cmap=plt.cm.gray)  
plt.axis('off')

plt.subplot(323)  
plt.imshow(shifted_face2, cmap=plt.cm.gray)  
plt.axis('off')

plt.subplot(324)  
plt.imshow(rotated_face, cmap=plt.cm.gray)  
plt.axis('off')

plt.subplot(325)  
plt.imshow(cropped_face, cmap=plt.cm.gray)  
plt.axis('off')

plt.subplot(326)  
plt.imshow(zoomed_face, cmap=plt.cm.gray)  
plt.axis('off')

1	(-0.5, 2047.5, 1535.5, -0.5)

# Load some data
from scipy import misc
face = misc.face(gray=True)

# Apply a variety of transformations
from scipy import ndimage
from matplotlib import pyplot as plt
shifted_face = ndimage.shift(face, (50, 50))
shifted_face2 = ndimage.shift(face, (50, 50), mode='nearest')
rotated_face = ndimage.rotate(face, 30)
cropped_face = face[50:-50, 50:-50]
zoomed_face = ndimage.zoom(face, 2)
zoomed_face.shape

plt.figure(figsize=(15, 3))
plt.subplot(151)
plt.imshow(shifted_face, cmap=plt.cm.gray)
plt.axis('off')

plt.subplot(152)
plt.imshow(shifted_face2, cmap=plt.cm.gray)
plt.axis('off')

plt.subplot(153)
plt.imshow(rotated_face, cmap=plt.cm.gray)
plt.axis('off')

plt.subplot(154)
plt.imshow(cropped_face, cmap=plt.cm.gray)
plt.axis('off')

plt.subplot(155)
plt.imshow(zoomed_face, cmap=plt.cm.gray)
plt.axis('off')

plt.subplots_adjust(wspace=.05, left=.01, bottom=.01, right=.99, top=.99)

plt.show()

图像过滤

# Load some data
from scipy import misc
face = misc.face(gray=True)
face = face[:512, -512:]  # crop out square on right

# Apply a variety of filters
from scipy import ndimage
from scipy import signal
from matplotlib import pyplot as plt

import numpy as np
noisy_face = np.copy(face).astype(np.float)
noisy_face += face.std() * 0.5 * np.random.standard_normal(face.shape)
blurred_face = ndimage.gaussian_filter(noisy_face, sigma=3)
median_face = ndimage.median_filter(noisy_face, size=5)
wiener_face = signal.wiener(noisy_face, (5, 5))

plt.figure(figsize=(12, 3.5))
plt.subplot(141)
plt.imshow(noisy_face, cmap=plt.cm.gray)
plt.axis('off')
plt.title('noisy')

plt.subplot(142)
plt.imshow(blurred_face, cmap=plt.cm.gray)
plt.axis('off')
plt.title('Gaussian filter')

plt.subplot(143)
plt.imshow(median_face, cmap=plt.cm.gray)
plt.axis('off')
plt.title('median filter')

plt.subplot(144)
plt.imshow(wiener_face, cmap=plt.cm.gray)
plt.title('Wiener filter')
plt.axis('off')

plt.subplots_adjust(wspace=.05, left=.01, bottom=.01, right=.99, top=.99)

plt.show()

数学形态学运算

二值形态学
1. Erosion 腐蚀； scipy.ndimage.binary_erosion()
2. Dilation 膨胀； scipy.ndimage.binary_dilation()
3. Opening 开运算； scipy.ndimage.binary_opening()
4. Closing 闭运算； scipy.ndimage.binary_closing()
灰度形态学

图像处理：scikit-image

import numpy as np
check = np.zeros((9, 9))
check[::2, 1::2] = 1
check[1::2, ::2] = 1
import matplotlib.pyplot as plt
plt.imshow(check, cmap='gray', interpolation='nearest')

1	<matplotlib.image.AxesImage at 0x17ab70e3af0>

1 2	import skimage from skimage import data # most functions are in subpackages

camera = data.camera()
camera.dtype
plt.imshow(camera)
camera.shape

from skimage import filters
filtered_camera = filters.gaussian(camera, 1)
type(filtered_camera)

1	numpy.ndarray

图像处理：scipy.ndimage

from scipy import misc
f = misc.face()
misc.imsave('face.png', f) # uses the Image module (PIL)

import matplotlib.pyplot as plt
plt.imshow(f)
plt.show()

---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-8-d3ddc3b982b6> in <module>
      1 from scipy import misc
      2 f = misc.face()
----> 3 misc.imsave('face.png', f) # uses the Image module (PIL)
      4 
      5 import matplotlib.pyplot as plt


AttributeError: module 'scipy.misc' has no attribute 'imsave'

from scipy import misc
from scipy import ndimage

face = misc.face()
misc.imsave('face.png', face) # First we need to create the PNG file

face = misc.imread('face.png')
print(type(face) )   

print(face.shape, face.dtype)

face.tofile('face1.raw') # Create raw file
face_from_raw = np.fromfile('face1.raw', dtype=np.uint8)
face_from_raw.shape

face_from_raw.shape = (768, 1024, 3)

1	face_memmap = np.memmap('face.raw', dtype=np.uint8, shape=(768, 1024, 3))

for i in range(10):
    im = np.random.randint(0, 256, 10000).reshape((100, 100))
    misc.imsave('random_%02d.png' % i, im)
from glob import glob
filelist = glob('random*.png')
filelist.sort()

1
2
3

f = misc.face(gray=True)  # retrieve a grayscale image
import matplotlib.pyplot as plt
plt.imshow(f, cmap=plt.cm.gray)

plt.imshow(f, cmap=plt.cm.gray, vmin=30, vmax=200)      

# Remove axes and ticks
plt.axis('off')

1	plt.contour(f, [50, 200])

1	face = misc.face(gray=True)

1	face[0, 40]

1	face[10:13, 20:23]

1	face[100:120] = 255

1	lx, ly = face.shape

1 2	X, Y = np.ogrid[0:lx, 0:ly] mask = (X - lx / 2) 2 + (Y - ly / 2) 2 > lx * ly / 4

1	face[mask] = 0

1	face[range(400), range(400)] = 255

1	plt.imshow(face, cmap=plt.cm.gray)

Pandas

creation

Series
- by passing a list of values, letting pandas create a defualt integer index
- pd.Series(value,index=list)
DataFrame
- by passing a NumPy array
- by passing a dict of objects
- df.dtypes

viewing data

- df.head();df.tail();df.index;df.columns.
- df.to_numpy():numpy array have one dtype for the entire array, while pandas dataframe have one dtype per column.
- df.describe(): show a quick statistic summary of your data
- df.T : transppsing your data
- df.sort_index(): sorting by an axis
- df.sort_values(): sorting by values

selection

1. selection by label: df.loc[]; df.at[]
2. selection by position: df.iloc[]; df.iat[]
3. Boolean indexing
    - isin()
4. setting values
    - by indexes
    - by label
    - by position

missing data

import numpy as np
import pandas as pd

s = pd.Series([1,3,5,np.nan,6,8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

1 2	dates = pd.date_range('20130101',periods=6) dates

1
2
3

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

1 2	df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=list('ABCD')) df

	A	B	C	D
2013-01-01	0.024195	0.192700	-1.662034	-1.575218
2013-01-02	1.772569	2.583528	-1.617492	1.292106
2013-01-03	1.404551	0.511434	-0.130941	-0.180790
2013-01-04	-1.088395	-0.959165	0.931658	1.973182
2013-01-05	-0.733413	1.216093	0.712507	0.151266
2013-01-06	-0.583591	-0.677252	-0.045332	1.818146

df2 = pd.DataFrame({'A': 1.,
'B': pd.Timestamp('20130102'),
'C': pd.Series(1, index=list(range(4)), dtype='float32'),
 'D': np.array([3] * 4, dtype='int32'),
'E': pd.Categorical(["test", "train", "test", "train"]),
'F': 'foo'})
df2

	A	B	C	D	E	F
0	1.0	2013-01-02	1.0	3	test	foo
1	1.0	2013-01-02	1.0	3	train	foo
2	1.0	2013-01-02	1.0	3	test	foo
3	1.0	2013-01-02	1.0	3	train	foo

1	df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

df

	A	B	C	D
2013-01-01	0.024195	0.192700	-1.662034	-1.575218
2013-01-02	1.772569	2.583528	-1.617492	1.292106
2013-01-03	1.404551	0.511434	-0.130941	-0.180790
2013-01-04	-1.088395	-0.959165	0.931658	1.973182
2013-01-05	-0.733413	1.216093	0.712507	0.151266
2013-01-06	-0.583591	-0.677252	-0.045332	1.818146

df.head()

	A	B	C	D
2013-01-01	0.024195	0.192700	-1.662034	-1.575218
2013-01-02	1.772569	2.583528	-1.617492	1.292106
2013-01-03	1.404551	0.511434	-0.130941	-0.180790
2013-01-04	-1.088395	-0.959165	0.931658	1.973182
2013-01-05	-0.733413	1.216093	0.712507	0.151266

1	df.tail(3)

	A	B	C	D
2013-01-04	-1.088395	-0.959165	0.931658	1.973182
2013-01-05	-0.733413	1.216093	0.712507	0.151266
2013-01-06	-0.583591	-0.677252	-0.045332	1.818146

df.index

1
2
3

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

1	df.columns

1	Index(['A', 'B', 'C', 'D'], dtype='object')

1	df.to_numpy()

array([[ 0.02419536,  0.1927002 , -1.66203397, -1.57521826],
       [ 1.77256918,  2.58352792, -1.61749227,  1.29210627],
       [ 1.40455066,  0.51143439, -0.13094149, -0.18079014],
       [-1.08839452, -0.95916509,  0.93165768,  1.97318197],
       [-0.73341308,  1.21609308,  0.71250658,  0.15126616],
       [-0.58359064, -0.67725157, -0.04533231,  1.81814613]])

1	df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

1	df.describe()

	A	B	C	D
count	6.000000	6.000000	6.000000	6.000000
mean	0.132653	0.477890	-0.301939	0.579782
std	1.189356	1.300815	1.115929	1.370302
min	-1.088395	-0.959165	-1.662034	-1.575218
25%	-0.695957	-0.459764	-1.245855	-0.097776
50%	-0.279698	0.352067	-0.088137	0.721686
75%	1.059462	1.039928	0.523047	1.686636
max	1.772569	2.583528	0.931658	1.973182

df.T

	2013-01-01	2013-01-02	2013-01-03	2013-01-04	2013-01-05	2013-01-06
A	0.024195	1.772569	1.404551	-1.088395	-0.733413	-0.583591
B	0.192700	2.583528	0.511434	-0.959165	1.216093	-0.677252
C	-1.662034	-1.617492	-0.130941	0.931658	0.712507	-0.045332
D	-1.575218	1.292106	-0.180790	1.973182	0.151266	1.818146

df

	A	B	C	D
2013-01-01	0.024195	0.192700	-1.662034	-1.575218
2013-01-02	1.772569	2.583528	-1.617492	1.292106
2013-01-03	1.404551	0.511434	-0.130941	-0.180790
2013-01-04	-1.088395	-0.959165	0.931658	1.973182
2013-01-05	-0.733413	1.216093	0.712507	0.151266
2013-01-06	-0.583591	-0.677252	-0.045332	1.818146

1	df.sort_index(axis=1,ascending=False)

	D	C	B	A
2013-01-01	-1.575218	-1.662034	0.192700	0.024195
2013-01-02	1.292106	-1.617492	2.583528	1.772569
2013-01-03	-0.180790	-0.130941	0.511434	1.404551
2013-01-04	1.973182	0.931658	-0.959165	-1.088395
2013-01-05	0.151266	0.712507	1.216093	-0.733413
2013-01-06	1.818146	-0.045332	-0.677252	-0.583591

1	df.sort_values(by='B')

	A	B	C	D
2013-01-04	-1.088395	-0.959165	0.931658	1.973182
2013-01-06	-0.583591	-0.677252	-0.045332	1.818146
2013-01-01	0.024195	0.192700	-1.662034	-1.575218
2013-01-03	1.404551	0.511434	-0.130941	-0.180790
2013-01-05	-0.733413	1.216093	0.712507	0.151266
2013-01-02	1.772569	2.583528	-1.617492	1.292106

df['A']

2013-01-01    0.024195
2013-01-02    1.772569
2013-01-03    1.404551
2013-01-04   -1.088395
2013-01-05   -0.733413
2013-01-06   -0.583591
Freq: D, Name: A, dtype: float64

df[0:3]

	A	B	C	D
2013-01-01	0.024195	0.192700	-1.662034	-1.575218
2013-01-02	1.772569	2.583528	-1.617492	1.292106
2013-01-03	1.404551	0.511434	-0.130941	-0.180790

1	df['20130102':'20130104']

	A	B	C	D
2013-01-02	1.772569	2.583528	-1.617492	1.292106
2013-01-03	1.404551	0.511434	-0.130941	-0.180790
2013-01-04	-1.088395	-0.959165	0.931658	1.973182

1	df.loc[dates[0]]

A    0.024195
B    0.192700
C   -1.662034
D   -1.575218
Name: 2013-01-01 00:00:00, dtype: float64

1	df.loc[:,['A','B']]

	A	B
2013-01-01	0.024195	0.192700
2013-01-02	1.772569	2.583528
2013-01-03	1.404551	0.511434
2013-01-04	-1.088395	-0.959165
2013-01-05	-0.733413	1.216093
2013-01-06	-0.583591	-0.677252

1	df.loc['20130102':'20130104',['A','B']]

	A	B
2013-01-02	1.772569	2.583528
2013-01-03	1.404551	0.511434
2013-01-04	-1.088395	-0.959165

1	df.loc['20130102', ['A', 'B']]

1
2
3

A    1.772569
B    2.583528
Name: 2013-01-02 00:00:00, dtype: float64

1	df.loc[dates[0], 'A']

1	0.024195360467579575

1	df.at[dates[0], 'A']

1	0.024195360467579575

1	df.iloc[3]

A   -1.088395
B   -0.959165
C    0.931658
D    1.973182
Name: 2013-01-04 00:00:00, dtype: float64

1	df.iloc[3:5, 0:2]

	A	B
2013-01-04	-1.088395	-0.959165
2013-01-05	-0.733413	1.216093

1	df.iloc[[1, 2, 4], [0, 2]]

	A	C
2013-01-02	1.772569	-1.617492
2013-01-03	1.404551	-0.130941
2013-01-05	-0.733413	0.712507

1	df.iloc[1:3, :]

	A	B	C	D
2013-01-02	1.772569	2.583528	-1.617492	1.292106
2013-01-03	1.404551	0.511434	-0.130941	-0.180790

1	df.iloc[:, 1:3]

	B	C
2013-01-01	0.192700	-1.662034
2013-01-02	2.583528	-1.617492
2013-01-03	0.511434	-0.130941
2013-01-04	-0.959165	0.931658
2013-01-05	1.216093	0.712507
2013-01-06	-0.677252	-0.045332

pandas 索引

中括号索引

df[0:3]

df[0:1]

df['A']

df.A

1	df['A':'D']

1	df[[0:3],[0:3]]

中括号只能索引行与单列

单列的索引，只能是自定义标签，且可以用df.columns_name表示。

行的索引可以用两套索引，但必须带冒号，且顾头不顾尾（按python惯例）。

按标签索引：.loc[[],[]]

同矩阵的索引类似，顾头顾尾，行列的部分的中括号可以酌情省略。

.iat[[],[]],同.iloc[[],[]]，但速度更快。

按位置索引：.iloc[[],[]]

同矩阵的索引类似，顾头不顾尾，行列的部分的中括号可以酌情省略。

.iat[[],[]],同.iloc[[],[]]，但速度更快。

E 布尔索引

Scipy

scipy.special

1
2
3

import numpy as np
import matplotlib.pyplot as plt
import scipy as cp

1	from scipy import constants as con

con.pi

1	3.141592653589793

1 2	import math math.pi

1	3.141592653589793

1	con.golden

1	1.618033988749895

1	con.golden_ratio

1	1.618033988749895

con.c

1	299792458.0

1	con.speed_of_light

1	299792458.0

1	con.speed_of_sound

340.5

con.mu_0

1	1.25663706212e-06

1	con.epsilon_0

1	8.8541878128e-12

1	# help(con.physical_constants)

1	con.physical_constants['alpha particle mass']

1	(6.6446573357e-27, 'kg', 2e-36)

con.value

1	<function scipy.constants.codata.value(key)>

1	from scipy import special as scs

1	scs.geterr()

{'singular': 'ignore',
 'underflow': 'ignore',
 'overflow': 'ignore',
 'slow': 'ignore',
 'loss': 'ignore',
 'no_result': 'ignore',
 'domain': 'ignore',
 'arg': 'ignore',
 'other': 'ignore'}

v = 5
z = np.arange(0,100,0.1)
j = scs.jv(v,z)
plt.plot(z,j)

1	[<matplotlib.lines.Line2D at 0x2b1af1c5280>]

m = 0
v = 10
x = np.linspace(-1,1,100)
a = scs.lpmv(m,v,x)
plt.plot(x,a)

1	[<matplotlib.lines.Line2D at 0x2b1af1d21c0>]

1
2

scipy.optimize 练习

在数学最优化中，Rosenbrock函数是一个用来测试最优化算法性能的非凸函数，由Howard Harry Rosenbrock在1960年提出。也称为Rosenbrock山谷或Rosenbrock香蕉函数，也简称为香蕉函数。

$f(x,y) = (1-x)^2 +100(y-x^2)^2$

Rosenbrock函数的每个等高线大致呈抛物线形，其全域最小值也位在抛物线形的山谷中（香蕉型山谷）。很容易找到这个山谷，但由于山谷内的值变化不大，要找到全域的最小值相当困难。其全域最小值位于 (x,y)=(1,1)点，数值为f(x,y)=0。有时第二项的系数不同，但不会影响全域最小值的位置。

1 2	def fun_rosenbrock(x): return np.array([10 * (x[1] - x[0]**2), (1 - x[0])])

import numpy as np
from scipy.optimize import least_squares
x0_rosenbrock = np.array([2, 2])
res_1 = least_squares(fun_rosenbrock, x0_rosenbrock)
print(res_1.x)

print(res_1.cost)

print(res_1.optimality)
print(res_1)

[1. 1.]
9.866924291084687e-30
8.892886493421953e-14
 active_mask: array([0., 0.])
        cost: 9.866924291084687e-30
         fun: array([4.44089210e-15, 1.11022302e-16])
        grad: array([-8.89288649e-14,  4.44089210e-14])
         jac: array([[-20.00000015,  10.        ],
       [ -1.        ,   0.        ]])
     message: '`gtol` termination condition is satisfied.'
        nfev: 3
        njev: 3
  optimality: 8.892886493421953e-14
      status: 1
     success: True
           x: array([1., 1.])

def jac_rosenbrock(x):
    return np.array([
        [-20 * x[0], 10],
        [-1, 0]])

res_2 = least_squares(fun_rosenbrock, x0_rosenbrock, jac_rosenbrock,
                      bounds=([-np.inf, 1.5], np.inf))
print(res_2.x)

print(res_2.cost)

print(res_2.optimality)
print(res_2)

[1.22437075 1.5       ]
0.025213093946805685
1.5885401433157753e-07
 active_mask: array([ 0, -1])
        cost: 0.025213093946805685
         fun: array([ 0.00916269, -0.22437075])
        grad: array([1.58854014e-07, 9.16268991e-02])
         jac: array([[-24.48741498,  10.        ],
       [ -1.        ,   0.        ]])
     message: '`ftol` termination condition is satisfied.'
        nfev: 9
        njev: 9
  optimality: 1.5885401433157753e-07
      status: 2
     success: True
           x: array([1.22437075, 1.5       ])

from scipy.optimize import leastsq
def func(x):
    return 2*(x-3)**2+1
leastsq(func, 0)

1	(array([2.99999999]), 1)

python 插值：scipy.interpolate

1 2	from scipy import interpolate as si import matplotlib.pyplot as plt

# 1维插值 interp1d
# 样条插值阶数可选

x = np.linspace(0, 10, num=11, endpoint=True)
y = np.cos(-x**2/9.0)
f = si.interp1d(x, y)
f2 = si.interp1d(x, y, kind='cubic')
f3 = si.interp1d(x,y,kind=5)
f4 = si.interp1d(x,y,kind='nearest')
f5 = si.interp1d(x,y,kind='previous')
f6 = si.interp1d(x,y,kind='next')

xnew = np.linspace(0, 10, num=41, endpoint=True)

plt.figure(figsize=(12,9))
plt.subplot(221)
plt.plot(x, y, 'o', xnew, f(xnew), '-', xnew, f2(xnew), '--')
plt.legend(['data', 'linear', 'cubic'], loc='best')

plt.subplot(222)
plt.plot(x, y, 'o', xnew, f3(xnew), '-', xnew, f4(xnew), '--')
plt.legend(['data', 'Spline5', 'nearest'], loc='best')

plt.subplot(223)
plt.plot(x, y, 'o', xnew, f5(xnew), '-', xnew, f6(xnew), '--')
plt.legend(['data', 'previous', 'next'], loc='best')

plt.tight_layout()
plt.show()

# 多变量插值：griddata

def func(x, y):
    return x*(1-x)*np.cos(4*np.pi*x) * np.sin(4*np.pi*y**2)**2

grid_x, grid_y = np.mgrid[0:1:100j, 0:1:200j]

points = np.random.rand(1000, 2)
values = func(points[:,0], points[:,1])

grid_z0 = si.griddata(points, values, (grid_x, grid_y), method='nearest')
grid_z1 = si.griddata(points, values, (grid_x, grid_y), method='linear')
grid_z2 = si.griddata(points, values, (grid_x, grid_y), method='cubic')

plt.subplot(221)
plt.imshow(func(grid_x, grid_y).T, extent=(0,1,0,1), origin='lower')
plt.plot(points[:,0], points[:,1], 'k.', ms=1)
plt.title('Original')
plt.subplot(222)
plt.imshow(grid_z0.T, extent=(0,1,0,1), origin='lower')
plt.title('Nearest')
plt.subplot(223)
plt.imshow(grid_z1.T, extent=(0,1,0,1), origin='lower')
plt.title('Linear')
plt.subplot(224)
plt.imshow(grid_z2.T, extent=(0,1,0,1), origin='lower')
plt.title('Cubic')
plt.gcf().set_size_inches(6, 6)
plt.show()

# 样条插值
# 1. 计算样条表达式
# 2. 计算插值

x = np.arange(0, 2*np.pi+np.pi/4, 2*np.pi/8)
y = np.sin(x)
tck = si.splrep(x, y, s=0)
xnew = np.arange(0, 2*np.pi, np.pi/50)
ynew = si.splev(xnew, tck, der=0)

plt.figure()
plt.plot(x, y, 'x', xnew, ynew, xnew, np.sin(xnew), x, y, 'b')
plt.legend(['Linear', 'Cubic Spline', 'True'])
plt.axis([-0.05, 6.33, -1.05, 1.05])
plt.title('Cubic-spline interpolation')
plt.show()

yder = si.splev(xnew, tck, der=1)
plt.figure()
plt.plot(xnew, yder, xnew, np.cos(xnew),'--')
plt.legend(['Cubic Spline', 'True'])
plt.axis([-0.05, 6.33, -1.05, 1.05])
plt.title('Derivative estimation from spline')
plt.show()

外推法:Scipy.UnivariateSpline

from scipy import interpolate
import numpy as np
x1=np.linspace(0,10,20)
y1=np.sin(x1)

sx1=np.linspace(0,12,100)
func1=interpolate.UnivariateSpline(x1,y1,s=0)#强制通过所有点
sy1=func1(sx1)

import matplotlib.pyplot as plt
plt.plot(x1,y1,'o')
plt.plot(sx1,sy1)
plt.show()

import numpy as np
from scipy import interpolate
x2=np.linspace(0,20,200)
y2=np.sin(x2)+np.random.normal(loc=0,scale=1,size=len(x2))*0.2

sx2=np.linspace(0,22,2000)
func2=interpolate.UnivariateSpline(x2,y2,s=8)
sy2=func2(sx2)

import matplotlib.pyplot as plt
plt.plot(x2,y2,'.')
plt.plot(sx2,sy2)
plt.show()

# 移动平均算法：一种趋势算法。

def waitui(x,w,n,lr):
    '''
    x:data,list_like;
    w:width
    n:number
    lr:left or right
    '''
    for k in range(n):
        if lr == 'l':
            x0 = w*x[int((w-1)/2)-1] - sum(x[:int(w-1)]) 
            x.insert(0,x0)
        if lr == 'r':
            x0 = w*x[int(0-(w-1)/2)] - sum(x[-int(w-1):])
            x.append(x0)
          
    return x

1
2
3

x2=np.linspace(0,20,20)
y2=2*x2+3 +np.random.normal(loc=0,scale=1,size=len(x2))
plt.plot(x2,y2,'.')

1	[<matplotlib.lines.Line2D at 0x1854963fc40>]

y2

array([ 2.13802457,  5.61154911,  6.05997104,  9.16678043,  9.60155538,
       13.1974428 , 15.90229562, 16.02519421, 20.04754014, 22.14220077,
       26.35090094, 26.50786393, 28.57640137, 30.3048343 , 31.92003191,
       33.60209416, 37.30656326, 39.41982125, 41.34787236, 43.59627556])

ls = [1,2,3,4,5]
ls = list(y2)
temp = waitui(ls,5,9,'r')
plt.plot(temp)

1	[<matplotlib.lines.Line2D at 0x18546c3ffa0>]

temp

[2.1380245690246995,
 5.611549114103028,
 6.059971043082472,
 9.166780428711341,
 9.601555382236675,
 13.197442799881992,
 15.90229562319444,
 16.025194212906964,
 20.04754014250857,
 22.142200770196332,
 26.35090093724794,
 26.50786392690083,
 28.576401370363442,
 30.304834299193807,
 31.920031914422612,
 33.60209415559048,
 37.306563260759276,
 39.41982124700761,
 41.347872357070834,
 43.596275556223034,
 45.84467875537524,
 48.09308195452745,
 50.34148515367964,
 59.33509795028846,
 54.83829155198407,
 97.55795233587588,
 59.33509795028846,
 14.367033967244538,
 347.1307074417702,
 -446.55562185895644,
 1761.3763197085043,
 -3909.0965485533447]

1	plt.plot(temp)

1	[<matplotlib.lines.Line2D at 0x18549450640>]

1	3*ls[0] - sum(ls[:2])

ls[:2]

[-13, -3]

1	import scipy

1	scipy.interpolate.Akima1DInterpolator.extrapolate??

1
2
3

[1;31mType:[0m        member_descriptor
[1;31mString form:[0m <member 'extrapolate' of '_PPolyBase' objects>
[1;31mDocstring:[0m   <no docstring>

1	scipy.__version__

'1.5.0'

import matplotlib.pyplot as plt
from scipy import interpolate
x = np.arange(0, 10)
y = np.exp(-x/3.0)
f = interpolate.interp1d(x, y,kind=5,fill_value='extrapolate')

xnew = np.arange(0, 13, 0.1)
ynew = f(xnew)   # use interpolation function returned by `interp1d`
plt.plot(x, y, 'o', xnew, ynew, '-')
plt.show()

光谱平滑 :Scipy.signal.savgol_fliter

移动平滑
Savitzky-Golay卷积平滑

SG平滑算法；多项式平滑算法; 卷积平滑。
- 是一种在时域内基于局域多项式最小二乘法拟合的滤波方法。这种滤波器最大的特点在于在滤除噪声的同时可以确保信号的形状、宽度不变。
S-G滤波器

python 最小二乘相关函数

非线性最小二乘

least_squares

变量要有边界

目标函数，非线性最小二乘里面称为残差(测量值与预测值之差)

当我们用一个模型来描述现实中的一系列数据时，模型的预测结果与实际的测量结果总会存在一定偏差，这一偏差就称为残差。非线性最小二乘的目的就是，调整模型的参数，使得总的残差最小。
残差的平方和是最佳的目标函数

损失函数，减少异常值的影响，多种方式可选
代价函数，$0.5\sum \rho(f(x)^2)$
高斯-牛顿法（Gauss-Newton method）
列文伯格-马夸尔特法（Levenberg-Marquardt method）

在给定变量范围内，找到代价函数的局部最小值

参数

必须给定参数

函数
初值

其它参数可选

对于非线性模型的最小二乘问题是没有线性解析解的
对没有解析解的问题，通常都是用逐渐逼近的思想去解决。而迭代法就正是这种思想的一种重要实现手段。
最小二乘，广义上来说其实是机器学习中的平方损失函数
线性最小二乘有闭式解，可用最小二乘法求解，也可采用迭代法（如梯度下降）求解；非线性最小二乘没有闭式解，只能采用迭代法求解。

线性最小二乘

nnls;This is a wrapper for a FORTRAN non-negative least squares solver.
lsq_linear;Solve a linear least-squares problem with bounds on the variables.

from scipy.optimize import nnls

A = np.array([[1, 0], [1, 0], [0, 1]])
b = np.array([2, 1, 1])
nnls(A, b)

1	(array([1.5, 1. ]), 0.7071067811865475)

from scipy.sparse import rand
from scipy.optimize import lsq_linear

np.random.seed(0)

m = 20000
n = 10000

A = rand(m, n, density=1e-4)
b = np.random.randn(m)

lb = np.random.randn(n)
ub = lb + 1

res = lsq_linear(A, b, bounds=(lb, ub), lsmr_tol='auto', verbose=1)
res

The relative change of the cost function is less than `tol`.
Number of iterations 15, initial cost 1.5178e+04, final cost 1.1150e+04, first-order optimality 7.15e-08.





 active_mask: array([ 1, -1, -1, ..., -1,  0,  1])
        cost: 11150.33129344828
         fun: array([ 0.00219242, -0.10699277, -0.70574952, ...,  0.76716404,
       -0.40184636,  0.67399679])
     message: 'The relative change of the cost function is less than `tol`.'
         nit: 15
  optimality: 7.146292795202272e-08
      status: 2
     success: True
           x: array([ 0.54987404, -0.05812073,  1.34915899, ...,  1.36754417,
        0.        , -0.96004356])

曲线拟合

curve_fit;Use non-linear least squares to fit a function, f, to data.

输入值：

函数：
xdata
ydata
其他参数可选

返回值：

最优参数；此时，残差平方和最小
参数的估计方差；

To compute one standard deviation errors on the parameters use perr = np.sqrt(np.diag(pcov)).

1 2	import matplotlib.pyplot as plt from scipy.optimize import curve_fit

1 2	def func(x, a, b, c): return a * np.exp(-b * x) + c

xdata = np.linspace(0, 4, 50)
y = func(xdata, 2.5, 1.3, 0.5)
np.random.seed(1729)
y_noise = 0.2 * np.random.normal(size=xdata.size)
ydata = y + y_noise
plt.plot(xdata, ydata, 'b-', label='data')

popt, pcov = curve_fit(func, xdata, ydata)
popt

plt.plot(xdata, func(xdata, *popt), 'r-',
         label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))

popt, pcov = curve_fit(func, xdata, ydata, bounds=(0, [3., 1., 0.5]))
popt

plt.plot(xdata, func(xdata, *popt), 'g--',
         label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))

plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()

1 2	xdata = np.linspace(-10, 10, num=20) ydata = f(xdata) + np.random.randn(xdata.size)

1	plt.plot(xdata,ydata)

1	[<matplotlib.lines.Line2D at 0x2b1b02ed490>]

1 2	def f2(x, a, b): return ax2 + bnp.sin(x)

1
2
3

guess = [2, 2]
params, params_covariance = optimize.curve_fit(f2, xdata, ydata, guess)
params

1	array([1.00278316, 9.10173369])

1 2	a = params[0] b = params[1]

1 2	plt.plot(xdata,ydata) plt.plot(x,f2(x,a,b))

1	[<matplotlib.lines.Line2D at 0x2b1b034b700>]

Matplotlib

CSDN_matplotlib详解

1	%matplotlib inline

Colormap reference

中文说明见CSDN

记住一个参数：

plt.get_cmap('str')

Reference for colormaps included with Matplotlib.

A reversed version of each of these colormaps is available by appending
_r to the name, e.g., viridis_r.

See :doc:/tutorials/colors/colormaps for an in-depth discussion about
colormaps, including colorblind-friendliness.

import numpy as np
import matplotlib.pyplot as plt


cmaps = [('Perceptually Uniform Sequential', [
            'viridis', 'plasma', 'inferno', 'magma', 'cividis']),
         ('Sequential', [
            'Greys', 'Purples', 'Blues', 'Greens', 'Oranges', 'Reds',
            'YlOrBr', 'YlOrRd', 'OrRd', 'PuRd', 'RdPu', 'BuPu',
            'GnBu', 'PuBu', 'YlGnBu', 'PuBuGn', 'BuGn', 'YlGn']),
         ('Sequential (2)', [
            'binary', 'gist_yarg', 'gist_gray', 'gray', 'bone', 'pink',
            'spring', 'summer', 'autumn', 'winter', 'cool', 'Wistia',
            'hot', 'afmhot', 'gist_heat', 'copper']),
         ('Diverging', [
            'PiYG', 'PRGn', 'BrBG', 'PuOr', 'RdGy', 'RdBu',
            'RdYlBu', 'RdYlGn', 'Spectral', 'coolwarm', 'bwr', 'seismic']),
         ('Cyclic', ['twilight', 'twilight_shifted', 'hsv']),
         ('Qualitative', [
            'Pastel1', 'Pastel2', 'Paired', 'Accent',
            'Dark2', 'Set1', 'Set2', 'Set3',
            'tab10', 'tab20', 'tab20b', 'tab20c']),
         ('Miscellaneous', [
            'flag', 'prism', 'ocean', 'gist_earth', 'terrain', 'gist_stern',
            'gnuplot', 'gnuplot2', 'CMRmap', 'cubehelix', 'brg',
            'gist_rainbow', 'rainbow', 'jet', 'nipy_spectral', 'gist_ncar'])]


gradient = np.linspace(0, 1, 256)
gradient = np.vstack((gradient, gradient))


def plot_color_gradients(cmap_category, cmap_list):
    # Create figure and adjust figure height to number of colormaps
    nrows = len(cmap_list)
    figh = 0.35 + 0.15 + (nrows + (nrows-1)*0.1)*0.22
    fig, axes = plt.subplots(nrows=nrows, figsize=(6.4, figh))
    fig.subplots_adjust(top=1-.35/figh, bottom=.15/figh, left=0.2, right=0.99)

    axes[0].set_title(cmap_category + ' colormaps', fontsize=14)

    for ax, name in zip(axes, cmap_list):
        ax.imshow(gradient, aspect='auto', cmap=plt.get_cmap(name))
        ax.text(-.01, .5, name, va='center', ha='right', fontsize=10,
                transform=ax.transAxes)

    # Turn off *all* ticks & spines, not just the ones with colormaps.
    for ax in axes:
        ax.set_axis_off()


for cmap_category, cmap_list in cmaps:
    plot_color_gradients(cmap_category, cmap_list)

plt.show()

References
“”””””””””

The use of the following functions, methods, classes and modules is shown
in this example:

import matplotlib
matplotlib.colors
matplotlib.axes.Axes.imshow
matplotlib.figure.Figure.text
matplotlib.axes.Axes.set_axis_off

1	<function matplotlib.axes._base._AxesBase.set_axis_off(self)>

import matplotlib.pyplot as plt
import numpy as np
 
x = np.linspace(-3, 3, 50)
y = 2 * x + 1
plt.figure(num=1, figsize=(8, 5))
plt.plot(x, y)
 
ax = plt.gca()
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
# 添加点
x0 = 1
y0 = 2 * x0 + 1
plt.scatter(x0, y0, s=50, color='b')
plt.plot([x0, x0], [y0, 0], 'k--', lw=2.5)
# 方法一
plt.annotate(r'$2x+1=%s$' % y0, xy=(x0, y0),
             xycoords='data', xytext=(+30, -30),
             textcoords='offset points', fontsize=16,
             arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=.2'))
# 方法二
plt.text(-3.7, 3, r'$This\ is\ the\ some\ text. \mu\ \sigma_i\ \alpha_t$',
         fontdict={'size': 16, 'color': 'r'})
plt.show()

import matplotlib.pyplot as plt
 
fig = plt.figure()
x = [1, 2, 3, 4, 5, 6, 7]
y = [1, 3, 4, 2, 5, 8, 6]
 
left, bottom, width, height = 0.1, 0.1, 0.8, 0.8
ax1 = fig.add_axes([left, bottom, width, height])
ax1.plot(x, y, 'r')
ax1.set_xlabel('x')
ax1.set_ylabel('y')
ax1.set_title('title')
 
left, bottom, width, height = 0.2, 0.6, 0.25, 0.25
ax2 = fig.add_axes([left, bottom, width, height])
ax2.plot(y, x, 'r')
ax2.set_xlabel('x')
ax2.set_ylabel('y')
ax2.set_title('title inside 1')
 
plt.axes([.6, .2, .25, .25])
plt.plot(y[::-1], x, 'g')
plt.xlabel('x')
plt.ylabel('y')
plt.title('inside 2')
 
plt.show()

import matplotlib.pyplot as plt
import numpy as np
 
x = np.arange(0, 10, 0.1)
y1 = 0.05 * x ** 2
y2 = -1 * y1
 
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
ax1.plot(x, y1, 'g-')
ax2.plot(x, y2, 'b-')
 
ax1.set_xlabel('X data')
ax1.set_ylabel('Y1,color=g')
ax2.set_ylabel('Y2,color=b')
 
plt.show()

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import animation
 
fig, ax = plt.subplots()
 
x = np.arange(0, 2 * np.pi, 0.01)
line, = ax.plot(x, np.sin(x))
 
 
def animate(i):
    line.set_ydata(np.sin(x + i / 100))
    return line,
 
 
def init():
    line.set_ydata(np.sin(x))
    return line,
 
 
ani = animation.FuncAnimation(fig=fig, func=animate, frames=100,
                              init_func=init, interval=20, blit=False)
 
plt.show()

Matplotlib 三维图

1 2	from mpl_toolkits.mplot3d import axes3d from matplotlib import cm

fig = plt.figure()
ax = fig.gca(projection='3d')
X, Y, Z = axes3d.get_test_data(0.05)
ax.plot_surface(X, Y, Z, rstride=8, cstride=8, alpha=0.9, cmap=cm.winter_r)
# 绘制等高线
cset = ax.contourf(X, Y, Z, zdir='z', offset=-100, cmap=cm.coolwarm)
cset = ax.contourf(X, Y, Z, zdir='x', offset=-40, cmap=cm.coolwarm)
cset = ax.contourf(X, Y, Z, zdir='y', offset=40, cmap=cm.coolwarm)

ax.set_xlabel('X')
ax.set_xlim(-40, 40)
ax.set_ylabel('Y')
ax.set_ylim(-40, 40)
ax.set_zlabel('Z')
ax.set_zlim(-100, 100)

plt.show()

1	from scipy import optimize # 优化是寻找最小化或等式的数值解的问题

1
2
3

# 1. 寻找标量函数的最小值
def f(x):
    return x**2 + 10*np.sin(x)

1 2	x = np.arange(-10,10,0.1) plt.plot(x,f(x)) # 当有%matplotlib inline时，可以省略plt.show()

1	[<matplotlib.lines.Line2D at 0x2b1b28790d0>]

1 2	#找到这个函数的最小值的常用有效方式是从给定的初始点开始进行一个梯度下降 optimize.fmin_bfgs(f, 0)

Optimization terminated successfully.
         Current function value: -7.945823
         Iterations: 5
         Function evaluations: 12
         Gradient evaluations: 6





array([-1.3064401])

1
2

#这个方法的一个可能问题是，如果这个函数有一些局部最低点，算法可能找到这些局部最低点而不是全局最低点，这取决于初始点
optimize.fmin_bfgs(f, 3, disp=0)

1	array([3.83746709])

# 暴力算法找到全局最优点
# 缺点是，当网格变大时，程序会变慢，scipy.optimize.anneal()
# OpenOpt、IPOPT、PyGMO和PyEvolve是关于全局优化的一些有用的包。
grid = (-10, 10, 0.1)
xmin_global = optimize.brute(f, (grid,))
xmin_global

1	array([-1.30641113])

1
2
3

#要找出局部最低点，让我们用scipy.optimize.fminbound将变量限制在(0,10)区间
xmin_local = optimize.fminbound(f, 0, 10)  
xmin_local

1	3.8374671194983834

1	# 2. 寻找标量函数的根

1 2	root = optimize.fsolve(f, 1) # 我们的最初猜想是1 root

1	array([0.])

1 2	root2 = optimize.fsolve(f, -2.5) root2

1	array([-2.47948183])

1	[<matplotlib.lines.Line2D at 0x2b1b034b700>]

直方图

import numpy as np
import matplotlib.pyplot as plt

# Fixing random state for reproducibility
np.random.seed(19680801)

mu, sigma = 100, 15
x = mu + sigma * np.random.randn(10000)

# the histogram of the data
n, bins, patches = plt.hist(x, np.arange(40,170,0.5), density=True, facecolor='g', alpha=0.75)


plt.xlabel('Smarts')
plt.ylabel('Probability')
# plt.title('Histogram of IQ')
# plt.text(60, .025, r'$\mu=100,\ \sigma=15$')
# plt.xlim(40, 160)
# plt.ylim(0, 0.03)
# plt.grid(True)
plt.show()

n.shape

(259,)

1	len(np.arange(40,170,0.5))

1	plt.bar(np.arange(40,170,0.5)[:-1],n*0.5)

1	<BarContainer object of 259 artists>

1	import pandas as pd

1 2	temp = pd.read_csv(r'C:\Users\hp\Desktop\P59.csv') temp

	OBJECTID	pointid	grid_code	RASTERVALU
0	751834	751834	0.316	0.316
1	751833	751833	0.301	0.301
2	751832	751832	0.277	0.277
3	639012	639012	0.276	0.276
4	639013	639013	0.272	0.272
...	...	...	...	...
1043770	880209	880209	0.001	0.001
1043771	880212	880212	0.001	0.001
1043772	904890	904890	0.001	0.001
1043773	905145	905145	0.001	0.001
1043774	1005453	1005453	0.001	0.001

1043775 rows × 4 columns

1 2	temp = temp[temp.RASTERVALU<0.2] temp

	OBJECTID	pointid	grid_code	RASTERVALU
46	638757	638757	0.199	0.199
47	775890	775890	0.198	0.198
48	170770	170770	0.195	0.195
49	638886	638886	0.194	0.194
50	638756	638756	0.193	0.193
...	...	...	...	...
1043770	880209	880209	0.001	0.001
1043771	880212	880212	0.001	0.001
1043772	904890	904890	0.001	0.001
1043773	905145	905145	0.001	0.001
1043774	1005453	1005453	0.001	0.001

1043729 rows × 4 columns

1	x = temp.RASTERVALU

1	n,bins,patches = plt.hist(x,bins=np.arange(0,0.2,0.001),density=True)

1	plt.bar(bins[:-1],n*0.001,width=0.001)

1	<BarContainer object of 199 artists>

plt.style

1	plt.style.use('seaborn')

1	print(plt.style.available)

['Solarize_Light2', '_classic_test_patch', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn', 'seaborn-bright', 'seaborn-colorblind', 'seaborn-dark', 'seaborn-dark-palette', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid', 'tableau-colorblind10']

enumerate
生成式
生成器

容器数据类型

字符串
列表

注意多重列表
注意：乘法可重复列表元素

元组：不可修改元素，便于维护，进程安全，创建更快。
集合：和数学上一致，大括号
字典：键值对
构造器语法？
推导式语法？

类和对象
类是对象的蓝图和模板，而对象是类的实例

私有属性和方法：两个下划线开头。
单下划线开头表示属性是受保护的。

正则表达式

速查表：https://www.debuggex.com/cheatsheet/regex/python
30分钟入门教程：https://deerchao.cn/tutorials/regex/regex.htm
在线验证网站：https://tool.oschina.net/regex/

正则表示即匹配规则

处理字符串
爬取网页
元字符
限定符
精准匹配

1 2	import re # - re.findall()

# 匹配电话号
str1 = '我的电话号是：13888888888；他的电话号是：13666666666'
result = re.findall('\d\d\d\d\d\d\d\d\d\d\d',str1)
print(result)

result = re.findall('\d{11}',str1)
print(result)

1 2	['13888888888', '13666666666'] ['13888888888', '13666666666']

from scipy.signal import savgol_filter

# savgol_filter(data,n,k)
# 窗口宽度n:为奇数，越大越平滑
# 多项式阶数k:小于n,越小越平滑

用sympy求解方程

示例

1	1-4/(1.52.5*2)

1	0.5733333333333333

1	(0.5/2.5)**2+0.05

1	0.09000000000000001

0.91*0.43

0.3913

1 2	from sympy import * %matplotlib inline

1
2
3

x, y, z, t = symbols('x y z t')
k, m, n = symbols('k m n', integer=True)
f, g, h = symbols('f g h', cls=Function)

1 2	f = Rational(3,2)pi + exp(Ix) / (x**2 + y) f

$\displaystyle \frac{3 \pi}{2} + \frac{e^{i x}}{x^{2} + y}$

1 2	x = Symbol('x') exp(I*x).subs(x,pi).evalf()

$\displaystyle -1.0$

1 2	g = exp(I*x) g

$\displaystyle e^{i x}$

1 2	h = f + g h

$\displaystyle e^{i x} + \frac{3 \pi}{2} + \frac{e^{i x}}{x^{2} + y}$

1	exp(I*x).subs(x,pi)

$\displaystyle -1$

expr.args

(x, 2*y)

1	exp(pi * sqrt(163)).evalf() # 计算数值解，括号内参数为有效数字位，默认15？

$\displaystyle 2.62537412640769 \cdot 10^{17}$

1	exp(pi * sqrt(163))

$\displaystyle e^{\sqrt{163} \pi}$

1	latex(S('2*4+10',evaluate=False)) # Latex格式

1	'2 \\cdot 4 + 10'

1	latex(exp(x*2)/2)

1	'\\frac{e^{2 x}}{2}'

1	((x+y)*2 (x+1)).expand() # 展开

$\displaystyle x^{3} + 2 x^{2} y + x^{2} + x y^{2} + 2 x y + y^{2}$

1	((x+y)*2 (x+1))

$\displaystyle \left(x + 1\right) \left(x + y\right)^{2}$

1 2	a = 1/x + (x*sin(x) - 1)/x a

$\displaystyle \frac{x \sin{\left(x \right)} - 1}{x} + \frac{1}{x}$

1	simplify(a)

$\displaystyle \sin{\left(x \right)}$

1	solve(Eq(x*3 + 2x*2 + 4x + 8, 0), x)

1	[-2, -2I, 2I]

1	solve(x*3 + 2x*2 + 4x + 8, x)

1	[-2, -2I, 2I]

1	solve([Eq(x + 5y, 2), Eq(-3x + 6*y, 15)], [x, y])

1	{x: -3, y: 1}

1	solve([x + 5y - 2, -3x + 6*y - 15], [x, y])

1	{x: -3, y: 1}

y=Function('y')
n=Symbol('n', integer=True)
f=y(n)-2*y(n-1)-5*y(n-2)
f

$\displaystyle y{\left(n \right)} - 5 y{\left(n - 2 \right)} - 2 y{\left(n - 1 \right)}$

1	rsolve(f,y(n),[1,4])

$\displaystyle \left(\frac{1}{2} - \frac{\sqrt{6}}{4}\right) \left(1 - \sqrt{6}\right)^{n} + \left(\frac{1}{2} + \frac{\sqrt{6}}{4}\right) \left(1 + \sqrt{6}\right)^{n}$

1
2
3

a, b = symbols('a b')
s = Sum(6*n**2 + 2**n, (n, a, b))
s

$\displaystyle \sum_{n=a}^{b} \left(2^{n} + 6 n^{2}\right)$

s.doit()

$\displaystyle - 2^{a} + 2^{b + 1} - 2 a^{3} + 3 a^{2} - a + 2 b^{3} + 3 b^{2} + b$

1	product(n*(n+1), (n, 1, b))

$\displaystyle {2}^{\left(b\right)} b!$

1
2
3

f=Function('f')
ex=Eq(f(1/x)-3*f(x),x)
ex.subs(x,2)

$\displaystyle f{\left(\frac{1}{2} \right)} - 3 f{\left(2 \right)} = 2$

1	ex.subs(x,Rational(1,2))

$\displaystyle - 3 f{\left(\frac{1}{2} \right)} + f{\left(2 \right)} = \frac{1}{2}$

1	solve([f(Rational(1,2))-3f(2)-2,-3f(Rational(1,2))+f(2)-Rational(1,2)])

1	[{f(1/2): -7/16, f(2): -13/16}]

1	limit((sin(x)-x)/x**3, x, 0)

$\displaystyle - \frac{1}{6}$

1	(1/cos(x)).series(x, 0, 6)

$\displaystyle 1 + \frac{x^{2}}{2} + \frac{5 x^{4}}{24} + O\left(x^{6}\right)$

1	diff(cos(x2)2 / (1+x), x)

$\displaystyle - \frac{4 x \sin{\left(x^{2} \right)} \cos{\left(x^{2} \right)}}{x + 1} - \frac{\cos^{2}{\left(x^{2} \right)}}{\left(x + 1\right)^{2}}$

1	integrate(x*2 cos(x), x)

$\displaystyle x^{2} \sin{\left(x \right)} + 2 x \cos{\left(x \right)} - 2 \sin{\left(x \right)}$

1	integrate(x*2 cos(x), (x, 0, pi/2))

$\displaystyle -2 + \frac{\pi^{2}}{4}$

1 2	f = Function('f') dsolve(Eq(Derivative(f(x),x,x) + 9*f(x), 1), f(x))

$\displaystyle f{\left(x \right)} = C_{1} \sin{\left(3 x \right)} + C_{2} \cos{\left(3 x \right)} + \frac{1}{9}$

1 2	f = Function("f") Eq(f(x).diff(x, x) + 9*f(x), 1)

$\displaystyle 9 f{\left(x \right)} + \frac{d^{2}}{d x^{2}} f{\left(x \right)} = 1$

1	dsolve(_, f(x))

$\displaystyle f{\left(x \right)} = C_{1} \sin{\left(3 x \right)} + C_{2} \cos{\left(3 x \right)} + \frac{1}{9}$

def planck(w, T):
    h = 6.626e-34
    c = 3.0e+8
    k = 1.38e-23

    a = 2.0*h*c**2
    b = h*c/(w*k*T)
    intensity = a/ ( (w**5) * (np.exp(b) - 1.0) )
    return intensity

h = 6.626e-34
c = 3.0e+8
k = 1.38e-23
T = symbols('T')
lamda = symbols('lamda')
k, h, c = symbols('k h c', integer=True)
P = symbols('P', cls=Function)
P = 2*h*c**2/((lamda**5) * (exp(h*c/(lamda*k*T)) - 1))
P

$\displaystyle \frac{2 c^{2} h}{\lambda^{5} \left(e^{\frac{c h}{T k \lambda}} - 1\right)}$

1 2	result = P.subs(c,3.0e+8).subs(h,6.626e-34).subs(k,1.38e-23).subs(lamda,500e-9).subs(T,5777) result.evalf(5)

$\displaystyle 2.6237 \cdot 10^{13}$

1	# solve(Eq(P.subs(c,3.0e+8).subs(h,6.626e-34).subs(k,1.38e-23).subs(lamda,500e-9), 2.6237e13), T)

t = symbols('t',cls=Function)
Ra = symbols('I')
t = h*c/(lamda*k*ln((2*h*c**2/(Ra*lamda**5))+1))
t

$\displaystyle \frac{c h}{k \lambda \log{\left(1 + \frac{2 c^{2} h}{I \lambda^{5}} \right)}}$

1	t.subs(c,3.0e+8).subs(h,6.626e-34).subs(k,1.38e-23).subs(lamda,500e-9).subs(Ra,2.6237e13)

$\displaystyle 5777.00277916166$

r0 = symbols('r0',cls=Function)
w = symbols('w')
r0 = (1-sqrt(1-w))/(1+sqrt(1-w))
r0

$\displaystyle \frac{1 - \sqrt{1 - w}}{\sqrt{1 - w} + 1}$

1	solve(Eq(r0,0),w)

[0]

P = symbols('P',cls=Function)
b,c,g = symbols('b c g')
P = 1+b*cos(pi*g/180)+c*(1.5*cos(pi*g/180)**2-0.5)
P

$\displaystyle b \cos{\left(\frac{\pi g}{180} \right)} + c \left(1.5 \cos^{2}{\left(\frac{\pi g}{180} \right)} - 0.5\right) + 1$

1	cos(x).subs(x,pi).evalf()

$\displaystyle -1.0$

1	solve(Eq(P.subs(b,-0.17).subs(c,0.7),1),g)

1	[48.3981512828419, 120.135426949009, 239.864573050991, 311.601848717158]

1 2	def r1(w): return (1-np.sqrt(1-w))/(1+np.sqrt(1-w))

1
2
3

f = symbols('f',cls=Function)
f = r0 + P
f

---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-76-b0e877824aad> in <module>
      1 f = symbols('f',cls=Function)
----> 2 f = r0 + P
      3 f


TypeError: unsupported operand type(s) for +: 'function' and 'Add'

1	import sympy as sy

# 1.表达式与表达式求值

# 多项式求解:
# 定义变量：
x = sy.Symbol('x')
fx = 5*x+4
print(fx)
# 使用evalf函数传值
y1 = fx.evalf(subs={x:6})
print(y1)

1 2	5*x + 4 34.0000000000000

#多元表达式
x=sy.Symbol('x')
y=sy.Symbol('y')
fx=x*x+y*y
result=fx.evalf(subs={x:3,y:4})
print(result)

1	25.0000000000000

# 2.函数方程求解

#解方程 有限解
#定义变量
x=sy.Symbol('x')
y=sy.Symbol('y')
fx=x*3+9
#可求解直接给出解向量
print(sy.solve(fx,x))
print(sy.solve(Eq(fx,1),x))

1 2	[-3] [-8/3]

#解方程 无穷多解
#定义变量
x=sy.Symbol('x')
y=sy.Symbol('y')
fx=x*3+y**2
#得到是x与y的关系式，
print(sy.solve(fx,x,y))

1	[(-y**2/3, y)]

#解方程组
#定义变量
x=sy.Symbol('x')
y=sy.Symbol('y')
f1=x+y-3
f2=x-y+5
print(sy.solve([f1,f2],[x,y]))
# print(sy.solve(Eqs([f1,f2],[0,0]),[x,y]))

1	{x: -1, y: 4}

# 求和
#定义变量
n=sy.Symbol('n')
f=2*n
#前面参数放函数，后面放变量的变化范围
s=sy.summation(f,(n,1,100))
print(s)

# 解带有求和式的方程

#解释一下，i可以看做是循环变量，就是x自己加五次
#先定义变量，再写出方程
x=sy.Symbol('x')
i=sy.Symbol('i')
f=sy.summation(x,(i,1,5))+10*x-15
result=sy.solve(f,x)
print(result)

[1]

# 求极限
#求极限使用limit方法
#定义变量与函数
x=sy.Symbol('x')
f1=sy.sin(x)/x
f2=(1+x)**(1/x)
f3=(1+1/x)**x
#三个参数是 函数，变量，趋向值
lim1=sy.limit(f1,x,0)
lim2=sy.limit(f2,x,0)
lim3=sy.limit(f3,x,sy.oo) # 趋于无穷大
print(lim1,lim2,lim3)

1 E E

# 求导

#求导使用diff方法
x=sy.Symbol('x')
f1=2*x**4+3*x+6
#参数是函数与变量
f1_=sy.diff(f,x)
print(f1_)
 
f2=sy.sin(x)
f2_=sy.diff(f2,x)
print(f2_)
 
#求偏导
y=sy.Symbol('y')
f3=2*x**2+3*y**4+2*y
#对x，y分别求导，即偏导
f3_x=sy.diff(f3,x)
f3_y=sy.diff(f3,y)
print(f3_x)
print(f3_y)

15
cos(x)
4*x
12*y**3 + 2

# 求定积分

#求定积分用 integrate方法
x=sy.Symbol('x')
f=2*x
#参数传入 函数，积分变量和范围
result=sy.integrate(f,(x,0,1))
print(result)

# 求定积分

from scipy import integrate
def f(x):
    return x + 1
v, err = integrate.quad(f, 1, 2)# err为误差
print(v)

2.5

#求多重积分，
# 先求里面的积分，再求外面的
x,t=sy.symbols('x t')
f1=2*t
f2=sy.integrate(f1,(t,0,x))
result=sy.integrate(f2,(x,0,3))
print(result)

可视化图表交互—Bokeh

Bokeh 是 Python 的交互式可视图库，用于生成在浏览器里显示的大规模数据集高性能可视图。

github有翻译的教程

import numpy as np
import pandas as pd
import os
import os.path as op
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import bokeh

介绍与设置

Bokeh是什么？Bokeh是用于现代浏览器端展示的交互式可视化库。
我能用Bokeh 做什么？
进行环境设置
- 依赖包
- 示例数据：bokeh sampledata

from bokeh.io import output_file,show,output_notebook
from bokeh.plotting import figure
from bokeh.models import HoverTool
# from bokeh.charts import Scatter,Bar,BoxPlot,Chord #No module named 'bokeh.charts'
# from bokeh.layouts import raw

1	output_notebook()

Loading BokehJS ...

from numpy import cos, linspace
x = linspace(-6, 6, 100)
y = cos(x)
p = figure(width=500, height=500)
p.circle(x, y) #, size=7, color="firebrick", alpha=0.5
show(p)

from bokeh.sampledata.autompg import autompg

grouped = autompg.groupby("yr")
mpg = grouped["mpg"]
avg = mpg.mean()
std = mpg.std()
years = list(grouped.groups.keys())
american = autompg[autompg["origin"]==1]
japanese = autompg[autompg["origin"]==3]

autompg

	mpg	cyl	displ	hp	weight	accel	yr	origin	name
0	18.0	8	307.0	130	3504	12.0	70	1	chevrolet chevelle malibu
1	15.0	8	350.0	165	3693	11.5	70	1	buick skylark 320
2	18.0	8	318.0	150	3436	11.0	70	1	plymouth satellite
3	16.0	8	304.0	150	3433	12.0	70	1	amc rebel sst
4	17.0	8	302.0	140	3449	10.5	70	1	ford torino
...	...	...	...	...	...	...	...	...	...
387	27.0	4	140.0	86	2790	15.6	82	1	ford mustang gl
388	44.0	4	97.0	52	2130	24.6	82	2	vw pickup
389	32.0	4	135.0	84	2295	11.6	82	1	dodge rampage
390	28.0	4	120.0	79	2625	18.6	82	1	ford ranger
391	31.0	4	119.0	82	2720	19.4	82	1	chevy s-10

392 rows × 9 columns

p = figure(title="MPG by Year (Japan and US)",tools="")

p.vbar(x=years, bottom=avg-std, top=avg+std, width=0.8, 
       fill_alpha=0.2, line_color=None, legend_label="MPG 1 stddev")

p.circle(x=japanese["yr"], y=japanese["mpg"], size=10, alpha=0.5,
         color="red", legend_label="Japanese")

p.triangle(x=american["yr"], y=american["mpg"], size=10, alpha=0.3,
           color="blue", legend_label="American")

p.legend.location = "top_left"

p.add_tools(HoverTool(tooltips=[("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")]))
show(p)

from bokeh.models import ColumnDataSource
from bokeh.layouts import gridplot

source = ColumnDataSource(autompg)

options = dict(plot_width=300, plot_height=300,
               tools="pan,wheel_zoom,box_zoom,box_select,lasso_select")

p1 = figure(title="MPG by Year", **options)
p1.circle("yr", "mpg", color="blue", source=source)

p2 = figure(title="HP vs. Displacement", **options)
p2.circle("hp", "displ", color="green", source=source)

p3 = figure(title="MPG vs. Displacement", **options)
p3.circle("mpg", "displ", size="cyl", line_color="red", fill_color=None, source=source)

p = gridplot([[ p1, p2, p3]], toolbar_location="right")

show(p)

# Plot a complex chart with intearctive hover in a few lines of code

from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap

1 2	df.cyl = df.cyl.astype(str) df.yr = df.yr.astype(str)

1 2	group = df.groupby(['cyl', 'mfr']) source = ColumnDataSource(group)

p = figure(plot_width=800, plot_height=300, title="Mean MPG by # Cylinders and Manufacturer",
           x_range=group, toolbar_location=None, tools="")

p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2

index_cmap = factor_cmap('cyl_mfr', palette=['#2b83ba', '#abdda4', '#ffffbf', '#fdae61', '#d7191c'], 
                         factors=sorted(df.cyl.unique()), end=1)

p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=source,
       line_color="white", fill_color=index_cmap, 
       hover_line_color="black", hover_fill_color=index_cmap)

p.add_tools(HoverTool(tooltips=[("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")]))

show(p)

基本绘图

导入与设置：

figure
output_file/output_notebook/output_server
show/save

1
2

from bokeh.io import output_notebook, show
from bokeh.plotting import figure # mid-level接口：从简单的默认图表开始（使用合理的默认工具、网格和轴），添加标记和其他视觉属性直接与数据绑定的图形。

基本的散点图：

1	p = figure(plot_width=400, plot_height=400)

- p.circle
- p.square
- asterisk()
- circle()
- circle_cross()
- circle_x()
- cross()
- diamond()
- diamond_cross()
- inverted_triangle()
- square()
- square_cross()
- square_x()
- triangle()
- x()

基本的线形图
- p.line()
面图
- p.image_rgba()

# create a new plot with default tools, using figure
p = figure(plot_width=400, plot_height=400)

# add a circle renderer with a size, color, and alpha
p.circle([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], size=15, line_color="navy", fill_color="orange", fill_alpha=0.5)

show(p) # show the results

# create a new plot using figure
p = figure(plot_width=400, plot_height=400)

# add a square renderer with a size, color, alpha, and sizes
p.square([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], size=[10, 15, 20, 25, 30], color="firebrick", alpha=0.6)

show(p) # show the results

# create a new plot (with a title) using figure
p = figure(plot_width=400, plot_height=400, title="My Line Plot")

# add a line renderer
p.line([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], line_width=2)

show(p) # show the results

from __future__ import division
import numpy as np

# set up some data
N = 20
img = np.empty((N,N), dtype=np.uint32)
view = img.view(dtype=np.uint8).reshape((N, N, 4))
for i in range(N):
    for j in range(N):
        view[i, j, 0] = int(i/N*255) # red
        view[i, j, 1] = 158          # green
        view[i, j, 2] = int(j/N*255) # blue
        view[i, j, 3] = 255          # alpha
      
# create a new plot (with a fixed range) using figure
p = figure(x_range=[0,10], y_range=[0,10])

# add an RGBA image renderer
p.image_rgba(image=[img], x=[0], y=[0], dw=[10], dh=[10])

show(p) # show the results

# set up some data
x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 7, 3]

# create a new plot with figure
p = figure(plot_width=400, plot_height=400)

# add both a line and circles on the same plot
p.line(x, y, line_width=2)
p.circle(x, y, fill_color="white", size=8)

show(p) # show the results

样式与主题

颜色与属性
- 颜色：
  - 颜色名称：‘green’等
  - 16进制RGB（A）值
  - 整数（r,g,b）(0-255)
  - (r,g,b,a)(0-255;0-1)
- 属性：
  - 三个API:models，plotting,charts
  - 三种视觉样式属性：
    - line:line_color,line_alpha,line_width,line_dash
    - fill:fill_color,fill_alpha
    - text:text_font,text_font_size,text_color,text_alpha
Plots(框图)
- outline
  - outline_line_width
  - outline_line_alpha
  - outline_line_color
- border
Glyphs(标记符号)
- 选择和非选择视觉效果
Axes(轴)
Grid(网格)

# create a new plot with a title
p = figure(plot_width=400, plot_height=400)
p.outline_line_width = 7
p.outline_line_alpha = 0.3
p.outline_line_color = "navy"

p.circle([1,2,3,4,5], [2,5,8,2,7], size=10)

show(p)

p = figure(plot_width=400, plot_height=400)

# keep a reference to the returned GlyphRenderer
r = p.circle([1,2,3,4,5], [2,5,8,2,7])

r.glyph.size = 50
r.glyph.fill_alpha = 0.2
r.glyph.line_color = "firebrick"
r.glyph.line_dash = [5, 1]
r.glyph.line_width = 2

show(p)

p = figure(plot_width=400, plot_height=400, tools="tap", title="Select a circle")
renderer = p.circle([1, 2, 3, 4, 5], [2, 5, 8, 2, 7], size=50,

                    # set visual properties for selected glyphs
                    selection_color="firebrick",

                    # set visual properties for non-selected glyphs
                    nonselection_fill_alpha=0.2,
                    nonselection_fill_color="grey",
                    nonselection_line_color="firebrick",
                    nonselection_line_alpha=1.0)

show(p)

添加注释

Span :Spans是“无限”的垂直线或水平线。
Box Annotations（Box注释）
Label（标签）
LabelSet（标签集）
Arrows（箭头）
Legends（图例）

import numpy as np
from bokeh.models.annotations import Span

x = np.linspace(0, 20, 200)
y = np.sin(x)

p = figure(y_range=(-2, 2))
p.line(x, y)

upper = Span(location=1, dimension='width', line_color='olive', line_width=4)
p.add_layout(upper)

lower = Span(location=-1, dimension='width', line_color='firebrick', line_width=4)
p.add_layout(lower)

show(p)

import numpy as np
from bokeh.models.annotations import BoxAnnotation

x = np.linspace(0, 20, 200)
y = np.sin(x)

p = figure(y_range=(-2, 2))
p.line(x, y)

# region that always fills the top of the plot
upper = BoxAnnotation(bottom=1, fill_alpha=0.1, fill_color='olive')
p.add_layout(upper)

# region that always fills the bottom of the plot
lower = BoxAnnotation(top=-1, fill_alpha=0.1, fill_color='firebrick')
p.add_layout(lower)

# a finite region
center = BoxAnnotation(top=0.6, bottom=-0.3, left=7, right=12, fill_alpha=0.1, fill_color='navy')
p.add_layout(center)

show(p)

from bokeh.models.annotations import Label
from bokeh.plotting import figure

p = figure(x_range=(0,10), y_range=(0,10))
p.circle([2, 5, 8], [4, 7, 6], color="olive", size=10)

label = Label(x=5, y=7, x_offset=12, text="Second Point", text_baseline="middle")
p.add_layout(label)

show(p)

from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, LabelSet


source = ColumnDataSource(data=dict(
    temp=[166, 171, 172, 168, 174, 162],
    pressure=[165, 189, 220, 141, 260, 174],
    names=['A', 'B', 'C', 'D', 'E', 'F']))

p = figure(x_range=(160, 175))
p.scatter(x='temp', y='pressure', size=8, source=source)
p.xaxis.axis_label = 'Temperature (C)'
p.yaxis.axis_label = 'Pressure (lbs)'

labels = LabelSet(x='temp', y='pressure', text='names', level='glyph',
                  x_offset=5, y_offset=5, source=source, render_mode='canvas')


p.add_layout(labels)

show(p)

from bokeh.models.annotations import Arrow
from bokeh.models.arrow_heads import OpenHead, NormalHead, VeeHead

p = figure(plot_width=600, plot_height=600)

p.circle(x=[0, 1, 0.5], y=[0, 0, 0.7], radius=0.1,
         color=["navy", "yellow", "red"], fill_alpha=0.1)

p.add_layout(Arrow(end=OpenHead(line_color="firebrick", line_width=4),
                   x_start=0, y_start=0, x_end=1, y_end=0))

p.add_layout(Arrow(end=NormalHead(fill_color="orange"),
                   x_start=1, y_start=0, x_end=0.5, y_end=0.7))

p.add_layout(Arrow(end=VeeHead(size=35), line_color="red",
                   x_start=0.5, y_start=0.7, x_end=0, y_end=0))

show(p)

import numpy as np

x = np.linspace(0, 4*np.pi, 100)
y = np.sin(x)

p = figure(height=400)

p.circle(x, y, legend_label="sin(x)")
p.line(x, 2*y, legend_label="2*sin(x)", line_dash=[4, 4], line_color="orange", line_width=2)

show(p)

1 2	p.circle(x, y, legend_label="sin(x)") p.line(x, y, legend_label="sin(x)", line_dash=[4, 4], line_color="orange", line_width=2)

GlyphRenderer(

id = '5269', …)

from bokeh.sampledata.autompg import autompg
from bokeh.models import LinearColorMapper, ColorBar
from bokeh.palettes import Viridis256

source = ColumnDataSource(autompg)
color_mapper = LinearColorMapper(palette=Viridis256, low=autompg.weight.min(), high=autompg.weight.max())

p = figure(x_axis_label='Year', y_axis_label='MPG', tools='', toolbar_location='above')
p.circle(x='yr', y='mpg', color={'field': 'weight', 'transform': color_mapper}, size=20, alpha=0.6, source=source)

color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12, location=(0,0), title='Weight')
p.add_layout(color_bar, 'right')

show(p)

数据源和数据转换

Bokeh的ColumnDataSource类型
- 通过 Python Dicts 创建
- 通过 Pandas DataFrames 创建

from bokeh.models import ColumnDataSource
source = ColumnDataSource(data={
    'x' : [1, 2, 3, 4, 5],
    'y' : [3, 7, 8, 5, 1],
})
p = figure(plot_width=400, plot_height=400)
p.circle('x', 'y', size=20, source=source)
show(p)

from bokeh.sampledata.iris import flowers as df

source = ColumnDataSource(df)
p = figure(plot_width=400, plot_height=400)
p.circle('petal_length', 'petal_width', source=source)
show(p)

演示布局：把多个图表（plot）放在一个布局（layout）里

行与列：bokeh.layouts 模块提供了 row 和 column 功能以便在垂直和水平布局中排列图表
网格图：Bokeh还在 bokeh.layouts 模块中提供了一个 gridplot 布局，以便把图表排列成一个网格图

from bokeh.layouts import row

x = list(range(11))
y0, y1, y2 = x, [10-i for i in x], [abs(i-5) for i in x]

# create a new plot
s1 = figure(width=250, plot_height=250)
s1.circle(x, y0, size=10, color="navy", alpha=0.5)

# create another one
s2 = figure(width=250, height=250)
s2.triangle(x, y1, size=10, color="firebrick", alpha=0.5)

# create and another
s3 = figure(width=250, height=250)
s3.square(x, y2, size=10, color="olive", alpha=0.5)

# show the results in a row
show(row(s1, s2, s3))

from bokeh.layouts import gridplot

# create a new plot
s1 = figure(width=250, plot_height=250)
s1.circle(x, y0, size=10, color="navy", alpha=0.5)

# create another one
s2 = figure(width=250, height=250)
s2.triangle(x, y1, size=10, color="firebrick", alpha=0.5)

# create and another
s3 = figure(width=250, height=250)
s3.square(x, y2, size=10, color="olive", alpha=0.5)

# put all the plots in a gridplot
p = gridplot([[s1, s2], [s3, None]], toolbar_location=None)

# show the results
show(p)

关联和交互：把不同的图表联系起来，把图表和控件（widget）联系起来

Linked Interactions：
- 不同的Bokeh图表之间是可以有联系的，两个（或更多）图的范围可能是由联系的，当一个图被平移（或放大，或其范围变化），其它的图也相应更新保持一致。还可以将两个图的选择（selection）联系起来，当在一个图上选择某项时，另一个图中对应的项也被选中。
Linked panning（相连的平移）
Linked brushing
- 相连的选择（selection）通过类似的方式完成，在图块之间共享数据源。注意，通常 bokeh.plotting 和 bokeh.charts 会自动创建一个缺省数据源。然而，为了共享数据源，我们必须手动创建它们并显式地传递。
Hover Tools（悬停工具）
- Bokeh有一个悬停的工具，当用户将鼠标悬停在一个特定的标记符号（glyph）上时，可以在一个弹出框里显示更多的信息。基本的悬停工具配置相当于提供一个 (name, format) 元组列表。
Widgets（控件）
- Bokeh直接集成了一个小的基本控件集。这些控件和Bokeh服务器或 CustomJS 模型协作使用可以加入更多的互动的功能。您可以在用户指南的 Adding Widgets 部分看到完整的widget列表和示例代码。
- CustomJS Callbacks（CustomJS回调）

from bokeh.layouts import gridplot

x = list(range(11))
y0, y1, y2 = x, [10-i for i in x], [abs(i-5) for i in x]

plot_options = dict(width=250, plot_height=250, tools='pan,wheel_zoom')

# create a new plot
s1 = figure(**plot_options)
s1.circle(x, y0, size=10, color="navy")

# create a new plot and share both ranges
s2 = figure(x_range=s1.x_range, y_range=s1.y_range, **plot_options)
s2.triangle(x, y1, size=10, color="firebrick")

# create a new plot and share only one range
s3 = figure(x_range=s1.x_range, **plot_options)
s3.square(x, y2, size=10, color="olive")

p = gridplot([[s1, s2, s3]])

# show the results
show(p)

from bokeh.models import ColumnDataSource

x = list(range(-20, 21))
y0, y1 = [abs(xx) for xx in x], [xx**2 for xx in x]

# create a column data source for the plots to share
source = ColumnDataSource(data=dict(x=x, y0=y0, y1=y1))

TOOLS = "box_select,lasso_select,help"

# create a new plot and add a renderer
left = figure(tools=TOOLS, width=300, height=300)
left.circle('x', 'y0', source=source)

# create another new plot and add a renderer
right = figure(tools=TOOLS, width=300, height=300)
right.circle('x', 'y1', source=source)

p = gridplot([[left, right]])

show(p)

from bokeh.models import HoverTool

source = ColumnDataSource(
        data=dict(
            x=[1, 2, 3, 4, 5],
            y=[2, 5, 8, 2, 7],
            desc=['A', 'b', 'C', 'd', 'E'],
        )
    )

hover = HoverTool(
        tooltips=[
            ("index", "$index"),
            ("(x,y)", "($x, $y)"),
            ("desc", "@desc"),
        ]
    )

p = figure(plot_width=300, plot_height=300, tools=[hover], title="Mouse over the dots")

p.circle('x', 'y', size=20, source=source)

show(p)

from bokeh.layouts import widgetbox
from bokeh.models.widgets import Slider


slider = Slider(start=0, end=10, value=1, step=.1, title="foo")

show(widgetbox(slider))

1	BokehDeprecationWarning: 'WidgetBox' is deprecated and will be removed in Bokeh 3.0, use 'bokeh.models.Column' instead

from bokeh.models import TapTool, CustomJS, ColumnDataSource

callback = CustomJS(code="alert('hello world')")
tap = TapTool(callback=callback)

p = figure(plot_width=600, plot_height=300, tools=[tap])

p.circle(x=[1, 2, 3, 4, 5], y=[2, 5, 8, 2, 7], size=20)

show(p)

from bokeh.layouts import column
from bokeh.models import CustomJS, ColumnDataSource, Slider

x = [x*0.005 for x in range(0, 201)]

source = ColumnDataSource(data=dict(x=x, y=x))

plot = figure(plot_width=400, plot_height=400)
plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)

slider = Slider(start=0.1, end=6, value=1, step=.1, title="power")

update_curve = CustomJS(args=dict(source=source, slider=slider), code="""
    var data = source.get('data');
    var f = slider.value;
    x = data['x']
    y = data['y']
    for (i = 0; i < x.length; i++) {
        y[i] = Math.pow(x[i], f)
    }
    source.change.emit();
""")
slider.js_on_change('value', update_curve)


show(column(slider, plot))

from random import random

x = [random() for x in range(500)]
y = [random() for y in range(500)]
color = ["navy"] * len(x)

s = ColumnDataSource(data=dict(x=x, y=y, color=color))
p = figure(plot_width=400, plot_height=400, tools="lasso_select", title="Select Here")
p.circle('x', 'y', color='color', size=8, source=s, alpha=0.4)

s2 = ColumnDataSource(data=dict(xm=[0,1],ym=[0.5, 0.5]))
p.line(x='xm', y='ym', color="orange", line_width=5, alpha=0.6, source=s2)

s.callback = CustomJS(args=dict(s2=s2), code="""
    var inds = cb_obj.get('selected')['1d'].indices;
    var d = cb_obj.get('data');
    var ym = 0
  
    if (inds.length == 0) { return; }
  
    for (i = 0; i < d['color'].length; i++) {
        d['color'][i] = "navy"
    }
    for (i = 0; i < inds.length; i++) {
        d['color'][inds[i]] = "firebrick"
        ym += d['y'][inds[i]]
    }
  
    ym /= inds.length
    s2.get('data')['ym'] = [ym, ym]
  
    cb_obj.trigger('change');
    s2.trigger('change');
""")

show(p)

---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-47-0b628e0d1f9d> in <module>
     12 p.line(x='xm', y='ym', color="orange", line_width=5, alpha=0.6, source=s2)
     13 
---> 14 s.callback = CustomJS(args=dict(s2=s2), code="""
     15     var inds = cb_obj.get('selected')['1d'].indices;
     16     var d = cb_obj.get('data');


E:\ProgramFiles\Anaconda\lib\site-packages\bokeh\core\has_props.py in __setattr__(self, name, value)
    283                 matches, text = props, "possible"
    284 
--> 285             raise AttributeError("unexpected attribute '%s' to %s, %s attributes are %s" %
    286                 (name, self.__class__.__name__, text, nice_join(matches)))
    287 


AttributeError: unexpected attribute 'callback' to ColumnDataSource, similar attributes are js_event_callbacks

1	$ bokeh sampledata

1	from bokeh.plotting import figure

import pandas as pd

from bokeh.palettes import Spectral4
from bokeh.plotting import figure, output_file, show
from bokeh.sampledata.stocks import AAPL, GOOG, IBM, MSFT

p = figure(plot_width=800, plot_height=250, x_axis_type="datetime")
p.title.text = 'Click on legend entries to hide the corresponding lines'

for data, name, color in zip([AAPL, IBM, MSFT, GOOG], ["AAPL", "IBM", "MSFT", "GOOG"], Spectral4):
    df = pd.DataFrame(data) # 将字典转换为DataFrame
    df['date'] = pd.to_datetime(df['date']) 
    p.line(df['date'], df['close'], line_width=2, color=color, alpha=0.8, legend_label=name)

p.legend.location = "top_left"
p.legend.click_policy="hide"

# output_file("interactive_legend.html", title="interactive_legend.py example")

show(p)

from bokeh.io import show
from bokeh.models import Button, CustomJS

button = Button(label="Foo", button_type="success")
button.js_on_click(CustomJS(code="console.log('button: click!', this.toString())"))

show(button)

from bokeh.io import show
from bokeh.models import CheckboxButtonGroup, CustomJS

LABELS = ["Option 1", "Option 2", "Option 3"]

checkbox_button_group = CheckboxButtonGroup(labels=LABELS, active=[0, 1])
checkbox_button_group.js_on_click(CustomJS(code="""
    console.log('checkbox_button_group: active=' + this.active, this.toString())
"""))

show(checkbox_button_group)

from bokeh.io import show
from bokeh.models import CheckboxGroup, CustomJS

LABELS = ["Option 1", "Option 2", "Option 3"]

checkbox_group = CheckboxGroup(labels=LABELS, active=[0, 1])
checkbox_group.js_on_click(CustomJS(code="""
    console.log('checkbox_group: active=' + this.active, this.toString())
"""))

show(checkbox_group)

1 2	temp = zip([1,2],['a','b']) temp

1	<zip at 0x1cad5461880>

zip?

[1;31mInit signature:[0m [0mzip[0m[1;33m([0m[0mself[0m[1;33m,[0m [1;33m/[0m[1;33m,[0m [1;33m*[0m[0margs[0m[1;33m,[0m [1;33m**[0m[0mkwargs[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m   
zip(*iterables) --> zip object

Return a zip object whose .__next__() method returns a tuple where
the i-th element comes from the i-th iterable argument.  The .__next__()
method continues until the shortest iterable in the argument sequence
is exhausted and then it raises StopIteration.
[1;31mType:[0m           type
[1;31mSubclasses:[0m

import pandas as pd
import numpy as np

dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))

df

1	df[df > 0]

1	df[df['A'].isin(['a', 'b'])]

import matplotlib.pyplot as plt
import numpy as np
T = np.array([6, 7, 8, 9, 10, 11, 12])
power = np.array([1.53E+03, 5.92E+02, 2.04E+02, 7.24E+01, 2.72E+01, 1.10E+01, 4.70E+00])
plt.plot(T,power)
plt.show()

import matplotlib.pyplot as plt
import numpy as np
T = np.array([6, 7, 8, 9, 10, 11, 12])
power = np.array([1.53E+03, 5.92E+02, 2.04E+02, 7.24E+01, 2.72E+01, 1.10E+01, 4.70E+00])

from scipy.interpolate import spline
xnew = np.linspace(T.min(),T.max(),300) #300 represents number of points to make between T.min and T.max
power_smooth = spline(T,power,xnew)
plt.plot(xnew,power_smooth)
plt.show()

---------------------------------------------------------------------------

ImportError                               Traceback (most recent call last)

<ipython-input-2-63c9a1a8c2a4> in <module>
      4 power = np.array([1.53E+03, 5.92E+02, 2.04E+02, 7.24E+01, 2.72E+01, 1.10E+01, 4.70E+00])
      5 
----> 6 from scipy.interpolate import spline
      7 xnew = np.linspace(T.min(),T.max(),300) #300 represents number of points to make between T.min and T.max
      8 power_smooth = spline(T,power,xnew)


ImportError: cannot import name 'spline' from 'scipy.interpolate' (E:\ProgramFiles\Anaconda\lib\site-packages\scipy\interpolate\__init__.py)

OS模块

os.getcwd()
os.name()
os.path()
os.listdir()
os.rename()

import os
print(os.getcwd())
print(os.name)
os.mkdir('./OS创建文件夹')

E:\GitHubWP\JupyterWP
nt



---------------------------------------------------------------------------

FileExistsError                           Traceback (most recent call last)

<ipython-input-1-d2c10eafdc92> in <module>
      2 print(os.getcwd())
      3 print(os.name)
----> 4 os.mkdir('./OS创建文件夹')


FileExistsError: [WinError 183] 当文件已存在时，无法创建该文件。: './OS创建文件夹'

import os.path as op

# isdir(path)
# isfile(path)
# splitext(path) # 分开后缀
# join()
# exists() # 判断文件（夹）是否存在
# getsize() 
# getctime() # 获取创建时间,返回时间戳（1970-01-01，00:00:00）

1 2	results = op.abspath('./OS创建文件夹') # 返回绝对路径 results

1	'E:\\GitHubWP\\JupyterWP\\OS创建文件夹'

# dir(op)

1 2	result = op.getctime('./OS创建文件夹') print(result)

1	1609938276.518

import time
print(result)
timearr = time.localtime(result)
print(timearr)
mytime = time.strftime('%Y-%m-%d %H:%M:%S',timearr)
print(mytime)

1
2
3

1609938276.518
time.struct_time(tm_year=2021, tm_mon=1, tm_mday=6, tm_hour=21, tm_min=4, tm_sec=36, tm_wday=2, tm_yday=6, tm_isdst=0)
2021-01-06 21:04:36

1	os.listdir(os.getcwd())[:5] # os.listdir会列出隐藏文件

['.ipynb_checkpoints',
 '0. python日积月累.ipynb',
 '2014-master',
 'alien_invasion',
 'All数据寻峰-面积-元素-含量表.xlsx']

# 批量修改文件名
# 1.获取指定的路径
# 2.获取路径下的文件名
# 3.修改文件名
# 4.批量？循环 or 函数！

import os
import os.path as op
def mkdir(dirname):
    path = op.join(os.getcwd(),dirname)
    folder = op.exists(path)
 
    if not folder:                   #判断是否存在文件夹如果不存在则创建为文件夹
        os.makedirs(path)            #makedirs 创建文件时如果路径不存在会创建这个路径

mkdir('testdir\子目录')

json模块

JSON（JavaScript Object Notation)是一种轻量级的数据交换格式，易于人阅读和编写，同也易于机器解析和生成，并有效地提升网络传输效率。

给字典加上引号，变成字符串，即json格式字符串。

json_str = '{"name":"张三"}'
json_str1 = '["name","张三"]'

# 将json格式的str转换成python的数据格式
import json
result = json.loads(json_str)
print(result,type(result))
result1 = json.loads(json_str1)
print(result1,type(result1))

result2 = json.dumps(result,ensure_ascii=False)
print(result2,type(result2))

1
2
3

{'name': '张三'} <class 'dict'>
['name', '张三'] <class 'list'>
{"name": "张三"} <class 'str'>

基础知识新知

函数新知

“代码有很多种坏味道，重复是最坏的一种”

默认参数
可变参数
同名函数：python没有函数重载的概念，后面的定义会覆盖前面的定义。

用模块管理

模块中编写执行代码
if __name__ == '__main__'
变量作用域

格式化输出的几种方式

1 2	a, b = 5, 10 print('%d * %d = %d' % (a, b, a * b))

1	5 * 10 = 50

1 2	a, b = 5, 10 print('{0} * {1} = {2}'.format(a, b, a * b))

1	5 * 10 = 50

1 2	a, b = 5, 10 print(f'{a} * {b} = {a * b}')

1	5 * 10 = 50

列表

[[1,2]]*5

1	[[1, 2], [1, 2], [1, 2], [1, 2], [1, 2]]

输入

1	a,b =map(int,input('输入a,b逗号隔开:').split(','))

1	输入a,b逗号隔开: 1,2

分形

import numpy as np
import matplotlib.pyplot as plt
def mandelbrot( h,w, maxit=20 ):
    """Returns an image of the Mandelbrot fractal of size (h,w)."""
    y,x = np.ogrid[ -1.4:1.4:h*1j, -2:0.8:w*1j ]
    c = x+y*1j
    z = c
    divtime = maxit + np.zeros(z.shape, dtype=int)

    for i in range(maxit):
        z = z**2 + c
        diverge = z*np.conj(z) > 2**2            # who is diverging
        div_now = diverge & (divtime==maxit)  # who is diverging now
        divtime[div_now] = i                  # note when
        z[diverge] = 2                        # avoid diverging too much

    return divtime
plt.imshow(mandelbrot(400,400))
#plt.savefig('scatter.eps',dpi=600,format='eps')
plt.show()

批量下载脚本

# 该例子来源于知乎

import urllib.request# url request
import re            # regular expression
import os            # dirs
import time

'''
url 下载网址
pattern 正则化的匹配关键词
Directory 下载目录
'''
def BatchDownload(url,pattern,Directory):
  
    # 拉动请求，模拟成浏览器去访问网站->跳过反爬虫机制
    headers = {'User-Agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'}
    opener = urllib.request.build_opener()
    opener.addheaders = [headers]
  
    # 获取网页内容
    content = opener.open(url).read().decode('utf8')
  
    # 构造正则表达式，从content中匹配关键词pattern
    raw_hrefs = re.findall(pattern, content, 0)
  
    # set函数消除重复元素
    hset = set(raw_hrefs)
       
    # 下载链接
    for href in hset:
        # 之所以if else 是为了区别只有一个链接的特别情况
        if(len(hset)>1):
            link = url + href[0]
            filename = os.path.join(Directory, href[0])
            print("正在下载",filename)
            urllib.request.urlretrieve(link, filename)
            print("成功下载！")
        else:
            link = url +href
            filename = os.path.join(Directory, href)
            print("正在下载",filename)
            urllib.request.urlretrieve(link, filename)
            print("成功下载！")
          
        # 无sleep间隔，网站认定这种行为是攻击，反反爬虫
        time.sleep(1)
      
#BatchDownload('https://www1.ncdc.noaa.gov/pub/data/swdi/stormevents/csvfiles/',
#              '(Storm-Data-Export-Format.docx)',
#              'E:\stormevents\csvfiles')
      
#BatchDownload('https://www1.ncdc.noaa.gov/pub/data/swdi/stormevents/csvfiles/',
#              '(Storm-Data-Export-Format.pdf)',
#              'E:\stormevents\csvfiles')
      
#BatchDownload('https://www1.ncdc.noaa.gov/pub/data/swdi/stormevents/csvfiles/',
#              '(StormEvents_details-ftp_v1.0_d(\d*)_c(\d*).csv.gz)',
#              'E:\stormevents\csvfiles')
      
#BatchDownload('https://www1.ncdc.noaa.gov/pub/data/swdi/stormevents/csvfiles/',
#              '(StormEvents_fatalities-ftp_v1.0_d(\d*)_c(\d*).csv.gz)',
#              'E:\stormevents\csvfiles')

#BatchDownload('https://www1.ncdc.noaa.gov/pub/data/swdi/stormevents/csvfiles/',
#              '(StormEvents_locations-ftp_v1.0_d(\d*)_c(\d*).csv.gz)',
#              'E:\stormevents\csvfiles')

#BatchDownload('https://www1.ncdc.noaa.gov/pub/data/swdi/stormevents/csvfiles/legacy/',
#              '(ugc_areas.csv)',
#              'E:\stormevents\csvfiles\legacy')

#BatchDownload('https://www1.ncdc.noaa.gov/pub/data/swdi/stormevents/csvfiles/',
#              '(ugc_areas.csv)',
#              'E:\stormevents\csvfiles')

3Blue1Brown 线性代数

向量一般用列向量表示
线性变换矩阵：由变换后的基向量组成
- 逆时针旋转九十度：
- 旋转矩阵：
- 线性变换是操作空间的一种手段
- 列线性相关，则降维
- 将矩阵看做空间变换
矩阵乘法与线性变换复合
- 矩阵运算
- 矩阵乘法不满足交换律
- 矩阵乘法满足结合律（交换变换顺序）
三维空间中的线性变换
行列式
- 行列式可以理解为变换前后的伸缩比例，复数表示改变了图形的定向（感觉上像是将空间翻转了：比如，j从i的左边变换到了右边）（三维空间中的定向：右手定则）
- 只需要检验一个矩阵的行列式是否为零，就可以了解矩阵所代表的变换是否将空间压缩到更小的维度上
- 计算公式：

$\operatorname{det}\left(\left[\begin{array}{cc}a & b \\ c & d\end{array}\right]\right)=(a+b)(c+d)-a c-b d-2 b c=a d-b c$

bc项可理解为平行四边形法则中对角线的伸缩比例。

$\operatorname{det}\left(M_{1} M_{2}\right)=\operatorname{det}\left(M_{1}\right) \operatorname{det}\left(M_{2}\right)$

可用伸缩比例理解。

逆矩阵、列空间、秩、零空间
- 矩阵的用途：
  - 操作空间
  - 求解线性方程组
- 线性方程组：求解一个向量经系数矩阵变换后得到已知的向量。
  - 系数矩阵的行列式为零
  - 系数矩阵的行列式不为零
    - 逆变换求解：逆矩阵
    - 理解矩阵与逆矩阵的乘法等于单位矩阵：恒等变换
    - 矩阵行列式不为零，存在逆矩阵。
    - 矩阵行列式为零，则矩阵变换将空间压缩到更低的维度上，没有逆矩阵（例，不能将一条线“解压缩”为一个平面）
    - 即便行列式为零，仍然可能存在解（例，已知向量正好在压缩后的直线上）
- 秩
  - 秩代表着变换后空间的维数。
- 列空间：矩阵的列所张成的空间
  - 秩是列空间的维数
  - 满秩
  - 零向量一定为被包含在列空间中（因为线性变换必须保持原点位置不变；线性变换是基于原点的旋转伸缩变换）
- 零空间：
  - 核：变换后落在原点的向量的集合，称为矩阵的零空间或核。
  - 当已知向量为零向量时，零空间的向量就是对应的解。
- 非方阵的情况
  - 2*矩阵的集合几何意义是将二维空间映射到三维空间上，该矩阵的列空间是三维空间中一个过原点的二维平面。
  - 其他变换：二维到一维，点积？
点积与对偶性（daulity）
- 几何解释：一个向量的长度乘以另一个向量的投影长度
- 交换顺序，结果不变
- 投影矩阵：与单位向量的点积可以理解为将向量投影到单位向量所在的直线上所得到的投影长度。
- 点积：列向量转转置为行向量，即投影矩阵。
叉积
- 不严谨解释
  - 平行四边形的面积
  - 定向：i*j = 1;i在j的右侧为正，否则为负；夹角的方向定义为从前向量转到后向量的方向。
  - 二维向量可用行列式计算。（转置不改变行列式的值）
  - 向量垂直时，面积最大
- 严谨解释
  - 真正的叉积是通过两个三维向量产生一个新的三维向量。
- 混合积：
- 对偶向量
基变换：不同坐标系（不同视角）
- 原点相同；基底不同
- 对方的基底在我方坐标系下的坐标组成变换矩阵A.（该矩阵将我方坐标网格转换为对方网格：对同一向量而言，则相当于将对方语言翻译为我方语言）
- 对方视角下的向量v,在我方看来则为Av.(类似将对方的语言翻译为我方的语言)
- 该矩阵A取逆,记为invA，将对方网格转换为我方网格，对同一向量而言，相当于将我方语言转换为对方语言。invA v.

线性变换不同于坐标变换，线性变换是向量在同一坐标系下的伸缩转动；坐标变换是同一向量在不同坐标系下的坐标表示。

1
2

- 如何变换一个矩阵：对方基向量在我方表示为A.我方做了一个变换M,则其基向量跟着变换为MA，但这是我方的语言，在对方看来，应为 invAMA.
- invAMA暗示一种数学上的变换，中间的矩阵代表一种变换（例如，旋转90度），两边的矩阵代表视角上的变化。

特征值与特征向量
- 特征向量：在变换中留在其张成空间中的向量；变化仅有伸缩倍数，即特征值。特征值可以为负值。
- 三维旋转：三维空间中的特征向量就是旋转轴；把一个三维旋转看成绕某个轴旋转一定角度，比考虑响应的3*3矩阵直观得多。（这种情况下特征值必须为1，因为只旋转，无伸缩）
- 求解特征值与特征向量Mv=bv
  - 改变形式使右边为零：（M-bI）v=0
  - 右边为零向量，即被压缩，则行列式为0.可以求得特征值；特征值非实数表明没有特征向量
  - 再根据特征值，像求解线性方程组那样，可以求解特征向量。
- 存在一个特征值，多个特征向量的情况，比如讲坐标拉伸2倍，则特征值为2，所有的向量都是特征向量。
- 特殊情况：基向量是特征向量；对角矩阵；对角元是特征值
  - 对角矩阵乘法的计算
  - 用特征向量作为基，进行变换；换到特征向量作基底的视角下，该视角下的变换矩阵必然是对角的，且对角元为特征值。（前提，特征向量能张成全空间）；可应用于矩阵运算
抽象向量空间
- 行列式和特征向量不受所选坐标系的影响
  - 行列式表示缩放比例
  - 特征向量表示在变换中留在它所张成的空间中的向量
  - 你可以自由选取坐标系，这并不会改变它们最根本的值。
- 空间性
- 函数式另一种向量？
  - 向量的所有特征均可应用于函数
    - 相加
    - 数乘
    - 线性变换：符合函数、求导
  - 线性算子：一个函数式线性的？
    - 线性的严格定义：可加性；成比例（一阶齐次）
    - 线性变换保持向量加法运算和数乘运算
    - 求导是线性运算
    - 基函数
    - 求导算符的矩阵：选择基函数，建立矩阵
- 向量是什么？向量的形式不重要，只要满足向量定义的一组格则，它可以是任何东西。
克莱姆法则，几何解释
- 计算线性方程组
  - 高斯消元法
  - 克莱姆法则
  不改变点积的变换称为正交变换，例如旋转矩阵

pyecharts

1 2	from pyecharts.globals import CurrentConfig, NotebookType CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB

from pyecharts.charts import Bar

bar = Bar()
bar.add_xaxis(["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"])
bar.add_yaxis("商家A", [5, 20, 36, 10, 75, 90])
# render 会生成本地 HTML 文件，默认会在当前目录生成 render.html 文件
# 也可以传入路径参数，如 bar.render("mycharts.html")
bar.render_notebook()

<!DOCTYPE html>

Geoprocessing with python

1	from osgeo import ogr

1 2	driver = ogr.GetDriverByName('geojson') print(driver)

1	<osgeo.ogr.Driver; proxy of <Swig Object of type 'OGRDriverShadow *' at 0x00000228D2B5C330> >

1 2	import ospybook as pb pb.print.drivers()

---------------------------------------------------------------------------

ModuleNotFoundError                       Traceback (most recent call last)

<ipython-input-3-4937f463c8c4> in <module>
----> 1 import ospybook as pb
      2 pb.print.drivers()


ModuleNotFoundError: No module named 'ospybook'

1	# %pip install ospybook

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement ospybook (from versions: none)
ERROR: No matching distribution found for ospybook

"""
Definition of colour schemes for lines and maps that also work for colour-blind
people. See https://personal.sron.nl/~pault/ for background information and
best usage of the schemes.

Copyright (c) 2019, Paul Tol
All rights reserved.

License:  Standard 3-clause BSD
"""
import numpy as np
from matplotlib.colors import LinearSegmentedColormap, to_rgba_array


def discretemap(colormap, hexclrs):
    """
    Produce a colormap from a list of discrete colors without interpolation.
    """
    clrs = to_rgba_array(hexclrs)
    clrs = np.vstack([clrs[0], clrs, clrs[-1]])
    cdict = {}
    for ki, key in enumerate(('red','green','blue')):
        cdict[key] = [ (i/(len(clrs)-2.), clrs[i, ki], clrs[i+1, ki]) for i in range(len(clrs)-1) ]
    return LinearSegmentedColormap(colormap, cdict)


class TOLcmaps(object):
    """
    Class TOLcmaps definition.
    """
    def __init__(self):
        """
        """
        self.cmap = None
        self.cname = None
        self.namelist = (
            'sunset_discrete', 'sunset', 'BuRd_discrete', 'BuRd',
            'PRGn_discrete', 'PRGn', 'YlOrBr_discrete', 'YlOrBr', 'WhOrBr',
            'iridescent', 'rainbow_PuRd', 'rainbow_PuBr', 'rainbow_WhRd',
            'rainbow_WhBr', 'rainbow_discrete')

        self.funcdict = dict(
            zip(self.namelist,
                (self.__sunset_discrete, self.__sunset, self.__BuRd_discrete,
                self.__BuRd, self.__PRGn_discrete, self.__PRGn,
                self.__YlOrBr_discrete, self.__YlOrBr, self.__WhOrBr,
                self.__iridescent, self.__rainbow_PuRd, self.__rainbow_PuBr,
                self.__rainbow_WhRd, self.__rainbow_WhBr,
                self.__rainbow_discrete)))

    def __sunset_discrete(self):
        """
        Define colormap 'sunset_discrete'.
        """
        clrs = ['#364B9A', '#4A7BB7', '#6EA6CD', '#98CAE1', '#C2E4EF',
                '#EAECCC', '#FEDA8B', '#FDB366', '#F67E4B', '#DD3D2D',
                '#A50026']
        self.cmap = discretemap(self.cname, clrs)
        self.cmap.set_bad('#FFFFFF')

    def __sunset(self):
        """
        Define colormap 'sunset'.
        """
        clrs = ['#364B9A', '#4A7BB7', '#6EA6CD', '#98CAE1', '#C2E4EF',
                '#EAECCC', '#FEDA8B', '#FDB366', '#F67E4B', '#DD3D2D',
                '#A50026']
        self.cmap = LinearSegmentedColormap.from_list(self.cname, clrs)
        self.cmap.set_bad('#FFFFFF')

    def __BuRd_discrete(self):
        """
        Define colormap 'BuRd_discrete'.
        """
        clrs = ['#2166AC', '#4393C3', '#92C5DE', '#D1E5F0', '#F7F7F7',
                '#FDDBC7', '#F4A582', '#D6604D', '#B2182B']
        self.cmap = discretemap(self.cname, clrs)
        self.cmap.set_bad('#FFEE99')

    def __BuRd(self):
        """
        Define colormap 'BuRd'.
        """
        clrs = ['#2166AC', '#4393C3', '#92C5DE', '#D1E5F0', '#F7F7F7',
                '#FDDBC7', '#F4A582', '#D6604D', '#B2182B']
        self.cmap = LinearSegmentedColormap.from_list(self.cname, clrs)
        self.cmap.set_bad('#FFEE99')

    def __PRGn_discrete(self):
        """
        Define colormap 'PRGn_discrete'.
        """
        clrs = ['#762A83', '#9970AB', '#C2A5CF', '#E7D4E8', '#F7F7F7',
                '#D9F0D3', '#ACD39E', '#5AAE61', '#1B7837']
        self.cmap = discretemap(self.cname, clrs)
        self.cmap.set_bad('#FFEE99')

    def __PRGn(self):
        """
        Define colormap 'PRGn'.
        """
        clrs = ['#762A83', '#9970AB', '#C2A5CF', '#E7D4E8', '#F7F7F7',
                '#D9F0D3', '#ACD39E', '#5AAE61', '#1B7837']
        self.cmap = LinearSegmentedColormap.from_list(self.cname, clrs)
        self.cmap.set_bad('#FFEE99')

    def __YlOrBr_discrete(self):
        """
        Define colormap 'YlOrBr_discrete'.
        """
        clrs = ['#FFFFE5', '#FFF7BC', '#FEE391', '#FEC44F', '#FB9A29',
                '#EC7014', '#CC4C02', '#993404', '#662506']
        self.cmap = discretemap(self.cname, clrs)
        self.cmap.set_bad('#888888')

    def __YlOrBr(self):
        """
        Define colormap 'YlOrBr'.
        """
        clrs = ['#FFFFE5', '#FFF7BC', '#FEE391', '#FEC44F', '#FB9A29',
                '#EC7014', '#CC4C02', '#993404', '#662506']
        self.cmap = LinearSegmentedColormap.from_list(self.cname, clrs)
        self.cmap.set_bad('#888888')

    def __WhOrBr(self):
        """
        Define colormap 'WhOrBr'.
        """
        clrs = ['#FFFFFF', '#FFF7BC', '#FEE391', '#FEC44F', '#FB9A29',
                '#EC7014', '#CC4C02', '#993404', '#662506']
        self.cmap = LinearSegmentedColormap.from_list(self.cname, clrs)
        self.cmap.set_bad('#888888')

    def __iridescent(self):
        """
        Define colormap 'iridescent'.
        """
        clrs = ['#FEFBE9', '#FCF7D5', '#F5F3C1', '#EAF0B5', '#DDECBF',
                '#D0E7CA', '#C2E3D2', '#B5DDD8', '#A8D8DC', '#9BD2E1',
                '#8DCBE4', '#81C4E7', '#7BBCE7', '#7EB2E4', '#88A5DD',
                '#9398D2', '#9B8AC4', '#9D7DB2', '#9A709E', '#906388',
                '#805770', '#684957', '#46353A']
        self.cmap = LinearSegmentedColormap.from_list(self.cname, clrs)
        self.cmap.set_bad('#999999')

    def __rainbow_PuRd(self):
        """
        Define colormap 'rainbow_PuRd'.
        """
        clrs = ['#6F4C9B', '#6059A9', '#5568B8', '#4E79C5', '#4D8AC6',
                '#4E96BC', '#549EB3', '#59A5A9', '#60AB9E', '#69B190',
                '#77B77D', '#8CBC68', '#A6BE54', '#BEBC48', '#D1B541',
                '#DDAA3C', '#E49C39', '#E78C35', '#E67932', '#E4632D',
                '#DF4828', '#DA2222']
        self.cmap = LinearSegmentedColormap.from_list(self.cname, clrs)
        self.cmap.set_bad('#FFFFFF')

    def __rainbow_PuBr(self):
        """
        Define colormap 'rainbow_PuBr'.
        """
        clrs = ['#6F4C9B', '#6059A9', '#5568B8', '#4E79C5', '#4D8AC6',
                '#4E96BC', '#549EB3', '#59A5A9', '#60AB9E', '#69B190',
                '#77B77D', '#8CBC68', '#A6BE54', '#BEBC48', '#D1B541',
                '#DDAA3C', '#E49C39', '#E78C35', '#E67932', '#E4632D',
                '#DF4828', '#DA2222', '#B8221E', '#95211B', '#721E17',
                '#521A13']
        self.cmap = LinearSegmentedColormap.from_list(self.cname, clrs)
        self.cmap.set_bad('#FFFFFF')

    def __rainbow_WhRd(self):
        """
        Define colormap 'rainbow_WhRd'.
        """
        clrs = ['#E8ECFB', '#DDD8EF', '#D1C1E1', '#C3A8D1', '#B58FC2',
                '#A778B4', '#9B62A7', '#8C4E99', '#6F4C9B', '#6059A9',
                '#5568B8', '#4E79C5', '#4D8AC6', '#4E96BC', '#549EB3',
                '#59A5A9', '#60AB9E', '#69B190', '#77B77D', '#8CBC68',
                '#A6BE54', '#BEBC48', '#D1B541', '#DDAA3C', '#E49C39',
                '#E78C35', '#E67932', '#E4632D', '#DF4828', '#DA2222']
        self.cmap = LinearSegmentedColormap.from_list(self.cname, clrs)
        self.cmap.set_bad('#666666')

    def __rainbow_WhBr(self):
        """
        Define colormap 'rainbow_WhBr'.
        """
        clrs = ['#E8ECFB', '#DDD8EF', '#D1C1E1', '#C3A8D1', '#B58FC2',
                '#A778B4', '#9B62A7', '#8C4E99', '#6F4C9B', '#6059A9',
                '#5568B8', '#4E79C5', '#4D8AC6', '#4E96BC', '#549EB3',
                '#59A5A9', '#60AB9E', '#69B190', '#77B77D', '#8CBC68',
                '#A6BE54', '#BEBC48', '#D1B541', '#DDAA3C', '#E49C39',
                '#E78C35', '#E67932', '#E4632D', '#DF4828', '#DA2222',
                '#B8221E', '#95211B', '#721E17', '#521A13']
        self.cmap = LinearSegmentedColormap.from_list(self.cname, clrs)
        self.cmap.set_bad('#666666')

    def __rainbow_discrete(self, lut=None):
        """
        Define colormap 'rainbow_discrete'.
        """
        clrs = ['#E8ECFB', '#D9CCE3', '#D1BBD7', '#CAACCB', '#BA8DB4',
                '#AE76A3', '#AA6F9E', '#994F88', '#882E72', '#1965B0',
                '#437DBF', '#5289C7', '#6195CF', '#7BAFDE', '#4EB265',
                '#90C987', '#CAE0AB', '#F7F056', '#F7CB45', '#F6C141',
                '#F4A736', '#F1932D', '#EE8026', '#E8601C', '#E65518',
                '#DC050C', '#A5170E', '#72190E', '#42150A']
        indexes = [[9], [9, 25], [9, 17, 25], [9, 14, 17, 25], [9, 13, 14, 17,
                25], [9, 13, 14, 16, 17, 25], [8, 9, 13, 14, 16, 17, 25], [8,
                9, 13, 14, 16, 17, 22, 25], [8, 9, 13, 14, 16, 17, 22, 25, 27],
                [8, 9, 13, 14, 16, 17, 20, 23, 25, 27], [8, 9, 11, 13, 14, 16,
                17, 20, 23, 25, 27], [2, 5, 8, 9, 11, 13, 14, 16, 17, 20, 23,
                25], [2, 5, 8, 9, 11, 13, 14, 15, 16, 17, 20, 23, 25], [2, 5,
                8, 9, 11, 13, 14, 15, 16, 17, 19, 21, 23, 25], [2, 5, 8, 9, 11,
                13, 14, 15, 16, 17, 19, 21, 23, 25, 27], [2, 4, 6, 8, 9, 11,
                13, 14, 15, 16, 17, 19, 21, 23, 25, 27], [2, 4, 6, 7, 8, 9, 11,
                13, 14, 15, 16, 17, 19, 21, 23, 25, 27], [2, 4, 6, 7, 8, 9, 11,
                13, 14, 15, 16, 17, 19, 21, 23, 25, 26, 27], [1, 3, 4, 6, 7, 8,
                9, 11, 13, 14, 15, 16, 17, 19, 21, 23, 25, 26, 27], [1, 3, 4,
                6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 23, 25, 26,
                27], [1, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 20,
                22, 24, 25, 26, 27], [1, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15,
                16, 17, 18, 20, 22, 24, 25, 26, 27, 28], [0, 1, 3, 4, 6, 7, 8,
                9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 22, 24, 25, 26, 27, 28]]
        if lut == None or lut < 1 or lut > 23:
            lut = 22
        self.cmap = discretemap(self.cname, [ clrs[i] for i in indexes[lut-1] ])
        if lut == 23:
            self.cmap.set_bad('#777777')
        else:
            self.cmap.set_bad('#FFFFFF')

    def show(self):
        """
        List names of defined colormaps.
        """
        print(' '.join(repr(n) for n in self.namelist))

    def get(self, cname='rainbow_PuRd', lut=None):
        """
        Return requested colormap, default is 'rainbow_PuRd'.
        """
        self.cname = cname
        if cname == 'rainbow_discrete':
            self.__rainbow_discrete(lut)
        else:
            self.funcdict[cname]()
        return self.cmap


def tol_cmap(colormap=None, lut=None):
    """
    Continuous and discrete color sets for ordered data.
  
    Return a matplotlib colormap.
    Parameter lut is ignored for all colormaps except 'rainbow_discrete'.
    """
    obj = TOLcmaps()
    if colormap == None:
        return obj.namelist
    if colormap not in obj.namelist:
        colormap = 'rainbow_PuRd'
        print('*** Warning: requested colormap not defined,',
              'known colormaps are {}.'.format(obj.namelist),
              'Using {}.'.format(colormap))
    return obj.get(colormap, lut)


def tol_cset(colorset=None):
    """
    Discrete color sets for qualitative data.

    Define a namedtuple instance with the colors.
    Examples for: cset = tol_cset(<scheme>)
      - cset.red and cset[1] give the same color (in default 'bright' colorset)
      - cset._fields gives a tuple with all color names
      - list(cset) gives a list with all colors
    """
    from collections import namedtuple
  
    namelist = ('bright', 'high-contrast', 'vibrant', 'muted', 'light')
    if colorset == None:
        return namelist
    if colorset not in namelist:
        colorset = 'bright'
        print('*** Warning: requested colorset not defined,',
              'known colorsets are {}.'.format(namelist),
              'Using {}.'.format(colorset)) 

    if colorset == 'bright':
        cset = namedtuple('Bcset',
                    'blue red green yellow cyan purple grey black')
        return cset('#4477AA', '#EE6677', '#228833', '#CCBB44', '#66CCEE',
                    '#AA3377', '#BBBBBB', '#000000')
  
    if colorset == 'high-contrast':
        cset = namedtuple('Hcset',
                    'blue yellow red black')
        return cset('#004488', '#DDAA33', '#BB5566', '#000000')

    if colorset == 'vibrant':
        cset = namedtuple('Vcset',
                    'orange blue cyan magenta red teal grey black')
        return cset('#EE7733', '#0077BB', '#33BBEE', '#EE3377', '#CC3311',
                    '#009988', '#BBBBBB', '#000000')

    if colorset == 'muted':
        cset = namedtuple('Mcset',
                    'rose indigo sand green cyan wine teal olive purple pale_grey black')
        return cset('#CC6677', '#332288', '#DDCC77', '#117733', '#88CCEE',
                    '#882255', '#44AA99', '#999933', '#AA4499', '#DDDDDD',
                    '#000000')

    if colorset == 'light':
        cset = namedtuple('Lcset',
                    'light_blue orange light_yellow pink light_cyan mint pear olive pale_grey black')
        return cset('#77AADD', '#EE8866', '#EEDD88', '#FFAABB', '#99DDFF',
                    '#44BB99', '#BBCC33', '#AAAA00', '#DDDDDD', '#000000')


def main():

    from matplotlib import pyplot as plt

    # Change default colorset (for lines) and colormap (for maps).
#    plt.rc('axes', prop_cycle=plt.cycler('color', list(tol_cset('bright'))))
#    plt.cm.register_cmap('rainbow_PuRd', tol_cmap('rainbow_PuRd'))
#    plt.rc('image', cmap='rainbow_PuRd')

    # Show colorsets tol_cset(<scheme>).
    schemes = tol_cset()
    fig, axes = plt.subplots(ncols=len(schemes))
    fig.subplots_adjust(top=0.92, bottom=0.02, left=0.02, right=0.92)
    for ax, scheme in zip(axes, schemes):
        cset = tol_cset(scheme)
        names = cset._fields
        colors = list(cset)
        for name, color in zip(names, colors):
            ax.scatter([], [], c=color, s=80, label=name)
        ax.set_axis_off()
        ax.legend(loc=2)
        ax.set_title(scheme)
    plt.show()

    # Show colormaps tol_cmap(<scheme>). 
    schemes = tol_cmap()
    gradient = np.linspace(0, 1, 256)
    gradient = np.vstack((gradient, gradient))
    fig, axes = plt.subplots(nrows=len(schemes))
    fig.subplots_adjust(top=0.98, bottom=0.02, left=0.2, right=0.99)
    for ax, scheme in zip(axes, schemes):
        pos = list(ax.get_position().bounds)
        ax.set_axis_off()
        ax.imshow(gradient, aspect=4, cmap=tol_cmap(scheme))
        fig.text(pos[0] - 0.01, pos[1] + pos[3]/2., scheme, va='center', ha='right', fontsize=10)
    plt.show()

    # Show colormaps tol_cmap('rainbow_discrete', <lut>). 
    gradient = np.linspace(0, 1, 256)
    gradient = np.vstack((gradient, gradient))
    fig, axes = plt.subplots(nrows=23)
    fig.subplots_adjust(top=0.98, bottom=0.02, left=0.25, right=0.99)
    for lut, ax in enumerate(axes, start=1):
        pos = list(ax.get_position().bounds)
        ax.set_axis_off()
        ax.imshow(gradient, aspect=4, cmap=tol_cmap('rainbow_discrete', lut))
        fig.text(pos[0] - 0.01, pos[1] + pos[3]/2., 'rainbow_discrete, ' + str(lut), va='center', ha='right', fontsize=10)
    plt.show()


if __name__ == '__main__':
    main()

正态分布

import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

mu,sigma = 0,0.1
s = np.random.normal(mu,sigma,1000)

mean = abs(mu - np.mean(s))
print('mean:',mean)

var = abs(sigma - np.std(s,ddof=1))
print('var:',var)

count,bins,ignored = plt.hist(s,30,density=True)
plt.plot(bins,1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2)),lw=2,c='r')
plt.show()

1 2	mean: 0.005425582125086683 var: 0.0023657053391205884

差分法求解微分方程

import numpy as np
import matplotlib.pyplot as plt
rc = 2.0 #设置常数
dt = 0.5 #设置步长
n = 1000 #设置分割段数
t = 0.0 #设置初始时间
q = 1.0 #设置初始电量

#先定义三个空列表
qt=[] #用来盛放差分得到的q值
qt0=[] #用来盛放解析得到的q值
time = [] #用来盛放时间值

for i in range(n):
    t = t + dt
    q1 = q - q*dt/rc #qn+1的近似值
    q = q - 0.5*(q1*dt/rc + q*dt/rc) #差分递推关系
    q0 = np.exp(-t/rc) #解析关系
    qt.append(q) #差分得到的q值列表
    qt0.append(q0) #解析得到的q值列表
    time.append(t) #时间列表

plt.plot(time,qt,'o',label='Euler-Modify') #差分得到的电量随时间的变化
plt.plot(time,qt0,'r-',label='Analytical') #解析得到的电量随时间的变化
plt.xlabel('time')
plt.ylabel('charge')
plt.xlim(0,20)
plt.ylim(-0.2,1.0)
plt.legend(loc='upper right')
plt.show()

import numpy as np
import matplotlib.pyplot as plt

h = 0.1#空间步长
N =30#空间步数
dt = 0.0001#时间步长
M = 10000#时间的步数
A = dt/(h**2) #lambda*tau/h^2
U = np.zeros([N+1,M+1])#建立二维空数组
Space = np.arange(0,(N+1)*h,h)#建立空间等差数列，从0到3，公差是h

#边界条件
for k in np.arange(0,M+1):
    U[0,k] = 0.0
    U[N,k] = 0.0

#初始条件
for i in np.arange(0,N):
    U[i,0]=4*i*h*(3-i*h)

#递推关系
for k in np.arange(0,M):
    for i in np.arange(1,N):
        U[i,k+1]=A*U[i+1,k]+(1-2*A)*U[i,k]+A*U[i-1,k]

#不同时刻的温度随空间坐标的变化
plt.plot(Space,U[:,0], 'g-', label='t=0',linewidth=1.0)
plt.plot(Space,U[:,3000], 'b-', label='t=3/10',linewidth=1.0)
plt.plot(Space,U[:,6000], 'k-', label='t=6/10',linewidth=1.0)
plt.plot(Space,U[:,9000], 'r-', label='t=9/10',linewidth=1.0)
plt.plot(Space,U[:,10000], 'y-', label='t=1',linewidth=1.0)
plt.ylabel('u(x,t)', fontsize=20)
plt.xlabel('x', fontsize=20)
plt.xlim(0,3)
plt.ylim(-2,10)
plt.legend(loc='upper right')
plt.show()

#温度等高线随时空坐标的变化，温度越高，颜色越偏红
extent = [0,1,0,3]#时间和空间的取值范围
levels = np.arange(0,10,0.1)#温度等高线的变化范围0-10，变化间隔为0.1
plt.contourf(U,levels,origin='lower',extent=extent,cmap=plt.cm.jet)
plt.ylabel('x', fontsize=20)
plt.xlabel('t', fontsize=20)
plt.show()

最小二乘

from scipy.optimize import least_squares

def model(x, u):
    return x[0] * (u ** 2 + x[1] * u) / (u ** 2 + x[2] * u + x[3])

def fun(x, u, y):
    return model(x, u) - y

def jac(x, u, y):
    J = np.empty((u.size, x.size))
    den = u ** 2 + x[2] * u + x[3]
    num = u ** 2 + x[1] * u
    J[:, 0] = num / den
    J[:, 1] = x[0] * u / den
    J[:, 2] = -x[0] * num * u / den ** 2
    J[:, 3] = -x[0] * num / den ** 2
    return J

u = np.array([4.0, 2.0, 1.0, 5.0e-1, 2.5e-1, 1.67e-1, 1.25e-1, 1.0e-1,
              8.33e-2, 7.14e-2, 6.25e-2])
y = np.array([1.957e-1, 1.947e-1, 1.735e-1, 1.6e-1, 8.44e-2, 6.27e-2,
              4.56e-2, 3.42e-2, 3.23e-2, 2.35e-2, 2.46e-2])
x0 = np.array([2.5, 3.9, 4.15, 3.9])
res = least_squares(fun, x0, jac=jac, bounds=(0, 100), args=(u, y), verbose=2)



res.x


import matplotlib.pyplot as plt
u_test = np.linspace(0, 5)
y_test = model(res.x, u_test)
plt.plot(u, y, 'o', markersize=4, label='data')
plt.plot(u_test, y_test, label='fitted model')
plt.xlabel("u")
plt.ylabel("y")
plt.legend(loc='lower right')
plt.show()

   Iteration     Total nfev        Cost      Cost reduction    Step norm     Optimality   
       0              1         4.4383e+00                                    1.04e+02  
       1              2         9.6619e-01      3.47e+00       5.94e+00       1.37e+01  
       2              3         1.8466e-01      7.82e-01       8.49e+00       1.87e+00  
       3              4         2.9667e-02      1.55e-01       9.68e+00       2.70e-01  
       4              5         5.8265e-03      2.38e-02       8.95e+00       3.72e-02  
       5              6         3.4795e-03      2.35e-03       5.49e+00       3.57e-02  
       6              9         3.0037e-03      4.76e-04       8.95e-01       4.42e-02  
       7             10         2.4853e-03      5.18e-04       2.18e+00       5.05e-01  
       8             12         2.1802e-03      3.05e-04       7.84e-01       9.54e-02  
       9             13         1.9495e-03      2.31e-04       1.56e+00       3.77e-01  
      10             14         1.8691e-03      8.04e-05       2.60e+00       1.14e+00  
      11             15         1.5814e-03      2.88e-04       6.36e-01       8.71e-03  
      12             16         1.4525e-03      1.29e-04       1.46e+00       6.88e-02  
      13             17         1.3709e-03      8.16e-05       3.02e+00       9.07e-01  
      14             18         1.2146e-03      1.56e-04       2.84e+00       6.88e-01  
      15             20         1.1383e-03      7.62e-05       1.40e+00       1.21e-01  
      16             21         1.0924e-03      4.59e-05       2.89e+00       6.50e-01  
      17             22         1.0322e-03      6.02e-05       2.80e+00       5.30e-01  
      18             24         1.0004e-03      3.18e-05       1.37e+00       1.01e-01  
      19             25         9.7276e-04      2.76e-05       2.71e+00       5.07e-01  
      20             26         9.4014e-04      3.26e-05       2.63e+00       4.04e-01  
      21             28         9.2354e-04      1.66e-05       1.28e+00       8.28e-02  
      22             29         9.0343e-04      2.01e-05       2.53e+00       3.95e-01  
      23             30         8.9155e-04      1.19e-05       4.82e+00       1.48e+00  
      24             31         8.3675e-04      5.48e-05       4.37e+00       9.37e-01  
      25             33         8.1455e-04      2.22e-05       1.76e+00       1.22e-01  
      26             34         7.9193e-04      2.26e-05       3.41e+00       7.37e-01  
      27             35         7.8059e-04      1.13e-05       6.50e+00       3.22e+00  
      28             36         7.1291e-04      6.77e-05       2.43e+00       1.00e-01  
      29             37         6.6760e-04      4.53e-05       5.32e+00       2.52e+00  
      30             38         6.1812e-04      4.95e-05       2.96e+00       6.26e-01  
      31             40         5.9431e-04      2.38e-05       1.52e+00       1.42e-01  
      32             41         5.4701e-04      4.73e-05       2.92e+00       8.77e-01  
      33             42         5.0317e-04      4.38e-05       4.21e+00       4.81e+00  
      34             43         4.7950e-04      2.37e-05       7.87e-01       6.81e-05  
      35             44         4.7049e-04      9.01e-06       8.77e+00       1.23e-02  
      36             45         4.6289e-04      7.60e-06       2.97e-01       1.55e-04  
      37             47         4.6277e-04      1.20e-07       2.88e+00       9.10e-01  
      38             48         4.6212e-04      6.53e-07       1.44e+00       2.69e-01  
      39             49         4.6196e-04      1.55e-07       2.82e+00       9.28e-01  
      40             50         4.6117e-04      7.87e-07       1.52e+00       3.13e-01  
      41             51         4.6114e-04      3.49e-08       2.93e+00       1.09e+00  
      42             52         4.6015e-04      9.92e-07       7.46e-01       8.75e-02  
      43             53         4.5976e-04      3.89e-07       1.50e+00       3.05e-01  
      44             55         4.5946e-04      3.01e-07       7.61e-01       8.79e-02  
      45             56         4.5903e-04      4.22e-07       1.53e+00       3.47e-01  
      46             58         4.5867e-04      3.61e-07       7.73e-01       9.77e-02  
      47             59         4.5820e-04      4.68e-07       1.56e+00       3.80e-01  
      48             61         4.5777e-04      4.30e-07       7.85e-01       1.07e-01  
      49             62         4.5725e-04      5.21e-07       1.58e+00       4.16e-01  
      50             64         4.5674e-04      5.18e-07       7.95e-01       1.18e-01  
      51             65         4.5615e-04      5.81e-07       1.60e+00       4.56e-01  
      52             67         4.5552e-04      6.32e-07       8.04e-01       1.31e-01  
      53             68         4.5488e-04      6.47e-07       1.61e+00       5.01e-01  
      54             70         4.5409e-04      7.82e-07       8.10e-01       1.44e-01  
      55             71         4.5337e-04      7.19e-07       1.62e+00       5.50e-01  
      56             72         4.5231e-04      1.06e-06       1.64e+00       6.29e-01  
      57             74         4.5113e-04      1.18e-06       7.19e-01       1.29e-01  
      58             75         4.5019e-04      9.46e-07       1.44e+00       4.98e-01  
      59             77         4.4908e-04      1.11e-06       7.22e-01       1.45e-01  
      60             78         4.4803e-04      1.05e-06       1.44e+00       5.53e-01  
      61             79         4.4649e-04      1.54e-06       1.44e+00       6.29e-01  
      62             81         4.4476e-04      1.73e-06       6.52e-01       1.37e-01  
      63             82         4.4334e-04      1.41e-06       1.30e+00       5.29e-01  
      64             84         4.4161e-04      1.74e-06       6.40e-01       1.49e-01  
      65             85         4.3999e-04      1.62e-06       1.27e+00       5.63e-01  
      66             86         4.3754e-04      2.45e-06       1.24e+00       6.16e-01  
      67             88         4.3507e-04      2.48e-06       5.08e-01       1.09e-01  
      68             89         4.3277e-04      2.29e-06       1.02e+00       4.35e-01  
      69             91         4.3059e-04      2.18e-06       4.84e-01       1.11e-01  
      70             92         4.2791e-04      2.68e-06       9.60e-01       4.35e-01  
      71             94         4.2539e-04      2.52e-06       4.42e-01       1.02e-01  
      72             95         4.2217e-04      3.22e-06       8.80e-01       4.07e-01  
      73             97         4.1937e-04      2.80e-06       3.97e-01       8.97e-02  
      74             98         4.1553e-04      3.84e-06       7.97e-01       3.74e-01  
      75             100        4.1245e-04      3.07e-06       3.53e-01       7.69e-02  
      76             101        4.0791e-04      4.55e-06       7.17e-01       3.39e-01  
      77             102        4.0609e-04      1.81e-06       1.33e+00       1.28e+00  
      78             103        3.9204e-04      1.41e-05       2.07e-01       6.47e-03  
      79             104        3.8550e-04      6.54e-06       5.50e-01       2.61e-01  
      80             105        3.7640e-04      9.10e-06       1.01e+00       9.69e-01  
      81             106        3.6016e-04      1.62e-05       3.44e-01       7.06e-02  
      82             108        3.5059e-04      9.57e-06       4.10e-01       2.10e-01  
      83             109        3.3248e-04      1.81e-05       7.43e-01       7.37e-01  
      84             110        3.1288e-04      1.96e-05       3.00e-01       7.35e-02  
      85             112        2.9892e-04      1.40e-05       2.97e-01       1.72e-01  
      86             113        2.6939e-04      2.95e-05       5.32e-01       5.82e-01  
      87             114        2.4550e-04      2.39e-05       2.12e-01       4.76e-02  
      88             116        2.2954e-04      1.60e-05       1.75e-01       1.09e-01  
      89             117        1.9634e-04      3.32e-05       3.11e-01       3.45e-01  
      90             118        1.7589e-04      2.04e-05       1.36e-01       3.46e-02  
      91             119        1.5868e-04      1.72e-05       2.53e-01       4.30e-01  
      92             120        1.5380e-04      4.88e-06       9.94e-03       2.48e-03  
      93             121        1.5376e-04      4.34e-08       6.30e-03       9.00e-04  
      94             122        1.5375e-04      3.43e-09       4.28e-03       4.76e-04  
      95             123        1.5375e-04      1.17e-09       2.57e-03       8.19e-05  
      96             124        1.5375e-04      4.50e-10       1.60e-03       2.06e-04  
      97             125        1.5375e-04      1.74e-10       1.00e-03       1.22e-05  
      98             126        1.5375e-04      6.80e-11       6.26e-04       8.48e-05  
      99             127        1.5375e-04      2.66e-11       3.92e-04       1.88e-06  
      100            128        1.5375e-04      1.05e-11       2.46e-04       3.40e-05  
      101            129        1.5375e-04      4.12e-12       1.54e-04       2.91e-07  
      102            130        1.5375e-04      1.62e-12       9.70e-05       1.35e-05  
      103            131        1.5375e-04      6.40e-13       6.09e-05       4.52e-08  
`ftol` termination condition is satisfied.
Function evaluations 131, initial cost 4.4383e+00, final cost 1.5375e-04, first-order optimality 4.52e-08.

res

active_mask: array([0, 0, 0, 0])
       cost: 0.0001537528023415085
        fun: array([-1.30664586e-03, -1.87579954e-03,  8.91958340e-03, -1.11095297e-02,
       8.35200541e-03, -1.73677561e-04,  2.58387522e-05,  1.26262347e-03,
      -3.52355168e-03,  6.16759030e-04, -3.88872553e-03])
       grad: array([-4.52564650e-10, -1.44529678e-10,  1.55211428e-09,  8.54196275e-08])
        jac: array([[ 1.00823291,  0.04638017, -0.04676201, -0.0116905 ],
      [ 1.0000944 ,  0.08799521, -0.08800352, -0.04400176],
      [ 0.94613022,  0.15312606, -0.14487719, -0.14487719],
      [ 0.77222944,  0.21537647, -0.16632005, -0.33264011],
      [ 0.4810639 ,  0.21017745, -0.10110878, -0.40443513],
      [ 0.32429656,  0.17450668, -0.05659192, -0.33887375],
      [ 0.23664118,  0.14424711, -0.03413481, -0.27307844],
      [ 0.18392905,  0.12173779, -0.02239112, -0.22391116],
      [ 0.14925079,  0.10479279, -0.01564041, -0.18775999],
      [ 0.12508303,  0.09180226, -0.01148291, -0.160825  ],
      [ 0.10742028,  0.08160364, -0.00876589, -0.14025417]])
    message: '`ftol` termination condition is satisfied.'
       nfev: 131
       njev: 104
 optimality: 4.516920778943166e-08
     status: 2
    success: True
          x: array([0.192806  , 0.19130332, 0.12306046, 0.13607205])

res.x

1	array([0.192806 , 0.19130332, 0.12306046, 0.13607205])

from scipy.optimize import leastsq
def func(x):
    return 2*(x-5)**2+x**3 -5*x -50
a = leastsq(func, 20)
print(a,func(a[0]))

1	(array([4.09901951]), 2) [-1.42108547e-14]

1 2	b = leastsq(func,-20,full_output=True) b

(array([-6.09901951]),
 array([[0.00025849]]),
 {'fvec': array([3.55271368e-14]),
  'nfev': 17,
  'fjac': array([[-62.19803813]]),
  'ipvt': array([1], dtype=int32),
  'qtf': array([1.47077363e-06])},
 'The relative error between two consecutive iterates is at most 0.000000',
 2)

1	func(a[0])

1	array([-1.42108547e-14])

func(-6)

func(-7)

-70

func(-8)

-184

1 2	plt.plot(np.linspace(-9,5,20),func(np.linspace(-9,5,20))) plt.axhline(0)

1	<matplotlib.lines.Line2D at 0x1fad4cb7700>

1	from scipy.optimize import least_squares