面试问题总结3_python具体应用篇

读取文件的方法

正常的读取文件的方法

1
2
3

def read_file(file_route):
    """Print every line of a text file as one list.

    Args:
        file_route: Path of the file to read.

    Note:
        ``readlines()`` loads the whole file into memory, so this variant is
        only suitable for small files.
    """
    # The mode must be the string 'r'. The previous r'' was an empty raw
    # string, which open() rejects with ValueError.
    with open(file_route, 'r') as f:
        print(f.readlines())

使用生成器1

def read_file(file_route):
    """Lazily yield the lines of a text file, one at a time.

    Args:
        file_route: Path of the file to read.

    Yields:
        str: Each line, including its trailing newline when present.
    """
    # The mode must be the string 'r'. The previous r'' was an empty raw
    # string, which open() rejects with ValueError.
    with open(file_route, 'r') as f:
        for line in f:
            yield line

使用生成器2

1
2
3

def read_file(file_route):
    """Lazily yield the lines of a text file by delegating to the file iterator.

    Args:
        file_route: Path of the file to read.

    Yields:
        str: Each line, including its trailing newline when present.
    """
    # The mode must be the string 'r'. The previous r'' was an empty raw
    # string, which open() rejects with ValueError.
    with open(file_route, 'r') as f:
        yield from f

漂亮的读取文件的方法

def read_file(file_route):
    """Lazily yield the lines of a text file, one ``readline()`` call at a time.

    Iteration stops as soon as ``readline()`` returns the empty string, which
    signals end of file. Each yielded line keeps its trailing newline, if any.
    """
    with open(file_route,'r') as handle:
        while True:
            current = handle.readline()
            if current == '':
                return
            yield current

# Demo: stream a file line by line (the path is a machine-specific example).
file_route = r'D:\trainingfile\training.txt'
for line in read_file(file_route):
    # Lines returned by readline() keep their trailing newline, so print()
    # emits an extra blank line after each one.
    print(line)

使用mmap(处理超大文件)

import mmap

def read_file(file_route):
    """Yield the lines of a (potentially very large) file through a memory map.

    Args:
        file_route: Path of the file to read.

    Yields:
        bytes: Each line, including its trailing newline when present.
        An empty file yields nothing.
    """
    with open(file_route, "rb") as f:
        # mmap cannot map a zero-length file (it raises ValueError), so treat
        # an empty file as "no lines".
        f.seek(0, 2)  # 2 == os.SEEK_END
        if f.tell() == 0:
            return
        # Length 0 maps the whole file. ACCESS_READ means a read-only handle
        # is enough, so the file no longer has to be opened with "r+".
        # The `with` block closes the map even if the consumer stops early
        # or an exception propagates.
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
            # mmap objects support the usual file methods such as readline().
            yield from iter(mm.readline, b'')

if __name__=="__main__":
    file_route = r'D:\trainingfile\training.txt'
    for line in read_file(file_route):
        # Each line read through the memory map is a bytes object, not str.
        print(line)

os的常用操作

os操作 (文件和目录操作)

文件和目录操作和 Linux 命令极其相近

import os

# Get the current working directory of the process
# (not necessarily the directory that contains this file).
print(os.getcwd())

# List the entries directly inside the given directory (not recursive).
print(os.listdir(r'D:\trainingfile'))

# Create a directory.
os.mkdir('123')

# Remove a directory.
os.rmdir('123')

# Rename a file or directory.
os.rename('test.py','test2.py')

# Delete a file.
os.remove('13.py')

os.walk方法，主要用来遍历一个目录内各个子目录和子文件。

import os

for root, dirs, files in os.walk(r'D:\trainingfile'):
    print(root)   # str: path of the directory currently being visited
    print(dirs)   # list: names of the subdirectories directly inside root
    print(files)  # list: names of the non-directory files directly inside root

os.path操作 (路径操作)

import os

# NOTE: the results shown in the comments assume Windows path rules
# (backslash separators); os.path behaves differently on POSIX systems.

# Join path components.
os.path.join(r'c:\a\b',r'c\d')  # c:\a\b\c\d

# Split into (directory, last component).
os.path.split(r'c:\a\b\c\d.py')  # ('c:\\a\\b\\c', 'd.py')

# Split off the extension.
os.path.splitext(r'c:\a\b\c\d.py')  # ('c:\\a\\b\\c\\d', '.py')

# True only if the path exists AND is a directory
# (despite the name, it does not just inspect the string).
os.path.isdir(r'c:\a\b\c\d.py')  # False

# True only if the path exists AND is a regular file.
os.path.isfile(r'c:\a\b\c\d.py')  # False (presumably because the path does not exist on the author's machine)

# True if the path exists, whether it is a file or a directory.
os.path.exists(r'd:\note')
os.path.exists(r'D:\trainingfile\config.py')

# Get the parent directory part of a path.
os.path.dirname(r'D:\trainingfile\config.py')  # D:\trainingfile
os.path.dirname(r'D:\trainingfile')  # D:\

获取目录下所有文件(深度优先-递归版)

import os

def print_dir_content(dir_name):
    """Print the name of every entry below ``dir_name``, depth first (recursive).

    Each entry is printed as a bare name (not a full path) before its own
    children are visited.

    Args:
        dir_name: Path of the directory to traverse.
    """
    for child in os.listdir(dir_name):
        print(child)
        child_path = os.path.join(dir_name, child)
        # Do not descend into symlinked directories: a link that points back
        # at an ancestor would make the traversal revisit the same tree over
        # and over. This matches os.walk's default (followlinks=False).
        if os.path.isdir(child_path) and not os.path.islink(child_path):
            print_dir_content(child_path)


# Guarded so that importing this snippet does not start a traversal.
if __name__ == '__main__':
    print_dir_content(r'F:\乱七八糟\360安全浏览器下载\chrome\下载\messing\小说')

获取目录下所有文件(深度优先-迭代版)

import os

def print_dir_content(dir_name):
    """Print the full path of ``dir_name`` and of everything below it (iterative DFS).

    An explicit stack replaces recursion, so deep trees cannot hit the
    recursion limit.

    Args:
        dir_name: Path of the directory to traverse.
    """
    stack = [dir_name]

    while stack:
        cur = stack.pop()
        print(cur)

        if os.path.isdir(cur):
            # Children must be joined to the directory being expanded (cur).
            # Joining them to the root dir_name produced wrong paths for
            # everything below the first level.
            stack.extend(os.path.join(cur, child) for child in os.listdir(cur))


# Guarded so that importing this snippet does not start a traversal.
if __name__ == '__main__':
    print_dir_content(r'F:\乱七八糟\360安全浏览器下载\chrome\下载\messing\小说')

获取目录下所有文件(广度优先-迭代版)

import os

def print_dir_content(dir_name):
    """Print the name of every entry below ``dir_name``, level by level (BFS).

    Args:
        dir_name: Path of the directory to traverse.
    """
    # Each queue entry is a full path. Storing bare names and joining them to
    # the root made isdir() fail for anything deeper than the first level, so
    # the traversal used to stop early.
    queue = [os.path.join(dir_name, child) for child in os.listdir(dir_name)]

    while queue:
        next_level = []
        for path in queue:
            print(os.path.basename(path))
            if os.path.isdir(path):
                next_level.extend(
                    os.path.join(path, child) for child in os.listdir(path)
                )
        queue = next_level


# Guarded so that importing this snippet does not start a traversal.
if __name__ == '__main__':
    print_dir_content(r'F:\乱七八糟\360安全浏览器下载\chrome\下载\messing\小说')

Time模块

time模块基于Unix Timestamp。
datetime模块基于time进行了封装，提供更多函数。

这两个模块都处理时间，但是对于时间的封装却不同。

time模块提供struct_time类, 即（time tuple, p_tuple, 时间元组）

datetime模块常用datetime和timedelta类，也提供了date、time类

20180124174004581

import time

# Get the current local time as a struct_time.
p_tuple = time.localtime()
print(p_tuple)  # time.struct_time(tm_year=2019, tm_mon=8, tm_mday=6, tm_hour=9, tm_min=30, tm_sec=58, tm_wday=1, tm_yday=218, tm_isdst=0)


# Get a timestamp for the current time.
print(time.time())  # 1565055058.6990192
# Get a timestamp from a struct_time.
print(time.mktime(p_tuple))  # 1565055058.0


# Get a human-readable time string from a struct_time.
print(time.asctime(p_tuple))   # Tue Aug  6 09:32:37 2019
# Get a human-readable time string from a timestamp.
print(time.ctime(time.time())) # Tue Aug  6 09:32:37 2019


# Formatting: struct_time -> string.
print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
# Parsing: string -> struct_time.
s = '2016-08-25 16:30:58'
print(time.strptime(s, "%Y-%m-%d %H:%M:%S"))

字典两种排序

# Task: given d = {'a':24,'g':52,'i':12,'k':33}, sort it by value.

d = {'a': 24, 'g': 52, 'i': 12, 'k': 33}

# Approach 1: build (value, key) pairs and sort the pairs.
for val, key in sorted((number, name) for name, number in d.items()):
    print(val, key)
# 12 i
# 24 a
# 33 k
# 52 g

# Approach 2: sort the keys, using each key's value as the sort key.
x = sorted(d.keys(), key=d.get)
print(x)
# ['i', 'a', 'k', 'g']

列表有索引错误,但是没有切片错误

# Indexing past the end raises IndexError; slicing past the end never fails.
# The variable is deliberately not called `list`, which would shadow the builtin.
letters = ['a','b','c','d','e']

# Catch the error so the slice demo below still runs.
try:
    print(letters[10])
except IndexError as exc:
    print(f'IndexError: {exc}')   # IndexError: list index out of range

print(letters[10:])  # prints [] -- an out-of-range slice is just empty

集合操作

# Example operands so the snippet runs on its own.
t = {1, 2, 3}
s = {3, 4}

a = t | s   # union of t and s                                   -> {1, 2, 3, 4}
b = t & s   # intersection of t and s                            -> {3}
c = t - s   # difference: items in t but not in s                -> {1, 2}
            # (must be the ASCII minus sign; an en dash is a SyntaxError)
d = t ^ s   # symmetric difference: in t or s, but not in both   -> {1, 2, 4}

创建单例的方法

法一 : 使用基类

class A:
    """Singleton implemented by overriding ``__new__``.

    Every call to ``A(...)`` returns the same object. ``__init__`` still runs
    on each call, so the attributes are overwritten by the latest call.
    Subclasses get their own single instance.
    """

    # The single instance of the class; None until the first instantiation.
    _instance = None

    def __new__(cls, *args, **kwargs):
        # Look only in this class's own namespace. A normal attribute lookup
        # would find the parent's instance through inheritance, so a subclass
        # would be handed an object of the parent type.
        if cls.__dict__.get('_instance') is None:
            cls._instance = super().__new__(cls)

        return cls._instance

    def __init__(self, name):
        self.name = name

a1 = A('hyl')
a2 = A('dsz')

print(id(a1)) # 1985848916792
print(id(a2)) # 1985848916792

# The attribute is overwritten by the most recent instantiation.
print(a1.name) # dsz
print(a2.name) # dsz

法二 : 使用类装饰器

def singleton(cls):
    """Class decorator that makes ``cls`` a singleton.

    The first call creates the instance with the given arguments; every later
    call returns that same instance and ignores its arguments.

    Args:
        cls: The class to decorate.

    Returns:
        A factory function that stands in for the class. Note that the
        decorated name is a function afterwards, so it cannot be used with
        ``isinstance`` or be subclassed.
    """
    from functools import wraps

    # Maps the class to its single instance.
    instances = {}

    # Keep the class's __name__/__doc__/__module__ on the factory.
    # updated=() skips copying the class __dict__ into the function.
    @wraps(cls, updated=())
    def wrap(*args, **kwargs):
        if cls not in instances:
            instances[cls] = cls(*args, **kwargs)
        return instances[cls]
    return wrap

@singleton
class A:
    pass

a1 = A()
a2 = A()
print(a1 is a2)  # True

法三 : 使用元类

class Singleton(type):
    """Metaclass that gives each class using it exactly one instance."""

    def __call__(cls, *args, **kwargs):
        # Check the class's own namespace rather than hasattr(): hasattr()
        # also sees a parent's `_instance` through inheritance, which made a
        # subclass return the parent's object instead of creating its own.
        if '_instance' not in cls.__dict__:
            cls._instance = super().__call__(*args, **kwargs)
        return cls._instance


class Foo(metaclass=Singleton):
    pass

foo1 = Foo()
foo2 = Foo()
print(foo1 is foo2)  # True

设计实现遍历目录与子目录，抓取.pyc文件

法一 : 使用递归

import os

def find_pyc(dir_name):
    """Recursively yield the path of every ``.pyc`` file below ``dir_name``.

    Subdirectories are descended into as they are encountered, so results
    arrive depth first in ``os.listdir`` order.
    """
    for entry in os.listdir(dir_name):
        entry_path = os.path.join(dir_name, entry)

        # Directories: delegate to a nested traversal.
        if os.path.isdir(entry_path):
            yield from find_pyc(entry_path)

        # Regular files: keep only those whose extension is exactly .pyc.
        if not os.path.isfile(entry_path):
            continue
        extension = os.path.splitext(entry_path)[-1]
        if extension == '.pyc':
            yield entry_path


if __name__ == '__main__':
    workspace = r'D:\note\Celery\_trainingfile\workspace'
    for pyc_path in find_pyc(workspace):
        print(pyc_path)

法二 : 使用os.walk

import os

def find_pyc(dir_route):
    """Return a list with the path of every ``.pyc`` file below ``dir_route``.

    The tree is traversed with ``os.walk``; a file qualifies when its
    extension is exactly ``.pyc``.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(dir_route)
        for filename in filenames
        if os.path.splitext(filename)[-1] == '.pyc'
    ]

if __name__ == '__main__':
    workspace = r'D:\note\Celery\_trainingfile\workspace'
    print(find_pyc(workspace))

字符串的操作题目

全字母短句 PANGRAM 是包含所有英文字母的句子。实现一个方法 get_missing_letter，传入一个字符串参数，返回该字符串要变成 PANGRAM 所缺失的字符

from string import ascii_lowercase
from collections import OrderedDict

def get_missing_letter(string):
    """Return the letters that ``string`` lacks to be a pangram.

    The check is case-insensitive and ignores every character that is not an
    ASCII letter.

    Args:
        string: The sentence to inspect.

    Returns:
        str: The missing lowercase letters in alphabetical order, or ``''``
        when the sentence already is a pangram. (Earlier versions printed the
        letters and returned None, although the task asks for a return value.)
    """
    present = set(string.lower())
    return ''.join(letter for letter in ascii_lowercase if letter not in present)


print(get_missing_letter("A quick brown for jumps over the lazy dog"))  # x

不使用内置api,将字符串 `"123"` 转换成 `123`

def change(str_nums):
    """Convert a decimal string such as ``"123"`` to an int without ``int()``.

    Args:
        str_nums: ASCII digits, optionally preceded by a single '+' or '-'.

    Returns:
        int: The numeric value of the string.

    Raises:
        ValueError: If the string is empty, has no digits after the sign, or
            contains a character that is not an ASCII digit. (Such input used
            to produce a meaningless number silently.)
    """
    if not str_nums:
        raise ValueError('cannot convert an empty string to int')

    sign = 1
    digits = str_nums
    if str_nums[0] in '+-':
        if str_nums[0] == '-':
            sign = -1
        digits = str_nums[1:]
    if not digits:
        raise ValueError(f'no digits in {str_nums!r}')

    zero = ord('0')
    res = 0
    for char in digits:
        # A digit's value is its code-point distance from '0'.
        value = ord(char) - zero
        if not 0 <= value <= 9:
            raise ValueError(f'invalid digit {char!r} in {str_nums!r}')
        res = res * 10 + value
    return sign * res

print(change('1382'))  # 1382

统计一个文本中单词频次最高的10个单词

# 统计一个文本中单词频次最高的10个单词
import re
from heapq import nlargest

def top10(file_name):
    """Return the ten most frequent words of a text file, most frequent first.

    A word is a run of ``\\w`` characters; everything else acts as a
    separator. Counting is case-sensitive.
    """
    counts = {}
    with open(file_name,'r') as source:
        for raw_line in source:
            # Turn every run of non-word characters into a single space,
            # then split on whitespace.
            cleaned = re.sub(r'\W+',' ',raw_line)
            for token in cleaned.split():
                counts[token] = counts.get(token,0) + 1

    # Iterating the dict yields its keys; each key is ranked by its count.
    return nlargest(10,counts,key=counts.get)


if __name__ == '__main__':
    route = r'D:\trainingfile\config.yaml'
    print(top10(route))

返回索引

注意 : index是list和tuple的方法

x = 1
y = [1,2,3,5]

# index() returns the position of the first element equal to x.
print(y.index(x)) # 0

复杂排序

传入list,让所有奇数都在偶数前面，而且奇数升序排列，偶数降序排序

1
2
3

def func(alist):
    """Sort so that odd numbers come first (ascending), then even numbers (descending).

    Args:
        alist: Iterable of integers (or values that ``int()`` accepts).

    Returns:
        list: A new sorted list; the input is not modified.
    """
    def sort_key(item):
        number = int(item)
        # The first tuple field puts odds (0) before evens (1). Negating the
        # evens sorts them in descending order. Unlike the old `20 - x` key,
        # this works for values of any size.
        if number % 2:
            return (0, number)
        return (1, -number)

    return sorted(alist, key=sort_key)
print(func([1,2,3,4,5,6,7,8,9]))  # [1, 3, 5, 7, 9, 8, 6, 4, 2]

搜索旋转数组

假设按照升序排序的数组在预先未知的某个点上进行了旋转。

( 例如，数组 [0,0,1,2,2,5,6] 可能变为 [2,5,6,0,0,1,2] )。

编写一个函数来判断给定的目标值是否存在于数组中。若存在返回 true，否则返回 false。

示例 1:

输入: nums = [2,5,6,0,0,1,2], target = 0
输出: true

示例 2:

输入: nums = [2,5,6,0,0,1,2], target = 3
输出: false

class Solution(object):
    """Binary search in an ascending array that was rotated at an unknown pivot."""

    def search(self, nums, target):
        """Find ``target`` in a rotated sorted array that may contain duplicates.

        Args:
            nums: The rotated array, e.g. ``[2,5,6,0,0,1,2]``.
            target: The value to look for.

        Returns:
            int: An index ``i`` with ``nums[i] == target``, or ``-1`` when the
            target is absent. ``search(...) != -1`` gives the true/false
            answer the problem statement asks for.
        """
        # The search window is the half-open interval [left, right).
        left = 0
        right = len(nums)

        while left < right:
            mid = left + (right - left) // 2

            if nums[mid] == target:
                return mid

            if nums[left] == nums[mid]:
                # Duplicates hide which half is sorted. nums[left] equals
                # nums[mid], which is not the target, so it can be dropped.
                left += 1
                continue

            if nums[left] < nums[mid]:
                # The left part [left, mid) is sorted.
                if nums[left] <= target < nums[mid]:
                    right = mid
                else:
                    left = mid + 1
            else:
                # The right part (mid, right) is sorted. Its last element is
                # nums[right - 1]; nums[right] would be out of range.
                if nums[mid] < target <= nums[right - 1]:
                    left = mid + 1
                else:
                    right = mid
        return -1

s = Solution()
print(s.search([4,5,6,7,0,1,2],6))

遍历一个object的所有属性，并print每一个属性名

使用dir方法

常用的装饰器写法

使用函数作为装饰器

from time import perf_counter

def deco(func):
    """Decorator that prints how long each call to ``func`` takes.

    The elapsed time in seconds is printed with two decimals after the call
    finishes. The wrapped function's return value is passed through.

    Args:
        func: The callable to time.

    Returns:
        The timing wrapper around ``func``.
    """
    from functools import wraps

    @wraps(func)  # keep func's __name__ and docstring on the wrapper
    def wrap(*args,**kwargs):
        start = perf_counter()
        # Keep the result: the wrapper used to discard it, so every decorated
        # function returned None.
        result = func(*args,**kwargs)
        print(f'{perf_counter() - start :0.2f}')
        return result
    return wrap

@deco
def func():
    """Build a large list of consecutive integers; used as a workload to time."""
    return list(range(1,11177771))

func()

使用类作为装饰器

from time import perf_counter

class TimeCounter:
    """Class-based decorator that prints how long each call takes.

    The instance replaces the decorated function. Calling it runs the
    original function, prints the elapsed seconds, and returns the original
    function's result.
    """

    def __init__(self,func):
        from functools import update_wrapper

        self.func = func
        # Copy func's __name__/__doc__ onto this instance.
        update_wrapper(self, func)

    def __call__(self,*args,**kwargs):
        start = perf_counter()
        # Keep the result: it used to be discarded, so every decorated
        # function returned None.
        result = self.func(*args,**kwargs)
        print(perf_counter() - start)
        return result

@TimeCounter
def func():
    """Build a large list of consecutive integers; used as a workload to time."""
    return list(range(1,11177771))

func()

一行代码解决阶乘函数

from functools import reduce
# 4! computed in one expression: multiply the numbers 1..4 together.
print(reduce(lambda x,y:x*y,range(1,5))) # 24

一行代码实现将1-N 的整数列表以3为单位分组

N =10

# Each chunk starts at 1, 4, 7, ... and holds up to three consecutive
# integers, clipped at N.
print([list(range(start, min(start+3, N+1))) for start in range(1,N+1,3)])
# [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10]]