from uuid import uuid4
import datetime
import logging
import copy
from abc import ABCMeta, abstractmethod

from lxml import etree
import pymysql
from twisted.enterprise import adbapi


class Util_WANG(metaclass=ABCMeta):

    @classmethod
    def pos_url(cls, item, settings, response=None):
        """
        Decide whether the link points directly at a downloadable resource
        (pdf, image, archive, etc.).
        :param item: the source item
        :param settings: the Scrapy settings object
        :param response: the request response
        :return: True if the link itself must be downloaded, False otherwise
        """
        jiewei = ['.doc', '.xls', '.docx', '.xlsx', '.txt', '.rar', '.zip',
                  '.wps', '.pdf', '.jpg', '.png', '.jpeg', '.gif', '.svg']
        s = any(item['lianjie'].endswith(ext) for ext in jiewei)
        if s:
            year = datetime.datetime.now().strftime('%Y')
            month = datetime.datetime.now().strftime('%m')
            item['wenjian'] = [{'file_name': '原文件'}]
            item['wenjian'][0]['file_url'] = item['lianjie']
            houzui = item['wenjian'][0]['file_url'][
                item['wenjian'][0]['file_url'].rfind('/') + 1:]
            new_url = '/' + year + '/' + month + '/' + cls.short_uuid() + '_' + houzui
            item['wenjian'][0]['new_file'] = new_url
            # The original HTML literal was garbled in extraction; rebuilt
            # here as a minimal anchor around the surviving visible text.
            item['xiangqing'] = ('<p>请查看原文附件:<a href="' + new_url
                                 + '">原文件</a></p>')
        return s

    @classmethod
    def jiewei_href_contains(cls):
        """
        Return an XPath predicate matching document-file suffixes in @href.
        """
        jiewei = ['.doc', '.xls', '.docx', '.xlsx', '.txt', '.rar', '.zip',
                  '.wps', '.pdf']
        return ' or '.join('contains(@href,"' + j + '")' for j in jiewei)

    @classmethod
    def jiewei_src_contains(cls):
        """
        Return an XPath predicate matching image-file suffixes in @src.
        """
        jiewei = ['.jpg', '.png', '.jpeg', '.gif', '.svg']
        return ' or '.join('contains(@src,"' + j + '")' for j in jiewei)

    @classmethod
    def short_uuid(cls):
        """Derive a short 8-character id from a UUID4."""
        uuid_chars = ('abcdefghijklmnopqrstuvwxyz'
                      '0123456789'
                      'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        uuid = str(uuid4()).replace('-', '')
        result = ''
        for i in range(8):
            sub = uuid[i * 4: i * 4 + 4]
            x = int(sub, 16)
            result += uuid_chars[x % 0x3E]  # 0x3E == 62 == len(uuid_chars)
        return result

    @classmethod
    def tihuan_a_return(cls, item, tihuanlujing, response=None):
        """
        Rewrite the <a> tags in the page detail: each matching href is
        replaced with a local path and recorded in item['wenjian'].
        Subclasses must implement a_fun.
        :param item: the item to populate
        :param tihuanlujing: path prefix for the rewritten file links
        :param response: optional response, used to resolve relative URLs
        """
        txt = item['xiangqing']
        year = datetime.datetime.now().strftime('%Y')
        month = datetime.datetime.now().strftime('%m')
        html = etree.HTML(txt)
        alis = html.xpath('//a[@href and (' + cls.jiewei_href_contains() + ')]')
        for alis_single in alis:
            single_a_file = {}
            href = str(alis_single.xpath('@href')[0])
            content = str(alis_single.xpath('string(.)'))
            if not content.strip():
                content = '_'
            single_a_file['file_name'] = content
            # Resolve the actual download URL for this link.
            old_url = href
            if old_url.lower().startswith('http'):
                single_a_file['file_url'] = old_url
            elif response is not None and old_url.startswith(('./', '../', '/')):
                single_a_file['file_url'] = response.urljoin(old_url)
            else:
                # a_fun is an instance-style hook; the class object is passed
                # as self, matching the original call convention.
                single_a_file['file_url'] = cls.a_fun(cls, href)
            houzui = single_a_file['file_url'][single_a_file['file_url'].rfind('/') + 1:]
            new_url = '/' + year + '/' + month + '/' + cls.short_uuid() + '_' + houzui
            txt = txt.replace(old_url, tihuanlujing + new_url)
            single_a_file['new_file'] = new_url
            try:
                item['wenjian'].append(single_a_file)
            except KeyError:
                item['wenjian'] = [single_a_file]
        item['xiangqing'] = txt

    @classmethod
    def tihuan_img_return(cls, item, tihuanlujing, response=None):
        """
        Rewrite the <img> tags in the page detail: each matching src is
        replaced with a local path and recorded in item['wenjian'].
        Subclasses must implement img_fun.
        :param item: the item to populate
        :param tihuanlujing: path prefix for the rewritten image links
        :param response: optional response, used to resolve relative URLs
        """
        txt = item['xiangqing']
        year = datetime.datetime.now().strftime('%Y')
        month = datetime.datetime.now().strftime('%m')
        html = etree.HTML(txt)
        imglis = html.xpath('//img[@src and (' + cls.jiewei_src_contains() + ')]')
        for imglis_single in imglis:
            single_src_file = {}
            src = str(imglis_single.xpath('@src')[0])
            content = str(imglis_single.xpath('string(.)'))
            if not content.strip():
                content = '_'
            single_src_file['file_name'] = content
            # Resolve the actual download URL for this image.
            old_url = src
            if old_url.lower().startswith('http'):
                single_src_file['file_url'] = old_url
            elif response is not None and old_url.startswith(('./', '../', '/')):
                single_src_file['file_url'] = response.urljoin(old_url)
            else:
                single_src_file['file_url'] = cls.img_fun(cls, src)
            houzui = single_src_file['file_url'][single_src_file['file_url'].rfind('/') + 1:]
            new_url = '/' + year + '/' + month + '/' + cls.short_uuid() + '_' + houzui
            txt = txt.replace(old_url, tihuanlujing + new_url)
            single_src_file['new_file'] = new_url
            try:
                item['wenjian'].append(single_src_file)
            except KeyError:
                item['wenjian'] = [single_src_file]
        item['xiangqing'] = txt

    @abstractmethod
    def a_fun(self, href):
        """
        Return the processed href, i.e. resolve it to an absolute
        download URL.
        """
        pass

    @abstractmethod
    def img_fun(self, src):
        """
        Return the processed src, i.e. resolve it to an absolute
        download URL.
        """
        pass
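
# A minimal sketch of a concrete subclass, showing how the two abstract
# hooks are meant to be implemented. The domain "http://www.example.com"
# and the call sites below are assumptions for illustration only.
class ExampleSiteUtil(Util_WANG):

    def a_fun(self, href):
        # Resolve a bare relative document link to an absolute download URL
        # (hypothetical site root).
        return 'http://www.example.com/' + href.lstrip('/')

    def img_fun(self, src):
        # Resolve a bare relative image link the same way.
        return 'http://www.example.com/' + src.lstrip('/')

# Typical call sites inside a spider callback (hypothetical):
#   if not ExampleSiteUtil.pos_url(item, self.settings, response):
#       ExampleSiteUtil.tihuan_a_return(item, '/files', response=response)
#       ExampleSiteUtil.tihuan_img_return(item, '/images', response=response)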

# Shared asynchronous-insert pipeline base class.
class Asyninser(object):
    """
    Subclasses inherit this __init__:
        def __init__(self, dbpool):
            self.dbpool = dbpool
    and must implement do_insert:
        def do_insert(self, cursor, item):
    """

    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        """
        Build the database connection pool. The method name is fixed:
        Scrapy calls it and passes in the settings.
        :param settings: the Scrapy settings
        :return: a pipeline instance
        """
        adbparams = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DATABASE'],
            user=settings['MYSQL_USER'],
            password=settings['MYSQL_PASSWORD'],
            cursorclass=pymysql.cursors.DictCursor  # cursor type
        )
        # Create the adbapi ConnectionPool; pymysql (or MySQLdb) connects.
        dbpool = adbapi.ConnectionPool('pymysql', **adbparams)
        return cls(dbpool)

    def close_spider(self, spider):
        # Shut the pool down so its worker threads do not outlive the spider.
        self.dbpool.close()
        logging.info('Spider finished.')

    def process_item(self, item, spider):
        """
        Run the MySQL insert asynchronously with Twisted: the connection
        pool executes the concrete SQL and returns a Deferred.
        """
        asynItem = copy.deepcopy(item)
        query = self.dbpool.runInteraction(self.do_insert, asynItem)
        query.addErrback(self.handle_error, asynItem, spider)  # handle failures
        return asynItem

    def handle_error(self, failure, asynItem, spider):
        if failure:
            # Log the insert failure.
            logging.error('---------- database insert error ----------')
            logging.error(failure)
            logging.error('---------- end of error ----------')
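
# A minimal sketch of a concrete pipeline built on Asyninser. The table name
# "articles" and its columns are assumptions for illustration; do_insert runs
# in a pool thread with the DictCursor that runInteraction supplies, and the
# pool commits the interaction on success.
class ExampleInsertPipeline(Asyninser):

    def do_insert(self, cursor, item):
        # Persist the fields this module populates (hypothetical schema).
        sql = 'INSERT INTO articles (lianjie, xiangqing) VALUES (%s, %s)'
        cursor.execute(sql, (item.get('lianjie'), item.get('xiangqing')))

# Enabled in settings.py (hypothetical project path):
# ITEM_PIPELINES = {'myproject.pipelines.ExampleInsertPipeline': 300}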